2  * Copyright (c) 2009-2010, Salvatore Sanfilippo <antirez at gmail dot com> 
   5  * Redistribution and use in source and binary forms, with or without 
   6  * modification, are permitted provided that the following conditions are met: 
   8  *   * Redistributions of source code must retain the above copyright notice, 
   9  *     this list of conditions and the following disclaimer. 
  10  *   * Redistributions in binary form must reproduce the above copyright 
  11  *     notice, this list of conditions and the following disclaimer in the 
  12  *     documentation and/or other materials provided with the distribution. 
  13  *   * Neither the name of Redis nor the names of its contributors may be used 
  14  *     to endorse or promote products derived from this software without 
  15  *     specific prior written permission. 
  17  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 
  18  * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 
  19  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 
  20  * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE 
  21  * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 
  22  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 
  23  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 
  24  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 
  25  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 
  26  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 
  27  * POSSIBILITY OF SUCH DAMAGE. 
  30 #define REDIS_VERSION "1.3.8" 
  40 #define __USE_POSIX199309 
  47 #endif /* HAVE_BACKTRACE */ 
  55 #include <arpa/inet.h> 
  59 #include <sys/resource.h> 
  66 #include "solarisfixes.h" 
  70 #include "ae.h"     /* Event driven programming library */ 
  71 #include "sds.h"    /* Dynamic safe strings */ 
  72 #include "anet.h"   /* Networking the easy way */ 
  73 #include "dict.h"   /* Hash tables */ 
  74 #include "adlist.h" /* Linked lists */ 
  75 #include "zmalloc.h" /* total memory usage aware version of malloc/free */ 
  76 #include "lzf.h"    /* LZF compression library */ 
  77 #include "pqsort.h" /* Partial qsort for SORT+LIMIT */ 
  84 /* Static server configuration */ 
  85 #define REDIS_SERVERPORT        6379    /* TCP port */ 
  86 #define REDIS_MAXIDLETIME       (60*5)  /* default client timeout */ 
  87 #define REDIS_IOBUF_LEN         1024 
  88 #define REDIS_LOADBUF_LEN       1024 
  89 #define REDIS_STATIC_ARGS       8 
  90 #define REDIS_DEFAULT_DBNUM     16 
  91 #define REDIS_CONFIGLINE_MAX    1024 
  92 #define REDIS_OBJFREELIST_MAX   1000000 /* Max number of objects to cache */ 
  93 #define REDIS_MAX_SYNC_TIME     60      /* Slave can't take more to sync */ 
  94 #define REDIS_EXPIRELOOKUPS_PER_CRON    10 /* try to expire 10 keys/loop */ 
  95 #define REDIS_MAX_WRITE_PER_EVENT (1024*64) 
  96 #define REDIS_REQUEST_MAX_SIZE (1024*1024*256) /* max bytes in inline command */ 
  98 /* If more than REDIS_WRITEV_THRESHOLD write packets are pending use writev */ 
  99 #define REDIS_WRITEV_THRESHOLD      3 
 100 /* Max number of iovecs used for each writev call */ 
 101 #define REDIS_WRITEV_IOVEC_COUNT    256 
 103 /* Hash table parameters */ 
 104 #define REDIS_HT_MINFILL        10      /* Minimal hash table fill 10% */ 
 /* Command flags: single-bit values that are combined with '|' in the flags
  * column of the cmdTable entries. */
 107 #define REDIS_CMD_BULK          1       /* Bulk write command */ 
 108 #define REDIS_CMD_INLINE        2       /* Inline command */ 
 109 /* REDIS_CMD_DENYOOM deserves a longer comment: all the commands marked with 
 110    this flag will return an error when the 'maxmemory' option is set in the 
 111    config file and the server is using more than maxmemory bytes of memory. 
 112    In short these commands are denied on low memory conditions. */ 
 113 #define REDIS_CMD_DENYOOM       4 
 114 #define REDIS_CMD_FORCE_REPLICATION 8 /* Force replication even if dirty is 0 */ 
 117 #define REDIS_STRING 0 
 123 /* Objects encoding. Some kinds of objects like Strings and Hashes can be 
 124  * internally represented in multiple ways. The 'encoding' field of the object 
 125  * is set to one of these values for this object. */ 
 126 #define REDIS_ENCODING_RAW 0    /* Raw representation */ 
 127 #define REDIS_ENCODING_INT 1    /* Encoded as integer */ 
 128 #define REDIS_ENCODING_ZIPMAP 2 /* Encoded as zipmap */ 
 129 #define REDIS_ENCODING_HT 3     /* Encoded as an hash table */ 
 131 static char* strencoding
[] = { 
 132     "raw", "int", "zipmap", "hashtable" 
 135 /* Object types only used for dumping to disk */ 
 136 #define REDIS_EXPIRETIME 253 
 137 #define REDIS_SELECTDB 254 
 138 #define REDIS_EOF 255 
 140 /* Defines related to the dump file format. To store 32 bits lengths for short 
 141  * keys requires a lot of space, so we check the most significant 2 bits of 
 142  * the first byte to interpret the length: 
 144  * 00|000000 => if the two MSB are 00 the len is the 6 bits of this byte 
 145  * 01|000000 00000000 => if it's 01, the len is 14 bits, 6 bits + 8 bits of next byte 
 146  * 10|000000 [32 bit integer] => if it's 10, a full 32 bit len will follow 
 147  * 11|000000 this means: specially encoded object will follow. The six bits 
 148  *           number specify the kind of object that follows. 
 149  *           See the REDIS_RDB_ENC_* defines. 
 151  * Lengths up to 63 are stored using a single byte, most DB keys, and many 
 152  * values, will fit inside. */ 
 153 #define REDIS_RDB_6BITLEN 0 /* MSB pair 00: length is the low 6 bits of the first byte */ 
 154 #define REDIS_RDB_14BITLEN 1 /* MSB pair 01: 6 bits of the first byte + 8 bits of the next */ 
 155 #define REDIS_RDB_32BITLEN 2 /* MSB pair 10: a full 32 bit length follows */ 
 156 #define REDIS_RDB_ENCVAL 3 /* MSB pair 11: specially encoded object, see REDIS_RDB_ENC_* */ 
 /* NOTE(review): presumably the sentinel reported when a length cannot be
  * read; confirm at the call sites. */
 157 #define REDIS_RDB_LENERR UINT_MAX 
 159 /* When a length of a string object stored on disk has the first two bits 
 160  * set, the remaining six bits specify a special encoding for the object 
 161  * according to the following defines: */ 
 162 #define REDIS_RDB_ENC_INT8 0        /* 8 bit signed integer */ 
 163 #define REDIS_RDB_ENC_INT16 1       /* 16 bit signed integer */ 
 164 #define REDIS_RDB_ENC_INT32 2       /* 32 bit signed integer */ 
 165 #define REDIS_RDB_ENC_LZF 3         /* string compressed with LZF */ 
 167 /* Virtual memory object->where field. */ 
 168 #define REDIS_VM_MEMORY 0       /* The object is on memory */ 
 169 #define REDIS_VM_SWAPPED 1      /* The object is on disk */ 
 170 #define REDIS_VM_SWAPPING 2     /* Redis is swapping this object on disk */ 
 171 #define REDIS_VM_LOADING 3      /* Redis is loading this object from disk */ 
 173 /* Virtual memory static configuration stuff. 
 174  * Check vmFindContiguousPages() to know more about these magic numbers. */ 
 175 #define REDIS_VM_MAX_NEAR_PAGES 65536 
 176 #define REDIS_VM_MAX_RANDOM_JUMP 4096 
 177 #define REDIS_VM_MAX_THREADS 32 
 178 #define REDIS_THREAD_STACK_SIZE (1024*1024*4) 
 179 /* The following is the *percentage* of completed I/O jobs to process when the 
 180  * handler is called. While Virtual Memory I/O operations are performed by 
 181  * threads, these operations must be processed by the main thread when completed 
 182  * in order to take effect. */ 
 183 #define REDIS_MAX_COMPLETED_JOBS_PROCESSED 1 
 /* Client flags: single-bit values OR-ed together in the 'flags' field of
  * redisClient (REDIS_SLAVE | REDIS_MONITOR | REDIS_MULTI ...). */
 186 #define REDIS_SLAVE 1       /* This client is a slave server */ 
 187 #define REDIS_MASTER 2      /* This client is a master server */ 
 188 #define REDIS_MONITOR 4     /* This client is a slave monitor, see MONITOR */ 
 189 #define REDIS_MULTI 8       /* This client is in a MULTI context */ 
 190 #define REDIS_BLOCKED 16    /* The client is waiting in a blocking operation */ 
 191 #define REDIS_IO_WAIT 32    /* The client is waiting for Virtual Memory I/O */ 
 193 /* Slave replication state - slave side */ 
 194 #define REDIS_REPL_NONE 0   /* No active replication */ 
 195 #define REDIS_REPL_CONNECT 1    /* Must connect to master */ 
 196 #define REDIS_REPL_CONNECTED 2  /* Connected to master */ 
 198 /* Slave replication state - from the point of view of master 
 199  * Note that in SEND_BULK and ONLINE state the slave receives new updates 
 200  * in its output queue. In the WAIT_BGSAVE state instead the server is waiting 
 201  * to start the next background saving in order to send updates to it. */ 
 202 #define REDIS_REPL_WAIT_BGSAVE_START 3 /* master waits bgsave to start feeding it */ 
 203 #define REDIS_REPL_WAIT_BGSAVE_END 4 /* master waits bgsave to start bulk DB transmission */ 
 204 #define REDIS_REPL_SEND_BULK 5 /* master is sending the bulk DB */ 
 205 #define REDIS_REPL_ONLINE 6 /* bulk DB already transmitted, receive updates */ 
 207 /* List related stuff */ 
 211 /* Sort operations */ 
 212 #define REDIS_SORT_GET 0 
 213 #define REDIS_SORT_ASC 1 
 214 #define REDIS_SORT_DESC 2 
 215 #define REDIS_SORTKEY_MAX 1024 
 218 #define REDIS_DEBUG 0 
 219 #define REDIS_VERBOSE 1 
 220 #define REDIS_NOTICE 2 
 221 #define REDIS_WARNING 3 
 223 /* Anti-warning macro... */ 
 224 #define REDIS_NOTUSED(V) ((void) V) 
 226 #define ZSKIPLIST_MAXLEVEL 32 /* Should be enough for 2^32 elements */ 
 227 #define ZSKIPLIST_P 0.25      /* Skiplist P = 1/4 */ 
 229 /* Append only defines */ 
 230 #define APPENDFSYNC_NO 0 
 231 #define APPENDFSYNC_ALWAYS 1 
 232 #define APPENDFSYNC_EVERYSEC 2 
 234 /* Hashes related defaults */ 
 235 #define REDIS_HASH_MAX_ZIPMAP_ENTRIES 64 
 236 #define REDIS_HASH_MAX_ZIPMAP_VALUE 512 
 238 /* We can print the stacktrace, so our assert is defined this way: */ 
 /* redisAssert(_e): evaluates _e once. If it is true the macro yields (void)0.
  * If it is false it calls _redisAssert() with the stringified expression,
  * the current __FILE__ and __LINE__, and then terminates the process with
  * _exit(1). */
 239 #define redisAssert(_e) ((_e)?(void)0 : (_redisAssert(#_e,__FILE__,__LINE__),_exit(1))) 
 /* Failure handler invoked by redisAssert().
  *   estr - text of the expression that failed
  *   file - source file name of the failing assertion
  *   line - source line number of the failing assertion */
 240 static void _redisAssert(char *estr
, char *file
, int line
); 
 242 /*================================= Data types ============================== */ 
 244 /* A redis object, that is a type able to hold a string / list / set */ 
 246 /* The VM object structure */ 
 247 struct redisObjectVM 
{ 
 248     off_t page
;         /* the page at witch the object is stored on disk */ 
 249     off_t usedpages
;    /* number of pages used on disk */ 
 250     time_t atime
;       /* Last access time */ 
 253 /* The actual Redis Object */ 
 254 typedef struct redisObject 
{ 
 257     unsigned char encoding
; 
 258     unsigned char storage
;  /* If this object is a key, where is the value? 
 259                              * REDIS_VM_MEMORY, REDIS_VM_SWAPPED, ... */ 
 260     unsigned char vtype
; /* If this object is a key, and value is swapped out, 
 261                           * this is the type of the swapped out object. */ 
 263     /* VM fields, these are only allocated if VM is active, otherwise the 
 264      * object allocation function will just allocate 
 265      * sizeof(redisObject) minus sizeof(redisObjectVM), so using 
 266      * Redis without VM active will not have any overhead. */ 
 267     struct redisObjectVM vm
; 
 270 /* Macro used to initialize a Redis object allocated on the stack. 
 271  * Note that this macro is kept near the structure definition to make sure 
 272  * we'll update it when the structure is changed, to avoid bugs like 
 273  * bug #85 introduced exactly in this way. */ 
 274 #define initStaticStringObject(_var,_ptr) do { \ 
 276     _var.type = REDIS_STRING; \ 
 277     _var.encoding = REDIS_ENCODING_RAW; \ 
 279     if (server.vm_enabled) _var.storage = REDIS_VM_MEMORY; \ 
 282 typedef struct redisDb 
{ 
 283     dict 
*dict
;                 /* The keyspace for this DB */ 
 284     dict 
*expires
;              /* Timeout of keys with a timeout set */ 
 285     dict 
*blockingkeys
;         /* Keys with clients waiting for data (BLPOP) */ 
 286     dict 
*io_keys
;              /* Keys with clients waiting for VM I/O */ 
 290 /* Client MULTI/EXEC state */ 
 291 typedef struct multiCmd 
{ 
 294     struct redisCommand 
*cmd
; 
 297 typedef struct multiState 
{ 
 298     multiCmd 
*commands
;     /* Array of MULTI commands */ 
 299     int count
;              /* Total number of MULTI commands */ 
 302 /* With multiplexing we need to keep per-client state. 
 303  * Clients are kept in a linked list. */ 
 304 typedef struct redisClient 
{ 
 309     robj 
**argv
, **mbargv
; 
 311     int bulklen
;            /* bulk read len. -1 if not in bulk read mode */ 
 312     int multibulk
;          /* multi bulk command format active */ 
 315     time_t lastinteraction
; /* time of the last interaction, used for timeout */ 
 316     int flags
;              /* REDIS_SLAVE | REDIS_MONITOR | REDIS_MULTI ... */ 
 317     int slaveseldb
;         /* slave selected db, if this client is a slave */ 
 318     int authenticated
;      /* when requirepass is non-NULL */ 
 319     int replstate
;          /* replication state if this is a slave */ 
 320     int repldbfd
;           /* replication DB file descriptor */ 
 321     long repldboff
;         /* replication DB file offset */ 
 322     off_t repldbsize
;       /* replication DB file size */ 
 323     multiState mstate
;      /* MULTI/EXEC state */ 
 324     robj 
**blockingkeys
;    /* The key we are waiting to terminate a blocking 
 325                              * operation such as BLPOP. Otherwise NULL. */ 
 326     int blockingkeysnum
;    /* Number of blocking keys */ 
 327     time_t blockingto
;      /* Blocking operation timeout. If UNIX current time 
 328                              * is >= blockingto then the operation timed out. */ 
 329     list 
*io_keys
;          /* Keys this client is waiting to be loaded from the 
 330                              * swap file in order to continue. */ 
 331     dict 
*pubsub_channels
;  /* channels a client is interested in (SUBSCRIBE) */ 
 332     list 
*pubsub_patterns
;  /* patterns a client is interested in (SUBSCRIBE) */ 
 340 /* Global server state structure */ 
 345     long long dirty
;            /* changes to DB from the last save */ 
 347     list 
*slaves
, *monitors
; 
 348     char neterr
[ANET_ERR_LEN
]; 
 350     int cronloops
;              /* number of times the cron function run */ 
 351     list 
*objfreelist
;          /* A list of freed objects to avoid malloc() */ 
 352     time_t lastsave
;            /* Unix time of last save succeeede */ 
 353     /* Fields used only for stats */ 
 354     time_t stat_starttime
;         /* server start time */ 
 355     long long stat_numcommands
;    /* number of processed commands */ 
 356     long long stat_numconnections
; /* number of connections received */ 
 357     long long stat_expiredkeys
;   /* number of expired keys */ 
 370     pid_t bgsavechildpid
; 
 371     pid_t bgrewritechildpid
; 
 372     sds bgrewritebuf
; /* buffer taken by parent during oppend only rewrite */ 
 373     struct saveparam 
*saveparams
; 
 378     char *appendfilename
; 
 382     /* Replication related */ 
 387     redisClient 
*master
;    /* client that is master for this slave */ 
 389     unsigned int maxclients
; 
 390     unsigned long long maxmemory
; 
 391     unsigned int blpop_blocked_clients
; 
 392     unsigned int vm_blocked_clients
; 
 393     /* Sort parameters - qsort_r() is only available under BSD so we 
 394      * have to take this state global, in order to pass it to sortCompare() */ 
 398     /* Virtual memory configuration */ 
 403     unsigned long long vm_max_memory
; 
 405     size_t hash_max_zipmap_entries
; 
 406     size_t hash_max_zipmap_value
; 
 407     /* Virtual memory state */ 
 410     off_t vm_next_page
; /* Next probably empty page */ 
 411     off_t vm_near_pages
; /* Number of pages allocated sequentially */ 
 412     unsigned char *vm_bitmap
; /* Bitmap of free/used pages */ 
 413     time_t unixtime
;    /* Unix time sampled every second. */ 
 414     /* Virtual memory I/O threads stuff */ 
 415     /* An I/O thread processes an element taken from the io_newjobs queue and 
 416      * puts the result of the operation in the io_processed list. While the 
 417      * job is being processed, it's put on the io_processing queue. */ 
 418     list 
*io_newjobs
; /* List of VM I/O jobs yet to be processed */ 
 419     list 
*io_processing
; /* List of VM I/O jobs being processed */ 
 420     list 
*io_processed
; /* List of VM I/O jobs already processed */ 
 421     list 
*io_ready_clients
; /* Clients ready to be unblocked. All keys loaded */ 
 422     pthread_mutex_t io_mutex
; /* lock to access io_jobs/io_done/io_thread_job */ 
 423     pthread_mutex_t obj_freelist_mutex
; /* safe redis objects creation/free */ 
 424     pthread_mutex_t io_swapfile_mutex
; /* So we can lseek + write */ 
 425     pthread_attr_t io_threads_attr
; /* attributes for threads creation */ 
 426     int io_active_threads
; /* Number of running I/O threads */ 
 427     int vm_max_threads
; /* Max number of I/O threads running at the same time */ 
 428     /* Our main thread is blocked on the event loop, looking for sockets ready 
 429      * to be read or written, so when a threaded I/O operation is ready to be 
 430      * processed by the main thread, the I/O thread will use a unix pipe to 
 431      * wake the main thread. The following are the two pipe FDs. */ 
 432     int io_ready_pipe_read
; 
 433     int io_ready_pipe_write
; 
 434     /* Virtual memory stats */ 
 435     unsigned long long vm_stats_used_pages
; 
 436     unsigned long long vm_stats_swapped_objects
; 
 437     unsigned long long vm_stats_swapouts
; 
 438     unsigned long long vm_stats_swapins
; 
 440     dict 
*pubsub_channels
; /* Map channels to list of subscribed clients */ 
 441     list 
*pubsub_patterns
; /* A list of pubsub_patterns */ 
 446 typedef struct pubsubPattern 
{ 
 /* Function type (not a pointer type) for routines that take the client and
  * return nothing. struct redisCommand stores pointers to it in its 'proc'
  * and 'vm_preload_proc' fields. */
 451 typedef void redisCommandProc(redisClient 
*c
); 
 452 struct redisCommand 
{ 
 454     redisCommandProc 
*proc
; 
 457     /* Use a function to determine which keys need to be loaded 
 458      * in the background prior to executing this command. Takes precedence 
 459      * over vm_firstkey and others, ignored when NULL */ 
 460     redisCommandProc 
*vm_preload_proc
; 
 461     /* What keys should be loaded in background when calling this command? */ 
 462     int vm_firstkey
; /* The first argument that's a key (0 = no keys) */ 
 463     int vm_lastkey
;  /* THe last argument that's a key */ 
 464     int vm_keystep
;  /* The step between first and last key */ 
 467 struct redisFunctionSym 
{ 
 469     unsigned long pointer
; 
 472 typedef struct _redisSortObject 
{ 
 480 typedef struct _redisSortOperation 
{ 
 483 } redisSortOperation
; 
 485 /* ZSETs use a specialized version of Skiplists */ 
 487 typedef struct zskiplistNode 
{ 
 488     struct zskiplistNode 
**forward
; 
 489     struct zskiplistNode 
*backward
; 
 495 typedef struct zskiplist 
{ 
 496     struct zskiplistNode 
*header
, *tail
; 
 497     unsigned long length
; 
 501 typedef struct zset 
{ 
 506 /* Our shared "common" objects */ 
 508 struct sharedObjectsStruct 
{ 
 509     robj 
*crlf
, *ok
, *err
, *emptybulk
, *czero
, *cone
, *pong
, *space
, 
 510     *colon
, *nullbulk
, *nullmultibulk
, *queued
, 
 511     *emptymultibulk
, *wrongtypeerr
, *nokeyerr
, *syntaxerr
, *sameobjecterr
, 
 512     *outofrangeerr
, *plus
, 
 513     *select0
, *select1
, *select2
, *select3
, *select4
, 
 514     *select5
, *select6
, *select7
, *select8
, *select9
, 
 515     *messagebulk
, *subscribebulk
, *unsubscribebulk
, *mbulk3
, 
 516     *psubscribebulk
, *punsubscribebulk
; 
 519 /* Global vars that are actually used as constants. The following double 
 520  * values are used for double on-disk serialization, and are initialized 
 521  * at runtime to avoid strange compiler optimizations. */ 
 523 static double R_Zero
, R_PosInf
, R_NegInf
, R_Nan
; 
 525 /* VM threaded I/O request message */ 
 526 #define REDIS_IOJOB_LOAD 0          /* Load from disk to memory */ 
 527 #define REDIS_IOJOB_PREPARE_SWAP 1  /* Compute needed pages */ 
 528 #define REDIS_IOJOB_DO_SWAP 2       /* Swap from memory to disk */ 
 529 typedef struct iojob 
{ 
 530     int type
;   /* Request type, REDIS_IOJOB_* */ 
 531     redisDb 
*db
;/* Redis database */ 
 532     robj 
*key
;  /* This I/O request is about swapping this key */ 
 533     robj 
*val
;  /* the value to swap for REDIS_IOREQ_*_SWAP, otherwise this 
 534                  * field is populated by the I/O thread for REDIS_IOREQ_LOAD. */ 
 535     off_t page
; /* Swap page where to read/write the object */ 
 536     off_t pages
; /* Swap pages needed to save object. PREPARE_SWAP return val */ 
 537     int canceled
; /* True if this command was canceled by blocking side of VM */ 
 538     pthread_t thread
; /* ID of the thread processing this entry */ 
 541 /*================================ Prototypes =============================== */ 
 /* Forward declarations of file-local (static) helpers. */

 /* Object helpers operating on robj values. */
 543 static void freeStringObject(robj 
*o
); 
 544 static void freeListObject(robj 
*o
); 
 545 static void freeSetObject(robj 
*o
); 
 /* NOTE(review): decrRefCount takes void* while incrRefCount takes robj*;
  * presumably so it can be passed as a generic free callback. Confirm. */
 546 static void decrRefCount(void *o
); 
 547 static robj 
*createObject(int type
, void *ptr
); 
 548 static void freeClient(redisClient 
*c
); 
 549 static int rdbLoad(char *filename
); 
 550 static void addReply(redisClient 
*c
, robj 
*obj
); 
 551 static void addReplySds(redisClient 
*c
, sds s
); 
 552 static void incrRefCount(robj 
*o
); 
 553 static int rdbSaveBackground(char *filename
); 
 554 static robj 
*createStringObject(char *ptr
, size_t len
); 
 555 static robj 
*dupStringObject(robj 
*o
); 
 /* The next two share the trailing (dictid, argv, argc) parameters. */
 556 static void replicationFeedSlaves(list 
*slaves
, int dictid
, robj 
**argv
, int argc
); 
 557 static void feedAppendOnlyFile(struct redisCommand 
*cmd
, int dictid
, robj 
**argv
, int argc
); 
 558 static int syncWithMaster(void); 
 559 static int tryObjectEncoding(robj 
*o
); 
 560 static robj 
*getDecodedObject(robj 
*o
); 
 /* Key-level helpers: each takes the database and the key object. */
 561 static int removeExpire(redisDb 
*db
, robj 
*key
); 
 562 static int expireIfNeeded(redisDb 
*db
, robj 
*key
); 
 563 static int deleteIfVolatile(redisDb 
*db
, robj 
*key
); 
 564 static int deleteIfSwapped(redisDb 
*db
, robj 
*key
); 
 565 static int deleteKey(redisDb 
*db
, robj 
*key
); 
 566 static time_t getExpire(redisDb 
*db
, robj 
*key
); 
 567 static int setExpire(redisDb 
*db
, robj 
*key
, time_t when
); 
 568 static void updateSlavesWaitingBgsave(int bgsaveerr
); 
 569 static void freeMemoryIfNeeded(void); 
 570 static int processCommand(redisClient 
*c
); 
 571 static void setupSigSegvAction(void); 
 /* Both temp-file removers are keyed by a child process id. */
 572 static void rdbRemoveTempFile(pid_t childpid
); 
 573 static void aofRemoveTempFile(pid_t childpid
); 
 574 static size_t stringObjectLen(robj 
*o
); 
 575 static void processInputBuffer(redisClient 
*c
); 
 /* Skiplist (zskiplist) helpers. */
 576 static zskiplist 
*zslCreate(void); 
 577 static void zslFree(zskiplist 
*zsl
); 
 578 static void zslInsert(zskiplist 
*zsl
, double score
, robj 
*obj
); 
 /* Takes (aeEventLoop*, fd, privdata, mask); presumably the ae.h file-event
  * handler shape. Confirm against ae.h. */
 579 static void sendReplyToClientWritev(aeEventLoop 
*el
, int fd
, void *privdata
, int mask
); 
 /* MULTI state helpers operating on a client. */
 580 static void initClientMultiState(redisClient 
*c
); 
 581 static void freeClientMultiState(redisClient 
*c
); 
 582 static void queueMultiCommand(redisClient 
*c
, struct redisCommand 
*cmd
); 
 583 static void unblockClientWaitingData(redisClient 
*c
); 
 584 static int handleClientsWaitingListPush(redisClient 
*c
, robj 
*key
, robj 
*ele
); 
 /* Prototypes for the virtual memory (vm*) routines, the threaded I/O job
  * helpers and the swapped-key client blocking helpers, interleaved with a
  * couple of unrelated declarations. */
 585 static void vmInit(void); 
 586 static void vmMarkPagesFree(off_t page
, off_t count
); 
 587 static robj 
*vmLoadObject(robj 
*key
); 
 588 static robj 
*vmPreviewObject(robj 
*key
); 
 589 static int vmSwapOneObjectBlocking(void); 
 590 static int vmSwapOneObjectThreaded(void); 
 591 static int vmCanSwapOut(void); 
 592 static int tryFreeOneObjectFromFreelist(void); 
 /* The next two take (aeEventLoop*, fd, privdata, mask); presumably the ae.h
  * file-event handler shape. Confirm against ae.h. */
 593 static void acceptHandler(aeEventLoop 
*el
, int fd
, void *privdata
, int mask
); 
 594 static void vmThreadedIOCompletedJob(aeEventLoop 
*el
, int fd
, void *privdata
, int mask
); 
 595 static void vmCancelThreadedIOJob(robj 
*o
); 
 596 static void lockThreadedIO(void); 
 597 static void unlockThreadedIO(void); 
 598 static int vmSwapObjectThreaded(robj 
*key
, robj 
*val
, redisDb 
*db
); 
 /* Helpers taking an iojob pointer. */
 599 static void freeIOJob(iojob 
*j
); 
 600 static void queueIOJob(iojob 
*j
); 
 /* Swap-file helpers addressed by page number (off_t). */
 601 static int vmWriteObjectOnSwap(robj 
*o
, off_t page
); 
 602 static robj 
*vmReadObjectFromSwap(off_t page
, int type
); 
 603 static void waitEmptyIOJobsQueue(void); 
 604 static void vmReopenSwapFile(void); 
 605 static int vmFreePage(off_t page
); 
 /* Has the redisCommandProc signature. It appears in the cmdTable rows for
  * "zunion" and "zinter" in the slot that holds a redisCommandProc pointer
  * (presumably vm_preload_proc; confirm against struct redisCommand). */
 606 static void zunionInterBlockClientOnSwappedKeys(redisClient 
*c
); 
 607 static int blockClientOnSwappedKeys(struct redisCommand 
*cmd
, redisClient 
*c
); 
 608 static int dontWaitForSwappedKey(redisClient 
*c
, robj 
*key
); 
 609 static void handleClientsBlockedOnSwappedKey(redisDb 
*db
, robj 
*key
); 
 /* Takes (aeEventLoop*, fd, privdata, mask); presumably the ae.h file-event
  * handler shape. Confirm against ae.h. */
 610 static void readQueryFromClient(aeEventLoop 
*el
, int fd
, void *privdata
, int mask
); 
 /* Command lookup by name; returns a pointer to a struct redisCommand. */
 611 static struct redisCommand 
*lookupCommand(char *name
); 
 612 static void call(redisClient 
*c
, struct redisCommand 
*cmd
); 
 613 static void resetClient(redisClient 
*c
); 
 614 static void convertToRealHash(robj 
*o
); 
 /* Pub/Sub helpers. The two unsubscribe-all variants take the client and a
  * 'notify' flag and return an int. */
 615 static int pubsubUnsubscribeAllChannels(redisClient 
*c
, int notify
); 
 616 static int pubsubUnsubscribeAllPatterns(redisClient 
*c
, int notify
); 
 /* These two take untyped (void*) arguments. NOTE(review): presumably they are
  * installed as adlist free/match callbacks; confirm where they are set. */
 617 static void freePubsubPattern(void *p
); 
 618 static int listMatchPubsubPattern(void *a
, void *b
); 
 619 static int compareStringObjects(robj 
*a
, robj 
*b
); 
 /* Command handler prototypes. Every function below has the redisCommandProc
  * signature: it takes the client and returns nothing. The cmdTable rows
  * visible in this file reference these functions by name. */
 622 static void authCommand(redisClient 
*c
); 
 623 static void pingCommand(redisClient 
*c
); 
 624 static void echoCommand(redisClient 
*c
); 
 625 static void setCommand(redisClient 
*c
); 
 626 static void setnxCommand(redisClient 
*c
); 
 627 static void getCommand(redisClient 
*c
); 
 628 static void delCommand(redisClient 
*c
); 
 629 static void existsCommand(redisClient 
*c
); 
 630 static void incrCommand(redisClient 
*c
); 
 631 static void decrCommand(redisClient 
*c
); 
 632 static void incrbyCommand(redisClient 
*c
); 
 633 static void decrbyCommand(redisClient 
*c
); 
 634 static void selectCommand(redisClient 
*c
); 
 635 static void randomkeyCommand(redisClient 
*c
); 
 636 static void keysCommand(redisClient 
*c
); 
 637 static void dbsizeCommand(redisClient 
*c
); 
 638 static void lastsaveCommand(redisClient 
*c
); 
 639 static void saveCommand(redisClient 
*c
); 
 640 static void bgsaveCommand(redisClient 
*c
); 
 641 static void bgrewriteaofCommand(redisClient 
*c
); 
 642 static void shutdownCommand(redisClient 
*c
); 
 643 static void moveCommand(redisClient 
*c
); 
 644 static void renameCommand(redisClient 
*c
); 
 645 static void renamenxCommand(redisClient 
*c
); 
 646 static void lpushCommand(redisClient 
*c
); 
 647 static void rpushCommand(redisClient 
*c
); 
 648 static void lpopCommand(redisClient 
*c
); 
 649 static void rpopCommand(redisClient 
*c
); 
 650 static void llenCommand(redisClient 
*c
); 
 651 static void lindexCommand(redisClient 
*c
); 
 652 static void lrangeCommand(redisClient 
*c
); 
 653 static void ltrimCommand(redisClient 
*c
); 
 654 static void typeCommand(redisClient 
*c
); 
 655 static void lsetCommand(redisClient 
*c
); 
 656 static void saddCommand(redisClient 
*c
); 
 657 static void sremCommand(redisClient 
*c
); 
 658 static void smoveCommand(redisClient 
*c
); 
 659 static void sismemberCommand(redisClient 
*c
); 
 660 static void scardCommand(redisClient 
*c
); 
 661 static void spopCommand(redisClient 
*c
); 
 662 static void srandmemberCommand(redisClient 
*c
); 
 663 static void sinterCommand(redisClient 
*c
); 
 664 static void sinterstoreCommand(redisClient 
*c
); 
 665 static void sunionCommand(redisClient 
*c
); 
 666 static void sunionstoreCommand(redisClient 
*c
); 
 667 static void sdiffCommand(redisClient 
*c
); 
 668 static void sdiffstoreCommand(redisClient 
*c
); 
 669 static void syncCommand(redisClient 
*c
); 
 670 static void flushdbCommand(redisClient 
*c
); 
 671 static void flushallCommand(redisClient 
*c
); 
 672 static void sortCommand(redisClient 
*c
); 
 673 static void lremCommand(redisClient 
*c
); 
 /* NOTE(review): 'rpoplpushcommand' breaks the xxxCommand capitalization used
  * by every other handler here. cmdTable references the same spelling, so a
  * rename must change both places together. */
 674 static void rpoplpushcommand(redisClient 
*c
); 
 675 static void infoCommand(redisClient 
*c
); 
 676 static void mgetCommand(redisClient 
*c
); 
 677 static void monitorCommand(redisClient 
*c
); 
 678 static void expireCommand(redisClient 
*c
); 
 679 static void expireatCommand(redisClient 
*c
); 
 680 static void getsetCommand(redisClient 
*c
); 
 681 static void ttlCommand(redisClient 
*c
); 
 682 static void slaveofCommand(redisClient 
*c
); 
 683 static void debugCommand(redisClient 
*c
); 
 684 static void msetCommand(redisClient 
*c
); 
 685 static void msetnxCommand(redisClient 
*c
); 
 686 static void zaddCommand(redisClient 
*c
); 
 687 static void zincrbyCommand(redisClient 
*c
); 
 688 static void zrangeCommand(redisClient 
*c
); 
 689 static void zrangebyscoreCommand(redisClient 
*c
); 
 690 static void zcountCommand(redisClient 
*c
); 
 691 static void zrevrangeCommand(redisClient 
*c
); 
 692 static void zcardCommand(redisClient 
*c
); 
 693 static void zremCommand(redisClient 
*c
); 
 694 static void zscoreCommand(redisClient 
*c
); 
 695 static void zremrangebyscoreCommand(redisClient 
*c
); 
 696 static void multiCommand(redisClient 
*c
); 
 697 static void execCommand(redisClient 
*c
); 
 698 static void discardCommand(redisClient 
*c
); 
 699 static void blpopCommand(redisClient 
*c
); 
 700 static void brpopCommand(redisClient 
*c
); 
 701 static void appendCommand(redisClient 
*c
); 
 702 static void substrCommand(redisClient 
*c
); 
 703 static void zrankCommand(redisClient 
*c
); 
 704 static void zrevrankCommand(redisClient 
*c
); 
 705 static void hsetCommand(redisClient 
*c
); 
 706 static void hgetCommand(redisClient 
*c
); 
 707 static void hdelCommand(redisClient 
*c
); 
 708 static void hlenCommand(redisClient 
*c
); 
 709 static void zremrangebyrankCommand(redisClient 
*c
); 
 710 static void zunionCommand(redisClient 
*c
); 
 711 static void zinterCommand(redisClient 
*c
); 
 712 static void hkeysCommand(redisClient 
*c
); 
 713 static void hvalsCommand(redisClient 
*c
); 
 714 static void hgetallCommand(redisClient 
*c
); 
 715 static void hexistsCommand(redisClient 
*c
); 
 716 static void configCommand(redisClient 
*c
); 
 717 static void hincrbyCommand(redisClient 
*c
); 
 718 static void subscribeCommand(redisClient 
*c
); 
 719 static void unsubscribeCommand(redisClient 
*c
); 
 720 static void psubscribeCommand(redisClient 
*c
); 
 721 static void punsubscribeCommand(redisClient 
*c
); 
 722 static void publishCommand(redisClient 
*c
); 
 724 /*================================= Globals ================================= */ 
 727 static struct redisServer server
; /* server global state */ 
 728 static struct redisCommand cmdTable
[] = { 
 729     {"get",getCommand
,2,REDIS_CMD_INLINE
,NULL
,1,1,1}, 
 730     {"set",setCommand
,3,REDIS_CMD_BULK
|REDIS_CMD_DENYOOM
,NULL
,0,0,0}, 
 731     {"setnx",setnxCommand
,3,REDIS_CMD_BULK
|REDIS_CMD_DENYOOM
,NULL
,0,0,0}, 
 732     {"append",appendCommand
,3,REDIS_CMD_BULK
|REDIS_CMD_DENYOOM
,NULL
,1,1,1}, 
 733     {"substr",substrCommand
,4,REDIS_CMD_INLINE
,NULL
,1,1,1}, 
 734     {"del",delCommand
,-2,REDIS_CMD_INLINE
,NULL
,0,0,0}, 
 735     {"exists",existsCommand
,2,REDIS_CMD_INLINE
,NULL
,1,1,1}, 
 736     {"incr",incrCommand
,2,REDIS_CMD_INLINE
|REDIS_CMD_DENYOOM
,NULL
,1,1,1}, 
 737     {"decr",decrCommand
,2,REDIS_CMD_INLINE
|REDIS_CMD_DENYOOM
,NULL
,1,1,1}, 
 738     {"mget",mgetCommand
,-2,REDIS_CMD_INLINE
,NULL
,1,-1,1}, 
 739     {"rpush",rpushCommand
,3,REDIS_CMD_BULK
|REDIS_CMD_DENYOOM
,NULL
,1,1,1}, 
 740     {"lpush",lpushCommand
,3,REDIS_CMD_BULK
|REDIS_CMD_DENYOOM
,NULL
,1,1,1}, 
 741     {"rpop",rpopCommand
,2,REDIS_CMD_INLINE
,NULL
,1,1,1}, 
 742     {"lpop",lpopCommand
,2,REDIS_CMD_INLINE
,NULL
,1,1,1}, 
 743     {"brpop",brpopCommand
,-3,REDIS_CMD_INLINE
,NULL
,1,1,1}, 
 744     {"blpop",blpopCommand
,-3,REDIS_CMD_INLINE
,NULL
,1,1,1}, 
 745     {"llen",llenCommand
,2,REDIS_CMD_INLINE
,NULL
,1,1,1}, 
 746     {"lindex",lindexCommand
,3,REDIS_CMD_INLINE
,NULL
,1,1,1}, 
 747     {"lset",lsetCommand
,4,REDIS_CMD_BULK
|REDIS_CMD_DENYOOM
,NULL
,1,1,1}, 
 748     {"lrange",lrangeCommand
,4,REDIS_CMD_INLINE
,NULL
,1,1,1}, 
 749     {"ltrim",ltrimCommand
,4,REDIS_CMD_INLINE
,NULL
,1,1,1}, 
 750     {"lrem",lremCommand
,4,REDIS_CMD_BULK
,NULL
,1,1,1}, 
 751     {"rpoplpush",rpoplpushcommand
,3,REDIS_CMD_INLINE
|REDIS_CMD_DENYOOM
,NULL
,1,2,1}, 
 752     {"sadd",saddCommand
,3,REDIS_CMD_BULK
|REDIS_CMD_DENYOOM
,NULL
,1,1,1}, 
 753     {"srem",sremCommand
,3,REDIS_CMD_BULK
,NULL
,1,1,1}, 
 754     {"smove",smoveCommand
,4,REDIS_CMD_BULK
,NULL
,1,2,1}, 
 755     {"sismember",sismemberCommand
,3,REDIS_CMD_BULK
,NULL
,1,1,1}, 
 756     {"scard",scardCommand
,2,REDIS_CMD_INLINE
,NULL
,1,1,1}, 
 757     {"spop",spopCommand
,2,REDIS_CMD_INLINE
,NULL
,1,1,1}, 
 758     {"srandmember",srandmemberCommand
,2,REDIS_CMD_INLINE
,NULL
,1,1,1}, 
 759     {"sinter",sinterCommand
,-2,REDIS_CMD_INLINE
|REDIS_CMD_DENYOOM
,NULL
,1,-1,1}, 
 760     {"sinterstore",sinterstoreCommand
,-3,REDIS_CMD_INLINE
|REDIS_CMD_DENYOOM
,NULL
,2,-1,1}, 
 761     {"sunion",sunionCommand
,-2,REDIS_CMD_INLINE
|REDIS_CMD_DENYOOM
,NULL
,1,-1,1}, 
 762     {"sunionstore",sunionstoreCommand
,-3,REDIS_CMD_INLINE
|REDIS_CMD_DENYOOM
,NULL
,2,-1,1}, 
 763     {"sdiff",sdiffCommand
,-2,REDIS_CMD_INLINE
|REDIS_CMD_DENYOOM
,NULL
,1,-1,1}, 
 764     {"sdiffstore",sdiffstoreCommand
,-3,REDIS_CMD_INLINE
|REDIS_CMD_DENYOOM
,NULL
,2,-1,1}, 
 765     {"smembers",sinterCommand
,2,REDIS_CMD_INLINE
,NULL
,1,1,1}, 
 766     {"zadd",zaddCommand
,4,REDIS_CMD_BULK
|REDIS_CMD_DENYOOM
,NULL
,1,1,1}, 
 767     {"zincrby",zincrbyCommand
,4,REDIS_CMD_BULK
|REDIS_CMD_DENYOOM
,NULL
,1,1,1}, 
 768     {"zrem",zremCommand
,3,REDIS_CMD_BULK
,NULL
,1,1,1}, 
 769     {"zremrangebyscore",zremrangebyscoreCommand
,4,REDIS_CMD_INLINE
,NULL
,1,1,1}, 
 770     {"zremrangebyrank",zremrangebyrankCommand
,4,REDIS_CMD_INLINE
,NULL
,1,1,1}, 
 771     {"zunion",zunionCommand
,-4,REDIS_CMD_INLINE
|REDIS_CMD_DENYOOM
,zunionInterBlockClientOnSwappedKeys
,0,0,0}, 
 772     {"zinter",zinterCommand
,-4,REDIS_CMD_INLINE
|REDIS_CMD_DENYOOM
,zunionInterBlockClientOnSwappedKeys
,0,0,0}, 
 773     {"zrange",zrangeCommand
,-4,REDIS_CMD_INLINE
,NULL
,1,1,1}, 
 774     {"zrangebyscore",zrangebyscoreCommand
,-4,REDIS_CMD_INLINE
,NULL
,1,1,1}, 
 775     {"zcount",zcountCommand
,4,REDIS_CMD_INLINE
,NULL
,1,1,1}, 
 776     {"zrevrange",zrevrangeCommand
,-4,REDIS_CMD_INLINE
,NULL
,1,1,1}, 
 777     {"zcard",zcardCommand
,2,REDIS_CMD_INLINE
,NULL
,1,1,1}, 
 778     {"zscore",zscoreCommand
,3,REDIS_CMD_BULK
|REDIS_CMD_DENYOOM
,NULL
,1,1,1}, 
 779     {"zrank",zrankCommand
,3,REDIS_CMD_BULK
,NULL
,1,1,1}, 
 780     {"zrevrank",zrevrankCommand
,3,REDIS_CMD_BULK
,NULL
,1,1,1}, 
 781     {"hset",hsetCommand
,4,REDIS_CMD_BULK
|REDIS_CMD_DENYOOM
,NULL
,1,1,1}, 
 782     {"hincrby",hincrbyCommand
,4,REDIS_CMD_INLINE
|REDIS_CMD_DENYOOM
,NULL
,1,1,1}, 
 783     {"hget",hgetCommand
,3,REDIS_CMD_BULK
,NULL
,1,1,1}, 
 784     {"hdel",hdelCommand
,3,REDIS_CMD_BULK
,NULL
,1,1,1}, 
 785     {"hlen",hlenCommand
,2,REDIS_CMD_INLINE
,NULL
,1,1,1}, 
 786     {"hkeys",hkeysCommand
,2,REDIS_CMD_INLINE
,NULL
,1,1,1}, 
 787     {"hvals",hvalsCommand
,2,REDIS_CMD_INLINE
,NULL
,1,1,1}, 
 788     {"hgetall",hgetallCommand
,2,REDIS_CMD_INLINE
,NULL
,1,1,1}, 
 789     {"hexists",hexistsCommand
,3,REDIS_CMD_BULK
,NULL
,1,1,1}, 
 790     {"incrby",incrbyCommand
,3,REDIS_CMD_INLINE
|REDIS_CMD_DENYOOM
,NULL
,1,1,1}, 
 791     {"decrby",decrbyCommand
,3,REDIS_CMD_INLINE
|REDIS_CMD_DENYOOM
,NULL
,1,1,1}, 
 792     {"getset",getsetCommand
,3,REDIS_CMD_BULK
|REDIS_CMD_DENYOOM
,NULL
,1,1,1}, 
 793     {"mset",msetCommand
,-3,REDIS_CMD_BULK
|REDIS_CMD_DENYOOM
,NULL
,1,-1,2}, 
 794     {"msetnx",msetnxCommand
,-3,REDIS_CMD_BULK
|REDIS_CMD_DENYOOM
,NULL
,1,-1,2}, 
 795     {"randomkey",randomkeyCommand
,1,REDIS_CMD_INLINE
,NULL
,0,0,0}, 
 796     {"select",selectCommand
,2,REDIS_CMD_INLINE
,NULL
,0,0,0}, 
 797     {"move",moveCommand
,3,REDIS_CMD_INLINE
,NULL
,1,1,1}, 
 798     {"rename",renameCommand
,3,REDIS_CMD_INLINE
,NULL
,1,1,1}, 
 799     {"renamenx",renamenxCommand
,3,REDIS_CMD_INLINE
,NULL
,1,1,1}, 
 800     {"expire",expireCommand
,3,REDIS_CMD_INLINE
,NULL
,0,0,0}, 
 801     {"expireat",expireatCommand
,3,REDIS_CMD_INLINE
,NULL
,0,0,0}, 
 802     {"keys",keysCommand
,2,REDIS_CMD_INLINE
,NULL
,0,0,0}, 
 803     {"dbsize",dbsizeCommand
,1,REDIS_CMD_INLINE
,NULL
,0,0,0}, 
 804     {"auth",authCommand
,2,REDIS_CMD_INLINE
,NULL
,0,0,0}, 
 805     {"ping",pingCommand
,1,REDIS_CMD_INLINE
,NULL
,0,0,0}, 
 806     {"echo",echoCommand
,2,REDIS_CMD_BULK
,NULL
,0,0,0}, 
 807     {"save",saveCommand
,1,REDIS_CMD_INLINE
,NULL
,0,0,0}, 
 808     {"bgsave",bgsaveCommand
,1,REDIS_CMD_INLINE
,NULL
,0,0,0}, 
 809     {"bgrewriteaof",bgrewriteaofCommand
,1,REDIS_CMD_INLINE
,NULL
,0,0,0}, 
 810     {"shutdown",shutdownCommand
,1,REDIS_CMD_INLINE
,NULL
,0,0,0}, 
 811     {"lastsave",lastsaveCommand
,1,REDIS_CMD_INLINE
,NULL
,0,0,0}, 
 812     {"type",typeCommand
,2,REDIS_CMD_INLINE
,NULL
,1,1,1}, 
 813     {"multi",multiCommand
,1,REDIS_CMD_INLINE
,NULL
,0,0,0}, 
 814     {"exec",execCommand
,1,REDIS_CMD_INLINE
|REDIS_CMD_DENYOOM
,NULL
,0,0,0}, 
 815     {"discard",discardCommand
,1,REDIS_CMD_INLINE
,NULL
,0,0,0}, 
 816     {"sync",syncCommand
,1,REDIS_CMD_INLINE
,NULL
,0,0,0}, 
 817     {"flushdb",flushdbCommand
,1,REDIS_CMD_INLINE
,NULL
,0,0,0}, 
 818     {"flushall",flushallCommand
,1,REDIS_CMD_INLINE
,NULL
,0,0,0}, 
 819     {"sort",sortCommand
,-2,REDIS_CMD_INLINE
|REDIS_CMD_DENYOOM
,NULL
,1,1,1}, 
 820     {"info",infoCommand
,1,REDIS_CMD_INLINE
,NULL
,0,0,0}, 
 821     {"monitor",monitorCommand
,1,REDIS_CMD_INLINE
,NULL
,0,0,0}, 
 822     {"ttl",ttlCommand
,2,REDIS_CMD_INLINE
,NULL
,1,1,1}, 
 823     {"slaveof",slaveofCommand
,3,REDIS_CMD_INLINE
,NULL
,0,0,0}, 
 824     {"debug",debugCommand
,-2,REDIS_CMD_INLINE
,NULL
,0,0,0}, 
 825     {"config",configCommand
,-2,REDIS_CMD_BULK
,NULL
,0,0,0}, 
 826     {"subscribe",subscribeCommand
,-2,REDIS_CMD_INLINE
,NULL
,0,0,0}, 
 827     {"unsubscribe",unsubscribeCommand
,-1,REDIS_CMD_INLINE
,NULL
,0,0,0}, 
 828     {"psubscribe",psubscribeCommand
,-2,REDIS_CMD_INLINE
,NULL
,0,0,0}, 
 829     {"punsubscribe",punsubscribeCommand
,-1,REDIS_CMD_INLINE
,NULL
,0,0,0}, 
 830     {"publish",publishCommand
,3,REDIS_CMD_BULK
|REDIS_CMD_FORCE_REPLICATION
,NULL
,0,0,0}, 
 831     {NULL
,NULL
,0,0,NULL
,0,0,0} 
 834 /*============================ Utility functions ============================ */ 
 836 /* Glob-style pattern matching. */ 
 837 static int stringmatchlen(const char *pattern
, int patternLen
, 
 838         const char *string
, int stringLen
, int nocase
) 
 843             while (pattern
[1] == '*') { 
 848                 return 1; /* match */ 
 850                 if (stringmatchlen(pattern
+1, patternLen
-1, 
 851                             string
, stringLen
, nocase
)) 
 852                     return 1; /* match */ 
 856             return 0; /* no match */ 
 860                 return 0; /* no match */ 
 870             not = pattern
[0] == '^'; 
 877                 if (pattern
[0] == '\\') { 
 880                     if (pattern
[0] == string
[0]) 
 882                 } else if (pattern
[0] == ']') { 
 884                 } else if (patternLen 
== 0) { 
 888                 } else if (pattern
[1] == '-' && patternLen 
>= 3) { 
 889                     int start 
= pattern
[0]; 
 890                     int end 
= pattern
[2]; 
 898                         start 
= tolower(start
); 
 904                     if (c 
>= start 
&& c 
<= end
) 
 908                         if (pattern
[0] == string
[0]) 
 911                         if (tolower((int)pattern
[0]) == tolower((int)string
[0])) 
 921                 return 0; /* no match */ 
 927             if (patternLen 
>= 2) { 
 934                 if (pattern
[0] != string
[0]) 
 935                     return 0; /* no match */ 
 937                 if (tolower((int)pattern
[0]) != tolower((int)string
[0])) 
 938                     return 0; /* no match */ 
 946         if (stringLen 
== 0) { 
 947             while(*pattern 
== '*') { 
 954     if (patternLen 
== 0 && stringLen 
== 0) 
 959 static int stringmatch(const char *pattern
, const char *string
, int nocase
) { 
 960     return stringmatchlen(pattern
,strlen(pattern
),string
,strlen(string
),nocase
); 
 963 static void redisLog(int level
, const char *fmt
, ...) { 
 967     fp 
= (server
.logfile 
== NULL
) ? stdout 
: fopen(server
.logfile
,"a"); 
 971     if (level 
>= server
.verbosity
) { 
 977         strftime(buf
,64,"%d %b %H:%M:%S",localtime(&now
)); 
 978         fprintf(fp
,"[%d] %s %c ",(int)getpid(),buf
,c
[level
]); 
 979         vfprintf(fp
, fmt
, ap
); 
 985     if (server
.logfile
) fclose(fp
); 
 988 /*====================== Hash table type implementation  ==================== */ 
 990 /* This is a hash table type that uses the SDS dynamic strings library as 
 991  * keys and redis objects as values (objects can hold SDS strings, 
 994 static void dictVanillaFree(void *privdata
, void *val
) 
 996     DICT_NOTUSED(privdata
); 
1000 static void dictListDestructor(void *privdata
, void *val
) 
1002     DICT_NOTUSED(privdata
); 
1003     listRelease((list
*)val
); 
1006 static int sdsDictKeyCompare(void *privdata
, const void *key1
, 
1010     DICT_NOTUSED(privdata
); 
1012     l1 
= sdslen((sds
)key1
); 
1013     l2 
= sdslen((sds
)key2
); 
1014     if (l1 
!= l2
) return 0; 
1015     return memcmp(key1
, key2
, l1
) == 0; 
1018 static void dictRedisObjectDestructor(void *privdata
, void *val
) 
1020     DICT_NOTUSED(privdata
); 
1022     if (val 
== NULL
) return; /* Values of swapped out keys as set to NULL */ 
1026 static int dictObjKeyCompare(void *privdata
, const void *key1
, 
1029     const robj 
*o1 
= key1
, *o2 
= key2
; 
1030     return sdsDictKeyCompare(privdata
,o1
->ptr
,o2
->ptr
); 
1033 static unsigned int dictObjHash(const void *key
) { 
1034     const robj 
*o 
= key
; 
1035     return dictGenHashFunction(o
->ptr
, sdslen((sds
)o
->ptr
)); 
1038 static int dictEncObjKeyCompare(void *privdata
, const void *key1
, 
1041     robj 
*o1 
= (robj
*) key1
, *o2 
= (robj
*) key2
; 
1044     if (o1
->encoding 
== REDIS_ENCODING_INT 
&& 
1045         o2
->encoding 
== REDIS_ENCODING_INT 
&& 
1046         o1
->ptr 
== o2
->ptr
) return 1; 
1048     o1 
= getDecodedObject(o1
); 
1049     o2 
= getDecodedObject(o2
); 
1050     cmp 
= sdsDictKeyCompare(privdata
,o1
->ptr
,o2
->ptr
); 
1056 static unsigned int dictEncObjHash(const void *key
) { 
1057     robj 
*o 
= (robj
*) key
; 
1059     if (o
->encoding 
== REDIS_ENCODING_RAW
) { 
1060         return dictGenHashFunction(o
->ptr
, sdslen((sds
)o
->ptr
)); 
1062         if (o
->encoding 
== REDIS_ENCODING_INT
) { 
1066             len 
= snprintf(buf
,32,"%ld",(long)o
->ptr
); 
1067             return dictGenHashFunction((unsigned char*)buf
, len
); 
1071             o 
= getDecodedObject(o
); 
1072             hash 
= dictGenHashFunction(o
->ptr
, sdslen((sds
)o
->ptr
)); 
1079 /* Sets type and expires */ 
1080 static dictType setDictType 
= { 
1081     dictEncObjHash
,            /* hash function */ 
1084     dictEncObjKeyCompare
,      /* key compare */ 
1085     dictRedisObjectDestructor
, /* key destructor */ 
1086     NULL                       
/* val destructor */ 
1089 /* Sorted sets hash (note: a skiplist is used in addition to the hash table) */ 
1090 static dictType zsetDictType 
= { 
1091     dictEncObjHash
,            /* hash function */ 
1094     dictEncObjKeyCompare
,      /* key compare */ 
1095     dictRedisObjectDestructor
, /* key destructor */ 
1096     dictVanillaFree            
/* val destructor of malloc(sizeof(double)) */ 
1100 static dictType dbDictType 
= { 
1101     dictObjHash
,                /* hash function */ 
1104     dictObjKeyCompare
,          /* key compare */ 
1105     dictRedisObjectDestructor
,  /* key destructor */ 
1106     dictRedisObjectDestructor   
/* val destructor */ 
1110 static dictType keyptrDictType 
= { 
1111     dictObjHash
,               /* hash function */ 
1114     dictObjKeyCompare
,         /* key compare */ 
1115     dictRedisObjectDestructor
, /* key destructor */ 
1116     NULL                       
/* val destructor */ 
1119 /* Hash type hash table (note that small hashes are represented with zipmaps) */ 
1120 static dictType hashDictType 
= { 
1121     dictEncObjHash
,             /* hash function */ 
1124     dictEncObjKeyCompare
,       /* key compare */ 
1125     dictRedisObjectDestructor
,  /* key destructor */ 
1126     dictRedisObjectDestructor   
/* val destructor */ 
1129 /* Keylist hash table type has unencoded redis objects as keys and 
1130  * lists as values. It's used for blocking operations (BLPOP) and to 
1131  * map swapped keys to a list of clients waiting for these keys to be loaded. */ 
1132 static dictType keylistDictType 
= { 
1133     dictObjHash
,                /* hash function */ 
1136     dictObjKeyCompare
,          /* key compare */ 
1137     dictRedisObjectDestructor
,  /* key destructor */ 
1138     dictListDestructor          
/* val destructor */ 
1141 static void version(); 
1143 /* ========================= Random utility functions ======================= */ 
1145 /* Redis generally does not try to recover from out of memory conditions 
1146  * when allocating objects or strings, it is not clear if it will be possible 
1147  * to report this condition to the client since the networking layer itself 
1148  * is based on heap allocation for send buffers, so we simply abort. 
1149  * At least the code will be simpler to read... */ 
1150 static void oom(const char *msg
) { 
1151     redisLog(REDIS_WARNING
, "%s: Out of memory\n",msg
); 
1156 /* ====================== Redis server networking stuff ===================== */ 
1157 static void closeTimedoutClients(void) { 
1160     time_t now 
= time(NULL
); 
1163     listRewind(server
.clients
,&li
); 
1164     while ((ln 
= listNext(&li
)) != NULL
) { 
1165         c 
= listNodeValue(ln
); 
1166         if (server
.maxidletime 
&& 
1167             !(c
->flags 
& REDIS_SLAVE
) &&    /* no timeout for slaves */ 
1168             !(c
->flags 
& REDIS_MASTER
) &&   /* no timeout for masters */ 
1169             dictSize(c
->pubsub_channels
) == 0 && /* no timeout for pubsub */ 
1170             listLength(c
->pubsub_patterns
) == 0 && 
1171             (now 
- c
->lastinteraction 
> server
.maxidletime
)) 
1173             redisLog(REDIS_VERBOSE
,"Closing idle client"); 
1175         } else if (c
->flags 
& REDIS_BLOCKED
) { 
1176             if (c
->blockingto 
!= 0 && c
->blockingto 
< now
) { 
1177                 addReply(c
,shared
.nullmultibulk
); 
1178                 unblockClientWaitingData(c
); 
1184 static int htNeedsResize(dict 
*dict
) { 
1185     long long size
, used
; 
1187     size 
= dictSlots(dict
); 
1188     used 
= dictSize(dict
); 
1189     return (size 
&& used 
&& size 
> DICT_HT_INITIAL_SIZE 
&& 
1190             (used
*100/size 
< REDIS_HT_MINFILL
)); 
1193 /* If the percentage of used slots in the HT drops below REDIS_HT_MINFILL 
1194  * we resize the hash table to save memory */ 
1195 static void tryResizeHashTables(void) { 
1198     for (j 
= 0; j 
< server
.dbnum
; j
++) { 
1199         if (htNeedsResize(server
.db
[j
].dict
)) { 
1200             redisLog(REDIS_VERBOSE
,"The hash table %d is too sparse, resize it...",j
); 
1201             dictResize(server
.db
[j
].dict
); 
1202             redisLog(REDIS_VERBOSE
,"Hash table %d resized.",j
); 
1204         if (htNeedsResize(server
.db
[j
].expires
)) 
1205             dictResize(server
.db
[j
].expires
); 
1209 /* A background saving child (BGSAVE) terminated its work. Handle this. */ 
1210 void backgroundSaveDoneHandler(int statloc
) { 
1211     int exitcode 
= WEXITSTATUS(statloc
); 
1212     int bysignal 
= WIFSIGNALED(statloc
); 
1214     if (!bysignal 
&& exitcode 
== 0) { 
1215         redisLog(REDIS_NOTICE
, 
1216             "Background saving terminated with success"); 
1218         server
.lastsave 
= time(NULL
); 
1219     } else if (!bysignal 
&& exitcode 
!= 0) { 
1220         redisLog(REDIS_WARNING
, "Background saving error"); 
1222         redisLog(REDIS_WARNING
, 
1223             "Background saving terminated by signal %d", WTERMSIG(statloc
)); 
1224         rdbRemoveTempFile(server
.bgsavechildpid
); 
1226     server
.bgsavechildpid 
= -1; 
1227     /* Possibly there are slaves waiting for a BGSAVE in order to be served 
1228      * (the first stage of SYNC is a bulk transfer of dump.rdb) */ 
1229     updateSlavesWaitingBgsave(exitcode 
== 0 ? REDIS_OK 
: REDIS_ERR
); 
1232 /* A background append only file rewriting (BGREWRITEAOF) terminated its work. 
1234 void backgroundRewriteDoneHandler(int statloc
) { 
1235     int exitcode 
= WEXITSTATUS(statloc
); 
1236     int bysignal 
= WIFSIGNALED(statloc
); 
1238     if (!bysignal 
&& exitcode 
== 0) { 
1242         redisLog(REDIS_NOTICE
, 
1243             "Background append only file rewriting terminated with success"); 
1244         /* Now it's time to flush the differences accumulated by the parent */ 
1245         snprintf(tmpfile
,256,"temp-rewriteaof-bg-%d.aof", (int) server
.bgrewritechildpid
); 
1246         fd 
= open(tmpfile
,O_WRONLY
|O_APPEND
); 
1248             redisLog(REDIS_WARNING
, "Not able to open the temp append only file produced by the child: %s", strerror(errno
)); 
1251         /* Flush our data... */ 
1252         if (write(fd
,server
.bgrewritebuf
,sdslen(server
.bgrewritebuf
)) != 
1253                 (signed) sdslen(server
.bgrewritebuf
)) { 
1254             redisLog(REDIS_WARNING
, "Error or short write trying to flush the parent diff of the append log file in the child temp file: %s", strerror(errno
)); 
1258         redisLog(REDIS_NOTICE
,"Parent diff flushed into the new append log file with success (%lu bytes)",sdslen(server
.bgrewritebuf
)); 
1259         /* Now our work is to rename the temp file into the stable file. And 
1260          * switch the file descriptor used by the server for append only. */ 
1261         if (rename(tmpfile
,server
.appendfilename
) == -1) { 
1262             redisLog(REDIS_WARNING
,"Can't rename the temp append only file into the stable one: %s", strerror(errno
)); 
1266         /* Mission completed... almost */ 
1267         redisLog(REDIS_NOTICE
,"Append only file successfully rewritten."); 
1268         if (server
.appendfd 
!= -1) { 
1269             /* If append only is actually enabled... */ 
1270             close(server
.appendfd
); 
1271             server
.appendfd 
= fd
; 
1273             server
.appendseldb 
= -1; /* Make sure it will issue SELECT */ 
1274             redisLog(REDIS_NOTICE
,"The new append only file was selected for future appends."); 
1276             /* If append only is disabled we just generate a dump in this 
1277              * format. Why not? */ 
1280     } else if (!bysignal 
&& exitcode 
!= 0) { 
1281         redisLog(REDIS_WARNING
, "Background append only file rewriting error"); 
1283         redisLog(REDIS_WARNING
, 
1284             "Background append only file rewriting terminated by signal %d", 
1288     sdsfree(server
.bgrewritebuf
); 
1289     server
.bgrewritebuf 
= sdsempty(); 
1290     aofRemoveTempFile(server
.bgrewritechildpid
); 
1291     server
.bgrewritechildpid 
= -1; 
1294 /* This function is called once a background process of some kind terminates, 
1295  * as we want to avoid resizing the hash tables when there is a child in order 
1296  * to play well with copy-on-write (otherwise when a resize happens lots of 
1297  * memory pages are copied). The goal of this function is to update the ability 
1298  * for dict.c to resize the hash tables according to whether or not we have 
1299  * running children. */ 
1300 static void updateDictResizePolicy(void) { 
1301     if (server
.bgsavechildpid 
== -1 && server
.bgrewritechildpid 
== -1) 
1304         dictDisableResize(); 
1307 static int serverCron(struct aeEventLoop 
*eventLoop
, long long id
, void *clientData
) { 
1308     int j
, loops 
= server
.cronloops
++; 
1309     REDIS_NOTUSED(eventLoop
); 
1311     REDIS_NOTUSED(clientData
); 
1313     /* We take a cached value of the unix time in the global state because 
1314      * with virtual memory and aging there is the need to store the current time 
1315      * in objects at every object access, and accuracy is not needed. 
1316      * To access a global var is faster than calling time(NULL) */ 
1317     server
.unixtime 
= time(NULL
); 
1319     /* Show some info about non-empty databases */ 
1320     for (j 
= 0; j 
< server
.dbnum
; j
++) { 
1321         long long size
, used
, vkeys
; 
1323         size 
= dictSlots(server
.db
[j
].dict
); 
1324         used 
= dictSize(server
.db
[j
].dict
); 
1325         vkeys 
= dictSize(server
.db
[j
].expires
); 
1326         if (!(loops 
% 50) && (used 
|| vkeys
)) { 
1327             redisLog(REDIS_VERBOSE
,"DB %d: %lld keys (%lld volatile) in %lld slots HT.",j
,used
,vkeys
,size
); 
1328             /* dictPrintStats(server.dict); */ 
1332     /* We don't want to resize the hash tables while a background saving 
1333      * is in progress: the saving child is created using fork() that is 
1334      * implemented with a copy-on-write semantic in most modern systems, so 
1335      * if we resize the HT while there is the saving child at work actually 
1336      * a lot of memory movements in the parent will cause a lot of pages 
1338     if (server
.bgsavechildpid 
== -1 && server
.bgrewritechildpid 
== -1 && 
1341         tryResizeHashTables(); 
1344     /* Show information about connected clients */ 
1345     if (!(loops 
% 50)) { 
1346         redisLog(REDIS_VERBOSE
,"%d clients connected (%d slaves), %zu bytes in use", 
1347             listLength(server
.clients
)-listLength(server
.slaves
), 
1348             listLength(server
.slaves
), 
1349             zmalloc_used_memory()); 
1352     /* Close connections of timedout clients */ 
1353     if ((server
.maxidletime 
&& !(loops 
% 100)) || server
.blpop_blocked_clients
) 
1354         closeTimedoutClients(); 
1356     /* Check if a background saving or AOF rewrite in progress terminated */ 
1357     if (server
.bgsavechildpid 
!= -1 || server
.bgrewritechildpid 
!= -1) { 
1361         if ((pid 
= wait3(&statloc
,WNOHANG
,NULL
)) != 0) { 
1362             if (pid 
== server
.bgsavechildpid
) { 
1363                 backgroundSaveDoneHandler(statloc
); 
1365                 backgroundRewriteDoneHandler(statloc
); 
1367             updateDictResizePolicy(); 
1370         /* If there is not a background saving in progress check if 
1371          * we have to save now */ 
1372          time_t now 
= time(NULL
); 
1373          for (j 
= 0; j 
< server
.saveparamslen
; j
++) { 
1374             struct saveparam 
*sp 
= server
.saveparams
+j
; 
1376             if (server
.dirty 
>= sp
->changes 
&& 
1377                 now
-server
.lastsave 
> sp
->seconds
) { 
1378                 redisLog(REDIS_NOTICE
,"%d changes in %d seconds. Saving...", 
1379                     sp
->changes
, sp
->seconds
); 
1380                 rdbSaveBackground(server
.dbfilename
); 
1386     /* Try to expire a few timed out keys. The algorithm used is adaptive and 
1387      * will use few CPU cycles if there are few expiring keys, otherwise 
1388      * it will get more aggressive to avoid that too much memory is used by 
1389      * keys that can be removed from the keyspace. */ 
1390     for (j 
= 0; j 
< server
.dbnum
; j
++) { 
1392         redisDb 
*db 
= server
.db
+j
; 
1394         /* Continue to expire if at the end of the cycle more than 25% 
1395          * of the keys were expired. */ 
1397             long num 
= dictSize(db
->expires
); 
1398             time_t now 
= time(NULL
); 
1401             if (num 
> REDIS_EXPIRELOOKUPS_PER_CRON
) 
1402                 num 
= REDIS_EXPIRELOOKUPS_PER_CRON
; 
1407                 if ((de 
= dictGetRandomKey(db
->expires
)) == NULL
) break; 
1408                 t 
= (time_t) dictGetEntryVal(de
); 
1410                     deleteKey(db
,dictGetEntryKey(de
)); 
1412                     server
.stat_expiredkeys
++; 
1415         } while (expired 
> REDIS_EXPIRELOOKUPS_PER_CRON
/4); 
1418     /* Swap a few keys on disk if we are over the memory limit and VM 
1419      * is enabled. Try to free objects from the free list first. */ 
1420     if (vmCanSwapOut()) { 
1421         while (server
.vm_enabled 
&& zmalloc_used_memory() > 
1422                 server
.vm_max_memory
) 
1426             if (tryFreeOneObjectFromFreelist() == REDIS_OK
) continue; 
1427             retval 
= (server
.vm_max_threads 
== 0) ? 
1428                         vmSwapOneObjectBlocking() : 
1429                         vmSwapOneObjectThreaded(); 
1430             if (retval 
== REDIS_ERR 
&& !(loops 
% 300) && 
1431                 zmalloc_used_memory() > 
1432                 (server
.vm_max_memory
+server
.vm_max_memory
/10)) 
1434                 redisLog(REDIS_WARNING
,"WARNING: vm-max-memory limit exceeded by more than 10%% but unable to swap more objects out!"); 
1436             /* Note that when using threaded I/O we free just one object, 
1437              * because anyway when the I/O thread in charge to swap this 
1438              * object out will finish, the handler of completed jobs 
1439              * will try to swap more objects if we are still out of memory. */ 
1440             if (retval 
== REDIS_ERR 
|| server
.vm_max_threads 
> 0) break; 
1444     /* Check if we should connect to a MASTER */ 
1445     if (server
.replstate 
== REDIS_REPL_CONNECT 
&& !(loops 
% 10)) { 
1446         redisLog(REDIS_NOTICE
,"Connecting to MASTER..."); 
1447         if (syncWithMaster() == REDIS_OK
) { 
1448             redisLog(REDIS_NOTICE
,"MASTER <-> SLAVE sync succeeded"); 
1454 /* This function gets called every time Redis is entering the 
1455  * main loop of the event driven library, that is, before sleeping 
1456  * for ready file descriptors. */ 
1457 static void beforeSleep(struct aeEventLoop 
*eventLoop
) { 
1458     REDIS_NOTUSED(eventLoop
); 
1460     if (server
.vm_enabled 
&& listLength(server
.io_ready_clients
)) { 
1464         listRewind(server
.io_ready_clients
,&li
); 
1465         while((ln 
= listNext(&li
))) { 
1466             redisClient 
*c 
= ln
->value
; 
1467             struct redisCommand 
*cmd
; 
1469             /* Resume the client. */ 
1470             listDelNode(server
.io_ready_clients
,ln
); 
1471             c
->flags 
&= (~REDIS_IO_WAIT
); 
1472             server
.vm_blocked_clients
--; 
1473             aeCreateFileEvent(server
.el
, c
->fd
, AE_READABLE
, 
1474                 readQueryFromClient
, c
); 
1475             cmd 
= lookupCommand(c
->argv
[0]->ptr
); 
1476             assert(cmd 
!= NULL
); 
1479             /* There may be more data to process in the input buffer. */ 
1480             if (c
->querybuf 
&& sdslen(c
->querybuf
) > 0) 
1481                 processInputBuffer(c
); 
1486 static void createSharedObjects(void) { 
1487     shared
.crlf 
= createObject(REDIS_STRING
,sdsnew("\r\n")); 
1488     shared
.ok 
= createObject(REDIS_STRING
,sdsnew("+OK\r\n")); 
1489     shared
.err 
= createObject(REDIS_STRING
,sdsnew("-ERR\r\n")); 
1490     shared
.emptybulk 
= createObject(REDIS_STRING
,sdsnew("$0\r\n\r\n")); 
1491     shared
.czero 
= createObject(REDIS_STRING
,sdsnew(":0\r\n")); 
1492     shared
.cone 
= createObject(REDIS_STRING
,sdsnew(":1\r\n")); 
1493     shared
.nullbulk 
= createObject(REDIS_STRING
,sdsnew("$-1\r\n")); 
1494     shared
.nullmultibulk 
= createObject(REDIS_STRING
,sdsnew("*-1\r\n")); 
1495     shared
.emptymultibulk 
= createObject(REDIS_STRING
,sdsnew("*0\r\n")); 
1496     shared
.pong 
= createObject(REDIS_STRING
,sdsnew("+PONG\r\n")); 
1497     shared
.queued 
= createObject(REDIS_STRING
,sdsnew("+QUEUED\r\n")); 
1498     shared
.wrongtypeerr 
= createObject(REDIS_STRING
,sdsnew( 
1499         "-ERR Operation against a key holding the wrong kind of value\r\n")); 
1500     shared
.nokeyerr 
= createObject(REDIS_STRING
,sdsnew( 
1501         "-ERR no such key\r\n")); 
1502     shared
.syntaxerr 
= createObject(REDIS_STRING
,sdsnew( 
1503         "-ERR syntax error\r\n")); 
1504     shared
.sameobjecterr 
= createObject(REDIS_STRING
,sdsnew( 
1505         "-ERR source and destination objects are the same\r\n")); 
1506     shared
.outofrangeerr 
= createObject(REDIS_STRING
,sdsnew( 
1507         "-ERR index out of range\r\n")); 
1508     shared
.space 
= createObject(REDIS_STRING
,sdsnew(" ")); 
1509     shared
.colon 
= createObject(REDIS_STRING
,sdsnew(":")); 
1510     shared
.plus 
= createObject(REDIS_STRING
,sdsnew("+")); 
1511     shared
.select0 
= createStringObject("select 0\r\n",10); 
1512     shared
.select1 
= createStringObject("select 1\r\n",10); 
1513     shared
.select2 
= createStringObject("select 2\r\n",10); 
1514     shared
.select3 
= createStringObject("select 3\r\n",10); 
1515     shared
.select4 
= createStringObject("select 4\r\n",10); 
1516     shared
.select5 
= createStringObject("select 5\r\n",10); 
1517     shared
.select6 
= createStringObject("select 6\r\n",10); 
1518     shared
.select7 
= createStringObject("select 7\r\n",10); 
1519     shared
.select8 
= createStringObject("select 8\r\n",10); 
1520     shared
.select9 
= createStringObject("select 9\r\n",10); 
1521     shared
.messagebulk 
= createStringObject("$7\r\nmessage\r\n",13); 
1522     shared
.subscribebulk 
= createStringObject("$9\r\nsubscribe\r\n",15); 
1523     shared
.unsubscribebulk 
= createStringObject("$11\r\nunsubscribe\r\n",18); 
1524     shared
.psubscribebulk 
= createStringObject("$10\r\npsubscribe\r\n",17); 
1525     shared
.punsubscribebulk 
= createStringObject("$12\r\npunsubscribe\r\n",19); 
1526     shared
.mbulk3 
= createStringObject("*3\r\n",4); 
1529 static void appendServerSaveParams(time_t seconds
, int changes
) { 
1530     server
.saveparams 
= zrealloc(server
.saveparams
,sizeof(struct saveparam
)*(server
.saveparamslen
+1)); 
1531     server
.saveparams
[server
.saveparamslen
].seconds 
= seconds
; 
1532     server
.saveparams
[server
.saveparamslen
].changes 
= changes
; 
1533     server
.saveparamslen
++; 
1536 static void resetServerSaveParams() { 
1537     zfree(server
.saveparams
); 
1538     server
.saveparams 
= NULL
; 
1539     server
.saveparamslen 
= 0; 
/* Fill the global `server` structure with the built-in defaults. These are
 * the values in effect before (or without) a configuration file: general
 * settings, append-only file settings, virtual memory settings, the default
 * save points and the replication state. It also initializes the R_PosInf,
 * R_NegInf and R_Nan double constants. */
1542 static void initServerConfig() { 
1543     server
.dbnum 
= REDIS_DEFAULT_DBNUM
; 
1544     server
.port 
= REDIS_SERVERPORT
; 
1545     server
.verbosity 
= REDIS_VERBOSE
; 
1546     server
.maxidletime 
= REDIS_MAXIDLETIME
; 
1547     server
.saveparams 
= NULL
; 
1548     server
.logfile 
= NULL
; /* NULL = log on standard output */ 
1549     server
.bindaddr 
= NULL
; 
1550     server
.glueoutputbuf 
= 1; 
1551     server
.daemonize 
= 0; 
    /* Append-only file defaults: disabled, no file descriptor open yet. */
1552     server
.appendonly 
= 0; 
1553     server
.appendfsync 
= APPENDFSYNC_ALWAYS
; 
1554     server
.lastfsync 
= time(NULL
); 
1555     server
.appendfd 
= -1; 
1556     server
.appendseldb 
= -1; /* Make sure the first time will not match */ 
    /* File name defaults are heap copies (zstrdup), so the config loader
     * can zfree() them before installing a replacement. */
1557     server
.pidfile 
= zstrdup("/var/run/redis.pid"); 
1558     server
.dbfilename 
= zstrdup("dump.rdb"); 
1559     server
.appendfilename 
= zstrdup("appendonly.aof"); 
1560     server
.requirepass 
= NULL
; 
1561     server
.shareobjects 
= 0; 
1562     server
.rdbcompression 
= 1; 
1563     server
.maxclients 
= 0; 
1564     server
.blpop_blocked_clients 
= 0; 
1565     server
.maxmemory 
= 0; 
    /* Virtual memory defaults: disabled. */
1566     server
.vm_enabled 
= 0; 
1567     server
.vm_swap_file 
= zstrdup("/tmp/redis-%p.vm"); 
1568     server
.vm_page_size 
= 256;          /* 256 bytes per page */ 
1569     server
.vm_pages 
= 1024*1024*100;    /* 104857600 pages, i.e. about 104 million */ 
1570     server
.vm_max_memory 
= 1024LL*1024*1024*1; /* 1 GB of RAM */ 
1571     server
.vm_max_threads 
= 4; 
1572     server
.vm_blocked_clients 
= 0; 
1573     server
.hash_max_zipmap_entries 
= REDIS_HASH_MAX_ZIPMAP_ENTRIES
; 
1574     server
.hash_max_zipmap_value 
= REDIS_HASH_MAX_ZIPMAP_VALUE
; 
    /* Start from an empty save point list, then install the three
     * default (seconds, changes) pairs. */
1576     resetServerSaveParams(); 
1578     appendServerSaveParams(60*60,1);  /* save after 1 hour and 1 change */ 
1579     appendServerSaveParams(300,100);  /* save after 5 minutes and 100 changes */ 
1580     appendServerSaveParams(60,10000); /* save after 1 minute and 10000 changes */ 
1581     /* Replication related */ 
1583     server
.masterauth 
= NULL
; 
1584     server
.masterhost 
= NULL
; 
    /* NOTE(review): literal 6379 here, while server.port above uses
     * REDIS_SERVERPORT -- confirm the two are meant to stay in sync. */
1585     server
.masterport 
= 6379; 
1586     server
.master 
= NULL
; 
1587     server
.replstate 
= REDIS_REPL_NONE
; 
1589     /* Double constants initialization */ 
    /* The three constants are obtained by dividing by R_Zero, which is
     * defined elsewhere -- presumably a double holding 0.0 so that the
     * quotients are +inf, -inf and NaN; TODO confirm. */
1591     R_PosInf 
= 1.0/R_Zero
; 
1592     R_NegInf 
= -1.0/R_Zero
; 
1593     R_Nan 
= R_Zero
/R_Zero
; 
/* Bring up the runtime state of the server using the values already stored
 * in `server`: signal dispositions, client/slave/monitor lists, the event
 * loop, the listening TCP socket, the per-database dictionaries, Pub/Sub
 * structures, statistics counters, the periodic serverCron timer, the
 * accept handler and, when enabled, the append-only file and the virtual
 * memory subsystem. */
1596 static void initServer() { 
    /* SIGHUP and SIGPIPE are ignored. */
1599     signal(SIGHUP
, SIG_IGN
); 
1600     signal(SIGPIPE
, SIG_IGN
); 
1601     setupSigSegvAction(); 
1603     server
.devnull 
= fopen("/dev/null","w"); 
1604     if (server
.devnull 
== NULL
) { 
    /* NOTE(review): the message prints server.neterr, but nothing visible
     * here stores the fopen() failure reason in that buffer;
     * strerror(errno) is probably what was intended -- confirm. */
1605         redisLog(REDIS_WARNING
, "Can't open /dev/null: %s", server
.neterr
); 
1608     server
.clients 
= listCreate(); 
1609     server
.slaves 
= listCreate(); 
1610     server
.monitors 
= listCreate(); 
1611     server
.objfreelist 
= listCreate(); 
1612     createSharedObjects(); 
1613     server
.el 
= aeCreateEventLoop(); 
    /* One redisDb slot per configured database. */
1614     server
.db 
= zmalloc(sizeof(redisDb
)*server
.dbnum
); 
    /* Listening socket; anetTcpServer() reports failures through
     * server.neterr, which is what gets logged when it returns -1. */
1615     server
.fd 
= anetTcpServer(server
.neterr
, server
.port
, server
.bindaddr
); 
1616     if (server
.fd 
== -1) { 
1617         redisLog(REDIS_WARNING
, "Opening TCP port: %s", server
.neterr
); 
    /* Create the dictionaries of every database. The io_keys dictionary
     * is only created when virtual memory is enabled. */
1620     for (j 
= 0; j 
< server
.dbnum
; j
++) { 
1621         server
.db
[j
].dict 
= dictCreate(&dbDictType
,NULL
); 
1622         server
.db
[j
].expires 
= dictCreate(&keyptrDictType
,NULL
); 
1623         server
.db
[j
].blockingkeys 
= dictCreate(&keylistDictType
,NULL
); 
1624         if (server
.vm_enabled
) 
1625             server
.db
[j
].io_keys 
= dictCreate(&keylistDictType
,NULL
); 
1626         server
.db
[j
].id 
= j
; 
    /* Pub/Sub state: a dictionary of channels and a list of patterns with
     * its own free and match callbacks. */
1628     server
.pubsub_channels 
= dictCreate(&keylistDictType
,NULL
); 
1629     server
.pubsub_patterns 
= listCreate(); 
1630     listSetFreeMethod(server
.pubsub_patterns
,freePubsubPattern
); 
1631     listSetMatchMethod(server
.pubsub_patterns
,listMatchPubsubPattern
); 
    /* Counters and background-job state; -1 marks "no child process". */
1632     server
.cronloops 
= 0; 
1633     server
.bgsavechildpid 
= -1; 
1634     server
.bgrewritechildpid 
= -1; 
1635     server
.bgrewritebuf 
= sdsempty(); 
1636     server
.lastsave 
= time(NULL
); 
1638     server
.stat_numcommands 
= 0; 
1639     server
.stat_numconnections 
= 0; 
1640     server
.stat_expiredkeys 
= 0; 
1641     server
.stat_starttime 
= time(NULL
); 
1642     server
.unixtime 
= time(NULL
); 
    /* Register the serverCron timer and the accept handler on the
     * listening socket; failing to register the latter is reported
     * through oom(). */
1643     aeCreateTimeEvent(server
.el
, 1, serverCron
, NULL
, NULL
); 
1644     if (aeCreateFileEvent(server
.el
, server
.fd
, AE_READABLE
, 
1645         acceptHandler
, NULL
) == AE_ERR
) oom("creating file event"); 
    /* Open (creating it if needed, mode 0644) the append-only file in
     * append mode when the feature is enabled. */
1647     if (server
.appendonly
) { 
1648         server
.appendfd 
= open(server
.appendfilename
,O_WRONLY
|O_APPEND
|O_CREAT
,0644); 
1649         if (server
.appendfd 
== -1) { 
1650             redisLog(REDIS_WARNING
, "Can't open the append-only file: %s", 
1656     if (server
.vm_enabled
) vmInit(); 
1659 /* Empty the whole database: for every configured database the main
 * dictionary and the expires dictionary are emptied. The number of keys
 * held by the main dictionaries before emptying is accumulated in
 * `removed`. */ 
1660 static long long emptyDb() { 
1662     long long removed 
= 0; 
1664     for (j 
= 0; j 
< server
.dbnum
; j
++) { 
    /* Count the keys before wiping them. */
1665         removed 
+= dictSize(server
.db
[j
].dict
); 
1666         dictEmpty(server
.db
[j
].dict
); 
1667         dictEmpty(server
.db
[j
].expires
); 
/* Convert a "yes"/"no" string (compared case-insensitively) into 1 or 0.
 * NOTE(review): the result for any other input is produced by code not
 * visible here; the callers in loadServerConfig() compare the result
 * against -1 to detect an invalid argument. */
1672 static int yesnotoi(char *s
) { 
1673     if (!strcasecmp(s
,"yes")) return 1; 
1674     else if (!strcasecmp(s
,"no")) return 0; 
1678 /* I agree, this is a very rudimental way to load a configuration... 
1679    will improve later if the config gets more complex */ 
/* Load the configuration file `filename` into the global `server` structure.
 *
 * The file is read with fgets() one line at a time. Each line is trimmed of
 * spaces, tabs, CR and LF; lines that are empty or start with '#' are
 * skipped. The remaining lines are split on single spaces, the directive
 * name (argv[0]) is lowercased, and the directive is matched together with
 * its expected argument count. A file name of exactly "-" is special-cased
 * (the stream is later compared against stdin before being closed).
 *
 * Invalid values set `err` and jump to the `loaderr` label, where a
 * diagnostic including the line number, the offending line and the error
 * text is printed on stderr. The "include" directive calls this function
 * recursively.
 *
 * NOTE(review): numeric arguments are converted with atoi()/strtol()/
 * strtoll() without an end-pointer check, so non-numeric text silently
 * converts to 0 and is only rejected where an explicit range test follows. */
1680 static void loadServerConfig(char *filename
) { 
1682     char buf
[REDIS_CONFIGLINE_MAX
+1], *err 
= NULL
; 
    /* Pre-format the "can't open" message. The "%s" in the template is
     * replaced by filename, so strlen(errormsg)+strlen(filename) bytes
     * hold the formatted text plus its terminator.
     * NOTE(review): no zfree(errorbuf) is visible in this function --
     * confirm it is released on the success path. */
1685     char *errormsg 
= "Fatal error, can't open config file '%s'"; 
1686     char *errorbuf 
= zmalloc(sizeof(char)*(strlen(errormsg
)+strlen(filename
))); 
1687     sprintf(errorbuf
, errormsg
, filename
); 
1689     if (filename
[0] == '-' && filename
[1] == '\0') 
1692         if ((fp 
= fopen(filename
,"r")) == NULL
) { 
    /* NOTE(review): errorbuf is passed in the format-string position of
     * redisLog(); a file name containing '%' would be interpreted as a
     * conversion specification. Passing "%s", errorbuf would avoid it. */
1693             redisLog(REDIS_WARNING
, errorbuf
); 
1698     while(fgets(buf
,REDIS_CONFIGLINE_MAX
+1,fp
) != NULL
) { 
1704         line 
= sdstrim(line
," \t\r\n"); 
1706         /* Skip comments and blank lines*/ 
1707         if (line
[0] == '#' || line
[0] == '\0') { 
1712         /* Split into arguments */ 
1713         argv 
= sdssplitlen(line
,sdslen(line
)," ",1,&argc
); 
1714         sdstolower(argv
[0]); 
1716         /* Execute config directives */ 
1717         if (!strcasecmp(argv
[0],"timeout") && argc 
== 2) { 
1718             server
.maxidletime 
= atoi(argv
[1]); 
1719             if (server
.maxidletime 
< 0) { 
1720                 err 
= "Invalid timeout value"; goto loaderr
; 
1722         } else if (!strcasecmp(argv
[0],"port") && argc 
== 2) { 
1723             server
.port 
= atoi(argv
[1]); 
1724             if (server
.port 
< 1 || server
.port 
> 65535) { 
1725                 err 
= "Invalid port"; goto loaderr
; 
1727         } else if (!strcasecmp(argv
[0],"bind") && argc 
== 2) { 
    /* NOTE(review): unlike "pidfile"/"dbfilename" below, the previous
     * value is not zfree()d before being overwritten here (the same holds
     * for "logfile", "masterauth" and "requirepass"), so a repeated
     * directive would presumably leak the earlier copy -- confirm. */
1728             server
.bindaddr 
= zstrdup(argv
[1]); 
1729         } else if (!strcasecmp(argv
[0],"save") && argc 
== 3) { 
1730             int seconds 
= atoi(argv
[1]); 
1731             int changes 
= atoi(argv
[2]); 
1732             if (seconds 
< 1 || changes 
< 0) { 
1733                 err 
= "Invalid save parameters"; goto loaderr
; 
1735             appendServerSaveParams(seconds
,changes
); 
1736         } else if (!strcasecmp(argv
[0],"dir") && argc 
== 2) { 
    /* "dir" changes the working directory of the process immediately. */
1737             if (chdir(argv
[1]) == -1) { 
1738                 redisLog(REDIS_WARNING
,"Can't chdir to '%s': %s", 
1739                     argv
[1], strerror(errno
)); 
1742         } else if (!strcasecmp(argv
[0],"loglevel") && argc 
== 2) { 
1743             if (!strcasecmp(argv
[1],"debug")) server
.verbosity 
= REDIS_DEBUG
; 
1744             else if (!strcasecmp(argv
[1],"verbose")) server
.verbosity 
= REDIS_VERBOSE
; 
1745             else if (!strcasecmp(argv
[1],"notice")) server
.verbosity 
= REDIS_NOTICE
; 
1746             else if (!strcasecmp(argv
[1],"warning")) server
.verbosity 
= REDIS_WARNING
; 
    /* NOTE(review): the message below lists debug, notice and warning but
     * omits "verbose", which is accepted above. */
1748                 err 
= "Invalid log level. Must be one of debug, notice, warning"; 
1751         } else if (!strcasecmp(argv
[0],"logfile") && argc 
== 2) { 
    /* The literal name "stdout" (any case) maps back to NULL, which
     * means logging on standard output. */
1754             server
.logfile 
= zstrdup(argv
[1]); 
1755             if (!strcasecmp(server
.logfile
,"stdout")) { 
1756                 zfree(server
.logfile
); 
1757                 server
.logfile 
= NULL
; 
1759             if (server
.logfile
) { 
1760                 /* Test if we are able to open the file. The server will not 
1761                  * be able to abort just for this problem later... */ 
1762                 logfp 
= fopen(server
.logfile
,"a"); 
1763                 if (logfp 
== NULL
) { 
1764                     err 
= sdscatprintf(sdsempty(), 
1765                         "Can't open the log file: %s", strerror(errno
)); 
1770         } else if (!strcasecmp(argv
[0],"databases") && argc 
== 2) { 
1771             server
.dbnum 
= atoi(argv
[1]); 
1772             if (server
.dbnum 
< 1) { 
1773                 err 
= "Invalid number of databases"; goto loaderr
; 
1775         } else if (!strcasecmp(argv
[0],"include") && argc 
== 2) { 
    /* Recursive load of the included file; no recursion depth limit is
     * visible here. */
1776             loadServerConfig(argv
[1]); 
1777         } else if (!strcasecmp(argv
[0],"maxclients") && argc 
== 2) { 
1778             server
.maxclients 
= atoi(argv
[1]); 
1779         } else if (!strcasecmp(argv
[0],"maxmemory") && argc 
== 2) { 
1780             server
.maxmemory 
= strtoll(argv
[1], NULL
, 10); 
1781         } else if (!strcasecmp(argv
[0],"slaveof") && argc 
== 3) { 
    /* Configuring a master also switches the replication state to
     * REDIS_REPL_CONNECT. */
1782             server
.masterhost 
= sdsnew(argv
[1]); 
1783             server
.masterport 
= atoi(argv
[2]); 
1784             server
.replstate 
= REDIS_REPL_CONNECT
; 
1785         } else if (!strcasecmp(argv
[0],"masterauth") && argc 
== 2) { 
1786                 server
.masterauth 
= zstrdup(argv
[1]); 
    /* The yes/no directives below store the yesnotoi() result directly
     * into the option and reject the line when it equals -1. */
1787         } else if (!strcasecmp(argv
[0],"glueoutputbuf") && argc 
== 2) { 
1788             if ((server
.glueoutputbuf 
= yesnotoi(argv
[1])) == -1) { 
1789                 err 
= "argument must be 'yes' or 'no'"; goto loaderr
; 
1791         } else if (!strcasecmp(argv
[0],"shareobjects") && argc 
== 2) { 
1792             if ((server
.shareobjects 
= yesnotoi(argv
[1])) == -1) { 
1793                 err 
= "argument must be 'yes' or 'no'"; goto loaderr
; 
1795         } else if (!strcasecmp(argv
[0],"rdbcompression") && argc 
== 2) { 
1796             if ((server
.rdbcompression 
= yesnotoi(argv
[1])) == -1) { 
1797                 err 
= "argument must be 'yes' or 'no'"; goto loaderr
; 
1799         } else if (!strcasecmp(argv
[0],"daemonize") && argc 
== 2) { 
1800             if ((server
.daemonize 
= yesnotoi(argv
[1])) == -1) { 
1801                 err 
= "argument must be 'yes' or 'no'"; goto loaderr
; 
1803         } else if (!strcasecmp(argv
[0],"appendonly") && argc 
== 2) { 
1804             if ((server
.appendonly 
= yesnotoi(argv
[1])) == -1) { 
1805                 err 
= "argument must be 'yes' or 'no'"; goto loaderr
; 
1807         } else if (!strcasecmp(argv
[0],"appendfsync") && argc 
== 2) { 
1808             if (!strcasecmp(argv
[1],"no")) { 
1809                 server
.appendfsync 
= APPENDFSYNC_NO
; 
1810             } else if (!strcasecmp(argv
[1],"always")) { 
1811                 server
.appendfsync 
= APPENDFSYNC_ALWAYS
; 
1812             } else if (!strcasecmp(argv
[1],"everysec")) { 
1813                 server
.appendfsync 
= APPENDFSYNC_EVERYSEC
; 
1815                 err 
= "argument must be 'no', 'always' or 'everysec'"; 
1818         } else if (!strcasecmp(argv
[0],"requirepass") && argc 
== 2) { 
1819             server
.requirepass 
= zstrdup(argv
[1]); 
1820         } else if (!strcasecmp(argv
[0],"pidfile") && argc 
== 2) { 
    /* Path options that have a heap-allocated default are released
     * before the new copy is stored. */
1821             zfree(server
.pidfile
); 
1822             server
.pidfile 
= zstrdup(argv
[1]); 
1823         } else if (!strcasecmp(argv
[0],"dbfilename") && argc 
== 2) { 
1824             zfree(server
.dbfilename
); 
1825             server
.dbfilename 
= zstrdup(argv
[1]); 
1826         } else if (!strcasecmp(argv
[0],"vm-enabled") && argc 
== 2) { 
1827             if ((server
.vm_enabled 
= yesnotoi(argv
[1])) == -1) { 
1828                 err 
= "argument must be 'yes' or 'no'"; goto loaderr
; 
1830         } else if (!strcasecmp(argv
[0],"vm-swap-file") && argc 
== 2) { 
1831             zfree(server
.vm_swap_file
); 
1832             server
.vm_swap_file 
= zstrdup(argv
[1]); 
1833         } else if (!strcasecmp(argv
[0],"vm-max-memory") && argc 
== 2) { 
1834             server
.vm_max_memory 
= strtoll(argv
[1], NULL
, 10); 
1835         } else if (!strcasecmp(argv
[0],"vm-page-size") && argc 
== 2) { 
1836             server
.vm_page_size 
= strtoll(argv
[1], NULL
, 10); 
1837         } else if (!strcasecmp(argv
[0],"vm-pages") && argc 
== 2) { 
1838             server
.vm_pages 
= strtoll(argv
[1], NULL
, 10); 
1839         } else if (!strcasecmp(argv
[0],"vm-max-threads") && argc 
== 2) { 
1840             server
.vm_max_threads 
= strtoll(argv
[1], NULL
, 10); 
1841         } else if (!strcasecmp(argv
[0],"hash-max-zipmap-entries") && argc 
== 2){ 
1842             server
.hash_max_zipmap_entries 
= strtol(argv
[1], NULL
, 10); 
1843         } else if (!strcasecmp(argv
[0],"hash-max-zipmap-value") && argc 
== 2){ 
1844             server
.hash_max_zipmap_value 
= strtol(argv
[1], NULL
, 10); 
    /* NOTE(review): this branch repeats the "vm-max-threads" test already
     * made a few branches above, so it can never be taken. */
1845         } else if (!strcasecmp(argv
[0],"vm-max-threads") && argc 
== 2) { 
1846             server
.vm_max_threads 
= strtoll(argv
[1], NULL
, 10); 
1848             err 
= "Bad directive or wrong number of arguments"; goto loaderr
; 
1850         for (j 
= 0; j 
< argc
; j
++) 
    /* The stream is closed unless it is stdin. */
1855     if (fp 
!= stdin
) fclose(fp
); 
    /* Error report: line number, offending line and error text. */
1859     fprintf(stderr
, "\n*** FATAL CONFIG FILE ERROR ***\n"); 
1860     fprintf(stderr
, "Reading the configuration file, at line %d\n", linenum
); 
1861     fprintf(stderr
, ">>> '%s'\n", line
); 
1862     fprintf(stderr
, "%s\n", err
); 
/* Drop one reference from every object held in the client's regular
 * argument vector (argv/argc) and in its multi-bulk argument vector
 * (mbargv/mbargc). */
1866 static void freeClientArgv(redisClient 
*c
) { 
1869     for (j 
= 0; j 
< c
->argc
; j
++) 
1870         decrRefCount(c
->argv
[j
]); 
1871     for (j 
= 0; j 
< c
->mbargc
; j
++) 
1872         decrRefCount(c
->mbargv
[j
]); 
/* Tear down a client: release its query buffer, unblock it if it is blocked,
 * drop its Pub/Sub subscriptions, unregister its file events, remove it from
 * the server-wide client lists (clients, io_ready_clients, slaves or
 * monitors), stop waiting for swapped keys, and reset the replication state
 * when the client was the master link. */
1877 static void freeClient(redisClient 
*c
) { 
1880     /* Note that if the client we are freeing is blocked into a blocking 
1881      * call, we have to set querybuf to NULL *before* to call 
1882      * unblockClientWaitingData() to avoid processInputBuffer() will get 
1883      * called. Also it is important to remove the file events after 
1884      * this, because this call adds the READABLE event. */ 
1885     sdsfree(c
->querybuf
); 
1887     if (c
->flags 
& REDIS_BLOCKED
) 
1888         unblockClientWaitingData(c
); 
1890     /* Unsubscribe from all the pubsub channels */ 
1891     pubsubUnsubscribeAllChannels(c
,0); 
1892     pubsubUnsubscribeAllPatterns(c
,0); 
1893     dictRelease(c
->pubsub_channels
); 
1894     listRelease(c
->pubsub_patterns
); 
1895     /* Obvious cleanup */ 
1896     aeDeleteFileEvent(server
.el
,c
->fd
,AE_READABLE
); 
1897     aeDeleteFileEvent(server
.el
,c
->fd
,AE_WRITABLE
); 
1898     listRelease(c
->reply
); 
1901     /* Remove from the list of clients */ 
    /* The client is expected to always be present in server.clients;
     * the assertion enforces that. */
1902     ln 
= listSearchKey(server
.clients
,c
); 
1903     redisAssert(ln 
!= NULL
); 
1904     listDelNode(server
.clients
,ln
); 
1905     /* Remove from the list of clients waiting for swapped keys */ 
    /* Only clients flagged REDIS_IO_WAIT whose io_keys list is already
     * empty are looked up in io_ready_clients. */
1906     if (c
->flags 
& REDIS_IO_WAIT 
&& listLength(c
->io_keys
) == 0) { 
1907         ln 
= listSearchKey(server
.io_ready_clients
,c
); 
1909             listDelNode(server
.io_ready_clients
,ln
); 
1910             server
.vm_blocked_clients
--; 
    /* With VM enabled, stop waiting for each key still listed in io_keys.
     * NOTE(review): the loop only terminates if dontWaitForSwappedKey()
     * removes the key from c->io_keys -- confirm against its definition. */
1913     while (server
.vm_enabled 
&& listLength(c
->io_keys
)) { 
1914         ln 
= listFirst(c
->io_keys
); 
1915         dontWaitForSwappedKey(c
,ln
->value
); 
1917     listRelease(c
->io_keys
); 
1918     /* Master/slave cleanup */ 
1919     if (c
->flags 
& REDIS_SLAVE
) { 
1920         if (c
->replstate 
== REDIS_REPL_SEND_BULK 
&& c
->repldbfd 
!= -1) 
    /* A client flagged REDIS_MONITOR lives in server.monitors, any other
     * slave in server.slaves. */
1922         list 
*l 
= (c
->flags 
& REDIS_MONITOR
) ? server
.monitors 
: server
.slaves
; 
1923         ln 
= listSearchKey(l
,c
); 
1924         redisAssert(ln 
!= NULL
); 
    /* Losing the master link clears server.master and moves the
     * replication state back to REDIS_REPL_CONNECT. */
1927     if (c
->flags 
& REDIS_MASTER
) { 
1928         server
.master 
= NULL
; 
1929         server
.replstate 
= REDIS_REPL_CONNECT
; 
1931     /* Release memory */ 
1934     freeClientMultiState(c
); 
/* Size in bytes of the stack buffer used to glue reply objects together. */
1938 #define GLUEREPLY_UP_TO (1024) 
/* Merge the leading objects of the client's reply list into one object.
 * Objects are visited from the head of c->reply; while the accumulated
 * length plus the next object still fits in GLUEREPLY_UP_TO bytes, the
 * object's bytes are copied into a local buffer and its list node is
 * deleted. Nothing is done when no bytes were accumulated. Otherwise a new
 * string object holding the accumulated bytes is pushed on the head of the
 * reply list. */
1939 static void glueReplyBuffersIfNeeded(redisClient 
*c
) { 
1941     char buf
[GLUEREPLY_UP_TO
]; 
1946     listRewind(c
->reply
,&li
); 
1947     while((ln 
= listNext(&li
))) { 
1951         objlen 
= sdslen(o
->ptr
); 
1952         if (copylen 
+ objlen 
<= GLUEREPLY_UP_TO
) { 
1953             memcpy(buf
+copylen
,o
->ptr
,objlen
); 
1955             listDelNode(c
->reply
,ln
); 
1957             if (copylen 
== 0) return; 
1961     /* Now the output buffer is empty, add the new single element */ 
1962     o 
= createObject(REDIS_STRING
,sdsnewlen(buf
,copylen
)); 
1963     listAddNodeHead(c
->reply
,o
); 
/* Writable-event handler: send as much of the client's pending reply list as
 * possible on `fd`.
 *
 * `privdata` is the redisClient; `mask` is unused. When output gluing is
 * disabled, the reply list is longer than REDIS_WRITEV_THRESHOLD and the
 * client is not the master, the work is delegated to
 * sendReplyToClientWritev(). Otherwise the head object is written with
 * write(), resuming from c->sentlen, and removed once fully sent. The loop
 * stops on a non-positive write result, when the list is empty, or once more
 * than REDIS_MAX_WRITE_PER_EVENT bytes were written in this call. When the
 * list becomes empty the writable event is removed. */
1966 static void sendReplyToClient(aeEventLoop 
*el
, int fd
, void *privdata
, int mask
) { 
1967     redisClient 
*c 
= privdata
; 
1968     int nwritten 
= 0, totwritten 
= 0, objlen
; 
1971     REDIS_NOTUSED(mask
); 
1973     /* Use writev() if we have enough buffers to send */ 
1974     if (!server
.glueoutputbuf 
&& 
1975         listLength(c
->reply
) > REDIS_WRITEV_THRESHOLD 
&&  
1976         !(c
->flags 
& REDIS_MASTER
)) 
1978         sendReplyToClientWritev(el
, fd
, privdata
, mask
); 
1982     while(listLength(c
->reply
)) { 
    /* With gluing enabled, small head objects are merged first so that
     * fewer write() calls are needed. */
1983         if (server
.glueoutputbuf 
&& listLength(c
->reply
) > 1) 
1984             glueReplyBuffersIfNeeded(c
); 
1986         o 
= listNodeValue(listFirst(c
->reply
)); 
1987         objlen 
= sdslen(o
->ptr
); 
1990             listDelNode(c
->reply
,listFirst(c
->reply
)); 
1994         if (c
->flags 
& REDIS_MASTER
) { 
1995             /* Don't reply to a master */ 
    /* Nothing is written: the remaining length is reported as written so
     * that the object is consumed by the bookkeeping below. */
1996             nwritten 
= objlen 
- c
->sentlen
; 
1998             nwritten 
= write(fd
, ((char*)o
->ptr
)+c
->sentlen
, objlen 
- c
->sentlen
); 
1999             if (nwritten 
<= 0) break; 
2001         c
->sentlen 
+= nwritten
; 
2002         totwritten 
+= nwritten
; 
2003         /* If we fully sent the object on head go to the next one */ 
2004         if (c
->sentlen 
== objlen
) { 
2005             listDelNode(c
->reply
,listFirst(c
->reply
)); 
2008         /* Note that we avoid to send more than REDIS_MAX_WRITE_PER_EVENT 
2009          * bytes, in a single threaded server it's a good idea to serve 
2010          * other clients as well, even if a very large request comes from 
2011          * super fast link that is always able to accept data (in real world 
2012          * scenario think about 'KEYS *' against the loopback interface) */ 
2013         if (totwritten 
> REDIS_MAX_WRITE_PER_EVENT
) break; 
    /* A write() result of -1 is an error; EAGAIN is singled out from the
     * other errno values, which are logged at verbose level. */
2015     if (nwritten 
== -1) { 
2016         if (errno 
== EAGAIN
) { 
2019             redisLog(REDIS_VERBOSE
, 
2020                 "Error writing to client: %s", strerror(errno
)); 
    /* Any progress refreshes the client's last interaction time. */
2025     if (totwritten 
> 0) c
->lastinteraction 
= time(NULL
); 
2026     if (listLength(c
->reply
) == 0) { 
2028         aeDeleteFileEvent(server
.el
,c
->fd
,AE_WRITABLE
); 
/* writev()-based variant of the reply sender. Up to
 * REDIS_WRITEV_IOVEC_COUNT pending reply objects are gathered into an iovec
 * array (the first one starting at c->sentlen) and sent with a single
 * writev() call. The byte count returned by writev() is then used to delete
 * the fully written objects from c->reply and to record the partial progress
 * on the next one in c->sentlen. `privdata` is the redisClient; `mask` is
 * unused. When the reply list becomes empty the writable event is removed. */
2032 static void sendReplyToClientWritev(aeEventLoop 
*el
, int fd
, void *privdata
, int mask
) 
2034     redisClient 
*c 
= privdata
; 
2035     int nwritten 
= 0, totwritten 
= 0, objlen
, willwrite
; 
2037     struct iovec iov
[REDIS_WRITEV_IOVEC_COUNT
]; 
2038     int offset
, ion 
= 0; 
2040     REDIS_NOTUSED(mask
); 
2043     while (listLength(c
->reply
)) { 
2044         offset 
= c
->sentlen
; 
2048         /* fill-in the iov[] array */ 
2049         for(node 
= listFirst(c
->reply
); node
; node 
= listNextNode(node
)) { 
2050             o 
= listNodeValue(node
); 
2051             objlen 
= sdslen(o
->ptr
); 
    /* Gathering is bounded by the per-event byte budget ... */
2053             if (totwritten 
+ objlen 
- offset 
> REDIS_MAX_WRITE_PER_EVENT
)  
    /* ... and by the capacity of the iovec array. */
2056             if(ion 
== REDIS_WRITEV_IOVEC_COUNT
) 
2057                 break; /* no more iovecs */ 
2059             iov
[ion
].iov_base 
= ((char*)o
->ptr
) + offset
; 
2060             iov
[ion
].iov_len 
= objlen 
- offset
; 
    /* NOTE(review): willwrite is declared without an initializer above;
     * confirm it is reset to zero before this accumulation (the reset is
     * not among the visible lines). */
2061             willwrite 
+= objlen 
- offset
; 
2062             offset 
= 0; /* just for the first item */ 
2069         /* write all collected blocks at once */ 
2070         if((nwritten 
= writev(fd
, iov
, ion
)) < 0) { 
    /* Errors other than EAGAIN are logged at verbose level. */
2071             if (errno 
!= EAGAIN
) { 
2072                 redisLog(REDIS_VERBOSE
, 
2073                          "Error writing to client: %s", strerror(errno
)); 
2080         totwritten 
+= nwritten
; 
2081         offset 
= c
->sentlen
; 
2083         /* remove written robjs from c->reply */ 
2084         while (nwritten 
&& listLength(c
->reply
)) { 
2085             o 
= listNodeValue(listFirst(c
->reply
)); 
2086             objlen 
= sdslen(o
->ptr
); 
    /* The head object was written completely: drop it and charge its
     * remaining length against the written byte count. */
2088             if(nwritten 
>= objlen 
- offset
) { 
2089                 listDelNode(c
->reply
, listFirst(c
->reply
)); 
2090                 nwritten 
-= objlen 
- offset
; 
    /* Otherwise only part of the head object went out: remember how far. */
2094                 c
->sentlen 
+= nwritten
; 
2102         c
->lastinteraction 
= time(NULL
); 
2104     if (listLength(c
->reply
) == 0) { 
2106         aeDeleteFileEvent(server
.el
,c
->fd
,AE_WRITABLE
); 
/* Linear, case-insensitive search of `name` in cmdTable. The table is
 * scanned until an entry with a NULL name is met; a pointer to the matching
 * entry is returned as soon as one is found.
 * NOTE(review): the value returned when nothing matches is not visible
 * here -- processCommand() treats a failed lookup as an unknown command. */
2110 static struct redisCommand 
*lookupCommand(char *name
) { 
2112     while(cmdTable
[j
].name 
!= NULL
) { 
2113         if (!strcasecmp(name
,cmdTable
[j
].name
)) return &cmdTable
[j
]; 
2119 /* resetClient prepares the client to process the next command */ 
2120 static void resetClient(redisClient 
*c
) { 
2126 /* Call() is the core of Redis execution of a command */ 
/* server.dirty is sampled before the command runs and the difference is
 * taken afterwards, so `dirty` ends up holding the change in server.dirty
 * caused by this command. That value decides whether the command is
 * propagated:
 *  - to the append-only file, when it is enabled and dirty is non-zero;
 *  - to the slaves, when dirty is non-zero or the command carries
 *    REDIS_CMD_FORCE_REPLICATION, and at least one slave is attached;
 *  - to the monitors, unconditionally, when at least one is attached.
 * The executed-commands counter is incremented at the end. */
2127 static void call(redisClient 
*c
, struct redisCommand 
*cmd
) { 
2130     dirty 
= server
.dirty
; 
2132     dirty 
= server
.dirty
-dirty
; 
2134     if (server
.appendonly 
&& dirty
) 
2135         feedAppendOnlyFile(cmd
,c
->db
->id
,c
->argv
,c
->argc
); 
2136     if ((dirty 
|| cmd
->flags 
& REDIS_CMD_FORCE_REPLICATION
) && 
2137         listLength(server
.slaves
)) 
2138         replicationFeedSlaves(server
.slaves
,c
->db
->id
,c
->argv
,c
->argc
); 
2139     if (listLength(server
.monitors
)) 
2140         replicationFeedSlaves(server
.monitors
,c
->db
->id
,c
->argv
,c
->argc
); 
2141     server
.stat_numcommands
++; 
2144 /* If this function gets called we already read a whole 
2145  * command, arguments are in the client argv/argc fields. 
2146  * processCommand() executes the command or prepares the 
2147  * server for a bulk read from the client. 
2149  * If 1 is returned the client is still alive and valid 
2150  * and other operations can be performed by the caller. Otherwise 
2151  * if 0 is returned the client was destroyed (i.e. after QUIT). */ 
2152 static int processCommand(redisClient 
*c
) { 
2153     struct redisCommand 
*cmd
; 
2155     /* Free some memory if needed (maxmemory setting) */ 
2156     if (server
.maxmemory
) freeMemoryIfNeeded(); 
2158     /* Handle the multi bulk command type. This is an alternative protocol 
2159      * supported by Redis in order to receive commands that are composed of 
2160      * multiple binary-safe "bulk" arguments. The latency of processing is 
2161      * a bit higher but this allows things like multi-sets, so if this 
2162      * protocol is used only for MSET and similar commands this is a big win. */ 
    /* A single argument starting with '*' opens a multi-bulk request; the
     * digits after the '*' are the number of bulk arguments to expect. */
2163     if (c
->multibulk 
== 0 && c
->argc 
== 1 && ((char*)(c
->argv
[0]->ptr
))[0] == '*') { 
2164         c
->multibulk 
= atoi(((char*)c
->argv
[0]->ptr
)+1); 
2165         if (c
->multibulk 
<= 0) { 
2169             decrRefCount(c
->argv
[c
->argc
-1]); 
2173     } else if (c
->multibulk
) { 
    /* Inside a multi-bulk request: bulklen == -1 means the "$<len>" line
     * of the next argument is expected, otherwise the argument payload
     * itself has just been read. */
2174         if (c
->bulklen 
== -1) { 
2175             if (((char*)c
->argv
[0]->ptr
)[0] != '$') { 
2176                 addReplySds(c
,sdsnew("-ERR multi bulk protocol error\r\n")); 
2180                 int bulklen 
= atoi(((char*)c
->argv
[0]->ptr
)+1); 
2181                 decrRefCount(c
->argv
[0]); 
    /* Bulk lengths are accepted in the range 0 .. 1024*1024*1024. */
2182                 if (bulklen 
< 0 || bulklen 
> 1024*1024*1024) { 
2184                     addReplySds(c
,sdsnew("-ERR invalid bulk write count\r\n")); 
2189                 c
->bulklen 
= bulklen
+2; /* add two bytes for CR+LF */ 
    /* Append the argument just read to the multi-bulk vector. */
2193             c
->mbargv 
= zrealloc(c
->mbargv
,(sizeof(robj
*))*(c
->mbargc
+1)); 
2194             c
->mbargv
[c
->mbargc
] = c
->argv
[0]; 
2198             if (c
->multibulk 
== 0) { 
2202                 /* Here we need to swap the multi-bulk argc/argv with the 
2203                  * normal argc/argv of the client structure. */ 
2205                 c
->argv 
= c
->mbargv
; 
2206                 c
->mbargv 
= auxargv
; 
2209                 c
->argc 
= c
->mbargc
; 
2210                 c
->mbargc 
= auxargc
; 
2212                 /* We need to set bulklen to something different than -1 
2213                  * in order for the code below to process the command without 
2214                  * to try to read the last argument of a bulk command as 
2215                  * a special argument. */ 
2217                 /* continue below and process the command */ 
2224     /* -- end of multi bulk commands processing -- */ 
2226     /* The QUIT command is handled as a special case. Normal command 
2227      * procs are unable to close the client connection safely */ 
2228     if (!strcasecmp(c
->argv
[0]->ptr
,"quit")) { 
2233     /* Now lookup the command and check ASAP about trivial error conditions 
2234      * such wrong arity, bad command name and so forth. */ 
2235     cmd 
= lookupCommand(c
->argv
[0]->ptr
); 
2238             sdscatprintf(sdsempty(), "-ERR unknown command '%s'\r\n", 
2239                 (char*)c
->argv
[0]->ptr
)); 
    /* Arity check: a positive arity must match argc exactly, a negative
     * arity -N means "at least N arguments". */
2242     } else if ((cmd
->arity 
> 0 && cmd
->arity 
!= c
->argc
) || 
2243                (c
->argc 
< -cmd
->arity
)) { 
2245             sdscatprintf(sdsempty(), 
2246                 "-ERR wrong number of arguments for '%s' command\r\n", 
2250     } else if (cmd
->flags 
& REDIS_CMD_BULK 
&& c
->bulklen 
== -1) { 
2251         /* This is a bulk command, we have to read the last argument yet. */ 
    /* The last inline argument is the byte count of the bulk payload; it
     * is parsed and then released. */
2252         int bulklen 
= atoi(c
->argv
[c
->argc
-1]->ptr
); 
2254         decrRefCount(c
->argv
[c
->argc
-1]); 
2255         if (bulklen 
< 0 || bulklen 
> 1024*1024*1024) { 
2257             addReplySds(c
,sdsnew("-ERR invalid bulk write count\r\n")); 
2262         c
->bulklen 
= bulklen
+2; /* add two bytes for CR+LF */ 
2263         /* It is possible that the bulk read is already in the 
2264          * buffer. Check this condition and handle it accordingly. 
2265          * This is just a fast path, alternative to call processInputBuffer(). 
2266          * It's a good idea since the code is small and this condition 
2267          * happens most of the times. */ 
2268         if ((signed)sdslen(c
->querybuf
) >= c
->bulklen
) { 
2269             c
->argv
[c
->argc
] = createStringObject(c
->querybuf
,c
->bulklen
-2); 
2271             c
->querybuf 
= sdsrange(c
->querybuf
,c
->bulklen
,-1); 
2273             /* Otherwise return... there is to read the last argument 
2274              * from the socket. */ 
2278     /* Let's try to encode the bulk object to save space. */ 
2279     if (cmd
->flags 
& REDIS_CMD_BULK
) 
2280         tryObjectEncoding(c
->argv
[c
->argc
-1]); 
2282     /* Check if the user is authenticated */ 
    /* With a password configured, only the AUTH command itself is let
     * through for a client that has not authenticated yet. */
2283     if (server
.requirepass 
&& !c
->authenticated 
&& cmd
->proc 
!= authCommand
) { 
2284         addReplySds(c
,sdsnew("-ERR operation not permitted\r\n")); 
2289     /* Handle the maxmemory directive */ 
    /* Commands flagged REDIS_CMD_DENYOOM are refused while the used
     * memory exceeds the configured limit. */
2290     if (server
.maxmemory 
&& (cmd
->flags 
& REDIS_CMD_DENYOOM
) && 
2291         zmalloc_used_memory() > server
.maxmemory
) 
2293         addReplySds(c
,sdsnew("-ERR command not allowed when used memory > 'maxmemory'\r\n")); 
2298     /* Only allow SUBSCRIBE and UNSUBSCRIBE in the context of Pub/Sub */ 
    /* NOTE(review): the test looks only at the size of
     * c->pubsub_channels; a client subscribed to patterns alone does not
     * appear to be restricted by this check -- confirm whether that is
     * intended. */
2299     if (dictSize(c
->pubsub_channels
) > 0 && 
2300         cmd
->proc 
!= subscribeCommand 
&& cmd
->proc 
!= unsubscribeCommand 
&& 
2301         cmd
->proc 
!= psubscribeCommand 
&& cmd
->proc 
!= punsubscribeCommand
) { 
2302         addReplySds(c
,sdsnew("-ERR only (P)SUBSCRIBE / (P)UNSUBSCRIBE / QUIT allowed in this context\r\n")); 
2307     /* Exec the command */ 
    /* Inside a MULTI block every command except EXEC and DISCARD is
     * queued and acknowledged with the shared "queued" reply. */
2308     if (c
->flags 
& REDIS_MULTI 
&& cmd
->proc 
!= execCommand 
&& cmd
->proc 
!= discardCommand
) { 
2309         queueMultiCommand(c
,cmd
); 
2310         addReply(c
,shared
.queued
); 
    /* With VM and I/O threads enabled, the function returns 1 early when
     * blockClientOnSwappedKeys() reports a non-zero result. */
2312         if (server
.vm_enabled 
&& server
.vm_max_threads 
> 0 && 
2313             blockClientOnSwappedKeys(cmd
,c
)) return 1; 
2317     /* Prepare the client for the next command */ 
/* Send a command to every client in the `slaves` list (used for both real
 * slaves and MONITOR clients). The command given by argv/argc is re-encoded
 * as a multi-bulk request: one "*<argc>" header object followed, for each
 * argument, by a "$<len>" object, the argument object itself and the shared
 * CRLF object. `dictid` is the database the command was executed against; a
 * SELECT for it is sent first to any slave whose slaveseldb differs. */
2322 static void replicationFeedSlaves(list 
*slaves
, int dictid
, robj 
**argv
, int argc
) { 
2327     /* We need 1+(ARGS*3) objects since commands are using the new protocol 
2328      * and we need 1 object for the first "*<count>\r\n" multibulk count, then 
2329      * for every additional object we have "$<count>\r\n" + object + "\r\n". */ 
    /* The stack array is used when argc is at most REDIS_STATIC_ARGS; the
     * heap is used for larger commands. */
2330     robj 
*static_outv
[REDIS_STATIC_ARGS
*3+1]; 
2333     if (argc 
<= REDIS_STATIC_ARGS
) { 
2336         outv 
= zmalloc(sizeof(robj
*)*(argc
*3+1)); 
    /* Header and length objects get their refcount forced to 0; the
     * incr/decr pair further down then takes care of their lifetime. */
2339     lenobj 
= createObject(REDIS_STRING
, 
2340             sdscatprintf(sdsempty(), "*%d\r\n", argc
)); 
2341     lenobj
->refcount 
= 0; 
2342     outv
[outc
++] = lenobj
; 
2343     for (j 
= 0; j 
< argc
; j
++) { 
2344         lenobj 
= createObject(REDIS_STRING
, 
2345             sdscatprintf(sdsempty(),"$%lu\r\n", 
2346                 (unsigned long) stringObjectLen(argv
[j
]))); 
2347         lenobj
->refcount 
= 0; 
2348         outv
[outc
++] = lenobj
; 
2349         outv
[outc
++] = argv
[j
]; 
2350         outv
[outc
++] = shared
.crlf
; 
2353     /* Increment all the refcounts at start and decrement at end in order to 
2354      * be sure to free objects if there is no slave in a replication state 
2355      * able to be feed with commands */ 
2356     for (j 
= 0; j 
< outc
; j
++) incrRefCount(outv
[j
]); 
2357     listRewind(slaves
,&li
); 
2358     while((ln 
= listNext(&li
))) { 
2359         redisClient 
*slave 
= ln
->value
; 
2361         /* Don't feed slaves that are still waiting for BGSAVE to start */ 
2362         if (slave
->replstate 
== REDIS_REPL_WAIT_BGSAVE_START
) continue; 
2364         /* Feed all the other slaves, MONITORs and so on */ 
    /* Databases 0-9 use pre-built shared SELECT objects; any other id
     * gets a freshly formatted one. */
2365         if (slave
->slaveseldb 
!= dictid
) { 
2369             case 0: selectcmd 
= shared
.select0
; break; 
2370             case 1: selectcmd 
= shared
.select1
; break; 
2371             case 2: selectcmd 
= shared
.select2
; break; 
2372             case 3: selectcmd 
= shared
.select3
; break; 
2373             case 4: selectcmd 
= shared
.select4
; break; 
2374             case 5: selectcmd 
= shared
.select5
; break; 
2375             case 6: selectcmd 
= shared
.select6
; break; 
2376             case 7: selectcmd 
= shared
.select7
; break; 
2377             case 8: selectcmd 
= shared
.select8
; break; 
2378             case 9: selectcmd 
= shared
.select9
; break; 
2380                 selectcmd 
= createObject(REDIS_STRING
, 
2381                     sdscatprintf(sdsempty(),"select %d\r\n",dictid
)); 
2382                 selectcmd
->refcount 
= 0; 
2385             addReply(slave
,selectcmd
); 
2386             slave
->slaveseldb 
= dictid
; 
2388         for (j 
= 0; j 
< outc
; j
++) addReply(slave
,outv
[j
]); 
    /* Balance the incrRefCount() pass above, then release the vector if
     * it was heap allocated. */
2390     for (j 
= 0; j 
< outc
; j
++) decrRefCount(outv
[j
]); 
2391     if (outv 
!= static_outv
) zfree(outv
); 
/* Parse the client's query buffer and run the commands found in it.
 *
 * With c->bulklen == -1 a new request line is expected: the buffer is
 * searched for '\n', the line is cut out (any following data stays in
 * c->querybuf), split on single spaces into string objects stored in
 * c->argv, and handed to processCommand(). With any other bulklen the
 * payload of a bulk argument is expected: once the buffer holds at least
 * bulklen bytes, everything but the trailing CRLF becomes the last argument
 * and processCommand() is called. In both cases parsing restarts while the
 * client is still valid and data is left in the buffer. */
2394 static void processInputBuffer(redisClient 
*c
) { 
2396     /* Before to process the input buffer, make sure the client is not 
2397      * waiting for a blocking operation such as BLPOP. Note that the first 
2398      * iteration the client is never blocked, otherwise the processInputBuffer 
2399      * would not be called at all, but after the execution of the first commands 
2400      * in the input buffer the client may be blocked, and the "goto again" 
2401      * will try to reiterate. The following line will make it return asap. */ 
2402     if (c
->flags 
& REDIS_BLOCKED 
|| c
->flags 
& REDIS_IO_WAIT
) return; 
2403     if (c
->bulklen 
== -1) { 
2404         /* Read the first line of the query */ 
2405         char *p 
= strchr(c
->querybuf
,'\n'); 
    /* The old buffer becomes `query`; a fresh empty buffer receives
     * whatever follows the first line. */
2412             query 
= c
->querybuf
; 
2413             c
->querybuf 
= sdsempty(); 
2414             querylen 
= 1+(p
-(query
)); 
2415             if (sdslen(query
) > querylen
) { 
2416                 /* leave data after the first line of the query in the buffer */ 
2417                 c
->querybuf 
= sdscatlen(c
->querybuf
,query
+querylen
,sdslen(query
)-querylen
); 
2419             *p 
= '\0'; /* remove "\n" */ 
    /* NOTE(review): when the newline is the very first byte of the
     * buffer, p-1 points before the start of the string -- confirm this
     * read (and possible write) is safe for sds-allocated buffers. */
2420             if (*(p
-1) == '\r') *(p
-1) = '\0'; /* and "\r" if any */ 
2421             sdsupdatelen(query
); 
2423             /* Now we can split the query in arguments */ 
2424             argv 
= sdssplitlen(query
,sdslen(query
)," ",1,&argc
); 
2427             if (c
->argv
) zfree(c
->argv
); 
2428             c
->argv 
= zmalloc(sizeof(robj
*)*argc
); 
    /* Only non-empty tokens become arguments, so consecutive spaces do
     * not produce empty arguments. */
2430             for (j 
= 0; j 
< argc
; j
++) { 
2431                 if (sdslen(argv
[j
])) { 
2432                     c
->argv
[c
->argc
] = createObject(REDIS_STRING
,argv
[j
]); 
2440                 /* Execute the command. If the client is still valid 
2441                  * after processCommand() return and there is something 
2442                  * on the query buffer try to process the next command. */ 
2443                 if (processCommand(c
) && sdslen(c
->querybuf
)) goto again
; 
2445                 /* Nothing to process, argc == 0. Just process the query 
2446                  * buffer if it's not empty or return to the caller */ 
2447                 if (sdslen(c
->querybuf
)) goto again
; 
    /* No newline yet: a buffer that has already reached
     * REDIS_REQUEST_MAX_SIZE is treated as a protocol error. */
2450         } else if (sdslen(c
->querybuf
) >= REDIS_REQUEST_MAX_SIZE
) { 
2451             redisLog(REDIS_VERBOSE
, "Client protocol error"); 
2456         /* Bulk read handling. Note that if we are at this point 
2457            the client already sent a command terminated with a newline, 
2458            we are reading the bulk data that is actually the last 
2459            argument of the command. */ 
2460         int qbl 
= sdslen(c
->querybuf
); 
2462         if (c
->bulklen 
<= qbl
) { 
2463             /* Copy everything but the final CRLF as final argument */ 
2464             c
->argv
[c
->argc
] = createStringObject(c
->querybuf
,c
->bulklen
-2); 
2466             c
->querybuf 
= sdsrange(c
->querybuf
,c
->bulklen
,-1); 
2467             /* Process the command. If the client is still valid after 
2468              * the processing and there is more data in the buffer 
2469              * try to parse it. */ 
2470             if (processCommand(c
) && sdslen(c
->querybuf
)) goto again
; 
2476 static void readQueryFromClient(aeEventLoop 
*el
, int fd
, void *privdata
, int mask
) { 
2477     redisClient 
*c 
= (redisClient
*) privdata
; 
2478     char buf
[REDIS_IOBUF_LEN
]; 
2481     REDIS_NOTUSED(mask
); 
2483     nread 
= read(fd
, buf
, REDIS_IOBUF_LEN
); 
2485         if (errno 
== EAGAIN
) { 
2488             redisLog(REDIS_VERBOSE
, "Reading from client: %s",strerror(errno
)); 
2492     } else if (nread 
== 0) { 
2493         redisLog(REDIS_VERBOSE
, "Client closed connection"); 
2498         c
->querybuf 
= sdscatlen(c
->querybuf
, buf
, nread
); 
2499         c
->lastinteraction 
= time(NULL
); 
2503     processInputBuffer(c
); 
2506 static int selectDb(redisClient 
*c
, int id
) { 
2507     if (id 
< 0 || id 
>= server
.dbnum
) 
2509     c
->db 
= &server
.db
[id
]; 
2513 static void *dupClientReplyValue(void *o
) { 
2514     incrRefCount((robj
*)o
); 
2518 static int listMatchObjects(void *a
, void *b
) { 
2519     return compareStringObjects(a
,b
) == 0; 
2522 static redisClient 
*createClient(int fd
) { 
2523     redisClient 
*c 
= zmalloc(sizeof(*c
)); 
2525     anetNonBlock(NULL
,fd
); 
2526     anetTcpNoDelay(NULL
,fd
); 
2527     if (!c
) return NULL
; 
2530     c
->querybuf 
= sdsempty(); 
2539     c
->lastinteraction 
= time(NULL
); 
2540     c
->authenticated 
= 0; 
2541     c
->replstate 
= REDIS_REPL_NONE
; 
2542     c
->reply 
= listCreate(); 
2543     listSetFreeMethod(c
->reply
,decrRefCount
); 
2544     listSetDupMethod(c
->reply
,dupClientReplyValue
); 
2545     c
->blockingkeys 
= NULL
; 
2546     c
->blockingkeysnum 
= 0; 
2547     c
->io_keys 
= listCreate(); 
2548     listSetFreeMethod(c
->io_keys
,decrRefCount
); 
2549     c
->pubsub_channels 
= dictCreate(&setDictType
,NULL
); 
2550     c
->pubsub_patterns 
= listCreate(); 
2551     listSetFreeMethod(c
->pubsub_patterns
,decrRefCount
); 
2552     listSetMatchMethod(c
->pubsub_patterns
,listMatchObjects
); 
2553     if (aeCreateFileEvent(server
.el
, c
->fd
, AE_READABLE
, 
2554         readQueryFromClient
, c
) == AE_ERR
) { 
2558     listAddNodeTail(server
.clients
,c
); 
2559     initClientMultiState(c
); 
2563 static void addReply(redisClient 
*c
, robj 
*obj
) { 
2564     if (listLength(c
->reply
) == 0 && 
2565         (c
->replstate 
== REDIS_REPL_NONE 
|| 
2566          c
->replstate 
== REDIS_REPL_ONLINE
) && 
2567         aeCreateFileEvent(server
.el
, c
->fd
, AE_WRITABLE
, 
2568         sendReplyToClient
, c
) == AE_ERR
) return; 
2570     if (server
.vm_enabled 
&& obj
->storage 
!= REDIS_VM_MEMORY
) { 
2571         obj 
= dupStringObject(obj
); 
2572         obj
->refcount 
= 0; /* getDecodedObject() will increment the refcount */ 
2574     listAddNodeTail(c
->reply
,getDecodedObject(obj
)); 
2577 static void addReplySds(redisClient 
*c
, sds s
) { 
2578     robj 
*o 
= createObject(REDIS_STRING
,s
); 
2583 static void addReplyDouble(redisClient 
*c
, double d
) { 
2586     snprintf(buf
,sizeof(buf
),"%.17g",d
); 
2587     addReplySds(c
,sdscatprintf(sdsempty(),"$%lu\r\n%s\r\n", 
2588         (unsigned long) strlen(buf
),buf
)); 
2591 static void addReplyLong(redisClient 
*c
, long l
) { 
2596         addReply(c
,shared
.czero
); 
2598     } else if (l 
== 1) { 
2599         addReply(c
,shared
.cone
); 
2602     len 
= snprintf(buf
,sizeof(buf
),":%ld\r\n",l
); 
2603     addReplySds(c
,sdsnewlen(buf
,len
)); 
2606 static void addReplyLongLong(redisClient 
*c
, long long ll
) { 
2611         addReply(c
,shared
.czero
); 
2613     } else if (ll 
== 1) { 
2614         addReply(c
,shared
.cone
); 
2617     len 
= snprintf(buf
,sizeof(buf
),":%lld\r\n",ll
); 
2618     addReplySds(c
,sdsnewlen(buf
,len
)); 
2621 static void addReplyUlong(redisClient 
*c
, unsigned long ul
) { 
2626         addReply(c
,shared
.czero
); 
2628     } else if (ul 
== 1) { 
2629         addReply(c
,shared
.cone
); 
2632     len 
= snprintf(buf
,sizeof(buf
),":%lu\r\n",ul
); 
2633     addReplySds(c
,sdsnewlen(buf
,len
)); 
2636 static void addReplyBulkLen(redisClient 
*c
, robj 
*obj
) { 
2639     if (obj
->encoding 
== REDIS_ENCODING_RAW
) { 
2640         len 
= sdslen(obj
->ptr
); 
2642         long n 
= (long)obj
->ptr
; 
2644         /* Compute how many bytes this integer will take as a radix 10 string */ 
2650         while((n 
= n
/10) != 0) { 
2654     addReplySds(c
,sdscatprintf(sdsempty(),"$%lu\r\n",(unsigned long)len
)); 
2657 static void addReplyBulk(redisClient 
*c
, robj 
*obj
) { 
2658     addReplyBulkLen(c
,obj
); 
2660     addReply(c
,shared
.crlf
); 
2663 /* In the CONFIG command we need to add vanilla C strings as bulk replies */ 
2664 static void addReplyBulkCString(redisClient 
*c
, char *s
) { 
2666         addReply(c
,shared
.nullbulk
); 
2668         robj 
*o 
= createStringObject(s
,strlen(s
)); 
2674 static void acceptHandler(aeEventLoop 
*el
, int fd
, void *privdata
, int mask
) { 
2679     REDIS_NOTUSED(mask
); 
2680     REDIS_NOTUSED(privdata
); 
2682     cfd 
= anetAccept(server
.neterr
, fd
, cip
, &cport
); 
2683     if (cfd 
== AE_ERR
) { 
2684         redisLog(REDIS_VERBOSE
,"Accepting client connection: %s", server
.neterr
); 
2687     redisLog(REDIS_VERBOSE
,"Accepted %s:%d", cip
, cport
); 
2688     if ((c 
= createClient(cfd
)) == NULL
) { 
2689         redisLog(REDIS_WARNING
,"Error allocating resoures for the client"); 
2690         close(cfd
); /* May be already closed, just ingore errors */ 
2693     /* If maxclient directive is set and this is one client more... close the 
2694      * connection. Note that we create the client instead of checking for this 
2695      * condition beforehand, since now the socket is already set in nonblocking 
2696      * mode and we can send an error for free using the Kernel I/O */ 
2697     if (server
.maxclients 
&& listLength(server
.clients
) > server
.maxclients
) { 
2698         char *err 
= "-ERR max number of clients reached\r\n"; 
2700         /* That's a best effort error message, don't check write errors */ 
2701         if (write(c
->fd
,err
,strlen(err
)) == -1) { 
2702             /* Nothing to do, Just to avoid the warning... */ 
2707     server
.stat_numconnections
++; 
2710 /* ======================= Redis objects implementation ===================== */ 
2712 static robj 
*createObject(int type
, void *ptr
) { 
2715     if (server
.vm_enabled
) pthread_mutex_lock(&server
.obj_freelist_mutex
); 
2716     if (listLength(server
.objfreelist
)) { 
2717         listNode 
*head 
= listFirst(server
.objfreelist
); 
2718         o 
= listNodeValue(head
); 
2719         listDelNode(server
.objfreelist
,head
); 
2720         if (server
.vm_enabled
) pthread_mutex_unlock(&server
.obj_freelist_mutex
); 
2722         if (server
.vm_enabled
) { 
2723             pthread_mutex_unlock(&server
.obj_freelist_mutex
); 
2724             o 
= zmalloc(sizeof(*o
)); 
2726             o 
= zmalloc(sizeof(*o
)-sizeof(struct redisObjectVM
)); 
2730     o
->encoding 
= REDIS_ENCODING_RAW
; 
2733     if (server
.vm_enabled
) { 
2734         /* Note that this code may run in the context of an I/O thread 
2735          * and accessing to server.unixtime in theory is an error 
2736          * (no locks). But in practice this is safe, and even if we read 
2737          * garbage Redis will not fail, as it's just a statistical info */ 
2738         o
->vm
.atime 
= server
.unixtime
; 
2739         o
->storage 
= REDIS_VM_MEMORY
; 
2744 static robj 
*createStringObject(char *ptr
, size_t len
) { 
2745     return createObject(REDIS_STRING
,sdsnewlen(ptr
,len
)); 
2748 static robj 
*dupStringObject(robj 
*o
) { 
2749     assert(o
->encoding 
== REDIS_ENCODING_RAW
); 
2750     return createStringObject(o
->ptr
,sdslen(o
->ptr
)); 
2753 static robj 
*createListObject(void) { 
2754     list 
*l 
= listCreate(); 
2756     listSetFreeMethod(l
,decrRefCount
); 
2757     return createObject(REDIS_LIST
,l
); 
2760 static robj 
*createSetObject(void) { 
2761     dict 
*d 
= dictCreate(&setDictType
,NULL
); 
2762     return createObject(REDIS_SET
,d
); 
2765 static robj 
*createHashObject(void) { 
2766     /* All the Hashes start as zipmaps. Will be automatically converted 
2767      * into hash tables if there are enough elements or big elements 
2769     unsigned char *zm 
= zipmapNew(); 
2770     robj 
*o 
= createObject(REDIS_HASH
,zm
); 
2771     o
->encoding 
= REDIS_ENCODING_ZIPMAP
; 
2775 static robj 
*createZsetObject(void) { 
2776     zset 
*zs 
= zmalloc(sizeof(*zs
)); 
2778     zs
->dict 
= dictCreate(&zsetDictType
,NULL
); 
2779     zs
->zsl 
= zslCreate(); 
2780     return createObject(REDIS_ZSET
,zs
); 
2783 static void freeStringObject(robj 
*o
) { 
2784     if (o
->encoding 
== REDIS_ENCODING_RAW
) { 
2789 static void freeListObject(robj 
*o
) { 
2790     listRelease((list
*) o
->ptr
); 
2793 static void freeSetObject(robj 
*o
) { 
2794     dictRelease((dict
*) o
->ptr
); 
2797 static void freeZsetObject(robj 
*o
) { 
2800     dictRelease(zs
->dict
); 
2805 static void freeHashObject(robj 
*o
) { 
2806     switch (o
->encoding
) { 
2807     case REDIS_ENCODING_HT
: 
2808         dictRelease((dict
*) o
->ptr
); 
2810     case REDIS_ENCODING_ZIPMAP
: 
2819 static void incrRefCount(robj 
*o
) { 
2823 static void decrRefCount(void *obj
) { 
2826     /* Object is a key of a swapped out value, or in the process of being 
2828     if (server
.vm_enabled 
&& 
2829         (o
->storage 
== REDIS_VM_SWAPPED 
|| o
->storage 
== REDIS_VM_LOADING
)) 
2831         if (o
->storage 
== REDIS_VM_SWAPPED 
|| o
->storage 
== REDIS_VM_LOADING
) { 
2832             redisAssert(o
->refcount 
== 1); 
2834         if (o
->storage 
== REDIS_VM_LOADING
) vmCancelThreadedIOJob(obj
); 
2835         redisAssert(o
->type 
== REDIS_STRING
); 
2836         freeStringObject(o
); 
2837         vmMarkPagesFree(o
->vm
.page
,o
->vm
.usedpages
); 
2838         pthread_mutex_lock(&server
.obj_freelist_mutex
); 
2839         if (listLength(server
.objfreelist
) > REDIS_OBJFREELIST_MAX 
|| 
2840             !listAddNodeHead(server
.objfreelist
,o
)) 
2842         pthread_mutex_unlock(&server
.obj_freelist_mutex
); 
2843         server
.vm_stats_swapped_objects
--; 
2846     /* Object is in memory, or in the process of being swapped out. */ 
2847     if (--(o
->refcount
) == 0) { 
2848         if (server
.vm_enabled 
&& o
->storage 
== REDIS_VM_SWAPPING
) 
2849             vmCancelThreadedIOJob(obj
); 
2851         case REDIS_STRING
: freeStringObject(o
); break; 
2852         case REDIS_LIST
: freeListObject(o
); break; 
2853         case REDIS_SET
: freeSetObject(o
); break; 
2854         case REDIS_ZSET
: freeZsetObject(o
); break; 
2855         case REDIS_HASH
: freeHashObject(o
); break; 
2856         default: redisAssert(0); break; 
2858         if (server
.vm_enabled
) pthread_mutex_lock(&server
.obj_freelist_mutex
); 
2859         if (listLength(server
.objfreelist
) > REDIS_OBJFREELIST_MAX 
|| 
2860             !listAddNodeHead(server
.objfreelist
,o
)) 
2862         if (server
.vm_enabled
) pthread_mutex_unlock(&server
.obj_freelist_mutex
); 
2866 static robj 
*lookupKey(redisDb 
*db
, robj 
*key
) { 
2867     dictEntry 
*de 
= dictFind(db
->dict
,key
); 
2869         robj 
*key 
= dictGetEntryKey(de
); 
2870         robj 
*val 
= dictGetEntryVal(de
); 
2872         if (server
.vm_enabled
) { 
2873             if (key
->storage 
== REDIS_VM_MEMORY 
|| 
2874                 key
->storage 
== REDIS_VM_SWAPPING
) 
2876                 /* If we were swapping the object out, stop it, this key 
2878                 if (key
->storage 
== REDIS_VM_SWAPPING
) 
2879                     vmCancelThreadedIOJob(key
); 
2880                 /* Update the access time of the key for the aging algorithm. */ 
2881                 key
->vm
.atime 
= server
.unixtime
; 
2883                 int notify 
= (key
->storage 
== REDIS_VM_LOADING
); 
2885                 /* Our value was swapped on disk. Bring it at home. */ 
2886                 redisAssert(val 
== NULL
); 
2887                 val 
= vmLoadObject(key
); 
2888                 dictGetEntryVal(de
) = val
; 
2890                 /* Clients blocked by the VM subsystem may be waiting for 
2892                 if (notify
) handleClientsBlockedOnSwappedKey(db
,key
); 
2901 static robj 
*lookupKeyRead(redisDb 
*db
, robj 
*key
) { 
2902     expireIfNeeded(db
,key
); 
2903     return lookupKey(db
,key
); 
2906 static robj 
*lookupKeyWrite(redisDb 
*db
, robj 
*key
) { 
2907     deleteIfVolatile(db
,key
); 
2908     return lookupKey(db
,key
); 
2911 static robj 
*lookupKeyReadOrReply(redisClient 
*c
, robj 
*key
, robj 
*reply
) { 
2912     robj 
*o 
= lookupKeyRead(c
->db
, key
); 
2913     if (!o
) addReply(c
,reply
); 
2917 static robj 
*lookupKeyWriteOrReply(redisClient 
*c
, robj 
*key
, robj 
*reply
) { 
2918     robj 
*o 
= lookupKeyWrite(c
->db
, key
); 
2919     if (!o
) addReply(c
,reply
); 
2923 static int checkType(redisClient 
*c
, robj 
*o
, int type
) { 
2924     if (o
->type 
!= type
) { 
2925         addReply(c
,shared
.wrongtypeerr
); 
2931 static int deleteKey(redisDb 
*db
, robj 
*key
) { 
2934     /* We need to protect key from destruction: after the first dictDelete() 
2935      * it may happen that 'key' is no longer valid if we don't increment 
2936      * its count. This may happen when we get the object reference directly 
2937      * from the hash table with dictRandomKey() or dict iterators */ 
2939     if (dictSize(db
->expires
)) dictDelete(db
->expires
,key
); 
2940     retval 
= dictDelete(db
->dict
,key
); 
2943     return retval 
== DICT_OK
; 
2946 /* Check if the nul-terminated string 's' can be represented by a long 
2947  * (that is, is a number that fits into long without any other space or 
2948  * character before or after the digits). 
2950  * If so, the function returns REDIS_OK and *longval is set to the value 
2951  * of the number. Otherwise REDIS_ERR is returned */ 
2952 static int isStringRepresentableAsLong(sds s
, long *longval
) { 
2953     char buf
[32], *endptr
; 
2957     value 
= strtol(s
, &endptr
, 10); 
2958     if (endptr
[0] != '\0') return REDIS_ERR
; 
2959     slen 
= snprintf(buf
,32,"%ld",value
); 
2961     /* If the number converted back into a string is not identical 
2962      * then it's not possible to encode the string as integer */ 
2963     if (sdslen(s
) != (unsigned)slen 
|| memcmp(buf
,s
,slen
)) return REDIS_ERR
; 
2964     if (longval
) *longval 
= value
; 
2968 /* Try to encode a string object in order to save space */ 
2969 static int tryObjectEncoding(robj 
*o
) { 
2973     if (o
->encoding 
!= REDIS_ENCODING_RAW
) 
2974         return REDIS_ERR
; /* Already encoded */ 
2976     /* It's not safe to encode shared objects: shared objects can be shared 
2977      * everywhere in the "object space" of Redis. Encoded objects can only 
2978      * appear as "values" (and not, for instance, as keys) */ 
2979      if (o
->refcount 
> 1) return REDIS_ERR
; 
2981     /* Currently we try to encode only strings */ 
2982     redisAssert(o
->type 
== REDIS_STRING
); 
2984     /* Check if we can represent this string as a long integer */ 
2985     if (isStringRepresentableAsLong(s
,&value
) == REDIS_ERR
) return REDIS_ERR
; 
2987     /* Ok, this object can be encoded */ 
2988     o
->encoding 
= REDIS_ENCODING_INT
; 
2990     o
->ptr 
= (void*) value
; 
2994 /* Get a decoded version of an encoded object (returned as a new object). 
2995  * If the object is already raw-encoded just increment the ref count. */ 
2996 static robj 
*getDecodedObject(robj 
*o
) { 
2999     if (o
->encoding 
== REDIS_ENCODING_RAW
) { 
3003     if (o
->type 
== REDIS_STRING 
&& o
->encoding 
== REDIS_ENCODING_INT
) { 
3006         snprintf(buf
,32,"%ld",(long)o
->ptr
); 
3007         dec 
= createStringObject(buf
,strlen(buf
)); 
3010         redisAssert(1 != 1); 
3014 /* Compare two string objects via strcmp() or alike. 
3015  * Note that the objects may be integer-encoded. In such a case we 
3016  * use snprintf() to get a string representation of the numbers on the stack 
3017  * and compare the strings, it's much faster than calling getDecodedObject(). 
3019  * Important note: if objects are not integer encoded, but binary-safe strings, 
3020  * sdscmp() from sds.c will apply memcmp() so this function can be considered 
3022 static int compareStringObjects(robj 
*a
, robj 
*b
) { 
3023     redisAssert(a
->type 
== REDIS_STRING 
&& b
->type 
== REDIS_STRING
); 
3024     char bufa
[128], bufb
[128], *astr
, *bstr
; 
3027     if (a 
== b
) return 0; 
3028     if (a
->encoding 
!= REDIS_ENCODING_RAW
) { 
3029         snprintf(bufa
,sizeof(bufa
),"%ld",(long) a
->ptr
); 
3035     if (b
->encoding 
!= REDIS_ENCODING_RAW
) { 
3036         snprintf(bufb
,sizeof(bufb
),"%ld",(long) b
->ptr
); 
3042     return bothsds 
? sdscmp(astr
,bstr
) : strcmp(astr
,bstr
); 
3045 static size_t stringObjectLen(robj 
*o
) { 
3046     redisAssert(o
->type 
== REDIS_STRING
); 
3047     if (o
->encoding 
== REDIS_ENCODING_RAW
) { 
3048         return sdslen(o
->ptr
); 
3052         return snprintf(buf
,32,"%ld",(long)o
->ptr
); 
3056 /*============================ RDB saving/loading =========================== */ 
3058 static int rdbSaveType(FILE *fp
, unsigned char type
) { 
3059     if (fwrite(&type
,1,1,fp
) == 0) return -1; 
3063 static int rdbSaveTime(FILE *fp
, time_t t
) { 
3064     int32_t t32 
= (int32_t) t
; 
3065     if (fwrite(&t32
,4,1,fp
) == 0) return -1; 
3069 /* check rdbLoadLen() comments for more info */ 
3070 static int rdbSaveLen(FILE *fp
, uint32_t len
) { 
3071     unsigned char buf
[2]; 
3074         /* Save a 6 bit len */ 
3075         buf
[0] = (len
&0xFF)|(REDIS_RDB_6BITLEN
<<6); 
3076         if (fwrite(buf
,1,1,fp
) == 0) return -1; 
3077     } else if (len 
< (1<<14)) { 
3078         /* Save a 14 bit len */ 
3079         buf
[0] = ((len
>>8)&0xFF)|(REDIS_RDB_14BITLEN
<<6); 
3081         if (fwrite(buf
,2,1,fp
) == 0) return -1; 
3083         /* Save a 32 bit len */ 
3084         buf
[0] = (REDIS_RDB_32BITLEN
<<6); 
3085         if (fwrite(buf
,1,1,fp
) == 0) return -1; 
3087         if (fwrite(&len
,4,1,fp
) == 0) return -1; 
3092 /* String objects in the form "2391" "-100" without any space and with a 
3093  * range of values that can fit in an 8, 16 or 32 bit signed value can be 
3094  * encoded as integers to save space */ 
3095 static int rdbTryIntegerEncoding(char *s
, size_t len
, unsigned char *enc
) { 
3097     char *endptr
, buf
[32]; 
3099     /* Check if it's possible to encode this value as a number */ 
3100     value 
= strtoll(s
, &endptr
, 10); 
3101     if (endptr
[0] != '\0') return 0; 
3102     snprintf(buf
,32,"%lld",value
); 
3104     /* If the number converted back into a string is not identical 
3105      * then it's not possible to encode the string as integer */ 
3106     if (strlen(buf
) != len 
|| memcmp(buf
,s
,len
)) return 0; 
3108     /* Finally check if it fits in our ranges */ 
3109     if (value 
>= -(1<<7) && value 
<= (1<<7)-1) { 
3110         enc
[0] = (REDIS_RDB_ENCVAL
<<6)|REDIS_RDB_ENC_INT8
; 
3111         enc
[1] = value
&0xFF; 
3113     } else if (value 
>= -(1<<15) && value 
<= (1<<15)-1) { 
3114         enc
[0] = (REDIS_RDB_ENCVAL
<<6)|REDIS_RDB_ENC_INT16
; 
3115         enc
[1] = value
&0xFF; 
3116         enc
[2] = (value
>>8)&0xFF; 
3118     } else if (value 
>= -((long long)1<<31) && value 
<= ((long long)1<<31)-1) { 
3119         enc
[0] = (REDIS_RDB_ENCVAL
<<6)|REDIS_RDB_ENC_INT32
; 
3120         enc
[1] = value
&0xFF; 
3121         enc
[2] = (value
>>8)&0xFF; 
3122         enc
[3] = (value
>>16)&0xFF; 
3123         enc
[4] = (value
>>24)&0xFF; 
3130 static int rdbSaveLzfStringObject(FILE *fp
, unsigned char *s
, size_t len
) { 
3131     size_t comprlen
, outlen
; 
3135     /* We require at least four bytes compression for this to be worth it */ 
3136     if (len 
<= 4) return 0; 
3138     if ((out 
= zmalloc(outlen
+1)) == NULL
) return 0; 
3139     comprlen 
= lzf_compress(s
, len
, out
, outlen
); 
3140     if (comprlen 
== 0) { 
3144     /* Data compressed! Let's save it on disk */ 
3145     byte 
= (REDIS_RDB_ENCVAL
<<6)|REDIS_RDB_ENC_LZF
; 
3146     if (fwrite(&byte
,1,1,fp
) == 0) goto writeerr
; 
3147     if (rdbSaveLen(fp
,comprlen
) == -1) goto writeerr
; 
3148     if (rdbSaveLen(fp
,len
) == -1) goto writeerr
; 
3149     if (fwrite(out
,comprlen
,1,fp
) == 0) goto writeerr
; 
3158 /* Save a string object as [len][data] on disk. If the object is a string 
3159  * representation of an integer value we try to save it in a special form */ 
3160 static int rdbSaveRawString(FILE *fp
, unsigned char *s
, size_t len
) { 
3163     /* Try integer encoding */ 
3165         unsigned char buf
[5]; 
3166         if ((enclen 
= rdbTryIntegerEncoding((char*)s
,len
,buf
)) > 0) { 
3167             if (fwrite(buf
,enclen
,1,fp
) == 0) return -1; 
3172     /* Try LZF compression - under 20 bytes it's unable to compress even 
3173      * aaaaaaaaaaaaaaaaaa so skip it */ 
3174     if (server
.rdbcompression 
&& len 
> 20) { 
3177         retval 
= rdbSaveLzfStringObject(fp
,s
,len
); 
3178         if (retval 
== -1) return -1; 
3179         if (retval 
> 0) return 0; 
3180         /* retval == 0 means data can't be compressed, save the old way */ 
3183     /* Store verbatim */ 
3184     if (rdbSaveLen(fp
,len
) == -1) return -1; 
3185     if (len 
&& fwrite(s
,len
,1,fp
) == 0) return -1; 
3189 /* Like rdbSaveRawString() but handles encoded objects */ 
3190 static int rdbSaveStringObject(FILE *fp
, robj 
*obj
) { 
3193     /* Avoid incr/decr ref count business when possible. 
3194      * This plays well with copy-on-write given that we are probably 
3195      * in a child process (BGSAVE). Also this makes sure key objects 
3196      * of swapped objects are not incRefCount-ed (an assert does not allow 
3197      * this in order to avoid bugs) */ 
3198     if (obj
->encoding 
!= REDIS_ENCODING_RAW
) { 
3199         obj 
= getDecodedObject(obj
); 
3200         retval 
= rdbSaveRawString(fp
,obj
->ptr
,sdslen(obj
->ptr
)); 
3203         retval 
= rdbSaveRawString(fp
,obj
->ptr
,sdslen(obj
->ptr
)); 
3208 /* Save a double value. Doubles are saved as strings prefixed by an unsigned 
3209  * 8 bit integer specifying the length of the representation. 
3210  * This 8 bit integer has special values in order to specify the following 
3216 static int rdbSaveDoubleValue(FILE *fp
, double val
) { 
3217     unsigned char buf
[128]; 
3223     } else if (!isfinite(val
)) { 
3225         buf
[0] = (val 
< 0) ? 255 : 254; 
3227         snprintf((char*)buf
+1,sizeof(buf
)-1,"%.17g",val
); 
3228         buf
[0] = strlen((char*)buf
+1); 
3231     if (fwrite(buf
,len
,1,fp
) == 0) return -1; 
3235 /* Save a Redis object. */ 
3236 static int rdbSaveObject(FILE *fp
, robj 
*o
) { 
3237     if (o
->type 
== REDIS_STRING
) { 
3238         /* Save a string value */ 
3239         if (rdbSaveStringObject(fp
,o
) == -1) return -1; 
3240     } else if (o
->type 
== REDIS_LIST
) { 
3241         /* Save a list value */ 
3242         list 
*list 
= o
->ptr
; 
3246         if (rdbSaveLen(fp
,listLength(list
)) == -1) return -1; 
3247         listRewind(list
,&li
); 
3248         while((ln 
= listNext(&li
))) { 
3249             robj 
*eleobj 
= listNodeValue(ln
); 
3251             if (rdbSaveStringObject(fp
,eleobj
) == -1) return -1; 
3253     } else if (o
->type 
== REDIS_SET
) { 
3254         /* Save a set value */ 
3256         dictIterator 
*di 
= dictGetIterator(set
); 
3259         if (rdbSaveLen(fp
,dictSize(set
)) == -1) return -1; 
3260         while((de 
= dictNext(di
)) != NULL
) { 
3261             robj 
*eleobj 
= dictGetEntryKey(de
); 
3263             if (rdbSaveStringObject(fp
,eleobj
) == -1) return -1; 
3265         dictReleaseIterator(di
); 
3266     } else if (o
->type 
== REDIS_ZSET
) { 
3267         /* Save a sorted set value */ 
3269         dictIterator 
*di 
= dictGetIterator(zs
->dict
); 
3272         if (rdbSaveLen(fp
,dictSize(zs
->dict
)) == -1) return -1; 
3273         while((de 
= dictNext(di
)) != NULL
) { 
3274             robj 
*eleobj 
= dictGetEntryKey(de
); 
3275             double *score 
= dictGetEntryVal(de
); 
3277             if (rdbSaveStringObject(fp
,eleobj
) == -1) return -1; 
3278             if (rdbSaveDoubleValue(fp
,*score
) == -1) return -1; 
3280         dictReleaseIterator(di
); 
3281     } else if (o
->type 
== REDIS_HASH
) { 
3282         /* Save a hash value */ 
3283         if (o
->encoding 
== REDIS_ENCODING_ZIPMAP
) { 
3284             unsigned char *p 
= zipmapRewind(o
->ptr
); 
3285             unsigned int count 
= zipmapLen(o
->ptr
); 
3286             unsigned char *key
, *val
; 
3287             unsigned int klen
, vlen
; 
3289             if (rdbSaveLen(fp
,count
) == -1) return -1; 
3290             while((p 
= zipmapNext(p
,&key
,&klen
,&val
,&vlen
)) != NULL
) { 
3291                 if (rdbSaveRawString(fp
,key
,klen
) == -1) return -1; 
3292                 if (rdbSaveRawString(fp
,val
,vlen
) == -1) return -1; 
3295             dictIterator 
*di 
= dictGetIterator(o
->ptr
); 
3298             if (rdbSaveLen(fp
,dictSize((dict
*)o
->ptr
)) == -1) return -1; 
3299             while((de 
= dictNext(di
)) != NULL
) { 
3300                 robj 
*key 
= dictGetEntryKey(de
); 
3301                 robj 
*val 
= dictGetEntryVal(de
); 
3303                 if (rdbSaveStringObject(fp
,key
) == -1) return -1; 
3304                 if (rdbSaveStringObject(fp
,val
) == -1) return -1; 
3306             dictReleaseIterator(di
); 
3314 /* Return the length the object will have on disk if saved with 
3315  * the rdbSaveObject() function. Currently we use a trick to get 
3316  * this length with very little changes to the code. In the future 
3317  * we could switch to a faster solution. */ 
3318 static off_t 
rdbSavedObjectLen(robj 
*o
, FILE *fp
) { 
3319     if (fp 
== NULL
) fp 
= server
.devnull
; 
3321     assert(rdbSaveObject(fp
,o
) != 1); 
3325 /* Return the number of pages required to save this object in the swap file */ 
3326 static off_t 
rdbSavedObjectPages(robj 
*o
, FILE *fp
) { 
3327     off_t bytes 
= rdbSavedObjectLen(o
,fp
); 
3329     return (bytes
+(server
.vm_page_size
-1))/server
.vm_page_size
; 
3332 /* Save the DB on disk. Return REDIS_ERR on error, REDIS_OK on success */ 
3333 static int rdbSave(char *filename
) { 
3334     dictIterator 
*di 
= NULL
; 
3339     time_t now 
= time(NULL
); 
3341     /* Wait for I/O threads to terminate, just in case this is a 
3342      * foreground-saving, to avoid seeking the swap file descriptor at the 
3344     if (server
.vm_enabled
) 
3345         waitEmptyIOJobsQueue(); 
3347     snprintf(tmpfile
,256,"temp-%d.rdb", (int) getpid()); 
3348     fp 
= fopen(tmpfile
,"w"); 
3350         redisLog(REDIS_WARNING
, "Failed saving the DB: %s", strerror(errno
)); 
3353     if (fwrite("REDIS0001",9,1,fp
) == 0) goto werr
; 
3354     for (j 
= 0; j 
< server
.dbnum
; j
++) { 
3355         redisDb 
*db 
= server
.db
+j
; 
3357         if (dictSize(d
) == 0) continue; 
3358         di 
= dictGetIterator(d
); 
3364         /* Write the SELECT DB opcode */ 
3365         if (rdbSaveType(fp
,REDIS_SELECTDB
) == -1) goto werr
; 
3366         if (rdbSaveLen(fp
,j
) == -1) goto werr
; 
3368         /* Iterate this DB writing every entry */ 
3369         while((de 
= dictNext(di
)) != NULL
) { 
3370             robj 
*key 
= dictGetEntryKey(de
); 
3371             robj 
*o 
= dictGetEntryVal(de
); 
3372             time_t expiretime 
= getExpire(db
,key
); 
3374             /* Save the expire time */ 
3375             if (expiretime 
!= -1) { 
3376                 /* If this key is already expired skip it */ 
3377                 if (expiretime 
< now
) continue; 
3378                 if (rdbSaveType(fp
,REDIS_EXPIRETIME
) == -1) goto werr
; 
3379                 if (rdbSaveTime(fp
,expiretime
) == -1) goto werr
; 
3381             /* Save the key and associated value. This requires special 
3382              * handling if the value is swapped out. */ 
3383             if (!server
.vm_enabled 
|| key
->storage 
== REDIS_VM_MEMORY 
|| 
3384                                       key
->storage 
== REDIS_VM_SWAPPING
) { 
3385                 /* Save type, key, value */ 
3386                 if (rdbSaveType(fp
,o
->type
) == -1) goto werr
; 
3387                 if (rdbSaveStringObject(fp
,key
) == -1) goto werr
; 
3388                 if (rdbSaveObject(fp
,o
) == -1) goto werr
; 
3390                 /* REDIS_VM_SWAPPED or REDIS_VM_LOADING */ 
3392                 /* Get a preview of the object in memory */ 
3393                 po 
= vmPreviewObject(key
); 
3394                 /* Save type, key, value */ 
3395                 if (rdbSaveType(fp
,key
->vtype
) == -1) goto werr
; 
3396                 if (rdbSaveStringObject(fp
,key
) == -1) goto werr
; 
3397                 if (rdbSaveObject(fp
,po
) == -1) goto werr
; 
3398                 /* Remove the loaded object from memory */ 
3402         dictReleaseIterator(di
); 
3405     if (rdbSaveType(fp
,REDIS_EOF
) == -1) goto werr
; 
3407     /* Make sure data will not remain on the OS's output buffers */ 
3412     /* Use RENAME to make sure the DB file is changed atomically only 
3413      * if the generate DB file is ok. */ 
3414     if (rename(tmpfile
,filename
) == -1) { 
3415         redisLog(REDIS_WARNING
,"Error moving temp DB file on the final destination: %s", strerror(errno
)); 
3419     redisLog(REDIS_NOTICE
,"DB saved on disk"); 
3421     server
.lastsave 
= time(NULL
); 
3427     redisLog(REDIS_WARNING
,"Write error saving DB on disk: %s", strerror(errno
)); 
3428     if (di
) dictReleaseIterator(di
); 
3432 static int rdbSaveBackground(char *filename
) { 
3435     if (server
.bgsavechildpid 
!= -1) return REDIS_ERR
; 
3436     if (server
.vm_enabled
) waitEmptyIOJobsQueue(); 
3437     if ((childpid 
= fork()) == 0) { 
3439         if (server
.vm_enabled
) vmReopenSwapFile(); 
3441         if (rdbSave(filename
) == REDIS_OK
) { 
3448         if (childpid 
== -1) { 
3449             redisLog(REDIS_WARNING
,"Can't save in background: fork: %s", 
3453         redisLog(REDIS_NOTICE
,"Background saving started by pid %d",childpid
); 
3454         server
.bgsavechildpid 
= childpid
; 
3455         updateDictResizePolicy(); 
3458     return REDIS_OK
; /* unreached */ 
3461 static void rdbRemoveTempFile(pid_t childpid
) { 
3464     snprintf(tmpfile
,256,"temp-%d.rdb", (int) childpid
); 
3468 static int rdbLoadType(FILE *fp
) { 
3470     if (fread(&type
,1,1,fp
) == 0) return -1; 
3474 static time_t rdbLoadTime(FILE *fp
) { 
3476     if (fread(&t32
,4,1,fp
) == 0) return -1; 
3477     return (time_t) t32
; 
3480 /* Load an encoded length from the DB, see the REDIS_RDB_* defines on the top 
3481  * of this file for a description of how lengths are stored on disk. 
3483  * isencoded is set to 1 if the value read is not actually a length but 
3484  * an "encoding type", check the above comments for more info */ 
3485 static uint32_t rdbLoadLen(FILE *fp
, int *isencoded
) { 
3486     unsigned char buf
[2]; 
3490     if (isencoded
) *isencoded 
= 0; 
3491     if (fread(buf
,1,1,fp
) == 0) return REDIS_RDB_LENERR
; 
3492     type 
= (buf
[0]&0xC0)>>6; 
3493     if (type 
== REDIS_RDB_6BITLEN
) { 
3494         /* Read a 6 bit len */ 
3496     } else if (type 
== REDIS_RDB_ENCVAL
) { 
3497         /* Read a 6 bit len encoding type */ 
3498         if (isencoded
) *isencoded 
= 1; 
3500     } else if (type 
== REDIS_RDB_14BITLEN
) { 
3501         /* Read a 14 bit len */ 
3502         if (fread(buf
+1,1,1,fp
) == 0) return REDIS_RDB_LENERR
; 
3503         return ((buf
[0]&0x3F)<<8)|buf
[1]; 
3505         /* Read a 32 bit len */ 
3506         if (fread(&len
,4,1,fp
) == 0) return REDIS_RDB_LENERR
; 
3511 static robj 
*rdbLoadIntegerObject(FILE *fp
, int enctype
) { 
3512     unsigned char enc
[4]; 
3515     if (enctype 
== REDIS_RDB_ENC_INT8
) { 
3516         if (fread(enc
,1,1,fp
) == 0) return NULL
; 
3517         val 
= (signed char)enc
[0]; 
3518     } else if (enctype 
== REDIS_RDB_ENC_INT16
) { 
3520         if (fread(enc
,2,1,fp
) == 0) return NULL
; 
3521         v 
= enc
[0]|(enc
[1]<<8); 
3523     } else if (enctype 
== REDIS_RDB_ENC_INT32
) { 
3525         if (fread(enc
,4,1,fp
) == 0) return NULL
; 
3526         v 
= enc
[0]|(enc
[1]<<8)|(enc
[2]<<16)|(enc
[3]<<24); 
3529         val 
= 0; /* anti-warning */ 
3532     return createObject(REDIS_STRING
,sdscatprintf(sdsempty(),"%lld",val
)); 
3535 static robj 
*rdbLoadLzfStringObject(FILE*fp
) { 
3536     unsigned int len
, clen
; 
3537     unsigned char *c 
= NULL
; 
3540     if ((clen 
= rdbLoadLen(fp
,NULL
)) == REDIS_RDB_LENERR
) return NULL
; 
3541     if ((len 
= rdbLoadLen(fp
,NULL
)) == REDIS_RDB_LENERR
) return NULL
; 
3542     if ((c 
= zmalloc(clen
)) == NULL
) goto err
; 
3543     if ((val 
= sdsnewlen(NULL
,len
)) == NULL
) goto err
; 
3544     if (fread(c
,clen
,1,fp
) == 0) goto err
; 
3545     if (lzf_decompress(c
,clen
,val
,len
) == 0) goto err
; 
3547     return createObject(REDIS_STRING
,val
); 
3554 static robj 
*rdbLoadStringObject(FILE*fp
) { 
3559     len 
= rdbLoadLen(fp
,&isencoded
); 
3562         case REDIS_RDB_ENC_INT8
: 
3563         case REDIS_RDB_ENC_INT16
: 
3564         case REDIS_RDB_ENC_INT32
: 
3565             return rdbLoadIntegerObject(fp
,len
); 
3566         case REDIS_RDB_ENC_LZF
: 
3567             return rdbLoadLzfStringObject(fp
); 
3573     if (len 
== REDIS_RDB_LENERR
) return NULL
; 
3574     val 
= sdsnewlen(NULL
,len
); 
3575     if (len 
&& fread(val
,len
,1,fp
) == 0) { 
3579     return createObject(REDIS_STRING
,val
); 
3582 /* For information about double serialization check rdbSaveDoubleValue() */ 
3583 static int rdbLoadDoubleValue(FILE *fp
, double *val
) { 
3587     if (fread(&len
,1,1,fp
) == 0) return -1; 
3589     case 255: *val 
= R_NegInf
; return 0; 
3590     case 254: *val 
= R_PosInf
; return 0; 
3591     case 253: *val 
= R_Nan
; return 0; 
3593         if (fread(buf
,len
,1,fp
) == 0) return -1; 
3595         sscanf(buf
, "%lg", val
); 
3600 /* Load a Redis object of the specified type from the specified file. 
3601  * On success a newly allocated object is returned, otherwise NULL. */ 
3602 static robj 
*rdbLoadObject(int type
, FILE *fp
) { 
3605     redisLog(REDIS_DEBUG
,"LOADING OBJECT %d (at %d)\n",type
,ftell(fp
)); 
3606     if (type 
== REDIS_STRING
) { 
3607         /* Read string value */ 
3608         if ((o 
= rdbLoadStringObject(fp
)) == NULL
) return NULL
; 
3609         tryObjectEncoding(o
); 
3610     } else if (type 
== REDIS_LIST 
|| type 
== REDIS_SET
) { 
3611         /* Read list/set value */ 
3614         if ((listlen 
= rdbLoadLen(fp
,NULL
)) == REDIS_RDB_LENERR
) return NULL
; 
3615         o 
= (type 
== REDIS_LIST
) ? createListObject() : createSetObject(); 
3616         /* It's faster to expand the dict to the right size asap in order 
3617          * to avoid rehashing */ 
3618         if (type 
== REDIS_SET 
&& listlen 
> DICT_HT_INITIAL_SIZE
) 
3619             dictExpand(o
->ptr
,listlen
); 
3620         /* Load every single element of the list/set */ 
3624             if ((ele 
= rdbLoadStringObject(fp
)) == NULL
) return NULL
; 
3625             tryObjectEncoding(ele
); 
3626             if (type 
== REDIS_LIST
) { 
3627                 listAddNodeTail((list
*)o
->ptr
,ele
); 
3629                 dictAdd((dict
*)o
->ptr
,ele
,NULL
); 
3632     } else if (type 
== REDIS_ZSET
) { 
3633         /* Read sorted set value */ 
3637         if ((zsetlen 
= rdbLoadLen(fp
,NULL
)) == REDIS_RDB_LENERR
) return NULL
; 
3638         o 
= createZsetObject(); 
3640         /* Load every single element of the sorted set */ 
3643             double *score 
= zmalloc(sizeof(double)); 
3645             if ((ele 
= rdbLoadStringObject(fp
)) == NULL
) return NULL
; 
3646             tryObjectEncoding(ele
); 
3647             if (rdbLoadDoubleValue(fp
,score
) == -1) return NULL
; 
3648             dictAdd(zs
->dict
,ele
,score
); 
3649             zslInsert(zs
->zsl
,*score
,ele
); 
3650             incrRefCount(ele
); /* added to skiplist */ 
3652     } else if (type 
== REDIS_HASH
) { 
3655         if ((hashlen 
= rdbLoadLen(fp
,NULL
)) == REDIS_RDB_LENERR
) return NULL
; 
3656         o 
= createHashObject(); 
3657         /* Too many entries? Use a hash table. */ 
3658         if (hashlen 
> server
.hash_max_zipmap_entries
) 
3659             convertToRealHash(o
); 
3660         /* Load every key/value, then set it into the zipmap or hash 
3661          * table, as needed. */ 
3665             if ((key 
= rdbLoadStringObject(fp
)) == NULL
) return NULL
; 
3666             if ((val 
= rdbLoadStringObject(fp
)) == NULL
) return NULL
; 
3667             /* If we are using a zipmap and the key or the value is too big, 
3668              * the object is converted to real hash table encoding. */ 
3669             if (o
->encoding 
!= REDIS_ENCODING_HT 
&& 
3670                (sdslen(key
->ptr
) > server
.hash_max_zipmap_value 
|| 
3671                 sdslen(val
->ptr
) > server
.hash_max_zipmap_value
)) 
3673                     convertToRealHash(o
); 
3676             if (o
->encoding 
== REDIS_ENCODING_ZIPMAP
) { 
3677                 unsigned char *zm 
= o
->ptr
; 
3679                 zm 
= zipmapSet(zm
,key
->ptr
,sdslen(key
->ptr
), 
3680                                   val
->ptr
,sdslen(val
->ptr
),NULL
); 
3685                 tryObjectEncoding(key
); 
3686                 tryObjectEncoding(val
); 
3687                 dictAdd((dict
*)o
->ptr
,key
,val
); 
3696 static int rdbLoad(char *filename
) { 
3698     robj 
*keyobj 
= NULL
; 
3700     int type
, retval
, rdbver
; 
3701     dict 
*d 
= server
.db
[0].dict
; 
3702     redisDb 
*db 
= server
.db
+0; 
3704     time_t expiretime 
= -1, now 
= time(NULL
); 
3705     long long loadedkeys 
= 0; 
3707     fp 
= fopen(filename
,"r"); 
3708     if (!fp
) return REDIS_ERR
; 
3709     if (fread(buf
,9,1,fp
) == 0) goto eoferr
; 
3711     if (memcmp(buf
,"REDIS",5) != 0) { 
3713         redisLog(REDIS_WARNING
,"Wrong signature trying to load DB from file"); 
3716     rdbver 
= atoi(buf
+5); 
3719         redisLog(REDIS_WARNING
,"Can't handle RDB format version %d",rdbver
); 
3726         if ((type 
= rdbLoadType(fp
)) == -1) goto eoferr
; 
3727         if (type 
== REDIS_EXPIRETIME
) { 
3728             if ((expiretime 
= rdbLoadTime(fp
)) == -1) goto eoferr
; 
3729             /* We read the time so we need to read the object type again */ 
3730             if ((type 
= rdbLoadType(fp
)) == -1) goto eoferr
; 
3732         if (type 
== REDIS_EOF
) break; 
3733         /* Handle SELECT DB opcode as a special case */ 
3734         if (type 
== REDIS_SELECTDB
) { 
3735             if ((dbid 
= rdbLoadLen(fp
,NULL
)) == REDIS_RDB_LENERR
) 
3737             if (dbid 
>= (unsigned)server
.dbnum
) { 
3738                 redisLog(REDIS_WARNING
,"FATAL: Data file was created with a Redis server configured to handle more than %d databases. Exiting\n", server
.dbnum
); 
3741             db 
= server
.db
+dbid
; 
3746         if ((keyobj 
= rdbLoadStringObject(fp
)) == NULL
) goto eoferr
; 
3748         if ((o 
= rdbLoadObject(type
,fp
)) == NULL
) goto eoferr
; 
3749         /* Add the new object in the hash table */ 
3750         retval 
= dictAdd(d
,keyobj
,o
); 
3751         if (retval 
== DICT_ERR
) { 
3752             redisLog(REDIS_WARNING
,"Loading DB, duplicated key (%s) found! Unrecoverable error, exiting now.", keyobj
->ptr
); 
3755         /* Set the expire time if needed */ 
3756         if (expiretime 
!= -1) { 
3757             setExpire(db
,keyobj
,expiretime
); 
3758             /* Delete this key if already expired */ 
3759             if (expiretime 
< now
) deleteKey(db
,keyobj
); 
3763         /* Handle swapping while loading big datasets when VM is on */ 
3765         if (server
.vm_enabled 
&& (loadedkeys 
% 5000) == 0) { 
3766             while (zmalloc_used_memory() > server
.vm_max_memory
) { 
3767                 if (vmSwapOneObjectBlocking() == REDIS_ERR
) break; 
3774 eoferr
: /* unexpected end of file is handled here with a fatal exit */ 
3775     if (keyobj
) decrRefCount(keyobj
); 
3776     redisLog(REDIS_WARNING
,"Short read or OOM loading DB. Unrecoverable error, aborting now."); 
3778     return REDIS_ERR
; /* Just to avoid warning */ 
3781 /*================================== Commands =============================== */ 
3783 static void authCommand(redisClient 
*c
) { 
3784     if (!server
.requirepass 
|| !strcmp(c
->argv
[1]->ptr
, server
.requirepass
)) { 
3785       c
->authenticated 
= 1; 
3786       addReply(c
,shared
.ok
); 
3788       c
->authenticated 
= 0; 
3789       addReplySds(c
,sdscatprintf(sdsempty(),"-ERR invalid password\r\n")); 
3793 static void pingCommand(redisClient 
*c
) { 
3794     addReply(c
,shared
.pong
); 
3797 static void echoCommand(redisClient 
*c
) { 
3798     addReplyBulk(c
,c
->argv
[1]); 
3801 /*=================================== Strings =============================== */ 
3803 static void setGenericCommand(redisClient 
*c
, int nx
) { 
3806     if (nx
) deleteIfVolatile(c
->db
,c
->argv
[1]); 
3807     retval 
= dictAdd(c
->db
->dict
,c
->argv
[1],c
->argv
[2]); 
3808     if (retval 
== DICT_ERR
) { 
3810             /* If the key is about a swapped value, we want a new key object 
3811              * to overwrite the old. So we delete the old key in the database. 
3812              * This will also make sure that swap pages about the old object 
3813              * will be marked as free. */ 
3814             if (server
.vm_enabled 
&& deleteIfSwapped(c
->db
,c
->argv
[1])) 
3815                 incrRefCount(c
->argv
[1]); 
3816             dictReplace(c
->db
->dict
,c
->argv
[1],c
->argv
[2]); 
3817             incrRefCount(c
->argv
[2]); 
3819             addReply(c
,shared
.czero
); 
3823         incrRefCount(c
->argv
[1]); 
3824         incrRefCount(c
->argv
[2]); 
3827     removeExpire(c
->db
,c
->argv
[1]); 
3828     addReply(c
, nx 
? shared
.cone 
: shared
.ok
); 
3831 static void setCommand(redisClient 
*c
) { 
3832     setGenericCommand(c
,0); 
3835 static void setnxCommand(redisClient 
*c
) { 
3836     setGenericCommand(c
,1); 
3839 static int getGenericCommand(redisClient 
*c
) { 
3842     if ((o 
= lookupKeyReadOrReply(c
,c
->argv
[1],shared
.nullbulk
)) == NULL
) 
3845     if (o
->type 
!= REDIS_STRING
) { 
3846         addReply(c
,shared
.wrongtypeerr
); 
3854 static void getCommand(redisClient 
*c
) { 
3855     getGenericCommand(c
); 
3858 static void getsetCommand(redisClient 
*c
) { 
3859     if (getGenericCommand(c
) == REDIS_ERR
) return; 
3860     if (dictAdd(c
->db
->dict
,c
->argv
[1],c
->argv
[2]) == DICT_ERR
) { 
3861         dictReplace(c
->db
->dict
,c
->argv
[1],c
->argv
[2]); 
3863         incrRefCount(c
->argv
[1]); 
3865     incrRefCount(c
->argv
[2]); 
3867     removeExpire(c
->db
,c
->argv
[1]); 
3870 static void mgetCommand(redisClient 
*c
) { 
3873     addReplySds(c
,sdscatprintf(sdsempty(),"*%d\r\n",c
->argc
-1)); 
3874     for (j 
= 1; j 
< c
->argc
; j
++) { 
3875         robj 
*o 
= lookupKeyRead(c
->db
,c
->argv
[j
]); 
3877             addReply(c
,shared
.nullbulk
); 
3879             if (o
->type 
!= REDIS_STRING
) { 
3880                 addReply(c
,shared
.nullbulk
); 
3888 static void msetGenericCommand(redisClient 
*c
, int nx
) { 
3889     int j
, busykeys 
= 0; 
3891     if ((c
->argc 
% 2) == 0) { 
3892         addReplySds(c
,sdsnew("-ERR wrong number of arguments for MSET\r\n")); 
3895     /* Handle the NX flag. The MSETNX semantic is to return zero and 
3896      * set nothing at all if at least one key already exists. */ 
3898         for (j 
= 1; j 
< c
->argc
; j 
+= 2) { 
3899             if (lookupKeyWrite(c
->db
,c
->argv
[j
]) != NULL
) { 
3905         addReply(c
, shared
.czero
); 
3909     for (j 
= 1; j 
< c
->argc
; j 
+= 2) { 
3912         tryObjectEncoding(c
->argv
[j
+1]); 
3913         retval 
= dictAdd(c
->db
->dict
,c
->argv
[j
],c
->argv
[j
+1]); 
3914         if (retval 
== DICT_ERR
) { 
3915             dictReplace(c
->db
->dict
,c
->argv
[j
],c
->argv
[j
+1]); 
3916             incrRefCount(c
->argv
[j
+1]); 
3918             incrRefCount(c
->argv
[j
]); 
3919             incrRefCount(c
->argv
[j
+1]); 
3921         removeExpire(c
->db
,c
->argv
[j
]); 
3923     server
.dirty 
+= (c
->argc
-1)/2; 
3924     addReply(c
, nx 
? shared
.cone 
: shared
.ok
); 
3927 static void msetCommand(redisClient 
*c
) { 
3928     msetGenericCommand(c
,0); 
3931 static void msetnxCommand(redisClient 
*c
) { 
3932     msetGenericCommand(c
,1); 
3935 static void incrDecrCommand(redisClient 
*c
, long long incr
) { 
3940     o 
= lookupKeyWrite(c
->db
,c
->argv
[1]); 
3944         if (o
->type 
!= REDIS_STRING
) { 
3949             if (o
->encoding 
== REDIS_ENCODING_RAW
) 
3950                 value 
= strtoll(o
->ptr
, &eptr
, 10); 
3951             else if (o
->encoding 
== REDIS_ENCODING_INT
) 
3952                 value 
= (long)o
->ptr
; 
3954                 redisAssert(1 != 1); 
3959     o 
= createObject(REDIS_STRING
,sdscatprintf(sdsempty(),"%lld",value
)); 
3960     tryObjectEncoding(o
); 
3961     retval 
= dictAdd(c
->db
->dict
,c
->argv
[1],o
); 
3962     if (retval 
== DICT_ERR
) { 
3963         dictReplace(c
->db
->dict
,c
->argv
[1],o
); 
3964         removeExpire(c
->db
,c
->argv
[1]); 
3966         incrRefCount(c
->argv
[1]); 
3969     addReply(c
,shared
.colon
); 
3971     addReply(c
,shared
.crlf
); 
3974 static void incrCommand(redisClient 
*c
) { 
3975     incrDecrCommand(c
,1); 
3978 static void decrCommand(redisClient 
*c
) { 
3979     incrDecrCommand(c
,-1); 
3982 static void incrbyCommand(redisClient 
*c
) { 
3983     long long incr 
= strtoll(c
->argv
[2]->ptr
, NULL
, 10); 
3984     incrDecrCommand(c
,incr
); 
3987 static void decrbyCommand(redisClient 
*c
) { 
3988     long long incr 
= strtoll(c
->argv
[2]->ptr
, NULL
, 10); 
3989     incrDecrCommand(c
,-incr
); 
3992 static void appendCommand(redisClient 
*c
) { 
3997     o 
= lookupKeyWrite(c
->db
,c
->argv
[1]); 
3999         /* Create the key */ 
4000         retval 
= dictAdd(c
->db
->dict
,c
->argv
[1],c
->argv
[2]); 
4001         incrRefCount(c
->argv
[1]); 
4002         incrRefCount(c
->argv
[2]); 
4003         totlen 
= stringObjectLen(c
->argv
[2]); 
4007         de 
= dictFind(c
->db
->dict
,c
->argv
[1]); 
4010         o 
= dictGetEntryVal(de
); 
4011         if (o
->type 
!= REDIS_STRING
) { 
4012             addReply(c
,shared
.wrongtypeerr
); 
4015         /* If the object is specially encoded or shared we have to make 
4017         if (o
->refcount 
!= 1 || o
->encoding 
!= REDIS_ENCODING_RAW
) { 
4018             robj 
*decoded 
= getDecodedObject(o
); 
4020             o 
= createStringObject(decoded
->ptr
, sdslen(decoded
->ptr
)); 
4021             decrRefCount(decoded
); 
4022             dictReplace(c
->db
->dict
,c
->argv
[1],o
); 
4025         if (c
->argv
[2]->encoding 
== REDIS_ENCODING_RAW
) { 
4026             o
->ptr 
= sdscatlen(o
->ptr
, 
4027                 c
->argv
[2]->ptr
, sdslen(c
->argv
[2]->ptr
)); 
4029             o
->ptr 
= sdscatprintf(o
->ptr
, "%ld", 
4030                 (unsigned long) c
->argv
[2]->ptr
); 
4032         totlen 
= sdslen(o
->ptr
); 
4035     addReplySds(c
,sdscatprintf(sdsempty(),":%lu\r\n",(unsigned long)totlen
)); 
4038 static void substrCommand(redisClient 
*c
) { 
4040     long start 
= atoi(c
->argv
[2]->ptr
); 
4041     long end 
= atoi(c
->argv
[3]->ptr
); 
4042     size_t rangelen
, strlen
; 
4045     if ((o 
= lookupKeyReadOrReply(c
,c
->argv
[1],shared
.nullbulk
)) == NULL 
|| 
4046         checkType(c
,o
,REDIS_STRING
)) return; 
4048     o 
= getDecodedObject(o
); 
4049     strlen 
= sdslen(o
->ptr
); 
4051     /* convert negative indexes */ 
4052     if (start 
< 0) start 
= strlen
+start
; 
4053     if (end 
< 0) end 
= strlen
+end
; 
4054     if (start 
< 0) start 
= 0; 
4055     if (end 
< 0) end 
= 0; 
4057     /* indexes sanity checks */ 
4058     if (start 
> end 
|| (size_t)start 
>= strlen
) { 
4059         /* Out of range start or start > end result in null reply */ 
4060         addReply(c
,shared
.nullbulk
); 
4064     if ((size_t)end 
>= strlen
) end 
= strlen
-1; 
4065     rangelen 
= (end
-start
)+1; 
4067     /* Return the result */ 
4068     addReplySds(c
,sdscatprintf(sdsempty(),"$%zu\r\n",rangelen
)); 
4069     range 
= sdsnewlen((char*)o
->ptr
+start
,rangelen
); 
4070     addReplySds(c
,range
); 
4071     addReply(c
,shared
.crlf
); 
4075 /* ========================= Type agnostic commands ========================= */ 
4077 static void delCommand(redisClient 
*c
) { 
4080     for (j 
= 1; j 
< c
->argc
; j
++) { 
4081         if (deleteKey(c
->db
,c
->argv
[j
])) { 
4086     addReplyLong(c
,deleted
); 
4089 static void existsCommand(redisClient 
*c
) { 
4090     addReply(c
,lookupKeyRead(c
->db
,c
->argv
[1]) ? shared
.cone 
: shared
.czero
); 
4093 static void selectCommand(redisClient 
*c
) { 
4094     int id 
= atoi(c
->argv
[1]->ptr
); 
4096     if (selectDb(c
,id
) == REDIS_ERR
) { 
4097         addReplySds(c
,sdsnew("-ERR invalid DB index\r\n")); 
4099         addReply(c
,shared
.ok
); 
4103 static void randomkeyCommand(redisClient 
*c
) { 
4107         de 
= dictGetRandomKey(c
->db
->dict
); 
4108         if (!de 
|| expireIfNeeded(c
->db
,dictGetEntryKey(de
)) == 0) break; 
4111         addReply(c
,shared
.plus
); 
4112         addReply(c
,shared
.crlf
); 
4114         addReply(c
,shared
.plus
); 
4115         addReply(c
,dictGetEntryKey(de
)); 
4116         addReply(c
,shared
.crlf
); 
4120 static void keysCommand(redisClient 
*c
) { 
4123     sds pattern 
= c
->argv
[1]->ptr
; 
4124     int plen 
= sdslen(pattern
); 
4125     unsigned long numkeys 
= 0; 
4126     robj 
*lenobj 
= createObject(REDIS_STRING
,NULL
); 
4128     di 
= dictGetIterator(c
->db
->dict
); 
4130     decrRefCount(lenobj
); 
4131     while((de 
= dictNext(di
)) != NULL
) { 
4132         robj 
*keyobj 
= dictGetEntryKey(de
); 
4134         sds key 
= keyobj
->ptr
; 
4135         if ((pattern
[0] == '*' && pattern
[1] == '\0') || 
4136             stringmatchlen(pattern
,plen
,key
,sdslen(key
),0)) { 
4137             if (expireIfNeeded(c
->db
,keyobj
) == 0) { 
4138                 addReplyBulk(c
,keyobj
); 
4143     dictReleaseIterator(di
); 
4144     lenobj
->ptr 
= sdscatprintf(sdsempty(),"*%lu\r\n",numkeys
); 
4147 static void dbsizeCommand(redisClient 
*c
) { 
4149         sdscatprintf(sdsempty(),":%lu\r\n",dictSize(c
->db
->dict
))); 
4152 static void lastsaveCommand(redisClient 
*c
) { 
4154         sdscatprintf(sdsempty(),":%lu\r\n",server
.lastsave
)); 
4157 static void typeCommand(redisClient 
*c
) { 
4161     o 
= lookupKeyRead(c
->db
,c
->argv
[1]); 
4166         case REDIS_STRING
: type 
= "+string"; break; 
4167         case REDIS_LIST
: type 
= "+list"; break; 
4168         case REDIS_SET
: type 
= "+set"; break; 
4169         case REDIS_ZSET
: type 
= "+zset"; break; 
4170         case REDIS_HASH
: type 
= "+hash"; break; 
4171         default: type 
= "+unknown"; break; 
4174     addReplySds(c
,sdsnew(type
)); 
4175     addReply(c
,shared
.crlf
); 
4178 static void saveCommand(redisClient 
*c
) { 
4179     if (server
.bgsavechildpid 
!= -1) { 
4180         addReplySds(c
,sdsnew("-ERR background save in progress\r\n")); 
4183     if (rdbSave(server
.dbfilename
) == REDIS_OK
) { 
4184         addReply(c
,shared
.ok
); 
4186         addReply(c
,shared
.err
); 
4190 static void bgsaveCommand(redisClient 
*c
) { 
4191     if (server
.bgsavechildpid 
!= -1) { 
4192         addReplySds(c
,sdsnew("-ERR background save already in progress\r\n")); 
4195     if (rdbSaveBackground(server
.dbfilename
) == REDIS_OK
) { 
4196         char *status 
= "+Background saving started\r\n"; 
4197         addReplySds(c
,sdsnew(status
)); 
4199         addReply(c
,shared
.err
); 
4203 static void shutdownCommand(redisClient 
*c
) { 
4204     redisLog(REDIS_WARNING
,"User requested shutdown, saving DB..."); 
4205     /* Kill the saving child if there is a background saving in progress. 
4206        We want to avoid race conditions, for instance our saving child may 
4207        overwrite the synchronous save done by SHUTDOWN. */ 
4208     if (server
.bgsavechildpid 
!= -1) { 
4209         redisLog(REDIS_WARNING
,"There is a live saving child. Killing it!"); 
4210         kill(server
.bgsavechildpid
,SIGKILL
); 
4211         rdbRemoveTempFile(server
.bgsavechildpid
); 
4213     if (server
.appendonly
) { 
4214         /* Append only file: fsync() the AOF and exit */ 
4215         fsync(server
.appendfd
); 
4216         if (server
.vm_enabled
) unlink(server
.vm_swap_file
); 
4219         /* Snapshotting. Perform a SYNC SAVE and exit */ 
4220         if (rdbSave(server
.dbfilename
) == REDIS_OK
) { 
4221             if (server
.daemonize
) 
4222                 unlink(server
.pidfile
); 
4223             redisLog(REDIS_WARNING
,"%zu bytes used at exit",zmalloc_used_memory()); 
4224             redisLog(REDIS_WARNING
,"Server exit now, bye bye..."); 
4225             if (server
.vm_enabled
) unlink(server
.vm_swap_file
); 
4228             /* Ooops.. error saving! The best we can do is to continue 
4229              * operating. Note that if there was a background saving process, 
4230              * in the next cron() Redis will be notified that the background 
4231              * saving aborted, handling special stuff like slaves pending for 
4232              * synchronization... */ 
4233             redisLog(REDIS_WARNING
,"Error trying to save the DB, can't exit");  
4235                 sdsnew("-ERR can't quit, problems saving the DB\r\n")); 
4240 static void renameGenericCommand(redisClient 
*c
, int nx
) { 
4243     /* To use the same key as src and dst is probably an error */ 
4244     if (sdscmp(c
->argv
[1]->ptr
,c
->argv
[2]->ptr
) == 0) { 
4245         addReply(c
,shared
.sameobjecterr
); 
4249     if ((o 
= lookupKeyWriteOrReply(c
,c
->argv
[1],shared
.nokeyerr
)) == NULL
) 
4253     deleteIfVolatile(c
->db
,c
->argv
[2]); 
4254     if (dictAdd(c
->db
->dict
,c
->argv
[2],o
) == DICT_ERR
) { 
4257             addReply(c
,shared
.czero
); 
4260         dictReplace(c
->db
->dict
,c
->argv
[2],o
); 
4262         incrRefCount(c
->argv
[2]); 
4264     deleteKey(c
->db
,c
->argv
[1]); 
4266     addReply(c
,nx 
? shared
.cone 
: shared
.ok
); 
4269 static void renameCommand(redisClient 
*c
) { 
4270     renameGenericCommand(c
,0); 
4273 static void renamenxCommand(redisClient 
*c
) { 
4274     renameGenericCommand(c
,1); 
4277 static void moveCommand(redisClient 
*c
) { 
4282     /* Obtain source and target DB pointers */ 
4285     if (selectDb(c
,atoi(c
->argv
[2]->ptr
)) == REDIS_ERR
) { 
4286         addReply(c
,shared
.outofrangeerr
); 
4290     selectDb(c
,srcid
); /* Back to the source DB */ 
4292     /* If the user specifies as target the same 
4293      * DB as the source DB, it is probably an error. */ 
4295         addReply(c
,shared
.sameobjecterr
); 
4299     /* Check if the element exists and get a reference */ 
4300     o 
= lookupKeyWrite(c
->db
,c
->argv
[1]); 
4302         addReply(c
,shared
.czero
); 
4306     /* Try to add the element to the target DB */ 
4307     deleteIfVolatile(dst
,c
->argv
[1]); 
4308     if (dictAdd(dst
->dict
,c
->argv
[1],o
) == DICT_ERR
) { 
4309         addReply(c
,shared
.czero
); 
4312     incrRefCount(c
->argv
[1]); 
4315     /* OK! key moved, free the entry in the source DB */ 
4316     deleteKey(src
,c
->argv
[1]); 
4318     addReply(c
,shared
.cone
); 
4321 /* =================================== Lists ================================ */ 
4322 static void pushGenericCommand(redisClient 
*c
, int where
) { 
4326     lobj 
= lookupKeyWrite(c
->db
,c
->argv
[1]); 
4328         if (handleClientsWaitingListPush(c
,c
->argv
[1],c
->argv
[2])) { 
4329             addReply(c
,shared
.cone
); 
4332         lobj 
= createListObject(); 
4334         if (where 
== REDIS_HEAD
) { 
4335             listAddNodeHead(list
,c
->argv
[2]); 
4337             listAddNodeTail(list
,c
->argv
[2]); 
4339         dictAdd(c
->db
->dict
,c
->argv
[1],lobj
); 
4340         incrRefCount(c
->argv
[1]); 
4341         incrRefCount(c
->argv
[2]); 
4343         if (lobj
->type 
!= REDIS_LIST
) { 
4344             addReply(c
,shared
.wrongtypeerr
); 
4347         if (handleClientsWaitingListPush(c
,c
->argv
[1],c
->argv
[2])) { 
4348             addReply(c
,shared
.cone
); 
4352         if (where 
== REDIS_HEAD
) { 
4353             listAddNodeHead(list
,c
->argv
[2]); 
4355             listAddNodeTail(list
,c
->argv
[2]); 
4357         incrRefCount(c
->argv
[2]); 
4360     addReplySds(c
,sdscatprintf(sdsempty(),":%d\r\n",listLength(list
))); 
4363 static void lpushCommand(redisClient 
*c
) { 
4364     pushGenericCommand(c
,REDIS_HEAD
); 
4367 static void rpushCommand(redisClient 
*c
) { 
4368     pushGenericCommand(c
,REDIS_TAIL
); 
4371 static void llenCommand(redisClient 
*c
) { 
4375     if ((o 
= lookupKeyReadOrReply(c
,c
->argv
[1],shared
.czero
)) == NULL 
|| 
4376         checkType(c
,o
,REDIS_LIST
)) return; 
4379     addReplyUlong(c
,listLength(l
)); 
4382 static void lindexCommand(redisClient 
*c
) { 
4384     int index 
= atoi(c
->argv
[2]->ptr
); 
4388     if ((o 
= lookupKeyReadOrReply(c
,c
->argv
[1],shared
.nullbulk
)) == NULL 
|| 
4389         checkType(c
,o
,REDIS_LIST
)) return; 
4392     ln 
= listIndex(list
, index
); 
4394         addReply(c
,shared
.nullbulk
); 
4396         robj 
*ele 
= listNodeValue(ln
); 
4397         addReplyBulk(c
,ele
); 
4401 static void lsetCommand(redisClient 
*c
) { 
4403     int index 
= atoi(c
->argv
[2]->ptr
); 
4407     if ((o 
= lookupKeyWriteOrReply(c
,c
->argv
[1],shared
.nokeyerr
)) == NULL 
|| 
4408         checkType(c
,o
,REDIS_LIST
)) return; 
4411     ln 
= listIndex(list
, index
); 
4413         addReply(c
,shared
.outofrangeerr
); 
4415         robj 
*ele 
= listNodeValue(ln
); 
4418         listNodeValue(ln
) = c
->argv
[3]; 
4419         incrRefCount(c
->argv
[3]); 
4420         addReply(c
,shared
.ok
); 
4425 static void popGenericCommand(redisClient 
*c
, int where
) { 
4430     if ((o 
= lookupKeyWriteOrReply(c
,c
->argv
[1],shared
.nullbulk
)) == NULL 
|| 
4431         checkType(c
,o
,REDIS_LIST
)) return; 
4434     if (where 
== REDIS_HEAD
) 
4435         ln 
= listFirst(list
); 
4437         ln 
= listLast(list
); 
4440         addReply(c
,shared
.nullbulk
); 
4442         robj 
*ele 
= listNodeValue(ln
); 
4443         addReplyBulk(c
,ele
); 
4444         listDelNode(list
,ln
); 
4445         if (listLength(list
) == 0) deleteKey(c
->db
,c
->argv
[1]); 
4450 static void lpopCommand(redisClient 
*c
) { 
4451     popGenericCommand(c
,REDIS_HEAD
); 
4454 static void rpopCommand(redisClient 
*c
) { 
4455     popGenericCommand(c
,REDIS_TAIL
); 
4458 static void lrangeCommand(redisClient 
*c
) { 
4460     int start 
= atoi(c
->argv
[2]->ptr
); 
4461     int end 
= atoi(c
->argv
[3]->ptr
); 
4468     if ((o 
= lookupKeyReadOrReply(c
,c
->argv
[1],shared
.nullmultibulk
)) == NULL 
|| 
4469         checkType(c
,o
,REDIS_LIST
)) return; 
4471     llen 
= listLength(list
); 
4473     /* convert negative indexes */ 
4474     if (start 
< 0) start 
= llen
+start
; 
4475     if (end 
< 0) end 
= llen
+end
; 
4476     if (start 
< 0) start 
= 0; 
4477     if (end 
< 0) end 
= 0; 
4479     /* indexes sanity checks */ 
4480     if (start 
> end 
|| start 
>= llen
) { 
4481         /* Out of range start or start > end result in empty list */ 
4482         addReply(c
,shared
.emptymultibulk
); 
4485     if (end 
>= llen
) end 
= llen
-1; 
4486     rangelen 
= (end
-start
)+1; 
4488     /* Return the result in form of a multi-bulk reply */ 
4489     ln 
= listIndex(list
, start
); 
4490     addReplySds(c
,sdscatprintf(sdsempty(),"*%d\r\n",rangelen
)); 
4491     for (j 
= 0; j 
< rangelen
; j
++) { 
4492         ele 
= listNodeValue(ln
); 
4493         addReplyBulk(c
,ele
); 
4498 static void ltrimCommand(redisClient 
*c
) { 
4500     int start 
= atoi(c
->argv
[2]->ptr
); 
4501     int end 
= atoi(c
->argv
[3]->ptr
); 
4503     int j
, ltrim
, rtrim
; 
4507     if ((o 
= lookupKeyWriteOrReply(c
,c
->argv
[1],shared
.ok
)) == NULL 
|| 
4508         checkType(c
,o
,REDIS_LIST
)) return; 
4510     llen 
= listLength(list
); 
4512     /* convert negative indexes */ 
4513     if (start 
< 0) start 
= llen
+start
; 
4514     if (end 
< 0) end 
= llen
+end
; 
4515     if (start 
< 0) start 
= 0; 
4516     if (end 
< 0) end 
= 0; 
4518     /* indexes sanity checks */ 
4519     if (start 
> end 
|| start 
>= llen
) { 
4520         /* Out of range start or start > end result in empty list */ 
4524         if (end 
>= llen
) end 
= llen
-1; 
4529     /* Remove list elements to perform the trim */ 
4530     for (j 
= 0; j 
< ltrim
; j
++) { 
4531         ln 
= listFirst(list
); 
4532         listDelNode(list
,ln
); 
4534     for (j 
= 0; j 
< rtrim
; j
++) { 
4535         ln 
= listLast(list
); 
4536         listDelNode(list
,ln
); 
4538     if (listLength(list
) == 0) deleteKey(c
->db
,c
->argv
[1]); 
4540     addReply(c
,shared
.ok
); 
4543 static void lremCommand(redisClient 
*c
) { 
4546     listNode 
*ln
, *next
; 
4547     int toremove 
= atoi(c
->argv
[2]->ptr
); 
4551     if ((o 
= lookupKeyWriteOrReply(c
,c
->argv
[1],shared
.czero
)) == NULL 
|| 
4552         checkType(c
,o
,REDIS_LIST
)) return; 
4556         toremove 
= -toremove
; 
4559     ln 
= fromtail 
? list
->tail 
: list
->head
; 
4561         robj 
*ele 
= listNodeValue(ln
); 
4563         next 
= fromtail 
? ln
->prev 
: ln
->next
; 
4564         if (compareStringObjects(ele
,c
->argv
[3]) == 0) { 
4565             listDelNode(list
,ln
); 
4568             if (toremove 
&& removed 
== toremove
) break; 
4572     if (listLength(list
) == 0) deleteKey(c
->db
,c
->argv
[1]); 
4573     addReplySds(c
,sdscatprintf(sdsempty(),":%d\r\n",removed
)); 
4576 /* This is the semantic of this command: 
4577  *  RPOPLPUSH srclist dstlist: 
4578  *   IF LLEN(srclist) > 0 
4579  *     element = RPOP srclist 
4580  *     LPUSH dstlist element 
4587  * The idea is to be able to get an element from a list in a reliable way 
4588  * since the element is not just returned but pushed against another list 
4589  * as well. This command was originally proposed by Ezra Zygmuntowicz. 
4591 static void rpoplpushcommand(redisClient 
*c
) { 
4596     if ((sobj 
= lookupKeyWriteOrReply(c
,c
->argv
[1],shared
.nullbulk
)) == NULL 
|| 
4597         checkType(c
,sobj
,REDIS_LIST
)) return; 
4598     srclist 
= sobj
->ptr
; 
4599     ln 
= listLast(srclist
); 
4602         addReply(c
,shared
.nullbulk
); 
4604         robj 
*dobj 
= lookupKeyWrite(c
->db
,c
->argv
[2]); 
4605         robj 
*ele 
= listNodeValue(ln
); 
4608         if (dobj 
&& dobj
->type 
!= REDIS_LIST
) { 
4609             addReply(c
,shared
.wrongtypeerr
); 
4613         /* Add the element to the target list (unless it's directly 
4614          * passed to some BLPOP-ing client) */ 
4615         if (!handleClientsWaitingListPush(c
,c
->argv
[2],ele
)) { 
4617                 /* Create the list if the key does not exist */ 
4618                 dobj 
= createListObject(); 
4619                 dictAdd(c
->db
->dict
,c
->argv
[2],dobj
); 
4620                 incrRefCount(c
->argv
[2]); 
4622             dstlist 
= dobj
->ptr
; 
4623             listAddNodeHead(dstlist
,ele
); 
4627         /* Send the element to the client as reply as well */ 
4628         addReplyBulk(c
,ele
); 
4630         /* Finally remove the element from the source list */ 
4631         listDelNode(srclist
,ln
); 
4632         if (listLength(srclist
) == 0) deleteKey(c
->db
,c
->argv
[1]); 
4637 /* ==================================== Sets ================================ */ 
4639 static void saddCommand(redisClient 
*c
) { 
4642     set 
= lookupKeyWrite(c
->db
,c
->argv
[1]); 
4644         set 
= createSetObject(); 
4645         dictAdd(c
->db
->dict
,c
->argv
[1],set
); 
4646         incrRefCount(c
->argv
[1]); 
4648         if (set
->type 
!= REDIS_SET
) { 
4649             addReply(c
,shared
.wrongtypeerr
); 
4653     if (dictAdd(set
->ptr
,c
->argv
[2],NULL
) == DICT_OK
) { 
4654         incrRefCount(c
->argv
[2]); 
4656         addReply(c
,shared
.cone
); 
4658         addReply(c
,shared
.czero
); 
4662 static void sremCommand(redisClient 
*c
) { 
4665     if ((set 
= lookupKeyWriteOrReply(c
,c
->argv
[1],shared
.czero
)) == NULL 
|| 
4666         checkType(c
,set
,REDIS_SET
)) return; 
4668     if (dictDelete(set
->ptr
,c
->argv
[2]) == DICT_OK
) { 
4670         if (htNeedsResize(set
->ptr
)) dictResize(set
->ptr
); 
4671         if (dictSize((dict
*)set
->ptr
) == 0) deleteKey(c
->db
,c
->argv
[1]); 
4672         addReply(c
,shared
.cone
); 
4674         addReply(c
,shared
.czero
); 
4678 static void smoveCommand(redisClient 
*c
) { 
4679     robj 
*srcset
, *dstset
; 
4681     srcset 
= lookupKeyWrite(c
->db
,c
->argv
[1]); 
4682     dstset 
= lookupKeyWrite(c
->db
,c
->argv
[2]); 
4684     /* If the source key does not exist return 0, if it's of the wrong type 
4686     if (srcset 
== NULL 
|| srcset
->type 
!= REDIS_SET
) { 
4687         addReply(c
, srcset 
? shared
.wrongtypeerr 
: shared
.czero
); 
4690     /* Error if the destination key is not a set as well */ 
4691     if (dstset 
&& dstset
->type 
!= REDIS_SET
) { 
4692         addReply(c
,shared
.wrongtypeerr
); 
4695     /* Remove the element from the source set */ 
4696     if (dictDelete(srcset
->ptr
,c
->argv
[3]) == DICT_ERR
) { 
4697         /* Key not found in the src set! return zero */ 
4698         addReply(c
,shared
.czero
); 
4701     if (dictSize((dict
*)srcset
->ptr
) == 0 && srcset 
!= dstset
) 
4702         deleteKey(c
->db
,c
->argv
[1]); 
4704     /* Add the element to the destination set */ 
4706         dstset 
= createSetObject(); 
4707         dictAdd(c
->db
->dict
,c
->argv
[2],dstset
); 
4708         incrRefCount(c
->argv
[2]); 
4710     if (dictAdd(dstset
->ptr
,c
->argv
[3],NULL
) == DICT_OK
) 
4711         incrRefCount(c
->argv
[3]); 
4712     addReply(c
,shared
.cone
); 
4715 static void sismemberCommand(redisClient 
*c
) { 
4718     if ((set 
= lookupKeyReadOrReply(c
,c
->argv
[1],shared
.czero
)) == NULL 
|| 
4719         checkType(c
,set
,REDIS_SET
)) return; 
4721     if (dictFind(set
->ptr
,c
->argv
[2])) 
4722         addReply(c
,shared
.cone
); 
4724         addReply(c
,shared
.czero
); 
4727 static void scardCommand(redisClient 
*c
) { 
4731     if ((o 
= lookupKeyReadOrReply(c
,c
->argv
[1],shared
.czero
)) == NULL 
|| 
4732         checkType(c
,o
,REDIS_SET
)) return; 
4735     addReplyUlong(c
,dictSize(s
)); 
4738 static void spopCommand(redisClient 
*c
) { 
4742     if ((set 
= lookupKeyWriteOrReply(c
,c
->argv
[1],shared
.nullbulk
)) == NULL 
|| 
4743         checkType(c
,set
,REDIS_SET
)) return; 
4745     de 
= dictGetRandomKey(set
->ptr
); 
4747         addReply(c
,shared
.nullbulk
); 
4749         robj 
*ele 
= dictGetEntryKey(de
); 
4751         addReplyBulk(c
,ele
); 
4752         dictDelete(set
->ptr
,ele
); 
4753         if (htNeedsResize(set
->ptr
)) dictResize(set
->ptr
); 
4754         if (dictSize((dict
*)set
->ptr
) == 0) deleteKey(c
->db
,c
->argv
[1]); 
4759 static void srandmemberCommand(redisClient 
*c
) { 
4763     if ((set 
= lookupKeyReadOrReply(c
,c
->argv
[1],shared
.nullbulk
)) == NULL 
|| 
4764         checkType(c
,set
,REDIS_SET
)) return; 
4766     de 
= dictGetRandomKey(set
->ptr
); 
4768         addReply(c
,shared
.nullbulk
); 
4770         robj 
*ele 
= dictGetEntryKey(de
); 
4772         addReplyBulk(c
,ele
); 
4776 static int qsortCompareSetsByCardinality(const void *s1
, const void *s2
) { 
4777     dict 
**d1 
= (void*) s1
, **d2 
= (void*) s2
; 
4779     return dictSize(*d1
)-dictSize(*d2
); 
4782 static void sinterGenericCommand(redisClient 
*c
, robj 
**setskeys
, unsigned long setsnum
, robj 
*dstkey
) { 
4783     dict 
**dv 
= zmalloc(sizeof(dict
*)*setsnum
); 
4786     robj 
*lenobj 
= NULL
, *dstset 
= NULL
; 
4787     unsigned long j
, cardinality 
= 0; 
4789     for (j 
= 0; j 
< setsnum
; j
++) { 
4793                     lookupKeyWrite(c
->db
,setskeys
[j
]) : 
4794                     lookupKeyRead(c
->db
,setskeys
[j
]); 
4798                 if (deleteKey(c
->db
,dstkey
)) 
4800                 addReply(c
,shared
.czero
); 
4802                 addReply(c
,shared
.nullmultibulk
); 
4806         if (setobj
->type 
!= REDIS_SET
) { 
4808             addReply(c
,shared
.wrongtypeerr
); 
4811         dv
[j
] = setobj
->ptr
; 
4813     /* Sort sets from the smallest to largest, this will improve our 
4814      * algorithm's performance */ 
4815     qsort(dv
,setsnum
,sizeof(dict
*),qsortCompareSetsByCardinality
); 
4817     /* The first thing we should output is the total number of elements... 
4818      * since this is a multi-bulk write, but at this stage we don't know 
4819      * the intersection set size, so we use a trick, append an empty object 
4820      * to the output list and save the pointer to later modify it with the 
4823         lenobj 
= createObject(REDIS_STRING
,NULL
); 
4825         decrRefCount(lenobj
); 
4827         /* If we have a target key where to store the resulting set 
4828          * create this key with an empty set inside */ 
4829         dstset 
= createSetObject(); 
4832     /* Iterate all the elements of the first (smallest) set, and test 
4833      * the element against all the other sets, if at least one set does 
4834      * not include the element it is discarded */ 
4835     di 
= dictGetIterator(dv
[0]); 
4837     while((de 
= dictNext(di
)) != NULL
) { 
4840         for (j 
= 1; j 
< setsnum
; j
++) 
4841             if (dictFind(dv
[j
],dictGetEntryKey(de
)) == NULL
) break; 
4843             continue; /* at least one set does not contain the member */ 
4844         ele 
= dictGetEntryKey(de
); 
4846             addReplyBulk(c
,ele
); 
4849             dictAdd(dstset
->ptr
,ele
,NULL
); 
4853     dictReleaseIterator(di
); 
4856         /* Store the resulting set into the target, if the intersection 
4857          * is not an empty set. */ 
4858         deleteKey(c
->db
,dstkey
); 
4859         if (dictSize((dict
*)dstset
->ptr
) > 0) { 
4860             dictAdd(c
->db
->dict
,dstkey
,dstset
); 
4861             incrRefCount(dstkey
); 
4862             addReplyLong(c
,dictSize((dict
*)dstset
->ptr
)); 
4864             decrRefCount(dstset
); 
4865             addReply(c
,shared
.czero
); 
4869         lenobj
->ptr 
= sdscatprintf(sdsempty(),"*%lu\r\n",cardinality
); 
4874 static void sinterCommand(redisClient 
*c
) { 
4875     sinterGenericCommand(c
,c
->argv
+1,c
->argc
-1,NULL
); 
4878 static void sinterstoreCommand(redisClient 
*c
) { 
4879     sinterGenericCommand(c
,c
->argv
+2,c
->argc
-2,c
->argv
[1]); 
4882 #define REDIS_OP_UNION 0 
4883 #define REDIS_OP_DIFF 1 
4884 #define REDIS_OP_INTER 2 
4886 static void sunionDiffGenericCommand(redisClient 
*c
, robj 
**setskeys
, int setsnum
, robj 
*dstkey
, int op
) { 
4887     dict 
**dv 
= zmalloc(sizeof(dict
*)*setsnum
); 
4890     robj 
*dstset 
= NULL
; 
4891     int j
, cardinality 
= 0; 
4893     for (j 
= 0; j 
< setsnum
; j
++) { 
4897                     lookupKeyWrite(c
->db
,setskeys
[j
]) : 
4898                     lookupKeyRead(c
->db
,setskeys
[j
]); 
4903         if (setobj
->type 
!= REDIS_SET
) { 
4905             addReply(c
,shared
.wrongtypeerr
); 
4908         dv
[j
] = setobj
->ptr
; 
4911     /* We need a temp set object to store our union. If the dstkey 
4912      * is not NULL (that is, we are inside an SUNIONSTORE operation) then 
4913      * this set object will be the resulting object to set into the target key*/ 
4914     dstset 
= createSetObject(); 
4916     /* Iterate all the elements of all the sets, add every element a single 
4917      * time to the result set */ 
4918     for (j 
= 0; j 
< setsnum
; j
++) { 
4919         if (op 
== REDIS_OP_DIFF 
&& j 
== 0 && !dv
[j
]) break; /* result set is empty */ 
4920         if (!dv
[j
]) continue; /* non existing keys are like empty sets */ 
4922         di 
= dictGetIterator(dv
[j
]); 
4924         while((de 
= dictNext(di
)) != NULL
) { 
4927             /* dictAdd will not add the same element multiple times */ 
4928             ele 
= dictGetEntryKey(de
); 
4929             if (op 
== REDIS_OP_UNION 
|| j 
== 0) { 
4930                 if (dictAdd(dstset
->ptr
,ele
,NULL
) == DICT_OK
) { 
4934             } else if (op 
== REDIS_OP_DIFF
) { 
4935                 if (dictDelete(dstset
->ptr
,ele
) == DICT_OK
) { 
4940         dictReleaseIterator(di
); 
4942         /* result set is empty? Exit asap. */ 
4943         if (op 
== REDIS_OP_DIFF 
&& cardinality 
== 0) break; 
4946     /* Output the content of the resulting set, if not in STORE mode */ 
4948         addReplySds(c
,sdscatprintf(sdsempty(),"*%d\r\n",cardinality
)); 
4949         di 
= dictGetIterator(dstset
->ptr
); 
4950         while((de 
= dictNext(di
)) != NULL
) { 
4953             ele 
= dictGetEntryKey(de
); 
4954             addReplyBulk(c
,ele
); 
4956         dictReleaseIterator(di
); 
4957         decrRefCount(dstset
); 
4959         /* If we have a target key where to store the resulting set 
4960          * create this key with the result set inside */ 
4961         deleteKey(c
->db
,dstkey
); 
4962         if (dictSize((dict
*)dstset
->ptr
) > 0) { 
4963             dictAdd(c
->db
->dict
,dstkey
,dstset
); 
4964             incrRefCount(dstkey
); 
4965             addReplyLong(c
,dictSize((dict
*)dstset
->ptr
)); 
4967             decrRefCount(dstset
); 
4968             addReply(c
,shared
.czero
); 
4975 static void sunionCommand(redisClient 
*c
) { 
4976     sunionDiffGenericCommand(c
,c
->argv
+1,c
->argc
-1,NULL
,REDIS_OP_UNION
); 
4979 static void sunionstoreCommand(redisClient 
*c
) { 
4980     sunionDiffGenericCommand(c
,c
->argv
+2,c
->argc
-2,c
->argv
[1],REDIS_OP_UNION
); 
4983 static void sdiffCommand(redisClient 
*c
) { 
4984     sunionDiffGenericCommand(c
,c
->argv
+1,c
->argc
-1,NULL
,REDIS_OP_DIFF
); 
4987 static void sdiffstoreCommand(redisClient 
*c
) { 
4988     sunionDiffGenericCommand(c
,c
->argv
+2,c
->argc
-2,c
->argv
[1],REDIS_OP_DIFF
); 
4991 /* ==================================== ZSets =============================== */ 
4993 /* ZSETs are ordered sets using two data structures to hold the same elements 
4994  * in order to get O(log(N)) INSERT and REMOVE operations into a sorted 
4997  * The elements are added to an hash table mapping Redis objects to scores. 
4998  * At the same time the elements are added to a skip list mapping scores 
4999  * to Redis objects (so objects are sorted by scores in this "view"). */ 
5001 /* This skiplist implementation is almost a C translation of the original 
5002  * algorithm described by William Pugh in "Skip Lists: A Probabilistic 
5003  * Alternative to Balanced Trees", modified in three ways: 
5004  * a) this implementation allows for repeated values. 
5005  * b) the comparison is not just by key (our 'score') but by satellite data. 
5006  * c) there is a back pointer, so it's a doubly linked list with the back 
5007  * pointers being only at "level 1". This allows to traverse the list 
5008  * from tail to head, useful for ZREVRANGE. */ 
5010 static zskiplistNode 
*zslCreateNode(int level
, double score
, robj 
*obj
) { 
5011     zskiplistNode 
*zn 
= zmalloc(sizeof(*zn
)); 
5013     zn
->forward 
= zmalloc(sizeof(zskiplistNode
*) * level
); 
5015         zn
->span 
= zmalloc(sizeof(unsigned int) * (level 
- 1)); 
5021 static zskiplist 
*zslCreate(void) { 
5025     zsl 
= zmalloc(sizeof(*zsl
)); 
5028     zsl
->header 
= zslCreateNode(ZSKIPLIST_MAXLEVEL
,0,NULL
); 
5029     for (j 
= 0; j 
< ZSKIPLIST_MAXLEVEL
; j
++) { 
5030         zsl
->header
->forward
[j
] = NULL
; 
5032         /* span has space for ZSKIPLIST_MAXLEVEL-1 elements */ 
5033         if (j 
< ZSKIPLIST_MAXLEVEL
-1) 
5034             zsl
->header
->span
[j
] = 0; 
5036     zsl
->header
->backward 
= NULL
; 
5041 static void zslFreeNode(zskiplistNode 
*node
) { 
5042     decrRefCount(node
->obj
); 
5043     zfree(node
->forward
); 
5048 static void zslFree(zskiplist 
*zsl
) { 
5049     zskiplistNode 
*node 
= zsl
->header
->forward
[0], *next
; 
5051     zfree(zsl
->header
->forward
); 
5052     zfree(zsl
->header
->span
); 
5055         next 
= node
->forward
[0]; 
5062 static int zslRandomLevel(void) { 
5064     while ((random()&0xFFFF) < (ZSKIPLIST_P 
* 0xFFFF)) 
5066     return (level
<ZSKIPLIST_MAXLEVEL
) ? level 
: ZSKIPLIST_MAXLEVEL
; 
5069 static void zslInsert(zskiplist 
*zsl
, double score
, robj 
*obj
) { 
5070     zskiplistNode 
*update
[ZSKIPLIST_MAXLEVEL
], *x
; 
5071     unsigned int rank
[ZSKIPLIST_MAXLEVEL
]; 
5075     for (i 
= zsl
->level
-1; i 
>= 0; i
--) { 
5076         /* store rank that is crossed to reach the insert position */ 
5077         rank
[i
] = i 
== (zsl
->level
-1) ? 0 : rank
[i
+1]; 
5079         while (x
->forward
[i
] && 
5080             (x
->forward
[i
]->score 
< score 
|| 
5081                 (x
->forward
[i
]->score 
== score 
&& 
5082                 compareStringObjects(x
->forward
[i
]->obj
,obj
) < 0))) { 
5083             rank
[i
] += i 
> 0 ? x
->span
[i
-1] : 1; 
5088     /* we assume the key is not already inside, since we allow duplicated 
5089      * scores, and the re-insertion of score and redis object should never 
5090      * happen since the caller of zslInsert() should test in the hash table 
5091      * if the element is already inside or not. */ 
5092     level 
= zslRandomLevel(); 
5093     if (level 
> zsl
->level
) { 
5094         for (i 
= zsl
->level
; i 
< level
; i
++) { 
5096             update
[i
] = zsl
->header
; 
5097             update
[i
]->span
[i
-1] = zsl
->length
; 
5101     x 
= zslCreateNode(level
,score
,obj
); 
5102     for (i 
= 0; i 
< level
; i
++) { 
5103         x
->forward
[i
] = update
[i
]->forward
[i
]; 
5104         update
[i
]->forward
[i
] = x
; 
5106         /* update span covered by update[i] as x is inserted here */ 
5108             x
->span
[i
-1] = update
[i
]->span
[i
-1] - (rank
[0] - rank
[i
]); 
5109             update
[i
]->span
[i
-1] = (rank
[0] - rank
[i
]) + 1; 
5113     /* increment span for untouched levels */ 
5114     for (i 
= level
; i 
< zsl
->level
; i
++) { 
5115         update
[i
]->span
[i
-1]++; 
5118     x
->backward 
= (update
[0] == zsl
->header
) ? NULL 
: update
[0]; 
5120         x
->forward
[0]->backward 
= x
; 
5126 /* Internal function used by zslDelete, zslDeleteByScore and zslDeleteByRank */ 
5127 void zslDeleteNode(zskiplist 
*zsl
, zskiplistNode 
*x
, zskiplistNode 
**update
) { 
5129     for (i 
= 0; i 
< zsl
->level
; i
++) { 
5130         if (update
[i
]->forward
[i
] == x
) { 
5132                 update
[i
]->span
[i
-1] += x
->span
[i
-1] - 1; 
5134             update
[i
]->forward
[i
] = x
->forward
[i
]; 
5136             /* invariant: i > 0, because update[0]->forward[0] 
5137              * is always equal to x */ 
5138             update
[i
]->span
[i
-1] -= 1; 
5141     if (x
->forward
[0]) { 
5142         x
->forward
[0]->backward 
= x
->backward
; 
5144         zsl
->tail 
= x
->backward
; 
5146     while(zsl
->level 
> 1 && zsl
->header
->forward
[zsl
->level
-1] == NULL
) 
5151 /* Delete an element with matching score/object from the skiplist. */ 
5152 static int zslDelete(zskiplist 
*zsl
, double score
, robj 
*obj
) { 
5153     zskiplistNode 
*update
[ZSKIPLIST_MAXLEVEL
], *x
; 
5157     for (i 
= zsl
->level
-1; i 
>= 0; i
--) { 
5158         while (x
->forward
[i
] && 
5159             (x
->forward
[i
]->score 
< score 
|| 
5160                 (x
->forward
[i
]->score 
== score 
&& 
5161                 compareStringObjects(x
->forward
[i
]->obj
,obj
) < 0))) 
5165     /* We may have multiple elements with the same score, what we need 
5166      * is to find the element with both the right score and object. */ 
5168     if (x 
&& score 
== x
->score 
&& compareStringObjects(x
->obj
,obj
) == 0) { 
5169         zslDeleteNode(zsl
, x
, update
); 
5173         return 0; /* not found */ 
5175     return 0; /* not found */ 
5178 /* Delete all the elements with score between min and max from the skiplist. 
5179  * Min and max are inclusive, so every element with min <= score <= max is deleted. 
5180  * Note that this function takes the reference to the hash table view of the 
5181  * sorted set, in order to remove the elements from the hash table too. */ 
5182 static unsigned long zslDeleteRangeByScore(zskiplist 
*zsl
, double min
, double max
, dict 
*dict
) { 
5183     zskiplistNode 
*update
[ZSKIPLIST_MAXLEVEL
], *x
; 
5184     unsigned long removed 
= 0; 
5188     for (i 
= zsl
->level
-1; i 
>= 0; i
--) { 
5189         while (x
->forward
[i
] && x
->forward
[i
]->score 
< min
) 
5193     /* We may have multiple elements with the same score, what we need 
5194      * is to find the element with both the right score and object. */ 
5196     while (x 
&& x
->score 
<= max
) { 
5197         zskiplistNode 
*next 
= x
->forward
[0]; 
5198         zslDeleteNode(zsl
, x
, update
); 
5199         dictDelete(dict
,x
->obj
); 
5204     return removed
; /* not found */ 
5207 /* Delete all the elements with rank between start and end from the skiplist. 
5208  * Start and end are inclusive. Note that start and end need to be 1-based */ 
5209 static unsigned long zslDeleteRangeByRank(zskiplist 
*zsl
, unsigned int start
, unsigned int end
, dict 
*dict
) { 
5210     zskiplistNode 
*update
[ZSKIPLIST_MAXLEVEL
], *x
; 
5211     unsigned long traversed 
= 0, removed 
= 0; 
5215     for (i 
= zsl
->level
-1; i 
>= 0; i
--) { 
5216         while (x
->forward
[i
] && (traversed 
+ (i 
> 0 ? x
->span
[i
-1] : 1)) < start
) { 
5217             traversed 
+= i 
> 0 ? x
->span
[i
-1] : 1; 
5225     while (x 
&& traversed 
<= end
) { 
5226         zskiplistNode 
*next 
= x
->forward
[0]; 
5227         zslDeleteNode(zsl
, x
, update
); 
5228         dictDelete(dict
,x
->obj
); 
5237 /* Find the first node having a score equal or greater than the specified one. 
5238  * Returns NULL if there is no match. */ 
5239 static zskiplistNode 
*zslFirstWithScore(zskiplist 
*zsl
, double score
) { 
5244     for (i 
= zsl
->level
-1; i 
>= 0; i
--) { 
5245         while (x
->forward
[i
] && x
->forward
[i
]->score 
< score
) 
5248     /* We may have multiple elements with the same score, what we need 
5249      * is to find the element with both the right score and object. */ 
5250     return x
->forward
[0]; 
5253 /* Find the rank for an element by both score and key. 
5254  * Returns 0 when the element cannot be found, rank otherwise. 
5255  * Note that the rank is 1-based due to the span of zsl->header to the 
5257 static unsigned long zslGetRank(zskiplist 
*zsl
, double score
, robj 
*o
) { 
5259     unsigned long rank 
= 0; 
5263     for (i 
= zsl
->level
-1; i 
>= 0; i
--) { 
5264         while (x
->forward
[i
] && 
5265             (x
->forward
[i
]->score 
< score 
|| 
5266                 (x
->forward
[i
]->score 
== score 
&& 
5267                 compareStringObjects(x
->forward
[i
]->obj
,o
) <= 0))) { 
5268             rank 
+= i 
> 0 ? x
->span
[i
-1] : 1; 
5272         /* x might be equal to zsl->header, so test if obj is non-NULL */ 
5273         if (x
->obj 
&& compareStringObjects(x
->obj
,o
) == 0) { 
5280 /* Finds an element by its rank. The rank argument needs to be 1-based. */ 
5281 zskiplistNode
* zslGetElementByRank(zskiplist 
*zsl
, unsigned long rank
) { 
5283     unsigned long traversed 
= 0; 
5287     for (i 
= zsl
->level
-1; i 
>= 0; i
--) { 
5288         while (x
->forward
[i
] && (traversed 
+ (i
>0 ? x
->span
[i
-1] : 1)) <= rank
) 
5290             traversed 
+= i 
> 0 ? x
->span
[i
-1] : 1; 
5293         if (traversed 
== rank
) { 
5300 /* The actual Z-commands implementations */ 
5302 /* This generic command implements both ZADD and ZINCRBY. 
5303  * scoreval is the score if the operation is a ZADD (doincrement == 0) or 
5304  * the increment if the operation is a ZINCRBY (doincrement == 1). */ 
5305 static void zaddGenericCommand(redisClient 
*c
, robj 
*key
, robj 
*ele
, double scoreval
, int doincrement
) { 
5310     zsetobj 
= lookupKeyWrite(c
->db
,key
); 
5311     if (zsetobj 
== NULL
) { 
5312         zsetobj 
= createZsetObject(); 
5313         dictAdd(c
->db
->dict
,key
,zsetobj
); 
5316         if (zsetobj
->type 
!= REDIS_ZSET
) { 
5317             addReply(c
,shared
.wrongtypeerr
); 
5323     /* Ok now since we implement both ZADD and ZINCRBY here the code 
5324      * needs to handle the two different conditions. It's all about setting 
5325      * '*score', that is, the new score to set, to the right value. */ 
5326     score 
= zmalloc(sizeof(double)); 
5330         /* Read the old score. If the element was not present starts from 0 */ 
5331         de 
= dictFind(zs
->dict
,ele
); 
5333             double *oldscore 
= dictGetEntryVal(de
); 
5334             *score 
= *oldscore 
+ scoreval
; 
5342     /* What follows is a simple remove and re-insert operation that is common 
5343      * to both ZADD and ZINCRBY... */ 
5344     if (dictAdd(zs
->dict
,ele
,score
) == DICT_OK
) { 
5345         /* case 1: New element */ 
5346         incrRefCount(ele
); /* added to hash */ 
5347         zslInsert(zs
->zsl
,*score
,ele
); 
5348         incrRefCount(ele
); /* added to skiplist */ 
5351             addReplyDouble(c
,*score
); 
5353             addReply(c
,shared
.cone
); 
5358         /* case 2: Score update operation */ 
5359         de 
= dictFind(zs
->dict
,ele
); 
5360         redisAssert(de 
!= NULL
); 
5361         oldscore 
= dictGetEntryVal(de
); 
5362         if (*score 
!= *oldscore
) { 
5365             /* Remove and insert the element in the skip list with new score */ 
5366             deleted 
= zslDelete(zs
->zsl
,*oldscore
,ele
); 
5367             redisAssert(deleted 
!= 0); 
5368             zslInsert(zs
->zsl
,*score
,ele
); 
5370             /* Update the score in the hash table */ 
5371             dictReplace(zs
->dict
,ele
,score
); 
5377             addReplyDouble(c
,*score
); 
5379             addReply(c
,shared
.czero
); 
5383 static void zaddCommand(redisClient 
*c
) { 
5386     scoreval 
= strtod(c
->argv
[2]->ptr
,NULL
); 
5387     zaddGenericCommand(c
,c
->argv
[1],c
->argv
[3],scoreval
,0); 
5390 static void zincrbyCommand(redisClient 
*c
) { 
5393     scoreval 
= strtod(c
->argv
[2]->ptr
,NULL
); 
5394     zaddGenericCommand(c
,c
->argv
[1],c
->argv
[3],scoreval
,1); 
5397 static void zremCommand(redisClient 
*c
) { 
5404     if ((zsetobj 
= lookupKeyWriteOrReply(c
,c
->argv
[1],shared
.czero
)) == NULL 
|| 
5405         checkType(c
,zsetobj
,REDIS_ZSET
)) return; 
5408     de 
= dictFind(zs
->dict
,c
->argv
[2]); 
5410         addReply(c
,shared
.czero
); 
5413     /* Delete from the skiplist */ 
5414     oldscore 
= dictGetEntryVal(de
); 
5415     deleted 
= zslDelete(zs
->zsl
,*oldscore
,c
->argv
[2]); 
5416     redisAssert(deleted 
!= 0); 
5418     /* Delete from the hash table */ 
5419     dictDelete(zs
->dict
,c
->argv
[2]); 
5420     if (htNeedsResize(zs
->dict
)) dictResize(zs
->dict
); 
5421     if (dictSize(zs
->dict
) == 0) deleteKey(c
->db
,c
->argv
[1]); 
5423     addReply(c
,shared
.cone
); 
5426 static void zremrangebyscoreCommand(redisClient 
*c
) { 
5427     double min 
= strtod(c
->argv
[2]->ptr
,NULL
); 
5428     double max 
= strtod(c
->argv
[3]->ptr
,NULL
); 
5433     if ((zsetobj 
= lookupKeyWriteOrReply(c
,c
->argv
[1],shared
.czero
)) == NULL 
|| 
5434         checkType(c
,zsetobj
,REDIS_ZSET
)) return; 
5437     deleted 
= zslDeleteRangeByScore(zs
->zsl
,min
,max
,zs
->dict
); 
5438     if (htNeedsResize(zs
->dict
)) dictResize(zs
->dict
); 
5439     if (dictSize(zs
->dict
) == 0) deleteKey(c
->db
,c
->argv
[1]); 
5440     server
.dirty 
+= deleted
; 
5441     addReplyLong(c
,deleted
); 
5444 static void zremrangebyrankCommand(redisClient 
*c
) { 
5445     int start 
= atoi(c
->argv
[2]->ptr
); 
5446     int end 
= atoi(c
->argv
[3]->ptr
); 
5452     if ((zsetobj 
= lookupKeyWriteOrReply(c
,c
->argv
[1],shared
.czero
)) == NULL 
|| 
5453         checkType(c
,zsetobj
,REDIS_ZSET
)) return; 
5455     llen 
= zs
->zsl
->length
; 
5457     /* convert negative indexes */ 
5458     if (start 
< 0) start 
= llen
+start
; 
5459     if (end 
< 0) end 
= llen
+end
; 
5460     if (start 
< 0) start 
= 0; 
5461     if (end 
< 0) end 
= 0; 
5463     /* indexes sanity checks */ 
5464     if (start 
> end 
|| start 
>= llen
) { 
5465         addReply(c
,shared
.czero
); 
5468     if (end 
>= llen
) end 
= llen
-1; 
5470     /* increment start and end because zsl*Rank functions 
5471      * use 1-based rank */ 
5472     deleted 
= zslDeleteRangeByRank(zs
->zsl
,start
+1,end
+1,zs
->dict
); 
5473     if (htNeedsResize(zs
->dict
)) dictResize(zs
->dict
); 
5474     if (dictSize(zs
->dict
) == 0) deleteKey(c
->db
,c
->argv
[1]); 
5475     server
.dirty 
+= deleted
; 
5476     addReplyLong(c
, deleted
); 
5484 static int qsortCompareZsetopsrcByCardinality(const void *s1
, const void *s2
) { 
5485     zsetopsrc 
*d1 
= (void*) s1
, *d2 
= (void*) s2
; 
5486     unsigned long size1
, size2
; 
5487     size1 
= d1
->dict 
? dictSize(d1
->dict
) : 0; 
5488     size2 
= d2
->dict 
? dictSize(d2
->dict
) : 0; 
5489     return size1 
- size2
; 
5492 #define REDIS_AGGR_SUM 1 
5493 #define REDIS_AGGR_MIN 2 
5494 #define REDIS_AGGR_MAX 3 
5496 inline static void zunionInterAggregate(double *target
, double val
, int aggregate
) { 
5497     if (aggregate 
== REDIS_AGGR_SUM
) { 
5498         *target 
= *target 
+ val
; 
5499     } else if (aggregate 
== REDIS_AGGR_MIN
) { 
5500         *target 
= val 
< *target 
? val 
: *target
; 
5501     } else if (aggregate 
== REDIS_AGGR_MAX
) { 
5502         *target 
= val 
> *target 
? val 
: *target
; 
5505         redisAssert(0 != 0); 
5509 static void zunionInterGenericCommand(redisClient 
*c
, robj 
*dstkey
, int op
) { 
5511     int aggregate 
= REDIS_AGGR_SUM
; 
5518     /* expect zsetnum input keys to be given */ 
5519     zsetnum 
= atoi(c
->argv
[2]->ptr
); 
5521         addReplySds(c
,sdsnew("-ERR at least 1 input key is needed for ZUNION/ZINTER\r\n")); 
5525     /* test if the expected number of keys would overflow */ 
5526     if (3+zsetnum 
> c
->argc
) { 
5527         addReply(c
,shared
.syntaxerr
); 
5531     /* read keys to be used for input */ 
5532     src 
= zmalloc(sizeof(zsetopsrc
) * zsetnum
); 
5533     for (i 
= 0, j 
= 3; i 
< zsetnum
; i
++, j
++) { 
5534         robj 
*zsetobj 
= lookupKeyWrite(c
->db
,c
->argv
[j
]); 
5538             if (zsetobj
->type 
!= REDIS_ZSET
) { 
5540                 addReply(c
,shared
.wrongtypeerr
); 
5543             src
[i
].dict 
= ((zset
*)zsetobj
->ptr
)->dict
; 
5546         /* default all weights to 1 */ 
5547         src
[i
].weight 
= 1.0; 
5550     /* parse optional extra arguments */ 
5552         int remaining 
= c
->argc 
- j
; 
5555             if (remaining 
>= (zsetnum 
+ 1) && !strcasecmp(c
->argv
[j
]->ptr
,"weights")) { 
5557                 for (i 
= 0; i 
< zsetnum
; i
++, j
++, remaining
--) { 
5558                     src
[i
].weight 
= strtod(c
->argv
[j
]->ptr
, NULL
); 
5560             } else if (remaining 
>= 2 && !strcasecmp(c
->argv
[j
]->ptr
,"aggregate")) { 
5562                 if (!strcasecmp(c
->argv
[j
]->ptr
,"sum")) { 
5563                     aggregate 
= REDIS_AGGR_SUM
; 
5564                 } else if (!strcasecmp(c
->argv
[j
]->ptr
,"min")) { 
5565                     aggregate 
= REDIS_AGGR_MIN
; 
5566                 } else if (!strcasecmp(c
->argv
[j
]->ptr
,"max")) { 
5567                     aggregate 
= REDIS_AGGR_MAX
; 
5570                     addReply(c
,shared
.syntaxerr
); 
5576                 addReply(c
,shared
.syntaxerr
); 
5582     /* sort sets from the smallest to largest, this will improve our 
5583      * algorithm's performance */ 
5584     qsort(src
,zsetnum
,sizeof(zsetopsrc
), qsortCompareZsetopsrcByCardinality
); 
5586     dstobj 
= createZsetObject(); 
5587     dstzset 
= dstobj
->ptr
; 
5589     if (op 
== REDIS_OP_INTER
) { 
5590         /* skip going over all entries if the smallest zset is NULL or empty */ 
5591         if (src
[0].dict 
&& dictSize(src
[0].dict
) > 0) { 
5592             /* precondition: as src[0].dict is non-empty and the zsets are ordered 
5593              * from small to large, all src[i > 0].dict are non-empty too */ 
5594             di 
= dictGetIterator(src
[0].dict
); 
5595             while((de 
= dictNext(di
)) != NULL
) { 
5596                 double *score 
= zmalloc(sizeof(double)), value
; 
5597                 *score 
= src
[0].weight 
* (*(double*)dictGetEntryVal(de
)); 
5599                 for (j 
= 1; j 
< zsetnum
; j
++) { 
5600                     dictEntry 
*other 
= dictFind(src
[j
].dict
,dictGetEntryKey(de
)); 
5602                         value 
= src
[j
].weight 
* (*(double*)dictGetEntryVal(other
)); 
5603                         zunionInterAggregate(score
, value
, aggregate
); 
5609                 /* skip entry when not present in every source dict */ 
5613                     robj 
*o 
= dictGetEntryKey(de
); 
5614                     dictAdd(dstzset
->dict
,o
,score
); 
5615                     incrRefCount(o
); /* added to dictionary */ 
5616                     zslInsert(dstzset
->zsl
,*score
,o
); 
5617                     incrRefCount(o
); /* added to skiplist */ 
5620             dictReleaseIterator(di
); 
5622     } else if (op 
== REDIS_OP_UNION
) { 
5623         for (i 
= 0; i 
< zsetnum
; i
++) { 
5624             if (!src
[i
].dict
) continue; 
5626             di 
= dictGetIterator(src
[i
].dict
); 
5627             while((de 
= dictNext(di
)) != NULL
) { 
5628                 /* skip key when already processed */ 
5629                 if (dictFind(dstzset
->dict
,dictGetEntryKey(de
)) != NULL
) continue; 
5631                 double *score 
= zmalloc(sizeof(double)), value
; 
5632                 *score 
= src
[i
].weight 
* (*(double*)dictGetEntryVal(de
)); 
5634                 /* because the zsets are sorted by size, its only possible 
5635                  * for sets at larger indices to hold this entry */ 
5636                 for (j 
= (i
+1); j 
< zsetnum
; j
++) { 
5637                     dictEntry 
*other 
= dictFind(src
[j
].dict
,dictGetEntryKey(de
)); 
5639                         value 
= src
[j
].weight 
* (*(double*)dictGetEntryVal(other
)); 
5640                         zunionInterAggregate(score
, value
, aggregate
); 
5644                 robj 
*o 
= dictGetEntryKey(de
); 
5645                 dictAdd(dstzset
->dict
,o
,score
); 
5646                 incrRefCount(o
); /* added to dictionary */ 
5647                 zslInsert(dstzset
->zsl
,*score
,o
); 
5648                 incrRefCount(o
); /* added to skiplist */ 
5650             dictReleaseIterator(di
); 
5653         /* unknown operator */ 
5654         redisAssert(op 
== REDIS_OP_INTER 
|| op 
== REDIS_OP_UNION
); 
5657     deleteKey(c
->db
,dstkey
); 
5658     if (dstzset
->zsl
->length
) { 
5659         dictAdd(c
->db
->dict
,dstkey
,dstobj
); 
5660         incrRefCount(dstkey
); 
5661         addReplyLong(c
, dstzset
->zsl
->length
); 
5664         decrRefCount(dstobj
); 
5665         addReply(c
, shared
.czero
); 
5670 static void zunionCommand(redisClient 
*c
) { 
5671     zunionInterGenericCommand(c
,c
->argv
[1], REDIS_OP_UNION
); 
5674 static void zinterCommand(redisClient 
*c
) { 
5675     zunionInterGenericCommand(c
,c
->argv
[1], REDIS_OP_INTER
); 
5678 static void zrangeGenericCommand(redisClient 
*c
, int reverse
) { 
5680     int start 
= atoi(c
->argv
[2]->ptr
); 
5681     int end 
= atoi(c
->argv
[3]->ptr
); 
5690     if (c
->argc 
== 5 && !strcasecmp(c
->argv
[4]->ptr
,"withscores")) { 
5692     } else if (c
->argc 
>= 5) { 
5693         addReply(c
,shared
.syntaxerr
); 
5697     if ((o 
= lookupKeyReadOrReply(c
,c
->argv
[1],shared
.nullmultibulk
)) == NULL 
|| 
5698         checkType(c
,o
,REDIS_ZSET
)) return; 
5703     /* convert negative indexes */ 
5704     if (start 
< 0) start 
= llen
+start
; 
5705     if (end 
< 0) end 
= llen
+end
; 
5706     if (start 
< 0) start 
= 0; 
5707     if (end 
< 0) end 
= 0; 
5709     /* indexes sanity checks */ 
5710     if (start 
> end 
|| start 
>= llen
) { 
5711         /* Out of range start or start > end result in empty list */ 
5712         addReply(c
,shared
.emptymultibulk
); 
5715     if (end 
>= llen
) end 
= llen
-1; 
5716     rangelen 
= (end
-start
)+1; 
5718     /* check if starting point is trivial, before searching 
5719      * the element in log(N) time */ 
5721         ln 
= start 
== 0 ? zsl
->tail 
: zslGetElementByRank(zsl
, llen
-start
); 
5724             zsl
->header
->forward
[0] : zslGetElementByRank(zsl
, start
+1); 
5727     /* Return the result in form of a multi-bulk reply */ 
5728     addReplySds(c
,sdscatprintf(sdsempty(),"*%d\r\n", 
5729         withscores 
? (rangelen
*2) : rangelen
)); 
5730     for (j 
= 0; j 
< rangelen
; j
++) { 
5732         addReplyBulk(c
,ele
); 
5734             addReplyDouble(c
,ln
->score
); 
5735         ln 
= reverse 
? ln
->backward 
: ln
->forward
[0]; 
5739 static void zrangeCommand(redisClient 
*c
) { 
5740     zrangeGenericCommand(c
,0); 
5743 static void zrevrangeCommand(redisClient 
*c
) { 
5744     zrangeGenericCommand(c
,1); 
5747 /* This command implements both ZRANGEBYSCORE and ZCOUNT. 
5748  * If justcount is non-zero, just the count is returned. */ 
5749 static void genericZrangebyscoreCommand(redisClient 
*c
, int justcount
) { 
5752     int minex 
= 0, maxex 
= 0; /* are min or max exclusive? */ 
5753     int offset 
= 0, limit 
= -1; 
5757     /* Parse the min-max interval. If one of the values is prefixed 
5758      * by the "(" character, it's considered "open". For instance 
5759      * ZRANGEBYSCORE zset (1.5 (2.5 will match min < x < max 
5760      * ZRANGEBYSCORE zset 1.5 2.5 will instead match min <= x <= max */ 
5761     if (((char*)c
->argv
[2]->ptr
)[0] == '(') { 
5762         min 
= strtod((char*)c
->argv
[2]->ptr
+1,NULL
); 
5765         min 
= strtod(c
->argv
[2]->ptr
,NULL
); 
5767     if (((char*)c
->argv
[3]->ptr
)[0] == '(') { 
5768         max 
= strtod((char*)c
->argv
[3]->ptr
+1,NULL
); 
5771         max 
= strtod(c
->argv
[3]->ptr
,NULL
); 
5774     /* Parse "WITHSCORES": note that if the command was called with 
5775      * the name ZCOUNT then we are sure that c->argc == 4, so we'll never 
5776      * enter the following paths to parse WITHSCORES and LIMIT. */ 
5777     if (c
->argc 
== 5 || c
->argc 
== 8) { 
5778         if (strcasecmp(c
->argv
[c
->argc
-1]->ptr
,"withscores") == 0) 
5783     if (c
->argc 
!= (4 + withscores
) && c
->argc 
!= (7 + withscores
)) 
5787             sdsnew("-ERR wrong number of arguments for ZRANGEBYSCORE\r\n")); 
5792     if (c
->argc 
== (7 + withscores
) && strcasecmp(c
->argv
[4]->ptr
,"limit")) { 
5793         addReply(c
,shared
.syntaxerr
); 
5795     } else if (c
->argc 
== (7 + withscores
)) { 
5796         offset 
= atoi(c
->argv
[5]->ptr
); 
5797         limit 
= atoi(c
->argv
[6]->ptr
); 
5798         if (offset 
< 0) offset 
= 0; 
5801     /* Ok, lookup the key and get the range */ 
5802     o 
= lookupKeyRead(c
->db
,c
->argv
[1]); 
5804         addReply(c
,justcount 
? shared
.czero 
: shared
.nullmultibulk
); 
5806         if (o
->type 
!= REDIS_ZSET
) { 
5807             addReply(c
,shared
.wrongtypeerr
); 
5809             zset 
*zsetobj 
= o
->ptr
; 
5810             zskiplist 
*zsl 
= zsetobj
->zsl
; 
5812             robj 
*ele
, *lenobj 
= NULL
; 
5813             unsigned long rangelen 
= 0; 
5815             /* Get the first node with the score >= min, or with 
5816              * score > min if 'minex' is true. */ 
5817             ln 
= zslFirstWithScore(zsl
,min
); 
5818             while (minex 
&& ln 
&& ln
->score 
== min
) ln 
= ln
->forward
[0]; 
5821                 /* No element matching the speciifed interval */ 
5822                 addReply(c
,justcount 
? shared
.czero 
: shared
.emptymultibulk
); 
5826             /* We don't know in advance how many matching elements there 
5827              * are in the list, so we push this object that will represent 
5828              * the multi-bulk length in the output buffer, and will "fix" 
5831                 lenobj 
= createObject(REDIS_STRING
,NULL
); 
5833                 decrRefCount(lenobj
); 
5836             while(ln 
&& (maxex 
? (ln
->score 
< max
) : (ln
->score 
<= max
))) { 
5839                     ln 
= ln
->forward
[0]; 
5842                 if (limit 
== 0) break; 
5845                     addReplyBulk(c
,ele
); 
5847                         addReplyDouble(c
,ln
->score
); 
5849                 ln 
= ln
->forward
[0]; 
5851                 if (limit 
> 0) limit
--; 
5854                 addReplyLong(c
,(long)rangelen
); 
5856                 lenobj
->ptr 
= sdscatprintf(sdsempty(),"*%lu\r\n", 
5857                      withscores 
? (rangelen
*2) : rangelen
); 
5863 static void zrangebyscoreCommand(redisClient 
*c
) { 
5864     genericZrangebyscoreCommand(c
,0); 
5867 static void zcountCommand(redisClient 
*c
) { 
5868     genericZrangebyscoreCommand(c
,1); 
5871 static void zcardCommand(redisClient 
*c
) { 
5875     if ((o 
= lookupKeyReadOrReply(c
,c
->argv
[1],shared
.czero
)) == NULL 
|| 
5876         checkType(c
,o
,REDIS_ZSET
)) return; 
5879     addReplyUlong(c
,zs
->zsl
->length
); 
5882 static void zscoreCommand(redisClient 
*c
) { 
5887     if ((o 
= lookupKeyReadOrReply(c
,c
->argv
[1],shared
.nullbulk
)) == NULL 
|| 
5888         checkType(c
,o
,REDIS_ZSET
)) return; 
5891     de 
= dictFind(zs
->dict
,c
->argv
[2]); 
5893         addReply(c
,shared
.nullbulk
); 
5895         double *score 
= dictGetEntryVal(de
); 
5897         addReplyDouble(c
,*score
); 
5901 static void zrankGenericCommand(redisClient 
*c
, int reverse
) { 
5909     if ((o 
= lookupKeyReadOrReply(c
,c
->argv
[1],shared
.nullbulk
)) == NULL 
|| 
5910         checkType(c
,o
,REDIS_ZSET
)) return; 
5914     de 
= dictFind(zs
->dict
,c
->argv
[2]); 
5916         addReply(c
,shared
.nullbulk
); 
5920     score 
= dictGetEntryVal(de
); 
5921     rank 
= zslGetRank(zsl
, *score
, c
->argv
[2]); 
5924             addReplyLong(c
, zsl
->length 
- rank
); 
5926             addReplyLong(c
, rank
-1); 
5929         addReply(c
,shared
.nullbulk
); 
5933 static void zrankCommand(redisClient 
*c
) { 
5934     zrankGenericCommand(c
, 0); 
5937 static void zrevrankCommand(redisClient 
*c
) { 
5938     zrankGenericCommand(c
, 1); 
5941 /* =================================== Hashes =============================== */ 
5942 static void hsetCommand(redisClient 
*c
) { 
5944     robj 
*o 
= lookupKeyWrite(c
->db
,c
->argv
[1]); 
5947         o 
= createHashObject(); 
5948         dictAdd(c
->db
->dict
,c
->argv
[1],o
); 
5949         incrRefCount(c
->argv
[1]); 
5951         if (o
->type 
!= REDIS_HASH
) { 
5952             addReply(c
,shared
.wrongtypeerr
); 
5956     /* We want to convert the zipmap into an hash table right now if the 
5957      * entry to be added is too big. Note that we check if the object 
5958      * is integer encoded before to try fetching the length in the test below. 
5959      * This is because integers are small, but currently stringObjectLen() 
5960      * performs a slow conversion: not worth it. */ 
5961     if (o
->encoding 
== REDIS_ENCODING_ZIPMAP 
&& 
5962         ((c
->argv
[2]->encoding 
== REDIS_ENCODING_RAW 
&& 
5963           sdslen(c
->argv
[2]->ptr
) > server
.hash_max_zipmap_value
) || 
5964          (c
->argv
[3]->encoding 
== REDIS_ENCODING_RAW 
&& 
5965           sdslen(c
->argv
[3]->ptr
) > server
.hash_max_zipmap_value
))) 
5967         convertToRealHash(o
); 
5970     if (o
->encoding 
== REDIS_ENCODING_ZIPMAP
) { 
5971         unsigned char *zm 
= o
->ptr
; 
5972         robj 
*valobj 
= getDecodedObject(c
->argv
[3]); 
5974         zm 
= zipmapSet(zm
,c
->argv
[2]->ptr
,sdslen(c
->argv
[2]->ptr
), 
5975             valobj
->ptr
,sdslen(valobj
->ptr
),&update
); 
5976         decrRefCount(valobj
); 
5979         /* And here there is the second check for hash conversion. */ 
5980         if (zipmapLen(zm
) > server
.hash_max_zipmap_entries
) 
5981             convertToRealHash(o
); 
5983         tryObjectEncoding(c
->argv
[2]); 
5984         /* note that c->argv[3] is already encoded, as the latest arg 
5985          * of a bulk command is always integer encoded if possible. */ 
5986         if (dictReplace(o
->ptr
,c
->argv
[2],c
->argv
[3])) { 
5987             incrRefCount(c
->argv
[2]); 
5991         incrRefCount(c
->argv
[3]); 
5994     addReplySds(c
,sdscatprintf(sdsempty(),":%d\r\n",update 
== 0)); 
5997 static void hincrbyCommand(redisClient 
*c
) { 
5998     long long value 
= 0, incr 
= 0; 
5999     robj 
*o 
= lookupKeyWrite(c
->db
,c
->argv
[1]); 
6002         o 
= createHashObject(); 
6003         dictAdd(c
->db
->dict
,c
->argv
[1],o
); 
6004         incrRefCount(c
->argv
[1]); 
6006         if (o
->type 
!= REDIS_HASH
) { 
6007             addReply(c
,shared
.wrongtypeerr
); 
6012     incr 
= strtoll(c
->argv
[3]->ptr
, NULL
, 10); 
6013     if (o
->encoding 
== REDIS_ENCODING_ZIPMAP
) { 
6014         unsigned char *zm 
= o
->ptr
; 
6015         unsigned char *zval
; 
6018         /* Find value if already present in hash */ 
6019         if (zipmapGet(zm
,c
->argv
[2]->ptr
,sdslen(c
->argv
[2]->ptr
), 
6021             /* strtoll needs the char* to have a trailing \0, but 
6022              * the zipmap doesn't include them. */ 
6023             sds szval 
= sdsnewlen(zval
, zvlen
); 
6024             value 
= strtoll(szval
,NULL
,10); 
6029         sds svalue 
= sdscatprintf(sdsempty(),"%lld",value
); 
6030         zm 
= zipmapSet(zm
,c
->argv
[2]->ptr
,sdslen(c
->argv
[2]->ptr
), 
6031             (unsigned char*)svalue
,sdslen(svalue
),NULL
); 
6035         /* Check if the zipmap needs to be converted. */ 
6036         if (zipmapLen(zm
) > server
.hash_max_zipmap_entries
) 
6037             convertToRealHash(o
); 
6042         /* Find value if already present in hash */ 
6043         de 
= dictFind(o
->ptr
,c
->argv
[2]); 
6045             hval 
= dictGetEntryVal(de
); 
6046             if (hval
->encoding 
== REDIS_ENCODING_RAW
) 
6047                 value 
= strtoll(hval
->ptr
,NULL
,10); 
6048             else if (hval
->encoding 
== REDIS_ENCODING_INT
) 
6049                 value 
= (long)hval
->ptr
; 
6051                 redisAssert(1 != 1); 
6055         hval 
= createObject(REDIS_STRING
,sdscatprintf(sdsempty(),"%lld",value
)); 
6056         tryObjectEncoding(hval
); 
6057         if (dictReplace(o
->ptr
,c
->argv
[2],hval
)) { 
6058             incrRefCount(c
->argv
[2]); 
6063     addReplyLongLong(c
, value
); 
6066 static void hgetCommand(redisClient 
*c
) { 
6069     if ((o 
= lookupKeyReadOrReply(c
,c
->argv
[1],shared
.nullbulk
)) == NULL 
|| 
6070         checkType(c
,o
,REDIS_HASH
)) return; 
6072     if (o
->encoding 
== REDIS_ENCODING_ZIPMAP
) { 
6073         unsigned char *zm 
= o
->ptr
; 
6078         field 
= getDecodedObject(c
->argv
[2]); 
6079         if (zipmapGet(zm
,field
->ptr
,sdslen(field
->ptr
), &val
,&vlen
)) { 
6080             addReplySds(c
,sdscatprintf(sdsempty(),"$%u\r\n", vlen
)); 
6081             addReplySds(c
,sdsnewlen(val
,vlen
)); 
6082             addReply(c
,shared
.crlf
); 
6083             decrRefCount(field
); 
6086             addReply(c
,shared
.nullbulk
); 
6087             decrRefCount(field
); 
6091         struct dictEntry 
*de
; 
6093         de 
= dictFind(o
->ptr
,c
->argv
[2]); 
6095             addReply(c
,shared
.nullbulk
); 
6097             robj 
*e 
= dictGetEntryVal(de
); 
6104 static void hdelCommand(redisClient 
*c
) { 
6108     if ((o 
= lookupKeyWriteOrReply(c
,c
->argv
[1],shared
.czero
)) == NULL 
|| 
6109         checkType(c
,o
,REDIS_HASH
)) return; 
6111     if (o
->encoding 
== REDIS_ENCODING_ZIPMAP
) { 
6112         robj 
*field 
= getDecodedObject(c
->argv
[2]); 
6114         o
->ptr 
= zipmapDel((unsigned char*) o
->ptr
, 
6115             (unsigned char*) field
->ptr
, 
6116             sdslen(field
->ptr
), &deleted
); 
6117         decrRefCount(field
); 
6118         if (zipmapLen((unsigned char*) o
->ptr
) == 0) 
6119             deleteKey(c
->db
,c
->argv
[1]); 
6121         deleted 
= dictDelete((dict
*)o
->ptr
,c
->argv
[2]) == DICT_OK
; 
6122         if (htNeedsResize(o
->ptr
)) dictResize(o
->ptr
); 
6123         if (dictSize((dict
*)o
->ptr
) == 0) deleteKey(c
->db
,c
->argv
[1]); 
6125     if (deleted
) server
.dirty
++; 
6126     addReply(c
,deleted 
? shared
.cone 
: shared
.czero
); 
6129 static void hlenCommand(redisClient 
*c
) { 
6133     if ((o 
= lookupKeyReadOrReply(c
,c
->argv
[1],shared
.czero
)) == NULL 
|| 
6134         checkType(c
,o
,REDIS_HASH
)) return; 
6136     len 
= (o
->encoding 
== REDIS_ENCODING_ZIPMAP
) ? 
6137             zipmapLen((unsigned char*)o
->ptr
) : dictSize((dict
*)o
->ptr
); 
6138     addReplyUlong(c
,len
); 
6141 #define REDIS_GETALL_KEYS 1 
6142 #define REDIS_GETALL_VALS 2 
6143 static void genericHgetallCommand(redisClient 
*c
, int flags
) { 
6145     unsigned long count 
= 0; 
6147     if ((o 
= lookupKeyReadOrReply(c
,c
->argv
[1],shared
.nullmultibulk
)) == NULL
 
6148         || checkType(c
,o
,REDIS_HASH
)) return; 
6150     lenobj 
= createObject(REDIS_STRING
,NULL
); 
6152     decrRefCount(lenobj
); 
6154     if (o
->encoding 
== REDIS_ENCODING_ZIPMAP
) { 
6155         unsigned char *p 
= zipmapRewind(o
->ptr
); 
6156         unsigned char *field
, *val
; 
6157         unsigned int flen
, vlen
; 
6159         while((p 
= zipmapNext(p
,&field
,&flen
,&val
,&vlen
)) != NULL
) { 
6162             if (flags 
& REDIS_GETALL_KEYS
) { 
6163                 aux 
= createStringObject((char*)field
,flen
); 
6164                 addReplyBulk(c
,aux
); 
6168             if (flags 
& REDIS_GETALL_VALS
) { 
6169                 aux 
= createStringObject((char*)val
,vlen
); 
6170                 addReplyBulk(c
,aux
); 
6176         dictIterator 
*di 
= dictGetIterator(o
->ptr
); 
6179         while((de 
= dictNext(di
)) != NULL
) { 
6180             robj 
*fieldobj 
= dictGetEntryKey(de
); 
6181             robj 
*valobj 
= dictGetEntryVal(de
); 
6183             if (flags 
& REDIS_GETALL_KEYS
) { 
6184                 addReplyBulk(c
,fieldobj
); 
6187             if (flags 
& REDIS_GETALL_VALS
) { 
6188                 addReplyBulk(c
,valobj
); 
6192         dictReleaseIterator(di
); 
6194     lenobj
->ptr 
= sdscatprintf(sdsempty(),"*%lu\r\n",count
); 
6197 static void hkeysCommand(redisClient 
*c
) { 
6198     genericHgetallCommand(c
,REDIS_GETALL_KEYS
); 
6201 static void hvalsCommand(redisClient 
*c
) { 
6202     genericHgetallCommand(c
,REDIS_GETALL_VALS
); 
6205 static void hgetallCommand(redisClient 
*c
) { 
6206     genericHgetallCommand(c
,REDIS_GETALL_KEYS
|REDIS_GETALL_VALS
); 
6209 static void hexistsCommand(redisClient 
*c
) { 
6213     if ((o 
= lookupKeyReadOrReply(c
,c
->argv
[1],shared
.czero
)) == NULL 
|| 
6214         checkType(c
,o
,REDIS_HASH
)) return; 
6216     if (o
->encoding 
== REDIS_ENCODING_ZIPMAP
) { 
6218         unsigned char *zm 
= o
->ptr
; 
6220         field 
= getDecodedObject(c
->argv
[2]); 
6221         exists 
= zipmapExists(zm
,field
->ptr
,sdslen(field
->ptr
)); 
6222         decrRefCount(field
); 
6224         exists 
= dictFind(o
->ptr
,c
->argv
[2]) != NULL
; 
6226     addReply(c
,exists 
? shared
.cone 
: shared
.czero
); 
6229 static void convertToRealHash(robj 
*o
) { 
6230     unsigned char *key
, *val
, *p
, *zm 
= o
->ptr
; 
6231     unsigned int klen
, vlen
; 
6232     dict 
*dict 
= dictCreate(&hashDictType
,NULL
); 
6234     assert(o
->type 
== REDIS_HASH 
&& o
->encoding 
!= REDIS_ENCODING_HT
); 
6235     p 
= zipmapRewind(zm
); 
6236     while((p 
= zipmapNext(p
,&key
,&klen
,&val
,&vlen
)) != NULL
) { 
6237         robj 
*keyobj
, *valobj
; 
6239         keyobj 
= createStringObject((char*)key
,klen
); 
6240         valobj 
= createStringObject((char*)val
,vlen
); 
6241         tryObjectEncoding(keyobj
); 
6242         tryObjectEncoding(valobj
); 
6243         dictAdd(dict
,keyobj
,valobj
); 
6245     o
->encoding 
= REDIS_ENCODING_HT
; 
6250 /* ========================= Non type-specific commands  ==================== */ 
6252 static void flushdbCommand(redisClient 
*c
) { 
6253     server
.dirty 
+= dictSize(c
->db
->dict
); 
6254     dictEmpty(c
->db
->dict
); 
6255     dictEmpty(c
->db
->expires
); 
6256     addReply(c
,shared
.ok
); 
6259 static void flushallCommand(redisClient 
*c
) { 
6260     server
.dirty 
+= emptyDb(); 
6261     addReply(c
,shared
.ok
); 
6262     if (server
.bgsavechildpid 
!= -1) { 
6263         kill(server
.bgsavechildpid
,SIGKILL
); 
6264         rdbRemoveTempFile(server
.bgsavechildpid
); 
6266     rdbSave(server
.dbfilename
); 
6270 static redisSortOperation 
*createSortOperation(int type
, robj 
*pattern
) { 
6271     redisSortOperation 
*so 
= zmalloc(sizeof(*so
)); 
6273     so
->pattern 
= pattern
; 
6277 /* Return the value associated to the key with a name obtained 
6278  * substituting the first occurence of '*' in 'pattern' with 'subst' */ 
6279 static robj 
*lookupKeyByPattern(redisDb 
*db
, robj 
*pattern
, robj 
*subst
) { 
6283     int prefixlen
, sublen
, postfixlen
; 
6284     /* Expoit the internal sds representation to create a sds string allocated on the stack in order to make this function faster */ 
6288         char buf
[REDIS_SORTKEY_MAX
+1]; 
6291     /* If the pattern is "#" return the substitution object itself in order 
6292      * to implement the "SORT ... GET #" feature. */ 
6293     spat 
= pattern
->ptr
; 
6294     if (spat
[0] == '#' && spat
[1] == '\0') { 
6298     /* The substitution object may be specially encoded. If so we create 
6299      * a decoded object on the fly. Otherwise getDecodedObject will just 
6300      * increment the ref count, that we'll decrement later. */ 
6301     subst 
= getDecodedObject(subst
); 
6304     if (sdslen(spat
)+sdslen(ssub
)-1 > REDIS_SORTKEY_MAX
) return NULL
; 
6305     p 
= strchr(spat
,'*'); 
6307         decrRefCount(subst
); 
6312     sublen 
= sdslen(ssub
); 
6313     postfixlen 
= sdslen(spat
)-(prefixlen
+1); 
6314     memcpy(keyname
.buf
,spat
,prefixlen
); 
6315     memcpy(keyname
.buf
+prefixlen
,ssub
,sublen
); 
6316     memcpy(keyname
.buf
+prefixlen
+sublen
,p
+1,postfixlen
); 
6317     keyname
.buf
[prefixlen
+sublen
+postfixlen
] = '\0'; 
6318     keyname
.len 
= prefixlen
+sublen
+postfixlen
; 
6320     initStaticStringObject(keyobj
,((char*)&keyname
)+(sizeof(long)*2)) 
6321     decrRefCount(subst
); 
6323     /* printf("lookup '%s' => %p\n", keyname.buf,de); */ 
6324     return lookupKeyRead(db
,&keyobj
); 
6327 /* sortCompare() is used by qsort in sortCommand(). Given that qsort_r with 
6328  * the additional parameter is not standard but a BSD-specific we have to 
6329  * pass sorting parameters via the global 'server' structure */ 
6330 static int sortCompare(const void *s1
, const void *s2
) { 
6331     const redisSortObject 
*so1 
= s1
, *so2 
= s2
; 
6334     if (!server
.sort_alpha
) { 
6335         /* Numeric sorting. Here it's trivial as we precomputed scores */ 
6336         if (so1
->u
.score 
> so2
->u
.score
) { 
6338         } else if (so1
->u
.score 
< so2
->u
.score
) { 
6344         /* Alphanumeric sorting */ 
6345         if (server
.sort_bypattern
) { 
6346             if (!so1
->u
.cmpobj 
|| !so2
->u
.cmpobj
) { 
6347                 /* At least one compare object is NULL */ 
6348                 if (so1
->u
.cmpobj 
== so2
->u
.cmpobj
) 
6350                 else if (so1
->u
.cmpobj 
== NULL
) 
6355                 /* We have both the objects, use strcoll */ 
6356                 cmp 
= strcoll(so1
->u
.cmpobj
->ptr
,so2
->u
.cmpobj
->ptr
); 
6359             /* Compare elements directly */ 
6362             dec1 
= getDecodedObject(so1
->obj
); 
6363             dec2 
= getDecodedObject(so2
->obj
); 
6364             cmp 
= strcoll(dec1
->ptr
,dec2
->ptr
); 
6369     return server
.sort_desc 
? -cmp 
: cmp
; 
6372 /* The SORT command is the most complex command in Redis. Warning: this code 
6373  * is optimized for speed and a bit less for readability */ 
6374 static void sortCommand(redisClient 
*c
) { 
6377     int desc 
= 0, alpha 
= 0; 
6378     int limit_start 
= 0, limit_count 
= -1, start
, end
; 
6379     int j
, dontsort 
= 0, vectorlen
; 
6380     int getop 
= 0; /* GET operation counter */ 
6381     robj 
*sortval
, *sortby 
= NULL
, *storekey 
= NULL
; 
6382     redisSortObject 
*vector
; /* Resulting vector to sort */ 
6384     /* Lookup the key to sort. It must be of the right types */ 
6385     sortval 
= lookupKeyRead(c
->db
,c
->argv
[1]); 
6386     if (sortval 
== NULL
) { 
6387         addReply(c
,shared
.nullmultibulk
); 
6390     if (sortval
->type 
!= REDIS_SET 
&& sortval
->type 
!= REDIS_LIST 
&& 
6391         sortval
->type 
!= REDIS_ZSET
) 
6393         addReply(c
,shared
.wrongtypeerr
); 
6397     /* Create a list of operations to perform for every sorted element. 
6398      * Operations can be GET/DEL/INCR/DECR */ 
6399     operations 
= listCreate(); 
6400     listSetFreeMethod(operations
,zfree
); 
6403     /* Now we need to protect sortval incrementing its count, in the future 
6404      * SORT may have options able to overwrite/delete keys during the sorting 
6405      * and the sorted key itself may get destroied */ 
6406     incrRefCount(sortval
); 
6408     /* The SORT command has an SQL-alike syntax, parse it */ 
6409     while(j 
< c
->argc
) { 
6410         int leftargs 
= c
->argc
-j
-1; 
6411         if (!strcasecmp(c
->argv
[j
]->ptr
,"asc")) { 
6413         } else if (!strcasecmp(c
->argv
[j
]->ptr
,"desc")) { 
6415         } else if (!strcasecmp(c
->argv
[j
]->ptr
,"alpha")) { 
6417         } else if (!strcasecmp(c
->argv
[j
]->ptr
,"limit") && leftargs 
>= 2) { 
6418             limit_start 
= atoi(c
->argv
[j
+1]->ptr
); 
6419             limit_count 
= atoi(c
->argv
[j
+2]->ptr
); 
6421         } else if (!strcasecmp(c
->argv
[j
]->ptr
,"store") && leftargs 
>= 1) { 
6422             storekey 
= c
->argv
[j
+1]; 
6424         } else if (!strcasecmp(c
->argv
[j
]->ptr
,"by") && leftargs 
>= 1) { 
6425             sortby 
= c
->argv
[j
+1]; 
6426             /* If the BY pattern does not contain '*', i.e. it is constant, 
6427              * we don't need to sort nor to lookup the weight keys. */ 
6428             if (strchr(c
->argv
[j
+1]->ptr
,'*') == NULL
) dontsort 
= 1; 
6430         } else if (!strcasecmp(c
->argv
[j
]->ptr
,"get") && leftargs 
>= 1) { 
6431             listAddNodeTail(operations
,createSortOperation( 
6432                 REDIS_SORT_GET
,c
->argv
[j
+1])); 
6436             decrRefCount(sortval
); 
6437             listRelease(operations
); 
6438             addReply(c
,shared
.syntaxerr
); 
6444     /* Load the sorting vector with all the objects to sort */ 
6445     switch(sortval
->type
) { 
6446     case REDIS_LIST
: vectorlen 
= listLength((list
*)sortval
->ptr
); break; 
6447     case REDIS_SET
: vectorlen 
=  dictSize((dict
*)sortval
->ptr
); break; 
6448     case REDIS_ZSET
: vectorlen 
= dictSize(((zset
*)sortval
->ptr
)->dict
); break; 
6449     default: vectorlen 
= 0; redisAssert(0); /* Avoid GCC warning */ 
6451     vector 
= zmalloc(sizeof(redisSortObject
)*vectorlen
); 
6454     if (sortval
->type 
== REDIS_LIST
) { 
6455         list 
*list 
= sortval
->ptr
; 
6459         listRewind(list
,&li
); 
6460         while((ln 
= listNext(&li
))) { 
6461             robj 
*ele 
= ln
->value
; 
6462             vector
[j
].obj 
= ele
; 
6463             vector
[j
].u
.score 
= 0; 
6464             vector
[j
].u
.cmpobj 
= NULL
; 
6472         if (sortval
->type 
== REDIS_SET
) { 
6475             zset 
*zs 
= sortval
->ptr
; 
6479         di 
= dictGetIterator(set
); 
6480         while((setele 
= dictNext(di
)) != NULL
) { 
6481             vector
[j
].obj 
= dictGetEntryKey(setele
); 
6482             vector
[j
].u
.score 
= 0; 
6483             vector
[j
].u
.cmpobj 
= NULL
; 
6486         dictReleaseIterator(di
); 
6488     redisAssert(j 
== vectorlen
); 
6490     /* Now it's time to load the right scores in the sorting vector */ 
6491     if (dontsort 
== 0) { 
6492         for (j 
= 0; j 
< vectorlen
; j
++) { 
6496                 byval 
= lookupKeyByPattern(c
->db
,sortby
,vector
[j
].obj
); 
6497                 if (!byval 
|| byval
->type 
!= REDIS_STRING
) continue; 
6499                     vector
[j
].u
.cmpobj 
= getDecodedObject(byval
); 
6501                     if (byval
->encoding 
== REDIS_ENCODING_RAW
) { 
6502                         vector
[j
].u
.score 
= strtod(byval
->ptr
,NULL
); 
6504                         /* Don't need to decode the object if it's 
6505                          * integer-encoded (the only encoding supported) so 
6506                          * far. We can just cast it */ 
6507                         if (byval
->encoding 
== REDIS_ENCODING_INT
) { 
6508                             vector
[j
].u
.score 
= (long)byval
->ptr
; 
6510                             redisAssert(1 != 1); 
6515                     if (vector
[j
].obj
->encoding 
== REDIS_ENCODING_RAW
) 
6516                         vector
[j
].u
.score 
= strtod(vector
[j
].obj
->ptr
,NULL
); 
6518                         if (vector
[j
].obj
->encoding 
== REDIS_ENCODING_INT
) 
6519                             vector
[j
].u
.score 
= (long) vector
[j
].obj
->ptr
; 
6521                             redisAssert(1 != 1); 
6528     /* We are ready to sort the vector... perform a bit of sanity check 
6529      * on the LIMIT option too. We'll use a partial version of quicksort. */ 
6530     start 
= (limit_start 
< 0) ? 0 : limit_start
; 
6531     end 
= (limit_count 
< 0) ? vectorlen
-1 : start
+limit_count
-1; 
6532     if (start 
>= vectorlen
) { 
6533         start 
= vectorlen
-1; 
6536     if (end 
>= vectorlen
) end 
= vectorlen
-1; 
6538     if (dontsort 
== 0) { 
6539         server
.sort_desc 
= desc
; 
6540         server
.sort_alpha 
= alpha
; 
6541         server
.sort_bypattern 
= sortby 
? 1 : 0; 
6542         if (sortby 
&& (start 
!= 0 || end 
!= vectorlen
-1)) 
6543             pqsort(vector
,vectorlen
,sizeof(redisSortObject
),sortCompare
, start
,end
); 
6545             qsort(vector
,vectorlen
,sizeof(redisSortObject
),sortCompare
); 
6548     /* Send command output to the output buffer, performing the specified 
6549      * GET/DEL/INCR/DECR operations if any. */ 
6550     outputlen 
= getop 
? getop
*(end
-start
+1) : end
-start
+1; 
6551     if (storekey 
== NULL
) { 
6552         /* STORE option not specified, sent the sorting result to client */ 
6553         addReplySds(c
,sdscatprintf(sdsempty(),"*%d\r\n",outputlen
)); 
6554         for (j 
= start
; j 
<= end
; j
++) { 
6558             if (!getop
) addReplyBulk(c
,vector
[j
].obj
); 
6559             listRewind(operations
,&li
); 
6560             while((ln 
= listNext(&li
))) { 
6561                 redisSortOperation 
*sop 
= ln
->value
; 
6562                 robj 
*val 
= lookupKeyByPattern(c
->db
,sop
->pattern
, 
6565                 if (sop
->type 
== REDIS_SORT_GET
) { 
6566                     if (!val 
|| val
->type 
!= REDIS_STRING
) { 
6567                         addReply(c
,shared
.nullbulk
); 
6569                         addReplyBulk(c
,val
); 
6572                     redisAssert(sop
->type 
== REDIS_SORT_GET
); /* always fails */ 
6577         robj 
*listObject 
= createListObject(); 
6578         list 
*listPtr 
= (list
*) listObject
->ptr
; 
6580         /* STORE option specified, set the sorting result as a List object */ 
6581         for (j 
= start
; j 
<= end
; j
++) { 
6586                 listAddNodeTail(listPtr
,vector
[j
].obj
); 
6587                 incrRefCount(vector
[j
].obj
); 
6589             listRewind(operations
,&li
); 
6590             while((ln 
= listNext(&li
))) { 
6591                 redisSortOperation 
*sop 
= ln
->value
; 
6592                 robj 
*val 
= lookupKeyByPattern(c
->db
,sop
->pattern
, 
6595                 if (sop
->type 
== REDIS_SORT_GET
) { 
6596                     if (!val 
|| val
->type 
!= REDIS_STRING
) { 
6597                         listAddNodeTail(listPtr
,createStringObject("",0)); 
6599                         listAddNodeTail(listPtr
,val
); 
6603                     redisAssert(sop
->type 
== REDIS_SORT_GET
); /* always fails */ 
6607         if (dictReplace(c
->db
->dict
,storekey
,listObject
)) { 
6608             incrRefCount(storekey
); 
6610         /* Note: we add 1 because the DB is dirty anyway since even if the 
6611          * SORT result is empty a new key is set and maybe the old content 
6613         server
.dirty 
+= 1+outputlen
; 
6614         addReplySds(c
,sdscatprintf(sdsempty(),":%d\r\n",outputlen
)); 
6618     decrRefCount(sortval
); 
6619     listRelease(operations
); 
6620     for (j 
= 0; j 
< vectorlen
; j
++) { 
6621         if (sortby 
&& alpha 
&& vector
[j
].u
.cmpobj
) 
6622             decrRefCount(vector
[j
].u
.cmpobj
); 
/* Convert an amount of bytes into a human readable string in the form
 * of 100B, 2G, 100M, 4K, and so forth.
 *
 * 's' is the destination buffer, 'n' the number of bytes. Values below 1024
 * are printed as an integer followed by 'B'; larger values are printed with
 * two decimals in the largest binary unit among K, M, G, T and P that keeps
 * the number below 1024. Values too large even for P fall back to the plain
 * byte count, so the buffer is always written.
 *
 * The caller must provide a buffer large enough for the result (at most 21
 * characters plus the terminator for the byte-count form). */
static void bytesToHuman(char *s, unsigned long long n) {
    static const char units[] = "KMGTP";
    const int lastunit = (int)sizeof(units)-2; /* index of 'P' */
    unsigned long long unit = 1024;
    int idx = 0;

    if (n < 1024) {
        /* Bytes */
        sprintf(s,"%lluB",n);
        return;
    }

    /* Climb to the largest unit such that n/unit < 1024. 'unit' never
     * exceeds 1024^5 here, so unit*1024 cannot overflow 64 bits. */
    while (idx < lastunit && n >= unit*1024) {
        unit *= 1024;
        idx++;
    }

    if (n >= unit*1024) {
        /* Beyond the largest known unit: print the raw byte count. */
        sprintf(s,"%lluB",n);
        return;
    }
    sprintf(s,"%.2f%c",(double)n/unit,units[idx]);
}
6648 /* Create the string returned by the INFO command. This is decoupled 
6649  * by the INFO command itself as we need to report the same information 
6650  * on memory corruption problems. */ 
6651 static sds 
genRedisInfoString(void) { 
6653     time_t uptime 
= time(NULL
)-server
.stat_starttime
; 
6657     bytesToHuman(hmem
,zmalloc_used_memory()); 
6658     info 
= sdscatprintf(sdsempty(), 
6659         "redis_version:%s\r\n" 
6661         "multiplexing_api:%s\r\n" 
6662         "process_id:%ld\r\n" 
6663         "uptime_in_seconds:%ld\r\n" 
6664         "uptime_in_days:%ld\r\n" 
6665         "connected_clients:%d\r\n" 
6666         "connected_slaves:%d\r\n" 
6667         "blocked_clients:%d\r\n" 
6668         "used_memory:%zu\r\n" 
6669         "used_memory_human:%s\r\n" 
6670         "changes_since_last_save:%lld\r\n" 
6671         "bgsave_in_progress:%d\r\n" 
6672         "last_save_time:%ld\r\n" 
6673         "bgrewriteaof_in_progress:%d\r\n" 
6674         "total_connections_received:%lld\r\n" 
6675         "total_commands_processed:%lld\r\n" 
6676         "expired_keys:%lld\r\n" 
6677         "hash_max_zipmap_entries:%ld\r\n" 
6678         "hash_max_zipmap_value:%ld\r\n" 
6679         "pubsub_channels:%ld\r\n" 
6680         "pubsub_patterns:%u\r\n" 
6684         (sizeof(long) == 8) ? "64" : "32", 
6689         listLength(server
.clients
)-listLength(server
.slaves
), 
6690         listLength(server
.slaves
), 
6691         server
.blpop_blocked_clients
, 
6692         zmalloc_used_memory(), 
6695         server
.bgsavechildpid 
!= -1, 
6697         server
.bgrewritechildpid 
!= -1, 
6698         server
.stat_numconnections
, 
6699         server
.stat_numcommands
, 
6700         server
.stat_expiredkeys
, 
6701         server
.hash_max_zipmap_entries
, 
6702         server
.hash_max_zipmap_value
, 
6703         dictSize(server
.pubsub_channels
), 
6704         listLength(server
.pubsub_patterns
), 
6705         server
.vm_enabled 
!= 0, 
6706         server
.masterhost 
== NULL 
? "master" : "slave" 
6708     if (server
.masterhost
) { 
6709         info 
= sdscatprintf(info
, 
6710             "master_host:%s\r\n" 
6711             "master_port:%d\r\n" 
6712             "master_link_status:%s\r\n" 
6713             "master_last_io_seconds_ago:%d\r\n" 
6716             (server
.replstate 
== REDIS_REPL_CONNECTED
) ? 
6718             server
.master 
? ((int)(time(NULL
)-server
.master
->lastinteraction
)) : -1 
6721     if (server
.vm_enabled
) { 
6723         info 
= sdscatprintf(info
, 
6724             "vm_conf_max_memory:%llu\r\n" 
6725             "vm_conf_page_size:%llu\r\n" 
6726             "vm_conf_pages:%llu\r\n" 
6727             "vm_stats_used_pages:%llu\r\n" 
6728             "vm_stats_swapped_objects:%llu\r\n" 
6729             "vm_stats_swappin_count:%llu\r\n" 
6730             "vm_stats_swappout_count:%llu\r\n" 
6731             "vm_stats_io_newjobs_len:%lu\r\n" 
6732             "vm_stats_io_processing_len:%lu\r\n" 
6733             "vm_stats_io_processed_len:%lu\r\n" 
6734             "vm_stats_io_active_threads:%lu\r\n" 
6735             "vm_stats_blocked_clients:%lu\r\n" 
6736             ,(unsigned long long) server
.vm_max_memory
, 
6737             (unsigned long long) server
.vm_page_size
, 
6738             (unsigned long long) server
.vm_pages
, 
6739             (unsigned long long) server
.vm_stats_used_pages
, 
6740             (unsigned long long) server
.vm_stats_swapped_objects
, 
6741             (unsigned long long) server
.vm_stats_swapins
, 
6742             (unsigned long long) server
.vm_stats_swapouts
, 
6743             (unsigned long) listLength(server
.io_newjobs
), 
6744             (unsigned long) listLength(server
.io_processing
), 
6745             (unsigned long) listLength(server
.io_processed
), 
6746             (unsigned long) server
.io_active_threads
, 
6747             (unsigned long) server
.vm_blocked_clients
 
6751     for (j 
= 0; j 
< server
.dbnum
; j
++) { 
6752         long long keys
, vkeys
; 
6754         keys 
= dictSize(server
.db
[j
].dict
); 
6755         vkeys 
= dictSize(server
.db
[j
].expires
); 
6756         if (keys 
|| vkeys
) { 
6757             info 
= sdscatprintf(info
, "db%d:keys=%lld,expires=%lld\r\n", 
6764 static void infoCommand(redisClient 
*c
) { 
6765     sds info 
= genRedisInfoString(); 
6766     addReplySds(c
,sdscatprintf(sdsempty(),"$%lu\r\n", 
6767         (unsigned long)sdslen(info
))); 
6768     addReplySds(c
,info
); 
6769     addReply(c
,shared
.crlf
); 
6772 static void monitorCommand(redisClient 
*c
) { 
6773     /* ignore MONITOR if aleady slave or in monitor mode */ 
6774     if (c
->flags 
& REDIS_SLAVE
) return; 
6776     c
->flags 
|= (REDIS_SLAVE
|REDIS_MONITOR
); 
6778     listAddNodeTail(server
.monitors
,c
); 
6779     addReply(c
,shared
.ok
); 
6782 /* ================================= Expire ================================= */ 
6783 static int removeExpire(redisDb 
*db
, robj 
*key
) { 
6784     if (dictDelete(db
->expires
,key
) == DICT_OK
) { 
6791 static int setExpire(redisDb 
*db
, robj 
*key
, time_t when
) { 
6792     if (dictAdd(db
->expires
,key
,(void*)when
) == DICT_ERR
) { 
6800 /* Return the expire time of the specified key, or -1 if no expire 
6801  * is associated with this key (i.e. the key is non volatile) */ 
6802 static time_t getExpire(redisDb 
*db
, robj 
*key
) { 
6805     /* No expire? return ASAP */ 
6806     if (dictSize(db
->expires
) == 0 || 
6807        (de 
= dictFind(db
->expires
,key
)) == NULL
) return -1; 
6809     return (time_t) dictGetEntryVal(de
); 
6812 static int expireIfNeeded(redisDb 
*db
, robj 
*key
) { 
6816     /* No expire? return ASAP */ 
6817     if (dictSize(db
->expires
) == 0 || 
6818        (de 
= dictFind(db
->expires
,key
)) == NULL
) return 0; 
6820     /* Lookup the expire */ 
6821     when 
= (time_t) dictGetEntryVal(de
); 
6822     if (time(NULL
) <= when
) return 0; 
6824     /* Delete the key */ 
6825     dictDelete(db
->expires
,key
); 
6826     server
.stat_expiredkeys
++; 
6827     return dictDelete(db
->dict
,key
) == DICT_OK
; 
6830 static int deleteIfVolatile(redisDb 
*db
, robj 
*key
) { 
6833     /* No expire? return ASAP */ 
6834     if (dictSize(db
->expires
) == 0 || 
6835        (de 
= dictFind(db
->expires
,key
)) == NULL
) return 0; 
6837     /* Delete the key */ 
6839     server
.stat_expiredkeys
++; 
6840     dictDelete(db
->expires
,key
); 
6841     return dictDelete(db
->dict
,key
) == DICT_OK
; 
6844 static void expireGenericCommand(redisClient 
*c
, robj 
*key
, time_t seconds
) { 
6847     de 
= dictFind(c
->db
->dict
,key
); 
6849         addReply(c
,shared
.czero
); 
6853         if (deleteKey(c
->db
,key
)) server
.dirty
++; 
6854         addReply(c
, shared
.cone
); 
6857         time_t when 
= time(NULL
)+seconds
; 
6858         if (setExpire(c
->db
,key
,when
)) { 
6859             addReply(c
,shared
.cone
); 
6862             addReply(c
,shared
.czero
); 
6868 static void expireCommand(redisClient 
*c
) { 
6869     expireGenericCommand(c
,c
->argv
[1],strtol(c
->argv
[2]->ptr
,NULL
,10)); 
6872 static void expireatCommand(redisClient 
*c
) { 
6873     expireGenericCommand(c
,c
->argv
[1],strtol(c
->argv
[2]->ptr
,NULL
,10)-time(NULL
)); 
6876 static void ttlCommand(redisClient 
*c
) { 
6880     expire 
= getExpire(c
->db
,c
->argv
[1]); 
6882         ttl 
= (int) (expire
-time(NULL
)); 
6883         if (ttl 
< 0) ttl 
= -1; 
6885     addReplySds(c
,sdscatprintf(sdsempty(),":%d\r\n",ttl
)); 
6888 /* ================================ MULTI/EXEC ============================== */ 
6890 /* Client state initialization for MULTI/EXEC */ 
6891 static void initClientMultiState(redisClient 
*c
) { 
6892     c
->mstate
.commands 
= NULL
; 
6893     c
->mstate
.count 
= 0; 
6896 /* Release all the resources associated with MULTI/EXEC state */ 
6897 static void freeClientMultiState(redisClient 
*c
) { 
6900     for (j 
= 0; j 
< c
->mstate
.count
; j
++) { 
6902         multiCmd 
*mc 
= c
->mstate
.commands
+j
; 
6904         for (i 
= 0; i 
< mc
->argc
; i
++) 
6905             decrRefCount(mc
->argv
[i
]); 
6908     zfree(c
->mstate
.commands
); 
6911 /* Add a new command into the MULTI commands queue */ 
6912 static void queueMultiCommand(redisClient 
*c
, struct redisCommand 
*cmd
) { 
6916     c
->mstate
.commands 
= zrealloc(c
->mstate
.commands
, 
6917             sizeof(multiCmd
)*(c
->mstate
.count
+1)); 
6918     mc 
= c
->mstate
.commands
+c
->mstate
.count
; 
6921     mc
->argv 
= zmalloc(sizeof(robj
*)*c
->argc
); 
6922     memcpy(mc
->argv
,c
->argv
,sizeof(robj
*)*c
->argc
); 
6923     for (j 
= 0; j 
< c
->argc
; j
++) 
6924         incrRefCount(mc
->argv
[j
]); 
6928 static void multiCommand(redisClient 
*c
) { 
6929     c
->flags 
|= REDIS_MULTI
; 
6930     addReply(c
,shared
.ok
); 
6933 static void discardCommand(redisClient 
*c
) { 
6934     if (!(c
->flags 
& REDIS_MULTI
)) { 
6935         addReplySds(c
,sdsnew("-ERR DISCARD without MULTI\r\n")); 
6939     freeClientMultiState(c
); 
6940     initClientMultiState(c
); 
6941     c
->flags 
&= (~REDIS_MULTI
); 
6942     addReply(c
,shared
.ok
); 
6945 static void execCommand(redisClient 
*c
) { 
6950     if (!(c
->flags 
& REDIS_MULTI
)) { 
6951         addReplySds(c
,sdsnew("-ERR EXEC without MULTI\r\n")); 
6955     orig_argv 
= c
->argv
; 
6956     orig_argc 
= c
->argc
; 
6957     addReplySds(c
,sdscatprintf(sdsempty(),"*%d\r\n",c
->mstate
.count
)); 
6958     for (j 
= 0; j 
< c
->mstate
.count
; j
++) { 
6959         c
->argc 
= c
->mstate
.commands
[j
].argc
; 
6960         c
->argv 
= c
->mstate
.commands
[j
].argv
; 
6961         call(c
,c
->mstate
.commands
[j
].cmd
); 
6963     c
->argv 
= orig_argv
; 
6964     c
->argc 
= orig_argc
; 
6965     freeClientMultiState(c
); 
6966     initClientMultiState(c
); 
6967     c
->flags 
&= (~REDIS_MULTI
); 
6970 /* =========================== Blocking Operations  ========================= */ 
6972 /* Currently Redis blocking operations support is limited to list POP ops, 
6973  * so the current implementation is not fully generic, but it is also not 
6974  * completely specific so it will not require a rewrite to support new 
6975  * kind of blocking operations in the future. 
6977  * Still it's important to note that list blocking operations can be already 
6978  * used as a notification mechanism in order to implement other blocking 
6979  * operations at application level, so there must be a very strong evidence 
6980  * of usefulness and generality before new blocking operations are implemented. 
6982  * This is how the current blocking POP works, we use BLPOP as example: 
6983  * - If the user calls BLPOP and the key exists and contains a non empty list 
6984  *   then LPOP is called instead. So BLPOP is semantically the same as LPOP 
6985  *   if there is no need to block. 
6986  * - If instead BLPOP is called and the key does not exist or the list is 
6987  *   empty we need to block. In order to do so we remove the notification for 
6988  *   new data to read in the client socket (so that we'll not serve new 
6989  *   requests if the blocking request is not served). Also we put the client 
6990  *   in a dictionary (db->blockingkeys) mapping keys to a list of clients 
6991  *   blocking for these keys. 
6992  * - If a PUSH operation against a key with blocked clients waiting is 
6993  *   performed, we serve the first in the list: basically instead of pushing 
6994  *   the new element inside the list we return it to the (first / oldest) 
6995  *   blocking client, unblock the client, and remove it from the list. 
6997  * The above comment and the source code should be enough in order to understand 
6998  * the implementation and modify / fix it later. 
7001 /* Set a client in blocking mode for the specified key, with the specified 
7003 static void blockForKeys(redisClient 
*c
, robj 
**keys
, int numkeys
, time_t timeout
) { 
7008     c
->blockingkeys 
= zmalloc(sizeof(robj
*)*numkeys
); 
7009     c
->blockingkeysnum 
= numkeys
; 
7010     c
->blockingto 
= timeout
; 
7011     for (j 
= 0; j 
< numkeys
; j
++) { 
7012         /* Add the key in the client structure, to map clients -> keys */ 
7013         c
->blockingkeys
[j
] = keys
[j
]; 
7014         incrRefCount(keys
[j
]); 
7016         /* And in the other "side", to map keys -> clients */ 
7017         de 
= dictFind(c
->db
->blockingkeys
,keys
[j
]); 
7021             /* For every key we take a list of clients blocked for it */ 
7023             retval 
= dictAdd(c
->db
->blockingkeys
,keys
[j
],l
); 
7024             incrRefCount(keys
[j
]); 
7025             assert(retval 
== DICT_OK
); 
7027             l 
= dictGetEntryVal(de
); 
7029         listAddNodeTail(l
,c
); 
7031     /* Mark the client as a blocked client */ 
7032     c
->flags 
|= REDIS_BLOCKED
; 
7033     server
.blpop_blocked_clients
++; 
7036 /* Unblock a client that's waiting in a blocking operation such as BLPOP */ 
7037 static void unblockClientWaitingData(redisClient 
*c
) { 
7042     assert(c
->blockingkeys 
!= NULL
); 
7043     /* The client may wait for multiple keys, so unblock it for every key. */ 
7044     for (j 
= 0; j 
< c
->blockingkeysnum
; j
++) { 
7045         /* Remove this client from the list of clients waiting for this key. */ 
7046         de 
= dictFind(c
->db
->blockingkeys
,c
->blockingkeys
[j
]); 
7048         l 
= dictGetEntryVal(de
); 
7049         listDelNode(l
,listSearchKey(l
,c
)); 
7050         /* If the list is empty we need to remove it to avoid wasting memory */ 
7051         if (listLength(l
) == 0) 
7052             dictDelete(c
->db
->blockingkeys
,c
->blockingkeys
[j
]); 
7053         decrRefCount(c
->blockingkeys
[j
]); 
7055     /* Cleanup the client structure */ 
7056     zfree(c
->blockingkeys
); 
7057     c
->blockingkeys 
= NULL
; 
7058     c
->flags 
&= (~REDIS_BLOCKED
); 
7059     server
.blpop_blocked_clients
--; 
7060     /* We want to process data if there is some command waiting 
7061      * in the input buffer. Note that this is safe even if 
7062      * unblockClientWaitingData() gets called from freeClient() because 
7063      * freeClient() will be smart enough to call this function 
7064      * *after* c->querybuf was set to NULL. */ 
7065     if (c
->querybuf 
&& sdslen(c
->querybuf
) > 0) processInputBuffer(c
); 
7068 /* This should be called from any function PUSHing into lists. 
7069  * 'c' is the "pushing client", 'key' is the key it is pushing data against, 
7070  * 'ele' is the element pushed. 
7072  * If the function returns 0 there was no client waiting for a list push 
7075  * If the function returns 1 there was a client waiting for a list push 
7076  * against this key, the element was passed to this client thus it's not 
7077  * needed to actually add it to the list and the caller should return asap. */ 
7078 static int handleClientsWaitingListPush(redisClient 
*c
, robj 
*key
, robj 
*ele
) { 
7079     struct dictEntry 
*de
; 
7080     redisClient 
*receiver
; 
7084     de 
= dictFind(c
->db
->blockingkeys
,key
); 
7085     if (de 
== NULL
) return 0; 
7086     l 
= dictGetEntryVal(de
); 
7089     receiver 
= ln
->value
; 
7091     addReplySds(receiver
,sdsnew("*2\r\n")); 
7092     addReplyBulk(receiver
,key
); 
7093     addReplyBulk(receiver
,ele
); 
7094     unblockClientWaitingData(receiver
); 
7098 /* Blocking RPOP/LPOP */ 
7099 static void blockingPopGenericCommand(redisClient 
*c
, int where
) { 
7104     for (j 
= 1; j 
< c
->argc
-1; j
++) { 
7105         o 
= lookupKeyWrite(c
->db
,c
->argv
[j
]); 
7107             if (o
->type 
!= REDIS_LIST
) { 
7108                 addReply(c
,shared
.wrongtypeerr
); 
7111                 list 
*list 
= o
->ptr
; 
7112                 if (listLength(list
) != 0) { 
7113                     /* If the list contains elements fall back to the usual 
7114                      * non-blocking POP operation */ 
7115                     robj 
*argv
[2], **orig_argv
; 
7118                     /* We need to alter the command arguments before to call 
7119                      * popGenericCommand() as the command takes a single key. */ 
7120                     orig_argv 
= c
->argv
; 
7121                     orig_argc 
= c
->argc
; 
7122                     argv
[1] = c
->argv
[j
]; 
7126                     /* Also the return value is different, we need to output 
7127                      * the multi bulk reply header and the key name. The 
7128                      * "real" command will add the last element (the value) 
7129                      * for us. If this souds like an hack to you it's just 
7130                      * because it is... */ 
7131                     addReplySds(c
,sdsnew("*2\r\n")); 
7132                     addReplyBulk(c
,argv
[1]); 
7133                     popGenericCommand(c
,where
); 
7135                     /* Fix the client structure with the original stuff */ 
7136                     c
->argv 
= orig_argv
; 
7137                     c
->argc 
= orig_argc
; 
7143     /* If the list is empty or the key does not exists we must block */ 
7144     timeout 
= strtol(c
->argv
[c
->argc
-1]->ptr
,NULL
,10); 
7145     if (timeout 
> 0) timeout 
+= time(NULL
); 
7146     blockForKeys(c
,c
->argv
+1,c
->argc
-2,timeout
); 
7149 static void blpopCommand(redisClient 
*c
) { 
7150     blockingPopGenericCommand(c
,REDIS_HEAD
); 
7153 static void brpopCommand(redisClient 
*c
) { 
7154     blockingPopGenericCommand(c
,REDIS_TAIL
); 
7157 /* =============================== Replication  ============================= */ 
7159 static int syncWrite(int fd
, char *ptr
, ssize_t size
, int timeout
) { 
7160     ssize_t nwritten
, ret 
= size
; 
7161     time_t start 
= time(NULL
); 
7165         if (aeWait(fd
,AE_WRITABLE
,1000) & AE_WRITABLE
) { 
7166             nwritten 
= write(fd
,ptr
,size
); 
7167             if (nwritten 
== -1) return -1; 
7171         if ((time(NULL
)-start
) > timeout
) { 
7179 static int syncRead(int fd
, char *ptr
, ssize_t size
, int timeout
) { 
7180     ssize_t nread
, totread 
= 0; 
7181     time_t start 
= time(NULL
); 
7185         if (aeWait(fd
,AE_READABLE
,1000) & AE_READABLE
) { 
7186             nread 
= read(fd
,ptr
,size
); 
7187             if (nread 
== -1) return -1; 
7192         if ((time(NULL
)-start
) > timeout
) { 
7200 static int syncReadLine(int fd
, char *ptr
, ssize_t size
, int timeout
) { 
7207         if (syncRead(fd
,&c
,1,timeout
) == -1) return -1; 
7210             if (nread 
&& *(ptr
-1) == '\r') *(ptr
-1) = '\0'; 
7221 static void syncCommand(redisClient 
*c
) { 
7222     /* ignore SYNC if aleady slave or in monitor mode */ 
7223     if (c
->flags 
& REDIS_SLAVE
) return; 
7225     /* SYNC can't be issued when the server has pending data to send to 
7226      * the client about already issued commands. We need a fresh reply 
7227      * buffer registering the differences between the BGSAVE and the current 
7228      * dataset, so that we can copy to other slaves if needed. */ 
7229     if (listLength(c
->reply
) != 0) { 
7230         addReplySds(c
,sdsnew("-ERR SYNC is invalid with pending input\r\n")); 
7234     redisLog(REDIS_NOTICE
,"Slave ask for synchronization"); 
7235     /* Here we need to check if there is a background saving operation 
7236      * in progress, or if it is required to start one */ 
7237     if (server
.bgsavechildpid 
!= -1) { 
7238         /* Ok a background save is in progress. Let's check if it is a good 
7239          * one for replication, i.e. if there is another slave that is 
7240          * registering differences since the server forked to save */ 
7245         listRewind(server
.slaves
,&li
); 
7246         while((ln 
= listNext(&li
))) { 
7248             if (slave
->replstate 
== REDIS_REPL_WAIT_BGSAVE_END
) break; 
7251             /* Perfect, the server is already registering differences for 
7252              * another slave. Set the right state, and copy the buffer. */ 
7253             listRelease(c
->reply
); 
7254             c
->reply 
= listDup(slave
->reply
); 
7255             c
->replstate 
= REDIS_REPL_WAIT_BGSAVE_END
; 
7256             redisLog(REDIS_NOTICE
,"Waiting for end of BGSAVE for SYNC"); 
7258             /* No way, we need to wait for the next BGSAVE in order to 
7259              * register differences */ 
7260             c
->replstate 
= REDIS_REPL_WAIT_BGSAVE_START
; 
7261             redisLog(REDIS_NOTICE
,"Waiting for next BGSAVE for SYNC"); 
7264         /* Ok we don't have a BGSAVE in progress, let's start one */ 
7265         redisLog(REDIS_NOTICE
,"Starting BGSAVE for SYNC"); 
7266         if (rdbSaveBackground(server
.dbfilename
) != REDIS_OK
) { 
7267             redisLog(REDIS_NOTICE
,"Replication failed, can't BGSAVE"); 
7268             addReplySds(c
,sdsnew("-ERR Unalbe to perform background save\r\n")); 
7271         c
->replstate 
= REDIS_REPL_WAIT_BGSAVE_END
; 
7274     c
->flags 
|= REDIS_SLAVE
; 
7276     listAddNodeTail(server
.slaves
,c
); 
7280 static void sendBulkToSlave(aeEventLoop 
*el
, int fd
, void *privdata
, int mask
) { 
7281     redisClient 
*slave 
= privdata
; 
7283     REDIS_NOTUSED(mask
); 
7284     char buf
[REDIS_IOBUF_LEN
]; 
7285     ssize_t nwritten
, buflen
; 
7287     if (slave
->repldboff 
== 0) { 
7288         /* Write the bulk write count before to transfer the DB. In theory here 
7289          * we don't know how much room there is in the output buffer of the 
7290          * socket, but in pratice SO_SNDLOWAT (the minimum count for output 
7291          * operations) will never be smaller than the few bytes we need. */ 
7294         bulkcount 
= sdscatprintf(sdsempty(),"$%lld\r\n",(unsigned long long) 
7296         if (write(fd
,bulkcount
,sdslen(bulkcount
)) != (signed)sdslen(bulkcount
)) 
7304     lseek(slave
->repldbfd
,slave
->repldboff
,SEEK_SET
); 
7305     buflen 
= read(slave
->repldbfd
,buf
,REDIS_IOBUF_LEN
); 
7307         redisLog(REDIS_WARNING
,"Read error sending DB to slave: %s", 
7308             (buflen 
== 0) ? "premature EOF" : strerror(errno
)); 
7312     if ((nwritten 
= write(fd
,buf
,buflen
)) == -1) { 
7313         redisLog(REDIS_VERBOSE
,"Write error sending DB to slave: %s", 
7318     slave
->repldboff 
+= nwritten
; 
7319     if (slave
->repldboff 
== slave
->repldbsize
) { 
7320         close(slave
->repldbfd
); 
7321         slave
->repldbfd 
= -1; 
7322         aeDeleteFileEvent(server
.el
,slave
->fd
,AE_WRITABLE
); 
7323         slave
->replstate 
= REDIS_REPL_ONLINE
; 
7324         if (aeCreateFileEvent(server
.el
, slave
->fd
, AE_WRITABLE
, 
7325             sendReplyToClient
, slave
) == AE_ERR
) { 
7329         addReplySds(slave
,sdsempty()); 
7330         redisLog(REDIS_NOTICE
,"Synchronization with slave succeeded"); 
7334 /* This function is called at the end of every background saving. 
7335  * The argument bgsaveerr is REDIS_OK if the background saving succeeded 
7336  * otherwise REDIS_ERR is passed to the function. 
7338  * The goal of this function is to handle slaves waiting for a successful 
7339  * background saving in order to perform non-blocking synchronization. */ 
7340 static void updateSlavesWaitingBgsave(int bgsaveerr
) { 
7342     int startbgsave 
= 0; 
7345     listRewind(server
.slaves
,&li
); 
7346     while((ln 
= listNext(&li
))) { 
7347         redisClient 
*slave 
= ln
->value
; 
7349         if (slave
->replstate 
== REDIS_REPL_WAIT_BGSAVE_START
) { 
7351             slave
->replstate 
= REDIS_REPL_WAIT_BGSAVE_END
; 
7352         } else if (slave
->replstate 
== REDIS_REPL_WAIT_BGSAVE_END
) { 
7353             struct redis_stat buf
; 
7355             if (bgsaveerr 
!= REDIS_OK
) { 
7357                 redisLog(REDIS_WARNING
,"SYNC failed. BGSAVE child returned an error"); 
7360             if ((slave
->repldbfd 
= open(server
.dbfilename
,O_RDONLY
)) == -1 || 
7361                 redis_fstat(slave
->repldbfd
,&buf
) == -1) { 
7363                 redisLog(REDIS_WARNING
,"SYNC failed. Can't open/stat DB after BGSAVE: %s", strerror(errno
)); 
7366             slave
->repldboff 
= 0; 
7367             slave
->repldbsize 
= buf
.st_size
; 
7368             slave
->replstate 
= REDIS_REPL_SEND_BULK
; 
7369             aeDeleteFileEvent(server
.el
,slave
->fd
,AE_WRITABLE
); 
7370             if (aeCreateFileEvent(server
.el
, slave
->fd
, AE_WRITABLE
, sendBulkToSlave
, slave
) == AE_ERR
) { 
7377         if (rdbSaveBackground(server
.dbfilename
) != REDIS_OK
) { 
7380             listRewind(server
.slaves
,&li
); 
7381             redisLog(REDIS_WARNING
,"SYNC failed. BGSAVE failed"); 
7382             while((ln 
= listNext(&li
))) { 
7383                 redisClient 
*slave 
= ln
->value
; 
7385                 if (slave
->replstate 
== REDIS_REPL_WAIT_BGSAVE_START
) 
7392 static int syncWithMaster(void) { 
7393     char buf
[1024], tmpfile
[256], authcmd
[1024]; 
7395     int fd 
= anetTcpConnect(NULL
,server
.masterhost
,server
.masterport
); 
7396     int dfd
, maxtries 
= 5; 
7399         redisLog(REDIS_WARNING
,"Unable to connect to MASTER: %s", 
7404     /* AUTH with the master if required. */ 
7405     if(server
.masterauth
) { 
7406         snprintf(authcmd
, 1024, "AUTH %s\r\n", server
.masterauth
); 
7407         if (syncWrite(fd
, authcmd
, strlen(server
.masterauth
)+7, 5) == -1) { 
7409             redisLog(REDIS_WARNING
,"Unable to AUTH to MASTER: %s", 
7413         /* Read the AUTH result.  */ 
7414         if (syncReadLine(fd
,buf
,1024,3600) == -1) { 
7416             redisLog(REDIS_WARNING
,"I/O error reading auth result from MASTER: %s", 
7420         if (buf
[0] != '+') { 
7422             redisLog(REDIS_WARNING
,"Cannot AUTH to MASTER, is the masterauth password correct?"); 
7427     /* Issue the SYNC command */ 
7428     if (syncWrite(fd
,"SYNC \r\n",7,5) == -1) { 
7430         redisLog(REDIS_WARNING
,"I/O error writing to MASTER: %s", 
7434     /* Read the bulk write count */ 
7435     if (syncReadLine(fd
,buf
,1024,3600) == -1) { 
7437         redisLog(REDIS_WARNING
,"I/O error reading bulk count from MASTER: %s", 
7441     if (buf
[0] != '$') { 
7443         redisLog(REDIS_WARNING
,"Bad protocol from MASTER, the first byte is not '$', are you sure the host and port are right?"); 
7446     dumpsize 
= strtol(buf
+1,NULL
,10); 
7447     redisLog(REDIS_NOTICE
,"Receiving %ld bytes data dump from MASTER",dumpsize
); 
7448     /* Read the bulk write data on a temp file */ 
7450         snprintf(tmpfile
,256, 
7451             "temp-%d.%ld.rdb",(int)time(NULL
),(long int)getpid()); 
7452         dfd 
= open(tmpfile
,O_CREAT
|O_WRONLY
|O_EXCL
,0644); 
7453         if (dfd 
!= -1) break; 
7458         redisLog(REDIS_WARNING
,"Opening the temp file needed for MASTER <-> SLAVE synchronization: %s",strerror(errno
)); 
7462         int nread
, nwritten
; 
7464         nread 
= read(fd
,buf
,(dumpsize 
< 1024)?dumpsize
:1024); 
7466             redisLog(REDIS_WARNING
,"I/O error trying to sync with MASTER: %s", 
7472         nwritten 
= write(dfd
,buf
,nread
); 
7473         if (nwritten 
== -1) { 
7474             redisLog(REDIS_WARNING
,"Write error writing to the DB dump file needed for MASTER <-> SLAVE synchrnonization: %s", strerror(errno
)); 
7482     if (rename(tmpfile
,server
.dbfilename
) == -1) { 
7483         redisLog(REDIS_WARNING
,"Failed trying to rename the temp DB into dump.rdb in MASTER <-> SLAVE synchronization: %s", strerror(errno
)); 
7489     if (rdbLoad(server
.dbfilename
) != REDIS_OK
) { 
7490         redisLog(REDIS_WARNING
,"Failed trying to load the MASTER synchronization DB from disk"); 
7494     server
.master 
= createClient(fd
); 
7495     server
.master
->flags 
|= REDIS_MASTER
; 
7496     server
.master
->authenticated 
= 1; 
7497     server
.replstate 
= REDIS_REPL_CONNECTED
; 
7501 static void slaveofCommand(redisClient 
*c
) { 
7502     if (!strcasecmp(c
->argv
[1]->ptr
,"no") && 
7503         !strcasecmp(c
->argv
[2]->ptr
,"one")) { 
7504         if (server
.masterhost
) { 
7505             sdsfree(server
.masterhost
); 
7506             server
.masterhost 
= NULL
; 
7507             if (server
.master
) freeClient(server
.master
); 
7508             server
.replstate 
= REDIS_REPL_NONE
; 
7509             redisLog(REDIS_NOTICE
,"MASTER MODE enabled (user request)"); 
7512         sdsfree(server
.masterhost
); 
7513         server
.masterhost 
= sdsdup(c
->argv
[1]->ptr
); 
7514         server
.masterport 
= atoi(c
->argv
[2]->ptr
); 
7515         if (server
.master
) freeClient(server
.master
); 
7516         server
.replstate 
= REDIS_REPL_CONNECT
; 
7517         redisLog(REDIS_NOTICE
,"SLAVE OF %s:%d enabled (user request)", 
7518             server
.masterhost
, server
.masterport
); 
7520     addReply(c
,shared
.ok
); 
7523 /* ============================ Maxmemory directive  ======================== */ 
7525 /* Try to free one object from the pre-allocated objects free list. 
7526  * This is useful under low mem conditions as by default we take 1 million 
7527  * free objects allocated. On success REDIS_OK is returned, otherwise 
7529 static int tryFreeOneObjectFromFreelist(void) { 
7532     if (server
.vm_enabled
) pthread_mutex_lock(&server
.obj_freelist_mutex
); 
7533     if (listLength(server
.objfreelist
)) { 
7534         listNode 
*head 
= listFirst(server
.objfreelist
); 
7535         o 
= listNodeValue(head
); 
7536         listDelNode(server
.objfreelist
,head
); 
7537         if (server
.vm_enabled
) pthread_mutex_unlock(&server
.obj_freelist_mutex
); 
7541         if (server
.vm_enabled
) pthread_mutex_unlock(&server
.obj_freelist_mutex
); 
7546 /* This function gets called when 'maxmemory' is set on the config file to limit 
7547  * the max memory used by the server, and we are out of memory. 
7548  * This function will try to, in order: 
7550  * - Free objects from the free list 
7551  * - Try to remove keys with an EXPIRE set 
7553  * It is not possible to free enough memory to reach used-memory < maxmemory 
7554  * the server will start refusing commands that will enlarge even more the 
7557 static void freeMemoryIfNeeded(void) { 
7558     while (server
.maxmemory 
&& zmalloc_used_memory() > server
.maxmemory
) { 
7559         int j
, k
, freed 
= 0; 
7561         if (tryFreeOneObjectFromFreelist() == REDIS_OK
) continue; 
7562         for (j 
= 0; j 
< server
.dbnum
; j
++) { 
7564             robj 
*minkey 
= NULL
; 
7565             struct dictEntry 
*de
; 
7567             if (dictSize(server
.db
[j
].expires
)) { 
7569                 /* From a sample of three keys drop the one nearest to 
7570                  * the natural expire */ 
7571                 for (k 
= 0; k 
< 3; k
++) { 
7574                     de 
= dictGetRandomKey(server
.db
[j
].expires
); 
7575                     t 
= (time_t) dictGetEntryVal(de
); 
7576                     if (minttl 
== -1 || t 
< minttl
) { 
7577                         minkey 
= dictGetEntryKey(de
); 
7581                 deleteKey(server
.db
+j
,minkey
); 
7584         if (!freed
) return; /* nothing to free... */ 
7588 /* ============================== Append Only file ========================== */ 
7590 static void feedAppendOnlyFile(struct redisCommand 
*cmd
, int dictid
, robj 
**argv
, int argc
) { 
7591     sds buf 
= sdsempty(); 
7597     /* The DB this command was targetting is not the same as the last command 
7598      * we appendend. To issue a SELECT command is needed. */ 
7599     if (dictid 
!= server
.appendseldb
) { 
7602         snprintf(seldb
,sizeof(seldb
),"%d",dictid
); 
7603         buf 
= sdscatprintf(buf
,"*2\r\n$6\r\nSELECT\r\n$%lu\r\n%s\r\n", 
7604             (unsigned long)strlen(seldb
),seldb
); 
7605         server
.appendseldb 
= dictid
; 
7608     /* "Fix" the argv vector if the command is EXPIRE. We want to translate 
7609      * EXPIREs into EXPIREATs calls */ 
7610     if (cmd
->proc 
== expireCommand
) { 
7613         tmpargv
[0] = createStringObject("EXPIREAT",8); 
7614         tmpargv
[1] = argv
[1]; 
7615         incrRefCount(argv
[1]); 
7616         when 
= time(NULL
)+strtol(argv
[2]->ptr
,NULL
,10); 
7617         tmpargv
[2] = createObject(REDIS_STRING
, 
7618             sdscatprintf(sdsempty(),"%ld",when
)); 
7622     /* Append the actual command */ 
7623     buf 
= sdscatprintf(buf
,"*%d\r\n",argc
); 
7624     for (j 
= 0; j 
< argc
; j
++) { 
7627         o 
= getDecodedObject(o
); 
7628         buf 
= sdscatprintf(buf
,"$%lu\r\n",(unsigned long)sdslen(o
->ptr
)); 
7629         buf 
= sdscatlen(buf
,o
->ptr
,sdslen(o
->ptr
)); 
7630         buf 
= sdscatlen(buf
,"\r\n",2); 
7634     /* Free the objects from the modified argv for EXPIREAT */ 
7635     if (cmd
->proc 
== expireCommand
) { 
7636         for (j 
= 0; j 
< 3; j
++) 
7637             decrRefCount(argv
[j
]); 
7640     /* We want to perform a single write. This should be guaranteed atomic 
7641      * at least if the filesystem we are writing is a real physical one. 
7642      * While this will save us against the server being killed I don't think 
7643      * there is much to do about the whole server stopping for power problems 
7645      nwritten 
= write(server
.appendfd
,buf
,sdslen(buf
)); 
7646      if (nwritten 
!= (signed)sdslen(buf
)) { 
7647         /* Ooops, we are in troubles. The best thing to do for now is 
7648          * to simply exit instead to give the illusion that everything is 
7649          * working as expected. */ 
7650          if (nwritten 
== -1) { 
7651             redisLog(REDIS_WARNING
,"Exiting on error writing to the append-only file: %s",strerror(errno
)); 
7653             redisLog(REDIS_WARNING
,"Exiting on short write while writing to the append-only file: %s",strerror(errno
)); 
7657     /* If a background append only file rewriting is in progress we want to 
7658      * accumulate the differences between the child DB and the current one 
7659      * in a buffer, so that when the child process will do its work we 
7660      * can append the differences to the new append only file. */ 
7661     if (server
.bgrewritechildpid 
!= -1) 
7662         server
.bgrewritebuf 
= sdscatlen(server
.bgrewritebuf
,buf
,sdslen(buf
)); 
7666     if (server
.appendfsync 
== APPENDFSYNC_ALWAYS 
|| 
7667         (server
.appendfsync 
== APPENDFSYNC_EVERYSEC 
&& 
7668          now
-server
.lastfsync 
> 1)) 
7670         fsync(server
.appendfd
); /* Let's try to get this data on the disk */ 
7671         server
.lastfsync 
= now
; 
7675 /* In Redis commands are always executed in the context of a client, so in 
7676  * order to load the append only file we need to create a fake client. */ 
7677 static struct redisClient 
*createFakeClient(void) { 
7678     struct redisClient 
*c 
= zmalloc(sizeof(*c
)); 
7682     c
->querybuf 
= sdsempty(); 
7686     /* We set the fake client as a slave waiting for the synchronization 
7687      * so that Redis will not try to send replies to this client. */ 
7688     c
->replstate 
= REDIS_REPL_WAIT_BGSAVE_START
; 
7689     c
->reply 
= listCreate(); 
7690     listSetFreeMethod(c
->reply
,decrRefCount
); 
7691     listSetDupMethod(c
->reply
,dupClientReplyValue
); 
7695 static void freeFakeClient(struct redisClient 
*c
) { 
7696     sdsfree(c
->querybuf
); 
7697     listRelease(c
->reply
); 
7701 /* Replay the append log file. On error REDIS_OK is returned. On non fatal 
7702  * error (the append only file is zero-length) REDIS_ERR is returned. On 
7703  * fatal error an error message is logged and the program exists. */ 
7704 int loadAppendOnlyFile(char *filename
) { 
7705     struct redisClient 
*fakeClient
; 
7706     FILE *fp 
= fopen(filename
,"r"); 
7707     struct redis_stat sb
; 
7708     unsigned long long loadedkeys 
= 0; 
7710     if (redis_fstat(fileno(fp
),&sb
) != -1 && sb
.st_size 
== 0) 
7714         redisLog(REDIS_WARNING
,"Fatal error: can't open the append log file for reading: %s",strerror(errno
)); 
7718     fakeClient 
= createFakeClient(); 
7725         struct redisCommand 
*cmd
; 
7727         if (fgets(buf
,sizeof(buf
),fp
) == NULL
) { 
7733         if (buf
[0] != '*') goto fmterr
; 
7735         argv 
= zmalloc(sizeof(robj
*)*argc
); 
7736         for (j 
= 0; j 
< argc
; j
++) { 
7737             if (fgets(buf
,sizeof(buf
),fp
) == NULL
) goto readerr
; 
7738             if (buf
[0] != '$') goto fmterr
; 
7739             len 
= strtol(buf
+1,NULL
,10); 
7740             argsds 
= sdsnewlen(NULL
,len
); 
7741             if (len 
&& fread(argsds
,len
,1,fp
) == 0) goto fmterr
; 
7742             argv
[j
] = createObject(REDIS_STRING
,argsds
); 
7743             if (fread(buf
,2,1,fp
) == 0) goto fmterr
; /* discard CRLF */ 
7746         /* Command lookup */ 
7747         cmd 
= lookupCommand(argv
[0]->ptr
); 
7749             redisLog(REDIS_WARNING
,"Unknown command '%s' reading the append only file", argv
[0]->ptr
); 
7752         /* Try object encoding */ 
7753         if (cmd
->flags 
& REDIS_CMD_BULK
) 
7754             tryObjectEncoding(argv
[argc
-1]); 
7755         /* Run the command in the context of a fake client */ 
7756         fakeClient
->argc 
= argc
; 
7757         fakeClient
->argv 
= argv
; 
7758         cmd
->proc(fakeClient
); 
7759         /* Discard the reply objects list from the fake client */ 
7760         while(listLength(fakeClient
->reply
)) 
7761             listDelNode(fakeClient
->reply
,listFirst(fakeClient
->reply
)); 
7762         /* Clean up, ready for the next command */ 
7763         for (j 
= 0; j 
< argc
; j
++) decrRefCount(argv
[j
]); 
7765         /* Handle swapping while loading big datasets when VM is on */ 
7767         if (server
.vm_enabled 
&& (loadedkeys 
% 5000) == 0) { 
7768             while (zmalloc_used_memory() > server
.vm_max_memory
) { 
7769                 if (vmSwapOneObjectBlocking() == REDIS_ERR
) break; 
7774     freeFakeClient(fakeClient
); 
7779         redisLog(REDIS_WARNING
,"Unexpected end of file reading the append only file"); 
7781         redisLog(REDIS_WARNING
,"Unrecoverable error reading the append only file: %s", strerror(errno
)); 
7785     redisLog(REDIS_WARNING
,"Bad file format reading the append only file"); 
7789 /* Write an object into a file in the bulk format $<count>\r\n<payload>\r\n */ 
7790 static int fwriteBulkObject(FILE *fp
, robj 
*obj
) { 
7794     /* Avoid the incr/decr ref count business if possible to help 
7795      * copy-on-write (we are often in a child process when this function 
7797      * Also makes sure that key objects don't get incrRefCount-ed when VM 
7799     if (obj
->encoding 
!= REDIS_ENCODING_RAW
) { 
7800         obj 
= getDecodedObject(obj
); 
7803     snprintf(buf
,sizeof(buf
),"$%ld\r\n",(long)sdslen(obj
->ptr
)); 
7804     if (fwrite(buf
,strlen(buf
),1,fp
) == 0) goto err
; 
7805     if (sdslen(obj
->ptr
) && fwrite(obj
->ptr
,sdslen(obj
->ptr
),1,fp
) == 0) 
7807     if (fwrite("\r\n",2,1,fp
) == 0) goto err
; 
7808     if (decrrc
) decrRefCount(obj
); 
7811     if (decrrc
) decrRefCount(obj
); 
/* Write a binary-safe string into a file in the bulk format
 * $<count>\r\n<payload>\r\n
 *
 * 's' points to 'len' bytes (it may contain NUL bytes). Returns 1 on
 * success, 0 if any fwrite() fails. */
static int fwriteBulkString(FILE *fp, char *s, unsigned long len) {
    char buf[128];

    /* 'len' is unsigned: it must be printed with %lu, not %ld. */
    snprintf(buf,sizeof(buf),"$%lu\r\n",len);
    if (fwrite(buf,strlen(buf),1,fp) == 0) return 0;
    /* fwrite() of zero bytes returns 0, so an empty payload is skipped. */
    if (len && fwrite(s,len,1,fp) == 0) return 0;
    if (fwrite("\r\n",2,1,fp) == 0) return 0;
    return 1;
}
/* Write a double value in bulk format $<count>\r\n<payload>\r\n
 * The payload is the "%.17g" representation of 'd'. Returns 1 on success,
 * 0 if any fwrite() fails. */
static int fwriteBulkDouble(FILE *fp, double d) {
    char payload[128], header[128];
    size_t digits;

    /* The payload is formatted together with its trailing CRLF, which is
     * not part of the announced length. */
    snprintf(payload,sizeof(payload),"%.17g\r\n",d);
    digits = strlen(payload)-2;
    snprintf(header,sizeof(header),"$%lu\r\n",(unsigned long)digits);
    if (fwrite(header,strlen(header),1,fp) == 0) return 0;
    if (fwrite(payload,strlen(payload),1,fp) == 0) return 0;
    return 1;
}
/* Write a long value in bulk format $<count>\r\n<payload>\r\n
 * The payload is the decimal representation of 'l'. Returns 1 on success,
 * 0 if any fwrite() fails. */
static int fwriteBulkLong(FILE *fp, long l) {
    char payload[128], header[128];
    size_t digits;

    /* The payload is formatted together with its trailing CRLF, which is
     * not part of the announced length. */
    snprintf(payload,sizeof(payload),"%ld\r\n",l);
    digits = strlen(payload)-2;
    snprintf(header,sizeof(header),"$%lu\r\n",(unsigned long)digits);
    if (fwrite(header,strlen(header),1,fp) == 0) return 0;
    if (fwrite(payload,strlen(payload),1,fp) == 0) return 0;
    return 1;
}
7849 /* Write a sequence of commands able to fully rebuild the dataset into 
7850  * "filename". Used both by REWRITEAOF and BGREWRITEAOF. */ 
7851 static int rewriteAppendOnlyFile(char *filename
) { 
7852     dictIterator 
*di 
= NULL
; 
7857     time_t now 
= time(NULL
); 
7859     /* Note that we have to use a different temp name here compared to the 
7860      * one used by rewriteAppendOnlyFileBackground() function. */ 
7861     snprintf(tmpfile
,256,"temp-rewriteaof-%d.aof", (int) getpid()); 
7862     fp 
= fopen(tmpfile
,"w"); 
7864         redisLog(REDIS_WARNING
, "Failed rewriting the append only file: %s", strerror(errno
)); 
7867     for (j 
= 0; j 
< server
.dbnum
; j
++) { 
7868         char selectcmd
[] = "*2\r\n$6\r\nSELECT\r\n"; 
7869         redisDb 
*db 
= server
.db
+j
; 
7871         if (dictSize(d
) == 0) continue; 
7872         di 
= dictGetIterator(d
); 
7878         /* SELECT the new DB */ 
7879         if (fwrite(selectcmd
,sizeof(selectcmd
)-1,1,fp
) == 0) goto werr
; 
7880         if (fwriteBulkLong(fp
,j
) == 0) goto werr
; 
7882         /* Iterate this DB writing every entry */ 
7883         while((de 
= dictNext(di
)) != NULL
) { 
7888             key 
= dictGetEntryKey(de
); 
7889             /* If the value for this key is swapped, load a preview in memory. 
7890              * We use a "swapped" flag to remember if we need to free the 
7891              * value object instead to just increment the ref count anyway 
7892              * in order to avoid copy-on-write of pages if we are forked() */ 
7893             if (!server
.vm_enabled 
|| key
->storage 
== REDIS_VM_MEMORY 
|| 
7894                 key
->storage 
== REDIS_VM_SWAPPING
) { 
7895                 o 
= dictGetEntryVal(de
); 
7898                 o 
= vmPreviewObject(key
); 
7901             expiretime 
= getExpire(db
,key
); 
7903             /* Save the key and associated value */ 
7904             if (o
->type 
== REDIS_STRING
) { 
7905                 /* Emit a SET command */ 
7906                 char cmd
[]="*3\r\n$3\r\nSET\r\n"; 
7907                 if (fwrite(cmd
,sizeof(cmd
)-1,1,fp
) == 0) goto werr
; 
7909                 if (fwriteBulkObject(fp
,key
) == 0) goto werr
; 
7910                 if (fwriteBulkObject(fp
,o
) == 0) goto werr
; 
7911             } else if (o
->type 
== REDIS_LIST
) { 
7912                 /* Emit the RPUSHes needed to rebuild the list */ 
7913                 list 
*list 
= o
->ptr
; 
7917                 listRewind(list
,&li
); 
7918                 while((ln 
= listNext(&li
))) { 
7919                     char cmd
[]="*3\r\n$5\r\nRPUSH\r\n"; 
7920                     robj 
*eleobj 
= listNodeValue(ln
); 
7922                     if (fwrite(cmd
,sizeof(cmd
)-1,1,fp
) == 0) goto werr
; 
7923                     if (fwriteBulkObject(fp
,key
) == 0) goto werr
; 
7924                     if (fwriteBulkObject(fp
,eleobj
) == 0) goto werr
; 
7926             } else if (o
->type 
== REDIS_SET
) { 
7927                 /* Emit the SADDs needed to rebuild the set */ 
7929                 dictIterator 
*di 
= dictGetIterator(set
); 
7932                 while((de 
= dictNext(di
)) != NULL
) { 
7933                     char cmd
[]="*3\r\n$4\r\nSADD\r\n"; 
7934                     robj 
*eleobj 
= dictGetEntryKey(de
); 
7936                     if (fwrite(cmd
,sizeof(cmd
)-1,1,fp
) == 0) goto werr
; 
7937                     if (fwriteBulkObject(fp
,key
) == 0) goto werr
; 
7938                     if (fwriteBulkObject(fp
,eleobj
) == 0) goto werr
; 
7940                 dictReleaseIterator(di
); 
7941             } else if (o
->type 
== REDIS_ZSET
) { 
7942                 /* Emit the ZADDs needed to rebuild the sorted set */ 
7944                 dictIterator 
*di 
= dictGetIterator(zs
->dict
); 
7947                 while((de 
= dictNext(di
)) != NULL
) { 
7948                     char cmd
[]="*4\r\n$4\r\nZADD\r\n"; 
7949                     robj 
*eleobj 
= dictGetEntryKey(de
); 
7950                     double *score 
= dictGetEntryVal(de
); 
7952                     if (fwrite(cmd
,sizeof(cmd
)-1,1,fp
) == 0) goto werr
; 
7953                     if (fwriteBulkObject(fp
,key
) == 0) goto werr
; 
7954                     if (fwriteBulkDouble(fp
,*score
) == 0) goto werr
; 
7955                     if (fwriteBulkObject(fp
,eleobj
) == 0) goto werr
; 
7957                 dictReleaseIterator(di
); 
7958             } else if (o
->type 
== REDIS_HASH
) { 
7959                 char cmd
[]="*4\r\n$4\r\nHSET\r\n"; 
7961                 /* Emit the HSETs needed to rebuild the hash */ 
7962                 if (o
->encoding 
== REDIS_ENCODING_ZIPMAP
) { 
7963                     unsigned char *p 
= zipmapRewind(o
->ptr
); 
7964                     unsigned char *field
, *val
; 
7965                     unsigned int flen
, vlen
; 
7967                     while((p 
= zipmapNext(p
,&field
,&flen
,&val
,&vlen
)) != NULL
) { 
7968                         if (fwrite(cmd
,sizeof(cmd
)-1,1,fp
) == 0) goto werr
; 
7969                         if (fwriteBulkObject(fp
,key
) == 0) goto werr
; 
7970                         if (fwriteBulkString(fp
,(char*)field
,flen
) == -1) 
7972                         if (fwriteBulkString(fp
,(char*)val
,vlen
) == -1) 
7976                     dictIterator 
*di 
= dictGetIterator(o
->ptr
); 
7979                     while((de 
= dictNext(di
)) != NULL
) { 
7980                         robj 
*field 
= dictGetEntryKey(de
); 
7981                         robj 
*val 
= dictGetEntryVal(de
); 
7983                         if (fwrite(cmd
,sizeof(cmd
)-1,1,fp
) == 0) goto werr
; 
7984                         if (fwriteBulkObject(fp
,key
) == 0) goto werr
; 
7985                         if (fwriteBulkObject(fp
,field
) == -1) return -1; 
7986                         if (fwriteBulkObject(fp
,val
) == -1) return -1; 
7988                     dictReleaseIterator(di
); 
7993             /* Save the expire time */ 
7994             if (expiretime 
!= -1) { 
7995                 char cmd
[]="*3\r\n$8\r\nEXPIREAT\r\n"; 
7996                 /* If this key is already expired skip it */ 
7997                 if (expiretime 
< now
) continue; 
7998                 if (fwrite(cmd
,sizeof(cmd
)-1,1,fp
) == 0) goto werr
; 
7999                 if (fwriteBulkObject(fp
,key
) == 0) goto werr
; 
8000                 if (fwriteBulkLong(fp
,expiretime
) == 0) goto werr
; 
8002             if (swapped
) decrRefCount(o
); 
8004         dictReleaseIterator(di
); 
8007     /* Make sure data will not remain on the OS's output buffers */ 
8012     /* Use RENAME to make sure the DB file is changed atomically only 
8013      * if the generate DB file is ok. */ 
8014     if (rename(tmpfile
,filename
) == -1) { 
8015         redisLog(REDIS_WARNING
,"Error moving temp append only file on the final destination: %s", strerror(errno
)); 
8019     redisLog(REDIS_NOTICE
,"SYNC append only file rewrite performed"); 
8025     redisLog(REDIS_WARNING
,"Write error writing append only file on disk: %s", strerror(errno
)); 
8026     if (di
) dictReleaseIterator(di
); 
8030 /* This is how rewriting of the append only file in background works: 
8032  * 1) The user calls BGREWRITEAOF 
8033  * 2) Redis calls this function, that forks(): 
8034  *    2a) the child rewrite the append only file in a temp file. 
8035  *    2b) the parent accumulates differences in server.bgrewritebuf. 
8036  * 3) When the child finished '2a' exists. 
8037  * 4) The parent will trap the exit code, if it's OK, will append the 
8038  *    data accumulated into server.bgrewritebuf into the temp file, and 
8039  *    finally will rename(2) the temp file in the actual file name. 
8040  *    The the new file is reopened as the new append only file. Profit! 
8042 static int rewriteAppendOnlyFileBackground(void) { 
8045     if (server
.bgrewritechildpid 
!= -1) return REDIS_ERR
; 
8046     if (server
.vm_enabled
) waitEmptyIOJobsQueue(); 
8047     if ((childpid 
= fork()) == 0) { 
8051         if (server
.vm_enabled
) vmReopenSwapFile(); 
8053         snprintf(tmpfile
,256,"temp-rewriteaof-bg-%d.aof", (int) getpid()); 
8054         if (rewriteAppendOnlyFile(tmpfile
) == REDIS_OK
) { 
8061         if (childpid 
== -1) { 
8062             redisLog(REDIS_WARNING
, 
8063                 "Can't rewrite append only file in background: fork: %s", 
8067         redisLog(REDIS_NOTICE
, 
8068             "Background append only file rewriting started by pid %d",childpid
); 
8069         server
.bgrewritechildpid 
= childpid
; 
8070         updateDictResizePolicy(); 
8071         /* We set appendseldb to -1 in order to force the next call to the 
8072          * feedAppendOnlyFile() to issue a SELECT command, so the differences 
8073          * accumulated by the parent into server.bgrewritebuf will start 
8074          * with a SELECT statement and it will be safe to merge. */ 
8075         server
.appendseldb 
= -1; 
8078     return REDIS_OK
; /* unreached */ 
8081 static void bgrewriteaofCommand(redisClient 
*c
) { 
8082     if (server
.bgrewritechildpid 
!= -1) { 
8083         addReplySds(c
,sdsnew("-ERR background append only file rewriting already in progress\r\n")); 
8086     if (rewriteAppendOnlyFileBackground() == REDIS_OK
) { 
8087         char *status 
= "+Background append only file rewriting started\r\n"; 
8088         addReplySds(c
,sdsnew(status
)); 
8090         addReply(c
,shared
.err
); 
/* Remove the temp file that the background rewrite child with pid
 * 'childpid' writes to ("temp-rewriteaof-bg-<pid>.aof"). The result of
 * unlink() is ignored. */
static void aofRemoveTempFile(pid_t childpid) {
    char path[256];
    int pid = (int) childpid;

    snprintf(path,256,"temp-rewriteaof-bg-%d.aof", pid);
    unlink(path);
}
8101 /* Virtual Memory is composed mainly of two subsystems: 
8102  * - Blocking Virtual Memory 
8103  * - Threaded Virtual Memory I/O 
8104  * The two parts are not fully decoupled, but functions are split among two 
8105  * different sections of the source code (delimited by comments) in order to 
8106  * make more clear what functionality is about the blocking VM and what about 
8107  * the threaded (not blocking) VM. 
8111  * Redis VM is a blocking VM (one that blocks reading swapped values from 
8112  * disk into memory when a value swapped out is needed in memory) that is made 
8113  * unblocking by trying to examine the command argument vector in order to 
8114  * load in background values that will likely be needed in order to exec 
8115  * the command. The command is executed only once all the relevant keys 
8116  * are loaded into memory. 
8118  * This basically is almost as simple as a blocking VM, but almost as parallel 
8119  * as a fully non-blocking VM. 
8122 /* =================== Virtual Memory - Blocking Side  ====================== */ 
8124 /* substitute the first occurrence of '%p' with the process pid in the 
8125  * swap file name. */ 
8126 static void expandVmSwapFilename(void) { 
8127     char *p 
= strstr(server
.vm_swap_file
,"%p"); 
8133     new = sdscat(new,server
.vm_swap_file
); 
8134     new = sdscatprintf(new,"%ld",(long) getpid()); 
8135     new = sdscat(new,p
+2); 
8136     zfree(server
.vm_swap_file
); 
8137     server
.vm_swap_file 
= new; 
8140 static void vmInit(void) { 
8145     if (server
.vm_max_threads 
!= 0) 
8146         zmalloc_enable_thread_safeness(); /* we need thread safe zmalloc() */ 
8148     expandVmSwapFilename(); 
8149     redisLog(REDIS_NOTICE
,"Using '%s' as swap file",server
.vm_swap_file
); 
8150     if ((server
.vm_fp 
= fopen(server
.vm_swap_file
,"r+b")) == NULL
) { 
8151         server
.vm_fp 
= fopen(server
.vm_swap_file
,"w+b"); 
8153     if (server
.vm_fp 
== NULL
) { 
8154         redisLog(REDIS_WARNING
, 
8155             "Impossible to open the swap file: %s. Exiting.", 
8159     server
.vm_fd 
= fileno(server
.vm_fp
); 
8160     server
.vm_next_page 
= 0; 
8161     server
.vm_near_pages 
= 0; 
8162     server
.vm_stats_used_pages 
= 0; 
8163     server
.vm_stats_swapped_objects 
= 0; 
8164     server
.vm_stats_swapouts 
= 0; 
8165     server
.vm_stats_swapins 
= 0; 
8166     totsize 
= server
.vm_pages
*server
.vm_page_size
; 
8167     redisLog(REDIS_NOTICE
,"Allocating %lld bytes of swap file",totsize
); 
8168     if (ftruncate(server
.vm_fd
,totsize
) == -1) { 
8169         redisLog(REDIS_WARNING
,"Can't ftruncate swap file: %s. Exiting.", 
8173         redisLog(REDIS_NOTICE
,"Swap file allocated with success"); 
8175     server
.vm_bitmap 
= zmalloc((server
.vm_pages
+7)/8); 
8176     redisLog(REDIS_VERBOSE
,"Allocated %lld bytes page table for %lld pages", 
8177         (long long) (server
.vm_pages
+7)/8, server
.vm_pages
); 
8178     memset(server
.vm_bitmap
,0,(server
.vm_pages
+7)/8); 
8180     /* Initialize threaded I/O (used by Virtual Memory) */ 
8181     server
.io_newjobs 
= listCreate(); 
8182     server
.io_processing 
= listCreate(); 
8183     server
.io_processed 
= listCreate(); 
8184     server
.io_ready_clients 
= listCreate(); 
8185     pthread_mutex_init(&server
.io_mutex
,NULL
); 
8186     pthread_mutex_init(&server
.obj_freelist_mutex
,NULL
); 
8187     pthread_mutex_init(&server
.io_swapfile_mutex
,NULL
); 
8188     server
.io_active_threads 
= 0; 
8189     if (pipe(pipefds
) == -1) { 
8190         redisLog(REDIS_WARNING
,"Unable to intialized VM: pipe(2): %s. Exiting." 
8194     server
.io_ready_pipe_read 
= pipefds
[0]; 
8195     server
.io_ready_pipe_write 
= pipefds
[1]; 
8196     redisAssert(anetNonBlock(NULL
,server
.io_ready_pipe_read
) != ANET_ERR
); 
8197     /* LZF requires a lot of stack */ 
8198     pthread_attr_init(&server
.io_threads_attr
); 
8199     pthread_attr_getstacksize(&server
.io_threads_attr
, &stacksize
); 
8200     while (stacksize 
< REDIS_THREAD_STACK_SIZE
) stacksize 
*= 2; 
8201     pthread_attr_setstacksize(&server
.io_threads_attr
, stacksize
); 
8202     /* Listen for events in the threaded I/O pipe */ 
8203     if (aeCreateFileEvent(server
.el
, server
.io_ready_pipe_read
, AE_READABLE
, 
8204         vmThreadedIOCompletedJob
, NULL
) == AE_ERR
) 
8205         oom("creating file event"); 
8208 /* Mark the page as used */ 
8209 static void vmMarkPageUsed(off_t page
) { 
8210     off_t byte 
= page
/8; 
8212     redisAssert(vmFreePage(page
) == 1); 
8213     server
.vm_bitmap
[byte
] |= 1<<bit
; 
8216 /* Mark N contiguous pages as used, with 'page' being the first. */ 
8217 static void vmMarkPagesUsed(off_t page
, off_t count
) { 
8220     for (j 
= 0; j 
< count
; j
++) 
8221         vmMarkPageUsed(page
+j
); 
8222     server
.vm_stats_used_pages 
+= count
; 
8223     redisLog(REDIS_DEBUG
,"Mark USED pages: %lld pages at %lld\n", 
8224         (long long)count
, (long long)page
); 
8227 /* Mark the page as free */ 
8228 static void vmMarkPageFree(off_t page
) { 
8229     off_t byte 
= page
/8; 
8231     redisAssert(vmFreePage(page
) == 0); 
8232     server
.vm_bitmap
[byte
] &= ~(1<<bit
); 
8235 /* Mark N contiguous pages as free, with 'page' being the first. */ 
8236 static void vmMarkPagesFree(off_t page
, off_t count
) { 
8239     for (j 
= 0; j 
< count
; j
++) 
8240         vmMarkPageFree(page
+j
); 
8241     server
.vm_stats_used_pages 
-= count
; 
8242     redisLog(REDIS_DEBUG
,"Mark FREE pages: %lld pages at %lld\n", 
8243         (long long)count
, (long long)page
); 
8246 /* Test if the page is free */ 
8247 static int vmFreePage(off_t page
) { 
8248     off_t byte 
= page
/8; 
8250     return (server
.vm_bitmap
[byte
] & (1<<bit
)) == 0; 
8253 /* Find N contiguous free pages storing the first page of the cluster in *first. 
8254  * Returns REDIS_OK if it was able to find N contiguous pages, otherwise  
8255  * REDIS_ERR is returned. 
8257  * This function uses a simple algorithm: we try to allocate 
8258  * REDIS_VM_MAX_NEAR_PAGES sequentially, when we reach this limit we start 
8259  * again from the start of the swap file searching for free spaces. 
8261  * If it looks pretty clear that there are no free pages near our offset 
8262  * we try to find less populated places doing a forward jump of 
8263  * REDIS_VM_MAX_RANDOM_JUMP, then we start scanning again a few pages 
8264  * without hurry, and then we jump again and so forth... 
8266  * This function can be improved using a free list to avoid to guess 
8267  * too much, since we could collect data about freed pages. 
8269  * note: I implemented this function just after watching an episode of 
8270  * Battlestar Galactica, where the hybrid was continuing to say "JUMP!" 
8272 static int vmFindContiguousPages(off_t 
*first
, off_t n
) { 
8273     off_t base
, offset 
= 0, since_jump 
= 0, numfree 
= 0; 
8275     if (server
.vm_near_pages 
== REDIS_VM_MAX_NEAR_PAGES
) { 
8276         server
.vm_near_pages 
= 0; 
8277         server
.vm_next_page 
= 0; 
8279     server
.vm_near_pages
++; /* Yet another try for pages near to the old ones */ 
8280     base 
= server
.vm_next_page
; 
8282     while(offset 
< server
.vm_pages
) { 
8283         off_t 
this = base
+offset
; 
8285         /* If we overflow, restart from page zero */ 
8286         if (this >= server
.vm_pages
) { 
8287             this -= server
.vm_pages
; 
8289                 /* Just overflowed, what we found on tail is no longer 
8290                  * interesting, as it's no longer contiguous. */ 
8294         if (vmFreePage(this)) { 
8295             /* This is a free page */ 
8297             /* Already got N free pages? Return to the caller, with success */ 
8299                 *first 
= this-(n
-1); 
8300                 server
.vm_next_page 
= this+1; 
8301                 redisLog(REDIS_DEBUG
, "FOUND CONTIGUOUS PAGES: %lld pages at %lld\n", (long long) n
, (long long) *first
); 
8305             /* The current one is not a free page */ 
8309         /* Fast-forward if the current page is not free and we already 
8310          * searched enough near this place. */ 
8312         if (!numfree 
&& since_jump 
>= REDIS_VM_MAX_RANDOM_JUMP
/4) { 
8313             offset 
+= random() % REDIS_VM_MAX_RANDOM_JUMP
; 
8315             /* Note that even if we rewind after the jump, we are don't need 
8316              * to make sure numfree is set to zero as we only jump *if* it 
8317              * is set to zero. */ 
8319             /* Otherwise just check the next page */ 
8326 /* Write the specified object at the specified page of the swap file */ 
8327 static int vmWriteObjectOnSwap(robj 
*o
, off_t page
) { 
8328     if (server
.vm_enabled
) pthread_mutex_lock(&server
.io_swapfile_mutex
); 
8329     if (fseeko(server
.vm_fp
,page
*server
.vm_page_size
,SEEK_SET
) == -1) { 
8330         if (server
.vm_enabled
) pthread_mutex_unlock(&server
.io_swapfile_mutex
); 
8331         redisLog(REDIS_WARNING
, 
8332             "Critical VM problem in vmWriteObjectOnSwap(): can't seek: %s", 
8336     rdbSaveObject(server
.vm_fp
,o
); 
8337     fflush(server
.vm_fp
); 
8338     if (server
.vm_enabled
) pthread_mutex_unlock(&server
.io_swapfile_mutex
); 
8342 /* Swap the 'val' object relative to 'key' into disk. Store all the information 
8343  * needed to later retrieve the object into the key object. 
8344  * If we can't find enough contiguous empty pages to swap the object on disk 
8345  * REDIS_ERR is returned. */ 
8346 static int vmSwapObjectBlocking(robj 
*key
, robj 
*val
) { 
8347     off_t pages 
= rdbSavedObjectPages(val
,NULL
); 
8350     assert(key
->storage 
== REDIS_VM_MEMORY
); 
8351     assert(key
->refcount 
== 1); 
8352     if (vmFindContiguousPages(&page
,pages
) == REDIS_ERR
) return REDIS_ERR
; 
8353     if (vmWriteObjectOnSwap(val
,page
) == REDIS_ERR
) return REDIS_ERR
; 
8354     key
->vm
.page 
= page
; 
8355     key
->vm
.usedpages 
= pages
; 
8356     key
->storage 
= REDIS_VM_SWAPPED
; 
8357     key
->vtype 
= val
->type
; 
8358     decrRefCount(val
); /* Deallocate the object from memory. */ 
8359     vmMarkPagesUsed(page
,pages
); 
8360     redisLog(REDIS_DEBUG
,"VM: object %s swapped out at %lld (%lld pages)", 
8361         (unsigned char*) key
->ptr
, 
8362         (unsigned long long) page
, (unsigned long long) pages
); 
8363     server
.vm_stats_swapped_objects
++; 
8364     server
.vm_stats_swapouts
++; 
8368 static robj 
*vmReadObjectFromSwap(off_t page
, int type
) { 
8371     if (server
.vm_enabled
) pthread_mutex_lock(&server
.io_swapfile_mutex
); 
8372     if (fseeko(server
.vm_fp
,page
*server
.vm_page_size
,SEEK_SET
) == -1) { 
8373         redisLog(REDIS_WARNING
, 
8374             "Unrecoverable VM problem in vmReadObjectFromSwap(): can't seek: %s", 
8378     o 
= rdbLoadObject(type
,server
.vm_fp
); 
8380         redisLog(REDIS_WARNING
, "Unrecoverable VM problem in vmReadObjectFromSwap(): can't load object from swap file: %s", strerror(errno
)); 
8383     if (server
.vm_enabled
) pthread_mutex_unlock(&server
.io_swapfile_mutex
); 
8387 /* Load the value object relative to the 'key' object from swap to memory. 
8388  * The newly allocated object is returned. 
8390  * If preview is true the unserialized object is returned to the caller but 
8391  * no changes are made to the key object, nor the pages are marked as freed */ 
8392 static robj 
*vmGenericLoadObject(robj 
*key
, int preview
) { 
8395     redisAssert(key
->storage 
== REDIS_VM_SWAPPED 
|| key
->storage 
== REDIS_VM_LOADING
); 
8396     val 
= vmReadObjectFromSwap(key
->vm
.page
,key
->vtype
); 
8398         key
->storage 
= REDIS_VM_MEMORY
; 
8399         key
->vm
.atime 
= server
.unixtime
; 
8400         vmMarkPagesFree(key
->vm
.page
,key
->vm
.usedpages
); 
8401         redisLog(REDIS_DEBUG
, "VM: object %s loaded from disk", 
8402             (unsigned char*) key
->ptr
); 
8403         server
.vm_stats_swapped_objects
--; 
8405         redisLog(REDIS_DEBUG
, "VM: object %s previewed from disk", 
8406             (unsigned char*) key
->ptr
); 
8408     server
.vm_stats_swapins
++; 
8412 /* Plain object loading, from swap to memory */ 
8413 static robj 
*vmLoadObject(robj 
*key
) { 
8414     /* If we are loading the object in background, stop it, we 
8415      * need to load this object synchronously ASAP. */ 
8416     if (key
->storage 
== REDIS_VM_LOADING
) 
8417         vmCancelThreadedIOJob(key
); 
8418     return vmGenericLoadObject(key
,0); 
8421 /* Just load the value on disk, without to modify the key. 
8422  * This is useful when we want to perform some operation on the value 
8423  * without to really bring it from swap to memory, like while saving the 
8424  * dataset or rewriting the append only log. */ 
8425 static robj 
*vmPreviewObject(robj 
*key
) { 
8426     return vmGenericLoadObject(key
,1); 
8429 /* How a good candidate is this object for swapping? 
8430  * The better candidate it is, the greater the returned value. 
8432  * Currently we try to perform a fast estimation of the object size in 
8433  * memory, and combine it with aging informations. 
8435  * Basically swappability = idle-time * log(estimated size) 
8437  * Bigger objects are preferred over smaller objects, but not 
8438  * proportionally, this is why we use the logarithm. This algorithm is 
8439  * just a first try and will probably be tuned later. */ 
8440 static double computeObjectSwappability(robj 
*o
) { 
8441     time_t age 
= server
.unixtime 
- o
->vm
.atime
; 
8445     struct dictEntry 
*de
; 
8448     if (age 
<= 0) return 0; 
8451         if (o
->encoding 
!= REDIS_ENCODING_RAW
) { 
8454             asize 
= sdslen(o
->ptr
)+sizeof(*o
)+sizeof(long)*2; 
8459         listNode 
*ln 
= listFirst(l
); 
8461         asize 
= sizeof(list
); 
8463             robj 
*ele 
= ln
->value
; 
8466             elesize 
= (ele
->encoding 
== REDIS_ENCODING_RAW
) ? 
8467                             (sizeof(*o
)+sdslen(ele
->ptr
)) : 
8469             asize 
+= (sizeof(listNode
)+elesize
)*listLength(l
); 
8474         z 
= (o
->type 
== REDIS_ZSET
); 
8475         d 
= z 
? ((zset
*)o
->ptr
)->dict 
: o
->ptr
; 
8477         asize 
= sizeof(dict
)+(sizeof(struct dictEntry
*)*dictSlots(d
)); 
8478         if (z
) asize 
+= sizeof(zset
)-sizeof(dict
); 
8483             de 
= dictGetRandomKey(d
); 
8484             ele 
= dictGetEntryKey(de
); 
8485             elesize 
= (ele
->encoding 
== REDIS_ENCODING_RAW
) ? 
8486                             (sizeof(*o
)+sdslen(ele
->ptr
)) : 
8488             asize 
+= (sizeof(struct dictEntry
)+elesize
)*dictSize(d
); 
8489             if (z
) asize 
+= sizeof(zskiplistNode
)*dictSize(d
); 
8493         if (o
->encoding 
== REDIS_ENCODING_ZIPMAP
) { 
8494             unsigned char *p 
= zipmapRewind((unsigned char*)o
->ptr
); 
8495             unsigned int len 
= zipmapLen((unsigned char*)o
->ptr
); 
8496             unsigned int klen
, vlen
; 
8497             unsigned char *key
, *val
; 
8499             if ((p 
= zipmapNext(p
,&key
,&klen
,&val
,&vlen
)) == NULL
) { 
8503             asize 
= len
*(klen
+vlen
+3); 
8504         } else if (o
->encoding 
== REDIS_ENCODING_HT
) { 
8506             asize 
= sizeof(dict
)+(sizeof(struct dictEntry
*)*dictSlots(d
)); 
8511                 de 
= dictGetRandomKey(d
); 
8512                 ele 
= dictGetEntryKey(de
); 
8513                 elesize 
= (ele
->encoding 
== REDIS_ENCODING_RAW
) ? 
8514                                 (sizeof(*o
)+sdslen(ele
->ptr
)) : 
8516                 ele 
= dictGetEntryVal(de
); 
8517                 elesize 
= (ele
->encoding 
== REDIS_ENCODING_RAW
) ? 
8518                                 (sizeof(*o
)+sdslen(ele
->ptr
)) : 
8520                 asize 
+= (sizeof(struct dictEntry
)+elesize
)*dictSize(d
); 
8525     return (double)age
*log(1+asize
); 
8528 /* Try to swap an object that's a good candidate for swapping. 
8529  * Returns REDIS_OK if the object was swapped, REDIS_ERR if it's not possible 
8530  * to swap any object at all. 
8532  * If 'usethreads' is true, Redis will try to swap the object in background 
8533  * using I/O threads. */ 
8534 static int vmSwapOneObject(int usethreads
) { 
8536     struct dictEntry 
*best 
= NULL
; 
8537     double best_swappability 
= 0; 
8538     redisDb 
*best_db 
= NULL
; 
8541     for (j 
= 0; j 
< server
.dbnum
; j
++) { 
8542         redisDb 
*db 
= server
.db
+j
; 
8543         /* Why is maxtries set to 100? 
8544          * Because this way (usually) we'll find 1 object even if just 1% - 2% 
8545          * are swappable objects */ 
8548         if (dictSize(db
->dict
) == 0) continue; 
8549         for (i 
= 0; i 
< 5; i
++) { 
8551             double swappability
; 
8553             if (maxtries
) maxtries
--; 
8554             de 
= dictGetRandomKey(db
->dict
); 
8555             key 
= dictGetEntryKey(de
); 
8556             val 
= dictGetEntryVal(de
); 
8557             /* Only swap objects that are currently in memory. 
8559              * Also don't swap shared objects if threaded VM is on, as we 
8560              * try to ensure that the main thread does not touch the 
8561              * object while the I/O thread is using it, but we can't 
8562              * control other keys without adding additional mutex. */ 
8563             if (key
->storage 
!= REDIS_VM_MEMORY 
|| 
8564                 (server
.vm_max_threads 
!= 0 && val
->refcount 
!= 1)) { 
8565                 if (maxtries
) i
--; /* don't count this try */ 
8568             swappability 
= computeObjectSwappability(val
); 
8569             if (!best 
|| swappability 
> best_swappability
) { 
8571                 best_swappability 
= swappability
; 
8576     if (best 
== NULL
) return REDIS_ERR
; 
8577     key 
= dictGetEntryKey(best
); 
8578     val 
= dictGetEntryVal(best
); 
8580     redisLog(REDIS_DEBUG
,"Key with best swappability: %s, %f", 
8581         key
->ptr
, best_swappability
); 
8583     /* Unshare the key if needed */ 
8584     if (key
->refcount 
> 1) { 
8585         robj 
*newkey 
= dupStringObject(key
); 
8587         key 
= dictGetEntryKey(best
) = newkey
; 
8591         vmSwapObjectThreaded(key
,val
,best_db
); 
8594         if (vmSwapObjectBlocking(key
,val
) == REDIS_OK
) { 
8595             dictGetEntryVal(best
) = NULL
; 
8603 static int vmSwapOneObjectBlocking() { 
8604     return vmSwapOneObject(0); 
8607 static int vmSwapOneObjectThreaded() { 
8608     return vmSwapOneObject(1); 
8611 /* Return true if it's safe to swap out objects at a given moment. 
8612  * Basically we don't want to swap objects out while there is a BGSAVE 
8613  * or a BGREWRITEAOF running in background. */ 
8614 static int vmCanSwapOut(void) { 
8615     return (server
.bgsavechildpid 
== -1 && server
.bgrewritechildpid 
== -1); 
8618 /* Delete a key if swapped. Returns 1 if the key was found, was swapped 
8619  * and was deleted. Otherwise 0 is returned. */ 
8620 static int deleteIfSwapped(redisDb 
*db
, robj 
*key
) { 
8624     if ((de 
= dictFind(db
->dict
,key
)) == NULL
) return 0; 
8625     foundkey 
= dictGetEntryKey(de
); 
8626     if (foundkey
->storage 
== REDIS_VM_MEMORY
) return 0; 
8631 /* =================== Virtual Memory - Threaded I/O  ======================= */ 
8633 static void freeIOJob(iojob 
*j
) { 
8634     if ((j
->type 
== REDIS_IOJOB_PREPARE_SWAP 
|| 
8635         j
->type 
== REDIS_IOJOB_DO_SWAP 
|| 
8636         j
->type 
== REDIS_IOJOB_LOAD
) && j
->val 
!= NULL
) 
8637         decrRefCount(j
->val
); 
8638     /* We don't decrRefCount the j->key field as we didn't increment 
8639      * the count when creating IO Jobs. This is because the key field here is 
8640      * just used as an identifier and if a key is removed the Job should 
8641      * never be touched again. */ 
8645 /* Every time a thread finished a Job, it writes a byte into the write side 
8646  * of an unix pipe in order to "awake" the main thread, and this function 
8648 static void vmThreadedIOCompletedJob(aeEventLoop 
*el
, int fd
, void *privdata
, 
8652     int retval
, processed 
= 0, toprocess 
= -1, trytoswap 
= 1; 
8654     REDIS_NOTUSED(mask
); 
8655     REDIS_NOTUSED(privdata
); 
8657     /* For every byte we read in the read side of the pipe, there is one 
8658      * I/O job completed to process. */ 
8659     while((retval 
= read(fd
,buf
,1)) == 1) { 
8663         struct dictEntry 
*de
; 
8665         redisLog(REDIS_DEBUG
,"Processing I/O completed job"); 
8667         /* Get the processed element (the oldest one) */ 
8669         assert(listLength(server
.io_processed
) != 0); 
8670         if (toprocess 
== -1) { 
8671             toprocess 
= (listLength(server
.io_processed
)*REDIS_MAX_COMPLETED_JOBS_PROCESSED
)/100; 
8672             if (toprocess 
<= 0) toprocess 
= 1; 
8674         ln 
= listFirst(server
.io_processed
); 
8676         listDelNode(server
.io_processed
,ln
); 
8678         /* If this job is marked as canceled, just ignore it */ 
8683         /* Post process it in the main thread, as there are things we 
8684          * can do just here to avoid race conditions and/or invasive locks */ 
8685         redisLog(REDIS_DEBUG
,"Job %p type: %d, key at %p (%s) refcount: %d\n", (void*) j
, j
->type
, (void*)j
->key
, (char*)j
->key
->ptr
, j
->key
->refcount
); 
8686         de 
= dictFind(j
->db
->dict
,j
->key
); 
8688         key 
= dictGetEntryKey(de
); 
8689         if (j
->type 
== REDIS_IOJOB_LOAD
) { 
8692             /* Key loaded, bring it at home */ 
8693             key
->storage 
= REDIS_VM_MEMORY
; 
8694             key
->vm
.atime 
= server
.unixtime
; 
8695             vmMarkPagesFree(key
->vm
.page
,key
->vm
.usedpages
); 
8696             redisLog(REDIS_DEBUG
, "VM: object %s loaded from disk (threaded)", 
8697                 (unsigned char*) key
->ptr
); 
8698             server
.vm_stats_swapped_objects
--; 
8699             server
.vm_stats_swapins
++; 
8700             dictGetEntryVal(de
) = j
->val
; 
8701             incrRefCount(j
->val
); 
8704             /* Handle clients waiting for this key to be loaded. */ 
8705             handleClientsBlockedOnSwappedKey(db
,key
); 
8706         } else if (j
->type 
== REDIS_IOJOB_PREPARE_SWAP
) { 
8707             /* Now we know the amount of pages required to swap this object. 
8708              * Let's find some space for it, and queue this task again 
8709              * rebranded as REDIS_IOJOB_DO_SWAP. */ 
8710             if (!vmCanSwapOut() || 
8711                 vmFindContiguousPages(&j
->page
,j
->pages
) == REDIS_ERR
) 
8713                 /* Ooops... no space or we can't swap as there is 
8714                  * a fork()ed Redis trying to save stuff on disk. */ 
8716                 key
->storage 
= REDIS_VM_MEMORY
; /* undo operation */ 
8718                 /* Note that we need to mark these pages as used now, 
8719                  * if the job will be canceled, we'll mark them as freed 
8721                 vmMarkPagesUsed(j
->page
,j
->pages
); 
8722                 j
->type 
= REDIS_IOJOB_DO_SWAP
; 
8727         } else if (j
->type 
== REDIS_IOJOB_DO_SWAP
) { 
8730             /* Key swapped. We can finally free some memory. */ 
8731             if (key
->storage 
!= REDIS_VM_SWAPPING
) { 
8732                 printf("key->storage: %d\n",key
->storage
); 
8733                 printf("key->name: %s\n",(char*)key
->ptr
); 
8734                 printf("key->refcount: %d\n",key
->refcount
); 
8735                 printf("val: %p\n",(void*)j
->val
); 
8736                 printf("val->type: %d\n",j
->val
->type
); 
8737                 printf("val->ptr: %s\n",(char*)j
->val
->ptr
); 
8739             redisAssert(key
->storage 
== REDIS_VM_SWAPPING
); 
8740             val 
= dictGetEntryVal(de
); 
8741             key
->vm
.page 
= j
->page
; 
8742             key
->vm
.usedpages 
= j
->pages
; 
8743             key
->storage 
= REDIS_VM_SWAPPED
; 
8744             key
->vtype 
= j
->val
->type
; 
8745             decrRefCount(val
); /* Deallocate the object from memory. */ 
8746             dictGetEntryVal(de
) = NULL
; 
8747             redisLog(REDIS_DEBUG
, 
8748                 "VM: object %s swapped out at %lld (%lld pages) (threaded)", 
8749                 (unsigned char*) key
->ptr
, 
8750                 (unsigned long long) j
->page
, (unsigned long long) j
->pages
); 
8751             server
.vm_stats_swapped_objects
++; 
8752             server
.vm_stats_swapouts
++; 
8754             /* Put a few more swap requests in queue if we are still 
8756             if (trytoswap 
&& vmCanSwapOut() && 
8757                 zmalloc_used_memory() > server
.vm_max_memory
) 
8762                     more 
= listLength(server
.io_newjobs
) < 
8763                             (unsigned) server
.vm_max_threads
; 
8765                     /* Don't waste CPU time if swappable objects are rare. */ 
8766                     if (vmSwapOneObjectThreaded() == REDIS_ERR
) { 
8774         if (processed 
== toprocess
) return; 
8776     if (retval 
< 0 && errno 
!= EAGAIN
) { 
8777         redisLog(REDIS_WARNING
, 
8778             "WARNING: read(2) error in vmThreadedIOCompletedJob() %s", 
8783 static void lockThreadedIO(void) { 
8784     pthread_mutex_lock(&server
.io_mutex
); 
8787 static void unlockThreadedIO(void) { 
8788     pthread_mutex_unlock(&server
.io_mutex
); 
8791 /* Remove the specified object from the threaded I/O queue if still not 
8792  * processed, otherwise make sure to flag it as canceled. */ 
8793 static void vmCancelThreadedIOJob(robj 
*o
) { 
8795         server
.io_newjobs
,      /* 0 */ 
8796         server
.io_processing
,   /* 1 */ 
8797         server
.io_processed     
/* 2 */ 
8801     assert(o
->storage 
== REDIS_VM_LOADING 
|| o
->storage 
== REDIS_VM_SWAPPING
); 
8804     /* Search for a matching key in one of the queues */ 
8805     for (i 
= 0; i 
< 3; i
++) { 
8809         listRewind(lists
[i
],&li
); 
8810         while ((ln 
= listNext(&li
)) != NULL
) { 
8811             iojob 
*job 
= ln
->value
; 
8813             if (job
->canceled
) continue; /* Skip this, already canceled. */ 
8814             if (job
->key 
== o
) { 
8815                 redisLog(REDIS_DEBUG
,"*** CANCELED %p (%s) (type %d) (LIST ID %d)\n", 
8816                     (void*)job
, (char*)o
->ptr
, job
->type
, i
); 
8817                 /* Mark the pages as free since the swap didn't happen 
8818                  * or happened but is now discarded. */ 
8819                 if (i 
!= 1 && job
->type 
== REDIS_IOJOB_DO_SWAP
) 
8820                     vmMarkPagesFree(job
->page
,job
->pages
); 
8821                 /* Cancel the job. It depends on the list the job is 
8824                 case 0: /* io_newjobs */ 
8825                     /* If the job was not yet processed the best thing to do 
8826                      * is to remove it from the queue altogether */ 
8828                     listDelNode(lists
[i
],ln
); 
8830                 case 1: /* io_processing */ 
8831                     /* Oh Shi- the thread is messing with the Job: 
8833                      * Probably it's accessing the object if this is a 
8834                      * PREPARE_SWAP or DO_SWAP job. 
8835                      * If it's a LOAD job it may be reading from disk and 
8836                      * if we don't wait for the job to terminate before 
8837                      * canceling it, maybe in a few microseconds data can be 
8838                      * corrupted in these pages. So the short story is: 
8840                      * Better to wait for the job to move into the 
8841                      * next queue (processed)... */ 
8843                     /* We try again and again until the job is completed. */ 
8845                     /* But let's wait some time for the I/O thread 
8846                      * to finish with this job. After all this condition 
8847                      * should be very rare. */ 
8850                 case 2: /* io_processed */ 
8851                     /* The job was already processed, that's easy... 
8852                      * just mark it as canceled so that we'll ignore it 
8853                      * when processing completed jobs. */ 
8857                 /* Finally we have to adjust the storage type of the object 
8858                  * in order to "UNDO" the operation. */ 
8859                 if (o
->storage 
== REDIS_VM_LOADING
) 
8860                     o
->storage 
= REDIS_VM_SWAPPED
; 
8861                 else if (o
->storage 
== REDIS_VM_SWAPPING
) 
8862                     o
->storage 
= REDIS_VM_MEMORY
; 
8869     assert(1 != 1); /* We should never reach this */ 
8872 static void *IOThreadEntryPoint(void *arg
) { 
8877     pthread_detach(pthread_self()); 
8879         /* Get a new job to process */ 
8881         if (listLength(server
.io_newjobs
) == 0) { 
8882             /* No new jobs in queue, exit. */ 
8883             redisLog(REDIS_DEBUG
,"Thread %ld exiting, nothing to do", 
8884                 (long) pthread_self()); 
8885             server
.io_active_threads
--; 
8889         ln 
= listFirst(server
.io_newjobs
); 
8891         listDelNode(server
.io_newjobs
,ln
); 
8892         /* Add the job in the processing queue */ 
8893         j
->thread 
= pthread_self(); 
8894         listAddNodeTail(server
.io_processing
,j
); 
8895         ln 
= listLast(server
.io_processing
); /* We use ln later to remove it */ 
8897         redisLog(REDIS_DEBUG
,"Thread %ld got a new job (type %d): %p about key '%s'", 
8898             (long) pthread_self(), j
->type
, (void*)j
, (char*)j
->key
->ptr
); 
8900         /* Process the Job */ 
8901         if (j
->type 
== REDIS_IOJOB_LOAD
) { 
8902             j
->val 
= vmReadObjectFromSwap(j
->page
,j
->key
->vtype
); 
8903         } else if (j
->type 
== REDIS_IOJOB_PREPARE_SWAP
) { 
8904             FILE *fp 
= fopen("/dev/null","w+"); 
8905             j
->pages 
= rdbSavedObjectPages(j
->val
,fp
); 
8907         } else if (j
->type 
== REDIS_IOJOB_DO_SWAP
) { 
8908             if (vmWriteObjectOnSwap(j
->val
,j
->page
) == REDIS_ERR
) 
8912         /* Done: insert the job into the processed queue */ 
8913         redisLog(REDIS_DEBUG
,"Thread %ld completed the job: %p (key %s)", 
8914             (long) pthread_self(), (void*)j
, (char*)j
->key
->ptr
); 
8916         listDelNode(server
.io_processing
,ln
); 
8917         listAddNodeTail(server
.io_processed
,j
); 
8920         /* Signal the main thread there is new stuff to process */ 
8921         assert(write(server
.io_ready_pipe_write
,"x",1) == 1); 
8923     return NULL
; /* never reached */ 
8926 static void spawnIOThread(void) { 
8928     sigset_t mask
, omask
; 
8932     sigaddset(&mask
,SIGCHLD
); 
8933     sigaddset(&mask
,SIGHUP
); 
8934     sigaddset(&mask
,SIGPIPE
); 
8935     pthread_sigmask(SIG_SETMASK
, &mask
, &omask
); 
8936     while ((err 
= pthread_create(&thread
,&server
.io_threads_attr
,IOThreadEntryPoint
,NULL
)) != 0) { 
8937         redisLog(REDIS_WARNING
,"Unable to spawn an I/O thread: %s", 
8941     pthread_sigmask(SIG_SETMASK
, &omask
, NULL
); 
8942     server
.io_active_threads
++; 
8945 /* We need to wait for the last thread to exit before we are able to 
8946  * fork() in order to BGSAVE or BGREWRITEAOF. */ 
8947 static void waitEmptyIOJobsQueue(void) { 
8949         int io_processed_len
; 
8952         if (listLength(server
.io_newjobs
) == 0 && 
8953             listLength(server
.io_processing
) == 0 && 
8954             server
.io_active_threads 
== 0) 
8959         /* While waiting for the empty jobs queue condition we post-process some 
8960          * finished jobs, as I/O threads may be hanging trying to write against 
8961          * the io_ready_pipe_write FD but there are so much pending jobs that 
8963         io_processed_len 
= listLength(server
.io_processed
); 
8965         if (io_processed_len
) { 
8966             vmThreadedIOCompletedJob(NULL
,server
.io_ready_pipe_read
,NULL
,0); 
8967             usleep(1000); /* 1 millisecond */ 
8969             usleep(10000); /* 10 milliseconds */ 
8974 static void vmReopenSwapFile(void) { 
8975     /* Note: we don't close the old one as we are in the child process 
8976      * and don't want to mess at all with the original file object. */ 
8977     server
.vm_fp 
= fopen(server
.vm_swap_file
,"r+b"); 
8978     if (server
.vm_fp 
== NULL
) { 
8979         redisLog(REDIS_WARNING
,"Can't re-open the VM swap file: %s. Exiting.", 
8980             server
.vm_swap_file
); 
8983     server
.vm_fd 
= fileno(server
.vm_fp
); 
8986 /* This function must be called with threaded IO locked */ 
8987 static void queueIOJob(iojob 
*j
) { 
8988     redisLog(REDIS_DEBUG
,"Queued IO Job %p type %d about key '%s'\n", 
8989         (void*)j
, j
->type
, (char*)j
->key
->ptr
); 
8990     listAddNodeTail(server
.io_newjobs
,j
); 
8991     if (server
.io_active_threads 
< server
.vm_max_threads
) 
8995 static int vmSwapObjectThreaded(robj 
*key
, robj 
*val
, redisDb 
*db
) { 
8998     assert(key
->storage 
== REDIS_VM_MEMORY
); 
8999     assert(key
->refcount 
== 1); 
9001     j 
= zmalloc(sizeof(*j
)); 
9002     j
->type 
= REDIS_IOJOB_PREPARE_SWAP
; 
9008     j
->thread 
= (pthread_t
) -1; 
9009     key
->storage 
= REDIS_VM_SWAPPING
; 
9017 /* ============ Virtual Memory - Blocking clients on missing keys =========== */ 
9019 /* This function makes the client 'c' wait for the key 'key' to be loaded. 
9020  * If there is not already a job loading the key, it is created. 
9021  * The key is added to the io_keys list in the client structure, and also 
9022  * in the hash table mapping swapped keys to waiting clients, that is, 
9023  * server.io_waited_keys. */ 
9024 static int waitForSwappedKey(redisClient 
*c
, robj 
*key
) { 
9025     struct dictEntry 
*de
; 
9029     /* If the key does not exist or is already in RAM we don't need to 
9030      * block the client at all. */ 
9031     de 
= dictFind(c
->db
->dict
,key
); 
9032     if (de 
== NULL
) return 0; 
9033     o 
= dictGetEntryKey(de
); 
9034     if (o
->storage 
== REDIS_VM_MEMORY
) { 
9036     } else if (o
->storage 
== REDIS_VM_SWAPPING
) { 
9037         /* We were swapping the key, undo it! */ 
9038         vmCancelThreadedIOJob(o
); 
9042     /* OK: the key is either swapped, or being loaded just now. */ 
9044     /* Add the key to the list of keys this client is waiting for. 
9045      * This maps clients to keys they are waiting for. */ 
9046     listAddNodeTail(c
->io_keys
,key
); 
9049     /* Add the client to the swapped keys => clients waiting map. */ 
9050     de 
= dictFind(c
->db
->io_keys
,key
); 
9054         /* For every key we take a list of clients blocked for it */ 
9056         retval 
= dictAdd(c
->db
->io_keys
,key
,l
); 
9058         assert(retval 
== DICT_OK
); 
9060         l 
= dictGetEntryVal(de
); 
9062     listAddNodeTail(l
,c
); 
9064     /* Are we already loading the key from disk? If not create a job */ 
9065     if (o
->storage 
== REDIS_VM_SWAPPED
) { 
9068         o
->storage 
= REDIS_VM_LOADING
; 
9069         j 
= zmalloc(sizeof(*j
)); 
9070         j
->type 
= REDIS_IOJOB_LOAD
; 
9073         j
->key
->vtype 
= o
->vtype
; 
9074         j
->page 
= o
->vm
.page
; 
9077         j
->thread 
= (pthread_t
) -1; 
9085 /* Preload keys needed for the ZUNION and ZINTER commands. */ 
9086 static void zunionInterBlockClientOnSwappedKeys(redisClient 
*c
) { 
9088     num 
= atoi(c
->argv
[2]->ptr
); 
9089     for (i 
= 0; i 
< num
; i
++) { 
9090         waitForSwappedKey(c
,c
->argv
[3+i
]); 
9094 /* Is this client attempting to run a command against swapped keys? 
9095  * If so, block it ASAP, load the keys in background, then resume it. 
9097  * The important idea about this function is that it can fail! If keys will 
9098  * still be swapped when the client is resumed, these key lookups will 
9099  * just block loading keys from disk. In practical terms this should only 
9100  * happen with SORT BY command or if there is a bug in this function. 
9102  * Return 1 if the client is marked as blocked, 0 if the client can 
9103  * continue as the keys it is going to access appear to be in memory. */ 
9104 static int blockClientOnSwappedKeys(struct redisCommand 
*cmd
, redisClient 
*c
) { 
9107     if (cmd
->vm_preload_proc 
!= NULL
) { 
9108         cmd
->vm_preload_proc(c
); 
9110         if (cmd
->vm_firstkey 
== 0) return 0; 
9111         last 
= cmd
->vm_lastkey
; 
9112         if (last 
< 0) last 
= c
->argc
+last
; 
9113         for (j 
= cmd
->vm_firstkey
; j 
<= last
; j 
+= cmd
->vm_keystep
) 
9114             waitForSwappedKey(c
,c
->argv
[j
]); 
9117     /* If the client was blocked for at least one key, mark it as blocked. */ 
9118     if (listLength(c
->io_keys
)) { 
9119         c
->flags 
|= REDIS_IO_WAIT
; 
9120         aeDeleteFileEvent(server
.el
,c
->fd
,AE_READABLE
); 
9121         server
.vm_blocked_clients
++; 
9128 /* Remove the 'key' from the list of blocked keys for a given client. 
9130  * The function returns 1 when there are no longer blocking keys after 
9131  * the current one was removed (and the client can be unblocked). */ 
9132 static int dontWaitForSwappedKey(redisClient 
*c
, robj 
*key
) { 
9136     struct dictEntry 
*de
; 
9138     /* Remove the key from the list of keys this client is waiting for. */ 
9139     listRewind(c
->io_keys
,&li
); 
9140     while ((ln 
= listNext(&li
)) != NULL
) { 
9141         if (compareStringObjects(ln
->value
,key
) == 0) { 
9142             listDelNode(c
->io_keys
,ln
); 
9148     /* Remove the client from the key => waiting clients map. */ 
9149     de 
= dictFind(c
->db
->io_keys
,key
); 
9151     l 
= dictGetEntryVal(de
); 
9152     ln 
= listSearchKey(l
,c
); 
9155     if (listLength(l
) == 0) 
9156         dictDelete(c
->db
->io_keys
,key
); 
9158     return listLength(c
->io_keys
) == 0; 
9161 static void handleClientsBlockedOnSwappedKey(redisDb 
*db
, robj 
*key
) { 
9162     struct dictEntry 
*de
; 
9167     de 
= dictFind(db
->io_keys
,key
); 
9170     l 
= dictGetEntryVal(de
); 
9171     len 
= listLength(l
); 
9172     /* Note: we can't use something like while(listLength(l)) as the list 
9173      * can be freed by the called function when we remove the last element. */ 
9176         redisClient 
*c 
= ln
->value
; 
9178         if (dontWaitForSwappedKey(c
,key
)) { 
9179             /* Put the client in the list of clients ready to go as we 
9180              * loaded all the keys about it. */ 
9181             listAddNodeTail(server
.io_ready_clients
,c
); 
9186 /* =========================== Remote Configuration ========================= */ 
9188 static void configSetCommand(redisClient 
*c
) { 
9189     robj 
*o 
= getDecodedObject(c
->argv
[3]); 
9190     if (!strcasecmp(c
->argv
[2]->ptr
,"dbfilename")) { 
9191         zfree(server
.dbfilename
); 
9192         server
.dbfilename 
= zstrdup(o
->ptr
); 
9193     } else if (!strcasecmp(c
->argv
[2]->ptr
,"requirepass")) { 
9194         zfree(server
.requirepass
); 
9195         server
.requirepass 
= zstrdup(o
->ptr
); 
9196     } else if (!strcasecmp(c
->argv
[2]->ptr
,"masterauth")) { 
9197         zfree(server
.masterauth
); 
9198         server
.masterauth 
= zstrdup(o
->ptr
); 
9199     } else if (!strcasecmp(c
->argv
[2]->ptr
,"maxmemory")) { 
9200         server
.maxmemory 
= strtoll(o
->ptr
, NULL
, 10); 
9202         addReplySds(c
,sdscatprintf(sdsempty(), 
9203             "-ERR not supported CONFIG parameter %s\r\n", 
9204             (char*)c
->argv
[2]->ptr
)); 
9209     addReply(c
,shared
.ok
); 
9212 static void configGetCommand(redisClient 
*c
) { 
9213     robj 
*o 
= getDecodedObject(c
->argv
[2]); 
9214     robj 
*lenobj 
= createObject(REDIS_STRING
,NULL
); 
9215     char *pattern 
= o
->ptr
; 
9219     decrRefCount(lenobj
); 
9221     if (stringmatch(pattern
,"dbfilename",0)) { 
9222         addReplyBulkCString(c
,"dbfilename"); 
9223         addReplyBulkCString(c
,server
.dbfilename
); 
9226     if (stringmatch(pattern
,"requirepass",0)) { 
9227         addReplyBulkCString(c
,"requirepass"); 
9228         addReplyBulkCString(c
,server
.requirepass
); 
9231     if (stringmatch(pattern
,"masterauth",0)) { 
9232         addReplyBulkCString(c
,"masterauth"); 
9233         addReplyBulkCString(c
,server
.masterauth
); 
9236     if (stringmatch(pattern
,"maxmemory",0)) { 
9239         snprintf(buf
,128,"%llu\n",server
.maxmemory
); 
9240         addReplyBulkCString(c
,"maxmemory"); 
9241         addReplyBulkCString(c
,buf
); 
9245     lenobj
->ptr 
= sdscatprintf(sdsempty(),"*%d\r\n",matches
*2); 
9248 static void configCommand(redisClient 
*c
) { 
9249     if (!strcasecmp(c
->argv
[1]->ptr
,"set")) { 
9250         if (c
->argc 
!= 4) goto badarity
; 
9251         configSetCommand(c
); 
9252     } else if (!strcasecmp(c
->argv
[1]->ptr
,"get")) { 
9253         if (c
->argc 
!= 3) goto badarity
; 
9254         configGetCommand(c
); 
9255     } else if (!strcasecmp(c
->argv
[1]->ptr
,"resetstat")) { 
9256         if (c
->argc 
!= 2) goto badarity
; 
9257         server
.stat_numcommands 
= 0; 
9258         server
.stat_numconnections 
= 0; 
9259         server
.stat_expiredkeys 
= 0; 
9260         server
.stat_starttime 
= time(NULL
); 
9261         addReply(c
,shared
.ok
); 
9263         addReplySds(c
,sdscatprintf(sdsempty(), 
9264             "-ERR CONFIG subcommand must be one of GET, SET, RESETSTAT\r\n")); 
9269     addReplySds(c
,sdscatprintf(sdsempty(), 
9270         "-ERR Wrong number of arguments for CONFIG %s\r\n", 
9271         (char*) c
->argv
[1]->ptr
)); 
9274 /* =========================== Pubsub implementation ======================== */ 
9276 static void freePubsubPattern(void *p
) { 
9277     pubsubPattern 
*pat 
= p
; 
9279     decrRefCount(pat
->pattern
); 
9283 static int listMatchPubsubPattern(void *a
, void *b
) { 
9284     pubsubPattern 
*pa 
= a
, *pb 
= b
; 
9286     return (pa
->client 
== pb
->client
) && 
9287            (compareStringObjects(pa
->pattern
,pb
->pattern
) == 0); 
9290 /* Subscribe a client to a channel. Returns 1 if the operation succeeded, or 
9291  * 0 if the client was already subscribed to that channel. */ 
9292 static int pubsubSubscribeChannel(redisClient 
*c
, robj 
*channel
) { 
9293     struct dictEntry 
*de
; 
9294     list 
*clients 
= NULL
; 
9297     /* Add the channel to the client -> channels hash table */ 
9298     if (dictAdd(c
->pubsub_channels
,channel
,NULL
) == DICT_OK
) { 
9300         incrRefCount(channel
); 
9301         /* Add the client to the channel -> list of clients hash table */ 
9302         de 
= dictFind(server
.pubsub_channels
,channel
); 
9304             clients 
= listCreate(); 
9305             dictAdd(server
.pubsub_channels
,channel
,clients
); 
9306             incrRefCount(channel
); 
9308             clients 
= dictGetEntryVal(de
); 
9310         listAddNodeTail(clients
,c
); 
9312     /* Notify the client */ 
9313     addReply(c
,shared
.mbulk3
); 
9314     addReply(c
,shared
.subscribebulk
); 
9315     addReplyBulk(c
,channel
); 
9316     addReplyLong(c
,dictSize(c
->pubsub_channels
)+listLength(c
->pubsub_patterns
)); 
9320 /* Unsubscribe a client from a channel. Returns 1 if the operation succeeded, or 
9321  * 0 if the client was not subscribed to the specified channel. */ 
9322 static int pubsubUnsubscribeChannel(redisClient 
*c
, robj 
*channel
, int notify
) { 
9323     struct dictEntry 
*de
; 
9328     /* Remove the channel from the client -> channels hash table */ 
9329     incrRefCount(channel
); /* channel may be just a pointer to the same object 
9330                             we have in the hash tables. Protect it... */ 
9331     if (dictDelete(c
->pubsub_channels
,channel
) == DICT_OK
) { 
9333         /* Remove the client from the channel -> clients list hash table */ 
9334         de 
= dictFind(server
.pubsub_channels
,channel
); 
9336         clients 
= dictGetEntryVal(de
); 
9337         ln 
= listSearchKey(clients
,c
); 
9339         listDelNode(clients
,ln
); 
9340         if (listLength(clients
) == 0) { 
9341             /* Free the list and associated hash entry altogether if this was 
9342              * the last client, so that it will not be possible to abuse 
9343              * Redis PUBSUB by creating millions of channels. */ 
9344             dictDelete(server
.pubsub_channels
,channel
); 
9347     /* Notify the client */ 
9349         addReply(c
,shared
.mbulk3
); 
9350         addReply(c
,shared
.unsubscribebulk
); 
9351         addReplyBulk(c
,channel
); 
9352         addReplyLong(c
,dictSize(c
->pubsub_channels
)+ 
9353                        listLength(c
->pubsub_patterns
)); 
9356     decrRefCount(channel
); /* it is finally safe to release it */ 
9360 /* Subscribe a client to a pattern. Returns 1 if the operation succeeded, or 0 if the client was already subscribed to that pattern. */ 
9361 static int pubsubSubscribePattern(redisClient 
*c
, robj 
*pattern
) { 
9364     if (listSearchKey(c
->pubsub_patterns
,pattern
) == NULL
) { 
9367         listAddNodeTail(c
->pubsub_patterns
,pattern
); 
9368         incrRefCount(pattern
); 
9369         pat 
= zmalloc(sizeof(*pat
)); 
9370         pat
->pattern 
= getDecodedObject(pattern
); 
9372         listAddNodeTail(server
.pubsub_patterns
,pat
); 
9374     /* Notify the client */ 
9375     addReply(c
,shared
.mbulk3
); 
9376     addReply(c
,shared
.psubscribebulk
); 
9377     addReplyBulk(c
,pattern
); 
9378     addReplyLong(c
,dictSize(c
->pubsub_channels
)+listLength(c
->pubsub_patterns
)); 
9382 /* Unsubscribe a client from a pattern. Returns 1 if the operation succeeded, or 
9383  * 0 if the client was not subscribed to the specified pattern. */ 
9384 static int pubsubUnsubscribePattern(redisClient 
*c
, robj 
*pattern
, int notify
) { 
9389     incrRefCount(pattern
); /* Protect the object. May be the same we remove */ 
9390     if ((ln 
= listSearchKey(c
->pubsub_patterns
,pattern
)) != NULL
) { 
9392         listDelNode(c
->pubsub_patterns
,ln
); 
9394         pat
.pattern 
= pattern
; 
9395         ln 
= listSearchKey(server
.pubsub_patterns
,&pat
); 
9396         listDelNode(server
.pubsub_patterns
,ln
); 
9398     /* Notify the client */ 
9400         addReply(c
,shared
.mbulk3
); 
9401         addReply(c
,shared
.punsubscribebulk
); 
9402         addReplyBulk(c
,pattern
); 
9403         addReplyLong(c
,dictSize(c
->pubsub_channels
)+ 
9404                        listLength(c
->pubsub_patterns
)); 
9406     decrRefCount(pattern
); 
9410 /* Unsubscribe from all the channels. Return the number of channels the 
9411  * client was subscribed to. 'notify' is forwarded unchanged to 
 * pubsubUnsubscribeChannel() for every channel. */ 
9412 static int pubsubUnsubscribeAllChannels(redisClient 
*c
, int notify
) { 
9413     dictIterator 
*di 
= dictGetIterator(c
->pubsub_channels
); 
/* Walk every entry of the client's channel dictionary; the key of each entry
 * is the channel object. */
9417     while((de 
= dictNext(di
)) != NULL
) { 
9418         robj 
*channel 
= dictGetEntryKey(de
); 
/* Accumulate the per-channel return values into 'count'. */
9420         count 
+= pubsubUnsubscribeChannel(c
,channel
,notify
); 
9422     dictReleaseIterator(di
); 
9426 /* Unsubscribe from all the patterns. Return the number of patterns the 
9427  * client was subscribed to. 'notify' is forwarded unchanged to 
 * pubsubUnsubscribePattern() for every pattern. */ 
9428 static int pubsubUnsubscribeAllPatterns(redisClient 
*c
, int notify
) { 
/* Iterate over the client's own pattern list from its start. */
9433     listRewind(c
->pubsub_patterns
,&li
); 
9434     while ((ln 
= listNext(&li
)) != NULL
) { 
/* Each list node stores the pattern object as its value. */
9435         robj 
*pattern 
= ln
->value
; 
/* Accumulate the per-pattern return values into 'count'. */
9437         count 
+= pubsubUnsubscribePattern(c
,pattern
,notify
); 
9442 /* Publish a message: deliver 'message' to the clients subscribed to 
 * 'channel' and to the clients holding a pattern that matches it. 
 * publishCommand() reports the int return value to the publisher as the 
 * number of receivers. */ 
9443 static int pubsubPublishMessage(robj 
*channel
, robj 
*message
) { 
9445     struct dictEntry 
*de
; 
9449     /* Send to clients listening for that channel */ 
9450     de 
= dictFind(server
.pubsub_channels
,channel
); 
/* The dictionary value for a channel is the list of subscribed clients. */
9452         list 
*list 
= dictGetEntryVal(de
); 
9456         listRewind(list
,&li
); 
9457         while ((ln 
= listNext(&li
)) != NULL
) { 
9458             redisClient 
*c 
= ln
->value
; 
/* Each subscriber gets a 3-element multi bulk: message tag, channel, payload. */
9460             addReply(c
,shared
.mbulk3
); 
9461             addReply(c
,shared
.messagebulk
); 
9462             addReplyBulk(c
,channel
); 
9463             addReplyBulk(c
,message
); 
9467     /* Send to clients listening to matching channels */ 
9468     if (listLength(server
.pubsub_patterns
)) { 
9469         listRewind(server
.pubsub_patterns
,&li
); 
/* Work on the object returned by getDecodedObject() so that channel->ptr can
 * be handed to sdslen()/stringmatchlen(); it is released with decrRefCount()
 * once the loop is done. */
9470         channel 
= getDecodedObject(channel
); 
9471         while ((ln 
= listNext(&li
)) != NULL
) { 
9472             pubsubPattern 
*pat 
= ln
->value
; 
/* Match the stored pattern against the channel name (last argument 0). */
9474             if (stringmatchlen((char*)pat
->pattern
->ptr
, 
9475                                 sdslen(pat
->pattern
->ptr
), 
9476                                 (char*)channel
->ptr
, 
9477                                 sdslen(channel
->ptr
),0)) { 
/* Pattern subscribers receive the same 3-element "message" reply as direct
 * channel subscribers; the matching pattern itself is not included. */
9478                 addReply(pat
->client
,shared
.mbulk3
); 
9479                 addReply(pat
->client
,shared
.messagebulk
); 
9480                 addReplyBulk(pat
->client
,channel
); 
9481                 addReplyBulk(pat
->client
,message
); 
9485         decrRefCount(channel
); 
/* Command handler: subscribe the client to every channel named in
 * argv[1..argc-1], one pubsubSubscribeChannel() call per argument. */
9490 static void subscribeCommand(redisClient 
*c
) { 
9493     for (j 
= 1; j 
< c
->argc
; j
++) 
9494         pubsubSubscribeChannel(c
,c
->argv
[j
]); 
/* Command handler: unsubscribe the client either from all of its channels or
 * from the channels named in argv[1..argc-1]. The condition that selects
 * between the two paths is not visible in this excerpt (presumably the
 * absence of channel arguments -- confirm). Notifications are always
 * requested (notify == 1). */
9497 static void unsubscribeCommand(redisClient 
*c
) { 
9499         pubsubUnsubscribeAllChannels(c
,1); 
9504         for (j 
= 1; j 
< c
->argc
; j
++) 
9505             pubsubUnsubscribeChannel(c
,c
->argv
[j
],1); 
/* Command handler: subscribe the client to every pattern named in
 * argv[1..argc-1], one pubsubSubscribePattern() call per argument. */
9509 static void psubscribeCommand(redisClient 
*c
) { 
9512     for (j 
= 1; j 
< c
->argc
; j
++) 
9513         pubsubSubscribePattern(c
,c
->argv
[j
]); 
/* Command handler: unsubscribe the client either from all of its patterns or
 * from the patterns named in argv[1..argc-1]. The condition that selects
 * between the two paths is not visible in this excerpt (presumably the
 * absence of pattern arguments -- confirm). Notifications are always
 * requested (notify == 1). */
9516 static void punsubscribeCommand(redisClient 
*c
) { 
9518         pubsubUnsubscribeAllPatterns(c
,1); 
9523         for (j 
= 1; j 
< c
->argc
; j
++) 
9524             pubsubUnsubscribePattern(c
,c
->argv
[j
],1); 
/* Command handler: publish argv[2] on channel argv[1] and reply to the
 * publisher with the integer returned by pubsubPublishMessage(). */
9528 static void publishCommand(redisClient 
*c
) { 
9529     int receivers 
= pubsubPublishMessage(c
->argv
[1],c
->argv
[2]); 
9530     addReplyLong(c
,receivers
); 
9533 /* ================================= Debugging ============================== */ 
/* Command handler for DEBUG. Dispatches on argv[1] (case-insensitive) to the
 * subcommands segfault, reload, loadaof, object <key>, swapin <key> and
 * swapout <key>; anything else gets a syntax-error reply. */
9535 static void debugCommand(redisClient 
*c
) { 
9536     if (!strcasecmp(c
->argv
[1]->ptr
,"segfault")) { 
/* reload: save the dataset to server.dbfilename, then load it back. */
9538     } else if (!strcasecmp(c
->argv
[1]->ptr
,"reload")) { 
9539         if (rdbSave(server
.dbfilename
) != REDIS_OK
) { 
9540             addReply(c
,shared
.err
); 
9544         if (rdbLoad(server
.dbfilename
) != REDIS_OK
) { 
9545             addReply(c
,shared
.err
); 
9548         redisLog(REDIS_WARNING
,"DB reloaded by DEBUG RELOAD"); 
9549         addReply(c
,shared
.ok
); 
/* loadaof: load the dataset from the append only file. */
9550     } else if (!strcasecmp(c
->argv
[1]->ptr
,"loadaof")) { 
9552         if (loadAppendOnlyFile(server
.appendfilename
) != REDIS_OK
) { 
9553             addReply(c
,shared
.err
); 
9556         redisLog(REDIS_WARNING
,"Append Only File loaded by DEBUG LOADAOF"); 
9557         addReply(c
,shared
.ok
); 
/* object <key>: report addresses, refcounts and encoding of a key/value pair. */
9558     } else if (!strcasecmp(c
->argv
[1]->ptr
,"object") && c
->argc 
== 3) { 
9559         dictEntry 
*de 
= dictFind(c
->db
->dict
,c
->argv
[2]); 
9563             addReply(c
,shared
.nokeyerr
); 
9566         key 
= dictGetEntryKey(de
); 
9567         val 
= dictGetEntryVal(de
); 
/* The value is inspected directly when VM is off, or when the key's storage
 * state is REDIS_VM_MEMORY or REDIS_VM_SWAPPING. */
9568         if (!server
.vm_enabled 
|| (key
->storage 
== REDIS_VM_MEMORY 
|| 
9569                                    key
->storage 
== REDIS_VM_SWAPPING
)) { 
/* Bounds-check the encoding against the strencoding[] name table; an
 * out-of-range value is formatted into buf as "unknown encoding N". */
9573             if (val
->encoding 
< (sizeof(strencoding
)/sizeof(char*))) { 
9574                 strenc 
= strencoding
[val
->encoding
]; 
9576                 snprintf(buf
,64,"unknown encoding %d\n", val
->encoding
); 
9579             addReplySds(c
,sdscatprintf(sdsempty(), 
9580                 "+Key at:%p refcount:%d, value at:%p refcount:%d " 
9581                 "encoding:%s serializedlength:%lld\r\n", 
9582                 (void*)key
, key
->refcount
, (void*)val
, val
->refcount
, 
9583                 strenc
, (long long) rdbSavedObjectLen(val
,NULL
))); 
/* Otherwise report where the value sits in the swap file (page and page
 * count, read from key->vm). */
9585             addReplySds(c
,sdscatprintf(sdsempty(), 
9586                 "+Key at:%p refcount:%d, value swapped at: page %llu " 
9587                 "using %llu pages\r\n", 
9588                 (void*)key
, key
->refcount
, (unsigned long long) key
->vm
.page
, 
9589                 (unsigned long long) key
->vm
.usedpages
)); 
/* swapin <key>: look the key up with lookupKeyRead() and reply OK (presumably
 * the lookup is what loads a swapped value -- confirm against lookupKeyRead). */
9591     } else if (!strcasecmp(c
->argv
[1]->ptr
,"swapin") && c
->argc 
== 3) { 
9592         lookupKeyRead(c
->db
,c
->argv
[2]); 
9593         addReply(c
,shared
.ok
); 
/* swapout <key>: swap the value of an in-memory key out via
 * vmSwapObjectBlocking(). */
9594     } else if (!strcasecmp(c
->argv
[1]->ptr
,"swapout") && c
->argc 
== 3) { 
9595         dictEntry 
*de 
= dictFind(c
->db
->dict
,c
->argv
[2]); 
9598         if (!server
.vm_enabled
) { 
9599             addReplySds(c
,sdsnew("-ERR Virtual Memory is disabled\r\n")); 
9603             addReply(c
,shared
.nokeyerr
); 
9606         key 
= dictGetEntryKey(de
); 
9607         val 
= dictGetEntryVal(de
); 
9608         /* If the key is shared we want to create a copy */ 
9609         if (key
->refcount 
> 1) { 
9610             robj 
*newkey 
= dupStringObject(key
); 
/* dictGetEntryKey() is used as an lvalue here: the dictionary entry and the
 * local 'key' are both pointed at the private copy. */
9612             key 
= dictGetEntryKey(de
) = newkey
; 
9615         if (key
->storage 
!= REDIS_VM_MEMORY
) { 
9616             addReplySds(c
,sdsnew("-ERR This key is not in memory\r\n")); 
9617         } else if (vmSwapObjectBlocking(key
,val
) == REDIS_OK
) { 
/* After a successful swap the entry no longer references the value. */
9618             dictGetEntryVal(de
) = NULL
; 
9619             addReply(c
,shared
.ok
); 
9621             addReply(c
,shared
.err
); 
/* NOTE(review): the usage text below does not list LOADAOF, although that
 * subcommand is handled above. */
9624         addReplySds(c
,sdsnew( 
9625             "-ERR Syntax error, try DEBUG [SEGFAULT|OBJECT <key>|SWAPIN <key>|SWAPOUT <key>|RELOAD]\r\n")); 
/* Log a failed assertion at REDIS_WARNING level.
 *   estr: text of the expression that was not true
 *   file: source file of the assertion
 *   line: source line of the assertion
 * When HAVE_BACKTRACE is defined, an extra message announces that a SIGSEGV
 * is being forced so that the stack trace gets printed; the statement that
 * triggers it is not visible in this excerpt. */
9629 static void _redisAssert(char *estr
, char *file
, int line
) { 
9630     redisLog(REDIS_WARNING
,"=== ASSERTION FAILED ==="); 
9631     redisLog(REDIS_WARNING
,"==> %s:%d '%s' is not true\n",file
,line
,estr
); 
9632 #ifdef HAVE_BACKTRACE 
9633     redisLog(REDIS_WARNING
,"(forcing SIGSEGV in order to print the stack trace)"); 
9638 /* =================================== Main! ================================ */ 
/* Read the Linux overcommit policy from /proc/sys/vm/overcommit_memory.
 * The caller compares the int result against 0. The handling of a failed
 * fopen() and the conversion of the text to an integer are not visible in
 * this excerpt. */
9641 int linuxOvercommitMemoryValue(void) { 
9642     FILE *fp 
= fopen("/proc/sys/vm/overcommit_memory","r"); 
/* Read at most one line (up to 63 characters) of the file into buf. */
9646     if (fgets(buf
,64,fp
) == NULL
) { 
/* Log a warning when linuxOvercommitMemoryValue() reports 0, explaining that
 * background saves may fail and how to set vm.overcommit_memory to 1. */
9655 void linuxOvercommitMemoryWarning(void) { 
9656     if (linuxOvercommitMemoryValue() == 0) { 
9657         redisLog(REDIS_WARNING
,"WARNING overcommit_memory is set to 0! Background save may fail under low condition memory. To fix this issue add 'vm.overcommit_memory = 1' to /etc/sysctl.conf and then reboot or run the command 'sysctl vm.overcommit_memory=1' for this to take effect."); 
9660 #endif /* __linux__ */ 
9662 static void daemonize(void) { 
9666     if (fork() != 0) exit(0); /* parent exits */ 
9667     setsid(); /* create a new session */ 
9669     /* Every output goes to /dev/null. If Redis is daemonized but 
9670      * the 'logfile' is set to 'stdout' in the configuration file 
9671      * it will not log at all. */ 
9672     if ((fd 
= open("/dev/null", O_RDWR
, 0)) != -1) { 
9673         dup2(fd
, STDIN_FILENO
); 
9674         dup2(fd
, STDOUT_FILENO
); 
9675         dup2(fd
, STDERR_FILENO
); 
9676         if (fd 
> STDERR_FILENO
) close(fd
); 
9678     /* Try to write the pid file */ 
9679     fp 
= fopen(server
.pidfile
,"w"); 
9681         fprintf(fp
,"%d\n",getpid()); 
/* Print the server version (REDIS_VERSION) to stdout. Called from main() for
 * the -v and --version options. */
9686 static void version() { 
9687     printf("Redis server version %s\n", REDIS_VERSION
); 
/* Print the command line synopsis to stderr. Called from main() for the
 * --help option. */
9691 static void usage() { 
9692     fprintf(stderr
,"Usage: ./redis-server [/path/to/redis.conf]\n"); 
9693     fprintf(stderr
,"       ./redis-server - (read config from stdin)\n"); 
/* Program entry point. From the lines visible here: handle the command line
 * (version/help options or a configuration file path in argv[1]), optionally
 * daemonize, load the dataset from the append only file or from the RDB file,
 * then install the before-sleep hook on the event loop; the event loop is
 * deleted at the end. Several steps (initialisation, running the loop) are
 * not visible in this excerpt. */
9697 int main(int argc
, char **argv
) { 
/* argv[1] is either an informational option or the configuration file. */
9702         if (strcmp(argv
[1], "-v") == 0 || 
9703             strcmp(argv
[1], "--version") == 0) version(); 
9704         if (strcmp(argv
[1], "--help") == 0) usage(); 
/* Reset the save parameters before the configuration file is parsed. */
9705         resetServerSaveParams(); 
9706         loadServerConfig(argv
[1]); 
9707     } else if ((argc 
> 2)) { 
9710         redisLog(REDIS_WARNING
,"Warning: no config file specified, using the default config. In order to specify a config file use 'redis-server /path/to/redis.conf'"); 
9712     if (server
.daemonize
) daemonize(); 
9714     redisLog(REDIS_NOTICE
,"Server started, Redis version " REDIS_VERSION
); 
9716     linuxOvercommitMemoryWarning(); 
/* Load the dataset: from the append only file when appendonly is enabled,
 * otherwise (per the second branch below) from the RDB file. The elapsed
 * time is computed relative to 'start'.
 * NOTE(review): the difference of two time_t values is printed with %ld;
 * this assumes time_t has the width of long. */
9719     if (server
.appendonly
) { 
9720         if (loadAppendOnlyFile(server
.appendfilename
) == REDIS_OK
) 
9721             redisLog(REDIS_NOTICE
,"DB loaded from append only file: %ld seconds",time(NULL
)-start
); 
9723         if (rdbLoad(server
.dbfilename
) == REDIS_OK
) 
9724             redisLog(REDIS_NOTICE
,"DB loaded from disk: %ld seconds",time(NULL
)-start
); 
9726     redisLog(REDIS_NOTICE
,"The server is now ready to accept connections on port %d", server
.port
); 
/* Register beforeSleep as the event loop's before-sleep callback. */
9727     aeSetBeforeSleepProc(server
.el
,beforeSleep
); 
9729     aeDeleteEventLoop(server
.el
); 
9733 /* ============================= Backtrace support ========================= */ 
9735 #ifdef HAVE_BACKTRACE 
9736 static char *findFuncName(void *pointer
, unsigned long *offset
); 
/* Return the instruction pointer saved in the signal context 'uc', as a
 * void pointer. The field that holds it differs per platform, so the right
 * accessor is selected at compile time. Some of the #if/#else/#endif lines
 * that pair the rip/eip variants are not visible in this excerpt. */
9738 static void *getMcontextEip(ucontext_t 
*uc
) { 
9739 #if defined(__FreeBSD__) 
9740     return (void*) uc
->uc_mcontext
.mc_eip
; 
9741 #elif defined(__dietlibc__) 
9742     return (void*) uc
->uc_mcontext
.eip
; 
/* Apple targets: uc_mcontext is a pointer here; __rip or __eip is read. */
9743 #elif defined(__APPLE__) && !defined(MAC_OS_X_VERSION_10_6) 
9745     return (void*) uc
->uc_mcontext
->__ss
.__rip
; 
9747     return (void*) uc
->uc_mcontext
->__ss
.__eip
; 
9749 #elif defined(__APPLE__) && defined(MAC_OS_X_VERSION_10_6) 
9750   #if defined(_STRUCT_X86_THREAD_STATE64) && !defined(__i386__) 
9751     return (void*) uc
->uc_mcontext
->__ss
.__rip
; 
9753     return (void*) uc
->uc_mcontext
->__ss
.__eip
; 
/* NOTE(review): this branch is also selected for __x86_64__ but indexes
 * gregs[] with REG_EIP; on 64-bit glibc the slot is presumably named REG_RIP
 * -- confirm that this compiles on the 64-bit targets in use. */
9755 #elif defined(__i386__) || defined(__X86_64__) || defined(__x86_64__) 
9756     return (void*) uc
->uc_mcontext
.gregs
[REG_EIP
]; /* Linux 32/64 bit */ 
9757 #elif defined(__ia64__) /* Linux IA64 */ 
9758     return (void*) uc
->uc_mcontext
.sc_ip
; 
/* Signal handler installed by setupSigSegvAction() for fatal signals.
 * It logs the signal number, the server info string and a symbolised stack
 * trace at REDIS_WARNING level.
 *   sig:    signal number, printed in the banner
 *   info:   unused
 *   secret: the ucontext_t of the interrupted code, used to recover the
 *           faulting instruction pointer */
9764 static void segvHandler(int sig
, siginfo_t 
*info
, void *secret
) { 
9766     char **messages 
= NULL
; 
9767     int i
, trace_size 
= 0; 
9768     unsigned long offset
=0; 
9769     ucontext_t 
*uc 
= (ucontext_t
*) secret
; 
9771     REDIS_NOTUSED(info
); 
9773     redisLog(REDIS_WARNING
, 
9774         "======= Ooops! Redis %s got signal: -%d- =======", REDIS_VERSION
, sig
); 
9775     infostring 
= genRedisInfoString(); 
9776     redisLog(REDIS_WARNING
, "%s",infostring
); 
9777     /* It's not safe to sdsfree() the returned string under memory 
9778      * corruption conditions. Let it leak as we are going to abort */ 
/* Capture up to 100 return addresses of the current stack. */
9780     trace_size 
= backtrace(trace
, 100); 
9781     /* overwrite sigaction with caller's address */ 
/* NOTE(review): trace[1] is written without checking that trace_size > 1. */
9782     if (getMcontextEip(uc
) != NULL
) { 
9783         trace
[1] = getMcontextEip(uc
); 
/* NOTE(review): the result of backtrace_symbols() is indexed below without a
 * NULL check. */
9785     messages 
= backtrace_symbols(trace
, trace_size
); 
/* Frame 0 is skipped; the loop starts from index 1. */
9787     for (i
=1; i
<trace_size
; ++i
) { 
9788         char *fn 
= findFuncName(trace
[i
], &offset
), *p
; 
/* Use the libc-provided line when no symbol from the static table was found,
 * or when the offset printed after '+' in that line is smaller than the
 * offset found through the table. */
9790         p 
= strchr(messages
[i
],'+'); 
9791         if (!fn 
|| (p 
&& ((unsigned long)strtol(p
+1,NULL
,10)) < offset
)) { 
9792             redisLog(REDIS_WARNING
,"%s", messages
[i
]); 
/* NOTE(review): the last argument is cast to unsigned int but formatted
 * with %d. */
9794             redisLog(REDIS_WARNING
,"%d redis-server %p %s + %d", i
, trace
[i
], fn
, (unsigned int)offset
); 
9797     /* free(messages); Don't call free() with possibly corrupted memory. */ 
9801 static void setupSigSegvAction(void) { 
9802     struct sigaction act
; 
9804     sigemptyset (&act
.sa_mask
); 
9805     /* When the SA_SIGINFO flag is set in sa_flags then sa_sigaction 
9806      * is used. Otherwise, sa_handler is used */ 
9807     act
.sa_flags 
= SA_NODEFER 
| SA_ONSTACK 
| SA_RESETHAND 
| SA_SIGINFO
; 
9808     act
.sa_sigaction 
= segvHandler
; 
9809     sigaction (SIGSEGV
, &act
, NULL
); 
9810     sigaction (SIGBUS
, &act
, NULL
); 
9811     sigaction (SIGFPE
, &act
, NULL
); 
9812     sigaction (SIGILL
, &act
, NULL
); 
9813     sigaction (SIGBUS
, &act
, NULL
); 
9817 #include "staticsymbols.h" 
9818 /* This function tries to convert a pointer into a function name. It's used in 
9819  * order to provide a backtrace under segmentation fault that's able to 
9820  * display functions declared as static (otherwise the backtrace is useless). 
 * 
 * It scans symsTable[] (terminated by an entry whose pointer is 0) for the 
 * symbol at or below 'pointer' with the smallest distance to it. Returns 
 * that symbol's name, or NULL when no symbol qualifies. */ 
9821 static char *findFuncName(void *pointer
, unsigned long *offset
){ 
9823     unsigned long off
, minoff 
= 0; 
9825     /* Try to match against the Symbol with the smallest offset */ 
9826     for (i
=0; symsTable
[i
].pointer
; i
++) { 
9827         unsigned long lp 
= (unsigned long) pointer
; 
/* Skip the all-ones address and symbols located above the pointer. */
9829         if (lp 
!= (unsigned long)-1 && lp 
>= symsTable
[i
].pointer
) { 
9830             off
=lp
-symsTable
[i
].pointer
; 
/* First candidate (ret still negative) or a closer one than the best so far. */
9831             if (ret 
< 0 || off 
< minoff
) { 
9837     if (ret 
== -1) return NULL
; 
9839     return symsTable
[ret
].name
; 
9841 #else /* HAVE_BACKTRACE */ 
/* Variant compiled when HAVE_BACKTRACE is not defined (it sits in the #else
 * branch of that conditional); no statements are visible in its body here. */
9842 static void setupSigSegvAction(void) { 
9844 #endif /* HAVE_BACKTRACE */