2  * Copyright (c) 2009-2010, Salvatore Sanfilippo <antirez at gmail dot com> 
   5  * Redistribution and use in source and binary forms, with or without 
   6  * modification, are permitted provided that the following conditions are met: 
   8  *   * Redistributions of source code must retain the above copyright notice, 
   9  *     this list of conditions and the following disclaimer. 
  10  *   * Redistributions in binary form must reproduce the above copyright 
  11  *     notice, this list of conditions and the following disclaimer in the 
  12  *     documentation and/or other materials provided with the distribution. 
  13  *   * Neither the name of Redis nor the names of its contributors may be used 
  14  *     to endorse or promote products derived from this software without 
  15  *     specific prior written permission. 
  17  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 
  18  * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 
  19  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 
  20  * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE 
  21  * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 
  22  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 
  23  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 
  24  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 
  25  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 
  26  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 
  27  * POSSIBILITY OF SUCH DAMAGE. 
  30 #define REDIS_VERSION "1.3.4" 
  40 #define __USE_POSIX199309 
  47 #endif /* HAVE_BACKTRACE */ 
  55 #include <arpa/inet.h> 
  59 #include <sys/resource.h> 
  66 #include "solarisfixes.h" 
  70 #include "ae.h"     /* Event driven programming library */ 
  71 #include "sds.h"    /* Dynamic safe strings */ 
  72 #include "anet.h"   /* Networking the easy way */ 
  73 #include "dict.h"   /* Hash tables */ 
  74 #include "adlist.h" /* Linked lists */ 
  75 #include "zmalloc.h" /* total memory usage aware version of malloc/free */ 
  76 #include "lzf.h"    /* LZF compression library */ 
  77 #include "pqsort.h" /* Partial qsort for SORT+LIMIT */ 
  83 /* Static server configuration */ 
  84 #define REDIS_SERVERPORT        6379    /* TCP port */ 
  85 #define REDIS_MAXIDLETIME       (60*5)  /* default client timeout */ 
  86 #define REDIS_IOBUF_LEN         1024 
  87 #define REDIS_LOADBUF_LEN       1024 
  88 #define REDIS_STATIC_ARGS       4 
  89 #define REDIS_DEFAULT_DBNUM     16 
  90 #define REDIS_CONFIGLINE_MAX    1024 
  91 #define REDIS_OBJFREELIST_MAX   1000000 /* Max number of objects to cache */ 
  92 #define REDIS_MAX_SYNC_TIME     60      /* Slave can't take more to sync */ 
  93 #define REDIS_EXPIRELOOKUPS_PER_CRON    100 /* try to expire 100 keys/second */ 
  94 #define REDIS_MAX_WRITE_PER_EVENT (1024*64) 
  95 #define REDIS_REQUEST_MAX_SIZE (1024*1024*256) /* max bytes in inline command */ 
  97 /* If more then REDIS_WRITEV_THRESHOLD write packets are pending use writev */ 
  98 #define REDIS_WRITEV_THRESHOLD      3 
  99 /* Max number of iovecs used for each writev call */ 
 100 #define REDIS_WRITEV_IOVEC_COUNT    256 
 102 /* Hash table parameters */ 
 103 #define REDIS_HT_MINFILL        10      /* Minimal hash table fill 10% */ 
 106 #define REDIS_CMD_BULK          1       /* Bulk write command */ 
 107 #define REDIS_CMD_INLINE        2       /* Inline command */ 
 108 /* REDIS_CMD_DENYOOM reserves a longer comment: all the commands marked with 
 109    this flags will return an error when the 'maxmemory' option is set in the 
 110    config file and the server is using more than maxmemory bytes of memory. 
 111    In short this commands are denied on low memory conditions. */ 
 112 #define REDIS_CMD_DENYOOM       4 
 115 #define REDIS_STRING 0 
 121 /* Objects encoding */ 
 122 #define REDIS_ENCODING_RAW 0    /* Raw representation */ 
 123 #define REDIS_ENCODING_INT 1    /* Encoded as integer */ 
 125 /* Object types only used for dumping to disk */ 
 126 #define REDIS_EXPIRETIME 253 
 127 #define REDIS_SELECTDB 254 
 128 #define REDIS_EOF 255 
 130 /* Defines related to the dump file format. To store 32 bits lengths for short 
 131  * keys requires a lot of space, so we check the most significant 2 bits of 
 132  * the first byte to interpreter the length: 
 134  * 00|000000 => if the two MSB are 00 the len is the 6 bits of this byte 
 135  * 01|000000 00000000 =>  01, the len is 14 byes, 6 bits + 8 bits of next byte 
 136  * 10|000000 [32 bit integer] => if it's 01, a full 32 bit len will follow 
 137  * 11|000000 this means: specially encoded object will follow. The six bits 
 138  *           number specify the kind of object that follows. 
 139  *           See the REDIS_RDB_ENC_* defines. 
 141  * Lenghts up to 63 are stored using a single byte, most DB keys, and may 
 142  * values, will fit inside. */ 
 143 #define REDIS_RDB_6BITLEN 0 
 144 #define REDIS_RDB_14BITLEN 1 
 145 #define REDIS_RDB_32BITLEN 2 
 146 #define REDIS_RDB_ENCVAL 3 
 147 #define REDIS_RDB_LENERR UINT_MAX 
 149 /* When a length of a string object stored on disk has the first two bits 
 150  * set, the remaining two bits specify a special encoding for the object 
 151  * accordingly to the following defines: */ 
 152 #define REDIS_RDB_ENC_INT8 0        /* 8 bit signed integer */ 
 153 #define REDIS_RDB_ENC_INT16 1       /* 16 bit signed integer */ 
 154 #define REDIS_RDB_ENC_INT32 2       /* 32 bit signed integer */ 
 155 #define REDIS_RDB_ENC_LZF 3         /* string compressed with FASTLZ */ 
 157 /* Virtual memory object->where field. */ 
 158 #define REDIS_VM_MEMORY 0       /* The object is on memory */ 
 159 #define REDIS_VM_SWAPPED 1      /* The object is on disk */ 
 160 #define REDIS_VM_SWAPPING 2     /* Redis is swapping this object on disk */ 
 161 #define REDIS_VM_LOADING 3      /* Redis is loading this object from disk */ 
 163 /* Virtual memory static configuration stuff. 
 164  * Check vmFindContiguousPages() to know more about this magic numbers. */ 
 165 #define REDIS_VM_MAX_NEAR_PAGES 65536 
 166 #define REDIS_VM_MAX_RANDOM_JUMP 4096 
 167 #define REDIS_VM_MAX_THREADS 32 
 168 #define REDIS_THREAD_STACK_SIZE (1024*1024*4) 
 169 /* The following is the *percentage* of completed I/O jobs to process when the 
 170  * handelr is called. While Virtual Memory I/O operations are performed by 
 171  * threads, this operations must be processed by the main thread when completed 
 172  * in order to take effect. */ 
 173 #define REDIS_MAX_COMPLETED_JOBS_PROCESSED 1 
 176 #define REDIS_SLAVE 1       /* This client is a slave server */ 
 177 #define REDIS_MASTER 2      /* This client is a master server */ 
 178 #define REDIS_MONITOR 4     /* This client is a slave monitor, see MONITOR */ 
 179 #define REDIS_MULTI 8       /* This client is in a MULTI context */ 
 180 #define REDIS_BLOCKED 16    /* The client is waiting in a blocking operation */ 
 181 #define REDIS_IO_WAIT 32    /* The client is waiting for Virtual Memory I/O */ 
 183 /* Slave replication state - slave side */ 
 184 #define REDIS_REPL_NONE 0   /* No active replication */ 
 185 #define REDIS_REPL_CONNECT 1    /* Must connect to master */ 
 186 #define REDIS_REPL_CONNECTED 2  /* Connected to master */ 
 188 /* Slave replication state - from the point of view of master 
 189  * Note that in SEND_BULK and ONLINE state the slave receives new updates 
 190  * in its output queue. In the WAIT_BGSAVE state instead the server is waiting 
 191  * to start the next background saving in order to send updates to it. */ 
 192 #define REDIS_REPL_WAIT_BGSAVE_START 3 /* master waits bgsave to start feeding it */ 
 193 #define REDIS_REPL_WAIT_BGSAVE_END 4 /* master waits bgsave to start bulk DB transmission */ 
 194 #define REDIS_REPL_SEND_BULK 5 /* master is sending the bulk DB */ 
 195 #define REDIS_REPL_ONLINE 6 /* bulk DB already transmitted, receive updates */ 
 197 /* List related stuff */ 
 201 /* Sort operations */ 
 202 #define REDIS_SORT_GET 0 
 203 #define REDIS_SORT_ASC 1 
 204 #define REDIS_SORT_DESC 2 
 205 #define REDIS_SORTKEY_MAX 1024 
 208 #define REDIS_DEBUG 0 
 209 #define REDIS_VERBOSE 1 
 210 #define REDIS_NOTICE 2 
 211 #define REDIS_WARNING 3 
 213 /* Anti-warning macro... */ 
 214 #define REDIS_NOTUSED(V) ((void) V) 
 216 #define ZSKIPLIST_MAXLEVEL 32 /* Should be enough for 2^32 elements */ 
 217 #define ZSKIPLIST_P 0.25      /* Skiplist P = 1/4 */ 
 219 /* Append only defines */ 
 220 #define APPENDFSYNC_NO 0 
 221 #define APPENDFSYNC_ALWAYS 1 
 222 #define APPENDFSYNC_EVERYSEC 2 
 224 /* We can print the stacktrace, so our assert is defined this way: */ 
 225 #define redisAssert(_e) ((_e)?(void)0 : (_redisAssert(#_e,__FILE__,__LINE__),_exit(1))) 
 226 static void _redisAssert(char *estr
, char *file
, int line
); 
 228 /*================================= Data types ============================== */ 
 230 /* A redis object, that is a type able to hold a string / list / set */ 
 232 /* The VM object structure */ 
 233 struct redisObjectVM 
{ 
 234     off_t page
;         /* the page at which the object is stored on disk */
 235     off_t usedpages
;    /* number of pages used on disk */ 
 236     time_t atime
;       /* Last access time */ 
 239 /* The actual Redis Object */ 
 240 typedef struct redisObject 
{ 
 243     unsigned char encoding
; 
 244     unsigned char storage
;  /* If this object is a key, where is the value? 
 245                              * REDIS_VM_MEMORY, REDIS_VM_SWAPPED, ... */ 
 246     unsigned char vtype
; /* If this object is a key, and value is swapped out, 
 247                           * this is the type of the swapped out object. */ 
    /* VM fields, these are only allocated if VM is active, otherwise the
     * object allocation function will just allocate
     * sizeof(redisObject) minus sizeof(redisObjectVM), so using
     * Redis without VM active will not have any overhead. */
 253     struct redisObjectVM vm
; 
/* Macro used to initialize a Redis object allocated on the stack.
 * Note that this macro is kept near the structure definition to make sure
 * we'll update it when the structure is changed, to avoid bugs like
 * bug #85 introduced exactly in this way. */
 260 #define initStaticStringObject(_var,_ptr) do { \ 
 262     _var.type = REDIS_STRING; \ 
 263     _var.encoding = REDIS_ENCODING_RAW; \ 
 265     if (server.vm_enabled) _var.storage = REDIS_VM_MEMORY; \ 
 268 typedef struct redisDb 
{ 
 269     dict 
*dict
;                 /* The keyspace for this DB */ 
 270     dict 
*expires
;              /* Timeout of keys with a timeout set */ 
 271     dict 
*blockingkeys
;         /* Keys with clients waiting for data (BLPOP) */ 
 272     dict 
*io_keys
;              /* Keys with clients waiting for VM I/O */ 
 276 /* Client MULTI/EXEC state */ 
 277 typedef struct multiCmd 
{ 
 280     struct redisCommand 
*cmd
; 
 283 typedef struct multiState 
{ 
 284     multiCmd 
*commands
;     /* Array of MULTI commands */ 
 285     int count
;              /* Total number of MULTI commands */ 
/* With multiplexing we need to keep per-client state.
 * Clients are kept in a linked list. */
 290 typedef struct redisClient 
{ 
 295     robj 
**argv
, **mbargv
; 
 297     int bulklen
;            /* bulk read len. -1 if not in bulk read mode */ 
 298     int multibulk
;          /* multi bulk command format active */ 
 301     time_t lastinteraction
; /* time of the last interaction, used for timeout */ 
 302     int flags
;              /* REDIS_SLAVE | REDIS_MONITOR | REDIS_MULTI ... */ 
 303     int slaveseldb
;         /* slave selected db, if this client is a slave */ 
 304     int authenticated
;      /* when requirepass is non-NULL */ 
 305     int replstate
;          /* replication state if this is a slave */ 
 306     int repldbfd
;           /* replication DB file descriptor */ 
 307     long repldboff
;         /* replication DB file offset */ 
 308     off_t repldbsize
;       /* replication DB file size */ 
 309     multiState mstate
;      /* MULTI/EXEC state */ 
 310     robj 
**blockingkeys
;    /* The key we are waiting to terminate a blocking 
 311                              * operation such as BLPOP. Otherwise NULL. */ 
 312     int blockingkeysnum
;    /* Number of blocking keys */ 
 313     time_t blockingto
;      /* Blocking operation timeout. If UNIX current time 
 314                              * is >= blockingto then the operation timed out. */ 
 315     list 
*io_keys
;          /* Keys this client is waiting to be loaded from the 
 316                              * swap file in order to continue. */ 
 324 /* Global server state structure */ 
 329     dict 
*sharingpool
;          /* Pool used for object sharing */
 330     unsigned int sharingpoolsize
; 
 331     long long dirty
;            /* changes to DB from the last save */ 
 333     list 
*slaves
, *monitors
; 
 334     char neterr
[ANET_ERR_LEN
]; 
 336     int cronloops
;              /* number of times the cron function run */ 
 337     list 
*objfreelist
;          /* A list of freed objects to avoid malloc() */ 
 338     time_t lastsave
;            /* Unix time of last successful save */
 339     /* Fields used only for stats */ 
 340     time_t stat_starttime
;         /* server start time */ 
 341     long long stat_numcommands
;    /* number of processed commands */ 
 342     long long stat_numconnections
; /* number of connections received */ 
 355     pid_t bgsavechildpid
; 
 356     pid_t bgrewritechildpid
; 
 357     sds bgrewritebuf
; /* buffer taken by parent during append only rewrite */
 358     struct saveparam 
*saveparams
; 
 363     char *appendfilename
; 
 367     /* Replication related */ 
 372     redisClient 
*master
;    /* client that is master for this slave */ 
 374     unsigned int maxclients
; 
 375     unsigned long long maxmemory
; 
 376     unsigned int blpop_blocked_clients
; 
 377     unsigned int vm_blocked_clients
; 
 378     /* Sort parameters - qsort_r() is only available under BSD so we 
 379      * have to take this state global, in order to pass it to sortCompare() */ 
 383     /* Virtual memory configuration */ 
 388     unsigned long long vm_max_memory
; 
 389     /* Virtual memory state */ 
 392     off_t vm_next_page
; /* Next probably empty page */ 
 393     off_t vm_near_pages
; /* Number of pages allocated sequentially */ 
 394     unsigned char *vm_bitmap
; /* Bitmap of free/used pages */ 
 395     time_t unixtime
;    /* Unix time sampled every second. */ 
 396     /* Virtual memory I/O threads stuff */ 
    /* An I/O thread processes an element taken from the io_newjobs queue and
     * puts the result of the operation in the io_processed list. While the
     * job is being processed, it's put on the io_processing queue. */
 400     list 
*io_newjobs
; /* List of VM I/O jobs yet to be processed */ 
 401     list 
*io_processing
; /* List of VM I/O jobs being processed */ 
 402     list 
*io_processed
; /* List of VM I/O jobs already processed */ 
 403     list 
*io_ready_clients
; /* Clients ready to be unblocked. All keys loaded */ 
 404     pthread_mutex_t io_mutex
; /* lock to access io_jobs/io_done/io_thread_job */ 
 405     pthread_mutex_t obj_freelist_mutex
; /* safe redis objects creation/free */ 
 406     pthread_mutex_t io_swapfile_mutex
; /* So we can lseek + write */ 
 407     pthread_attr_t io_threads_attr
; /* attributes for threads creation */ 
 408     int io_active_threads
; /* Number of running I/O threads */ 
 409     int vm_max_threads
; /* Max number of I/O threads running at the same time */ 
    /* Our main thread is blocked on the event loop, looking for sockets ready
     * to be read or written, so when a threaded I/O operation is ready to be
     * processed by the main thread, the I/O thread will use a unix pipe to
     * awake the main thread. The following are the two pipe FDs. */
 414     int io_ready_pipe_read
; 
 415     int io_ready_pipe_write
; 
 416     /* Virtual memory stats */ 
 417     unsigned long long vm_stats_used_pages
; 
 418     unsigned long long vm_stats_swapped_objects
; 
 419     unsigned long long vm_stats_swapouts
; 
 420     unsigned long long vm_stats_swapins
; 
 424 typedef void redisCommandProc(redisClient 
*c
); 
 425 struct redisCommand 
{ 
 427     redisCommandProc 
*proc
; 
 430     /* What keys should be loaded in background when calling this command? */ 
 431     int vm_firstkey
; /* The first argument that's a key (0 = no keys) */ 
 432     int vm_lastkey
;  /* The last argument that's a key */
 433     int vm_keystep
;  /* The step between first and last key */ 
 436 struct redisFunctionSym 
{ 
 438     unsigned long pointer
; 
 441 typedef struct _redisSortObject 
{ 
 449 typedef struct _redisSortOperation 
{ 
 452 } redisSortOperation
; 
 454 /* ZSETs use a specialized version of Skiplists */ 
 456 typedef struct zskiplistNode 
{ 
 457     struct zskiplistNode 
**forward
; 
 458     struct zskiplistNode 
*backward
; 
 463 typedef struct zskiplist 
{ 
 464     struct zskiplistNode 
*header
, *tail
; 
 465     unsigned long length
; 
 469 typedef struct zset 
{ 
 474 /* Our shared "common" objects */ 
 476 struct sharedObjectsStruct 
{ 
 477     robj 
*crlf
, *ok
, *err
, *emptybulk
, *czero
, *cone
, *pong
, *space
, 
 478     *colon
, *nullbulk
, *nullmultibulk
, *queued
, 
 479     *emptymultibulk
, *wrongtypeerr
, *nokeyerr
, *syntaxerr
, *sameobjecterr
, 
 480     *outofrangeerr
, *plus
, 
 481     *select0
, *select1
, *select2
, *select3
, *select4
, 
 482     *select5
, *select6
, *select7
, *select8
, *select9
; 
 485 /* Global vars that are actally used as constants. The following double 
 486  * values are used for double on-disk serialization, and are initialized 
 487  * at runtime to avoid strange compiler optimizations. */ 
 489 static double R_Zero
, R_PosInf
, R_NegInf
, R_Nan
; 
 491 /* VM threaded I/O request message */ 
 492 #define REDIS_IOJOB_LOAD 0          /* Load from disk to memory */ 
 493 #define REDIS_IOJOB_PREPARE_SWAP 1  /* Compute needed pages */ 
 494 #define REDIS_IOJOB_DO_SWAP 2       /* Swap from memory to disk */ 
 495 typedef struct iojob 
{ 
 496     int type
;   /* Request type, REDIS_IOJOB_* */ 
 497     redisDb 
*db
;/* Redis database */ 
 498     robj 
*key
;  /* This I/O request is about swapping this key */ 
 499     robj 
*val
;  /* the value to swap for REDIS_IOJOB_*_SWAP, otherwise this
                 * field is populated by the I/O thread for REDIS_IOJOB_LOAD. */
 501     off_t page
; /* Swap page where to read/write the object */ 
 502     off_t pages
; /* Swap pages needed to save object. PREPARE_SWAP return val */
 503     int canceled
; /* True if this command was canceled by blocking side of VM */ 
 504     pthread_t thread
; /* ID of the thread processing this entry */ 
 507 /*================================ Prototypes =============================== */ 
 509 static void freeStringObject(robj 
*o
); 
 510 static void freeListObject(robj 
*o
); 
 511 static void freeSetObject(robj 
*o
); 
 512 static void decrRefCount(void *o
); 
 513 static robj 
*createObject(int type
, void *ptr
); 
 514 static void freeClient(redisClient 
*c
); 
 515 static int rdbLoad(char *filename
); 
 516 static void addReply(redisClient 
*c
, robj 
*obj
); 
 517 static void addReplySds(redisClient 
*c
, sds s
); 
 518 static void incrRefCount(robj 
*o
); 
 519 static int rdbSaveBackground(char *filename
); 
 520 static robj 
*createStringObject(char *ptr
, size_t len
); 
 521 static robj 
*dupStringObject(robj 
*o
); 
 522 static void replicationFeedSlaves(list 
*slaves
, struct redisCommand 
*cmd
, int dictid
, robj 
**argv
, int argc
); 
 523 static void feedAppendOnlyFile(struct redisCommand 
*cmd
, int dictid
, robj 
**argv
, int argc
); 
 524 static int syncWithMaster(void); 
 525 static robj 
*tryObjectSharing(robj 
*o
); 
 526 static int tryObjectEncoding(robj 
*o
); 
 527 static robj 
*getDecodedObject(robj 
*o
); 
 528 static int removeExpire(redisDb 
*db
, robj 
*key
); 
 529 static int expireIfNeeded(redisDb 
*db
, robj 
*key
); 
 530 static int deleteIfVolatile(redisDb 
*db
, robj 
*key
); 
 531 static int deleteIfSwapped(redisDb 
*db
, robj 
*key
); 
 532 static int deleteKey(redisDb 
*db
, robj 
*key
); 
 533 static time_t getExpire(redisDb 
*db
, robj 
*key
); 
 534 static int setExpire(redisDb 
*db
, robj 
*key
, time_t when
); 
 535 static void updateSlavesWaitingBgsave(int bgsaveerr
); 
 536 static void freeMemoryIfNeeded(void); 
 537 static int processCommand(redisClient 
*c
); 
 538 static void setupSigSegvAction(void); 
 539 static void rdbRemoveTempFile(pid_t childpid
); 
 540 static void aofRemoveTempFile(pid_t childpid
); 
 541 static size_t stringObjectLen(robj 
*o
); 
 542 static void processInputBuffer(redisClient 
*c
); 
 543 static zskiplist 
*zslCreate(void); 
 544 static void zslFree(zskiplist 
*zsl
); 
 545 static void zslInsert(zskiplist 
*zsl
, double score
, robj 
*obj
); 
 546 static void sendReplyToClientWritev(aeEventLoop 
*el
, int fd
, void *privdata
, int mask
); 
 547 static void initClientMultiState(redisClient 
*c
); 
 548 static void freeClientMultiState(redisClient 
*c
); 
 549 static void queueMultiCommand(redisClient 
*c
, struct redisCommand 
*cmd
); 
 550 static void unblockClientWaitingData(redisClient 
*c
); 
 551 static int handleClientsWaitingListPush(redisClient 
*c
, robj 
*key
, robj 
*ele
); 
 552 static void vmInit(void); 
 553 static void vmMarkPagesFree(off_t page
, off_t count
); 
 554 static robj 
*vmLoadObject(robj 
*key
); 
 555 static robj 
*vmPreviewObject(robj 
*key
); 
 556 static int vmSwapOneObjectBlocking(void); 
 557 static int vmSwapOneObjectThreaded(void); 
 558 static int vmCanSwapOut(void); 
 559 static int tryFreeOneObjectFromFreelist(void); 
 560 static void acceptHandler(aeEventLoop 
*el
, int fd
, void *privdata
, int mask
); 
 561 static void vmThreadedIOCompletedJob(aeEventLoop 
*el
, int fd
, void *privdata
, int mask
); 
 562 static void vmCancelThreadedIOJob(robj 
*o
); 
 563 static void lockThreadedIO(void); 
 564 static void unlockThreadedIO(void); 
 565 static int vmSwapObjectThreaded(robj 
*key
, robj 
*val
, redisDb 
*db
); 
 566 static void freeIOJob(iojob 
*j
); 
 567 static void queueIOJob(iojob 
*j
); 
 568 static int vmWriteObjectOnSwap(robj 
*o
, off_t page
); 
 569 static robj 
*vmReadObjectFromSwap(off_t page
, int type
); 
 570 static void waitEmptyIOJobsQueue(void); 
 571 static void vmReopenSwapFile(void); 
 572 static int vmFreePage(off_t page
); 
 573 static int blockClientOnSwappedKeys(struct redisCommand 
*cmd
, redisClient 
*c
); 
 574 static int dontWaitForSwappedKey(redisClient 
*c
, robj 
*key
); 
 575 static void handleClientsBlockedOnSwappedKey(redisDb 
*db
, robj 
*key
); 
 576 static void readQueryFromClient(aeEventLoop 
*el
, int fd
, void *privdata
, int mask
); 
 577 static struct redisCommand 
*lookupCommand(char *name
); 
 578 static void call(redisClient 
*c
, struct redisCommand 
*cmd
); 
 579 static void resetClient(redisClient 
*c
); 
 581 static void authCommand(redisClient 
*c
); 
 582 static void pingCommand(redisClient 
*c
); 
 583 static void echoCommand(redisClient 
*c
); 
 584 static void setCommand(redisClient 
*c
); 
 585 static void setnxCommand(redisClient 
*c
); 
 586 static void getCommand(redisClient 
*c
); 
 587 static void delCommand(redisClient 
*c
); 
 588 static void existsCommand(redisClient 
*c
); 
 589 static void incrCommand(redisClient 
*c
); 
 590 static void decrCommand(redisClient 
*c
); 
 591 static void incrbyCommand(redisClient 
*c
); 
 592 static void decrbyCommand(redisClient 
*c
); 
 593 static void selectCommand(redisClient 
*c
); 
 594 static void randomkeyCommand(redisClient 
*c
); 
 595 static void keysCommand(redisClient 
*c
); 
 596 static void dbsizeCommand(redisClient 
*c
); 
 597 static void lastsaveCommand(redisClient 
*c
); 
 598 static void saveCommand(redisClient 
*c
); 
 599 static void bgsaveCommand(redisClient 
*c
); 
 600 static void bgrewriteaofCommand(redisClient 
*c
); 
 601 static void shutdownCommand(redisClient 
*c
); 
 602 static void moveCommand(redisClient 
*c
); 
 603 static void renameCommand(redisClient 
*c
); 
 604 static void renamenxCommand(redisClient 
*c
); 
 605 static void lpushCommand(redisClient 
*c
); 
 606 static void rpushCommand(redisClient 
*c
); 
 607 static void lpopCommand(redisClient 
*c
); 
 608 static void rpopCommand(redisClient 
*c
); 
 609 static void llenCommand(redisClient 
*c
); 
 610 static void lindexCommand(redisClient 
*c
); 
 611 static void lrangeCommand(redisClient 
*c
); 
 612 static void ltrimCommand(redisClient 
*c
); 
 613 static void typeCommand(redisClient 
*c
); 
 614 static void lsetCommand(redisClient 
*c
); 
 615 static void saddCommand(redisClient 
*c
); 
 616 static void sremCommand(redisClient 
*c
); 
 617 static void smoveCommand(redisClient 
*c
); 
 618 static void sismemberCommand(redisClient 
*c
); 
 619 static void scardCommand(redisClient 
*c
); 
 620 static void spopCommand(redisClient 
*c
); 
 621 static void srandmemberCommand(redisClient 
*c
); 
 622 static void sinterCommand(redisClient 
*c
); 
 623 static void sinterstoreCommand(redisClient 
*c
); 
 624 static void sunionCommand(redisClient 
*c
); 
 625 static void sunionstoreCommand(redisClient 
*c
); 
 626 static void sdiffCommand(redisClient 
*c
); 
 627 static void sdiffstoreCommand(redisClient 
*c
); 
 628 static void syncCommand(redisClient 
*c
); 
 629 static void flushdbCommand(redisClient 
*c
); 
 630 static void flushallCommand(redisClient 
*c
); 
 631 static void sortCommand(redisClient 
*c
); 
 632 static void lremCommand(redisClient 
*c
); 
 633 static void rpoplpushcommand(redisClient 
*c
); 
 634 static void infoCommand(redisClient 
*c
); 
 635 static void mgetCommand(redisClient 
*c
); 
 636 static void monitorCommand(redisClient 
*c
); 
 637 static void expireCommand(redisClient 
*c
); 
 638 static void expireatCommand(redisClient 
*c
); 
 639 static void getsetCommand(redisClient 
*c
); 
 640 static void ttlCommand(redisClient 
*c
); 
 641 static void slaveofCommand(redisClient 
*c
); 
 642 static void debugCommand(redisClient 
*c
); 
 643 static void msetCommand(redisClient 
*c
); 
 644 static void msetnxCommand(redisClient 
*c
); 
 645 static void zaddCommand(redisClient 
*c
); 
 646 static void zincrbyCommand(redisClient 
*c
); 
 647 static void zrangeCommand(redisClient 
*c
); 
 648 static void zrangebyscoreCommand(redisClient 
*c
); 
 649 static void zcountCommand(redisClient 
*c
); 
 650 static void zrevrangeCommand(redisClient 
*c
); 
 651 static void zcardCommand(redisClient 
*c
); 
 652 static void zremCommand(redisClient 
*c
); 
 653 static void zscoreCommand(redisClient 
*c
); 
 654 static void zremrangebyscoreCommand(redisClient 
*c
); 
 655 static void multiCommand(redisClient 
*c
); 
 656 static void execCommand(redisClient 
*c
); 
 657 static void discardCommand(redisClient 
*c
); 
 658 static void blpopCommand(redisClient 
*c
); 
 659 static void brpopCommand(redisClient 
*c
); 
 660 static void appendCommand(redisClient 
*c
); 
 662 /*================================= Globals ================================= */ 
 665 static struct redisServer server
; /* server global state */ 
 666 static struct redisCommand cmdTable
[] = { 
 667     {"get",getCommand
,2,REDIS_CMD_INLINE
,1,1,1}, 
 668     {"set",setCommand
,3,REDIS_CMD_BULK
|REDIS_CMD_DENYOOM
,0,0,0}, 
 669     {"setnx",setnxCommand
,3,REDIS_CMD_BULK
|REDIS_CMD_DENYOOM
,0,0,0}, 
 670     {"append",appendCommand
,3,REDIS_CMD_BULK
|REDIS_CMD_DENYOOM
,1,1,1}, 
 671     {"del",delCommand
,-2,REDIS_CMD_INLINE
,0,0,0}, 
 672     {"exists",existsCommand
,2,REDIS_CMD_INLINE
,1,1,1}, 
 673     {"incr",incrCommand
,2,REDIS_CMD_INLINE
|REDIS_CMD_DENYOOM
,1,1,1}, 
 674     {"decr",decrCommand
,2,REDIS_CMD_INLINE
|REDIS_CMD_DENYOOM
,1,1,1}, 
 675     {"mget",mgetCommand
,-2,REDIS_CMD_INLINE
,1,-1,1}, 
 676     {"rpush",rpushCommand
,3,REDIS_CMD_BULK
|REDIS_CMD_DENYOOM
,1,1,1}, 
 677     {"lpush",lpushCommand
,3,REDIS_CMD_BULK
|REDIS_CMD_DENYOOM
,1,1,1}, 
 678     {"rpop",rpopCommand
,2,REDIS_CMD_INLINE
,1,1,1}, 
 679     {"lpop",lpopCommand
,2,REDIS_CMD_INLINE
,1,1,1}, 
 680     {"brpop",brpopCommand
,-3,REDIS_CMD_INLINE
,1,1,1}, 
 681     {"blpop",blpopCommand
,-3,REDIS_CMD_INLINE
,1,1,1}, 
 682     {"llen",llenCommand
,2,REDIS_CMD_INLINE
,1,1,1}, 
 683     {"lindex",lindexCommand
,3,REDIS_CMD_INLINE
,1,1,1}, 
 684     {"lset",lsetCommand
,4,REDIS_CMD_BULK
|REDIS_CMD_DENYOOM
,1,1,1}, 
 685     {"lrange",lrangeCommand
,4,REDIS_CMD_INLINE
,1,1,1}, 
 686     {"ltrim",ltrimCommand
,4,REDIS_CMD_INLINE
,1,1,1}, 
 687     {"lrem",lremCommand
,4,REDIS_CMD_BULK
,1,1,1}, 
 688     {"rpoplpush",rpoplpushcommand
,3,REDIS_CMD_INLINE
|REDIS_CMD_DENYOOM
,1,2,1}, 
 689     {"sadd",saddCommand
,3,REDIS_CMD_BULK
|REDIS_CMD_DENYOOM
,1,1,1}, 
 690     {"srem",sremCommand
,3,REDIS_CMD_BULK
,1,1,1}, 
 691     {"smove",smoveCommand
,4,REDIS_CMD_BULK
,1,2,1}, 
 692     {"sismember",sismemberCommand
,3,REDIS_CMD_BULK
,1,1,1}, 
 693     {"scard",scardCommand
,2,REDIS_CMD_INLINE
,1,1,1}, 
 694     {"spop",spopCommand
,2,REDIS_CMD_INLINE
,1,1,1}, 
 695     {"srandmember",srandmemberCommand
,2,REDIS_CMD_INLINE
,1,1,1}, 
 696     {"sinter",sinterCommand
,-2,REDIS_CMD_INLINE
|REDIS_CMD_DENYOOM
,1,-1,1}, 
 697     {"sinterstore",sinterstoreCommand
,-3,REDIS_CMD_INLINE
|REDIS_CMD_DENYOOM
,2,-1,1}, 
 698     {"sunion",sunionCommand
,-2,REDIS_CMD_INLINE
|REDIS_CMD_DENYOOM
,1,-1,1}, 
 699     {"sunionstore",sunionstoreCommand
,-3,REDIS_CMD_INLINE
|REDIS_CMD_DENYOOM
,2,-1,1}, 
 700     {"sdiff",sdiffCommand
,-2,REDIS_CMD_INLINE
|REDIS_CMD_DENYOOM
,1,-1,1}, 
 701     {"sdiffstore",sdiffstoreCommand
,-3,REDIS_CMD_INLINE
|REDIS_CMD_DENYOOM
,2,-1,1}, 
 702     {"smembers",sinterCommand
,2,REDIS_CMD_INLINE
,1,1,1}, 
 703     {"zadd",zaddCommand
,4,REDIS_CMD_BULK
|REDIS_CMD_DENYOOM
,1,1,1}, 
 704     {"zincrby",zincrbyCommand
,4,REDIS_CMD_BULK
|REDIS_CMD_DENYOOM
,1,1,1}, 
 705     {"zrem",zremCommand
,3,REDIS_CMD_BULK
,1,1,1}, 
 706     {"zremrangebyscore",zremrangebyscoreCommand
,4,REDIS_CMD_INLINE
,1,1,1}, 
 707     {"zrange",zrangeCommand
,-4,REDIS_CMD_INLINE
,1,1,1}, 
 708     {"zrangebyscore",zrangebyscoreCommand
,-4,REDIS_CMD_INLINE
,1,1,1}, 
 709     {"zcount",zcountCommand
,4,REDIS_CMD_INLINE
,1,1,1}, 
 710     {"zrevrange",zrevrangeCommand
,-4,REDIS_CMD_INLINE
,1,1,1}, 
 711     {"zcard",zcardCommand
,2,REDIS_CMD_INLINE
,1,1,1}, 
 712     {"zscore",zscoreCommand
,3,REDIS_CMD_BULK
|REDIS_CMD_DENYOOM
,1,1,1}, 
 713     {"incrby",incrbyCommand
,3,REDIS_CMD_INLINE
|REDIS_CMD_DENYOOM
,1,1,1}, 
 714     {"decrby",decrbyCommand
,3,REDIS_CMD_INLINE
|REDIS_CMD_DENYOOM
,1,1,1}, 
 715     {"getset",getsetCommand
,3,REDIS_CMD_BULK
|REDIS_CMD_DENYOOM
,1,1,1}, 
 716     {"mset",msetCommand
,-3,REDIS_CMD_BULK
|REDIS_CMD_DENYOOM
,1,-1,2}, 
 717     {"msetnx",msetnxCommand
,-3,REDIS_CMD_BULK
|REDIS_CMD_DENYOOM
,1,-1,2}, 
 718     {"randomkey",randomkeyCommand
,1,REDIS_CMD_INLINE
,0,0,0}, 
 719     {"select",selectCommand
,2,REDIS_CMD_INLINE
,0,0,0}, 
 720     {"move",moveCommand
,3,REDIS_CMD_INLINE
,1,1,1}, 
 721     {"rename",renameCommand
,3,REDIS_CMD_INLINE
,1,1,1}, 
 722     {"renamenx",renamenxCommand
,3,REDIS_CMD_INLINE
,1,1,1}, 
 723     {"expire",expireCommand
,3,REDIS_CMD_INLINE
,0,0,0}, 
 724     {"expireat",expireatCommand
,3,REDIS_CMD_INLINE
,0,0,0}, 
 725     {"keys",keysCommand
,2,REDIS_CMD_INLINE
,0,0,0}, 
 726     {"dbsize",dbsizeCommand
,1,REDIS_CMD_INLINE
,0,0,0}, 
 727     {"auth",authCommand
,2,REDIS_CMD_INLINE
,0,0,0}, 
 728     {"ping",pingCommand
,1,REDIS_CMD_INLINE
,0,0,0}, 
 729     {"echo",echoCommand
,2,REDIS_CMD_BULK
,0,0,0}, 
 730     {"save",saveCommand
,1,REDIS_CMD_INLINE
,0,0,0}, 
 731     {"bgsave",bgsaveCommand
,1,REDIS_CMD_INLINE
,0,0,0}, 
 732     {"bgrewriteaof",bgrewriteaofCommand
,1,REDIS_CMD_INLINE
,0,0,0}, 
 733     {"shutdown",shutdownCommand
,1,REDIS_CMD_INLINE
,0,0,0}, 
 734     {"lastsave",lastsaveCommand
,1,REDIS_CMD_INLINE
,0,0,0}, 
 735     {"type",typeCommand
,2,REDIS_CMD_INLINE
,1,1,1}, 
 736     {"multi",multiCommand
,1,REDIS_CMD_INLINE
,0,0,0}, 
 737     {"exec",execCommand
,1,REDIS_CMD_INLINE
,0,0,0}, 
 738     {"discard",discardCommand
,1,REDIS_CMD_INLINE
,0,0,0}, 
 739     {"sync",syncCommand
,1,REDIS_CMD_INLINE
,0,0,0}, 
 740     {"flushdb",flushdbCommand
,1,REDIS_CMD_INLINE
,0,0,0}, 
 741     {"flushall",flushallCommand
,1,REDIS_CMD_INLINE
,0,0,0}, 
 742     {"sort",sortCommand
,-2,REDIS_CMD_INLINE
|REDIS_CMD_DENYOOM
,1,1,1}, 
 743     {"info",infoCommand
,1,REDIS_CMD_INLINE
,0,0,0}, 
 744     {"monitor",monitorCommand
,1,REDIS_CMD_INLINE
,0,0,0}, 
 745     {"ttl",ttlCommand
,2,REDIS_CMD_INLINE
,1,1,1}, 
 746     {"slaveof",slaveofCommand
,3,REDIS_CMD_INLINE
,0,0,0}, 
 747     {"debug",debugCommand
,-2,REDIS_CMD_INLINE
,0,0,0}, 
 748     {NULL
,NULL
,0,0,0,0,0} 
 751 /*============================ Utility functions ============================ */ 
 753 /* Glob-style pattern matching. */ 
 /* Test `string` (stringLen bytes) against the glob `pattern` (patternLen
  * bytes). The visible branches show: runs of consecutive '*' being
  * collapsed, followed by a recursive call on the rest of the pattern; a
  * set-style branch with '^' negation, '\\' escapes, a closing ']' and
  * "x-y" ranges; and a plain byte comparison that has a tolower() variant
  * (presumably selected by `nocase`). Returns 1 for a match and 0 for no
  * match.
  * NOTE(review): large parts of the body are not visible in this listing;
  * confirm the advance/termination rules against the full source. */
 754 int stringmatchlen(const char *pattern
, int patternLen
, 
 755         const char *string
, int stringLen
, int nocase
) 
 760             while (pattern
[1] == '*') { 
 765                 return 1; /* match */ 
 767                 if (stringmatchlen(pattern
+1, patternLen
-1, 
 768                             string
, stringLen
, nocase
)) 
 769                     return 1; /* match */ 
 773             return 0; /* no match */ 
 777                 return 0; /* no match */ 
 787             not = pattern
[0] == '^'; 
 794                 if (pattern
[0] == '\\') { 
 797                     if (pattern
[0] == string
[0]) 
 799                 } else if (pattern
[0] == ']') { 
 801                 } else if (patternLen 
== 0) { 
 805                 } else if (pattern
[1] == '-' && patternLen 
>= 3) { 
 806                     int start 
= pattern
[0]; 
 807                     int end 
= pattern
[2]; 
 815                         start 
= tolower(start
); 
 821                     if (c 
>= start 
&& c 
<= end
) 
 825                         if (pattern
[0] == string
[0]) 
 828                         if (tolower((int)pattern
[0]) == tolower((int)string
[0])) 
 838                 return 0; /* no match */ 
 844             if (patternLen 
>= 2) { 
 851                 if (pattern
[0] != string
[0]) 
 852                     return 0; /* no match */ 
 854                 if (tolower((int)pattern
[0]) != tolower((int)string
[0])) 
 855                     return 0; /* no match */ 
 863         if (stringLen 
== 0) { 
 864             while(*pattern 
== '*') { 
 871     if (patternLen 
== 0 && stringLen 
== 0) 
 /* printf-style logger. Output goes to stdout when server.logfile is NULL;
  * otherwise server.logfile is opened in append mode on every call and
  * closed again before returning. A message is written only when `level`
  * is >= server.verbosity, prefixed with the process id, a
  * "%d %b %H:%M:%S" local timestamp and the character c[level].
  * NOTE(review): the handling of a failed fopen() is not visible in this
  * listing; confirm fp is checked before it is used. */
 876 static void redisLog(int level
, const char *fmt
, ...) { 
 880     fp 
= (server
.logfile 
== NULL
) ? stdout 
: fopen(server
.logfile
,"a"); 
 884     if (level 
>= server
.verbosity
) { 
 890         strftime(buf
,64,"%d %b %H:%M:%S",localtime(&now
)); 
 891         fprintf(fp
,"[%d] %s %c ",(int)getpid(),buf
,c
[level
]); 
 892         vfprintf(fp
, fmt
, ap
); 
 898     if (server
.logfile
) fclose(fp
); 
 901 /*====================== Hash table type implementation  ==================== */ 
 903 /* This is a hash table type that uses the SDS dynamic strings library as 
 904  * keys and redis objects as values (objects can hold SDS strings, 
 /* Value destructor callback; zsetDictType installs it for values described
  * there as malloc(sizeof(double)). `privdata` is unused.
  * NOTE(review): the statement that releases `val` is not visible in this
  * listing; confirm against the full source. */
 907 static void dictVanillaFree(void *privdata
, void *val
) 
 909     DICT_NOTUSED(privdata
); 
 913 static void dictListDestructor(void *privdata
, void *val
) 
 915     DICT_NOTUSED(privdata
); 
 916     listRelease((list
*)val
); 
 919 static int sdsDictKeyCompare(void *privdata
, const void *key1
, 
 923     DICT_NOTUSED(privdata
); 
 925     l1 
= sdslen((sds
)key1
); 
 926     l2 
= sdslen((sds
)key2
); 
 927     if (l1 
!= l2
) return 0; 
 928     return memcmp(key1
, key2
, l1
) == 0; 
 /* Destructor callback installed as key (and, in hashDictType, value)
  * destructor by the dict types of this file. A NULL `val` is accepted and
  * ignored. `privdata` is unused.
  * NOTE(review): the statement that releases a non-NULL object is not
  * visible in this listing; confirm against the full source. */
 931 static void dictRedisObjectDestructor(void *privdata
, void *val
) 
 933     DICT_NOTUSED(privdata
); 
 935     if (val 
== NULL
) return; /* Values of swapped out keys are set to NULL */ 
 939 static int dictObjKeyCompare(void *privdata
, const void *key1
, 
 942     const robj 
*o1 
= key1
, *o2 
= key2
; 
 943     return sdsDictKeyCompare(privdata
,o1
->ptr
,o2
->ptr
); 
 946 static unsigned int dictObjHash(const void *key
) { 
 948     return dictGenHashFunction(o
->ptr
, sdslen((sds
)o
->ptr
)); 
 /* Key comparison callback for keys that may be stored in an encoded form:
  * both keys go through getDecodedObject() and the resulting ->ptr payloads
  * are compared with sdsDictKeyCompare().
  * NOTE(review): the lines after the comparison (presumably releasing the
  * decoded objects and returning cmp) are not visible in this listing. */
 951 static int dictEncObjKeyCompare(void *privdata
, const void *key1
, 
 954     robj 
*o1 
= (robj
*) key1
, *o2 
= (robj
*) key2
; 
 957     o1 
= getDecodedObject(o1
); 
 958     o2 
= getDecodedObject(o2
); 
 959     cmp 
= sdsDictKeyCompare(privdata
,o1
->ptr
,o2
->ptr
); 
 /* Hash callback for keys that may be stored in an encoded form.
  *  - REDIS_ENCODING_RAW: the sds payload in ->ptr is hashed directly.
  *  - REDIS_ENCODING_INT: the value held in ->ptr is rendered with "%ld"
  *    into a local buffer and the resulting text is hashed.
  *  - remaining visible branch: the object is passed through
  *    getDecodedObject() and the decoded payload is hashed.
  * NOTE(review): the declarations and the tail of the last branch are not
  * visible in this listing. */
 965 static unsigned int dictEncObjHash(const void *key
) { 
 966     robj 
*o 
= (robj
*) key
; 
 968     if (o
->encoding 
== REDIS_ENCODING_RAW
) { 
 969         return dictGenHashFunction(o
->ptr
, sdslen((sds
)o
->ptr
)); 
 971         if (o
->encoding 
== REDIS_ENCODING_INT
) { 
 975             len 
= snprintf(buf
,32,"%ld",(long)o
->ptr
); 
 976             return dictGenHashFunction((unsigned char*)buf
, len
); 
 980             o 
= getDecodedObject(o
); 
 981             hash 
= dictGenHashFunction(o
->ptr
, sdslen((sds
)o
->ptr
)); 
 988 /* Sets type and expires */ 
 /* initServer() also uses this type for server.sharingpool. Keys are hashed
  * and compared through the dictEncObj* callbacks and have no value
  * destructor.
  * NOTE(review): the initializer slots between the hash function and the
  * key compare are not visible in this listing. */
 989 static dictType setDictType 
= { 
 990     dictEncObjHash
,            /* hash function */ 
 993     dictEncObjKeyCompare
,      /* key compare */ 
 994     dictRedisObjectDestructor
, /* key destructor */ 
 995     NULL                       
/* val destructor */ 
 998 /* Sorted sets hash (note: a skiplist is used in addition to the hash table) */ 
 /* Same key callbacks as setDictType; values are released with
  * dictVanillaFree.
  * NOTE(review): the initializer slots between the hash function and the
  * key compare are not visible in this listing. */
 999 static dictType zsetDictType 
= { 
1000     dictEncObjHash
,            /* hash function */ 
1003     dictEncObjKeyCompare
,      /* key compare */ 
1004     dictRedisObjectDestructor
, /* key destructor */ 
1005     dictVanillaFree            
/* val destructor of malloc(sizeof(double)) */ 
/* Dict type that initServer() uses for the main dictionary of every
 * database (server.db[j].dict). Keys are hashed/compared through the
 * dictObj* callbacks; both keys and values are released with
 * dictRedisObjectDestructor.
 * NOTE(review): the initializer slots between the hash function and the
 * key compare are not visible in this listing. */
1009 static dictType hashDictType 
= { 
1010     dictObjHash
,                /* hash function */ 
1013     dictObjKeyCompare
,          /* key compare */ 
1014     dictRedisObjectDestructor
,  /* key destructor */ 
1015     dictRedisObjectDestructor   
/* val destructor */ 
/* Dict type that initServer() uses for the expires dictionary of every
 * database (server.db[j].expires). Keys use the dictObj* callbacks and are
 * released with dictRedisObjectDestructor; values have no destructor.
 * NOTE(review): the initializer slots between the hash function and the
 * key compare are not visible in this listing. */
1019 static dictType keyptrDictType 
= { 
1020     dictObjHash
,               /* hash function */ 
1023     dictObjKeyCompare
,         /* key compare */ 
1024     dictRedisObjectDestructor
, /* key destructor */ 
1025     NULL                       
/* val destructor */ 
1028 /* Keylist hash table type has unencoded redis objects as keys and 
1029  * lists as values. It's used for blocking operations (BLPOP) and to 
1030  * map swapped keys to a list of clients waiting for these keys to be loaded. 
      * initServer() installs it for server.db[j].blockingkeys and, when VM is 
      * enabled, for server.db[j].io_keys. Values are released with 
      * dictListDestructor. */ 
1031 static dictType keylistDictType 
= { 
1032     dictObjHash
,                /* hash function */ 
1035     dictObjKeyCompare
,          /* key compare */ 
1036     dictRedisObjectDestructor
,  /* key destructor */ 
1037     dictListDestructor          
/* val destructor */ 
1040 /* ========================= Random utility functions ======================= */ 
1042 /* Redis generally does not try to recover from out of memory conditions 
1043  * when allocating objects or strings, it is not clear if it will be possible 
1044  * to report this condition to the client since the networking layer itself 
1045  * is based on heap allocation for send buffers, so we simply abort. 
1046  * At least the code will be simpler to read... */ 
/* Out-of-memory handler: logs "<msg>: Out of memory" at REDIS_WARNING
 * level.
 * NOTE(review): the statements that terminate the process (the comment
 * above says "we simply abort") are not visible in this listing. */
1047 static void oom(const char *msg
) { 
1048     redisLog(REDIS_WARNING
, "%s: Out of memory\n",msg
); 
1053 /* ====================== Redis server networking stuff ===================== */ 
/* Walk server.clients once.
 *  - When server.maxidletime is non-zero, a client that is neither a slave
 *    nor a master and whose lastinteraction is more than maxidletime
 *    seconds old is logged as idle.
 *  - Otherwise, a REDIS_BLOCKED client whose blockingto deadline is set and
 *    already in the past receives shared.nullmultibulk and is unblocked.
 * NOTE(review): the statement that actually closes the idle client is not
 * visible in this listing. */
1054 static void closeTimedoutClients(void) { 
1057     time_t now 
= time(NULL
); 
1060     listRewind(server
.clients
,&li
); 
1061     while ((ln 
= listNext(&li
)) != NULL
) { 
1062         c 
= listNodeValue(ln
); 
1063         if (server
.maxidletime 
&& 
1064             !(c
->flags 
& REDIS_SLAVE
) &&    /* no timeout for slaves */ 
1065             !(c
->flags 
& REDIS_MASTER
) &&   /* no timeout for masters */ 
1066              (now 
- c
->lastinteraction 
> server
.maxidletime
)) 
1068             redisLog(REDIS_VERBOSE
,"Closing idle client"); 
1070         } else if (c
->flags 
& REDIS_BLOCKED
) { 
1071             if (c
->blockingto 
!= 0 && c
->blockingto 
< now
) { 
1072                 addReply(c
,shared
.nullmultibulk
); 
1073                 unblockClientWaitingData(c
); 
1079 static int htNeedsResize(dict 
*dict
) { 
1080     long long size
, used
; 
1082     size 
= dictSlots(dict
); 
1083     used 
= dictSize(dict
); 
1084     return (size 
&& used 
&& size 
> DICT_HT_INITIAL_SIZE 
&& 
1085             (used
*100/size 
< REDIS_HT_MINFILL
)); 
1088 /* If the percentage of used slots in the HT reaches REDIS_HT_MINFILL 
1089  * we resize the hash table to save memory */ 
1090 static void tryResizeHashTables(void) { 
1093     for (j 
= 0; j 
< server
.dbnum
; j
++) { 
1094         if (htNeedsResize(server
.db
[j
].dict
)) { 
1095             redisLog(REDIS_VERBOSE
,"The hash table %d is too sparse, resize it...",j
); 
1096             dictResize(server
.db
[j
].dict
); 
1097             redisLog(REDIS_VERBOSE
,"Hash table %d resized.",j
); 
1099         if (htNeedsResize(server
.db
[j
].expires
)) 
1100             dictResize(server
.db
[j
].expires
); 
1104 /* A background saving child (BGSAVE) terminated its work. Handle this. */ 
/* `statloc` is the wait status of the finished BGSAVE child.
 *  - clean exit with status 0: success is logged and server.lastsave is
 *    refreshed;
 *  - clean exit with a non-zero status: an error is logged;
 *  - remaining case: "terminated by signal" is logged and the child's
 *    temp file is removed.
 * In every case server.bgsavechildpid is reset to -1 and slaves waiting
 * for the BGSAVE are notified through updateSlavesWaitingBgsave().
 * NOTE(review): one statement of the success branch is not visible in
 * this listing. */
1105 void backgroundSaveDoneHandler(int statloc
) { 
1106     int exitcode 
= WEXITSTATUS(statloc
); 
1107     int bysignal 
= WIFSIGNALED(statloc
); 
1109     if (!bysignal 
&& exitcode 
== 0) { 
1110         redisLog(REDIS_NOTICE
, 
1111             "Background saving terminated with success"); 
1113         server
.lastsave 
= time(NULL
); 
1114     } else if (!bysignal 
&& exitcode 
!= 0) { 
1115         redisLog(REDIS_WARNING
, "Background saving error"); 
1117         redisLog(REDIS_WARNING
, 
1118             "Background saving terminated by signal"); 
1119         rdbRemoveTempFile(server
.bgsavechildpid
); 
1121     server
.bgsavechildpid 
= -1; 
1122     /* Possibly there are slaves waiting for a BGSAVE in order to be served 
1123      * (the first stage of SYNC is a bulk transfer of dump.rdb) */ 
     /* NOTE(review): only exitcode is tested here, not bysignal. POSIX
      * defines WEXITSTATUS() only for children that exited normally, so a
      * child killed by a signal can yield exitcode == 0 and be reported to
      * the slaves as REDIS_OK. Consider (!bysignal && exitcode == 0). */
1124     updateSlavesWaitingBgsave(exitcode 
== 0 ? REDIS_OK 
: REDIS_ERR
); 
1127 /* A background append only file rewriting (BGREWRITEAOF) terminated its work. 
/* `statloc` is the wait status of the finished BGREWRITEAOF child.
 * On a clean exit with status 0 the visible steps are:
 *  1. open the child's "temp-rewriteaof-bg-<pid>.aof" for appending;
 *  2. append server.bgrewritebuf (the changes accumulated by the parent
 *     while the child was working) to it;
 *  3. rename the temp file onto server.appendfilename;
 *  4. if append-only is active (server.appendfd != -1), close the old
 *     descriptor, adopt the new one and reset server.appendseldb to -1 so
 *     that a SELECT gets issued again.
 * Other outcomes are only logged. At the end bgrewritebuf is released and
 * re-created empty, the child's temp file is removed and
 * server.bgrewritechildpid is reset to -1.
 * NOTE(review): the error exits of steps 1-3 (and whether fd is closed on
 * them) are not visible in this listing. The "%lu" conversion below is
 * fed sdslen() directly; confirm it matches sdslen()'s return type. */
1129 void backgroundRewriteDoneHandler(int statloc
) { 
1130     int exitcode 
= WEXITSTATUS(statloc
); 
1131     int bysignal 
= WIFSIGNALED(statloc
); 
1133     if (!bysignal 
&& exitcode 
== 0) { 
1137         redisLog(REDIS_NOTICE
, 
1138             "Background append only file rewriting terminated with success"); 
1139         /* Now it's time to flush the differences accumulated by the parent */ 
1140         snprintf(tmpfile
,256,"temp-rewriteaof-bg-%d.aof", (int) server
.bgrewritechildpid
); 
1141         fd 
= open(tmpfile
,O_WRONLY
|O_APPEND
); 
1143             redisLog(REDIS_WARNING
, "Not able to open the temp append only file produced by the child: %s", strerror(errno
)); 
1146         /* Flush our data... */ 
1147         if (write(fd
,server
.bgrewritebuf
,sdslen(server
.bgrewritebuf
)) != 
1148                 (signed) sdslen(server
.bgrewritebuf
)) { 
1149             redisLog(REDIS_WARNING
, "Error or short write trying to flush the parent diff of the append log file in the child temp file: %s", strerror(errno
)); 
1153         redisLog(REDIS_NOTICE
,"Parent diff flushed into the new append log file with success (%lu bytes)",sdslen(server
.bgrewritebuf
)); 
1154         /* Now our work is to rename the temp file into the stable file. And 
1155          * switch the file descriptor used by the server for append only. */ 
1156         if (rename(tmpfile
,server
.appendfilename
) == -1) { 
1157             redisLog(REDIS_WARNING
,"Can't rename the temp append only file into the stable one: %s", strerror(errno
)); 
1161         /* Mission completed... almost */ 
1162         redisLog(REDIS_NOTICE
,"Append only file successfully rewritten."); 
1163         if (server
.appendfd 
!= -1) { 
1164             /* If append only is actually enabled... */ 
1165             close(server
.appendfd
); 
1166             server
.appendfd 
= fd
; 
1168             server
.appendseldb 
= -1; /* Make sure it will issue SELECT */ 
1169             redisLog(REDIS_NOTICE
,"The new append only file was selected for future appends."); 
1171             /* If append only is disabled we just generate a dump in this 
1172              * format. Why not? */ 
1175     } else if (!bysignal 
&& exitcode 
!= 0) { 
1176         redisLog(REDIS_WARNING
, "Background append only file rewriting error"); 
1178         redisLog(REDIS_WARNING
, 
1179             "Background append only file rewriting terminated by signal"); 
1182     sdsfree(server
.bgrewritebuf
); 
1183     server
.bgrewritebuf 
= sdsempty(); 
1184     aofRemoveTempFile(server
.bgrewritechildpid
); 
1185     server
.bgrewritechildpid 
= -1; 
/* Periodic timer callback, registered by initServer() through
 * aeCreateTimeEvent(). The visible steps, in order:
 *  - refresh the cached server.unixtime;
 *  - on every 5th call, log key/slot statistics of each non-empty database;
 *  - shrink sparse hash tables, but only while no BGSAVE child is running;
 *  - log the number of connected clients and the memory in use;
 *  - run closeTimedoutClients() every 10th call (when a max idle time is
 *    set) or whenever clients are blocked in BLPOP;
 *  - if a BGSAVE / BGREWRITEAOF child is running, reap it with wait3();
 *    the save points are checked and may start a background save;
 *  - sample up to REDIS_EXPIRELOOKUPS_PER_CRON volatile keys per database
 *    and repeat while more than a quarter of a sample was expired;
 *  - when VM can swap, free/swap objects while over vm_max_memory;
 *  - connect to the master when replstate is REDIS_REPL_CONNECT.
 * eventLoop and clientData are marked unused.
 * NOTE(review): several statements (including the expiry test and the
 * return value) are not visible in this listing. */
1188 static int serverCron(struct aeEventLoop 
*eventLoop
, long long id
, void *clientData
) { 
1189     int j
, loops 
= server
.cronloops
++; 
1190     REDIS_NOTUSED(eventLoop
); 
1192     REDIS_NOTUSED(clientData
); 
1194     /* We take a cached value of the unix time in the global state because 
1195      * with virtual memory and aging we need to store the current time 
1196      * in objects at every object access, and accuracy is not needed. 
1197      * To access a global var is faster than calling time(NULL) */ 
1198     server
.unixtime 
= time(NULL
); 
1200     /* Show some info about non-empty databases */ 
1201     for (j 
= 0; j 
< server
.dbnum
; j
++) { 
1202         long long size
, used
, vkeys
; 
1204         size 
= dictSlots(server
.db
[j
].dict
); 
1205         used 
= dictSize(server
.db
[j
].dict
); 
1206         vkeys 
= dictSize(server
.db
[j
].expires
); 
1207         if (!(loops 
% 5) && (used 
|| vkeys
)) { 
1208             redisLog(REDIS_VERBOSE
,"DB %d: %lld keys (%lld volatile) in %lld slots HT.",j
,used
,vkeys
,size
); 
1209             /* dictPrintStats(server.dict); */ 
1213     /* We don't want to resize the hash tables while a background saving 
1214      * is in progress: the saving child is created using fork() that is 
1215      * implemented with a copy-on-write semantic in most modern systems, so 
1216      * if we resize the HT while there is the saving child at work actually 
1217      * a lot of memory movements in the parent will cause a lot of pages 
1219     if (server
.bgsavechildpid 
== -1) tryResizeHashTables(); 
1221     /* Show information about connected clients */ 
1223         redisLog(REDIS_VERBOSE
,"%d clients connected (%d slaves), %zu bytes in use, %d shared objects", 
1224             listLength(server
.clients
)-listLength(server
.slaves
), 
1225             listLength(server
.slaves
), 
1226             zmalloc_used_memory(), 
1227             dictSize(server
.sharingpool
)); 
1230     /* Close connections of timedout clients */ 
1231     if ((server
.maxidletime 
&& !(loops 
% 10)) || server
.blpop_blocked_clients
) 
1232         closeTimedoutClients(); 
1234     /* Check if a background saving or AOF rewrite in progress terminated */ 
1235     if (server
.bgsavechildpid 
!= -1 || server
.bgrewritechildpid 
!= -1) { 
         /* NOTE(review): wait3() returns -1 on error, which also satisfies
          * "!= 0" below; unless a hidden line filters it, the rewrite handler
          * would then run with an unset statloc. Confirm. */
1239         if ((pid 
= wait3(&statloc
,WNOHANG
,NULL
)) != 0) { 
1240             if (pid 
== server
.bgsavechildpid
) { 
1241                 backgroundSaveDoneHandler(statloc
); 
1243                 backgroundRewriteDoneHandler(statloc
); 
1247         /* If there is not a background saving in progress check if 
1248          * we have to save now */ 
1249          time_t now 
= time(NULL
); 
1250          for (j 
= 0; j 
< server
.saveparamslen
; j
++) { 
1251             struct saveparam 
*sp 
= server
.saveparams
+j
; 
1253             if (server
.dirty 
>= sp
->changes 
&& 
1254                 now
-server
.lastsave 
> sp
->seconds
) { 
1255                 redisLog(REDIS_NOTICE
,"%d changes in %d seconds. Saving...", 
1256                     sp
->changes
, sp
->seconds
); 
1257                 rdbSaveBackground(server
.dbfilename
); 
1263     /* Try to expire a few timed out keys. The algorithm used is adaptive and 
1264      * will use few CPU cycles if there are few expiring keys, otherwise 
1265      * it will get more aggressive to avoid that too much memory is used by 
1266      * keys that can be removed from the keyspace. */ 
1267     for (j 
= 0; j 
< server
.dbnum
; j
++) { 
1269         redisDb 
*db 
= server
.db
+j
; 
1271         /* Continue to expire if at the end of the cycle more than 25% 
1272          * of the keys were expired. */ 
1274             long num 
= dictSize(db
->expires
); 
1275             time_t now 
= time(NULL
); 
1278             if (num 
> REDIS_EXPIRELOOKUPS_PER_CRON
) 
1279                 num 
= REDIS_EXPIRELOOKUPS_PER_CRON
; 
1284                 if ((de 
= dictGetRandomKey(db
->expires
)) == NULL
) break; 
1285                 t 
= (time_t) dictGetEntryVal(de
); 
1287                     deleteKey(db
,dictGetEntryKey(de
)); 
1291         } while (expired 
> REDIS_EXPIRELOOKUPS_PER_CRON
/4); 
1294     /* Swap a few keys on disk if we are over the memory limit and VM 
1295      * is enabled. Try to free objects from the free list first. */ 
1296     if (vmCanSwapOut()) { 
1297         while (server
.vm_enabled 
&& zmalloc_used_memory() > 
1298                 server
.vm_max_memory
) 
1302             if (tryFreeOneObjectFromFreelist() == REDIS_OK
) continue; 
1303             retval 
= (server
.vm_max_threads 
== 0) ? 
1304                         vmSwapOneObjectBlocking() : 
1305                         vmSwapOneObjectThreaded(); 
1306             if (retval 
== REDIS_ERR 
&& (loops 
% 30) == 0 && 
1307                 zmalloc_used_memory() > 
1308                 (server
.vm_max_memory
+server
.vm_max_memory
/10)) 
1310                 redisLog(REDIS_WARNING
,"WARNING: vm-max-memory limit exceeded by more than 10%% but unable to swap more objects out!"); 
1312             /* Note that when using threaded I/O we free just one object, 
1313              * because anyway when the I/O thread in charge to swap this 
1314              * object out will finish, the handler of completed jobs 
1315              * will try to swap more objects if we are still out of memory. */ 
1316             if (retval 
== REDIS_ERR 
|| server
.vm_max_threads 
> 0) break; 
1320     /* Check if we should connect to a MASTER */ 
1321     if (server
.replstate 
== REDIS_REPL_CONNECT
) { 
1322         redisLog(REDIS_NOTICE
,"Connecting to MASTER..."); 
1323         if (syncWithMaster() == REDIS_OK
) { 
1324             redisLog(REDIS_NOTICE
,"MASTER <-> SLAVE sync succeeded"); 
1330 /* This function gets called every time Redis is entering the 
1331  * main loop of the event driven library, that is, before to sleep 
1332  * for ready file descriptors. */ 
/* When VM is enabled and server.io_ready_clients is not empty, resume
 * every client in that list: the client is removed from the list, its
 * REDIS_IO_WAIT flag is cleared, server.vm_blocked_clients is decremented,
 * its readable event handler (readQueryFromClient) is installed again and
 * the command named by argv[0] is looked up (and asserted to exist).
 * Afterwards any bytes still pending in the client's query buffer are
 * processed. `eventLoop` is unused.
 * NOTE(review): the statements between the assert and the input-buffer
 * check (presumably executing the command) are not visible in this
 * listing. */
1333 static void beforeSleep(struct aeEventLoop 
*eventLoop
) { 
1334     REDIS_NOTUSED(eventLoop
); 
1336     if (server
.vm_enabled 
&& listLength(server
.io_ready_clients
)) { 
1340         listRewind(server
.io_ready_clients
,&li
); 
1341         while((ln 
= listNext(&li
))) { 
1342             redisClient 
*c 
= ln
->value
; 
1343             struct redisCommand 
*cmd
; 
1345             /* Resume the client. */ 
1346             listDelNode(server
.io_ready_clients
,ln
); 
1347             c
->flags 
&= (~REDIS_IO_WAIT
); 
1348             server
.vm_blocked_clients
--; 
1349             aeCreateFileEvent(server
.el
, c
->fd
, AE_READABLE
, 
1350                 readQueryFromClient
, c
); 
1351             cmd 
= lookupCommand(c
->argv
[0]->ptr
); 
1352             assert(cmd 
!= NULL
); 
1355             /* There may be more data to process in the input buffer. */ 
1356             if (c
->querybuf 
&& sdslen(c
->querybuf
) > 0) 
1357                 processInputBuffer(c
); 
1362 static void createSharedObjects(void) { 
1363     shared
.crlf 
= createObject(REDIS_STRING
,sdsnew("\r\n")); 
1364     shared
.ok 
= createObject(REDIS_STRING
,sdsnew("+OK\r\n")); 
1365     shared
.err 
= createObject(REDIS_STRING
,sdsnew("-ERR\r\n")); 
1366     shared
.emptybulk 
= createObject(REDIS_STRING
,sdsnew("$0\r\n\r\n")); 
1367     shared
.czero 
= createObject(REDIS_STRING
,sdsnew(":0\r\n")); 
1368     shared
.cone 
= createObject(REDIS_STRING
,sdsnew(":1\r\n")); 
1369     shared
.nullbulk 
= createObject(REDIS_STRING
,sdsnew("$-1\r\n")); 
1370     shared
.nullmultibulk 
= createObject(REDIS_STRING
,sdsnew("*-1\r\n")); 
1371     shared
.emptymultibulk 
= createObject(REDIS_STRING
,sdsnew("*0\r\n")); 
1372     shared
.pong 
= createObject(REDIS_STRING
,sdsnew("+PONG\r\n")); 
1373     shared
.queued 
= createObject(REDIS_STRING
,sdsnew("+QUEUED\r\n")); 
1374     shared
.wrongtypeerr 
= createObject(REDIS_STRING
,sdsnew( 
1375         "-ERR Operation against a key holding the wrong kind of value\r\n")); 
1376     shared
.nokeyerr 
= createObject(REDIS_STRING
,sdsnew( 
1377         "-ERR no such key\r\n")); 
1378     shared
.syntaxerr 
= createObject(REDIS_STRING
,sdsnew( 
1379         "-ERR syntax error\r\n")); 
1380     shared
.sameobjecterr 
= createObject(REDIS_STRING
,sdsnew( 
1381         "-ERR source and destination objects are the same\r\n")); 
1382     shared
.outofrangeerr 
= createObject(REDIS_STRING
,sdsnew( 
1383         "-ERR index out of range\r\n")); 
1384     shared
.space 
= createObject(REDIS_STRING
,sdsnew(" ")); 
1385     shared
.colon 
= createObject(REDIS_STRING
,sdsnew(":")); 
1386     shared
.plus 
= createObject(REDIS_STRING
,sdsnew("+")); 
1387     shared
.select0 
= createStringObject("select 0\r\n",10); 
1388     shared
.select1 
= createStringObject("select 1\r\n",10); 
1389     shared
.select2 
= createStringObject("select 2\r\n",10); 
1390     shared
.select3 
= createStringObject("select 3\r\n",10); 
1391     shared
.select4 
= createStringObject("select 4\r\n",10); 
1392     shared
.select5 
= createStringObject("select 5\r\n",10); 
1393     shared
.select6 
= createStringObject("select 6\r\n",10); 
1394     shared
.select7 
= createStringObject("select 7\r\n",10); 
1395     shared
.select8 
= createStringObject("select 8\r\n",10); 
1396     shared
.select9 
= createStringObject("select 9\r\n",10); 
1399 static void appendServerSaveParams(time_t seconds
, int changes
) { 
1400     server
.saveparams 
= zrealloc(server
.saveparams
,sizeof(struct saveparam
)*(server
.saveparamslen
+1)); 
1401     server
.saveparams
[server
.saveparamslen
].seconds 
= seconds
; 
1402     server
.saveparams
[server
.saveparamslen
].changes 
= changes
; 
1403     server
.saveparamslen
++; 
1406 static void resetServerSaveParams() { 
1407     zfree(server
.saveparams
); 
1408     server
.saveparams 
= NULL
; 
1409     server
.saveparamslen 
= 0; 
/* Fill the global `server` structure with built-in defaults: networking
 * and logging options, persistence (RDB file name, append-only file and
 * its fsync policy), object sharing, client and memory limits, virtual
 * memory parameters and replication state. The save-point list is reset
 * and then seeded with three defaults (1 hour / 1 change, 5 minutes / 100
 * changes, 1 minute / 10000 changes). Finally the R_PosInf, R_NegInf and
 * R_Nan double constants are derived from R_Zero.
 * NOTE(review): two statements (one in the replication group, one just
 * before the double constants) are not visible in this listing. */
1412 static void initServerConfig() { 
1413     server
.dbnum 
= REDIS_DEFAULT_DBNUM
; 
1414     server
.port 
= REDIS_SERVERPORT
; 
1415     server
.verbosity 
= REDIS_VERBOSE
; 
1416     server
.maxidletime 
= REDIS_MAXIDLETIME
; 
1417     server
.saveparams 
= NULL
; 
1418     server
.logfile 
= NULL
; /* NULL = log on standard output */ 
1419     server
.bindaddr 
= NULL
; 
1420     server
.glueoutputbuf 
= 1; 
1421     server
.daemonize 
= 0; 
1422     server
.appendonly 
= 0; 
1423     server
.appendfsync 
= APPENDFSYNC_ALWAYS
; 
1424     server
.lastfsync 
= time(NULL
); 
1425     server
.appendfd 
= -1; 
1426     server
.appendseldb 
= -1; /* Make sure the first time will not match */ 
1427     server
.pidfile 
= "/var/run/redis.pid"; 
1428     server
.dbfilename 
= "dump.rdb"; 
1429     server
.appendfilename 
= "appendonly.aof"; 
1430     server
.requirepass 
= NULL
; 
1431     server
.shareobjects 
= 0; 
1432     server
.rdbcompression 
= 1; 
1433     server
.sharingpoolsize 
= 1024; 
1434     server
.maxclients 
= 0; 
1435     server
.blpop_blocked_clients 
= 0; 
1436     server
.maxmemory 
= 0; 
1437     server
.vm_enabled 
= 0; 
1438     server
.vm_swap_file 
= zstrdup("/tmp/redis-%p.vm"); 
1439     server
.vm_page_size 
= 256;          /* 256 bytes per page */ 
1440     server
.vm_pages 
= 1024*1024*100;    /* 104 millions of pages */ 
1441     server
.vm_max_memory 
= 1024LL*1024*1024*1; /* 1 GB of RAM */ 
1442     server
.vm_max_threads 
= 4; 
1443     server
.vm_blocked_clients 
= 0; 
1445     resetServerSaveParams(); 
1447     appendServerSaveParams(60*60,1);  /* save after 1 hour and 1 change */ 
1448     appendServerSaveParams(300,100);  /* save after 5 minutes and 100 changes */ 
1449     appendServerSaveParams(60,10000); /* save after 1 minute and 10000 changes */ 
1450     /* Replication related */ 
1452     server
.masterauth 
= NULL
; 
1453     server
.masterhost 
= NULL
; 
1454     server
.masterport 
= 6379; 
1455     server
.master 
= NULL
; 
1456     server
.replstate 
= REDIS_REPL_NONE
; 
1458     /* Double constants initialization */ 
1460     R_PosInf 
= 1.0/R_Zero
; 
1461     R_NegInf 
= -1.0/R_Zero
; 
1462     R_Nan 
= R_Zero
/R_Zero
; 
/* Bring the server to a runnable state using the values already stored in
 * `server`: ignore SIGHUP/SIGPIPE and install the SIGSEGV handler, open
 * /dev/null, create the client/slave/monitor/free-object lists, the shared
 * objects, the event loop, the database array and the sharing pool, open
 * the listening TCP socket, create the per-database dictionaries, reset
 * counters and statistics, register serverCron() as a timer and
 * acceptHandler() on the listening socket, open the append-only file when
 * enabled and initialize VM when enabled.
 * NOTE(review): the failure exits after the error logs below are not
 * visible in this listing. */
1465 static void initServer() { 
1468     signal(SIGHUP
, SIG_IGN
); 
1469     signal(SIGPIPE
, SIG_IGN
); 
1470     setupSigSegvAction(); 
1472     server
.devnull 
= fopen("/dev/null","w"); 
1473     if (server
.devnull 
== NULL
) { 
         /* NOTE(review): fopen() reports its failure through errno; nothing
          * visible here stores it in server.neterr, so this message probably
          * prints stale or empty text. strerror(errno) looks like the
          * intended argument; confirm. */
1474         redisLog(REDIS_WARNING
, "Can't open /dev/null: %s", server
.neterr
); 
1477     server
.clients 
= listCreate(); 
1478     server
.slaves 
= listCreate(); 
1479     server
.monitors 
= listCreate(); 
1480     server
.objfreelist 
= listCreate(); 
1481     createSharedObjects(); 
1482     server
.el 
= aeCreateEventLoop(); 
1483     server
.db 
= zmalloc(sizeof(redisDb
)*server
.dbnum
); 
1484     server
.sharingpool 
= dictCreate(&setDictType
,NULL
); 
1485     server
.fd 
= anetTcpServer(server
.neterr
, server
.port
, server
.bindaddr
); 
1486     if (server
.fd 
== -1) { 
1487         redisLog(REDIS_WARNING
, "Opening TCP port: %s", server
.neterr
); 
1490     for (j 
= 0; j 
< server
.dbnum
; j
++) { 
1491         server
.db
[j
].dict 
= dictCreate(&hashDictType
,NULL
); 
1492         server
.db
[j
].expires 
= dictCreate(&keyptrDictType
,NULL
); 
1493         server
.db
[j
].blockingkeys 
= dictCreate(&keylistDictType
,NULL
); 
1494         if (server
.vm_enabled
) 
1495             server
.db
[j
].io_keys 
= dictCreate(&keylistDictType
,NULL
); 
1496         server
.db
[j
].id 
= j
; 
1498     server
.cronloops 
= 0; 
1499     server
.bgsavechildpid 
= -1; 
1500     server
.bgrewritechildpid 
= -1; 
1501     server
.bgrewritebuf 
= sdsempty(); 
1502     server
.lastsave 
= time(NULL
); 
1504     server
.stat_numcommands 
= 0; 
1505     server
.stat_numconnections 
= 0; 
1506     server
.stat_starttime 
= time(NULL
); 
1507     server
.unixtime 
= time(NULL
); 
1508     aeCreateTimeEvent(server
.el
, 1, serverCron
, NULL
, NULL
); 
1509     if (aeCreateFileEvent(server
.el
, server
.fd
, AE_READABLE
, 
1510         acceptHandler
, NULL
) == AE_ERR
) oom("creating file event"); 
1512     if (server
.appendonly
) { 
1513         server
.appendfd 
= open(server
.appendfilename
,O_WRONLY
|O_APPEND
|O_CREAT
,0644); 
1514         if (server
.appendfd 
== -1) { 
1515             redisLog(REDIS_WARNING
, "Can't open the append-only file: %s", 
1521     if (server
.vm_enabled
) vmInit(); 
1524 /* Empty the whole database */ 
1525 static long long emptyDb() { 
1527     long long removed 
= 0; 
1529     for (j 
= 0; j 
< server
.dbnum
; j
++) { 
1530         removed 
+= dictSize(server
.db
[j
].dict
); 
1531         dictEmpty(server
.db
[j
].dict
); 
1532         dictEmpty(server
.db
[j
].expires
); 
/* Convert a configuration boolean to an int: "yes" gives 1 and "no" gives
 * 0 (both compared case-insensitively); any other string gives -1, which
 * callers treat as an invalid argument. */
static int yesnotoi(char *s) {
    if (strcasecmp(s,"yes") == 0) return 1;
    if (strcasecmp(s,"no") == 0) return 0;
    return -1;
}
1543 /* I agree, this is a very rudimental way to load a configuration... 
1544    will improve later if the config gets more complex */ 
1545 static void loadServerConfig(char *filename
) { 
1547     char buf
[REDIS_CONFIGLINE_MAX
+1], *err 
= NULL
; 
1551     if (filename
[0] == '-' && filename
[1] == '\0') 
1554         if ((fp 
= fopen(filename
,"r")) == NULL
) { 
1555             redisLog(REDIS_WARNING
,"Fatal error, can't open config file"); 
1560     while(fgets(buf
,REDIS_CONFIGLINE_MAX
+1,fp
) != NULL
) { 
1566         line 
= sdstrim(line
," \t\r\n"); 
1568         /* Skip comments and blank lines*/ 
1569         if (line
[0] == '#' || line
[0] == '\0') { 
1574         /* Split into arguments */ 
1575         argv 
= sdssplitlen(line
,sdslen(line
)," ",1,&argc
); 
1576         sdstolower(argv
[0]); 
1578         /* Execute config directives */ 
1579         if (!strcasecmp(argv
[0],"timeout") && argc 
== 2) { 
1580             server
.maxidletime 
= atoi(argv
[1]); 
1581             if (server
.maxidletime 
< 0) { 
1582                 err 
= "Invalid timeout value"; goto loaderr
; 
1584         } else if (!strcasecmp(argv
[0],"port") && argc 
== 2) { 
1585             server
.port 
= atoi(argv
[1]); 
1586             if (server
.port 
< 1 || server
.port 
> 65535) { 
1587                 err 
= "Invalid port"; goto loaderr
; 
1589         } else if (!strcasecmp(argv
[0],"bind") && argc 
== 2) { 
1590             server
.bindaddr 
= zstrdup(argv
[1]); 
1591         } else if (!strcasecmp(argv
[0],"save") && argc 
== 3) { 
1592             int seconds 
= atoi(argv
[1]); 
1593             int changes 
= atoi(argv
[2]); 
1594             if (seconds 
< 1 || changes 
< 0) { 
1595                 err 
= "Invalid save parameters"; goto loaderr
; 
1597             appendServerSaveParams(seconds
,changes
); 
1598         } else if (!strcasecmp(argv
[0],"dir") && argc 
== 2) { 
1599             if (chdir(argv
[1]) == -1) { 
1600                 redisLog(REDIS_WARNING
,"Can't chdir to '%s': %s", 
1601                     argv
[1], strerror(errno
)); 
1604         } else if (!strcasecmp(argv
[0],"loglevel") && argc 
== 2) { 
1605             if (!strcasecmp(argv
[1],"debug")) server
.verbosity 
= REDIS_DEBUG
; 
1606             else if (!strcasecmp(argv
[1],"verbose")) server
.verbosity 
= REDIS_VERBOSE
; 
1607             else if (!strcasecmp(argv
[1],"notice")) server
.verbosity 
= REDIS_NOTICE
; 
1608             else if (!strcasecmp(argv
[1],"warning")) server
.verbosity 
= REDIS_WARNING
; 
1610                 err 
= "Invalid log level. Must be one of debug, notice, warning"; 
1613         } else if (!strcasecmp(argv
[0],"logfile") && argc 
== 2) { 
1616             server
.logfile 
= zstrdup(argv
[1]); 
1617             if (!strcasecmp(server
.logfile
,"stdout")) { 
1618                 zfree(server
.logfile
); 
1619                 server
.logfile 
= NULL
; 
1621             if (server
.logfile
) { 
1622                 /* Test if we are able to open the file. The server will not 
1623                  * be able to abort just for this problem later... */ 
1624                 logfp 
= fopen(server
.logfile
,"a"); 
1625                 if (logfp 
== NULL
) { 
1626                     err 
= sdscatprintf(sdsempty(), 
1627                         "Can't open the log file: %s", strerror(errno
)); 
1632         } else if (!strcasecmp(argv
[0],"databases") && argc 
== 2) { 
1633             server
.dbnum 
= atoi(argv
[1]); 
1634             if (server
.dbnum 
< 1) { 
1635                 err 
= "Invalid number of databases"; goto loaderr
; 
1637         } else if (!strcasecmp(argv
[0],"maxclients") && argc 
== 2) { 
1638             server
.maxclients 
= atoi(argv
[1]); 
1639         } else if (!strcasecmp(argv
[0],"maxmemory") && argc 
== 2) { 
1640             server
.maxmemory 
= strtoll(argv
[1], NULL
, 10); 
1641         } else if (!strcasecmp(argv
[0],"slaveof") && argc 
== 3) { 
1642             server
.masterhost 
= sdsnew(argv
[1]); 
1643             server
.masterport 
= atoi(argv
[2]); 
1644             server
.replstate 
= REDIS_REPL_CONNECT
; 
1645         } else if (!strcasecmp(argv
[0],"masterauth") && argc 
== 2) { 
1646                 server
.masterauth 
= zstrdup(argv
[1]); 
1647         } else if (!strcasecmp(argv
[0],"glueoutputbuf") && argc 
== 2) { 
1648             if ((server
.glueoutputbuf 
= yesnotoi(argv
[1])) == -1) { 
1649                 err 
= "argument must be 'yes' or 'no'"; goto loaderr
; 
1651         } else if (!strcasecmp(argv
[0],"shareobjects") && argc 
== 2) { 
1652             if ((server
.shareobjects 
= yesnotoi(argv
[1])) == -1) { 
1653                 err 
= "argument must be 'yes' or 'no'"; goto loaderr
; 
1655         } else if (!strcasecmp(argv
[0],"rdbcompression") && argc 
== 2) { 
1656             if ((server
.rdbcompression 
= yesnotoi(argv
[1])) == -1) { 
1657                 err 
= "argument must be 'yes' or 'no'"; goto loaderr
; 
1659         } else if (!strcasecmp(argv
[0],"shareobjectspoolsize") && argc 
== 2) { 
1660             server
.sharingpoolsize 
= atoi(argv
[1]); 
1661             if (server
.sharingpoolsize 
< 1) { 
1662                 err 
= "invalid object sharing pool size"; goto loaderr
; 
1664         } else if (!strcasecmp(argv
[0],"daemonize") && argc 
== 2) { 
1665             if ((server
.daemonize 
= yesnotoi(argv
[1])) == -1) { 
1666                 err 
= "argument must be 'yes' or 'no'"; goto loaderr
; 
1668         } else if (!strcasecmp(argv
[0],"appendonly") && argc 
== 2) { 
1669             if ((server
.appendonly 
= yesnotoi(argv
[1])) == -1) { 
1670                 err 
= "argument must be 'yes' or 'no'"; goto loaderr
; 
1672         } else if (!strcasecmp(argv
[0],"appendfsync") && argc 
== 2) { 
1673             if (!strcasecmp(argv
[1],"no")) { 
1674                 server
.appendfsync 
= APPENDFSYNC_NO
; 
1675             } else if (!strcasecmp(argv
[1],"always")) { 
1676                 server
.appendfsync 
= APPENDFSYNC_ALWAYS
; 
1677             } else if (!strcasecmp(argv
[1],"everysec")) { 
1678                 server
.appendfsync 
= APPENDFSYNC_EVERYSEC
; 
1680                 err 
= "argument must be 'no', 'always' or 'everysec'"; 
1683         } else if (!strcasecmp(argv
[0],"requirepass") && argc 
== 2) { 
1684             server
.requirepass 
= zstrdup(argv
[1]); 
1685         } else if (!strcasecmp(argv
[0],"pidfile") && argc 
== 2) { 
1686             server
.pidfile 
= zstrdup(argv
[1]); 
1687         } else if (!strcasecmp(argv
[0],"dbfilename") && argc 
== 2) { 
1688             server
.dbfilename 
= zstrdup(argv
[1]); 
1689         } else if (!strcasecmp(argv
[0],"vm-enabled") && argc 
== 2) { 
1690             if ((server
.vm_enabled 
= yesnotoi(argv
[1])) == -1) { 
1691                 err 
= "argument must be 'yes' or 'no'"; goto loaderr
; 
1693         } else if (!strcasecmp(argv
[0],"vm-swap-file") && argc 
== 2) { 
1694             zfree(server
.vm_swap_file
); 
1695             server
.vm_swap_file 
= zstrdup(argv
[1]); 
1696         } else if (!strcasecmp(argv
[0],"vm-max-memory") && argc 
== 2) { 
1697             server
.vm_max_memory 
= strtoll(argv
[1], NULL
, 10); 
1698         } else if (!strcasecmp(argv
[0],"vm-page-size") && argc 
== 2) { 
1699             server
.vm_page_size 
= strtoll(argv
[1], NULL
, 10); 
1700         } else if (!strcasecmp(argv
[0],"vm-pages") && argc 
== 2) { 
1701             server
.vm_pages 
= strtoll(argv
[1], NULL
, 10); 
1702         } else if (!strcasecmp(argv
[0],"vm-max-threads") && argc 
== 2) { 
1703             server
.vm_max_threads 
= strtoll(argv
[1], NULL
, 10); 
1705             err 
= "Bad directive or wrong number of arguments"; goto loaderr
; 
1707         for (j 
= 0; j 
< argc
; j
++) 
1712     if (fp 
!= stdin
) fclose(fp
); 
1716     fprintf(stderr
, "\n*** FATAL CONFIG FILE ERROR ***\n"); 
1717     fprintf(stderr
, "Reading the configuration file, at line %d\n", linenum
); 
1718     fprintf(stderr
, ">>> '%s'\n", line
); 
1719     fprintf(stderr
, "%s\n", err
); 
1723 static void freeClientArgv(redisClient 
*c
) { 
1726     for (j 
= 0; j 
< c
->argc
; j
++) 
1727         decrRefCount(c
->argv
[j
]); 
1728     for (j 
= 0; j 
< c
->mbargc
; j
++) 
1729         decrRefCount(c
->mbargv
[j
]); 
1734 static void freeClient(redisClient 
*c
) { 
1737     /* Note that if the client we are freeing is blocked into a blocking 
1738      * call, we have to set querybuf to NULL *before* to call 
1739      * unblockClientWaitingData() to avoid processInputBuffer() will get 
1740      * called. Also it is important to remove the file events after 
1741      * this, because this call adds the READABLE event. */ 
1742     sdsfree(c
->querybuf
); 
1744     if (c
->flags 
& REDIS_BLOCKED
) 
1745         unblockClientWaitingData(c
); 
1747     aeDeleteFileEvent(server
.el
,c
->fd
,AE_READABLE
); 
1748     aeDeleteFileEvent(server
.el
,c
->fd
,AE_WRITABLE
); 
1749     listRelease(c
->reply
); 
1752     /* Remove from the list of clients */ 
1753     ln 
= listSearchKey(server
.clients
,c
); 
1754     redisAssert(ln 
!= NULL
); 
1755     listDelNode(server
.clients
,ln
); 
1756     /* Remove from the list of clients waiting for swapped keys */ 
1757     if (c
->flags 
& REDIS_IO_WAIT 
&& listLength(c
->io_keys
) == 0) { 
1758         ln 
= listSearchKey(server
.io_ready_clients
,c
); 
1760             listDelNode(server
.io_ready_clients
,ln
); 
1761             server
.vm_blocked_clients
--; 
1764     while (server
.vm_enabled 
&& listLength(c
->io_keys
)) { 
1765         ln 
= listFirst(c
->io_keys
); 
1766         dontWaitForSwappedKey(c
,ln
->value
); 
1768     listRelease(c
->io_keys
); 
1770     if (c
->flags 
& REDIS_SLAVE
) { 
1771         if (c
->replstate 
== REDIS_REPL_SEND_BULK 
&& c
->repldbfd 
!= -1) 
1773         list 
*l 
= (c
->flags 
& REDIS_MONITOR
) ? server
.monitors 
: server
.slaves
; 
1774         ln 
= listSearchKey(l
,c
); 
1775         redisAssert(ln 
!= NULL
); 
1778     if (c
->flags 
& REDIS_MASTER
) { 
1779         server
.master 
= NULL
; 
1780         server
.replstate 
= REDIS_REPL_CONNECT
; 
1784     freeClientMultiState(c
); 
1788 #define GLUEREPLY_UP_TO (1024) 
1789 static void glueReplyBuffersIfNeeded(redisClient 
*c
) { 
1791     char buf
[GLUEREPLY_UP_TO
]; 
1796     listRewind(c
->reply
,&li
); 
1797     while((ln 
= listNext(&li
))) { 
1801         objlen 
= sdslen(o
->ptr
); 
1802         if (copylen 
+ objlen 
<= GLUEREPLY_UP_TO
) { 
1803             memcpy(buf
+copylen
,o
->ptr
,objlen
); 
1805             listDelNode(c
->reply
,ln
); 
1807             if (copylen 
== 0) return; 
1811     /* Now the output buffer is empty, add the new single element */ 
1812     o 
= createObject(REDIS_STRING
,sdsnewlen(buf
,copylen
)); 
1813     listAddNodeHead(c
->reply
,o
); 
1816 static void sendReplyToClient(aeEventLoop 
*el
, int fd
, void *privdata
, int mask
) { 
1817     redisClient 
*c 
= privdata
; 
1818     int nwritten 
= 0, totwritten 
= 0, objlen
; 
1821     REDIS_NOTUSED(mask
); 
1823     /* Use writev() if we have enough buffers to send */ 
1824     if (!server
.glueoutputbuf 
&& 
1825         listLength(c
->reply
) > REDIS_WRITEV_THRESHOLD 
&&  
1826         !(c
->flags 
& REDIS_MASTER
)) 
1828         sendReplyToClientWritev(el
, fd
, privdata
, mask
); 
1832     while(listLength(c
->reply
)) { 
1833         if (server
.glueoutputbuf 
&& listLength(c
->reply
) > 1) 
1834             glueReplyBuffersIfNeeded(c
); 
1836         o 
= listNodeValue(listFirst(c
->reply
)); 
1837         objlen 
= sdslen(o
->ptr
); 
1840             listDelNode(c
->reply
,listFirst(c
->reply
)); 
1844         if (c
->flags 
& REDIS_MASTER
) { 
1845             /* Don't reply to a master */ 
1846             nwritten 
= objlen 
- c
->sentlen
; 
1848             nwritten 
= write(fd
, ((char*)o
->ptr
)+c
->sentlen
, objlen 
- c
->sentlen
); 
1849             if (nwritten 
<= 0) break; 
1851         c
->sentlen 
+= nwritten
; 
1852         totwritten 
+= nwritten
; 
1853         /* If we fully sent the object on head go to the next one */ 
1854         if (c
->sentlen 
== objlen
) { 
1855             listDelNode(c
->reply
,listFirst(c
->reply
)); 
1858         /* Note that we avoid to send more thank REDIS_MAX_WRITE_PER_EVENT 
1859          * bytes, in a single threaded server it's a good idea to serve 
1860          * other clients as well, even if a very large request comes from 
1861          * super fast link that is always able to accept data (in real world 
1862          * scenario think about 'KEYS *' against the loopback interfae) */ 
1863         if (totwritten 
> REDIS_MAX_WRITE_PER_EVENT
) break; 
1865     if (nwritten 
== -1) { 
1866         if (errno 
== EAGAIN
) { 
1869             redisLog(REDIS_VERBOSE
, 
1870                 "Error writing to client: %s", strerror(errno
)); 
1875     if (totwritten 
> 0) c
->lastinteraction 
= time(NULL
); 
1876     if (listLength(c
->reply
) == 0) { 
1878         aeDeleteFileEvent(server
.el
,c
->fd
,AE_WRITABLE
); 
1882 static void sendReplyToClientWritev(aeEventLoop 
*el
, int fd
, void *privdata
, int mask
) 
1884     redisClient 
*c 
= privdata
; 
1885     int nwritten 
= 0, totwritten 
= 0, objlen
, willwrite
; 
1887     struct iovec iov
[REDIS_WRITEV_IOVEC_COUNT
]; 
1888     int offset
, ion 
= 0; 
1890     REDIS_NOTUSED(mask
); 
1893     while (listLength(c
->reply
)) { 
1894         offset 
= c
->sentlen
; 
1898         /* fill-in the iov[] array */ 
1899         for(node 
= listFirst(c
->reply
); node
; node 
= listNextNode(node
)) { 
1900             o 
= listNodeValue(node
); 
1901             objlen 
= sdslen(o
->ptr
); 
1903             if (totwritten 
+ objlen 
- offset 
> REDIS_MAX_WRITE_PER_EVENT
)  
1906             if(ion 
== REDIS_WRITEV_IOVEC_COUNT
) 
1907                 break; /* no more iovecs */ 
1909             iov
[ion
].iov_base 
= ((char*)o
->ptr
) + offset
; 
1910             iov
[ion
].iov_len 
= objlen 
- offset
; 
1911             willwrite 
+= objlen 
- offset
; 
1912             offset 
= 0; /* just for the first item */ 
1919         /* write all collected blocks at once */ 
1920         if((nwritten 
= writev(fd
, iov
, ion
)) < 0) { 
1921             if (errno 
!= EAGAIN
) { 
1922                 redisLog(REDIS_VERBOSE
, 
1923                          "Error writing to client: %s", strerror(errno
)); 
1930         totwritten 
+= nwritten
; 
1931         offset 
= c
->sentlen
; 
1933         /* remove written robjs from c->reply */ 
1934         while (nwritten 
&& listLength(c
->reply
)) { 
1935             o 
= listNodeValue(listFirst(c
->reply
)); 
1936             objlen 
= sdslen(o
->ptr
); 
1938             if(nwritten 
>= objlen 
- offset
) { 
1939                 listDelNode(c
->reply
, listFirst(c
->reply
)); 
1940                 nwritten 
-= objlen 
- offset
; 
1944                 c
->sentlen 
+= nwritten
; 
1952         c
->lastinteraction 
= time(NULL
); 
1954     if (listLength(c
->reply
) == 0) { 
1956         aeDeleteFileEvent(server
.el
,c
->fd
,AE_WRITABLE
); 
1960 static struct redisCommand 
*lookupCommand(char *name
) { 
1962     while(cmdTable
[j
].name 
!= NULL
) { 
1963         if (!strcasecmp(name
,cmdTable
[j
].name
)) return &cmdTable
[j
]; 
1969 /* resetClient prepare the client to process the next command */ 
1970 static void resetClient(redisClient 
*c
) { 
1976 /* Call() is the core of Redis execution of a command */ 
1977 static void call(redisClient 
*c
, struct redisCommand 
*cmd
) { 
1980     dirty 
= server
.dirty
; 
1982     if (server
.appendonly 
&& server
.dirty
-dirty
) 
1983         feedAppendOnlyFile(cmd
,c
->db
->id
,c
->argv
,c
->argc
); 
1984     if (server
.dirty
-dirty 
&& listLength(server
.slaves
)) 
1985         replicationFeedSlaves(server
.slaves
,cmd
,c
->db
->id
,c
->argv
,c
->argc
); 
1986     if (listLength(server
.monitors
)) 
1987         replicationFeedSlaves(server
.monitors
,cmd
,c
->db
->id
,c
->argv
,c
->argc
); 
1988     server
.stat_numcommands
++; 
1991 /* If this function gets called we already read a whole 
1992  * command, argments are in the client argv/argc fields. 
1993  * processCommand() execute the command or prepare the 
1994  * server for a bulk read from the client. 
1996  * If 1 is returned the client is still alive and valid and 
1997  * and other operations can be performed by the caller. Otherwise 
1998  * if 0 is returned the client was destroied (i.e. after QUIT). */ 
1999 static int processCommand(redisClient 
*c
) { 
2000     struct redisCommand 
*cmd
; 
2002     /* Free some memory if needed (maxmemory setting) */ 
2003     if (server
.maxmemory
) freeMemoryIfNeeded(); 
2005     /* Handle the multi bulk command type. This is an alternative protocol 
2006      * supported by Redis in order to receive commands that are composed of 
2007      * multiple binary-safe "bulk" arguments. The latency of processing is 
2008      * a bit higher but this allows things like multi-sets, so if this 
2009      * protocol is used only for MSET and similar commands this is a big win. */ 
2010     if (c
->multibulk 
== 0 && c
->argc 
== 1 && ((char*)(c
->argv
[0]->ptr
))[0] == '*') { 
2011         c
->multibulk 
= atoi(((char*)c
->argv
[0]->ptr
)+1); 
2012         if (c
->multibulk 
<= 0) { 
2016             decrRefCount(c
->argv
[c
->argc
-1]); 
2020     } else if (c
->multibulk
) { 
2021         if (c
->bulklen 
== -1) { 
2022             if (((char*)c
->argv
[0]->ptr
)[0] != '$') { 
2023                 addReplySds(c
,sdsnew("-ERR multi bulk protocol error\r\n")); 
2027                 int bulklen 
= atoi(((char*)c
->argv
[0]->ptr
)+1); 
2028                 decrRefCount(c
->argv
[0]); 
2029                 if (bulklen 
< 0 || bulklen 
> 1024*1024*1024) { 
2031                     addReplySds(c
,sdsnew("-ERR invalid bulk write count\r\n")); 
2036                 c
->bulklen 
= bulklen
+2; /* add two bytes for CR+LF */ 
2040             c
->mbargv 
= zrealloc(c
->mbargv
,(sizeof(robj
*))*(c
->mbargc
+1)); 
2041             c
->mbargv
[c
->mbargc
] = c
->argv
[0]; 
2045             if (c
->multibulk 
== 0) { 
2049                 /* Here we need to swap the multi-bulk argc/argv with the 
2050                  * normal argc/argv of the client structure. */ 
2052                 c
->argv 
= c
->mbargv
; 
2053                 c
->mbargv 
= auxargv
; 
2056                 c
->argc 
= c
->mbargc
; 
2057                 c
->mbargc 
= auxargc
; 
2059                 /* We need to set bulklen to something different than -1 
2060                  * in order for the code below to process the command without 
2061                  * to try to read the last argument of a bulk command as 
2062                  * a special argument. */ 
2064                 /* continue below and process the command */ 
2071     /* -- end of multi bulk commands processing -- */ 
2073     /* The QUIT command is handled as a special case. Normal command 
2074      * procs are unable to close the client connection safely */ 
2075     if (!strcasecmp(c
->argv
[0]->ptr
,"quit")) { 
2080     /* Now lookup the command and check ASAP about trivial error conditions 
2081      * such wrong arity, bad command name and so forth. */ 
2082     cmd 
= lookupCommand(c
->argv
[0]->ptr
); 
2085             sdscatprintf(sdsempty(), "-ERR unknown command '%s'\r\n", 
2086                 (char*)c
->argv
[0]->ptr
)); 
2089     } else if ((cmd
->arity 
> 0 && cmd
->arity 
!= c
->argc
) || 
2090                (c
->argc 
< -cmd
->arity
)) { 
2092             sdscatprintf(sdsempty(), 
2093                 "-ERR wrong number of arguments for '%s' command\r\n", 
2097     } else if (server
.maxmemory 
&& cmd
->flags 
& REDIS_CMD_DENYOOM 
&& zmalloc_used_memory() > server
.maxmemory
) { 
2098         addReplySds(c
,sdsnew("-ERR command not allowed when used memory > 'maxmemory'\r\n")); 
2101     } else if (cmd
->flags 
& REDIS_CMD_BULK 
&& c
->bulklen 
== -1) { 
2102         /* This is a bulk command, we have to read the last argument yet. */ 
2103         int bulklen 
= atoi(c
->argv
[c
->argc
-1]->ptr
); 
2105         decrRefCount(c
->argv
[c
->argc
-1]); 
2106         if (bulklen 
< 0 || bulklen 
> 1024*1024*1024) { 
2108             addReplySds(c
,sdsnew("-ERR invalid bulk write count\r\n")); 
2113         c
->bulklen 
= bulklen
+2; /* add two bytes for CR+LF */ 
2114         /* It is possible that the bulk read is already in the 
2115          * buffer. Check this condition and handle it accordingly. 
2116          * This is just a fast path, alternative to call processInputBuffer(). 
2117          * It's a good idea since the code is small and this condition 
2118          * happens most of the times. */ 
2119         if ((signed)sdslen(c
->querybuf
) >= c
->bulklen
) { 
2120             c
->argv
[c
->argc
] = createStringObject(c
->querybuf
,c
->bulklen
-2); 
2122             c
->querybuf 
= sdsrange(c
->querybuf
,c
->bulklen
,-1); 
2124             /* Otherwise return... there is to read the last argument 
2125              * from the socket. */ 
2129     /* Let's try to share objects on the command arguments vector */ 
2130     if (server
.shareobjects
) { 
2132         for(j 
= 1; j 
< c
->argc
; j
++) 
2133             c
->argv
[j
] = tryObjectSharing(c
->argv
[j
]); 
2135     /* Let's try to encode the bulk object to save space. */ 
2136     if (cmd
->flags 
& REDIS_CMD_BULK
) 
2137         tryObjectEncoding(c
->argv
[c
->argc
-1]); 
2139     /* Check if the user is authenticated */ 
2140     if (server
.requirepass 
&& !c
->authenticated 
&& cmd
->proc 
!= authCommand
) { 
2141         addReplySds(c
,sdsnew("-ERR operation not permitted\r\n")); 
2146     /* Exec the command */ 
2147     if (c
->flags 
& REDIS_MULTI 
&& cmd
->proc 
!= execCommand 
&& cmd
->proc 
!= discardCommand
) { 
2148         queueMultiCommand(c
,cmd
); 
2149         addReply(c
,shared
.queued
); 
2151         if (server
.vm_enabled 
&& server
.vm_max_threads 
> 0 && 
2152             blockClientOnSwappedKeys(cmd
,c
)) return 1; 
2156     /* Prepare the client for the next command */ 
2161 static void replicationFeedSlaves(list 
*slaves
, struct redisCommand 
*cmd
, int dictid
, robj 
**argv
, int argc
) { 
2166     /* (args*2)+1 is enough room for args, spaces, newlines */ 
2167     robj 
*static_outv
[REDIS_STATIC_ARGS
*2+1]; 
2169     if (argc 
<= REDIS_STATIC_ARGS
) { 
2172         outv 
= zmalloc(sizeof(robj
*)*(argc
*2+1)); 
2175     for (j 
= 0; j 
< argc
; j
++) { 
2176         if (j 
!= 0) outv
[outc
++] = shared
.space
; 
2177         if ((cmd
->flags 
& REDIS_CMD_BULK
) && j 
== argc
-1) { 
2180             lenobj 
= createObject(REDIS_STRING
, 
2181                 sdscatprintf(sdsempty(),"%lu\r\n", 
2182                     (unsigned long) stringObjectLen(argv
[j
]))); 
2183             lenobj
->refcount 
= 0; 
2184             outv
[outc
++] = lenobj
; 
2186         outv
[outc
++] = argv
[j
]; 
2188     outv
[outc
++] = shared
.crlf
; 
2190     /* Increment all the refcounts at start and decrement at end in order to 
2191      * be sure to free objects if there is no slave in a replication state 
2192      * able to be feed with commands */ 
2193     for (j 
= 0; j 
< outc
; j
++) incrRefCount(outv
[j
]); 
2194     listRewind(slaves
,&li
); 
2195     while((ln 
= listNext(&li
))) { 
2196         redisClient 
*slave 
= ln
->value
; 
2198         /* Don't feed slaves that are still waiting for BGSAVE to start */ 
2199         if (slave
->replstate 
== REDIS_REPL_WAIT_BGSAVE_START
) continue; 
2201         /* Feed all the other slaves, MONITORs and so on */ 
2202         if (slave
->slaveseldb 
!= dictid
) { 
2206             case 0: selectcmd 
= shared
.select0
; break; 
2207             case 1: selectcmd 
= shared
.select1
; break; 
2208             case 2: selectcmd 
= shared
.select2
; break; 
2209             case 3: selectcmd 
= shared
.select3
; break; 
2210             case 4: selectcmd 
= shared
.select4
; break; 
2211             case 5: selectcmd 
= shared
.select5
; break; 
2212             case 6: selectcmd 
= shared
.select6
; break; 
2213             case 7: selectcmd 
= shared
.select7
; break; 
2214             case 8: selectcmd 
= shared
.select8
; break; 
2215             case 9: selectcmd 
= shared
.select9
; break; 
2217                 selectcmd 
= createObject(REDIS_STRING
, 
2218                     sdscatprintf(sdsempty(),"select %d\r\n",dictid
)); 
2219                 selectcmd
->refcount 
= 0; 
2222             addReply(slave
,selectcmd
); 
2223             slave
->slaveseldb 
= dictid
; 
2225         for (j 
= 0; j 
< outc
; j
++) addReply(slave
,outv
[j
]); 
2227     for (j 
= 0; j 
< outc
; j
++) decrRefCount(outv
[j
]); 
2228     if (outv 
!= static_outv
) zfree(outv
); 
2231 static void processInputBuffer(redisClient 
*c
) { 
2233     /* Before to process the input buffer, make sure the client is not 
2234      * waitig for a blocking operation such as BLPOP. Note that the first 
2235      * iteration the client is never blocked, otherwise the processInputBuffer 
2236      * would not be called at all, but after the execution of the first commands 
2237      * in the input buffer the client may be blocked, and the "goto again" 
2238      * will try to reiterate. The following line will make it return asap. */ 
2239     if (c
->flags 
& REDIS_BLOCKED 
|| c
->flags 
& REDIS_IO_WAIT
) return; 
2240     if (c
->bulklen 
== -1) { 
2241         /* Read the first line of the query */ 
2242         char *p 
= strchr(c
->querybuf
,'\n'); 
2249             query 
= c
->querybuf
; 
2250             c
->querybuf 
= sdsempty(); 
2251             querylen 
= 1+(p
-(query
)); 
2252             if (sdslen(query
) > querylen
) { 
2253                 /* leave data after the first line of the query in the buffer */ 
2254                 c
->querybuf 
= sdscatlen(c
->querybuf
,query
+querylen
,sdslen(query
)-querylen
); 
2256             *p 
= '\0'; /* remove "\n" */ 
2257             if (*(p
-1) == '\r') *(p
-1) = '\0'; /* and "\r" if any */ 
2258             sdsupdatelen(query
); 
2260             /* Now we can split the query in arguments */ 
2261             argv 
= sdssplitlen(query
,sdslen(query
)," ",1,&argc
); 
2264             if (c
->argv
) zfree(c
->argv
); 
2265             c
->argv 
= zmalloc(sizeof(robj
*)*argc
); 
2267             for (j 
= 0; j 
< argc
; j
++) { 
2268                 if (sdslen(argv
[j
])) { 
2269                     c
->argv
[c
->argc
] = createObject(REDIS_STRING
,argv
[j
]); 
2277                 /* Execute the command. If the client is still valid 
2278                  * after processCommand() return and there is something 
2279                  * on the query buffer try to process the next command. */ 
2280                 if (processCommand(c
) && sdslen(c
->querybuf
)) goto again
; 
2282                 /* Nothing to process, argc == 0. Just process the query 
2283                  * buffer if it's not empty or return to the caller */ 
2284                 if (sdslen(c
->querybuf
)) goto again
; 
2287         } else if (sdslen(c
->querybuf
) >= REDIS_REQUEST_MAX_SIZE
) { 
2288             redisLog(REDIS_VERBOSE
, "Client protocol error"); 
2293         /* Bulk read handling. Note that if we are at this point 
2294            the client already sent a command terminated with a newline, 
2295            we are reading the bulk data that is actually the last 
2296            argument of the command. */ 
2297         int qbl 
= sdslen(c
->querybuf
); 
2299         if (c
->bulklen 
<= qbl
) { 
2300             /* Copy everything but the final CRLF as final argument */ 
2301             c
->argv
[c
->argc
] = createStringObject(c
->querybuf
,c
->bulklen
-2); 
2303             c
->querybuf 
= sdsrange(c
->querybuf
,c
->bulklen
,-1); 
2304             /* Process the command. If the client is still valid after 
2305              * the processing and there is more data in the buffer 
2306              * try to parse it. */ 
2307             if (processCommand(c
) && sdslen(c
->querybuf
)) goto again
; 
2313 static void readQueryFromClient(aeEventLoop 
*el
, int fd
, void *privdata
, int mask
) { 
2314     redisClient 
*c 
= (redisClient
*) privdata
; 
2315     char buf
[REDIS_IOBUF_LEN
]; 
2318     REDIS_NOTUSED(mask
); 
2320     nread 
= read(fd
, buf
, REDIS_IOBUF_LEN
); 
2322         if (errno 
== EAGAIN
) { 
2325             redisLog(REDIS_VERBOSE
, "Reading from client: %s",strerror(errno
)); 
2329     } else if (nread 
== 0) { 
2330         redisLog(REDIS_VERBOSE
, "Client closed connection"); 
2335         c
->querybuf 
= sdscatlen(c
->querybuf
, buf
, nread
); 
2336         c
->lastinteraction 
= time(NULL
); 
2340     if (!(c
->flags 
& REDIS_BLOCKED
)) 
2341         processInputBuffer(c
); 
2344 static int selectDb(redisClient 
*c
, int id
) { 
2345     if (id 
< 0 || id 
>= server
.dbnum
) 
2347     c
->db 
= &server
.db
[id
]; 
2351 static void *dupClientReplyValue(void *o
) { 
2352     incrRefCount((robj
*)o
); 
2356 static redisClient 
*createClient(int fd
) { 
2357     redisClient 
*c 
= zmalloc(sizeof(*c
)); 
2359     anetNonBlock(NULL
,fd
); 
2360     anetTcpNoDelay(NULL
,fd
); 
2361     if (!c
) return NULL
; 
2364     c
->querybuf 
= sdsempty(); 
2373     c
->lastinteraction 
= time(NULL
); 
2374     c
->authenticated 
= 0; 
2375     c
->replstate 
= REDIS_REPL_NONE
; 
2376     c
->reply 
= listCreate(); 
2377     listSetFreeMethod(c
->reply
,decrRefCount
); 
2378     listSetDupMethod(c
->reply
,dupClientReplyValue
); 
2379     c
->blockingkeys 
= NULL
; 
2380     c
->blockingkeysnum 
= 0; 
2381     c
->io_keys 
= listCreate(); 
2382     listSetFreeMethod(c
->io_keys
,decrRefCount
); 
2383     if (aeCreateFileEvent(server
.el
, c
->fd
, AE_READABLE
, 
2384         readQueryFromClient
, c
) == AE_ERR
) { 
2388     listAddNodeTail(server
.clients
,c
); 
2389     initClientMultiState(c
); 
2393 static void addReply(redisClient 
*c
, robj 
*obj
) { 
2394     if (listLength(c
->reply
) == 0 && 
2395         (c
->replstate 
== REDIS_REPL_NONE 
|| 
2396          c
->replstate 
== REDIS_REPL_ONLINE
) && 
2397         aeCreateFileEvent(server
.el
, c
->fd
, AE_WRITABLE
, 
2398         sendReplyToClient
, c
) == AE_ERR
) return; 
2400     if (server
.vm_enabled 
&& obj
->storage 
!= REDIS_VM_MEMORY
) { 
2401         obj 
= dupStringObject(obj
); 
2402         obj
->refcount 
= 0; /* getDecodedObject() will increment the refcount */ 
2404     listAddNodeTail(c
->reply
,getDecodedObject(obj
)); 
2407 static void addReplySds(redisClient 
*c
, sds s
) { 
2408     robj 
*o 
= createObject(REDIS_STRING
,s
); 
2413 static void addReplyDouble(redisClient 
*c
, double d
) { 
2416     snprintf(buf
,sizeof(buf
),"%.17g",d
); 
2417     addReplySds(c
,sdscatprintf(sdsempty(),"$%lu\r\n%s\r\n", 
2418         (unsigned long) strlen(buf
),buf
)); 
2421 static void addReplyLong(redisClient 
*c
, long l
) { 
2425     len 
= snprintf(buf
,sizeof(buf
),":%ld\r\n",l
); 
2426     addReplySds(c
,sdsnewlen(buf
,len
)); 
2429 static void addReplyBulkLen(redisClient 
*c
, robj 
*obj
) { 
2432     if (obj
->encoding 
== REDIS_ENCODING_RAW
) { 
2433         len 
= sdslen(obj
->ptr
); 
2435         long n 
= (long)obj
->ptr
; 
2437         /* Compute how many bytes will take this integer as a radix 10 string */ 
2443         while((n 
= n
/10) != 0) { 
2447     addReplySds(c
,sdscatprintf(sdsempty(),"$%lu\r\n",(unsigned long)len
)); 
2450 static void acceptHandler(aeEventLoop 
*el
, int fd
, void *privdata
, int mask
) { 
2455     REDIS_NOTUSED(mask
); 
2456     REDIS_NOTUSED(privdata
); 
2458     cfd 
= anetAccept(server
.neterr
, fd
, cip
, &cport
); 
2459     if (cfd 
== AE_ERR
) { 
2460         redisLog(REDIS_VERBOSE
,"Accepting client connection: %s", server
.neterr
); 
2463     redisLog(REDIS_VERBOSE
,"Accepted %s:%d", cip
, cport
); 
2464     if ((c 
= createClient(cfd
)) == NULL
) { 
2465         redisLog(REDIS_WARNING
,"Error allocating resoures for the client"); 
2466         close(cfd
); /* May be already closed, just ingore errors */ 
2469     /* If maxclient directive is set and this is one client more... close the 
2470      * connection. Note that we create the client instead to check before 
2471      * for this condition, since now the socket is already set in nonblocking 
2472      * mode and we can send an error for free using the Kernel I/O */ 
2473     if (server
.maxclients 
&& listLength(server
.clients
) > server
.maxclients
) { 
2474         char *err 
= "-ERR max number of clients reached\r\n"; 
2476         /* That's a best effort error message, don't check write errors */ 
2477         if (write(c
->fd
,err
,strlen(err
)) == -1) { 
2478             /* Nothing to do, Just to avoid the warning... */ 
2483     server
.stat_numconnections
++; 
2486 /* ======================= Redis objects implementation ===================== */ 
2488 static robj 
*createObject(int type
, void *ptr
) { 
2491     if (server
.vm_enabled
) pthread_mutex_lock(&server
.obj_freelist_mutex
); 
2492     if (listLength(server
.objfreelist
)) { 
2493         listNode 
*head 
= listFirst(server
.objfreelist
); 
2494         o 
= listNodeValue(head
); 
2495         listDelNode(server
.objfreelist
,head
); 
2496         if (server
.vm_enabled
) pthread_mutex_unlock(&server
.obj_freelist_mutex
); 
2498         if (server
.vm_enabled
) { 
2499             pthread_mutex_unlock(&server
.obj_freelist_mutex
); 
2500             o 
= zmalloc(sizeof(*o
)); 
2502             o 
= zmalloc(sizeof(*o
)-sizeof(struct redisObjectVM
)); 
2506     o
->encoding 
= REDIS_ENCODING_RAW
; 
2509     if (server
.vm_enabled
) { 
2510         /* Note that this code may run in the context of an I/O thread 
2511          * and accessing to server.unixtime in theory is an error 
2512          * (no locks). But in practice this is safe, and even if we read 
2513          * garbage Redis will not fail, as it's just a statistical info */ 
2514         o
->vm
.atime 
= server
.unixtime
; 
2515         o
->storage 
= REDIS_VM_MEMORY
; 
2520 static robj 
*createStringObject(char *ptr
, size_t len
) { 
2521     return createObject(REDIS_STRING
,sdsnewlen(ptr
,len
)); 
2524 static robj 
*dupStringObject(robj 
*o
) { 
2525     assert(o
->encoding 
== REDIS_ENCODING_RAW
); 
2526     return createStringObject(o
->ptr
,sdslen(o
->ptr
)); 
2529 static robj 
*createListObject(void) { 
2530     list 
*l 
= listCreate(); 
2532     listSetFreeMethod(l
,decrRefCount
); 
2533     return createObject(REDIS_LIST
,l
); 
2536 static robj 
*createSetObject(void) { 
2537     dict 
*d 
= dictCreate(&setDictType
,NULL
); 
2538     return createObject(REDIS_SET
,d
); 
2541 static robj 
*createZsetObject(void) { 
2542     zset 
*zs 
= zmalloc(sizeof(*zs
)); 
2544     zs
->dict 
= dictCreate(&zsetDictType
,NULL
); 
2545     zs
->zsl 
= zslCreate(); 
2546     return createObject(REDIS_ZSET
,zs
); 
2549 static void freeStringObject(robj 
*o
) { 
2550     if (o
->encoding 
== REDIS_ENCODING_RAW
) { 
2555 static void freeListObject(robj 
*o
) { 
2556     listRelease((list
*) o
->ptr
); 
2559 static void freeSetObject(robj 
*o
) { 
2560     dictRelease((dict
*) o
->ptr
); 
2563 static void freeZsetObject(robj 
*o
) { 
2566     dictRelease(zs
->dict
); 
2571 static void freeHashObject(robj 
*o
) { 
2572     dictRelease((dict
*) o
->ptr
); 
2575 static void incrRefCount(robj 
*o
) { 
2576     redisAssert(!server
.vm_enabled 
|| o
->storage 
== REDIS_VM_MEMORY
); 
2580 static void decrRefCount(void *obj
) { 
2583     /* Object is a key of a swapped out value, or in the process of being 
2585     if (server
.vm_enabled 
&& 
2586         (o
->storage 
== REDIS_VM_SWAPPED 
|| o
->storage 
== REDIS_VM_LOADING
)) 
2588         if (o
->storage 
== REDIS_VM_SWAPPED 
|| o
->storage 
== REDIS_VM_LOADING
) { 
2589             redisAssert(o
->refcount 
== 1); 
2591         if (o
->storage 
== REDIS_VM_LOADING
) vmCancelThreadedIOJob(obj
); 
2592         redisAssert(o
->type 
== REDIS_STRING
); 
2593         freeStringObject(o
); 
2594         vmMarkPagesFree(o
->vm
.page
,o
->vm
.usedpages
); 
2595         pthread_mutex_lock(&server
.obj_freelist_mutex
); 
2596         if (listLength(server
.objfreelist
) > REDIS_OBJFREELIST_MAX 
|| 
2597             !listAddNodeHead(server
.objfreelist
,o
)) 
2599         pthread_mutex_unlock(&server
.obj_freelist_mutex
); 
2600         server
.vm_stats_swapped_objects
--; 
2603     /* Object is in memory, or in the process of being swapped out. */ 
2604     if (--(o
->refcount
) == 0) { 
2605         if (server
.vm_enabled 
&& o
->storage 
== REDIS_VM_SWAPPING
) 
2606             vmCancelThreadedIOJob(obj
); 
2608         case REDIS_STRING
: freeStringObject(o
); break; 
2609         case REDIS_LIST
: freeListObject(o
); break; 
2610         case REDIS_SET
: freeSetObject(o
); break; 
2611         case REDIS_ZSET
: freeZsetObject(o
); break; 
2612         case REDIS_HASH
: freeHashObject(o
); break; 
2613         default: redisAssert(0 != 0); break; 
2615         if (server
.vm_enabled
) pthread_mutex_lock(&server
.obj_freelist_mutex
); 
2616         if (listLength(server
.objfreelist
) > REDIS_OBJFREELIST_MAX 
|| 
2617             !listAddNodeHead(server
.objfreelist
,o
)) 
2619         if (server
.vm_enabled
) pthread_mutex_unlock(&server
.obj_freelist_mutex
); 
2623 static robj 
*lookupKey(redisDb 
*db
, robj 
*key
) { 
2624     dictEntry 
*de 
= dictFind(db
->dict
,key
); 
2626         robj 
*key 
= dictGetEntryKey(de
); 
2627         robj 
*val 
= dictGetEntryVal(de
); 
2629         if (server
.vm_enabled
) { 
2630             if (key
->storage 
== REDIS_VM_MEMORY 
|| 
2631                 key
->storage 
== REDIS_VM_SWAPPING
) 
2633                 /* If we were swapping the object out, stop it, this key 
2635                 if (key
->storage 
== REDIS_VM_SWAPPING
) 
2636                     vmCancelThreadedIOJob(key
); 
2637                 /* Update the access time of the key for the aging algorithm. */ 
2638                 key
->vm
.atime 
= server
.unixtime
; 
2640                 int notify 
= (key
->storage 
== REDIS_VM_LOADING
); 
2642                 /* Our value was swapped on disk. Bring it at home. */ 
2643                 redisAssert(val 
== NULL
); 
2644                 val 
= vmLoadObject(key
); 
2645                 dictGetEntryVal(de
) = val
; 
2647                 /* Clients blocked by the VM subsystem may be waiting for 
2649                 if (notify
) handleClientsBlockedOnSwappedKey(db
,key
); 
2658 static robj 
*lookupKeyRead(redisDb 
*db
, robj 
*key
) { 
2659     expireIfNeeded(db
,key
); 
2660     return lookupKey(db
,key
); 
2663 static robj 
*lookupKeyWrite(redisDb 
*db
, robj 
*key
) { 
2664     deleteIfVolatile(db
,key
); 
2665     return lookupKey(db
,key
); 
2668 static int deleteKey(redisDb 
*db
, robj 
*key
) { 
2671     /* We need to protect key from destruction: after the first dictDelete() 
2672      * it may happen that 'key' is no longer valid if we don't increment 
2673      * it's count. This may happen when we get the object reference directly 
2674      * from the hash table with dictRandomKey() or dict iterators */ 
2676     if (dictSize(db
->expires
)) dictDelete(db
->expires
,key
); 
2677     retval 
= dictDelete(db
->dict
,key
); 
2680     return retval 
== DICT_OK
; 
2683 /* Try to share an object against the shared objects pool */ 
2684 static robj 
*tryObjectSharing(robj 
*o
) { 
2685     struct dictEntry 
*de
; 
2688     if (o 
== NULL 
|| server
.shareobjects 
== 0) return o
; 
2690     redisAssert(o
->type 
== REDIS_STRING
); 
2691     de 
= dictFind(server
.sharingpool
,o
); 
2693         robj 
*shared 
= dictGetEntryKey(de
); 
2695         c 
= ((unsigned long) dictGetEntryVal(de
))+1; 
2696         dictGetEntryVal(de
) = (void*) c
; 
2697         incrRefCount(shared
); 
2701         /* Here we are using a stream algorithm: Every time an object is 
2702          * shared we increment its count, every time there is a miss we 
2703          * decrement the counter of a random object. If this object reaches 
2704          * zero we remove the object and put the current object instead. */ 
2705         if (dictSize(server
.sharingpool
) >= 
2706                 server
.sharingpoolsize
) { 
2707             de 
= dictGetRandomKey(server
.sharingpool
); 
2708             redisAssert(de 
!= NULL
); 
2709             c 
= ((unsigned long) dictGetEntryVal(de
))-1; 
2710             dictGetEntryVal(de
) = (void*) c
; 
2712                 dictDelete(server
.sharingpool
,de
->key
); 
2715             c 
= 0; /* If the pool is empty we want to add this object */ 
2720             retval 
= dictAdd(server
.sharingpool
,o
,(void*)1); 
2721             redisAssert(retval 
== DICT_OK
); 
2728 /* Check if the nul-terminated string 's' can be represented by a long 
2729  * (that is, is a number that fits into long without any other space or 
2730  * character before or after the digits). 
2732  * If so, the function returns REDIS_OK and *longval is set to the value 
2733  * of the number. Otherwise REDIS_ERR is returned */ 
2734 static int isStringRepresentableAsLong(sds s
, long *longval
) { 
2735     char buf
[32], *endptr
; 
2739     value 
= strtol(s
, &endptr
, 10); 
2740     if (endptr
[0] != '\0') return REDIS_ERR
; 
2741     slen 
= snprintf(buf
,32,"%ld",value
); 
2743     /* If the number converted back into a string is not identical 
2744      * then it's not possible to encode the string as integer */ 
2745     if (sdslen(s
) != (unsigned)slen 
|| memcmp(buf
,s
,slen
)) return REDIS_ERR
; 
2746     if (longval
) *longval 
= value
; 
2750 /* Try to encode a string object in order to save space */ 
2751 static int tryObjectEncoding(robj 
*o
) { 
2755     if (o
->encoding 
!= REDIS_ENCODING_RAW
) 
2756         return REDIS_ERR
; /* Already encoded */ 
2758     /* It's not save to encode shared objects: shared objects can be shared 
2759      * everywhere in the "object space" of Redis. Encoded objects can only 
2760      * appear as "values" (and not, for instance, as keys) */ 
2761      if (o
->refcount 
> 1) return REDIS_ERR
; 
2763     /* Currently we try to encode only strings */ 
2764     redisAssert(o
->type 
== REDIS_STRING
); 
2766     /* Check if we can represent this string as a long integer */ 
2767     if (isStringRepresentableAsLong(s
,&value
) == REDIS_ERR
) return REDIS_ERR
; 
2769     /* Ok, this object can be encoded */ 
2770     o
->encoding 
= REDIS_ENCODING_INT
; 
2772     o
->ptr 
= (void*) value
; 
2776 /* Get a decoded version of an encoded object (returned as a new object). 
2777  * If the object is already raw-encoded just increment the ref count. */ 
2778 static robj 
*getDecodedObject(robj 
*o
) { 
2781     if (o
->encoding 
== REDIS_ENCODING_RAW
) { 
2785     if (o
->type 
== REDIS_STRING 
&& o
->encoding 
== REDIS_ENCODING_INT
) { 
2788         snprintf(buf
,32,"%ld",(long)o
->ptr
); 
2789         dec 
= createStringObject(buf
,strlen(buf
)); 
2792         redisAssert(1 != 1); 
2796 /* Compare two string objects via strcmp() or alike. 
2797  * Note that the objects may be integer-encoded. In such a case we 
2798  * use snprintf() to get a string representation of the numbers on the stack 
2799  * and compare the strings, it's much faster than calling getDecodedObject(). 
2801  * Important note: if objects are not integer encoded, but binary-safe strings, 
2802  * sdscmp() from sds.c will apply memcmp() so this function ca be considered 
2804 static int compareStringObjects(robj 
*a
, robj 
*b
) { 
2805     redisAssert(a
->type 
== REDIS_STRING 
&& b
->type 
== REDIS_STRING
); 
2806     char bufa
[128], bufb
[128], *astr
, *bstr
; 
2809     if (a 
== b
) return 0; 
2810     if (a
->encoding 
!= REDIS_ENCODING_RAW
) { 
2811         snprintf(bufa
,sizeof(bufa
),"%ld",(long) a
->ptr
); 
2817     if (b
->encoding 
!= REDIS_ENCODING_RAW
) { 
2818         snprintf(bufb
,sizeof(bufb
),"%ld",(long) b
->ptr
); 
2824     return bothsds 
? sdscmp(astr
,bstr
) : strcmp(astr
,bstr
); 
2827 static size_t stringObjectLen(robj 
*o
) { 
2828     redisAssert(o
->type 
== REDIS_STRING
); 
2829     if (o
->encoding 
== REDIS_ENCODING_RAW
) { 
2830         return sdslen(o
->ptr
); 
2834         return snprintf(buf
,32,"%ld",(long)o
->ptr
); 
2838 /*============================ RDB saving/loading =========================== */ 
/* Write the one-byte 'type' opcode to 'fp'.
 * Returns 0 on success, -1 if nothing could be written. */
static int rdbSaveType(FILE *fp, unsigned char type) {
    size_t written = fwrite(&type,1,1,fp);

    return (written == 0) ? -1 : 0;
}
/* Write the time 't' to 'fp' as a 32 bit signed integer in host byte
 * order (the value is truncated to int32_t).
 * Returns 0 on success, -1 if nothing could be written. */
static int rdbSaveTime(FILE *fp, time_t t) {
    int32_t seconds = (int32_t) t;

    if (fwrite(&seconds,4,1,fp) != 0) return 0;
    return -1;
}
2851 /* check rdbLoadLen() comments for more info */ 
2852 static int rdbSaveLen(FILE *fp
, uint32_t len
) { 
2853     unsigned char buf
[2]; 
2856         /* Save a 6 bit len */ 
2857         buf
[0] = (len
&0xFF)|(REDIS_RDB_6BITLEN
<<6); 
2858         if (fwrite(buf
,1,1,fp
) == 0) return -1; 
2859     } else if (len 
< (1<<14)) { 
2860         /* Save a 14 bit len */ 
2861         buf
[0] = ((len
>>8)&0xFF)|(REDIS_RDB_14BITLEN
<<6); 
2863         if (fwrite(buf
,2,1,fp
) == 0) return -1; 
2865         /* Save a 32 bit len */ 
2866         buf
[0] = (REDIS_RDB_32BITLEN
<<6); 
2867         if (fwrite(buf
,1,1,fp
) == 0) return -1; 
2869         if (fwrite(&len
,4,1,fp
) == 0) return -1; 
2874 /* String objects in the form "2391" "-100" without any space and with a 
2875  * range of values that can fit in an 8, 16 or 32 bit signed value can be 
2876  * encoded as integers to save space */ 
2877 static int rdbTryIntegerEncoding(sds s
, unsigned char *enc
) { 
2879     char *endptr
, buf
[32]; 
2881     /* Check if it's possible to encode this value as a number */ 
2882     value 
= strtoll(s
, &endptr
, 10); 
2883     if (endptr
[0] != '\0') return 0; 
2884     snprintf(buf
,32,"%lld",value
); 
2886     /* If the number converted back into a string is not identical 
2887      * then it's not possible to encode the string as integer */ 
2888     if (strlen(buf
) != sdslen(s
) || memcmp(buf
,s
,sdslen(s
))) return 0; 
2890     /* Finally check if it fits in our ranges */ 
2891     if (value 
>= -(1<<7) && value 
<= (1<<7)-1) { 
2892         enc
[0] = (REDIS_RDB_ENCVAL
<<6)|REDIS_RDB_ENC_INT8
; 
2893         enc
[1] = value
&0xFF; 
2895     } else if (value 
>= -(1<<15) && value 
<= (1<<15)-1) { 
2896         enc
[0] = (REDIS_RDB_ENCVAL
<<6)|REDIS_RDB_ENC_INT16
; 
2897         enc
[1] = value
&0xFF; 
2898         enc
[2] = (value
>>8)&0xFF; 
2900     } else if (value 
>= -((long long)1<<31) && value 
<= ((long long)1<<31)-1) { 
2901         enc
[0] = (REDIS_RDB_ENCVAL
<<6)|REDIS_RDB_ENC_INT32
; 
2902         enc
[1] = value
&0xFF; 
2903         enc
[2] = (value
>>8)&0xFF; 
2904         enc
[3] = (value
>>16)&0xFF; 
2905         enc
[4] = (value
>>24)&0xFF; 
2912 static int rdbSaveLzfStringObject(FILE *fp
, robj 
*obj
) { 
2913     unsigned int comprlen
, outlen
; 
2917     /* We require at least four bytes compression for this to be worth it */ 
2918     outlen 
= sdslen(obj
->ptr
)-4; 
2919     if (outlen 
<= 0) return 0; 
2920     if ((out 
= zmalloc(outlen
+1)) == NULL
) return 0; 
2921     comprlen 
= lzf_compress(obj
->ptr
, sdslen(obj
->ptr
), out
, outlen
); 
2922     if (comprlen 
== 0) { 
2926     /* Data compressed! Let's save it on disk */ 
2927     byte 
= (REDIS_RDB_ENCVAL
<<6)|REDIS_RDB_ENC_LZF
; 
2928     if (fwrite(&byte
,1,1,fp
) == 0) goto writeerr
; 
2929     if (rdbSaveLen(fp
,comprlen
) == -1) goto writeerr
; 
2930     if (rdbSaveLen(fp
,sdslen(obj
->ptr
)) == -1) goto writeerr
; 
2931     if (fwrite(out
,comprlen
,1,fp
) == 0) goto writeerr
; 
2940 /* Save a string objet as [len][data] on disk. If the object is a string 
2941  * representation of an integer value we try to safe it in a special form */ 
2942 static int rdbSaveStringObjectRaw(FILE *fp
, robj 
*obj
) { 
2946     len 
= sdslen(obj
->ptr
); 
2948     /* Try integer encoding */ 
2950         unsigned char buf
[5]; 
2951         if ((enclen 
= rdbTryIntegerEncoding(obj
->ptr
,buf
)) > 0) { 
2952             if (fwrite(buf
,enclen
,1,fp
) == 0) return -1; 
2957     /* Try LZF compression - under 20 bytes it's unable to compress even 
2958      * aaaaaaaaaaaaaaaaaa so skip it */ 
2959     if (server
.rdbcompression 
&& len 
> 20) { 
2962         retval 
= rdbSaveLzfStringObject(fp
,obj
); 
2963         if (retval 
== -1) return -1; 
2964         if (retval 
> 0) return 0; 
2965         /* retval == 0 means data can't be compressed, save the old way */ 
2968     /* Store verbatim */ 
2969     if (rdbSaveLen(fp
,len
) == -1) return -1; 
2970     if (len 
&& fwrite(obj
->ptr
,len
,1,fp
) == 0) return -1; 
2974 /* Like rdbSaveStringObjectRaw() but handle encoded objects */ 
2975 static int rdbSaveStringObject(FILE *fp
, robj 
*obj
) { 
2978     /* Avoid incr/decr ref count business when possible. 
2979      * This plays well with copy-on-write given that we are probably 
2980      * in a child process (BGSAVE). Also this makes sure key objects 
2981      * of swapped objects are not incRefCount-ed (an assert does not allow 
2982      * this in order to avoid bugs) */ 
2983     if (obj
->encoding 
!= REDIS_ENCODING_RAW
) { 
2984         obj 
= getDecodedObject(obj
); 
2985         retval 
= rdbSaveStringObjectRaw(fp
,obj
); 
2988         retval 
= rdbSaveStringObjectRaw(fp
,obj
); 
/* Save a double value. Doubles are saved as strings prefixed by an unsigned
 * 8 bit integer specifying the length of the representation.
 * Three values of this prefix are reserved for special conditions and are
 * followed by no payload:
 *
 * 253: not a number
 * 254: + inf
 * 255: - inf
 *
 * Returns 0 on success, -1 on write error. */
static int rdbSaveDoubleValue(FILE *fp, double val) {
    unsigned char out[128];
    int total = 1; /* the prefix byte is always written */

    if (isnan(val)) {
        out[0] = 253;
    } else if (!isfinite(val)) {
        out[0] = (val < 0) ? 255 : 254;
    } else {
        char *text = (char*)out+1;

        snprintf(text,sizeof(out)-1,"%.17g",val);
        out[0] = strlen(text);
        total = out[0]+1;
    }
    return (fwrite(out,total,1,fp) == 0) ? -1 : 0;
}
3020 /* Save a Redis object. */ 
3021 static int rdbSaveObject(FILE *fp
, robj 
*o
) { 
3022     if (o
->type 
== REDIS_STRING
) { 
3023         /* Save a string value */ 
3024         if (rdbSaveStringObject(fp
,o
) == -1) return -1; 
3025     } else if (o
->type 
== REDIS_LIST
) { 
3026         /* Save a list value */ 
3027         list 
*list 
= o
->ptr
; 
3031         if (rdbSaveLen(fp
,listLength(list
)) == -1) return -1; 
3032         listRewind(list
,&li
); 
3033         while((ln 
= listNext(&li
))) { 
3034             robj 
*eleobj 
= listNodeValue(ln
); 
3036             if (rdbSaveStringObject(fp
,eleobj
) == -1) return -1; 
3038     } else if (o
->type 
== REDIS_SET
) { 
3039         /* Save a set value */ 
3041         dictIterator 
*di 
= dictGetIterator(set
); 
3044         if (rdbSaveLen(fp
,dictSize(set
)) == -1) return -1; 
3045         while((de 
= dictNext(di
)) != NULL
) { 
3046             robj 
*eleobj 
= dictGetEntryKey(de
); 
3048             if (rdbSaveStringObject(fp
,eleobj
) == -1) return -1; 
3050         dictReleaseIterator(di
); 
3051     } else if (o
->type 
== REDIS_ZSET
) { 
3052         /* Save a sorted set value */ 
3054         dictIterator 
*di 
= dictGetIterator(zs
->dict
); 
3057         if (rdbSaveLen(fp
,dictSize(zs
->dict
)) == -1) return -1; 
3058         while((de 
= dictNext(di
)) != NULL
) { 
3059             robj 
*eleobj 
= dictGetEntryKey(de
); 
3060             double *score 
= dictGetEntryVal(de
); 
3062             if (rdbSaveStringObject(fp
,eleobj
) == -1) return -1; 
3063             if (rdbSaveDoubleValue(fp
,*score
) == -1) return -1; 
3065         dictReleaseIterator(di
); 
3067         redisAssert(0 != 0); 
3072 /* Return the length the object will have on disk if saved with 
3073  * the rdbSaveObject() function. Currently we use a trick to get 
3074  * this length with very little changes to the code. In the future 
3075  * we could switch to a faster solution. */ 
3076 static off_t 
rdbSavedObjectLen(robj 
*o
, FILE *fp
) { 
3077     if (fp 
== NULL
) fp 
= server
.devnull
; 
3079     assert(rdbSaveObject(fp
,o
) != 1); 
3083 /* Return the number of pages required to save this object in the swap file */ 
3084 static off_t 
rdbSavedObjectPages(robj 
*o
, FILE *fp
) { 
3085     off_t bytes 
= rdbSavedObjectLen(o
,fp
); 
3087     return (bytes
+(server
.vm_page_size
-1))/server
.vm_page_size
; 
3090 /* Save the DB on disk. Return REDIS_ERR on error, REDIS_OK on success */ 
3091 static int rdbSave(char *filename
) { 
3092     dictIterator 
*di 
= NULL
; 
3097     time_t now 
= time(NULL
); 
3099     /* Wait for I/O therads to terminate, just in case this is a 
3100      * foreground-saving, to avoid seeking the swap file descriptor at the 
3102     if (server
.vm_enabled
) 
3103         waitEmptyIOJobsQueue(); 
3105     snprintf(tmpfile
,256,"temp-%d.rdb", (int) getpid()); 
3106     fp 
= fopen(tmpfile
,"w"); 
3108         redisLog(REDIS_WARNING
, "Failed saving the DB: %s", strerror(errno
)); 
3111     if (fwrite("REDIS0001",9,1,fp
) == 0) goto werr
; 
3112     for (j 
= 0; j 
< server
.dbnum
; j
++) { 
3113         redisDb 
*db 
= server
.db
+j
; 
3115         if (dictSize(d
) == 0) continue; 
3116         di 
= dictGetIterator(d
); 
3122         /* Write the SELECT DB opcode */ 
3123         if (rdbSaveType(fp
,REDIS_SELECTDB
) == -1) goto werr
; 
3124         if (rdbSaveLen(fp
,j
) == -1) goto werr
; 
3126         /* Iterate this DB writing every entry */ 
3127         while((de 
= dictNext(di
)) != NULL
) { 
3128             robj 
*key 
= dictGetEntryKey(de
); 
3129             robj 
*o 
= dictGetEntryVal(de
); 
3130             time_t expiretime 
= getExpire(db
,key
); 
3132             /* Save the expire time */ 
3133             if (expiretime 
!= -1) { 
3134                 /* If this key is already expired skip it */ 
3135                 if (expiretime 
< now
) continue; 
3136                 if (rdbSaveType(fp
,REDIS_EXPIRETIME
) == -1) goto werr
; 
3137                 if (rdbSaveTime(fp
,expiretime
) == -1) goto werr
; 
3139             /* Save the key and associated value. This requires special 
3140              * handling if the value is swapped out. */ 
3141             if (!server
.vm_enabled 
|| key
->storage 
== REDIS_VM_MEMORY 
|| 
3142                                       key
->storage 
== REDIS_VM_SWAPPING
) { 
3143                 /* Save type, key, value */ 
3144                 if (rdbSaveType(fp
,o
->type
) == -1) goto werr
; 
3145                 if (rdbSaveStringObject(fp
,key
) == -1) goto werr
; 
3146                 if (rdbSaveObject(fp
,o
) == -1) goto werr
; 
3148                 /* REDIS_VM_SWAPPED or REDIS_VM_LOADING */ 
3150                 /* Get a preview of the object in memory */ 
3151                 po 
= vmPreviewObject(key
); 
3152                 /* Save type, key, value */ 
3153                 if (rdbSaveType(fp
,key
->vtype
) == -1) goto werr
; 
3154                 if (rdbSaveStringObject(fp
,key
) == -1) goto werr
; 
3155                 if (rdbSaveObject(fp
,po
) == -1) goto werr
; 
3156                 /* Remove the loaded object from memory */ 
3160         dictReleaseIterator(di
); 
3163     if (rdbSaveType(fp
,REDIS_EOF
) == -1) goto werr
; 
3165     /* Make sure data will not remain on the OS's output buffers */ 
3170     /* Use RENAME to make sure the DB file is changed atomically only 
3171      * if the generate DB file is ok. */ 
3172     if (rename(tmpfile
,filename
) == -1) { 
3173         redisLog(REDIS_WARNING
,"Error moving temp DB file on the final destination: %s", strerror(errno
)); 
3177     redisLog(REDIS_NOTICE
,"DB saved on disk"); 
3179     server
.lastsave 
= time(NULL
); 
3185     redisLog(REDIS_WARNING
,"Write error saving DB on disk: %s", strerror(errno
)); 
3186     if (di
) dictReleaseIterator(di
); 
3190 static int rdbSaveBackground(char *filename
) { 
3193     if (server
.bgsavechildpid 
!= -1) return REDIS_ERR
; 
3194     if (server
.vm_enabled
) waitEmptyIOJobsQueue(); 
3195     if ((childpid 
= fork()) == 0) { 
3197         if (server
.vm_enabled
) vmReopenSwapFile(); 
3199         if (rdbSave(filename
) == REDIS_OK
) { 
3206         if (childpid 
== -1) { 
3207             redisLog(REDIS_WARNING
,"Can't save in background: fork: %s", 
3211         redisLog(REDIS_NOTICE
,"Background saving started by pid %d",childpid
); 
3212         server
.bgsavechildpid 
= childpid
; 
3215     return REDIS_OK
; /* unreached */ 
/* Remove the temporary dump file "temp-<pid>.rdb" that rdbSave() creates
 * when run by the process 'childpid'. */
static void rdbRemoveTempFile(pid_t childpid) {
    char path[256];
    int id = (int) childpid;

    snprintf(path,256,"temp-%d.rdb", id);
    unlink(path);
}
/* Read a one-byte type opcode from 'fp'.
 * Returns the byte (0-255), or -1 if it could not be read. */
static int rdbLoadType(FILE *fp) {
    unsigned char opcode;

    if (fread(&opcode,1,1,fp) != 0) return opcode;
    return -1;
}
/* Read a time saved by rdbSaveTime(): a 32 bit signed integer in host byte
 * order. Returns the time, or -1 if it could not be read (a stored value
 * of -1 cannot be told apart from a read error). */
static time_t rdbLoadTime(FILE *fp) {
    int32_t seconds;

    if (fread(&seconds,4,1,fp) != 0) return (time_t) seconds;
    return -1;
}
3237 /* Load an encoded length from the DB, see the REDIS_RDB_* defines on the top 
3238  * of this file for a description of how this are stored on disk. 
3240  * isencoded is set to 1 if the readed length is not actually a length but 
3241  * an "encoding type", check the above comments for more info */ 
3242 static uint32_t rdbLoadLen(FILE *fp
, int *isencoded
) { 
3243     unsigned char buf
[2]; 
3247     if (isencoded
) *isencoded 
= 0; 
3248     if (fread(buf
,1,1,fp
) == 0) return REDIS_RDB_LENERR
; 
3249     type 
= (buf
[0]&0xC0)>>6; 
3250     if (type 
== REDIS_RDB_6BITLEN
) { 
3251         /* Read a 6 bit len */ 
3253     } else if (type 
== REDIS_RDB_ENCVAL
) { 
3254         /* Read a 6 bit len encoding type */ 
3255         if (isencoded
) *isencoded 
= 1; 
3257     } else if (type 
== REDIS_RDB_14BITLEN
) { 
3258         /* Read a 14 bit len */ 
3259         if (fread(buf
+1,1,1,fp
) == 0) return REDIS_RDB_LENERR
; 
3260         return ((buf
[0]&0x3F)<<8)|buf
[1]; 
3262         /* Read a 32 bit len */ 
3263         if (fread(&len
,4,1,fp
) == 0) return REDIS_RDB_LENERR
; 
3268 static robj 
*rdbLoadIntegerObject(FILE *fp
, int enctype
) { 
3269     unsigned char enc
[4]; 
3272     if (enctype 
== REDIS_RDB_ENC_INT8
) { 
3273         if (fread(enc
,1,1,fp
) == 0) return NULL
; 
3274         val 
= (signed char)enc
[0]; 
3275     } else if (enctype 
== REDIS_RDB_ENC_INT16
) { 
3277         if (fread(enc
,2,1,fp
) == 0) return NULL
; 
3278         v 
= enc
[0]|(enc
[1]<<8); 
3280     } else if (enctype 
== REDIS_RDB_ENC_INT32
) { 
3282         if (fread(enc
,4,1,fp
) == 0) return NULL
; 
3283         v 
= enc
[0]|(enc
[1]<<8)|(enc
[2]<<16)|(enc
[3]<<24); 
3286         val 
= 0; /* anti-warning */ 
3289     return createObject(REDIS_STRING
,sdscatprintf(sdsempty(),"%lld",val
)); 
3292 static robj 
*rdbLoadLzfStringObject(FILE*fp
) { 
3293     unsigned int len
, clen
; 
3294     unsigned char *c 
= NULL
; 
3297     if ((clen 
= rdbLoadLen(fp
,NULL
)) == REDIS_RDB_LENERR
) return NULL
; 
3298     if ((len 
= rdbLoadLen(fp
,NULL
)) == REDIS_RDB_LENERR
) return NULL
; 
3299     if ((c 
= zmalloc(clen
)) == NULL
) goto err
; 
3300     if ((val 
= sdsnewlen(NULL
,len
)) == NULL
) goto err
; 
3301     if (fread(c
,clen
,1,fp
) == 0) goto err
; 
3302     if (lzf_decompress(c
,clen
,val
,len
) == 0) goto err
; 
3304     return createObject(REDIS_STRING
,val
); 
3311 static robj 
*rdbLoadStringObject(FILE*fp
) { 
3316     len 
= rdbLoadLen(fp
,&isencoded
); 
3319         case REDIS_RDB_ENC_INT8
: 
3320         case REDIS_RDB_ENC_INT16
: 
3321         case REDIS_RDB_ENC_INT32
: 
3322             return tryObjectSharing(rdbLoadIntegerObject(fp
,len
)); 
3323         case REDIS_RDB_ENC_LZF
: 
3324             return tryObjectSharing(rdbLoadLzfStringObject(fp
)); 
3330     if (len 
== REDIS_RDB_LENERR
) return NULL
; 
3331     val 
= sdsnewlen(NULL
,len
); 
3332     if (len 
&& fread(val
,len
,1,fp
) == 0) { 
3336     return tryObjectSharing(createObject(REDIS_STRING
,val
)); 
3339 /* For information about double serialization check rdbSaveDoubleValue() */ 
3340 static int rdbLoadDoubleValue(FILE *fp
, double *val
) { 
3344     if (fread(&len
,1,1,fp
) == 0) return -1; 
3346     case 255: *val 
= R_NegInf
; return 0; 
3347     case 254: *val 
= R_PosInf
; return 0; 
3348     case 253: *val 
= R_Nan
; return 0; 
3350         if (fread(buf
,len
,1,fp
) == 0) return -1; 
3352         sscanf(buf
, "%lg", val
); 
3357 /* Load a Redis object of the specified type from the specified file. 
3358  * On success a newly allocated object is returned, otherwise NULL. */ 
3359 static robj 
*rdbLoadObject(int type
, FILE *fp
) { 
3362     if (type 
== REDIS_STRING
) { 
3363         /* Read string value */ 
3364         if ((o 
= rdbLoadStringObject(fp
)) == NULL
) return NULL
; 
3365         tryObjectEncoding(o
); 
3366     } else if (type 
== REDIS_LIST 
|| type 
== REDIS_SET
) { 
3367         /* Read list/set value */ 
3370         if ((listlen 
= rdbLoadLen(fp
,NULL
)) == REDIS_RDB_LENERR
) return NULL
; 
3371         o 
= (type 
== REDIS_LIST
) ? createListObject() : createSetObject(); 
3372         /* It's faster to expand the dict to the right size asap in order 
3373          * to avoid rehashing */ 
3374         if (type 
== REDIS_SET 
&& listlen 
> DICT_HT_INITIAL_SIZE
) 
3375             dictExpand(o
->ptr
,listlen
); 
3376         /* Load every single element of the list/set */ 
3380             if ((ele 
= rdbLoadStringObject(fp
)) == NULL
) return NULL
; 
3381             tryObjectEncoding(ele
); 
3382             if (type 
== REDIS_LIST
) { 
3383                 listAddNodeTail((list
*)o
->ptr
,ele
); 
3385                 dictAdd((dict
*)o
->ptr
,ele
,NULL
); 
3388     } else if (type 
== REDIS_ZSET
) { 
3389         /* Read sorted set value */ 
3393         if ((zsetlen 
= rdbLoadLen(fp
,NULL
)) == REDIS_RDB_LENERR
) return NULL
; 
3394         o 
= createZsetObject(); 
3396         /* Load every single element of the sorted set */ 
3399             double *score 
= zmalloc(sizeof(double)); 
3401             if ((ele 
= rdbLoadStringObject(fp
)) == NULL
) return NULL
; 
3402             tryObjectEncoding(ele
); 
3403             if (rdbLoadDoubleValue(fp
,score
) == -1) return NULL
; 
3404             dictAdd(zs
->dict
,ele
,score
); 
3405             zslInsert(zs
->zsl
,*score
,ele
); 
3406             incrRefCount(ele
); /* added to skiplist */ 
3409         redisAssert(0 != 0); 
3414 static int rdbLoad(char *filename
) { 
3416     robj 
*keyobj 
= NULL
; 
3418     int type
, retval
, rdbver
; 
3419     dict 
*d 
= server
.db
[0].dict
; 
3420     redisDb 
*db 
= server
.db
+0; 
3422     time_t expiretime 
= -1, now 
= time(NULL
); 
3423     long long loadedkeys 
= 0; 
3425     fp 
= fopen(filename
,"r"); 
3426     if (!fp
) return REDIS_ERR
; 
3427     if (fread(buf
,9,1,fp
) == 0) goto eoferr
; 
3429     if (memcmp(buf
,"REDIS",5) != 0) { 
3431         redisLog(REDIS_WARNING
,"Wrong signature trying to load DB from file"); 
3434     rdbver 
= atoi(buf
+5); 
3437         redisLog(REDIS_WARNING
,"Can't handle RDB format version %d",rdbver
); 
3444         if ((type 
= rdbLoadType(fp
)) == -1) goto eoferr
; 
3445         if (type 
== REDIS_EXPIRETIME
) { 
3446             if ((expiretime 
= rdbLoadTime(fp
)) == -1) goto eoferr
; 
3447             /* We read the time so we need to read the object type again */ 
3448             if ((type 
= rdbLoadType(fp
)) == -1) goto eoferr
; 
3450         if (type 
== REDIS_EOF
) break; 
3451         /* Handle SELECT DB opcode as a special case */ 
3452         if (type 
== REDIS_SELECTDB
) { 
3453             if ((dbid 
= rdbLoadLen(fp
,NULL
)) == REDIS_RDB_LENERR
) 
3455             if (dbid 
>= (unsigned)server
.dbnum
) { 
3456                 redisLog(REDIS_WARNING
,"FATAL: Data file was created with a Redis server configured to handle more than %d databases. Exiting\n", server
.dbnum
); 
3459             db 
= server
.db
+dbid
; 
3464         if ((keyobj 
= rdbLoadStringObject(fp
)) == NULL
) goto eoferr
; 
3466         if ((o 
= rdbLoadObject(type
,fp
)) == NULL
) goto eoferr
; 
3467         /* Add the new object in the hash table */ 
3468         retval 
= dictAdd(d
,keyobj
,o
); 
3469         if (retval 
== DICT_ERR
) { 
3470             redisLog(REDIS_WARNING
,"Loading DB, duplicated key (%s) found! Unrecoverable error, exiting now.", keyobj
->ptr
); 
3473         /* Set the expire time if needed */ 
3474         if (expiretime 
!= -1) { 
3475             setExpire(db
,keyobj
,expiretime
); 
3476             /* Delete this key if already expired */ 
3477             if (expiretime 
< now
) deleteKey(db
,keyobj
); 
3481         /* Handle swapping while loading big datasets when VM is on */ 
3483         if (server
.vm_enabled 
&& (loadedkeys 
% 5000) == 0) { 
3484             while (zmalloc_used_memory() > server
.vm_max_memory
) { 
3485                 if (vmSwapOneObjectBlocking() == REDIS_ERR
) break; 
3492 eoferr
: /* unexpected end of file is handled here with a fatal exit */ 
3493     if (keyobj
) decrRefCount(keyobj
); 
3494     redisLog(REDIS_WARNING
,"Short read or OOM loading DB. Unrecoverable error, aborting now."); 
3496     return REDIS_ERR
; /* Just to avoid warning */ 
3499 /*================================== Commands =============================== */ 
3501 static void authCommand(redisClient 
*c
) { 
3502     if (!server
.requirepass 
|| !strcmp(c
->argv
[1]->ptr
, server
.requirepass
)) { 
3503       c
->authenticated 
= 1; 
3504       addReply(c
,shared
.ok
); 
3506       c
->authenticated 
= 0; 
3507       addReplySds(c
,sdscatprintf(sdsempty(),"-ERR invalid password\r\n")); 
3511 static void pingCommand(redisClient 
*c
) { 
3512     addReply(c
,shared
.pong
); 
3515 static void echoCommand(redisClient 
*c
) { 
3516     addReplyBulkLen(c
,c
->argv
[1]); 
3517     addReply(c
,c
->argv
[1]); 
3518     addReply(c
,shared
.crlf
); 
3521 /*=================================== Strings =============================== */ 
3523 static void setGenericCommand(redisClient 
*c
, int nx
) { 
3526     if (nx
) deleteIfVolatile(c
->db
,c
->argv
[1]); 
3527     retval 
= dictAdd(c
->db
->dict
,c
->argv
[1],c
->argv
[2]); 
3528     if (retval 
== DICT_ERR
) { 
3530             /* If the key is about a swapped value, we want a new key object 
3531              * to overwrite the old. So we delete the old key in the database. 
3532              * This will also make sure that swap pages about the old object 
3533              * will be marked as free. */ 
3534             if (deleteIfSwapped(c
->db
,c
->argv
[1])) 
3535                 incrRefCount(c
->argv
[1]); 
3536             dictReplace(c
->db
->dict
,c
->argv
[1],c
->argv
[2]); 
3537             incrRefCount(c
->argv
[2]); 
3539             addReply(c
,shared
.czero
); 
3543         incrRefCount(c
->argv
[1]); 
3544         incrRefCount(c
->argv
[2]); 
3547     removeExpire(c
->db
,c
->argv
[1]); 
3548     addReply(c
, nx 
? shared
.cone 
: shared
.ok
); 
3551 static void setCommand(redisClient 
*c
) { 
3552     setGenericCommand(c
,0); 
3555 static void setnxCommand(redisClient 
*c
) { 
3556     setGenericCommand(c
,1); 
3559 static int getGenericCommand(redisClient 
*c
) { 
3560     robj 
*o 
= lookupKeyRead(c
->db
,c
->argv
[1]); 
3563         addReply(c
,shared
.nullbulk
); 
3566         if (o
->type 
!= REDIS_STRING
) { 
3567             addReply(c
,shared
.wrongtypeerr
); 
3570             addReplyBulkLen(c
,o
); 
3572             addReply(c
,shared
.crlf
); 
3578 static void getCommand(redisClient 
*c
) { 
3579     getGenericCommand(c
); 
3582 static void getsetCommand(redisClient 
*c
) { 
3583     if (getGenericCommand(c
) == REDIS_ERR
) return; 
3584     if (dictAdd(c
->db
->dict
,c
->argv
[1],c
->argv
[2]) == DICT_ERR
) { 
3585         dictReplace(c
->db
->dict
,c
->argv
[1],c
->argv
[2]); 
3587         incrRefCount(c
->argv
[1]); 
3589     incrRefCount(c
->argv
[2]); 
3591     removeExpire(c
->db
,c
->argv
[1]); 
3594 static void mgetCommand(redisClient 
*c
) { 
3597     addReplySds(c
,sdscatprintf(sdsempty(),"*%d\r\n",c
->argc
-1)); 
3598     for (j 
= 1; j 
< c
->argc
; j
++) { 
3599         robj 
*o 
= lookupKeyRead(c
->db
,c
->argv
[j
]); 
3601             addReply(c
,shared
.nullbulk
); 
3603             if (o
->type 
!= REDIS_STRING
) { 
3604                 addReply(c
,shared
.nullbulk
); 
3606                 addReplyBulkLen(c
,o
); 
3608                 addReply(c
,shared
.crlf
); 
3614 static void msetGenericCommand(redisClient 
*c
, int nx
) { 
3615     int j
, busykeys 
= 0; 
3617     if ((c
->argc 
% 2) == 0) { 
3618         addReplySds(c
,sdsnew("-ERR wrong number of arguments for MSET\r\n")); 
3621     /* Handle the NX flag. The MSETNX semantics are to return zero and set
3622      * nothing at all if at least one key already exists. */
3624         for (j 
= 1; j 
< c
->argc
; j 
+= 2) { 
3625             if (lookupKeyWrite(c
->db
,c
->argv
[j
]) != NULL
) { 
3631         addReply(c
, shared
.czero
); 
3635     for (j 
= 1; j 
< c
->argc
; j 
+= 2) { 
3638         tryObjectEncoding(c
->argv
[j
+1]); 
3639         retval 
= dictAdd(c
->db
->dict
,c
->argv
[j
],c
->argv
[j
+1]); 
3640         if (retval 
== DICT_ERR
) { 
3641             dictReplace(c
->db
->dict
,c
->argv
[j
],c
->argv
[j
+1]); 
3642             incrRefCount(c
->argv
[j
+1]); 
3644             incrRefCount(c
->argv
[j
]); 
3645             incrRefCount(c
->argv
[j
+1]); 
3647         removeExpire(c
->db
,c
->argv
[j
]); 
3649     server
.dirty 
+= (c
->argc
-1)/2; 
3650     addReply(c
, nx 
? shared
.cone 
: shared
.ok
); 
3653 static void msetCommand(redisClient 
*c
) { 
3654     msetGenericCommand(c
,0); 
3657 static void msetnxCommand(redisClient 
*c
) { 
3658     msetGenericCommand(c
,1); 
3661 static void incrDecrCommand(redisClient 
*c
, long long incr
) { 
3666     o 
= lookupKeyWrite(c
->db
,c
->argv
[1]); 
3670         if (o
->type 
!= REDIS_STRING
) { 
3675             if (o
->encoding 
== REDIS_ENCODING_RAW
) 
3676                 value 
= strtoll(o
->ptr
, &eptr
, 10); 
3677             else if (o
->encoding 
== REDIS_ENCODING_INT
) 
3678                 value 
= (long)o
->ptr
; 
3680                 redisAssert(1 != 1); 
3685     o 
= createObject(REDIS_STRING
,sdscatprintf(sdsempty(),"%lld",value
)); 
3686     tryObjectEncoding(o
); 
3687     retval 
= dictAdd(c
->db
->dict
,c
->argv
[1],o
); 
3688     if (retval 
== DICT_ERR
) { 
3689         dictReplace(c
->db
->dict
,c
->argv
[1],o
); 
3690         removeExpire(c
->db
,c
->argv
[1]); 
3692         incrRefCount(c
->argv
[1]); 
3695     addReply(c
,shared
.colon
); 
3697     addReply(c
,shared
.crlf
); 
3700 static void incrCommand(redisClient 
*c
) { 
3701     incrDecrCommand(c
,1); 
3704 static void decrCommand(redisClient 
*c
) { 
3705     incrDecrCommand(c
,-1); 
3708 static void incrbyCommand(redisClient 
*c
) { 
3709     long long incr 
= strtoll(c
->argv
[2]->ptr
, NULL
, 10); 
3710     incrDecrCommand(c
,incr
); 
3713 static void decrbyCommand(redisClient 
*c
) { 
3714     long long incr 
= strtoll(c
->argv
[2]->ptr
, NULL
, 10); 
3715     incrDecrCommand(c
,-incr
); 
3718 static void appendCommand(redisClient 
*c
) { 
3723     o 
= lookupKeyWrite(c
->db
,c
->argv
[1]); 
3725         /* Create the key */ 
3726         retval 
= dictAdd(c
->db
->dict
,c
->argv
[1],c
->argv
[2]); 
3727         incrRefCount(c
->argv
[1]); 
3728         incrRefCount(c
->argv
[2]); 
3729         totlen 
= stringObjectLen(c
->argv
[2]); 
3733         de 
= dictFind(c
->db
->dict
,c
->argv
[1]); 
3736         o 
= dictGetEntryVal(de
); 
3737         if (o
->type 
!= REDIS_STRING
) { 
3738             addReply(c
,shared
.wrongtypeerr
); 
3741         /* If the object is specially encoded or shared we have to make 
3743         if (o
->refcount 
!= 1 || o
->encoding 
!= REDIS_ENCODING_RAW
) { 
3744             robj 
*decoded 
= getDecodedObject(o
); 
3746             o 
= createStringObject(decoded
->ptr
, sdslen(decoded
->ptr
)); 
3747             decrRefCount(decoded
); 
3748             dictReplace(c
->db
->dict
,c
->argv
[1],o
); 
3751         if (c
->argv
[2]->encoding 
== REDIS_ENCODING_RAW
) { 
3752             o
->ptr 
= sdscatlen(o
->ptr
, 
3753                 c
->argv
[2]->ptr
, sdslen(c
->argv
[2]->ptr
)); 
3755             o
->ptr 
= sdscatprintf(o
->ptr
, "%ld", 
3756                 (unsigned long) c
->argv
[2]->ptr
); 
3758         totlen 
= sdslen(o
->ptr
); 
3761     addReplySds(c
,sdscatprintf(sdsempty(),":%lu\r\n",(unsigned long)totlen
)); 
3764 /* ========================= Type agnostic commands ========================= */ 
3766 static void delCommand(redisClient 
*c
) { 
3769     for (j 
= 1; j 
< c
->argc
; j
++) { 
3770         if (deleteKey(c
->db
,c
->argv
[j
])) { 
3777         addReply(c
,shared
.czero
); 
3780         addReply(c
,shared
.cone
); 
3783         addReplySds(c
,sdscatprintf(sdsempty(),":%d\r\n",deleted
)); 
3788 static void existsCommand(redisClient 
*c
) { 
3789     addReply(c
,lookupKeyRead(c
->db
,c
->argv
[1]) ? shared
.cone 
: shared
.czero
); 
3792 static void selectCommand(redisClient 
*c
) { 
3793     int id 
= atoi(c
->argv
[1]->ptr
); 
3795     if (selectDb(c
,id
) == REDIS_ERR
) { 
3796         addReplySds(c
,sdsnew("-ERR invalid DB index\r\n")); 
3798         addReply(c
,shared
.ok
); 
3802 static void randomkeyCommand(redisClient 
*c
) { 
3806         de 
= dictGetRandomKey(c
->db
->dict
); 
3807         if (!de 
|| expireIfNeeded(c
->db
,dictGetEntryKey(de
)) == 0) break; 
3810         addReply(c
,shared
.plus
); 
3811         addReply(c
,shared
.crlf
); 
3813         addReply(c
,shared
.plus
); 
3814         addReply(c
,dictGetEntryKey(de
)); 
3815         addReply(c
,shared
.crlf
); 
3819 static void keysCommand(redisClient 
*c
) { 
3822     sds pattern 
= c
->argv
[1]->ptr
; 
3823     int plen 
= sdslen(pattern
); 
3824     unsigned long numkeys 
= 0; 
3825     robj 
*lenobj 
= createObject(REDIS_STRING
,NULL
); 
3827     di 
= dictGetIterator(c
->db
->dict
); 
3829     decrRefCount(lenobj
); 
3830     while((de 
= dictNext(di
)) != NULL
) { 
3831         robj 
*keyobj 
= dictGetEntryKey(de
); 
3833         sds key 
= keyobj
->ptr
; 
3834         if ((pattern
[0] == '*' && pattern
[1] == '\0') || 
3835             stringmatchlen(pattern
,plen
,key
,sdslen(key
),0)) { 
3836             if (expireIfNeeded(c
->db
,keyobj
) == 0) { 
3837                 addReplyBulkLen(c
,keyobj
); 
3839                 addReply(c
,shared
.crlf
); 
3844     dictReleaseIterator(di
); 
3845     lenobj
->ptr 
= sdscatprintf(sdsempty(),"*%lu\r\n",numkeys
); 
3848 static void dbsizeCommand(redisClient 
*c
) { 
3850         sdscatprintf(sdsempty(),":%lu\r\n",dictSize(c
->db
->dict
))); 
3853 static void lastsaveCommand(redisClient 
*c
) { 
3855         sdscatprintf(sdsempty(),":%lu\r\n",server
.lastsave
)); 
3858 static void typeCommand(redisClient 
*c
) { 
3862     o 
= lookupKeyRead(c
->db
,c
->argv
[1]); 
3867         case REDIS_STRING
: type 
= "+string"; break; 
3868         case REDIS_LIST
: type 
= "+list"; break; 
3869         case REDIS_SET
: type 
= "+set"; break; 
3870         case REDIS_ZSET
: type 
= "+zset"; break; 
3871         default: type 
= "unknown"; break; 
3874     addReplySds(c
,sdsnew(type
)); 
3875     addReply(c
,shared
.crlf
); 
3878 static void saveCommand(redisClient 
*c
) { 
3879     if (server
.bgsavechildpid 
!= -1) { 
3880         addReplySds(c
,sdsnew("-ERR background save in progress\r\n")); 
3883     if (rdbSave(server
.dbfilename
) == REDIS_OK
) { 
3884         addReply(c
,shared
.ok
); 
3886         addReply(c
,shared
.err
); 
3890 static void bgsaveCommand(redisClient 
*c
) { 
3891     if (server
.bgsavechildpid 
!= -1) { 
3892         addReplySds(c
,sdsnew("-ERR background save already in progress\r\n")); 
3895     if (rdbSaveBackground(server
.dbfilename
) == REDIS_OK
) { 
3896         char *status 
= "+Background saving started\r\n"; 
3897         addReplySds(c
,sdsnew(status
)); 
3899         addReply(c
,shared
.err
); 
3903 static void shutdownCommand(redisClient 
*c
) { 
3904     redisLog(REDIS_WARNING
,"User requested shutdown, saving DB..."); 
3905     /* Kill the saving child if there is a background saving in progress.
3906        We want to avoid race conditions, for instance our saving child may
3907        overwrite the synchronous save done by SHUTDOWN. */
3908     if (server
.bgsavechildpid 
!= -1) { 
3909         redisLog(REDIS_WARNING
,"There is a live saving child. Killing it!"); 
3910         kill(server
.bgsavechildpid
,SIGKILL
); 
3911         rdbRemoveTempFile(server
.bgsavechildpid
); 
3913     if (server
.appendonly
) { 
3914         /* Append only file: fsync() the AOF and exit */ 
3915         fsync(server
.appendfd
); 
3916         if (server
.vm_enabled
) unlink(server
.vm_swap_file
); 
3919         /* Snapshotting. Perform a SYNC SAVE and exit */ 
3920         if (rdbSave(server
.dbfilename
) == REDIS_OK
) { 
3921             if (server
.daemonize
) 
3922                 unlink(server
.pidfile
); 
3923             redisLog(REDIS_WARNING
,"%zu bytes used at exit",zmalloc_used_memory()); 
3924             redisLog(REDIS_WARNING
,"Server exit now, bye bye..."); 
3925             if (server
.vm_enabled
) unlink(server
.vm_swap_file
); 
3928             /* Ooops.. error saving! The best we can do is to continue operating. 
3929              * Note that if there was a background saving process, in the next 
3930              * cron() Redis will be notified that the background saving aborted, 
3931              * handling special stuff like slaves pending for synchronization... */ 
3932             redisLog(REDIS_WARNING
,"Error trying to save the DB, can't exit");  
3933             addReplySds(c
,sdsnew("-ERR can't quit, problems saving the DB\r\n")); 
3938 static void renameGenericCommand(redisClient 
*c
, int nx
) { 
3941     /* To use the same key as src and dst is probably an error */ 
3942     if (sdscmp(c
->argv
[1]->ptr
,c
->argv
[2]->ptr
) == 0) { 
3943         addReply(c
,shared
.sameobjecterr
); 
3947     o 
= lookupKeyWrite(c
->db
,c
->argv
[1]); 
3949         addReply(c
,shared
.nokeyerr
); 
3953     deleteIfVolatile(c
->db
,c
->argv
[2]); 
3954     if (dictAdd(c
->db
->dict
,c
->argv
[2],o
) == DICT_ERR
) { 
3957             addReply(c
,shared
.czero
); 
3960         dictReplace(c
->db
->dict
,c
->argv
[2],o
); 
3962         incrRefCount(c
->argv
[2]); 
3964     deleteKey(c
->db
,c
->argv
[1]); 
3966     addReply(c
,nx 
? shared
.cone 
: shared
.ok
); 
3969 static void renameCommand(redisClient 
*c
) { 
3970     renameGenericCommand(c
,0); 
3973 static void renamenxCommand(redisClient 
*c
) { 
3974     renameGenericCommand(c
,1); 
3977 static void moveCommand(redisClient 
*c
) { 
3982     /* Obtain source and target DB pointers */ 
3985     if (selectDb(c
,atoi(c
->argv
[2]->ptr
)) == REDIS_ERR
) { 
3986         addReply(c
,shared
.outofrangeerr
); 
3990     selectDb(c
,srcid
); /* Back to the source DB */ 
3992     /* If the user is moving using as target the same 
3993      * DB as the source DB it is probably an error. */ 
3995         addReply(c
,shared
.sameobjecterr
); 
3999     /* Check if the element exists and get a reference */ 
4000     o 
= lookupKeyWrite(c
->db
,c
->argv
[1]); 
4002         addReply(c
,shared
.czero
); 
4006     /* Try to add the element to the target DB */ 
4007     deleteIfVolatile(dst
,c
->argv
[1]); 
4008     if (dictAdd(dst
->dict
,c
->argv
[1],o
) == DICT_ERR
) { 
4009         addReply(c
,shared
.czero
); 
4012     incrRefCount(c
->argv
[1]); 
4015     /* OK! key moved, free the entry in the source DB */ 
4016     deleteKey(src
,c
->argv
[1]); 
4018     addReply(c
,shared
.cone
); 
4021 /* =================================== Lists ================================ */ 
4022 static void pushGenericCommand(redisClient 
*c
, int where
) { 
4026     lobj 
= lookupKeyWrite(c
->db
,c
->argv
[1]); 
4028         if (handleClientsWaitingListPush(c
,c
->argv
[1],c
->argv
[2])) { 
4029             addReply(c
,shared
.ok
); 
4032         lobj 
= createListObject(); 
4034         if (where 
== REDIS_HEAD
) { 
4035             listAddNodeHead(list
,c
->argv
[2]); 
4037             listAddNodeTail(list
,c
->argv
[2]); 
4039         dictAdd(c
->db
->dict
,c
->argv
[1],lobj
); 
4040         incrRefCount(c
->argv
[1]); 
4041         incrRefCount(c
->argv
[2]); 
4043         if (lobj
->type 
!= REDIS_LIST
) { 
4044             addReply(c
,shared
.wrongtypeerr
); 
4047         if (handleClientsWaitingListPush(c
,c
->argv
[1],c
->argv
[2])) { 
4048             addReply(c
,shared
.ok
); 
4052         if (where 
== REDIS_HEAD
) { 
4053             listAddNodeHead(list
,c
->argv
[2]); 
4055             listAddNodeTail(list
,c
->argv
[2]); 
4057         incrRefCount(c
->argv
[2]); 
4060     addReply(c
,shared
.ok
); 
4063 static void lpushCommand(redisClient 
*c
) { 
4064     pushGenericCommand(c
,REDIS_HEAD
); 
4067 static void rpushCommand(redisClient 
*c
) { 
4068     pushGenericCommand(c
,REDIS_TAIL
); 
4071 static void llenCommand(redisClient 
*c
) { 
4075     o 
= lookupKeyRead(c
->db
,c
->argv
[1]); 
4077         addReply(c
,shared
.czero
); 
4080         if (o
->type 
!= REDIS_LIST
) { 
4081             addReply(c
,shared
.wrongtypeerr
); 
4084             addReplySds(c
,sdscatprintf(sdsempty(),":%d\r\n",listLength(l
))); 
4089 static void lindexCommand(redisClient 
*c
) { 
4091     int index 
= atoi(c
->argv
[2]->ptr
); 
4093     o 
= lookupKeyRead(c
->db
,c
->argv
[1]); 
4095         addReply(c
,shared
.nullbulk
); 
4097         if (o
->type 
!= REDIS_LIST
) { 
4098             addReply(c
,shared
.wrongtypeerr
); 
4100             list 
*list 
= o
->ptr
; 
4103             ln 
= listIndex(list
, index
); 
4105                 addReply(c
,shared
.nullbulk
); 
4107                 robj 
*ele 
= listNodeValue(ln
); 
4108                 addReplyBulkLen(c
,ele
); 
4110                 addReply(c
,shared
.crlf
); 
4116 static void lsetCommand(redisClient 
*c
) { 
4118     int index 
= atoi(c
->argv
[2]->ptr
); 
4120     o 
= lookupKeyWrite(c
->db
,c
->argv
[1]); 
4122         addReply(c
,shared
.nokeyerr
); 
4124         if (o
->type 
!= REDIS_LIST
) { 
4125             addReply(c
,shared
.wrongtypeerr
); 
4127             list 
*list 
= o
->ptr
; 
4130             ln 
= listIndex(list
, index
); 
4132                 addReply(c
,shared
.outofrangeerr
); 
4134                 robj 
*ele 
= listNodeValue(ln
); 
4137                 listNodeValue(ln
) = c
->argv
[3]; 
4138                 incrRefCount(c
->argv
[3]); 
4139                 addReply(c
,shared
.ok
); 
4146 static void popGenericCommand(redisClient 
*c
, int where
) { 
4149     o 
= lookupKeyWrite(c
->db
,c
->argv
[1]); 
4151         addReply(c
,shared
.nullbulk
); 
4153         if (o
->type 
!= REDIS_LIST
) { 
4154             addReply(c
,shared
.wrongtypeerr
); 
4156             list 
*list 
= o
->ptr
; 
4159             if (where 
== REDIS_HEAD
) 
4160                 ln 
= listFirst(list
); 
4162                 ln 
= listLast(list
); 
4165                 addReply(c
,shared
.nullbulk
); 
4167                 robj 
*ele 
= listNodeValue(ln
); 
4168                 addReplyBulkLen(c
,ele
); 
4170                 addReply(c
,shared
.crlf
); 
4171                 listDelNode(list
,ln
); 
4178 static void lpopCommand(redisClient 
*c
) { 
4179     popGenericCommand(c
,REDIS_HEAD
); 
4182 static void rpopCommand(redisClient 
*c
) { 
4183     popGenericCommand(c
,REDIS_TAIL
); 
4186 static void lrangeCommand(redisClient 
*c
) { 
4188     int start 
= atoi(c
->argv
[2]->ptr
); 
4189     int end 
= atoi(c
->argv
[3]->ptr
); 
4191     o 
= lookupKeyRead(c
->db
,c
->argv
[1]); 
4193         addReply(c
,shared
.nullmultibulk
); 
4195         if (o
->type 
!= REDIS_LIST
) { 
4196             addReply(c
,shared
.wrongtypeerr
); 
4198             list 
*list 
= o
->ptr
; 
4200             int llen 
= listLength(list
); 
4204             /* convert negative indexes */ 
4205             if (start 
< 0) start 
= llen
+start
; 
4206             if (end 
< 0) end 
= llen
+end
; 
4207             if (start 
< 0) start 
= 0; 
4208             if (end 
< 0) end 
= 0; 
4210             /* indexes sanity checks */ 
4211             if (start 
> end 
|| start 
>= llen
) { 
4212                 /* Out of range start or start > end result in empty list */ 
4213                 addReply(c
,shared
.emptymultibulk
); 
4216             if (end 
>= llen
) end 
= llen
-1; 
4217             rangelen 
= (end
-start
)+1; 
4219             /* Return the result in form of a multi-bulk reply */ 
4220             ln 
= listIndex(list
, start
); 
4221             addReplySds(c
,sdscatprintf(sdsempty(),"*%d\r\n",rangelen
)); 
4222             for (j 
= 0; j 
< rangelen
; j
++) { 
4223                 ele 
= listNodeValue(ln
); 
4224                 addReplyBulkLen(c
,ele
); 
4226                 addReply(c
,shared
.crlf
); 
4233 static void ltrimCommand(redisClient 
*c
) { 
4235     int start 
= atoi(c
->argv
[2]->ptr
); 
4236     int end 
= atoi(c
->argv
[3]->ptr
); 
4238     o 
= lookupKeyWrite(c
->db
,c
->argv
[1]); 
4240         addReply(c
,shared
.ok
); 
4242         if (o
->type 
!= REDIS_LIST
) { 
4243             addReply(c
,shared
.wrongtypeerr
); 
4245             list 
*list 
= o
->ptr
; 
4247             int llen 
= listLength(list
); 
4248             int j
, ltrim
, rtrim
; 
4250             /* convert negative indexes */ 
4251             if (start 
< 0) start 
= llen
+start
; 
4252             if (end 
< 0) end 
= llen
+end
; 
4253             if (start 
< 0) start 
= 0; 
4254             if (end 
< 0) end 
= 0; 
4256             /* indexes sanity checks */ 
4257             if (start 
> end 
|| start 
>= llen
) { 
4258                 /* Out of range start or start > end result in empty list */ 
4262                 if (end 
>= llen
) end 
= llen
-1; 
4267             /* Remove list elements to perform the trim */ 
4268             for (j 
= 0; j 
< ltrim
; j
++) { 
4269                 ln 
= listFirst(list
); 
4270                 listDelNode(list
,ln
); 
4272             for (j 
= 0; j 
< rtrim
; j
++) { 
4273                 ln 
= listLast(list
); 
4274                 listDelNode(list
,ln
); 
4277             addReply(c
,shared
.ok
); 
4282 static void lremCommand(redisClient 
*c
) { 
4285     o 
= lookupKeyWrite(c
->db
,c
->argv
[1]); 
4287         addReply(c
,shared
.czero
); 
4289         if (o
->type 
!= REDIS_LIST
) { 
4290             addReply(c
,shared
.wrongtypeerr
); 
4292             list 
*list 
= o
->ptr
; 
4293             listNode 
*ln
, *next
; 
4294             int toremove 
= atoi(c
->argv
[2]->ptr
); 
4299                 toremove 
= -toremove
; 
4302             ln 
= fromtail 
? list
->tail 
: list
->head
; 
4304                 robj 
*ele 
= listNodeValue(ln
); 
4306                 next 
= fromtail 
? ln
->prev 
: ln
->next
; 
4307                 if (compareStringObjects(ele
,c
->argv
[3]) == 0) { 
4308                     listDelNode(list
,ln
); 
4311                     if (toremove 
&& removed 
== toremove
) break; 
4315             addReplySds(c
,sdscatprintf(sdsempty(),":%d\r\n",removed
)); 
4320 /* This is the semantic of this command: 
4321  *  RPOPLPUSH srclist dstlist: 
4322  *   IF LLEN(srclist) > 0 
4323  *     element = RPOP srclist 
4324  *     LPUSH dstlist element 
4331  * The idea is to be able to get an element from a list in a reliable way 
4332  * since the element is not just returned but pushed against another list 
4333  * as well. This command was originally proposed by Ezra Zygmuntowicz. 
4335 static void rpoplpushcommand(redisClient 
*c
) { 
4338     sobj 
= lookupKeyWrite(c
->db
,c
->argv
[1]); 
4340         addReply(c
,shared
.nullbulk
); 
4342         if (sobj
->type 
!= REDIS_LIST
) { 
4343             addReply(c
,shared
.wrongtypeerr
); 
4345             list 
*srclist 
= sobj
->ptr
; 
4346             listNode 
*ln 
= listLast(srclist
); 
4349                 addReply(c
,shared
.nullbulk
); 
4351                 robj 
*dobj 
= lookupKeyWrite(c
->db
,c
->argv
[2]); 
4352                 robj 
*ele 
= listNodeValue(ln
); 
4355                 if (dobj 
&& dobj
->type 
!= REDIS_LIST
) { 
4356                     addReply(c
,shared
.wrongtypeerr
); 
4360                 /* Add the element to the target list (unless it's directly 
4361                  * passed to some BLPOP-ing client */ 
4362                 if (!handleClientsWaitingListPush(c
,c
->argv
[2],ele
)) { 
4364                         /* Create the list if the key does not exist */ 
4365                         dobj 
= createListObject(); 
4366                         dictAdd(c
->db
->dict
,c
->argv
[2],dobj
); 
4367                         incrRefCount(c
->argv
[2]); 
4369                     dstlist 
= dobj
->ptr
; 
4370                     listAddNodeHead(dstlist
,ele
); 
4374                 /* Send the element to the client as reply as well */ 
4375                 addReplyBulkLen(c
,ele
); 
4377                 addReply(c
,shared
.crlf
); 
4379                 /* Finally remove the element from the source list */ 
4380                 listDelNode(srclist
,ln
); 
4388 /* ==================================== Sets ================================ */ 
4390 static void saddCommand(redisClient 
*c
) { 
4393     set 
= lookupKeyWrite(c
->db
,c
->argv
[1]); 
4395         set 
= createSetObject(); 
4396         dictAdd(c
->db
->dict
,c
->argv
[1],set
); 
4397         incrRefCount(c
->argv
[1]); 
4399         if (set
->type 
!= REDIS_SET
) { 
4400             addReply(c
,shared
.wrongtypeerr
); 
4404     if (dictAdd(set
->ptr
,c
->argv
[2],NULL
) == DICT_OK
) { 
4405         incrRefCount(c
->argv
[2]); 
4407         addReply(c
,shared
.cone
); 
4409         addReply(c
,shared
.czero
); 
4413 static void sremCommand(redisClient 
*c
) { 
4416     set 
= lookupKeyWrite(c
->db
,c
->argv
[1]); 
4418         addReply(c
,shared
.czero
); 
4420         if (set
->type 
!= REDIS_SET
) { 
4421             addReply(c
,shared
.wrongtypeerr
); 
4424         if (dictDelete(set
->ptr
,c
->argv
[2]) == DICT_OK
) { 
4426             if (htNeedsResize(set
->ptr
)) dictResize(set
->ptr
); 
4427             addReply(c
,shared
.cone
); 
4429             addReply(c
,shared
.czero
); 
4434 static void smoveCommand(redisClient 
*c
) { 
4435     robj 
*srcset
, *dstset
; 
4437     srcset 
= lookupKeyWrite(c
->db
,c
->argv
[1]); 
4438     dstset 
= lookupKeyWrite(c
->db
,c
->argv
[2]); 
4440     /* If the source key does not exist return 0, if it's of the wrong type 
4442     if (srcset 
== NULL 
|| srcset
->type 
!= REDIS_SET
) { 
4443         addReply(c
, srcset 
? shared
.wrongtypeerr 
: shared
.czero
); 
4446     /* Error if the destination key is not a set as well */ 
4447     if (dstset 
&& dstset
->type 
!= REDIS_SET
) { 
4448         addReply(c
,shared
.wrongtypeerr
); 
4451     /* Remove the element from the source set */ 
4452     if (dictDelete(srcset
->ptr
,c
->argv
[3]) == DICT_ERR
) { 
4453         /* Key not found in the src set! return zero */ 
4454         addReply(c
,shared
.czero
); 
4458     /* Add the element to the destination set */ 
4460         dstset 
= createSetObject(); 
4461         dictAdd(c
->db
->dict
,c
->argv
[2],dstset
); 
4462         incrRefCount(c
->argv
[2]); 
4464     if (dictAdd(dstset
->ptr
,c
->argv
[3],NULL
) == DICT_OK
) 
4465         incrRefCount(c
->argv
[3]); 
4466     addReply(c
,shared
.cone
); 
4469 static void sismemberCommand(redisClient 
*c
) { 
4472     set 
= lookupKeyRead(c
->db
,c
->argv
[1]); 
4474         addReply(c
,shared
.czero
); 
4476         if (set
->type 
!= REDIS_SET
) { 
4477             addReply(c
,shared
.wrongtypeerr
); 
4480         if (dictFind(set
->ptr
,c
->argv
[2])) 
4481             addReply(c
,shared
.cone
); 
4483             addReply(c
,shared
.czero
); 
4487 static void scardCommand(redisClient 
*c
) { 
4491     o 
= lookupKeyRead(c
->db
,c
->argv
[1]); 
4493         addReply(c
,shared
.czero
); 
4496         if (o
->type 
!= REDIS_SET
) { 
4497             addReply(c
,shared
.wrongtypeerr
); 
4500             addReplySds(c
,sdscatprintf(sdsempty(),":%lu\r\n", 
4506 static void spopCommand(redisClient 
*c
) { 
4510     set 
= lookupKeyWrite(c
->db
,c
->argv
[1]); 
4512         addReply(c
,shared
.nullbulk
); 
4514         if (set
->type 
!= REDIS_SET
) { 
4515             addReply(c
,shared
.wrongtypeerr
); 
4518         de 
= dictGetRandomKey(set
->ptr
); 
4520             addReply(c
,shared
.nullbulk
); 
4522             robj 
*ele 
= dictGetEntryKey(de
); 
4524             addReplyBulkLen(c
,ele
); 
4526             addReply(c
,shared
.crlf
); 
4527             dictDelete(set
->ptr
,ele
); 
4528             if (htNeedsResize(set
->ptr
)) dictResize(set
->ptr
); 
4534 static void srandmemberCommand(redisClient 
*c
) { 
4538     set 
= lookupKeyRead(c
->db
,c
->argv
[1]); 
4540         addReply(c
,shared
.nullbulk
); 
4542         if (set
->type 
!= REDIS_SET
) { 
4543             addReply(c
,shared
.wrongtypeerr
); 
4546         de 
= dictGetRandomKey(set
->ptr
); 
4548             addReply(c
,shared
.nullbulk
); 
4550             robj 
*ele 
= dictGetEntryKey(de
); 
4552             addReplyBulkLen(c
,ele
); 
4554             addReply(c
,shared
.crlf
); 
4559 static int qsortCompareSetsByCardinality(const void *s1
, const void *s2
) { 
4560     dict 
**d1 
= (void*) s1
, **d2 
= (void*) s2
; 
4562     return dictSize(*d1
)-dictSize(*d2
); 
4565 static void sinterGenericCommand(redisClient 
*c
, robj 
**setskeys
, unsigned long setsnum
, robj 
*dstkey
) { 
4566     dict 
**dv 
= zmalloc(sizeof(dict
*)*setsnum
); 
4569     robj 
*lenobj 
= NULL
, *dstset 
= NULL
; 
4570     unsigned long j
, cardinality 
= 0; 
4572     for (j 
= 0; j 
< setsnum
; j
++) { 
4576                     lookupKeyWrite(c
->db
,setskeys
[j
]) : 
4577                     lookupKeyRead(c
->db
,setskeys
[j
]); 
4581                 if (deleteKey(c
->db
,dstkey
)) 
4583                 addReply(c
,shared
.czero
); 
4585                 addReply(c
,shared
.nullmultibulk
); 
4589         if (setobj
->type 
!= REDIS_SET
) { 
4591             addReply(c
,shared
.wrongtypeerr
); 
4594         dv
[j
] = setobj
->ptr
; 
4596     /* Sort sets from the smallest to largest, this will improve our
4597      * algorithm's performance */
4598     qsort(dv
,setsnum
,sizeof(dict
*),qsortCompareSetsByCardinality
); 
4600     /* The first thing we should output is the total number of elements... 
4601      * since this is a multi-bulk write, but at this stage we don't know 
4602      * the intersection set size, so we use a trick, append an empty object 
4603      * to the output list and save the pointer to later modify it with the 
4606         lenobj 
= createObject(REDIS_STRING
,NULL
); 
4608         decrRefCount(lenobj
); 
4610         /* If we have a target key where to store the resulting set 
4611          * create this key with an empty set inside */ 
4612         dstset 
= createSetObject(); 
4615     /* Iterate all the elements of the first (smallest) set, and test 
4616      * the element against all the other sets, if at least one set does 
4617      * not include the element it is discarded */ 
4618     di 
= dictGetIterator(dv
[0]); 
4620     while((de 
= dictNext(di
)) != NULL
) { 
4623         for (j 
= 1; j 
< setsnum
; j
++) 
4624             if (dictFind(dv
[j
],dictGetEntryKey(de
)) == NULL
) break; 
4626             continue; /* at least one set does not contain the member */ 
4627         ele 
= dictGetEntryKey(de
); 
4629             addReplyBulkLen(c
,ele
); 
4631             addReply(c
,shared
.crlf
); 
4634             dictAdd(dstset
->ptr
,ele
,NULL
); 
4638     dictReleaseIterator(di
); 
4641         /* Store the resulting set into the target */ 
4642         deleteKey(c
->db
,dstkey
); 
4643         dictAdd(c
->db
->dict
,dstkey
,dstset
); 
4644         incrRefCount(dstkey
); 
4648         lenobj
->ptr 
= sdscatprintf(sdsempty(),"*%lu\r\n",cardinality
); 
4650         addReplySds(c
,sdscatprintf(sdsempty(),":%lu\r\n", 
4651             dictSize((dict
*)dstset
->ptr
))); 
4657 static void sinterCommand(redisClient 
*c
) { 
4658     sinterGenericCommand(c
,c
->argv
+1,c
->argc
-1,NULL
); 
4661 static void sinterstoreCommand(redisClient 
*c
) { 
4662     sinterGenericCommand(c
,c
->argv
+2,c
->argc
-2,c
->argv
[1]); 
4665 #define REDIS_OP_UNION 0 
4666 #define REDIS_OP_DIFF 1 
4668 static void sunionDiffGenericCommand(redisClient 
*c
, robj 
**setskeys
, int setsnum
, robj 
*dstkey
, int op
) { 
4669     dict 
**dv 
= zmalloc(sizeof(dict
*)*setsnum
); 
4672     robj 
*dstset 
= NULL
; 
4673     int j
, cardinality 
= 0; 
4675     for (j 
= 0; j 
< setsnum
; j
++) { 
4679                     lookupKeyWrite(c
->db
,setskeys
[j
]) : 
4680                     lookupKeyRead(c
->db
,setskeys
[j
]); 
4685         if (setobj
->type 
!= REDIS_SET
) { 
4687             addReply(c
,shared
.wrongtypeerr
); 
4690         dv
[j
] = setobj
->ptr
; 
4693     /* We need a temp set object to store our union. If the dstkey 
4694      * is not NULL (that is, we are inside an SUNIONSTORE operation) then 
4695      * this set object will be the resulting object to set into the target key*/ 
4696     dstset 
= createSetObject(); 
4698     /* Iterate all the elements of all the sets, add every element a single 
4699      * time to the result set */ 
4700     for (j 
= 0; j 
< setsnum
; j
++) { 
4701         if (op 
== REDIS_OP_DIFF 
&& j 
== 0 && !dv
[j
]) break; /* result set is empty */ 
4702         if (!dv
[j
]) continue; /* non existing keys are like empty sets */ 
4704         di 
= dictGetIterator(dv
[j
]); 
4706         while((de 
= dictNext(di
)) != NULL
) { 
4709             /* dictAdd will not add the same element multiple times */ 
4710             ele 
= dictGetEntryKey(de
); 
4711             if (op 
== REDIS_OP_UNION 
|| j 
== 0) { 
4712                 if (dictAdd(dstset
->ptr
,ele
,NULL
) == DICT_OK
) { 
4716             } else if (op 
== REDIS_OP_DIFF
) { 
4717                 if (dictDelete(dstset
->ptr
,ele
) == DICT_OK
) { 
4722         dictReleaseIterator(di
); 
4724         if (op 
== REDIS_OP_DIFF 
&& cardinality 
== 0) break; /* result set is empty */ 
4727     /* Output the content of the resulting set, if not in STORE mode */ 
4729         addReplySds(c
,sdscatprintf(sdsempty(),"*%d\r\n",cardinality
)); 
4730         di 
= dictGetIterator(dstset
->ptr
); 
4731         while((de 
= dictNext(di
)) != NULL
) { 
4734             ele 
= dictGetEntryKey(de
); 
4735             addReplyBulkLen(c
,ele
); 
4737             addReply(c
,shared
.crlf
); 
4739         dictReleaseIterator(di
); 
4741         /* If we have a target key where to store the resulting set 
4742          * create this key with the result set inside */ 
4743         deleteKey(c
->db
,dstkey
); 
4744         dictAdd(c
->db
->dict
,dstkey
,dstset
); 
4745         incrRefCount(dstkey
); 
4750         decrRefCount(dstset
); 
4752         addReplySds(c
,sdscatprintf(sdsempty(),":%lu\r\n", 
4753             dictSize((dict
*)dstset
->ptr
))); 
4759 static void sunionCommand(redisClient 
*c
) { 
4760     sunionDiffGenericCommand(c
,c
->argv
+1,c
->argc
-1,NULL
,REDIS_OP_UNION
); 
4763 static void sunionstoreCommand(redisClient 
*c
) { 
4764     sunionDiffGenericCommand(c
,c
->argv
+2,c
->argc
-2,c
->argv
[1],REDIS_OP_UNION
); 
4767 static void sdiffCommand(redisClient 
*c
) { 
4768     sunionDiffGenericCommand(c
,c
->argv
+1,c
->argc
-1,NULL
,REDIS_OP_DIFF
); 
4771 static void sdiffstoreCommand(redisClient 
*c
) { 
4772     sunionDiffGenericCommand(c
,c
->argv
+2,c
->argc
-2,c
->argv
[1],REDIS_OP_DIFF
); 
4775 /* ==================================== ZSets =============================== */ 
4777 /* ZSETs are ordered sets using two data structures to hold the same elements 
4778  * in order to get O(log(N)) INSERT and REMOVE operations into a sorted 
4781  * The elements are added to a hash table mapping Redis objects to scores. 
4782  * At the same time the elements are added to a skip list mapping scores 
4783  * to Redis objects (so objects are sorted by scores in this "view"). */ 
4785 /* This skiplist implementation is almost a C translation of the original 
4786  * algorithm described by William Pugh in "Skip Lists: A Probabilistic 
4787  * Alternative to Balanced Trees", modified in three ways: 
4788  * a) this implementation allows for repeated values. 
4789  * b) the comparison is not just by key (our 'score') but by satellite data. 
4790  * c) there is a back pointer, so it's a doubly linked list with the back 
4791  * pointers being only at "level 1". This allows to traverse the list 
4792  * from tail to head, useful for ZREVRANGE. */ 
4794 static zskiplistNode 
*zslCreateNode(int level
, double score
, robj 
*obj
) { 
4795     zskiplistNode 
*zn 
= zmalloc(sizeof(*zn
)); 
4797     zn
->forward 
= zmalloc(sizeof(zskiplistNode
*) * level
); 
4803 static zskiplist 
*zslCreate(void) { 
4807     zsl 
= zmalloc(sizeof(*zsl
)); 
4810     zsl
->header 
= zslCreateNode(ZSKIPLIST_MAXLEVEL
,0,NULL
); 
4811     for (j 
= 0; j 
< ZSKIPLIST_MAXLEVEL
; j
++) 
4812         zsl
->header
->forward
[j
] = NULL
; 
4813     zsl
->header
->backward 
= NULL
; 
4818 static void zslFreeNode(zskiplistNode 
*node
) { 
4819     decrRefCount(node
->obj
); 
4820     zfree(node
->forward
); 
4824 static void zslFree(zskiplist 
*zsl
) { 
4825     zskiplistNode 
*node 
= zsl
->header
->forward
[0], *next
; 
4827     zfree(zsl
->header
->forward
); 
4830         next 
= node
->forward
[0]; 
4837 static int zslRandomLevel(void) { 
4839     while ((random()&0xFFFF) < (ZSKIPLIST_P 
* 0xFFFF)) 
4844 static void zslInsert(zskiplist 
*zsl
, double score
, robj 
*obj
) { 
4845     zskiplistNode 
*update
[ZSKIPLIST_MAXLEVEL
], *x
; 
4849     for (i 
= zsl
->level
-1; i 
>= 0; i
--) { 
4850         while (x
->forward
[i
] && 
4851             (x
->forward
[i
]->score 
< score 
|| 
4852                 (x
->forward
[i
]->score 
== score 
&& 
4853                 compareStringObjects(x
->forward
[i
]->obj
,obj
) < 0))) 
4857     /* we assume the key is not already inside, since we allow duplicated 
4858      * scores, and the re-insertion of score and redis object should never 
4859      * happen since the caller of zslInsert() should test in the hash table 
4860      * if the element is already inside or not. */ 
4861     level 
= zslRandomLevel(); 
4862     if (level 
> zsl
->level
) { 
4863         for (i 
= zsl
->level
; i 
< level
; i
++) 
4864             update
[i
] = zsl
->header
; 
4867     x 
= zslCreateNode(level
,score
,obj
); 
4868     for (i 
= 0; i 
< level
; i
++) { 
4869         x
->forward
[i
] = update
[i
]->forward
[i
]; 
4870         update
[i
]->forward
[i
] = x
; 
4872     x
->backward 
= (update
[0] == zsl
->header
) ? NULL 
: update
[0]; 
4874         x
->forward
[0]->backward 
= x
; 
4880 /* Delete an element with matching score/object from the skiplist. */ 
4881 static int zslDelete(zskiplist 
*zsl
, double score
, robj 
*obj
) { 
4882     zskiplistNode 
*update
[ZSKIPLIST_MAXLEVEL
], *x
; 
4886     for (i 
= zsl
->level
-1; i 
>= 0; i
--) { 
4887         while (x
->forward
[i
] && 
4888             (x
->forward
[i
]->score 
< score 
|| 
4889                 (x
->forward
[i
]->score 
== score 
&& 
4890                 compareStringObjects(x
->forward
[i
]->obj
,obj
) < 0))) 
4894     /* We may have multiple elements with the same score, what we need 
4895      * is to find the element with both the right score and object. */ 
4897     if (x 
&& score 
== x
->score 
&& compareStringObjects(x
->obj
,obj
) == 0) { 
4898         for (i 
= 0; i 
< zsl
->level
; i
++) { 
4899             if (update
[i
]->forward
[i
] != x
) break; 
4900             update
[i
]->forward
[i
] = x
->forward
[i
]; 
4902         if (x
->forward
[0]) { 
4903             x
->forward
[0]->backward 
= (x
->backward 
== zsl
->header
) ? 
4906             zsl
->tail 
= x
->backward
; 
4909         while(zsl
->level 
> 1 && zsl
->header
->forward
[zsl
->level
-1] == NULL
) 
4914         return 0; /* not found */ 
4916     return 0; /* not found */ 
4919 /* Delete all the elements with score between min and max from the skiplist. 
4920  * Min and max are inclusive, so a score >= min && score <= max is deleted. 
4921  * Note that this function takes the reference to the hash table view of the 
4922  * sorted set, in order to remove the elements from the hash table too. */ 
4923 static unsigned long zslDeleteRange(zskiplist 
*zsl
, double min
, double max
, dict 
*dict
) { 
4924     zskiplistNode 
*update
[ZSKIPLIST_MAXLEVEL
], *x
; 
4925     unsigned long removed 
= 0; 
4929     for (i 
= zsl
->level
-1; i 
>= 0; i
--) { 
4930         while (x
->forward
[i
] && x
->forward
[i
]->score 
< min
) 
4934     /* All the nodes with a score lower than min were skipped above: now 
4935      * remove nodes, one after the other, as long as their score is <= max. */ 
4937     while (x 
&& x
->score 
<= max
) { 
4938         zskiplistNode 
*next
; 
4940         for (i 
= 0; i 
< zsl
->level
; i
++) { 
4941             if (update
[i
]->forward
[i
] != x
) break; 
4942             update
[i
]->forward
[i
] = x
->forward
[i
]; 
4944         if (x
->forward
[0]) { 
4945             x
->forward
[0]->backward 
= (x
->backward 
== zsl
->header
) ? 
4948             zsl
->tail 
= x
->backward
; 
4950         next 
= x
->forward
[0]; 
4951         dictDelete(dict
,x
->obj
); 
4953         while(zsl
->level 
> 1 && zsl
->header
->forward
[zsl
->level
-1] == NULL
) 
4959     return removed
; /* not found */ 
4962 /* Find the first node having a score equal or greater than the specified one. 
4963  * Returns NULL if there is no match. */ 
4964 static zskiplistNode 
*zslFirstWithScore(zskiplist 
*zsl
, double score
) { 
4969     for (i 
= zsl
->level
-1; i 
>= 0; i
--) { 
4970         while (x
->forward
[i
] && x
->forward
[i
]->score 
< score
) 
4973     /* All the nodes with a score lower than the requested one were skipped: 
4974      * the next node at level 0, if any, is the first one matching. */ 
4975     return x
->forward
[0]; 
4978 /* The actual Z-commands implementations */ 
4980 /* This generic command implements both ZADD and ZINCRBY. 
4981  * scoreval is the score if the operation is a ZADD (doincrement == 0) or 
4982  * the increment if the operation is a ZINCRBY (doincrement == 1). */ 
4983 static void zaddGenericCommand(redisClient 
*c
, robj 
*key
, robj 
*ele
, double scoreval
, int doincrement
) { 
4988     zsetobj 
= lookupKeyWrite(c
->db
,key
); 
4989     if (zsetobj 
== NULL
) { 
4990         zsetobj 
= createZsetObject(); 
4991         dictAdd(c
->db
->dict
,key
,zsetobj
); 
4994         if (zsetobj
->type 
!= REDIS_ZSET
) { 
4995             addReply(c
,shared
.wrongtypeerr
); 
5001     /* Ok now since we implement both ZADD and ZINCRBY here the code 
5002      * needs to handle the two different conditions. It's all about setting 
5003      * '*score', that is, the new score to set, to the right value. */ 
5004     score 
= zmalloc(sizeof(double)); 
5008         /* Read the old score. If the element was not present starts from 0 */ 
5009         de 
= dictFind(zs
->dict
,ele
); 
5011             double *oldscore 
= dictGetEntryVal(de
); 
5012             *score 
= *oldscore 
+ scoreval
; 
5020     /* What follows is a simple remove and re-insert operation that is common 
5021      * to both ZADD and ZINCRBY... */ 
5022     if (dictAdd(zs
->dict
,ele
,score
) == DICT_OK
) { 
5023         /* case 1: New element */ 
5024         incrRefCount(ele
); /* added to hash */ 
5025         zslInsert(zs
->zsl
,*score
,ele
); 
5026         incrRefCount(ele
); /* added to skiplist */ 
5029             addReplyDouble(c
,*score
); 
5031             addReply(c
,shared
.cone
); 
5036         /* case 2: Score update operation */ 
5037         de 
= dictFind(zs
->dict
,ele
); 
5038         redisAssert(de 
!= NULL
); 
5039         oldscore 
= dictGetEntryVal(de
); 
5040         if (*score 
!= *oldscore
) { 
5043             /* Remove and insert the element in the skip list with new score */ 
5044             deleted 
= zslDelete(zs
->zsl
,*oldscore
,ele
); 
5045             redisAssert(deleted 
!= 0); 
5046             zslInsert(zs
->zsl
,*score
,ele
); 
5048             /* Update the score in the hash table */ 
5049             dictReplace(zs
->dict
,ele
,score
); 
5055             addReplyDouble(c
,*score
); 
5057             addReply(c
,shared
.czero
); 
5061 static void zaddCommand(redisClient 
*c
) { 
5064     scoreval 
= strtod(c
->argv
[2]->ptr
,NULL
); 
5065     zaddGenericCommand(c
,c
->argv
[1],c
->argv
[3],scoreval
,0); 
5068 static void zincrbyCommand(redisClient 
*c
) { 
5071     scoreval 
= strtod(c
->argv
[2]->ptr
,NULL
); 
5072     zaddGenericCommand(c
,c
->argv
[1],c
->argv
[3],scoreval
,1); 
5075 static void zremCommand(redisClient 
*c
) { 
5079     zsetobj 
= lookupKeyWrite(c
->db
,c
->argv
[1]); 
5080     if (zsetobj 
== NULL
) { 
5081         addReply(c
,shared
.czero
); 
5087         if (zsetobj
->type 
!= REDIS_ZSET
) { 
5088             addReply(c
,shared
.wrongtypeerr
); 
5092         de 
= dictFind(zs
->dict
,c
->argv
[2]); 
5094             addReply(c
,shared
.czero
); 
5097         /* Delete from the skiplist */ 
5098         oldscore 
= dictGetEntryVal(de
); 
5099         deleted 
= zslDelete(zs
->zsl
,*oldscore
,c
->argv
[2]); 
5100         redisAssert(deleted 
!= 0); 
5102         /* Delete from the hash table */ 
5103         dictDelete(zs
->dict
,c
->argv
[2]); 
5104         if (htNeedsResize(zs
->dict
)) dictResize(zs
->dict
); 
5106         addReply(c
,shared
.cone
); 
5110 static void zremrangebyscoreCommand(redisClient 
*c
) { 
5111     double min 
= strtod(c
->argv
[2]->ptr
,NULL
); 
5112     double max 
= strtod(c
->argv
[3]->ptr
,NULL
); 
5116     zsetobj 
= lookupKeyWrite(c
->db
,c
->argv
[1]); 
5117     if (zsetobj 
== NULL
) { 
5118         addReply(c
,shared
.czero
); 
5122         if (zsetobj
->type 
!= REDIS_ZSET
) { 
5123             addReply(c
,shared
.wrongtypeerr
); 
5127         deleted 
= zslDeleteRange(zs
->zsl
,min
,max
,zs
->dict
); 
5128         if (htNeedsResize(zs
->dict
)) dictResize(zs
->dict
); 
5129         server
.dirty 
+= deleted
; 
5130         addReplySds(c
,sdscatprintf(sdsempty(),":%lu\r\n",deleted
)); 
5134 static void zrangeGenericCommand(redisClient 
*c
, int reverse
) { 
5136     int start 
= atoi(c
->argv
[2]->ptr
); 
5137     int end 
= atoi(c
->argv
[3]->ptr
); 
5140     if (c
->argc 
== 5 && !strcasecmp(c
->argv
[4]->ptr
,"withscores")) { 
5142     } else if (c
->argc 
>= 5) { 
5143         addReply(c
,shared
.syntaxerr
); 
5147     o 
= lookupKeyRead(c
->db
,c
->argv
[1]); 
5149         addReply(c
,shared
.nullmultibulk
); 
5151         if (o
->type 
!= REDIS_ZSET
) { 
5152             addReply(c
,shared
.wrongtypeerr
); 
5154             zset 
*zsetobj 
= o
->ptr
; 
5155             zskiplist 
*zsl 
= zsetobj
->zsl
; 
5158             int llen 
= zsl
->length
; 
5162             /* convert negative indexes */ 
5163             if (start 
< 0) start 
= llen
+start
; 
5164             if (end 
< 0) end 
= llen
+end
; 
5165             if (start 
< 0) start 
= 0; 
5166             if (end 
< 0) end 
= 0; 
5168             /* indexes sanity checks */ 
5169             if (start 
> end 
|| start 
>= llen
) { 
5170                 /* Out of range start or start > end result in empty list */ 
5171                 addReply(c
,shared
.emptymultibulk
); 
5174             if (end 
>= llen
) end 
= llen
-1; 
5175             rangelen 
= (end
-start
)+1; 
5177             /* Return the result in form of a multi-bulk reply */ 
5183                 ln 
= zsl
->header
->forward
[0]; 
5185                     ln 
= ln
->forward
[0]; 
5188             addReplySds(c
,sdscatprintf(sdsempty(),"*%d\r\n", 
5189                 withscores 
? (rangelen
*2) : rangelen
)); 
5190             for (j 
= 0; j 
< rangelen
; j
++) { 
5192                 addReplyBulkLen(c
,ele
); 
5194                 addReply(c
,shared
.crlf
); 
5196                     addReplyDouble(c
,ln
->score
); 
5197                 ln 
= reverse 
? ln
->backward 
: ln
->forward
[0]; 
5203 static void zrangeCommand(redisClient 
*c
) { 
5204     zrangeGenericCommand(c
,0); 
5207 static void zrevrangeCommand(redisClient 
*c
) { 
5208     zrangeGenericCommand(c
,1); 
5211 /* This command implements both ZRANGEBYSCORE and ZCOUNT. 
5212  * If justcount is non-zero, just the count is returned. */ 
5213 static void genericZrangebyscoreCommand(redisClient 
*c
, int justcount
) { 
5216     int minex 
= 0, maxex 
= 0; /* are min or max exclusive? */ 
5217     int offset 
= 0, limit 
= -1; 
5221     /* Parse the min-max interval. If one of the values is prefixed 
5222      * by the "(" character, it's considered "open". For instance 
5223      * ZRANGEBYSCORE zset (1.5 (2.5 will match min < x < max 
5224      * ZRANGEBYSCORE zset 1.5 2.5 will instead match min <= x <= max */ 
5225     if (((char*)c
->argv
[2]->ptr
)[0] == '(') { 
5226         min 
= strtod((char*)c
->argv
[2]->ptr
+1,NULL
); 
5229         min 
= strtod(c
->argv
[2]->ptr
,NULL
); 
5231     if (((char*)c
->argv
[3]->ptr
)[0] == '(') { 
5232         max 
= strtod((char*)c
->argv
[3]->ptr
+1,NULL
); 
5235         max 
= strtod(c
->argv
[3]->ptr
,NULL
); 
5238     /* Parse "WITHSCORES": note that if the command was called with 
5239      * the name ZCOUNT then we are sure that c->argc == 4, so we'll never 
5240      * enter the following paths to parse WITHSCORES and LIMIT. */ 
5241     if (c
->argc 
== 5 || c
->argc 
== 8) { 
5242         if (strcasecmp(c
->argv
[c
->argc
-1]->ptr
,"withscores") == 0) 
5247     if (c
->argc 
!= (4 + withscores
) && c
->argc 
!= (7 + withscores
)) 
5251             sdsnew("-ERR wrong number of arguments for ZRANGEBYSCORE\r\n")); 
5256     if (c
->argc 
== (7 + withscores
) && strcasecmp(c
->argv
[4]->ptr
,"limit")) { 
5257         addReply(c
,shared
.syntaxerr
); 
5259     } else if (c
->argc 
== (7 + withscores
)) { 
5260         offset 
= atoi(c
->argv
[5]->ptr
); 
5261         limit 
= atoi(c
->argv
[6]->ptr
); 
5262         if (offset 
< 0) offset 
= 0; 
5265     /* Ok, lookup the key and get the range */ 
5266     o 
= lookupKeyRead(c
->db
,c
->argv
[1]); 
5268         addReply(c
,justcount 
? shared
.czero 
: shared
.nullmultibulk
); 
5270         if (o
->type 
!= REDIS_ZSET
) { 
5271             addReply(c
,shared
.wrongtypeerr
); 
5273             zset 
*zsetobj 
= o
->ptr
; 
5274             zskiplist 
*zsl 
= zsetobj
->zsl
; 
5276             robj 
*ele
, *lenobj 
= NULL
; 
5277             unsigned long rangelen 
= 0; 
5279             /* Get the first node with the score >= min, or with 
5280              * score > min if 'minex' is true. */ 
5281             ln 
= zslFirstWithScore(zsl
,min
); 
5282             while (minex 
&& ln 
&& ln
->score 
== min
) ln 
= ln
->forward
[0]; 
5285                 /* No element matching the specified interval */ 
5286                 addReply(c
,justcount 
? shared
.czero 
: shared
.emptymultibulk
); 
5290             /* We don't know in advance how many matching elements there 
5291              * are in the list, so we push this object that will represent 
5292              * the multi-bulk length in the output buffer, and will "fix" 
5295                 lenobj 
= createObject(REDIS_STRING
,NULL
); 
5297                 decrRefCount(lenobj
); 
5300             while(ln 
&& (maxex 
? (ln
->score 
< max
) : (ln
->score 
<= max
))) { 
5303                     ln 
= ln
->forward
[0]; 
5306                 if (limit 
== 0) break; 
5309                     addReplyBulkLen(c
,ele
); 
5311                     addReply(c
,shared
.crlf
); 
5313                         addReplyDouble(c
,ln
->score
); 
5315                 ln 
= ln
->forward
[0]; 
5317                 if (limit 
> 0) limit
--; 
5320                 addReplyLong(c
,(long)rangelen
); 
5322                 lenobj
->ptr 
= sdscatprintf(sdsempty(),"*%lu\r\n", 
5323                      withscores 
? (rangelen
*2) : rangelen
); 
5329 static void zrangebyscoreCommand(redisClient 
*c
) { 
5330     genericZrangebyscoreCommand(c
,0); 
5333 static void zcountCommand(redisClient 
*c
) { 
5334     genericZrangebyscoreCommand(c
,1); 
5337 static void zcardCommand(redisClient 
*c
) { 
5341     o 
= lookupKeyRead(c
->db
,c
->argv
[1]); 
5343         addReply(c
,shared
.czero
); 
5346         if (o
->type 
!= REDIS_ZSET
) { 
5347             addReply(c
,shared
.wrongtypeerr
); 
5350             addReplySds(c
,sdscatprintf(sdsempty(),":%lu\r\n",zs
->zsl
->length
)); 
5355 static void zscoreCommand(redisClient 
*c
) { 
5359     o 
= lookupKeyRead(c
->db
,c
->argv
[1]); 
5361         addReply(c
,shared
.nullbulk
); 
5364         if (o
->type 
!= REDIS_ZSET
) { 
5365             addReply(c
,shared
.wrongtypeerr
); 
5370             de 
= dictFind(zs
->dict
,c
->argv
[2]); 
5372                 addReply(c
,shared
.nullbulk
); 
5374                 double *score 
= dictGetEntryVal(de
); 
5376                 addReplyDouble(c
,*score
); 
5382 /* ========================= Non type-specific commands  ==================== */ 
5384 static void flushdbCommand(redisClient 
*c
) { 
5385     server
.dirty 
+= dictSize(c
->db
->dict
); 
5386     dictEmpty(c
->db
->dict
); 
5387     dictEmpty(c
->db
->expires
); 
5388     addReply(c
,shared
.ok
); 
5391 static void flushallCommand(redisClient 
*c
) { 
5392     server
.dirty 
+= emptyDb(); 
5393     addReply(c
,shared
.ok
); 
5394     rdbSave(server
.dbfilename
); 
5398 static redisSortOperation 
*createSortOperation(int type
, robj 
*pattern
) { 
5399     redisSortOperation 
*so 
= zmalloc(sizeof(*so
)); 
5401     so
->pattern 
= pattern
; 
5405 /* Return the value associated to the key with a name obtained 
5406  * substituting the first occurrence of '*' in 'pattern' with 'subst' */ 
5407 static robj 
*lookupKeyByPattern(redisDb 
*db
, robj 
*pattern
, robj 
*subst
) { 
5411     int prefixlen
, sublen
, postfixlen
; 
5412     /* Exploit the internal sds representation to create an sds string allocated on the stack in order to make this function faster */ 
5416         char buf
[REDIS_SORTKEY_MAX
+1]; 
5419     /* If the pattern is "#" return the substitution object itself in order 
5420      * to implement the "SORT ... GET #" feature. */ 
5421     spat 
= pattern
->ptr
; 
5422     if (spat
[0] == '#' && spat
[1] == '\0') { 
5426     /* The substitution object may be specially encoded. If so we create 
5427      * a decoded object on the fly. Otherwise getDecodedObject will just 
5428      * increment the ref count, that we'll decrement later. */ 
5429     subst 
= getDecodedObject(subst
); 
5432     if (sdslen(spat
)+sdslen(ssub
)-1 > REDIS_SORTKEY_MAX
) return NULL
; 
5433     p 
= strchr(spat
,'*'); 
5435         decrRefCount(subst
); 
5440     sublen 
= sdslen(ssub
); 
5441     postfixlen 
= sdslen(spat
)-(prefixlen
+1); 
5442     memcpy(keyname
.buf
,spat
,prefixlen
); 
5443     memcpy(keyname
.buf
+prefixlen
,ssub
,sublen
); 
5444     memcpy(keyname
.buf
+prefixlen
+sublen
,p
+1,postfixlen
); 
5445     keyname
.buf
[prefixlen
+sublen
+postfixlen
] = '\0'; 
5446     keyname
.len 
= prefixlen
+sublen
+postfixlen
; 
5448     initStaticStringObject(keyobj
,((char*)&keyname
)+(sizeof(long)*2)) 
5449     decrRefCount(subst
); 
5451     /* printf("lookup '%s' => %p\n", keyname.buf,de); */ 
5452     return lookupKeyRead(db
,&keyobj
); 
5455 /* sortCompare() is used by qsort in sortCommand(). Given that qsort_r with 
5456  * the additional parameter is not standard but BSD-specific, we have to 
5457  * pass sorting parameters via the global 'server' structure */ 
5458 static int sortCompare(const void *s1
, const void *s2
) { 
5459     const redisSortObject 
*so1 
= s1
, *so2 
= s2
; 
5462     if (!server
.sort_alpha
) { 
5463         /* Numeric sorting. Here it's trivial as we precomputed scores */ 
5464         if (so1
->u
.score 
> so2
->u
.score
) { 
5466         } else if (so1
->u
.score 
< so2
->u
.score
) { 
5472         /* Alphanumeric sorting */ 
5473         if (server
.sort_bypattern
) { 
5474             if (!so1
->u
.cmpobj 
|| !so2
->u
.cmpobj
) { 
5475                 /* At least one compare object is NULL */ 
5476                 if (so1
->u
.cmpobj 
== so2
->u
.cmpobj
) 
5478                 else if (so1
->u
.cmpobj 
== NULL
) 
5483                 /* We have both the objects, use strcoll */ 
5484                 cmp 
= strcoll(so1
->u
.cmpobj
->ptr
,so2
->u
.cmpobj
->ptr
); 
5487             /* Compare elements directly */ 
5490             dec1 
= getDecodedObject(so1
->obj
); 
5491             dec2 
= getDecodedObject(so2
->obj
); 
5492             cmp 
= strcoll(dec1
->ptr
,dec2
->ptr
); 
5497     return server
.sort_desc 
? -cmp 
: cmp
; 
5500 /* The SORT command is the most complex command in Redis. Warning: this code 
5501  * is optimized for speed and a bit less for readability */ 
5502 static void sortCommand(redisClient 
*c
) { 
5505     int desc 
= 0, alpha 
= 0; 
5506     int limit_start 
= 0, limit_count 
= -1, start
, end
; 
5507     int j
, dontsort 
= 0, vectorlen
; 
5508     int getop 
= 0; /* GET operation counter */ 
5509     robj 
*sortval
, *sortby 
= NULL
, *storekey 
= NULL
; 
5510     redisSortObject 
*vector
; /* Resulting vector to sort */ 
5512     /* Lookup the key to sort. It must be of the right types */ 
5513     sortval 
= lookupKeyRead(c
->db
,c
->argv
[1]); 
5514     if (sortval 
== NULL
) { 
5515         addReply(c
,shared
.nullmultibulk
); 
5518     if (sortval
->type 
!= REDIS_SET 
&& sortval
->type 
!= REDIS_LIST 
&& 
5519         sortval
->type 
!= REDIS_ZSET
) 
5521         addReply(c
,shared
.wrongtypeerr
); 
5525     /* Create a list of operations to perform for every sorted element. 
5526      * Operations can be GET/DEL/INCR/DECR */ 
5527     operations 
= listCreate(); 
5528     listSetFreeMethod(operations
,zfree
); 
5531     /* Now we need to protect sortval incrementing its count, in the future 
5532      * SORT may have options able to overwrite/delete keys during the sorting 
5533      * and the sorted key itself may get destroyed */ 
5534     incrRefCount(sortval
); 
5536     /* The SORT command has an SQL-alike syntax, parse it */ 
5537     while(j 
< c
->argc
) { 
5538         int leftargs 
= c
->argc
-j
-1; 
5539         if (!strcasecmp(c
->argv
[j
]->ptr
,"asc")) { 
5541         } else if (!strcasecmp(c
->argv
[j
]->ptr
,"desc")) { 
5543         } else if (!strcasecmp(c
->argv
[j
]->ptr
,"alpha")) { 
5545         } else if (!strcasecmp(c
->argv
[j
]->ptr
,"limit") && leftargs 
>= 2) { 
5546             limit_start 
= atoi(c
->argv
[j
+1]->ptr
); 
5547             limit_count 
= atoi(c
->argv
[j
+2]->ptr
); 
5549         } else if (!strcasecmp(c
->argv
[j
]->ptr
,"store") && leftargs 
>= 1) { 
5550             storekey 
= c
->argv
[j
+1]; 
5552         } else if (!strcasecmp(c
->argv
[j
]->ptr
,"by") && leftargs 
>= 1) { 
5553             sortby 
= c
->argv
[j
+1]; 
5554             /* If the BY pattern does not contain '*', i.e. it is constant, 
5555              * we don't need to sort nor to lookup the weight keys. */ 
5556             if (strchr(c
->argv
[j
+1]->ptr
,'*') == NULL
) dontsort 
= 1; 
5558         } else if (!strcasecmp(c
->argv
[j
]->ptr
,"get") && leftargs 
>= 1) { 
5559             listAddNodeTail(operations
,createSortOperation( 
5560                 REDIS_SORT_GET
,c
->argv
[j
+1])); 
5564             decrRefCount(sortval
); 
5565             listRelease(operations
); 
5566             addReply(c
,shared
.syntaxerr
); 
5572     /* Load the sorting vector with all the objects to sort */ 
5573     switch(sortval
->type
) { 
5574     case REDIS_LIST
: vectorlen 
= listLength((list
*)sortval
->ptr
); break; 
5575     case REDIS_SET
: vectorlen 
=  dictSize((dict
*)sortval
->ptr
); break; 
5576     case REDIS_ZSET
: vectorlen 
= dictSize(((zset
*)sortval
->ptr
)->dict
); break; 
5577     default: vectorlen 
= 0; redisAssert(0); /* Avoid GCC warning */ 
5579     vector 
= zmalloc(sizeof(redisSortObject
)*vectorlen
); 
5582     if (sortval
->type 
== REDIS_LIST
) { 
5583         list 
*list 
= sortval
->ptr
; 
5587         listRewind(list
,&li
); 
5588         while((ln 
= listNext(&li
))) { 
5589             robj 
*ele 
= ln
->value
; 
5590             vector
[j
].obj 
= ele
; 
5591             vector
[j
].u
.score 
= 0; 
5592             vector
[j
].u
.cmpobj 
= NULL
; 
5600         if (sortval
->type 
== REDIS_SET
) { 
5603             zset 
*zs 
= sortval
->ptr
; 
5607         di 
= dictGetIterator(set
); 
5608         while((setele 
= dictNext(di
)) != NULL
) { 
5609             vector
[j
].obj 
= dictGetEntryKey(setele
); 
5610             vector
[j
].u
.score 
= 0; 
5611             vector
[j
].u
.cmpobj 
= NULL
; 
5614         dictReleaseIterator(di
); 
5616     redisAssert(j 
== vectorlen
); 
5618     /* Now it's time to load the right scores in the sorting vector */ 
5619     if (dontsort 
== 0) { 
5620         for (j 
= 0; j 
< vectorlen
; j
++) { 
5624                 byval 
= lookupKeyByPattern(c
->db
,sortby
,vector
[j
].obj
); 
5625                 if (!byval 
|| byval
->type 
!= REDIS_STRING
) continue; 
5627                     vector
[j
].u
.cmpobj 
= getDecodedObject(byval
); 
5629                     if (byval
->encoding 
== REDIS_ENCODING_RAW
) { 
5630                         vector
[j
].u
.score 
= strtod(byval
->ptr
,NULL
); 
5632                         /* Don't need to decode the object if it's 
5633                          * integer-encoded (the only encoding supported) so 
5634                          * far. We can just cast it */ 
5635                         if (byval
->encoding 
== REDIS_ENCODING_INT
) { 
5636                             vector
[j
].u
.score 
= (long)byval
->ptr
; 
5638                             redisAssert(1 != 1); 
5643                     if (vector
[j
].obj
->encoding 
== REDIS_ENCODING_RAW
) 
5644                         vector
[j
].u
.score 
= strtod(vector
[j
].obj
->ptr
,NULL
); 
5646                         if (vector
[j
].obj
->encoding 
== REDIS_ENCODING_INT
) 
5647                             vector
[j
].u
.score 
= (long) vector
[j
].obj
->ptr
; 
5649                             redisAssert(1 != 1); 
5656     /* We are ready to sort the vector... perform a bit of sanity check 
5657      * on the LIMIT option too. We'll use a partial version of quicksort. */ 
5658     start 
= (limit_start 
< 0) ? 0 : limit_start
; 
5659     end 
= (limit_count 
< 0) ? vectorlen
-1 : start
+limit_count
-1; 
5660     if (start 
>= vectorlen
) { 
5661         start 
= vectorlen
-1; 
5664     if (end 
>= vectorlen
) end 
= vectorlen
-1; 
5666     if (dontsort 
== 0) { 
5667         server
.sort_desc 
= desc
; 
5668         server
.sort_alpha 
= alpha
; 
5669         server
.sort_bypattern 
= sortby 
? 1 : 0; 
5670         if (sortby 
&& (start 
!= 0 || end 
!= vectorlen
-1)) 
5671             pqsort(vector
,vectorlen
,sizeof(redisSortObject
),sortCompare
, start
,end
); 
5673             qsort(vector
,vectorlen
,sizeof(redisSortObject
),sortCompare
); 
5676     /* Send command output to the output buffer, performing the specified 
5677      * GET/DEL/INCR/DECR operations if any. */ 
5678     outputlen 
= getop 
? getop
*(end
-start
+1) : end
-start
+1; 
5679     if (storekey 
== NULL
) { 
5680         /* STORE option not specified, send the sorting result to client */ 
5681         addReplySds(c
,sdscatprintf(sdsempty(),"*%d\r\n",outputlen
)); 
5682         for (j 
= start
; j 
<= end
; j
++) { 
5687                 addReplyBulkLen(c
,vector
[j
].obj
); 
5688                 addReply(c
,vector
[j
].obj
); 
5689                 addReply(c
,shared
.crlf
); 
5691             listRewind(operations
,&li
); 
5692             while((ln 
= listNext(&li
))) { 
5693                 redisSortOperation 
*sop 
= ln
->value
; 
5694                 robj 
*val 
= lookupKeyByPattern(c
->db
,sop
->pattern
, 
5697                 if (sop
->type 
== REDIS_SORT_GET
) { 
5698                     if (!val 
|| val
->type 
!= REDIS_STRING
) { 
5699                         addReply(c
,shared
.nullbulk
); 
5701                         addReplyBulkLen(c
,val
); 
5703                         addReply(c
,shared
.crlf
); 
5706                     redisAssert(sop
->type 
== REDIS_SORT_GET
); /* always fails */ 
5711         robj 
*listObject 
= createListObject(); 
5712         list 
*listPtr 
= (list
*) listObject
->ptr
; 
5714         /* STORE option specified, set the sorting result as a List object */ 
5715         for (j 
= start
; j 
<= end
; j
++) { 
5720                 listAddNodeTail(listPtr
,vector
[j
].obj
); 
5721                 incrRefCount(vector
[j
].obj
); 
5723             listRewind(operations
,&li
); 
5724             while((ln 
= listNext(&li
))) { 
5725                 redisSortOperation 
*sop 
= ln
->value
; 
5726                 robj 
*val 
= lookupKeyByPattern(c
->db
,sop
->pattern
, 
5729                 if (sop
->type 
== REDIS_SORT_GET
) { 
5730                     if (!val 
|| val
->type 
!= REDIS_STRING
) { 
5731                         listAddNodeTail(listPtr
,createStringObject("",0)); 
5733                         listAddNodeTail(listPtr
,val
); 
5737                     redisAssert(sop
->type 
== REDIS_SORT_GET
); /* always fails */ 
5741         if (dictReplace(c
->db
->dict
,storekey
,listObject
)) { 
5742             incrRefCount(storekey
); 
5744         /* Note: we add 1 because the DB is dirty anyway since even if the 
5745          * SORT result is empty a new key is set and maybe the old content 
5747         server
.dirty 
+= 1+outputlen
; 
5748         addReplySds(c
,sdscatprintf(sdsempty(),":%d\r\n",outputlen
)); 
5752     decrRefCount(sortval
); 
5753     listRelease(operations
); 
5754     for (j 
= 0; j 
< vectorlen
; j
++) { 
5755         if (sortby 
&& alpha 
&& vector
[j
].u
.cmpobj
) 
5756             decrRefCount(vector
[j
].u
.cmpobj
); 
5761 /* Convert an amount of bytes into a human readable string in the form 
5762  * of 100B, 2G, 100M, 4K, and so forth. */ 
5763 static void bytesToHuman(char *s
, unsigned long long n
) { 
5768         sprintf(s
,"%lluB",n
); 
5770     } else if (n 
< (1024*1024)) { 
5771         d 
= (double)n
/(1024); 
5772         sprintf(s
,"%.2fK",d
); 
5773     } else if (n 
< (1024LL*1024*1024)) { 
5774         d 
= (double)n
/(1024*1024); 
5775         sprintf(s
,"%.2fM",d
); 
5776     } else if (n 
< (1024LL*1024*1024*1024)) { 
5777         d 
= (double)n
/(1024LL*1024*1024); 
5778         sprintf(s
,"%.2fG",d
); 
5782 /* Create the string returned by the INFO command. This is decoupled 
5783  * from the INFO command itself as we need to report the same information 
5784  * on memory corruption problems. */ 
5785 static sds 
genRedisInfoString(void) { 
5787     time_t uptime 
= time(NULL
)-server
.stat_starttime
; 
5791     bytesToHuman(hmem
,zmalloc_used_memory()); 
5792     info 
= sdscatprintf(sdsempty(), 
5793         "redis_version:%s\r\n" 
5795         "multiplexing_api:%s\r\n" 
5796         "process_id:%ld\r\n" 
5797         "uptime_in_seconds:%ld\r\n" 
5798         "uptime_in_days:%ld\r\n" 
5799         "connected_clients:%d\r\n" 
5800         "connected_slaves:%d\r\n" 
5801         "blocked_clients:%d\r\n" 
5802         "used_memory:%zu\r\n" 
5803         "used_memory_human:%s\r\n" 
5804         "changes_since_last_save:%lld\r\n" 
5805         "bgsave_in_progress:%d\r\n" 
5806         "last_save_time:%ld\r\n" 
5807         "bgrewriteaof_in_progress:%d\r\n" 
5808         "total_connections_received:%lld\r\n" 
5809         "total_commands_processed:%lld\r\n" 
5813         (sizeof(long) == 8) ? "64" : "32", 
5818         listLength(server
.clients
)-listLength(server
.slaves
), 
5819         listLength(server
.slaves
), 
5820         server
.blpop_blocked_clients
, 
5821         zmalloc_used_memory(), 
5824         server
.bgsavechildpid 
!= -1, 
5826         server
.bgrewritechildpid 
!= -1, 
5827         server
.stat_numconnections
, 
5828         server
.stat_numcommands
, 
5829         server
.vm_enabled 
!= 0, 
5830         server
.masterhost 
== NULL 
? "master" : "slave" 
5832     if (server
.masterhost
) { 
5833         info 
= sdscatprintf(info
, 
5834             "master_host:%s\r\n" 
5835             "master_port:%d\r\n" 
5836             "master_link_status:%s\r\n" 
5837             "master_last_io_seconds_ago:%d\r\n" 
5840             (server
.replstate 
== REDIS_REPL_CONNECTED
) ? 
5842             server
.master 
? ((int)(time(NULL
)-server
.master
->lastinteraction
)) : -1 
5845     if (server
.vm_enabled
) { 
5847         info 
= sdscatprintf(info
, 
5848             "vm_conf_max_memory:%llu\r\n" 
5849             "vm_conf_page_size:%llu\r\n" 
5850             "vm_conf_pages:%llu\r\n" 
5851             "vm_stats_used_pages:%llu\r\n" 
5852             "vm_stats_swapped_objects:%llu\r\n" 
5853             "vm_stats_swappin_count:%llu\r\n" 
5854             "vm_stats_swappout_count:%llu\r\n" 
5855             "vm_stats_io_newjobs_len:%lu\r\n" 
5856             "vm_stats_io_processing_len:%lu\r\n" 
5857             "vm_stats_io_processed_len:%lu\r\n" 
5858             "vm_stats_io_active_threads:%lu\r\n" 
5859             "vm_stats_blocked_clients:%lu\r\n" 
5860             ,(unsigned long long) server
.vm_max_memory
, 
5861             (unsigned long long) server
.vm_page_size
, 
5862             (unsigned long long) server
.vm_pages
, 
5863             (unsigned long long) server
.vm_stats_used_pages
, 
5864             (unsigned long long) server
.vm_stats_swapped_objects
, 
5865             (unsigned long long) server
.vm_stats_swapins
, 
5866             (unsigned long long) server
.vm_stats_swapouts
, 
5867             (unsigned long) listLength(server
.io_newjobs
), 
5868             (unsigned long) listLength(server
.io_processing
), 
5869             (unsigned long) listLength(server
.io_processed
), 
5870             (unsigned long) server
.io_active_threads
, 
5871             (unsigned long) server
.vm_blocked_clients
 
5875     for (j 
= 0; j 
< server
.dbnum
; j
++) { 
5876         long long keys
, vkeys
; 
5878         keys 
= dictSize(server
.db
[j
].dict
); 
5879         vkeys 
= dictSize(server
.db
[j
].expires
); 
5880         if (keys 
|| vkeys
) { 
5881             info 
= sdscatprintf(info
, "db%d:keys=%lld,expires=%lld\r\n", 
5888 static void infoCommand(redisClient 
*c
) { 
5889     sds info 
= genRedisInfoString(); 
5890     addReplySds(c
,sdscatprintf(sdsempty(),"$%lu\r\n", 
5891         (unsigned long)sdslen(info
))); 
5892     addReplySds(c
,info
); 
5893     addReply(c
,shared
.crlf
); 
5896 static void monitorCommand(redisClient 
*c
) { 
5897     /* ignore MONITOR if already slave or in monitor mode */ 
5898     if (c
->flags 
& REDIS_SLAVE
) return; 
5900     c
->flags 
|= (REDIS_SLAVE
|REDIS_MONITOR
); 
5902     listAddNodeTail(server
.monitors
,c
); 
5903     addReply(c
,shared
.ok
); 
5906 /* ================================= Expire ================================= */ 
5907 static int removeExpire(redisDb 
*db
, robj 
*key
) { 
5908     if (dictDelete(db
->expires
,key
) == DICT_OK
) { 
5915 static int setExpire(redisDb 
*db
, robj 
*key
, time_t when
) { 
5916     if (dictAdd(db
->expires
,key
,(void*)when
) == DICT_ERR
) { 
5924 /* Return the expire time of the specified key, or -1 if no expire 
5925  * is associated with this key (i.e. the key is non volatile) */ 
5926 static time_t getExpire(redisDb 
*db
, robj 
*key
) { 
5929     /* No expire? return ASAP */ 
5930     if (dictSize(db
->expires
) == 0 || 
5931        (de 
= dictFind(db
->expires
,key
)) == NULL
) return -1; 
5933     return (time_t) dictGetEntryVal(de
); 
5936 static int expireIfNeeded(redisDb 
*db
, robj 
*key
) { 
5940     /* No expire? return ASAP */ 
5941     if (dictSize(db
->expires
) == 0 || 
5942        (de 
= dictFind(db
->expires
,key
)) == NULL
) return 0; 
5944     /* Lookup the expire */ 
5945     when 
= (time_t) dictGetEntryVal(de
); 
5946     if (time(NULL
) <= when
) return 0; 
5948     /* Delete the key */ 
5949     dictDelete(db
->expires
,key
); 
5950     return dictDelete(db
->dict
,key
) == DICT_OK
; 
5953 static int deleteIfVolatile(redisDb 
*db
, robj 
*key
) { 
5956     /* No expire? return ASAP */ 
5957     if (dictSize(db
->expires
) == 0 || 
5958        (de 
= dictFind(db
->expires
,key
)) == NULL
) return 0; 
5960     /* Delete the key */ 
5962     dictDelete(db
->expires
,key
); 
5963     return dictDelete(db
->dict
,key
) == DICT_OK
; 
5966 static void expireGenericCommand(redisClient 
*c
, robj 
*key
, time_t seconds
) { 
5969     de 
= dictFind(c
->db
->dict
,key
); 
5971         addReply(c
,shared
.czero
); 
5975         if (deleteKey(c
->db
,key
)) server
.dirty
++; 
5976         addReply(c
, shared
.cone
); 
5979         time_t when 
= time(NULL
)+seconds
; 
5980         if (setExpire(c
->db
,key
,when
)) { 
5981             addReply(c
,shared
.cone
); 
5984             addReply(c
,shared
.czero
); 
5990 static void expireCommand(redisClient 
*c
) { 
5991     expireGenericCommand(c
,c
->argv
[1],strtol(c
->argv
[2]->ptr
,NULL
,10)); 
5994 static void expireatCommand(redisClient 
*c
) { 
5995     expireGenericCommand(c
,c
->argv
[1],strtol(c
->argv
[2]->ptr
,NULL
,10)-time(NULL
)); 
5998 static void ttlCommand(redisClient 
*c
) { 
6002     expire 
= getExpire(c
->db
,c
->argv
[1]); 
6004         ttl 
= (int) (expire
-time(NULL
)); 
6005         if (ttl 
< 0) ttl 
= -1; 
6007     addReplySds(c
,sdscatprintf(sdsempty(),":%d\r\n",ttl
)); 
6010 /* ================================ MULTI/EXEC ============================== */ 
6012 /* Client state initialization for MULTI/EXEC */ 
6013 static void initClientMultiState(redisClient 
*c
) { 
6014     c
->mstate
.commands 
= NULL
; 
6015     c
->mstate
.count 
= 0; 
6018 /* Release all the resources associated with MULTI/EXEC state */ 
6019 static void freeClientMultiState(redisClient 
*c
) { 
6022     for (j 
= 0; j 
< c
->mstate
.count
; j
++) { 
6024         multiCmd 
*mc 
= c
->mstate
.commands
+j
; 
6026         for (i 
= 0; i 
< mc
->argc
; i
++) 
6027             decrRefCount(mc
->argv
[i
]); 
6030     zfree(c
->mstate
.commands
); 
6033 /* Add a new command into the MULTI commands queue */ 
6034 static void queueMultiCommand(redisClient 
*c
, struct redisCommand 
*cmd
) { 
6038     c
->mstate
.commands 
= zrealloc(c
->mstate
.commands
, 
6039             sizeof(multiCmd
)*(c
->mstate
.count
+1)); 
6040     mc 
= c
->mstate
.commands
+c
->mstate
.count
; 
6043     mc
->argv 
= zmalloc(sizeof(robj
*)*c
->argc
); 
6044     memcpy(mc
->argv
,c
->argv
,sizeof(robj
*)*c
->argc
); 
6045     for (j 
= 0; j 
< c
->argc
; j
++) 
6046         incrRefCount(mc
->argv
[j
]); 
6050 static void multiCommand(redisClient 
*c
) { 
6051     c
->flags 
|= REDIS_MULTI
; 
6052     addReply(c
,shared
.ok
); 
6055 static void discardCommand(redisClient 
*c
) { 
6056     if (!(c
->flags 
& REDIS_MULTI
)) { 
6057         addReplySds(c
,sdsnew("-ERR DISCARD without MULTI\r\n")); 
6061     freeClientMultiState(c
); 
6062     initClientMultiState(c
); 
6063     c
->flags 
&= (~REDIS_MULTI
); 
6064     addReply(c
,shared
.ok
); 
6067 static void execCommand(redisClient 
*c
) { 
6072     if (!(c
->flags 
& REDIS_MULTI
)) { 
6073         addReplySds(c
,sdsnew("-ERR EXEC without MULTI\r\n")); 
6077     orig_argv 
= c
->argv
; 
6078     orig_argc 
= c
->argc
; 
6079     addReplySds(c
,sdscatprintf(sdsempty(),"*%d\r\n",c
->mstate
.count
)); 
6080     for (j 
= 0; j 
< c
->mstate
.count
; j
++) { 
6081         c
->argc 
= c
->mstate
.commands
[j
].argc
; 
6082         c
->argv 
= c
->mstate
.commands
[j
].argv
; 
6083         call(c
,c
->mstate
.commands
[j
].cmd
); 
6085     c
->argv 
= orig_argv
; 
6086     c
->argc 
= orig_argc
; 
6087     freeClientMultiState(c
); 
6088     initClientMultiState(c
); 
6089     c
->flags 
&= (~REDIS_MULTI
); 
6092 /* =========================== Blocking Operations  ========================= */ 
6094 /* Currently Redis blocking operations support is limited to list POP ops, 
6095  * so the current implementation is not fully generic, but it is also not 
6096  * completely specific so it will not require a rewrite to support new 
6097  * kind of blocking operations in the future. 
6099  * Still it's important to note that list blocking operations can be already 
6100  * used as a notification mechanism in order to implement other blocking 
6101  * operations at application level, so there must be a very strong evidence 
6102  * of usefulness and generality before new blocking operations are implemented. 
6104  * This is how the current blocking POP works, we use BLPOP as example: 
6105  * - If the user calls BLPOP and the key exists and contains a non empty list 
6106  *   then LPOP is called instead. So BLPOP is semantically the same as LPOP 
6107  *   if there is no need to block. 
6108  * - If instead BLPOP is called and the key does not exist or the list is 
6109  *   empty we need to block. In order to do so we remove the notification for 
6110  *   new data to read in the client socket (so that we'll not serve new 
6111  *   requests if the blocking request is not served). Also we put the client 
6112  *   in a dictionary (db->blockingkeys) mapping keys to a list of clients 
6113  *   blocking for these keys. 
6114  * - If a PUSH operation against a key with blocked clients waiting is 
6115  *   performed, we serve the first in the list: basically instead of pushing 
6116  *   the new element inside the list we return it to the (first / oldest) 
6117  *   blocking client, unblock the client, and remove it from the list. 
6119  * The above comment and the source code should be enough in order to understand 
6120  * the implementation and modify / fix it later. 
6123 /* Set a client in blocking mode for the specified key, with the specified 
6125 static void blockForKeys(redisClient 
*c
, robj 
**keys
, int numkeys
, time_t timeout
) { 
6130     c
->blockingkeys 
= zmalloc(sizeof(robj
*)*numkeys
); 
6131     c
->blockingkeysnum 
= numkeys
; 
6132     c
->blockingto 
= timeout
; 
6133     for (j 
= 0; j 
< numkeys
; j
++) { 
6134         /* Add the key in the client structure, to map clients -> keys */ 
6135         c
->blockingkeys
[j
] = keys
[j
]; 
6136         incrRefCount(keys
[j
]); 
6138         /* And in the other "side", to map keys -> clients */ 
6139         de 
= dictFind(c
->db
->blockingkeys
,keys
[j
]); 
6143             /* For every key we take a list of clients blocked for it */ 
6145             retval 
= dictAdd(c
->db
->blockingkeys
,keys
[j
],l
); 
6146             incrRefCount(keys
[j
]); 
6147             assert(retval 
== DICT_OK
); 
6149             l 
= dictGetEntryVal(de
); 
6151         listAddNodeTail(l
,c
); 
6153     /* Mark the client as a blocked client */ 
6154     c
->flags 
|= REDIS_BLOCKED
; 
6155     server
.blpop_blocked_clients
++; 
6158 /* Unblock a client that's waiting in a blocking operation such as BLPOP */ 
6159 static void unblockClientWaitingData(redisClient 
*c
) { 
6164     assert(c
->blockingkeys 
!= NULL
); 
6165     /* The client may wait for multiple keys, so unblock it for every key. */ 
6166     for (j 
= 0; j 
< c
->blockingkeysnum
; j
++) { 
6167         /* Remove this client from the list of clients waiting for this key. */ 
6168         de 
= dictFind(c
->db
->blockingkeys
,c
->blockingkeys
[j
]); 
6170         l 
= dictGetEntryVal(de
); 
6171         listDelNode(l
,listSearchKey(l
,c
)); 
6172         /* If the list is empty we need to remove it to avoid wasting memory */ 
6173         if (listLength(l
) == 0) 
6174             dictDelete(c
->db
->blockingkeys
,c
->blockingkeys
[j
]); 
6175         decrRefCount(c
->blockingkeys
[j
]); 
6177     /* Cleanup the client structure */ 
6178     zfree(c
->blockingkeys
); 
6179     c
->blockingkeys 
= NULL
; 
6180     c
->flags 
&= (~REDIS_BLOCKED
); 
6181     server
.blpop_blocked_clients
--; 
6182     /* We want to process data if there is some command waiting 
6183      * in the input buffer. Note that this is safe even if 
6184      * unblockClientWaitingData() gets called from freeClient() because 
6185      * freeClient() will be smart enough to call this function 
6186      * *after* c->querybuf was set to NULL. */ 
6187     if (c
->querybuf 
&& sdslen(c
->querybuf
) > 0) processInputBuffer(c
); 
6190 /* This should be called from any function PUSHing into lists. 
6191  * 'c' is the "pushing client", 'key' is the key it is pushing data against, 
6192  * 'ele' is the element pushed. 
6194  * If the function returns 0 there was no client waiting for a list push 
6197  * If the function returns 1 there was a client waiting for a list push 
6198  * against this key, the element was passed to this client thus it's not 
6199  * needed to actually add it to the list and the caller should return asap. */ 
6200 static int handleClientsWaitingListPush(redisClient 
*c
, robj 
*key
, robj 
*ele
) { 
6201     struct dictEntry 
*de
; 
6202     redisClient 
*receiver
; 
6206     de 
= dictFind(c
->db
->blockingkeys
,key
); 
6207     if (de 
== NULL
) return 0; 
6208     l 
= dictGetEntryVal(de
); 
6211     receiver 
= ln
->value
; 
6213     addReplySds(receiver
,sdsnew("*2\r\n")); 
6214     addReplyBulkLen(receiver
,key
); 
6215     addReply(receiver
,key
); 
6216     addReply(receiver
,shared
.crlf
); 
6217     addReplyBulkLen(receiver
,ele
); 
6218     addReply(receiver
,ele
); 
6219     addReply(receiver
,shared
.crlf
); 
6220     unblockClientWaitingData(receiver
); 
6224 /* Blocking RPOP/LPOP */ 
6225 static void blockingPopGenericCommand(redisClient 
*c
, int where
) { 
6230     for (j 
= 1; j 
< c
->argc
-1; j
++) { 
6231         o 
= lookupKeyWrite(c
->db
,c
->argv
[j
]); 
6233             if (o
->type 
!= REDIS_LIST
) { 
6234                 addReply(c
,shared
.wrongtypeerr
); 
6237                 list 
*list 
= o
->ptr
; 
6238                 if (listLength(list
) != 0) { 
6239                     /* If the list contains elements fall back to the usual 
6240                      * non-blocking POP operation */ 
6241                     robj 
*argv
[2], **orig_argv
; 
6244                     /* We need to alter the command arguments before to call 
6245                      * popGenericCommand() as the command takes a single key. */ 
6246                     orig_argv 
= c
->argv
; 
6247                     orig_argc 
= c
->argc
; 
6248                     argv
[1] = c
->argv
[j
]; 
6252                     /* Also the return value is different, we need to output 
6253                      * the multi bulk reply header and the key name. The 
6254                      * "real" command will add the last element (the value) 
6255                      * for us. If this sounds like a hack to you it's just 
6256                      * because it is... */ 
6257                     addReplySds(c
,sdsnew("*2\r\n")); 
6258                     addReplyBulkLen(c
,argv
[1]); 
6259                     addReply(c
,argv
[1]); 
6260                     addReply(c
,shared
.crlf
); 
6261                     popGenericCommand(c
,where
); 
6263                     /* Fix the client structure with the original stuff */ 
6264                     c
->argv 
= orig_argv
; 
6265                     c
->argc 
= orig_argc
; 
6271     /* If the list is empty or the key does not exists we must block */ 
6272     timeout 
= strtol(c
->argv
[c
->argc
-1]->ptr
,NULL
,10); 
6273     if (timeout 
> 0) timeout 
+= time(NULL
); 
6274     blockForKeys(c
,c
->argv
+1,c
->argc
-2,timeout
); 
6277 static void blpopCommand(redisClient 
*c
) { 
6278     blockingPopGenericCommand(c
,REDIS_HEAD
); 
6281 static void brpopCommand(redisClient 
*c
) { 
6282     blockingPopGenericCommand(c
,REDIS_TAIL
); 
6285 /* =============================== Replication  ============================= */ 
6287 static int syncWrite(int fd
, char *ptr
, ssize_t size
, int timeout
) { 
6288     ssize_t nwritten
, ret 
= size
; 
6289     time_t start 
= time(NULL
); 
6293         if (aeWait(fd
,AE_WRITABLE
,1000) & AE_WRITABLE
) { 
6294             nwritten 
= write(fd
,ptr
,size
); 
6295             if (nwritten 
== -1) return -1; 
6299         if ((time(NULL
)-start
) > timeout
) { 
6307 static int syncRead(int fd
, char *ptr
, ssize_t size
, int timeout
) { 
6308     ssize_t nread
, totread 
= 0; 
6309     time_t start 
= time(NULL
); 
6313         if (aeWait(fd
,AE_READABLE
,1000) & AE_READABLE
) { 
6314             nread 
= read(fd
,ptr
,size
); 
6315             if (nread 
== -1) return -1; 
6320         if ((time(NULL
)-start
) > timeout
) { 
6328 static int syncReadLine(int fd
, char *ptr
, ssize_t size
, int timeout
) { 
6335         if (syncRead(fd
,&c
,1,timeout
) == -1) return -1; 
6338             if (nread 
&& *(ptr
-1) == '\r') *(ptr
-1) = '\0'; 
6349 static void syncCommand(redisClient 
*c
) { 
6350     /* ignore SYNC if already slave or in monitor mode */ 
6351     if (c
->flags 
& REDIS_SLAVE
) return; 
6353     /* SYNC can't be issued when the server has pending data to send to 
6354      * the client about already issued commands. We need a fresh reply 
6355      * buffer registering the differences between the BGSAVE and the current 
6356      * dataset, so that we can copy to other slaves if needed. */ 
6357     if (listLength(c
->reply
) != 0) { 
6358         addReplySds(c
,sdsnew("-ERR SYNC is invalid with pending input\r\n")); 
6362     redisLog(REDIS_NOTICE
,"Slave ask for synchronization"); 
6363     /* Here we need to check if there is a background saving operation 
6364      * in progress, or if it is required to start one */ 
6365     if (server
.bgsavechildpid 
!= -1) { 
6366         /* Ok a background save is in progress. Let's check if it is a good 
6367          * one for replication, i.e. if there is another slave that is 
6368          * registering differences since the server forked to save */ 
6373         listRewind(server
.slaves
,&li
); 
6374         while((ln 
= listNext(&li
))) { 
6376             if (slave
->replstate 
== REDIS_REPL_WAIT_BGSAVE_END
) break; 
6379             /* Perfect, the server is already registering differences for 
6380              * another slave. Set the right state, and copy the buffer. */ 
6381             listRelease(c
->reply
); 
6382             c
->reply 
= listDup(slave
->reply
); 
6383             c
->replstate 
= REDIS_REPL_WAIT_BGSAVE_END
; 
6384             redisLog(REDIS_NOTICE
,"Waiting for end of BGSAVE for SYNC"); 
6386             /* No way, we need to wait for the next BGSAVE in order to 
6387              * register differences */ 
6388             c
->replstate 
= REDIS_REPL_WAIT_BGSAVE_START
; 
6389             redisLog(REDIS_NOTICE
,"Waiting for next BGSAVE for SYNC"); 
6392         /* Ok we don't have a BGSAVE in progress, let's start one */ 
6393         redisLog(REDIS_NOTICE
,"Starting BGSAVE for SYNC"); 
6394         if (rdbSaveBackground(server
.dbfilename
) != REDIS_OK
) { 
6395             redisLog(REDIS_NOTICE
,"Replication failed, can't BGSAVE"); 
6396             addReplySds(c
,sdsnew("-ERR Unalbe to perform background save\r\n")); 
6399         c
->replstate 
= REDIS_REPL_WAIT_BGSAVE_END
; 
6402     c
->flags 
|= REDIS_SLAVE
; 
6404     listAddNodeTail(server
.slaves
,c
); 
6408 static void sendBulkToSlave(aeEventLoop 
*el
, int fd
, void *privdata
, int mask
) { 
6409     redisClient 
*slave 
= privdata
; 
6411     REDIS_NOTUSED(mask
); 
6412     char buf
[REDIS_IOBUF_LEN
]; 
6413     ssize_t nwritten
, buflen
; 
6415     if (slave
->repldboff 
== 0) { 
6416         /* Write the bulk write count before to transfer the DB. In theory here 
6417          * we don't know how much room there is in the output buffer of the 
6418          * socket, but in practice SO_SNDLOWAT (the minimum count for output 
6419          * operations) will never be smaller than the few bytes we need. */ 
6422         bulkcount 
= sdscatprintf(sdsempty(),"$%lld\r\n",(unsigned long long) 
6424         if (write(fd
,bulkcount
,sdslen(bulkcount
)) != (signed)sdslen(bulkcount
)) 
6432     lseek(slave
->repldbfd
,slave
->repldboff
,SEEK_SET
); 
6433     buflen 
= read(slave
->repldbfd
,buf
,REDIS_IOBUF_LEN
); 
6435         redisLog(REDIS_WARNING
,"Read error sending DB to slave: %s", 
6436             (buflen 
== 0) ? "premature EOF" : strerror(errno
)); 
6440     if ((nwritten 
= write(fd
,buf
,buflen
)) == -1) { 
6441         redisLog(REDIS_VERBOSE
,"Write error sending DB to slave: %s", 
6446     slave
->repldboff 
+= nwritten
; 
6447     if (slave
->repldboff 
== slave
->repldbsize
) { 
6448         close(slave
->repldbfd
); 
6449         slave
->repldbfd 
= -1; 
6450         aeDeleteFileEvent(server
.el
,slave
->fd
,AE_WRITABLE
); 
6451         slave
->replstate 
= REDIS_REPL_ONLINE
; 
6452         if (aeCreateFileEvent(server
.el
, slave
->fd
, AE_WRITABLE
, 
6453             sendReplyToClient
, slave
) == AE_ERR
) { 
6457         addReplySds(slave
,sdsempty()); 
6458         redisLog(REDIS_NOTICE
,"Synchronization with slave succeeded"); 
6462 /* This function is called at the end of every background saving. 
6463  * The argument bgsaveerr is REDIS_OK if the background saving succeeded 
6464  * otherwise REDIS_ERR is passed to the function. 
6466  * The goal of this function is to handle slaves waiting for a successful 
6467  * background saving in order to perform non-blocking synchronization. */ 
6468 static void updateSlavesWaitingBgsave(int bgsaveerr
) { 
6470     int startbgsave 
= 0; 
6473     listRewind(server
.slaves
,&li
); 
6474     while((ln 
= listNext(&li
))) { 
6475         redisClient 
*slave 
= ln
->value
; 
6477         if (slave
->replstate 
== REDIS_REPL_WAIT_BGSAVE_START
) { 
6479             slave
->replstate 
= REDIS_REPL_WAIT_BGSAVE_END
; 
6480         } else if (slave
->replstate 
== REDIS_REPL_WAIT_BGSAVE_END
) { 
6481             struct redis_stat buf
; 
6483             if (bgsaveerr 
!= REDIS_OK
) { 
6485                 redisLog(REDIS_WARNING
,"SYNC failed. BGSAVE child returned an error"); 
6488             if ((slave
->repldbfd 
= open(server
.dbfilename
,O_RDONLY
)) == -1 || 
6489                 redis_fstat(slave
->repldbfd
,&buf
) == -1) { 
6491                 redisLog(REDIS_WARNING
,"SYNC failed. Can't open/stat DB after BGSAVE: %s", strerror(errno
)); 
6494             slave
->repldboff 
= 0; 
6495             slave
->repldbsize 
= buf
.st_size
; 
6496             slave
->replstate 
= REDIS_REPL_SEND_BULK
; 
6497             aeDeleteFileEvent(server
.el
,slave
->fd
,AE_WRITABLE
); 
6498             if (aeCreateFileEvent(server
.el
, slave
->fd
, AE_WRITABLE
, sendBulkToSlave
, slave
) == AE_ERR
) { 
6505         if (rdbSaveBackground(server
.dbfilename
) != REDIS_OK
) { 
6508             listRewind(server
.slaves
,&li
); 
6509             redisLog(REDIS_WARNING
,"SYNC failed. BGSAVE failed"); 
6510             while((ln 
= listNext(&li
))) { 
6511                 redisClient 
*slave 
= ln
->value
; 
6513                 if (slave
->replstate 
== REDIS_REPL_WAIT_BGSAVE_START
) 
6520 static int syncWithMaster(void) { 
6521     char buf
[1024], tmpfile
[256], authcmd
[1024]; 
6523     int fd 
= anetTcpConnect(NULL
,server
.masterhost
,server
.masterport
); 
6527         redisLog(REDIS_WARNING
,"Unable to connect to MASTER: %s", 
6532     /* AUTH with the master if required. */ 
6533     if(server
.masterauth
) { 
6534         snprintf(authcmd
, 1024, "AUTH %s\r\n", server
.masterauth
); 
6535         if (syncWrite(fd
, authcmd
, strlen(server
.masterauth
)+7, 5) == -1) { 
6537             redisLog(REDIS_WARNING
,"Unable to AUTH to MASTER: %s", 
6541         /* Read the AUTH result.  */ 
6542         if (syncReadLine(fd
,buf
,1024,3600) == -1) { 
6544             redisLog(REDIS_WARNING
,"I/O error reading auth result from MASTER: %s", 
6548         if (buf
[0] != '+') { 
6550             redisLog(REDIS_WARNING
,"Cannot AUTH to MASTER, is the masterauth password correct?"); 
6555     /* Issue the SYNC command */ 
6556     if (syncWrite(fd
,"SYNC \r\n",7,5) == -1) { 
6558         redisLog(REDIS_WARNING
,"I/O error writing to MASTER: %s", 
6562     /* Read the bulk write count */ 
6563     if (syncReadLine(fd
,buf
,1024,3600) == -1) { 
6565         redisLog(REDIS_WARNING
,"I/O error reading bulk count from MASTER: %s", 
6569     if (buf
[0] != '$') { 
6571         redisLog(REDIS_WARNING
,"Bad protocol from MASTER, the first byte is not '$', are you sure the host and port are right?"); 
6574     dumpsize 
= atoi(buf
+1); 
6575     redisLog(REDIS_NOTICE
,"Receiving %d bytes data dump from MASTER",dumpsize
); 
6576     /* Read the bulk write data on a temp file */ 
6577     snprintf(tmpfile
,256,"temp-%d.%ld.rdb",(int)time(NULL
),(long int)random()); 
6578     dfd 
= open(tmpfile
,O_CREAT
|O_WRONLY
,0644); 
6581         redisLog(REDIS_WARNING
,"Opening the temp file needed for MASTER <-> SLAVE synchronization: %s",strerror(errno
)); 
6585         int nread
, nwritten
; 
6587         nread 
= read(fd
,buf
,(dumpsize 
< 1024)?dumpsize
:1024); 
6589             redisLog(REDIS_WARNING
,"I/O error trying to sync with MASTER: %s", 
6595         nwritten 
= write(dfd
,buf
,nread
); 
6596         if (nwritten 
== -1) { 
6597             redisLog(REDIS_WARNING
,"Write error writing to the DB dump file needed for MASTER <-> SLAVE synchrnonization: %s", strerror(errno
)); 
6605     if (rename(tmpfile
,server
.dbfilename
) == -1) { 
6606         redisLog(REDIS_WARNING
,"Failed trying to rename the temp DB into dump.rdb in MASTER <-> SLAVE synchronization: %s", strerror(errno
)); 
6612     if (rdbLoad(server
.dbfilename
) != REDIS_OK
) { 
6613         redisLog(REDIS_WARNING
,"Failed trying to load the MASTER synchronization DB from disk"); 
6617     server
.master 
= createClient(fd
); 
6618     server
.master
->flags 
|= REDIS_MASTER
; 
6619     server
.master
->authenticated 
= 1; 
6620     server
.replstate 
= REDIS_REPL_CONNECTED
; 
6624 static void slaveofCommand(redisClient 
*c
) { 
6625     if (!strcasecmp(c
->argv
[1]->ptr
,"no") && 
6626         !strcasecmp(c
->argv
[2]->ptr
,"one")) { 
6627         if (server
.masterhost
) { 
6628             sdsfree(server
.masterhost
); 
6629             server
.masterhost 
= NULL
; 
6630             if (server
.master
) freeClient(server
.master
); 
6631             server
.replstate 
= REDIS_REPL_NONE
; 
6632             redisLog(REDIS_NOTICE
,"MASTER MODE enabled (user request)"); 
6635         sdsfree(server
.masterhost
); 
6636         server
.masterhost 
= sdsdup(c
->argv
[1]->ptr
); 
6637         server
.masterport 
= atoi(c
->argv
[2]->ptr
); 
6638         if (server
.master
) freeClient(server
.master
); 
6639         server
.replstate 
= REDIS_REPL_CONNECT
; 
6640         redisLog(REDIS_NOTICE
,"SLAVE OF %s:%d enabled (user request)", 
6641             server
.masterhost
, server
.masterport
); 
6643     addReply(c
,shared
.ok
); 
6646 /* ============================ Maxmemory directive  ======================== */ 
6648 /* Try to free one object from the pre-allocated objects free list. 
6649  * This is useful under low mem conditions as by default we take 1 million 
6650  * free objects allocated. On success REDIS_OK is returned, otherwise 
6652 static int tryFreeOneObjectFromFreelist(void) { 
6655     if (server
.vm_enabled
) pthread_mutex_lock(&server
.obj_freelist_mutex
); 
6656     if (listLength(server
.objfreelist
)) { 
6657         listNode 
*head 
= listFirst(server
.objfreelist
); 
6658         o 
= listNodeValue(head
); 
6659         listDelNode(server
.objfreelist
,head
); 
6660         if (server
.vm_enabled
) pthread_mutex_unlock(&server
.obj_freelist_mutex
); 
6664         if (server
.vm_enabled
) pthread_mutex_unlock(&server
.obj_freelist_mutex
); 
6669 /* This function gets called when 'maxmemory' is set on the config file to limit 
6670  * the max memory used by the server, and we are out of memory. 
6671  * This function will try to, in order: 
6673  * - Free objects from the free list 
6674  * - Try to remove keys with an EXPIRE set 
6676  * If it is not possible to free enough memory to reach used-memory < maxmemory 
6677  * the server will start refusing commands that will enlarge even more the 
6680 static void freeMemoryIfNeeded(void) { 
6681     while (server
.maxmemory 
&& zmalloc_used_memory() > server
.maxmemory
) { 
6682         int j
, k
, freed 
= 0; 
6684         if (tryFreeOneObjectFromFreelist() == REDIS_OK
) continue; 
6685         for (j 
= 0; j 
< server
.dbnum
; j
++) { 
6687             robj 
*minkey 
= NULL
; 
6688             struct dictEntry 
*de
; 
6690             if (dictSize(server
.db
[j
].expires
)) { 
6692                 /* From a sample of three keys drop the one nearest to 
6693                  * the natural expire */ 
6694                 for (k 
= 0; k 
< 3; k
++) { 
6697                     de 
= dictGetRandomKey(server
.db
[j
].expires
); 
6698                     t 
= (time_t) dictGetEntryVal(de
); 
6699                     if (minttl 
== -1 || t 
< minttl
) { 
6700                         minkey 
= dictGetEntryKey(de
); 
6704                 deleteKey(server
.db
+j
,minkey
); 
6707         if (!freed
) return; /* nothing to free... */ 
6711 /* ============================== Append Only file ========================== */ 
6713 static void feedAppendOnlyFile(struct redisCommand 
*cmd
, int dictid
, robj 
**argv
, int argc
) { 
6714     sds buf 
= sdsempty(); 
6720     /* The DB this command was targeting is not the same as the last command 
6721      * we appended. Issuing a SELECT command is needed. */ 
6722     if (dictid 
!= server
.appendseldb
) { 
6725         snprintf(seldb
,sizeof(seldb
),"%d",dictid
); 
6726         buf 
= sdscatprintf(buf
,"*2\r\n$6\r\nSELECT\r\n$%lu\r\n%s\r\n", 
6727             (unsigned long)strlen(seldb
),seldb
); 
6728         server
.appendseldb 
= dictid
; 
6731     /* "Fix" the argv vector if the command is EXPIRE. We want to translate 
6732      * EXPIREs into EXPIREATs calls */ 
6733     if (cmd
->proc 
== expireCommand
) { 
6736         tmpargv
[0] = createStringObject("EXPIREAT",8); 
6737         tmpargv
[1] = argv
[1]; 
6738         incrRefCount(argv
[1]); 
6739         when 
= time(NULL
)+strtol(argv
[2]->ptr
,NULL
,10); 
6740         tmpargv
[2] = createObject(REDIS_STRING
, 
6741             sdscatprintf(sdsempty(),"%ld",when
)); 
6745     /* Append the actual command */ 
6746     buf 
= sdscatprintf(buf
,"*%d\r\n",argc
); 
6747     for (j 
= 0; j 
< argc
; j
++) { 
6750         o 
= getDecodedObject(o
); 
6751         buf 
= sdscatprintf(buf
,"$%lu\r\n",(unsigned long)sdslen(o
->ptr
)); 
6752         buf 
= sdscatlen(buf
,o
->ptr
,sdslen(o
->ptr
)); 
6753         buf 
= sdscatlen(buf
,"\r\n",2); 
6757     /* Free the objects from the modified argv for EXPIREAT */ 
6758     if (cmd
->proc 
== expireCommand
) { 
6759         for (j 
= 0; j 
< 3; j
++) 
6760             decrRefCount(argv
[j
]); 
6763     /* We want to perform a single write. This should be guaranteed atomic 
6764      * at least if the filesystem we are writing is a real physical one. 
6765      * While this will save us against the server being killed I don't think 
6766      * there is much to do about the whole server stopping for power problems 
6768      nwritten 
= write(server
.appendfd
,buf
,sdslen(buf
)); 
6769      if (nwritten 
!= (signed)sdslen(buf
)) { 
6770         /* Ooops, we are in troubles. The best thing to do for now is 
6771          * to simply exit instead to give the illusion that everything is 
6772          * working as expected. */ 
6773          if (nwritten 
== -1) { 
6774             redisLog(REDIS_WARNING
,"Exiting on error writing to the append-only file: %s",strerror(errno
)); 
6776             redisLog(REDIS_WARNING
,"Exiting on short write while writing to the append-only file: %s",strerror(errno
)); 
6780     /* If a background append only file rewriting is in progress we want to 
6781      * accumulate the differences between the child DB and the current one 
6782      * in a buffer, so that when the child process will do its work we 
6783      * can append the differences to the new append only file. */ 
6784     if (server
.bgrewritechildpid 
!= -1) 
6785         server
.bgrewritebuf 
= sdscatlen(server
.bgrewritebuf
,buf
,sdslen(buf
)); 
6789     if (server
.appendfsync 
== APPENDFSYNC_ALWAYS 
|| 
6790         (server
.appendfsync 
== APPENDFSYNC_EVERYSEC 
&& 
6791          now
-server
.lastfsync 
> 1)) 
6793         fsync(server
.appendfd
); /* Let's try to get this data on the disk */ 
6794         server
.lastfsync 
= now
; 
6798 /* In Redis commands are always executed in the context of a client, so in 
6799  * order to load the append only file we need to create a fake client. */ 
6800 static struct redisClient 
*createFakeClient(void) { 
6801     struct redisClient 
*c 
= zmalloc(sizeof(*c
)); 
6805     c
->querybuf 
= sdsempty(); 
6809     /* We set the fake client as a slave waiting for the synchronization 
6810      * so that Redis will not try to send replies to this client. */ 
6811     c
->replstate 
= REDIS_REPL_WAIT_BGSAVE_START
; 
6812     c
->reply 
= listCreate(); 
6813     listSetFreeMethod(c
->reply
,decrRefCount
); 
6814     listSetDupMethod(c
->reply
,dupClientReplyValue
); 
6818 static void freeFakeClient(struct redisClient 
*c
) { 
6819     sdsfree(c
->querybuf
); 
6820     listRelease(c
->reply
); 
6824 /* Replay the append log file. On success REDIS_OK is returned. On non fatal
6825  * error (the append only file is zero-length) REDIS_ERR is returned. On
6826  * fatal error an error message is logged and the program exits. */
6827 int loadAppendOnlyFile(char *filename
) { 
6828     struct redisClient 
*fakeClient
; 
6829     FILE *fp 
= fopen(filename
,"r"); 
6830     struct redis_stat sb
; 
6831     unsigned long long loadedkeys 
= 0; 
6833     if (redis_fstat(fileno(fp
),&sb
) != -1 && sb
.st_size 
== 0) 
6837         redisLog(REDIS_WARNING
,"Fatal error: can't open the append log file for reading: %s",strerror(errno
)); 
6841     fakeClient 
= createFakeClient(); 
6848         struct redisCommand 
*cmd
; 
6850         if (fgets(buf
,sizeof(buf
),fp
) == NULL
) { 
6856         if (buf
[0] != '*') goto fmterr
; 
6858         argv 
= zmalloc(sizeof(robj
*)*argc
); 
6859         for (j 
= 0; j 
< argc
; j
++) { 
6860             if (fgets(buf
,sizeof(buf
),fp
) == NULL
) goto readerr
; 
6861             if (buf
[0] != '$') goto fmterr
; 
6862             len 
= strtol(buf
+1,NULL
,10); 
6863             argsds 
= sdsnewlen(NULL
,len
); 
6864             if (len 
&& fread(argsds
,len
,1,fp
) == 0) goto fmterr
; 
6865             argv
[j
] = createObject(REDIS_STRING
,argsds
); 
6866             if (fread(buf
,2,1,fp
) == 0) goto fmterr
; /* discard CRLF */ 
6869         /* Command lookup */ 
6870         cmd 
= lookupCommand(argv
[0]->ptr
); 
6872             redisLog(REDIS_WARNING
,"Unknown command '%s' reading the append only file", argv
[0]->ptr
); 
6875         /* Try object sharing and encoding */ 
6876         if (server
.shareobjects
) { 
6878             for(j 
= 1; j 
< argc
; j
++) 
6879                 argv
[j
] = tryObjectSharing(argv
[j
]); 
6881         if (cmd
->flags 
& REDIS_CMD_BULK
) 
6882             tryObjectEncoding(argv
[argc
-1]); 
6883         /* Run the command in the context of a fake client */ 
6884         fakeClient
->argc 
= argc
; 
6885         fakeClient
->argv 
= argv
; 
6886         cmd
->proc(fakeClient
); 
6887         /* Discard the reply objects list from the fake client */ 
6888         while(listLength(fakeClient
->reply
)) 
6889             listDelNode(fakeClient
->reply
,listFirst(fakeClient
->reply
)); 
6890         /* Clean up, ready for the next command */ 
6891         for (j 
= 0; j 
< argc
; j
++) decrRefCount(argv
[j
]); 
6893         /* Handle swapping while loading big datasets when VM is on */ 
6895         if (server
.vm_enabled 
&& (loadedkeys 
% 5000) == 0) { 
6896             while (zmalloc_used_memory() > server
.vm_max_memory
) { 
6897                 if (vmSwapOneObjectBlocking() == REDIS_ERR
) break; 
6902     freeFakeClient(fakeClient
); 
6907         redisLog(REDIS_WARNING
,"Unexpected end of file reading the append only file"); 
6909         redisLog(REDIS_WARNING
,"Unrecoverable error reading the append only file: %s", strerror(errno
)); 
6913     redisLog(REDIS_WARNING
,"Bad file format reading the append only file"); 
6917 /* Write an object into a file in the bulk format $<count>\r\n<payload>\r\n */ 
6918 static int fwriteBulk(FILE *fp
, robj 
*obj
) { 
6922     /* Avoid the incr/decr ref count business if possible to help 
6923      * copy-on-write (we are often in a child process when this function 
6925      * Also makes sure that key objects don't get incrRefCount-ed when VM 
6927     if (obj
->encoding 
!= REDIS_ENCODING_RAW
) { 
6928         obj 
= getDecodedObject(obj
); 
6931     snprintf(buf
,sizeof(buf
),"$%ld\r\n",(long)sdslen(obj
->ptr
)); 
6932     if (fwrite(buf
,strlen(buf
),1,fp
) == 0) goto err
; 
6933     if (sdslen(obj
->ptr
) && fwrite(obj
->ptr
,sdslen(obj
->ptr
),1,fp
) == 0) 
6935     if (fwrite("\r\n",2,1,fp
) == 0) goto err
; 
6936     if (decrrc
) decrRefCount(obj
); 
6939     if (decrrc
) decrRefCount(obj
); 
/* Emit the double 'd' on 'fp' as a bulk item: $<count>\r\n<payload>\r\n
 *
 * The payload is 'd' formatted with "%.17g". <count> is the length of the
 * payload alone, i.e. the trailing CRLF is not counted.
 *
 * Returns 1 when both the header and the payload were written, 0 as soon as
 * one of the two fwrite() calls fails (callers compare the result with 0). */
static int fwriteBulkDouble(FILE *fp, double d) {
    char header[128], payload[128];
    size_t payload_len;

    /* Format the payload first: the header needs its length. */
    snprintf(payload, sizeof(payload), "%.17g\r\n", d);
    payload_len = strlen(payload);
    snprintf(header, sizeof(header), "$%lu\r\n",
             (unsigned long)(payload_len - 2));

    /* Header first, then payload; stop at the first failed write. */
    return fwrite(header, strlen(header), 1, fp) == 1 &&
           fwrite(payload, payload_len, 1, fp) == 1;
}
/* Emit the long 'l' on 'fp' as a bulk item: $<count>\r\n<payload>\r\n
 *
 * The payload is 'l' in decimal ("%ld"). <count> is the length of the
 * payload alone, i.e. the trailing CRLF is not counted.
 *
 * Returns 1 when both the header and the payload were written, 0 as soon as
 * one of the two fwrite() calls fails (callers compare the result with 0). */
static int fwriteBulkLong(FILE *fp, long l) {
    char header[128], payload[128];
    size_t payload_len;

    /* Format the payload first: the header needs its length. */
    snprintf(payload, sizeof(payload), "%ld\r\n", l);
    payload_len = strlen(payload);
    snprintf(header, sizeof(header), "$%lu\r\n",
             (unsigned long)(payload_len - 2));

    /* Header first, then payload; stop at the first failed write. */
    return fwrite(header, strlen(header), 1, fp) == 1 &&
           fwrite(payload, payload_len, 1, fp) == 1;
}
6965 /* Write a sequence of commands able to fully rebuild the dataset into 
6966  * "filename". Used both by REWRITEAOF and BGREWRITEAOF. */ 
6967 static int rewriteAppendOnlyFile(char *filename
) { 
6968     dictIterator 
*di 
= NULL
; 
6973     time_t now 
= time(NULL
); 
6975     /* Note that we have to use a different temp name here compared to the 
6976      * one used by rewriteAppendOnlyFileBackground() function. */ 
6977     snprintf(tmpfile
,256,"temp-rewriteaof-%d.aof", (int) getpid()); 
6978     fp 
= fopen(tmpfile
,"w"); 
6980         redisLog(REDIS_WARNING
, "Failed rewriting the append only file: %s", strerror(errno
)); 
6983     for (j 
= 0; j 
< server
.dbnum
; j
++) { 
6984         char selectcmd
[] = "*2\r\n$6\r\nSELECT\r\n"; 
6985         redisDb 
*db 
= server
.db
+j
; 
6987         if (dictSize(d
) == 0) continue; 
6988         di 
= dictGetIterator(d
); 
6994         /* SELECT the new DB */ 
6995         if (fwrite(selectcmd
,sizeof(selectcmd
)-1,1,fp
) == 0) goto werr
; 
6996         if (fwriteBulkLong(fp
,j
) == 0) goto werr
; 
6998         /* Iterate this DB writing every entry */ 
6999         while((de 
= dictNext(di
)) != NULL
) { 
7004             key 
= dictGetEntryKey(de
); 
7005             /* If the value for this key is swapped, load a preview in memory. 
7006              * We use a "swapped" flag to remember if we need to free the 
7007              * value object instead to just increment the ref count anyway 
7008              * in order to avoid copy-on-write of pages if we are forked() */ 
7009             if (!server
.vm_enabled 
|| key
->storage 
== REDIS_VM_MEMORY 
|| 
7010                 key
->storage 
== REDIS_VM_SWAPPING
) { 
7011                 o 
= dictGetEntryVal(de
); 
7014                 o 
= vmPreviewObject(key
); 
7017             expiretime 
= getExpire(db
,key
); 
7019             /* Save the key and associated value */ 
7020             if (o
->type 
== REDIS_STRING
) { 
7021                 /* Emit a SET command */ 
7022                 char cmd
[]="*3\r\n$3\r\nSET\r\n"; 
7023                 if (fwrite(cmd
,sizeof(cmd
)-1,1,fp
) == 0) goto werr
; 
7025                 if (fwriteBulk(fp
,key
) == 0) goto werr
; 
7026                 if (fwriteBulk(fp
,o
) == 0) goto werr
; 
7027             } else if (o
->type 
== REDIS_LIST
) { 
7028                 /* Emit the RPUSHes needed to rebuild the list */ 
7029                 list 
*list 
= o
->ptr
; 
7033                 listRewind(list
,&li
); 
7034                 while((ln 
= listNext(&li
))) { 
7035                     char cmd
[]="*3\r\n$5\r\nRPUSH\r\n"; 
7036                     robj 
*eleobj 
= listNodeValue(ln
); 
7038                     if (fwrite(cmd
,sizeof(cmd
)-1,1,fp
) == 0) goto werr
; 
7039                     if (fwriteBulk(fp
,key
) == 0) goto werr
; 
7040                     if (fwriteBulk(fp
,eleobj
) == 0) goto werr
; 
7042             } else if (o
->type 
== REDIS_SET
) { 
7043                 /* Emit the SADDs needed to rebuild the set */ 
7045                 dictIterator 
*di 
= dictGetIterator(set
); 
7048                 while((de 
= dictNext(di
)) != NULL
) { 
7049                     char cmd
[]="*3\r\n$4\r\nSADD\r\n"; 
7050                     robj 
*eleobj 
= dictGetEntryKey(de
); 
7052                     if (fwrite(cmd
,sizeof(cmd
)-1,1,fp
) == 0) goto werr
; 
7053                     if (fwriteBulk(fp
,key
) == 0) goto werr
; 
7054                     if (fwriteBulk(fp
,eleobj
) == 0) goto werr
; 
7056                 dictReleaseIterator(di
); 
7057             } else if (o
->type 
== REDIS_ZSET
) { 
7058                 /* Emit the ZADDs needed to rebuild the sorted set */ 
7060                 dictIterator 
*di 
= dictGetIterator(zs
->dict
); 
7063                 while((de 
= dictNext(di
)) != NULL
) { 
7064                     char cmd
[]="*4\r\n$4\r\nZADD\r\n"; 
7065                     robj 
*eleobj 
= dictGetEntryKey(de
); 
7066                     double *score 
= dictGetEntryVal(de
); 
7068                     if (fwrite(cmd
,sizeof(cmd
)-1,1,fp
) == 0) goto werr
; 
7069                     if (fwriteBulk(fp
,key
) == 0) goto werr
; 
7070                     if (fwriteBulkDouble(fp
,*score
) == 0) goto werr
; 
7071                     if (fwriteBulk(fp
,eleobj
) == 0) goto werr
; 
7073                 dictReleaseIterator(di
); 
7075                 redisAssert(0 != 0); 
7077             /* Save the expire time */ 
7078             if (expiretime 
!= -1) { 
7079                 char cmd
[]="*3\r\n$8\r\nEXPIREAT\r\n"; 
7080                 /* If this key is already expired skip it */ 
7081                 if (expiretime 
< now
) continue; 
7082                 if (fwrite(cmd
,sizeof(cmd
)-1,1,fp
) == 0) goto werr
; 
7083                 if (fwriteBulk(fp
,key
) == 0) goto werr
; 
7084                 if (fwriteBulkLong(fp
,expiretime
) == 0) goto werr
; 
7086             if (swapped
) decrRefCount(o
); 
7088         dictReleaseIterator(di
); 
7091     /* Make sure data will not remain on the OS's output buffers */ 
7096     /* Use RENAME to make sure the append only file is changed atomically
7097      * only if the generated file is ok. */
7098     if (rename(tmpfile
,filename
) == -1) { 
7099         redisLog(REDIS_WARNING
,"Error moving temp append only file on the final destination: %s", strerror(errno
)); 
7103     redisLog(REDIS_NOTICE
,"SYNC append only file rewrite performed"); 
7109     redisLog(REDIS_WARNING
,"Write error writing append only file on disk: %s", strerror(errno
)); 
7110     if (di
) dictReleaseIterator(di
); 
7114 /* This is how rewriting of the append only file in background works: 
7116  * 1) The user calls BGREWRITEAOF 
7117  * 2) Redis calls this function, that forks(): 
7118  *    2a) the child rewrite the append only file in a temp file. 
7119  *    2b) the parent accumulates differences in server.bgrewritebuf. 
7120  * 3) When the child has finished '2a' it exits.
7121  * 4) The parent will trap the exit code, if it's OK, will append the 
7122  *    data accumulated into server.bgrewritebuf into the temp file, and 
7123  *    finally will rename(2) the temp file in the actual file name. 
7124  *    Then the new file is reopened as the new append only file. Profit!
7126 static int rewriteAppendOnlyFileBackground(void) { 
7129     if (server
.bgrewritechildpid 
!= -1) return REDIS_ERR
; 
7130     if (server
.vm_enabled
) waitEmptyIOJobsQueue(); 
7131     if ((childpid 
= fork()) == 0) { 
7135         if (server
.vm_enabled
) vmReopenSwapFile(); 
7137         snprintf(tmpfile
,256,"temp-rewriteaof-bg-%d.aof", (int) getpid()); 
7138         if (rewriteAppendOnlyFile(tmpfile
) == REDIS_OK
) { 
7145         if (childpid 
== -1) { 
7146             redisLog(REDIS_WARNING
, 
7147                 "Can't rewrite append only file in background: fork: %s", 
7151         redisLog(REDIS_NOTICE
, 
7152             "Background append only file rewriting started by pid %d",childpid
); 
7153         server
.bgrewritechildpid 
= childpid
; 
7154         /* We set appendseldb to -1 in order to force the next call to the 
7155          * feedAppendOnlyFile() to issue a SELECT command, so the differences 
7156          * accumulated by the parent into server.bgrewritebuf will start 
7157          * with a SELECT statement and it will be safe to merge. */ 
7158         server
.appendseldb 
= -1; 
7161     return REDIS_OK
; /* unreached */ 
7164 static void bgrewriteaofCommand(redisClient 
*c
) { 
7165     if (server
.bgrewritechildpid 
!= -1) { 
7166         addReplySds(c
,sdsnew("-ERR background append only file rewriting already in progress\r\n")); 
7169     if (rewriteAppendOnlyFileBackground() == REDIS_OK
) { 
7170         char *status 
= "+Background append only file rewriting started\r\n"; 
7171         addReplySds(c
,sdsnew(status
)); 
7173         addReply(c
,shared
.err
); 
7177 static void aofRemoveTempFile(pid_t childpid
) { 
7180     snprintf(tmpfile
,256,"temp-rewriteaof-bg-%d.aof", (int) childpid
); 
7184 /* Virtual Memory is composed mainly of two subsystems: 
7185  * - Blocking Virtual Memory
7186  * - Threaded Virtual Memory I/O 
7187  * The two parts are not fully decoupled, but functions are split among two 
7188  * different sections of the source code (delimited by comments) in order to 
7189  * make more clear what functionality is about the blocking VM and what about 
7190  * the threaded (not blocking) VM. 
7194  * Redis VM is a blocking VM (one that blocks reading swapped values from 
7195  * disk into memory when a value swapped out is needed in memory) that is made 
7196  * unblocking by trying to examine the command argument vector in order to 
7197  * load in background values that will likely be needed in order to exec 
7198  * the command. The command is executed only once all the relevant keys 
7199  * are loaded into memory. 
7201  * This basically is almost as simple as a blocking VM, but almost as parallel
7202  * as a fully non-blocking VM. 
7205 /* =================== Virtual Memory - Blocking Side  ====================== */ 
7207 /* substitute the first occurrence of '%p' with the process pid in the 
7208  * swap file name. */ 
7209 static void expandVmSwapFilename(void) { 
7210     char *p 
= strstr(server
.vm_swap_file
,"%p"); 
7216     new = sdscat(new,server
.vm_swap_file
); 
7217     new = sdscatprintf(new,"%ld",(long) getpid()); 
7218     new = sdscat(new,p
+2); 
7219     zfree(server
.vm_swap_file
); 
7220     server
.vm_swap_file 
= new; 
7223 static void vmInit(void) { 
7228     if (server
.vm_max_threads 
!= 0) 
7229         zmalloc_enable_thread_safeness(); /* we need thread safe zmalloc() */ 
7231     expandVmSwapFilename(); 
7232     redisLog(REDIS_NOTICE
,"Using '%s' as swap file",server
.vm_swap_file
); 
7233     if ((server
.vm_fp 
= fopen(server
.vm_swap_file
,"r+b")) == NULL
) { 
7234         server
.vm_fp 
= fopen(server
.vm_swap_file
,"w+b"); 
7236     if (server
.vm_fp 
== NULL
) { 
7237         redisLog(REDIS_WARNING
, 
7238             "Impossible to open the swap file: %s. Exiting.", 
7242     server
.vm_fd 
= fileno(server
.vm_fp
); 
7243     server
.vm_next_page 
= 0; 
7244     server
.vm_near_pages 
= 0; 
7245     server
.vm_stats_used_pages 
= 0; 
7246     server
.vm_stats_swapped_objects 
= 0; 
7247     server
.vm_stats_swapouts 
= 0; 
7248     server
.vm_stats_swapins 
= 0; 
7249     totsize 
= server
.vm_pages
*server
.vm_page_size
; 
7250     redisLog(REDIS_NOTICE
,"Allocating %lld bytes of swap file",totsize
); 
7251     if (ftruncate(server
.vm_fd
,totsize
) == -1) { 
7252         redisLog(REDIS_WARNING
,"Can't ftruncate swap file: %s. Exiting.", 
7256         redisLog(REDIS_NOTICE
,"Swap file allocated with success"); 
7258     server
.vm_bitmap 
= zmalloc((server
.vm_pages
+7)/8); 
7259     redisLog(REDIS_VERBOSE
,"Allocated %lld bytes page table for %lld pages", 
7260         (long long) (server
.vm_pages
+7)/8, server
.vm_pages
); 
7261     memset(server
.vm_bitmap
,0,(server
.vm_pages
+7)/8); 
7263     /* Initialize threaded I/O (used by Virtual Memory) */ 
7264     server
.io_newjobs 
= listCreate(); 
7265     server
.io_processing 
= listCreate(); 
7266     server
.io_processed 
= listCreate(); 
7267     server
.io_ready_clients 
= listCreate(); 
7268     pthread_mutex_init(&server
.io_mutex
,NULL
); 
7269     pthread_mutex_init(&server
.obj_freelist_mutex
,NULL
); 
7270     pthread_mutex_init(&server
.io_swapfile_mutex
,NULL
); 
7271     server
.io_active_threads 
= 0; 
7272     if (pipe(pipefds
) == -1) { 
7273         redisLog(REDIS_WARNING
,"Unable to intialized VM: pipe(2): %s. Exiting." 
7277     server
.io_ready_pipe_read 
= pipefds
[0]; 
7278     server
.io_ready_pipe_write 
= pipefds
[1]; 
7279     redisAssert(anetNonBlock(NULL
,server
.io_ready_pipe_read
) != ANET_ERR
); 
7280     /* LZF requires a lot of stack */ 
7281     pthread_attr_init(&server
.io_threads_attr
); 
7282     pthread_attr_getstacksize(&server
.io_threads_attr
, &stacksize
); 
7283     while (stacksize 
< REDIS_THREAD_STACK_SIZE
) stacksize 
*= 2; 
7284     pthread_attr_setstacksize(&server
.io_threads_attr
, stacksize
); 
7285     /* Listen for events in the threaded I/O pipe */ 
7286     if (aeCreateFileEvent(server
.el
, server
.io_ready_pipe_read
, AE_READABLE
, 
7287         vmThreadedIOCompletedJob
, NULL
) == AE_ERR
) 
7288         oom("creating file event"); 
7291 /* Mark the page as used */ 
7292 static void vmMarkPageUsed(off_t page
) { 
7293     off_t byte 
= page
/8; 
7295     redisAssert(vmFreePage(page
) == 1); 
7296     server
.vm_bitmap
[byte
] |= 1<<bit
; 
7299 /* Mark N contiguous pages as used, with 'page' being the first. */ 
7300 static void vmMarkPagesUsed(off_t page
, off_t count
) { 
7303     for (j 
= 0; j 
< count
; j
++) 
7304         vmMarkPageUsed(page
+j
); 
7305     server
.vm_stats_used_pages 
+= count
; 
7306     redisLog(REDIS_DEBUG
,"Mark USED pages: %lld pages at %lld\n", 
7307         (long long)count
, (long long)page
); 
7310 /* Mark the page as free */ 
7311 static void vmMarkPageFree(off_t page
) { 
7312     off_t byte 
= page
/8; 
7314     redisAssert(vmFreePage(page
) == 0); 
7315     server
.vm_bitmap
[byte
] &= ~(1<<bit
); 
7318 /* Mark N contiguous pages as free, with 'page' being the first. */ 
7319 static void vmMarkPagesFree(off_t page
, off_t count
) { 
7322     for (j 
= 0; j 
< count
; j
++) 
7323         vmMarkPageFree(page
+j
); 
7324     server
.vm_stats_used_pages 
-= count
; 
7325     redisLog(REDIS_DEBUG
,"Mark FREE pages: %lld pages at %lld\n", 
7326         (long long)count
, (long long)page
); 
7329 /* Test if the page is free */ 
7330 static int vmFreePage(off_t page
) { 
7331     off_t byte 
= page
/8; 
7333     return (server
.vm_bitmap
[byte
] & (1<<bit
)) == 0; 
7336 /* Find N contiguous free pages storing the first page of the cluster in *first. 
7337  * Returns REDIS_OK if it was able to find N contiguous pages, otherwise  
7338  * REDIS_ERR is returned. 
7340  * This function uses a simple algorithm: we try to allocate 
7341  * REDIS_VM_MAX_NEAR_PAGES sequentially, when we reach this limit we start 
7342  * again from the start of the swap file searching for free spaces. 
7344  * If it looks pretty clear that there are no free pages near our offset 
7345  * we try to find less populated places doing a forward jump of 
7346  * REDIS_VM_MAX_RANDOM_JUMP, then we start scanning again a few pages 
7347  * without hurry, and then we jump again and so forth... 
7349  * This function can be improved using a free list to avoid to guess 
7350  * too much, since we could collect data about freed pages. 
7352  * note: I implemented this function just after watching an episode of 
7353  * Battlestar Galactica, where the hybrid was continuing to say "JUMP!" 
7355 static int vmFindContiguousPages(off_t 
*first
, off_t n
) { 
7356     off_t base
, offset 
= 0, since_jump 
= 0, numfree 
= 0; 
7358     if (server
.vm_near_pages 
== REDIS_VM_MAX_NEAR_PAGES
) { 
7359         server
.vm_near_pages 
= 0; 
7360         server
.vm_next_page 
= 0; 
7362     server
.vm_near_pages
++; /* Yet another try for pages near to the old ones */ 
7363     base 
= server
.vm_next_page
; 
7365     while(offset 
< server
.vm_pages
) { 
7366         off_t 
this = base
+offset
; 
7368         /* If we overflow, restart from page zero */ 
7369         if (this >= server
.vm_pages
) { 
7370             this -= server
.vm_pages
; 
7372                 /* Just overflowed, what we found on tail is no longer 
7373                  * interesting, as it's no longer contiguous. */ 
7377         if (vmFreePage(this)) { 
7378             /* This is a free page */ 
7380             /* Already got N free pages? Return to the caller, with success */ 
7382                 *first 
= this-(n
-1); 
7383                 server
.vm_next_page 
= this+1; 
7384                 redisLog(REDIS_DEBUG
, "FOUND CONTIGUOUS PAGES: %lld pages at %lld\n", (long long) n
, (long long) *first
); 
7388             /* The current one is not a free page */ 
7392         /* Fast-forward if the current page is not free and we already 
7393          * searched enough near this place. */ 
7395         if (!numfree 
&& since_jump 
>= REDIS_VM_MAX_RANDOM_JUMP
/4) { 
7396             offset 
+= random() % REDIS_VM_MAX_RANDOM_JUMP
; 
7398             /* Note that even if we rewind after the jump, we don't need
7399              * to make sure numfree is set to zero as we only jump *if* it 
7400              * is set to zero. */ 
7402             /* Otherwise just check the next page */ 
7409 /* Write the specified object at the specified page of the swap file */ 
7410 static int vmWriteObjectOnSwap(robj 
*o
, off_t page
) { 
7411     if (server
.vm_enabled
) pthread_mutex_lock(&server
.io_swapfile_mutex
); 
7412     if (fseeko(server
.vm_fp
,page
*server
.vm_page_size
,SEEK_SET
) == -1) { 
7413         if (server
.vm_enabled
) pthread_mutex_unlock(&server
.io_swapfile_mutex
); 
7414         redisLog(REDIS_WARNING
, 
7415             "Critical VM problem in vmWriteObjectOnSwap(): can't seek: %s", 
7419     rdbSaveObject(server
.vm_fp
,o
); 
7420     fflush(server
.vm_fp
); 
7421     if (server
.vm_enabled
) pthread_mutex_unlock(&server
.io_swapfile_mutex
); 
7425 /* Swap the 'val' object relative to 'key' into disk. Store all the information 
7426  * needed to later retrieve the object into the key object. 
7427  * If we can't find enough contiguous empty pages to swap the object on disk 
7428  * REDIS_ERR is returned. */ 
7429 static int vmSwapObjectBlocking(robj 
*key
, robj 
*val
) { 
7430     off_t pages 
= rdbSavedObjectPages(val
,NULL
); 
7433     assert(key
->storage 
== REDIS_VM_MEMORY
); 
7434     assert(key
->refcount 
== 1); 
7435     if (vmFindContiguousPages(&page
,pages
) == REDIS_ERR
) return REDIS_ERR
; 
7436     if (vmWriteObjectOnSwap(val
,page
) == REDIS_ERR
) return REDIS_ERR
; 
7437     key
->vm
.page 
= page
; 
7438     key
->vm
.usedpages 
= pages
; 
7439     key
->storage 
= REDIS_VM_SWAPPED
; 
7440     key
->vtype 
= val
->type
; 
7441     decrRefCount(val
); /* Deallocate the object from memory. */ 
7442     vmMarkPagesUsed(page
,pages
); 
7443     redisLog(REDIS_DEBUG
,"VM: object %s swapped out at %lld (%lld pages)", 
7444         (unsigned char*) key
->ptr
, 
7445         (unsigned long long) page
, (unsigned long long) pages
); 
7446     server
.vm_stats_swapped_objects
++; 
7447     server
.vm_stats_swapouts
++; 
7451 static robj 
*vmReadObjectFromSwap(off_t page
, int type
) { 
7454     if (server
.vm_enabled
) pthread_mutex_lock(&server
.io_swapfile_mutex
); 
7455     if (fseeko(server
.vm_fp
,page
*server
.vm_page_size
,SEEK_SET
) == -1) { 
7456         redisLog(REDIS_WARNING
, 
7457             "Unrecoverable VM problem in vmReadObjectFromSwap(): can't seek: %s", 
7461     o 
= rdbLoadObject(type
,server
.vm_fp
); 
7463         redisLog(REDIS_WARNING
, "Unrecoverable VM problem in vmReadObjectFromSwap(): can't load object from swap file: %s", strerror(errno
)); 
7466     if (server
.vm_enabled
) pthread_mutex_unlock(&server
.io_swapfile_mutex
); 
7470 /* Load the value object relative to the 'key' object from swap to memory. 
7471  * The newly allocated object is returned. 
7473  * If preview is true the unserialized object is returned to the caller but 
7474  * no changes are made to the key object, nor the pages are marked as freed */ 
7475 static robj 
*vmGenericLoadObject(robj 
*key
, int preview
) { 
7478     redisAssert(key
->storage 
== REDIS_VM_SWAPPED 
|| key
->storage 
== REDIS_VM_LOADING
); 
7479     val 
= vmReadObjectFromSwap(key
->vm
.page
,key
->vtype
); 
7481         key
->storage 
= REDIS_VM_MEMORY
; 
7482         key
->vm
.atime 
= server
.unixtime
; 
7483         vmMarkPagesFree(key
->vm
.page
,key
->vm
.usedpages
); 
7484         redisLog(REDIS_DEBUG
, "VM: object %s loaded from disk", 
7485             (unsigned char*) key
->ptr
); 
7486         server
.vm_stats_swapped_objects
--; 
7488         redisLog(REDIS_DEBUG
, "VM: object %s previewed from disk", 
7489             (unsigned char*) key
->ptr
); 
7491     server
.vm_stats_swapins
++; 
7495 /* Plain object loading, from swap to memory */ 
7496 static robj 
*vmLoadObject(robj 
*key
) { 
7497     /* If we are loading the object in background, stop it, we 
7498      * need to load this object synchronously ASAP. */ 
7499     if (key
->storage 
== REDIS_VM_LOADING
) 
7500         vmCancelThreadedIOJob(key
); 
7501     return vmGenericLoadObject(key
,0); 
7504 /* Just load the value on disk, without to modify the key. 
7505  * This is useful when we want to perform some operation on the value 
7506  * without to really bring it from swap to memory, like while saving the 
7507  * dataset or rewriting the append only log. */ 
7508 static robj 
*vmPreviewObject(robj 
*key
) { 
7509     return vmGenericLoadObject(key
,1); 
7512 /* How good a candidate is this object for swapping?
7513  * The better candidate it is, the greater the returned value. 
7515  * Currently we try to perform a fast estimation of the object size in 
7516  * memory, and combine it with aging information.
7518  * Basically swappability = idle-time * log(estimated size) 
7520  * Bigger objects are preferred over smaller objects, but not 
7521  * proportionally, this is why we use the logarithm. This algorithm is 
7522  * just a first try and will probably be tuned later. */ 
7523 static double computeObjectSwappability(robj 
*o
) { 
7524     time_t age 
= server
.unixtime 
- o
->vm
.atime
; 
7528     struct dictEntry 
*de
; 
7531     if (age 
<= 0) return 0; 
7534         if (o
->encoding 
!= REDIS_ENCODING_RAW
) { 
7537             asize 
= sdslen(o
->ptr
)+sizeof(*o
)+sizeof(long)*2; 
7542         listNode 
*ln 
= listFirst(l
); 
7544         asize 
= sizeof(list
); 
7546             robj 
*ele 
= ln
->value
; 
7549             elesize 
= (ele
->encoding 
== REDIS_ENCODING_RAW
) ? 
7550                             (sizeof(*o
)+sdslen(ele
->ptr
)) : 
7552             asize 
+= (sizeof(listNode
)+elesize
)*listLength(l
); 
7557         z 
= (o
->type 
== REDIS_ZSET
); 
7558         d 
= z 
? ((zset
*)o
->ptr
)->dict 
: o
->ptr
; 
7560         asize 
= sizeof(dict
)+(sizeof(struct dictEntry
*)*dictSlots(d
)); 
7561         if (z
) asize 
+= sizeof(zset
)-sizeof(dict
); 
7566             de 
= dictGetRandomKey(d
); 
7567             ele 
= dictGetEntryKey(de
); 
7568             elesize 
= (ele
->encoding 
== REDIS_ENCODING_RAW
) ? 
7569                             (sizeof(*o
)+sdslen(ele
->ptr
)) : 
7571             asize 
+= (sizeof(struct dictEntry
)+elesize
)*dictSize(d
); 
7572             if (z
) asize 
+= sizeof(zskiplistNode
)*dictSize(d
); 
7576     return (double)age
*log(1+asize
); 
7579 /* Try to swap an object that's a good candidate for swapping. 
7580  * Returns REDIS_OK if the object was swapped, REDIS_ERR if it's not possible 
7581  * to swap any object at all. 
7583  * If 'usethreads' is true, Redis will try to swap the object in background
7584  * using I/O threads. */ 
7585 static int vmSwapOneObject(int usethreads
) { 
7587     struct dictEntry 
*best 
= NULL
; 
7588     double best_swappability 
= 0; 
7589     redisDb 
*best_db 
= NULL
; 
7592     for (j 
= 0; j 
< server
.dbnum
; j
++) { 
7593         redisDb 
*db 
= server
.db
+j
; 
7594         /* Why maxtries is set to 100? 
7595          * Because this way (usually) we'll find 1 object even if just 1% - 2% 
7596          * are swappable objects */ 
7599         if (dictSize(db
->dict
) == 0) continue; 
7600         for (i 
= 0; i 
< 5; i
++) { 
7602             double swappability
; 
7604             if (maxtries
) maxtries
--; 
7605             de 
= dictGetRandomKey(db
->dict
); 
7606             key 
= dictGetEntryKey(de
); 
7607             val 
= dictGetEntryVal(de
); 
7608             /* Only swap objects that are currently in memory. 
7610              * Also don't swap shared objects if threaded VM is on, as we 
7611              * try to ensure that the main thread does not touch the 
7612              * object while the I/O thread is using it, but we can't 
7613              * control other keys without adding additional mutex. */ 
7614             if (key
->storage 
!= REDIS_VM_MEMORY 
|| 
7615                 (server
.vm_max_threads 
!= 0 && val
->refcount 
!= 1)) { 
7616                 if (maxtries
) i
--; /* don't count this try */ 
7619             swappability 
= computeObjectSwappability(val
); 
7620             if (!best 
|| swappability 
> best_swappability
) { 
7622                 best_swappability 
= swappability
; 
7627     if (best 
== NULL
) return REDIS_ERR
; 
7628     key 
= dictGetEntryKey(best
); 
7629     val 
= dictGetEntryVal(best
); 
7631     redisLog(REDIS_DEBUG
,"Key with best swappability: %s, %f", 
7632         key
->ptr
, best_swappability
); 
7634     /* Unshare the key if needed */ 
7635     if (key
->refcount 
> 1) { 
7636         robj 
*newkey 
= dupStringObject(key
); 
7638         key 
= dictGetEntryKey(best
) = newkey
; 
7642         vmSwapObjectThreaded(key
,val
,best_db
); 
7645         if (vmSwapObjectBlocking(key
,val
) == REDIS_OK
) { 
7646             dictGetEntryVal(best
) = NULL
; 
/* Try to swap out one object without using the I/O threads.
 *
 * Thin wrapper: calls vmSwapOneObject() with 'usethreads' set to 0 and
 * returns its result unchanged.
 *
 * The parameter list is spelled (void) so that the definition is a real
 * prototype and calls passing arguments are rejected by the compiler. */
static int vmSwapOneObjectBlocking(void) {
    return vmSwapOneObject(0);
}
/* Try to swap out one object, asking for the I/O threads to be used.
 *
 * Thin wrapper: calls vmSwapOneObject() with 'usethreads' set to 1 and
 * returns its result unchanged.
 *
 * The parameter list is spelled (void) so that the definition is a real
 * prototype and calls passing arguments are rejected by the compiler. */
static int vmSwapOneObjectThreaded(void) {
    return vmSwapOneObject(1);
}
7662 /* Return true if it's safe to swap out objects in a given moment. 
7663  * Basically we don't want to swap objects out while there is a BGSAVE 
7664  * or a BGAEOREWRITE running in backgroud. */ 
7665 static int vmCanSwapOut(void) { 
7666     return (server
.bgsavechildpid 
== -1 && server
.bgrewritechildpid 
== -1); 
7669 /* Delete a key if swapped. Returns 1 if the key was found, was swapped 
7670  * and was deleted. Otherwise 0 is returned. */ 
7671 static int deleteIfSwapped(redisDb 
*db
, robj 
*key
) { 
7675     if ((de 
= dictFind(db
->dict
,key
)) == NULL
) return 0; 
7676     foundkey 
= dictGetEntryKey(de
); 
7677     if (foundkey
->storage 
== REDIS_VM_MEMORY
) return 0; 
7682 /* =================== Virtual Memory - Threaded I/O  ======================= */ 
7684 static void freeIOJob(iojob 
*j
) { 
7685     if ((j
->type 
== REDIS_IOJOB_PREPARE_SWAP 
|| 
7686         j
->type 
== REDIS_IOJOB_DO_SWAP 
|| 
7687         j
->type 
== REDIS_IOJOB_LOAD
) && j
->val 
!= NULL
) 
7688         decrRefCount(j
->val
); 
7689     decrRefCount(j
->key
); 
7693 /* Every time a thread finished a Job, it writes a byte into the write side 
7694  * of an unix pipe in order to "awake" the main thread, and this function 
7696 static void vmThreadedIOCompletedJob(aeEventLoop 
*el
, int fd
, void *privdata
, 
7700     int retval
, processed 
= 0, toprocess 
= -1, trytoswap 
= 1; 
7702     REDIS_NOTUSED(mask
); 
7703     REDIS_NOTUSED(privdata
); 
7705     /* For every byte we read in the read side of the pipe, there is one 
7706      * I/O job completed to process. */ 
7707     while((retval 
= read(fd
,buf
,1)) == 1) { 
7711         struct dictEntry 
*de
; 
7713         redisLog(REDIS_DEBUG
,"Processing I/O completed job"); 
7715         /* Get the processed element (the oldest one) */ 
7717         assert(listLength(server
.io_processed
) != 0); 
7718         if (toprocess 
== -1) { 
7719             toprocess 
= (listLength(server
.io_processed
)*REDIS_MAX_COMPLETED_JOBS_PROCESSED
)/100; 
7720             if (toprocess 
<= 0) toprocess 
= 1; 
7722         ln 
= listFirst(server
.io_processed
); 
7724         listDelNode(server
.io_processed
,ln
); 
7726         /* If this job is marked as canceled, just ignore it */ 
7731         /* Post process it in the main thread, as there are things we 
7732          * can do just here to avoid race conditions and/or invasive locks */ 
7733         redisLog(REDIS_DEBUG
,"Job %p type: %d, key at %p (%s) refcount: %d\n", (void*) j
, j
->type
, (void*)j
->key
, (char*)j
->key
->ptr
, j
->key
->refcount
); 
7734         de 
= dictFind(j
->db
->dict
,j
->key
); 
7736         key 
= dictGetEntryKey(de
); 
7737         if (j
->type 
== REDIS_IOJOB_LOAD
) { 
7740             /* Key loaded, bring it at home */ 
7741             key
->storage 
= REDIS_VM_MEMORY
; 
7742             key
->vm
.atime 
= server
.unixtime
; 
7743             vmMarkPagesFree(key
->vm
.page
,key
->vm
.usedpages
); 
7744             redisLog(REDIS_DEBUG
, "VM: object %s loaded from disk (threaded)", 
7745                 (unsigned char*) key
->ptr
); 
7746             server
.vm_stats_swapped_objects
--; 
7747             server
.vm_stats_swapins
++; 
7748             dictGetEntryVal(de
) = j
->val
; 
7749             incrRefCount(j
->val
); 
7752             /* Handle clients waiting for this key to be loaded. */ 
7753             handleClientsBlockedOnSwappedKey(db
,key
); 
7754         } else if (j
->type 
== REDIS_IOJOB_PREPARE_SWAP
) { 
7755             /* Now we know the amount of pages required to swap this object. 
7756              * Let's find some space for it, and queue this task again 
7757              * rebranded as REDIS_IOJOB_DO_SWAP. */ 
7758             if (!vmCanSwapOut() || 
7759                 vmFindContiguousPages(&j
->page
,j
->pages
) == REDIS_ERR
) 
7761                 /* Ooops... no space or we can't swap as there is 
7762                  * a fork()ed Redis trying to save stuff on disk. */ 
7764                 key
->storage 
= REDIS_VM_MEMORY
; /* undo operation */ 
7766                 /* Note that we need to mark this pages as used now, 
7767                  * if the job will be canceled, we'll mark them as freed 
7769                 vmMarkPagesUsed(j
->page
,j
->pages
); 
7770                 j
->type 
= REDIS_IOJOB_DO_SWAP
; 
7775         } else if (j
->type 
== REDIS_IOJOB_DO_SWAP
) { 
7778             /* Key swapped. We can finally free some memory. */ 
7779             if (key
->storage 
!= REDIS_VM_SWAPPING
) { 
7780                 printf("key->storage: %d\n",key
->storage
); 
7781                 printf("key->name: %s\n",(char*)key
->ptr
); 
7782                 printf("key->refcount: %d\n",key
->refcount
); 
7783                 printf("val: %p\n",(void*)j
->val
); 
7784                 printf("val->type: %d\n",j
->val
->type
); 
7785                 printf("val->ptr: %s\n",(char*)j
->val
->ptr
); 
7787             redisAssert(key
->storage 
== REDIS_VM_SWAPPING
); 
7788             val 
= dictGetEntryVal(de
); 
7789             key
->vm
.page 
= j
->page
; 
7790             key
->vm
.usedpages 
= j
->pages
; 
7791             key
->storage 
= REDIS_VM_SWAPPED
; 
7792             key
->vtype 
= j
->val
->type
; 
7793             decrRefCount(val
); /* Deallocate the object from memory. */ 
7794             dictGetEntryVal(de
) = NULL
; 
7795             redisLog(REDIS_DEBUG
, 
7796                 "VM: object %s swapped out at %lld (%lld pages) (threaded)", 
7797                 (unsigned char*) key
->ptr
, 
7798                 (unsigned long long) j
->page
, (unsigned long long) j
->pages
); 
7799             server
.vm_stats_swapped_objects
++; 
7800             server
.vm_stats_swapouts
++; 
7802             /* Put a few more swap requests in queue if we are still 
7804             if (trytoswap 
&& vmCanSwapOut() && 
7805                 zmalloc_used_memory() > server
.vm_max_memory
) 
7810                     more 
= listLength(server
.io_newjobs
) < 
7811                             (unsigned) server
.vm_max_threads
; 
7813                     /* Don't waste CPU time if swappable objects are rare. */ 
7814                     if (vmSwapOneObjectThreaded() == REDIS_ERR
) { 
7822         if (processed 
== toprocess
) return; 
7824     if (retval 
< 0 && errno 
!= EAGAIN
) { 
7825         redisLog(REDIS_WARNING
, 
7826             "WARNING: read(2) error in vmThreadedIOCompletedJob() %s", 
7831 static void lockThreadedIO(void) { 
7832     pthread_mutex_lock(&server
.io_mutex
); 
7835 static void unlockThreadedIO(void) { 
7836     pthread_mutex_unlock(&server
.io_mutex
); 
7839 /* Remove the specified object from the threaded I/O queue if still not 
7840  * processed, otherwise make sure to flag it as canceled. */ 
7841 static void vmCancelThreadedIOJob(robj 
*o
) { 
7843         server
.io_newjobs
,      /* 0 */ 
7844         server
.io_processing
,   /* 1 */ 
7845         server
.io_processed     
/* 2 */ 
7849     assert(o
->storage 
== REDIS_VM_LOADING 
|| o
->storage 
== REDIS_VM_SWAPPING
); 
7852     /* Search for a matching key in one of the queues */ 
7853     for (i 
= 0; i 
< 3; i
++) { 
7857         listRewind(lists
[i
],&li
); 
7858         while ((ln 
= listNext(&li
)) != NULL
) { 
7859             iojob 
*job 
= ln
->value
; 
7861             if (job
->canceled
) continue; /* Skip this, already canceled. */ 
7862             if (compareStringObjects(job
->key
,o
) == 0) { 
7863                 redisLog(REDIS_DEBUG
,"*** CANCELED %p (%s) (type %d) (LIST ID %d)\n", 
7864                     (void*)job
, (char*)o
->ptr
, job
->type
, i
); 
7865                 /* Mark the pages as free since the swap didn't happen 
7866                  * or happened but is now discarded. */ 
7867                 if (i 
!= 1 && job
->type 
== REDIS_IOJOB_DO_SWAP
) 
7868                     vmMarkPagesFree(job
->page
,job
->pages
); 
7869                 /* Cancel the job. It depends on the list the job is 
7872                 case 0: /* io_newjobs */ 
7873                     /* If the job was yet not processed the best thing to do 
7874                      * is to remove it from the queue at all */ 
7876                     listDelNode(lists
[i
],ln
); 
7878                 case 1: /* io_processing */ 
7879                     /* Oh Shi- the thread is messing with the Job: 
7881                      * Probably it's accessing the object if this is a 
7882                      * PREPARE_SWAP or DO_SWAP job. 
7883                      * If it's a LOAD job it may be reading from disk and 
7884                      * if we don't wait for the job to terminate before to 
7885                      * cancel it, maybe in a few microseconds data can be 
7886                      * corrupted in this pages. So the short story is: 
7888                      * Better to wait for the job to move into the 
7889                      * next queue (processed)... */ 
7891                     /* We try again and again until the job is completed. */ 
7893                     /* But let's wait some time for the I/O thread 
7894                      * to finish with this job. After all this condition 
7895                      * should be very rare. */ 
7898                 case 2: /* io_processed */ 
7899                     /* The job was already processed, that's easy... 
7900                      * just mark it as canceled so that we'll ignore it 
7901                      * when processing completed jobs. */ 
7905                 /* Finally we have to adjust the storage type of the object 
7906                  * in order to "UNDO" the operation. */ 
7907                 if (o
->storage 
== REDIS_VM_LOADING
) 
7908                     o
->storage 
= REDIS_VM_SWAPPED
; 
7909                 else if (o
->storage 
== REDIS_VM_SWAPPING
) 
7910                     o
->storage 
= REDIS_VM_MEMORY
; 
7917     assert(1 != 1); /* We should never reach this */ 
7920 static void *IOThreadEntryPoint(void *arg
) { 
7925     pthread_detach(pthread_self()); 
7927         /* Get a new job to process */ 
7929         if (listLength(server
.io_newjobs
) == 0) { 
7930             /* No new jobs in queue, exit. */ 
7931             redisLog(REDIS_DEBUG
,"Thread %ld exiting, nothing to do", 
7932                 (long) pthread_self()); 
7933             server
.io_active_threads
--; 
7937         ln 
= listFirst(server
.io_newjobs
); 
7939         listDelNode(server
.io_newjobs
,ln
); 
7940         /* Add the job in the processing queue */ 
7941         j
->thread 
= pthread_self(); 
7942         listAddNodeTail(server
.io_processing
,j
); 
7943         ln 
= listLast(server
.io_processing
); /* We use ln later to remove it */ 
7945         redisLog(REDIS_DEBUG
,"Thread %ld got a new job (type %d): %p about key '%s'", 
7946             (long) pthread_self(), j
->type
, (void*)j
, (char*)j
->key
->ptr
); 
7948         /* Process the Job */ 
7949         if (j
->type 
== REDIS_IOJOB_LOAD
) { 
7950             j
->val 
= vmReadObjectFromSwap(j
->page
,j
->key
->vtype
); 
7951         } else if (j
->type 
== REDIS_IOJOB_PREPARE_SWAP
) { 
7952             FILE *fp 
= fopen("/dev/null","w+"); 
7953             j
->pages 
= rdbSavedObjectPages(j
->val
,fp
); 
7955         } else if (j
->type 
== REDIS_IOJOB_DO_SWAP
) { 
7956             if (vmWriteObjectOnSwap(j
->val
,j
->page
) == REDIS_ERR
) 
7960         /* Done: insert the job into the processed queue */ 
7961         redisLog(REDIS_DEBUG
,"Thread %ld completed the job: %p (key %s)", 
7962             (long) pthread_self(), (void*)j
, (char*)j
->key
->ptr
); 
7964         listDelNode(server
.io_processing
,ln
); 
7965         listAddNodeTail(server
.io_processed
,j
); 
7968         /* Signal the main thread there is new stuff to process */ 
7969         assert(write(server
.io_ready_pipe_write
,"x",1) == 1); 
7971     return NULL
; /* never reached */ 
7974 static void spawnIOThread(void) { 
7976     sigset_t mask
, omask
; 
7979     sigaddset(&mask
,SIGCHLD
); 
7980     sigaddset(&mask
,SIGHUP
); 
7981     sigaddset(&mask
,SIGPIPE
); 
7982     pthread_sigmask(SIG_SETMASK
, &mask
, &omask
); 
7983     pthread_create(&thread
,&server
.io_threads_attr
,IOThreadEntryPoint
,NULL
); 
7984     pthread_sigmask(SIG_SETMASK
, &omask
, NULL
); 
7985     server
.io_active_threads
++; 
7988 /* We need to wait for the last thread to exit before we are able to 
7989  * fork() in order to BGSAVE or BGREWRITEAOF. */ 
7990 static void waitEmptyIOJobsQueue(void) { 
7992         int io_processed_len
; 
7995         if (listLength(server
.io_newjobs
) == 0 && 
7996             listLength(server
.io_processing
) == 0 && 
7997             server
.io_active_threads 
== 0) 
8002         /* While waiting for empty jobs queue condition we post-process some 
8003          * finished jobs, as I/O threads may be hanging trying to write against 
8004          * the io_ready_pipe_write FD but there are so much pending jobs that 
8006         io_processed_len 
= listLength(server
.io_processed
); 
8008         if (io_processed_len
) { 
8009             vmThreadedIOCompletedJob(NULL
,server
.io_ready_pipe_read
,NULL
,0); 
8010             usleep(1000); /* 1 millisecond */ 
8012             usleep(10000); /* 10 milliseconds */ 
8017 static void vmReopenSwapFile(void) { 
8018     /* Note: we don't close the old one as we are in the child process 
8019      * and don't want to mess at all with the original file object. */ 
8020     server
.vm_fp 
= fopen(server
.vm_swap_file
,"r+b"); 
8021     if (server
.vm_fp 
== NULL
) { 
8022         redisLog(REDIS_WARNING
,"Can't re-open the VM swap file: %s. Exiting.", 
8023             server
.vm_swap_file
); 
8026     server
.vm_fd 
= fileno(server
.vm_fp
); 
8029 /* This function must be called while with threaded IO locked */ 
8030 static void queueIOJob(iojob 
*j
) { 
8031     redisLog(REDIS_DEBUG
,"Queued IO Job %p type %d about key '%s'\n", 
8032         (void*)j
, j
->type
, (char*)j
->key
->ptr
); 
8033     listAddNodeTail(server
.io_newjobs
,j
); 
8034     if (server
.io_active_threads 
< server
.vm_max_threads
) 
8038 static int vmSwapObjectThreaded(robj 
*key
, robj 
*val
, redisDb 
*db
) { 
8041     assert(key
->storage 
== REDIS_VM_MEMORY
); 
8042     assert(key
->refcount 
== 1); 
8044     j 
= zmalloc(sizeof(*j
)); 
8045     j
->type 
= REDIS_IOJOB_PREPARE_SWAP
; 
8047     j
->key 
= dupStringObject(key
); 
8051     j
->thread 
= (pthread_t
) -1; 
8052     key
->storage 
= REDIS_VM_SWAPPING
; 
8060 /* ============ Virtual Memory - Blocking clients on missing keys =========== */ 
8062 /* This function makes the client 'c' wait for the key 'key' to be loaded. 
8063  * If there is not already a job loading the key, it is created. 
8064  * The key is added to the io_keys list in the client structure, and also 
8065  * in the hash table mapping swapped keys to waiting clients, that is, 
8066  * server.io_waited_keys. */ 
8067 static int waitForSwappedKey(redisClient 
*c
, robj 
*key
) { 
8068     struct dictEntry 
*de
; 
8072     /* If the key does not exist or is already in RAM we don't need to 
8073      * block the client at all. */ 
8074     de 
= dictFind(c
->db
->dict
,key
); 
8075     if (de 
== NULL
) return 0; 
8076     o 
= dictGetEntryKey(de
); 
8077     if (o
->storage 
== REDIS_VM_MEMORY
) { 
8079     } else if (o
->storage 
== REDIS_VM_SWAPPING
) { 
8080         /* We were swapping the key, undo it! */ 
8081         vmCancelThreadedIOJob(o
); 
8085     /* OK: the key is either swapped, or being loaded just now. */ 
8087     /* Add the key to the list of keys this client is waiting for. 
8088      * This maps clients to keys they are waiting for. */ 
8089     listAddNodeTail(c
->io_keys
,key
); 
8092     /* Add the client to the swapped keys => clients waiting map. */ 
8093     de 
= dictFind(c
->db
->io_keys
,key
); 
8097         /* For every key we take a list of clients blocked for it */ 
8099         retval 
= dictAdd(c
->db
->io_keys
,key
,l
); 
8101         assert(retval 
== DICT_OK
); 
8103         l 
= dictGetEntryVal(de
); 
8105     listAddNodeTail(l
,c
); 
8107     /* Are we already loading the key from disk? If not create a job */ 
8108     if (o
->storage 
== REDIS_VM_SWAPPED
) { 
8111         o
->storage 
= REDIS_VM_LOADING
; 
8112         j 
= zmalloc(sizeof(*j
)); 
8113         j
->type 
= REDIS_IOJOB_LOAD
; 
8115         j
->key 
= dupStringObject(key
); 
8116         j
->key
->vtype 
= o
->vtype
; 
8117         j
->page 
= o
->vm
.page
; 
8120         j
->thread 
= (pthread_t
) -1; 
8128 /* Is this client attempting to run a command against swapped keys? 
8129  * If so, block it ASAP, load the keys in background, then resume it. 
8131  * The important idea about this function is that it can fail! If keys will 
8132  * still be swapped when the client is resumed, this key lookups will 
8133  * just block loading keys from disk. In practical terms this should only 
8134  * happen with SORT BY command or if there is a bug in this function. 
8136  * Return 1 if the client is marked as blocked, 0 if the client can 
8137  * continue as the keys it is going to access appear to be in memory. */ 
8138 static int blockClientOnSwappedKeys(struct redisCommand 
*cmd
, redisClient 
*c
) { 
8141     if (cmd
->vm_firstkey 
== 0) return 0; 
8142     last 
= cmd
->vm_lastkey
; 
8143     if (last 
< 0) last 
= c
->argc
+last
; 
8144     for (j 
= cmd
->vm_firstkey
; j 
<= last
; j 
+= cmd
->vm_keystep
) 
8145         waitForSwappedKey(c
,c
->argv
[j
]); 
8146     /* If the client was blocked for at least one key, mark it as blocked. */ 
8147     if (listLength(c
->io_keys
)) { 
8148         c
->flags 
|= REDIS_IO_WAIT
; 
8149         aeDeleteFileEvent(server
.el
,c
->fd
,AE_READABLE
); 
8150         server
.vm_blocked_clients
++; 
8157 /* Remove the 'key' from the list of blocked keys for a given client. 
8159  * The function returns 1 when there are no longer blocking keys after 
8160  * the current one was removed (and the client can be unblocked). */ 
8161 static int dontWaitForSwappedKey(redisClient 
*c
, robj 
*key
) { 
8165     struct dictEntry 
*de
; 
8167     /* Remove the key from the list of keys this client is waiting for. */ 
8168     listRewind(c
->io_keys
,&li
); 
8169     while ((ln 
= listNext(&li
)) != NULL
) { 
8170         if (compareStringObjects(ln
->value
,key
) == 0) { 
8171             listDelNode(c
->io_keys
,ln
); 
8177     /* Remove the client from the key => waiting clients map. */ 
8178     de 
= dictFind(c
->db
->io_keys
,key
); 
8180     l 
= dictGetEntryVal(de
); 
8181     ln 
= listSearchKey(l
,c
); 
8184     if (listLength(l
) == 0) 
8185         dictDelete(c
->db
->io_keys
,key
); 
8187     return listLength(c
->io_keys
) == 0; 
8190 static void handleClientsBlockedOnSwappedKey(redisDb 
*db
, robj 
*key
) { 
8191     struct dictEntry 
*de
; 
8196     de 
= dictFind(db
->io_keys
,key
); 
8199     l 
= dictGetEntryVal(de
); 
8200     len 
= listLength(l
); 
8201     /* Note: we can't use something like while(listLength(l)) as the list 
8202      * can be freed by the calling function when we remove the last element. */ 
8205         redisClient 
*c 
= ln
->value
; 
8207         if (dontWaitForSwappedKey(c
,key
)) { 
8208             /* Put the client in the list of clients ready to go as we 
8209              * loaded all the keys about it. */ 
8210             listAddNodeTail(server
.io_ready_clients
,c
); 
8215 /* ================================= Debugging ============================== */ 
8217 static void debugCommand(redisClient 
*c
) { 
8218     if (!strcasecmp(c
->argv
[1]->ptr
,"segfault")) { 
8220     } else if (!strcasecmp(c
->argv
[1]->ptr
,"reload")) { 
8221         if (rdbSave(server
.dbfilename
) != REDIS_OK
) { 
8222             addReply(c
,shared
.err
); 
8226         if (rdbLoad(server
.dbfilename
) != REDIS_OK
) { 
8227             addReply(c
,shared
.err
); 
8230         redisLog(REDIS_WARNING
,"DB reloaded by DEBUG RELOAD"); 
8231         addReply(c
,shared
.ok
); 
8232     } else if (!strcasecmp(c
->argv
[1]->ptr
,"loadaof")) { 
8234         if (loadAppendOnlyFile(server
.appendfilename
) != REDIS_OK
) { 
8235             addReply(c
,shared
.err
); 
8238         redisLog(REDIS_WARNING
,"Append Only File loaded by DEBUG LOADAOF"); 
8239         addReply(c
,shared
.ok
); 
8240     } else if (!strcasecmp(c
->argv
[1]->ptr
,"object") && c
->argc 
== 3) { 
8241         dictEntry 
*de 
= dictFind(c
->db
->dict
,c
->argv
[2]); 
8245             addReply(c
,shared
.nokeyerr
); 
8248         key 
= dictGetEntryKey(de
); 
8249         val 
= dictGetEntryVal(de
); 
8250         if (!server
.vm_enabled 
|| (key
->storage 
== REDIS_VM_MEMORY 
|| 
8251                                    key
->storage 
== REDIS_VM_SWAPPING
)) { 
8252             addReplySds(c
,sdscatprintf(sdsempty(), 
8253                 "+Key at:%p refcount:%d, value at:%p refcount:%d " 
8254                 "encoding:%d serializedlength:%lld\r\n", 
8255                 (void*)key
, key
->refcount
, (void*)val
, val
->refcount
, 
8256                 val
->encoding
, (long long) rdbSavedObjectLen(val
,NULL
))); 
8258             addReplySds(c
,sdscatprintf(sdsempty(), 
8259                 "+Key at:%p refcount:%d, value swapped at: page %llu " 
8260                 "using %llu pages\r\n", 
8261                 (void*)key
, key
->refcount
, (unsigned long long) key
->vm
.page
, 
8262                 (unsigned long long) key
->vm
.usedpages
)); 
8264     } else if (!strcasecmp(c
->argv
[1]->ptr
,"swapout") && c
->argc 
== 3) { 
8265         dictEntry 
*de 
= dictFind(c
->db
->dict
,c
->argv
[2]); 
8268         if (!server
.vm_enabled
) { 
8269             addReplySds(c
,sdsnew("-ERR Virtual Memory is disabled\r\n")); 
8273             addReply(c
,shared
.nokeyerr
); 
8276         key 
= dictGetEntryKey(de
); 
8277         val 
= dictGetEntryVal(de
); 
8278         /* If the key is shared we want to create a copy */ 
8279         if (key
->refcount 
> 1) { 
8280             robj 
*newkey 
= dupStringObject(key
); 
8282             key 
= dictGetEntryKey(de
) = newkey
; 
8285         if (key
->storage 
!= REDIS_VM_MEMORY
) { 
8286             addReplySds(c
,sdsnew("-ERR This key is not in memory\r\n")); 
8287         } else if (vmSwapObjectBlocking(key
,val
) == REDIS_OK
) { 
8288             dictGetEntryVal(de
) = NULL
; 
8289             addReply(c
,shared
.ok
); 
8291             addReply(c
,shared
.err
); 
8294         addReplySds(c
,sdsnew( 
8295             "-ERR Syntax error, try DEBUG [SEGFAULT|OBJECT <key>|SWAPOUT <key>|RELOAD]\r\n")); 
8299 static void _redisAssert(char *estr
, char *file
, int line
) { 
8300     redisLog(REDIS_WARNING
,"=== ASSERTION FAILED ==="); 
8301     redisLog(REDIS_WARNING
,"==> %s:%d '%s' is not true\n",file
,line
,estr
); 
8302 #ifdef HAVE_BACKTRACE 
8303     redisLog(REDIS_WARNING
,"(forcing SIGSEGV in order to print the stack trace)"); 
8308 /* =================================== Main! ================================ */ 
8311 int linuxOvercommitMemoryValue(void) { 
8312     FILE *fp 
= fopen("/proc/sys/vm/overcommit_memory","r"); 
8316     if (fgets(buf
,64,fp
) == NULL
) { 
8325 void linuxOvercommitMemoryWarning(void) { 
8326     if (linuxOvercommitMemoryValue() == 0) { 
8327         redisLog(REDIS_WARNING
,"WARNING overcommit_memory is set to 0! Background save may fail under low condition memory. To fix this issue add 'vm.overcommit_memory = 1' to /etc/sysctl.conf and then reboot or run the command 'sysctl vm.overcommit_memory=1' for this to take effect."); 
8330 #endif /* __linux__ */ 
8332 static void daemonize(void) { 
8336     if (fork() != 0) exit(0); /* parent exits */ 
8337     setsid(); /* create a new session */ 
8339     /* Every output goes to /dev/null. If Redis is daemonized but 
8340      * the 'logfile' is set to 'stdout' in the configuration file 
8341      * it will not log at all. */ 
8342     if ((fd 
= open("/dev/null", O_RDWR
, 0)) != -1) { 
8343         dup2(fd
, STDIN_FILENO
); 
8344         dup2(fd
, STDOUT_FILENO
); 
8345         dup2(fd
, STDERR_FILENO
); 
8346         if (fd 
> STDERR_FILENO
) close(fd
); 
8348     /* Try to write the pid file */ 
8349     fp 
= fopen(server
.pidfile
,"w"); 
8351         fprintf(fp
,"%d\n",getpid()); 
8356 int main(int argc
, char **argv
) { 
8361         resetServerSaveParams(); 
8362         loadServerConfig(argv
[1]); 
8363     } else if (argc 
> 2) { 
8364         fprintf(stderr
,"Usage: ./redis-server [/path/to/redis.conf]\n"); 
8367         redisLog(REDIS_WARNING
,"Warning: no config file specified, using the default config. In order to specify a config file use 'redis-server /path/to/redis.conf'"); 
8369     if (server
.daemonize
) daemonize(); 
8371     redisLog(REDIS_NOTICE
,"Server started, Redis version " REDIS_VERSION
); 
8373     linuxOvercommitMemoryWarning(); 
8376     if (server
.appendonly
) { 
8377         if (loadAppendOnlyFile(server
.appendfilename
) == REDIS_OK
) 
8378             redisLog(REDIS_NOTICE
,"DB loaded from append only file: %ld seconds",time(NULL
)-start
); 
8380         if (rdbLoad(server
.dbfilename
) == REDIS_OK
) 
8381             redisLog(REDIS_NOTICE
,"DB loaded from disk: %ld seconds",time(NULL
)-start
); 
8383     redisLog(REDIS_NOTICE
,"The server is now ready to accept connections on port %d", server
.port
); 
8384     aeSetBeforeSleepProc(server
.el
,beforeSleep
); 
8386     aeDeleteEventLoop(server
.el
); 
8390 /* ============================= Backtrace support ========================= */ 
8392 #ifdef HAVE_BACKTRACE 
8393 static char *findFuncName(void *pointer
, unsigned long *offset
); 
8395 static void *getMcontextEip(ucontext_t 
*uc
) { 
8396 #if defined(__FreeBSD__) 
8397     return (void*) uc
->uc_mcontext
.mc_eip
; 
8398 #elif defined(__dietlibc__) 
8399     return (void*) uc
->uc_mcontext
.eip
; 
8400 #elif defined(__APPLE__) && !defined(MAC_OS_X_VERSION_10_6) 
8402     return (void*) uc
->uc_mcontext
->__ss
.__rip
; 
8404     return (void*) uc
->uc_mcontext
->__ss
.__eip
; 
8406 #elif defined(__APPLE__) && defined(MAC_OS_X_VERSION_10_6) 
8407   #if defined(_STRUCT_X86_THREAD_STATE64) && !defined(__i386__) 
8408     return (void*) uc
->uc_mcontext
->__ss
.__rip
; 
8410     return (void*) uc
->uc_mcontext
->__ss
.__eip
; 
8412 #elif defined(__i386__) || defined(__X86_64__) || defined(__x86_64__) 
8413     return (void*) uc
->uc_mcontext
.gregs
[REG_EIP
]; /* Linux 32/64 bit */ 
8414 #elif defined(__ia64__) /* Linux IA64 */ 
8415     return (void*) uc
->uc_mcontext
.sc_ip
; 
8421 static void segvHandler(int sig
, siginfo_t 
*info
, void *secret
) { 
8423     char **messages 
= NULL
; 
8424     int i
, trace_size 
= 0; 
8425     unsigned long offset
=0; 
8426     ucontext_t 
*uc 
= (ucontext_t
*) secret
; 
8428     REDIS_NOTUSED(info
); 
8430     redisLog(REDIS_WARNING
, 
8431         "======= Ooops! Redis %s got signal: -%d- =======", REDIS_VERSION
, sig
); 
8432     infostring 
= genRedisInfoString(); 
8433     redisLog(REDIS_WARNING
, "%s",infostring
); 
8434     /* It's not safe to sdsfree() the returned string under memory 
8435      * corruption conditions. Let it leak as we are going to abort */ 
8437     trace_size 
= backtrace(trace
, 100); 
8438     /* overwrite sigaction with caller's address */ 
8439     if (getMcontextEip(uc
) != NULL
) { 
8440         trace
[1] = getMcontextEip(uc
); 
8442     messages 
= backtrace_symbols(trace
, trace_size
); 
8444     for (i
=1; i
<trace_size
; ++i
) { 
8445         char *fn 
= findFuncName(trace
[i
], &offset
), *p
; 
8447         p 
= strchr(messages
[i
],'+'); 
8448         if (!fn 
|| (p 
&& ((unsigned long)strtol(p
+1,NULL
,10)) < offset
)) { 
8449             redisLog(REDIS_WARNING
,"%s", messages
[i
]); 
8451             redisLog(REDIS_WARNING
,"%d redis-server %p %s + %d", i
, trace
[i
], fn
, (unsigned int)offset
); 
8454     /* free(messages); Don't call free() with possibly corrupted memory. */ 
8458 static void setupSigSegvAction(void) { 
8459     struct sigaction act
; 
8461     sigemptyset (&act
.sa_mask
); 
8462     /* When the SA_SIGINFO flag is set in sa_flags then sa_sigaction 
8463      * is used. Otherwise, sa_handler is used */ 
8464     act
.sa_flags 
= SA_NODEFER 
| SA_ONSTACK 
| SA_RESETHAND 
| SA_SIGINFO
; 
8465     act
.sa_sigaction 
= segvHandler
; 
8466     sigaction (SIGSEGV
, &act
, NULL
); 
8467     sigaction (SIGBUS
, &act
, NULL
); 
8468     sigaction (SIGFPE
, &act
, NULL
); 
8469     sigaction (SIGILL
, &act
, NULL
); 
8470     sigaction (SIGBUS
, &act
, NULL
); 
8474 #include "staticsymbols.h" 
8475 /* This function tries to convert a pointer into a function name. It's used in 
8476  * order to provide a backtrace under segmentation fault that's able to 
8477  * display functions declared as static (otherwise the backtrace is useless). */ 
8478 static char *findFuncName(void *pointer
, unsigned long *offset
){ 
8480     unsigned long off
, minoff 
= 0; 
8482     /* Try to match against the Symbol with the smallest offset */ 
8483     for (i
=0; symsTable
[i
].pointer
; i
++) { 
8484         unsigned long lp 
= (unsigned long) pointer
; 
8486         if (lp 
!= (unsigned long)-1 && lp 
>= symsTable
[i
].pointer
) { 
8487             off
=lp
-symsTable
[i
].pointer
; 
8488             if (ret 
< 0 || off 
< minoff
) { 
8494     if (ret 
== -1) return NULL
; 
8496     return symsTable
[ret
].name
; 
8498 #else /* HAVE_BACKTRACE */ 
8499 static void setupSigSegvAction(void) { 
8501 #endif /* HAVE_BACKTRACE */