2  * Copyright (c) 2006-2009, Salvatore Sanfilippo <antirez at gmail dot com> 
   5  * Redistribution and use in source and binary forms, with or without 
   6  * modification, are permitted provided that the following conditions are met: 
   8  *   * Redistributions of source code must retain the above copyright notice, 
   9  *     this list of conditions and the following disclaimer. 
  10  *   * Redistributions in binary form must reproduce the above copyright 
  11  *     notice, this list of conditions and the following disclaimer in the 
  12  *     documentation and/or other materials provided with the distribution. 
  13  *   * Neither the name of Redis nor the names of its contributors may be used 
  14  *     to endorse or promote products derived from this software without 
  15  *     specific prior written permission. 
  17  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 
  18  * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 
  19  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 
  20  * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE 
  21  * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 
  22  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 
  23  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 
  24  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 
  25  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 
  26  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 
  27  * POSSIBILITY OF SUCH DAMAGE. 
  30 #define REDIS_VERSION "1.3.2" 
  40 #define __USE_POSIX199309 
  46 #endif /* HAVE_BACKTRACE */ 
  54 #include <arpa/inet.h> 
  58 #include <sys/resource.h> 
  65 #include "solarisfixes.h" 
  69 #include "ae.h"     /* Event driven programming library */ 
  70 #include "sds.h"    /* Dynamic safe strings */ 
  71 #include "anet.h"   /* Networking the easy way */ 
  72 #include "dict.h"   /* Hash tables */ 
  73 #include "adlist.h" /* Linked lists */ 
  74 #include "zmalloc.h" /* total memory usage aware version of malloc/free */ 
  75 #include "lzf.h"    /* LZF compression library */ 
  76 #include "pqsort.h" /* Partial qsort for SORT+LIMIT */ 
  82 /* Static server configuration */ 
  83 #define REDIS_SERVERPORT        6379    /* TCP port */ 
  84 #define REDIS_MAXIDLETIME       (60*5)  /* default client timeout */ 
  /* NOTE(review): the unit of REDIS_MAXIDLETIME is not stated here; 60*5
   * presumably means 5 minutes expressed in seconds -- confirm. */
  /* NOTE(review): the next length/size constants carry no unit in this
   * listing; presumably bytes (or elements for REDIS_STATIC_ARGS) -- confirm
   * at their use sites. */
  85 #define REDIS_IOBUF_LEN         1024 
  86 #define REDIS_LOADBUF_LEN       1024 
  87 #define REDIS_STATIC_ARGS       4 
  88 #define REDIS_DEFAULT_DBNUM     16 
  89 #define REDIS_CONFIGLINE_MAX    1024 
  90 #define REDIS_OBJFREELIST_MAX   1000000 /* Max number of objects to cache */ 
  91 #define REDIS_MAX_SYNC_TIME     60      /* Slave can't take more to sync */ 
  92 #define REDIS_EXPIRELOOKUPS_PER_CRON    100 /* try to expire 100 keys/second */ 
  /* 1024*64 = 65536 */
  93 #define REDIS_MAX_WRITE_PER_EVENT (1024*64) 
  94 #define REDIS_REQUEST_MAX_SIZE (1024*1024*256) /* max bytes in inline command */ 
  96 /* If more than REDIS_WRITEV_THRESHOLD write packets are pending use writev */ 
  97 #define REDIS_WRITEV_THRESHOLD      3 
  98 /* Max number of iovecs used for each writev call */ 
  99 #define REDIS_WRITEV_IOVEC_COUNT    256 
 101 /* Hash table parameters */ 
 102 #define REDIS_HT_MINFILL        10      /* Minimal hash table fill 10% */ 
 /* Command flags. These are bit values: entries of cmdTable combine them
  * with '|' (e.g. REDIS_CMD_BULK|REDIS_CMD_DENYOOM). */
 105 #define REDIS_CMD_BULK          1       /* Bulk write command */ 
 106 #define REDIS_CMD_INLINE        2       /* Inline command */ 
 107 /* REDIS_CMD_DENYOOM deserves a longer comment: all the commands marked with 
 108    this flag will return an error when the 'maxmemory' option is set in the 
 109    config file and the server is using more than maxmemory bytes of memory. 
 110    In short these commands are denied on low memory conditions. */ 
 111 #define REDIS_CMD_DENYOOM       4 
 /* Object type code for string objects; initStaticStringObject() stores it
  * in the 'type' field of the object it initializes. */
 114 #define REDIS_STRING 0 
 120 /* Objects encoding */ 
 121 #define REDIS_ENCODING_RAW 0    /* Raw representation */ 
 122 #define REDIS_ENCODING_INT 1    /* Encoded as integer */ 
 124 /* Object types only used for dumping to disk */ 
 125 #define REDIS_EXPIRETIME 253 
 126 #define REDIS_SELECTDB 254 
 127 #define REDIS_EOF 255 
 129 /* Defines related to the dump file format. To store 32 bits lengths for short 
 130  * keys requires a lot of space, so we check the most significant 2 bits of 
 131  * the first byte to interpret the length: 
 133  * 00|000000 => if the two MSB are 00 the len is the 6 bits of this byte 
 134  * 01|000000 00000000 => if it's 01, the len is 14 bits, 6 bits + 8 bits of next byte 
 135  * 10|000000 [32 bit integer] => if it's 10, a full 32 bit len will follow 
 136  * 11|000000 this means: specially encoded object will follow. The six bits 
 137  *           number specify the kind of object that follows. 
 138  *           See the REDIS_RDB_ENC_* defines. 
 140  * Lengths up to 63 are stored using a single byte, most DB keys, and many 
 141  * values, will fit inside. */ 
 /* The four values below are the possible contents of the two MSB described
  * above (00, 01, 10, 11). */
 142 #define REDIS_RDB_6BITLEN 0 
 143 #define REDIS_RDB_14BITLEN 1 
 144 #define REDIS_RDB_32BITLEN 2 
 145 #define REDIS_RDB_ENCVAL 3 
 /* NOTE(review): presumably the error value returned by the length-loading
  * code -- confirm at its use sites. */
 146 #define REDIS_RDB_LENERR UINT_MAX 
 148 /* When a length of a string object stored on disk has the first two bits 
 149  * set, the remaining six bits specify a special encoding for the object 
 150  * according to the following defines: */ 
 151 #define REDIS_RDB_ENC_INT8 0        /* 8 bit signed integer */ 
 152 #define REDIS_RDB_ENC_INT16 1       /* 16 bit signed integer */ 
 153 #define REDIS_RDB_ENC_INT32 2       /* 32 bit signed integer */ 
 154 #define REDIS_RDB_ENC_LZF 3         /* string compressed with LZF */ 
 156 /* Virtual memory object->storage field. */ 
 157 #define REDIS_VM_MEMORY 0       /* The object is on memory */ 
 158 #define REDIS_VM_SWAPPED 1      /* The object is on disk */ 
 159 #define REDIS_VM_SWAPPING 2     /* Redis is swapping this object on disk */ 
 160 #define REDIS_VM_LOADING 3      /* Redis is loading this object from disk */ 
 162 /* Virtual memory static configuration stuff. 
 163  * Check vmFindContiguousPages() to know more about these magic numbers. */ 
 164 #define REDIS_VM_MAX_NEAR_PAGES 65536 
 165 #define REDIS_VM_MAX_RANDOM_JUMP 4096 
 166 #define REDIS_VM_MAX_THREADS 32 
 /* Client flags: distinct bit values, combined with '|' in the 'flags'
  * field of redisClient. */
 169 #define REDIS_CLOSE 1       /* This client connection should be closed ASAP */ 
 170 #define REDIS_SLAVE 2       /* This client is a slave server */ 
 171 #define REDIS_MASTER 4      /* This client is a master server */ 
 172 #define REDIS_MONITOR 8      /* This client is a slave monitor, see MONITOR */ 
 173 #define REDIS_MULTI 16      /* This client is in a MULTI context */ 
 174 #define REDIS_BLOCKED 32    /* The client is waiting in a blocking operation */ 
 175 #define REDIS_IO_WAIT 64    /* The client is waiting for Virtual Memory I/O */ 
 177 /* Slave replication state - slave side */ 
 178 #define REDIS_REPL_NONE 0   /* No active replication */ 
 179 #define REDIS_REPL_CONNECT 1    /* Must connect to master */ 
 180 #define REDIS_REPL_CONNECTED 2  /* Connected to master */ 
 182 /* Slave replication state - from the point of view of master 
 183  * Note that in SEND_BULK and ONLINE state the slave receives new updates 
 184  * in its output queue. In the WAIT_BGSAVE state instead the server is waiting 
 185  * to start the next background saving in order to send updates to it. */ 
 /* The master-side values continue the numbering of the slave-side ones
  * (3..6 after 0..2), so the two sets never collide. redisClient has a
  * 'replstate' field documented as the replication state of a slave client. */
 186 #define REDIS_REPL_WAIT_BGSAVE_START 3 /* master waits bgsave to start feeding it */ 
 187 #define REDIS_REPL_WAIT_BGSAVE_END 4 /* master waits bgsave to start bulk DB transmission */ 
 188 #define REDIS_REPL_SEND_BULK 5 /* master is sending the bulk DB */ 
 189 #define REDIS_REPL_ONLINE 6 /* bulk DB already transmitted, receive updates */ 
 191 /* List related stuff */ 
 195 /* Sort operations */ 
 196 #define REDIS_SORT_GET 0 
 197 #define REDIS_SORT_ASC 1 
 198 #define REDIS_SORT_DESC 2 
 199 #define REDIS_SORTKEY_MAX 1024 
 /* Log levels, in increasing order. redisLog() writes a message only when
  * its level is >= server.verbosity, and also uses the level as an index to
  * pick the one-character marker printed before the message. */
 202 #define REDIS_DEBUG 0 
 203 #define REDIS_VERBOSE 1 
 204 #define REDIS_NOTICE 2 
 205 #define REDIS_WARNING 3 
 207 /* Anti-warning macro... */ 
 /* Evaluates V and discards the result by casting to void.
  * NOTE(review): V is not parenthesized in the expansion, so pass a plain
  * identifier rather than a compound expression. */
 208 #define REDIS_NOTUSED(V) ((void) V) 
 210 #define ZSKIPLIST_MAXLEVEL 32 /* Should be enough for 2^32 elements */ 
 211 #define ZSKIPLIST_P 0.25      /* Skiplist P = 1/4 */ 
 213 /* Append only defines */ 
 214 #define APPENDFSYNC_NO 0 
 215 #define APPENDFSYNC_ALWAYS 1 
 216 #define APPENDFSYNC_EVERYSEC 2 
 218 /* We can print the stacktrace, so our assert is defined this way: */ 
 /* redisAssert(_e): when _e is false, calls _redisAssert() with the
  * stringified expression text and then exit(1); when _e is true the whole
  * macro is a no-op expression of type void. Unlike the standard assert()
  * it does not depend on NDEBUG. */
 219 #define redisAssert(_e) ((_e)?(void)0 : (_redisAssert(#_e),exit(1))) 
 /* Failure hook used by redisAssert(); estr is the text of the failed
  * expression. */
 220 static void _redisAssert(char *estr
); 
 222 /*================================= Data types ============================== */ 
 224 /* A redis object, that is a type able to hold a string / list / set */ 
 226 /* The VM object structure */ 
 227 struct redisObjectVM 
{ 
 228     off_t page
;         /* the page at witch the object is stored on disk */ 
 229     off_t usedpages
;    /* number of pages used on disk */ 
 230     time_t atime
;       /* Last access time */ 
 233 /* The actual Redis Object */ 
 234 typedef struct redisObject 
{ 
 237     unsigned char encoding
; 
 238     unsigned char storage
;  /* If this object is a key, where is the value? 
 239                              * REDIS_VM_MEMORY, REDIS_VM_SWAPPED, ... */ 
 240     unsigned char vtype
; /* If this object is a key, and value is swapped out, 
 241                           * this is the type of the swapped out object. */ 
 243     /* VM fields, these are only allocated if VM is active, otherwise the 
 244      * object allocation function will just allocate 
 245      * sizeof(redisObject) minus sizeof(redisObjectVM), so using 
 246      * Redis without VM active will not have any overhead. */ 
 247     struct redisObjectVM vm
; 
 250 /* Macro used to initialize a Redis object allocated on the stack. 
 251  * Note that this macro is kept near the structure definition to make sure 
 252  * we'll update it when the structure is changed, to avoid bugs like 
 253  * bug #85 introduced exactly in this way. */ 
 254 #define initStaticStringObject(_var,_ptr) do { \ 
 256     _var.type = REDIS_STRING; \ 
 257     _var.encoding = REDIS_ENCODING_RAW; \ 
 259     if (server.vm_enabled) _var.storage = REDIS_VM_MEMORY; \ 
 262 typedef struct redisDb 
{ 
 263     dict 
*dict
;                 /* The keyspace for this DB */ 
 264     dict 
*expires
;              /* Timeout of keys with a timeout set */ 
 265     dict 
*blockingkeys
;         /* Keys with clients waiting for data (BLPOP) */ 
 269 /* Client MULTI/EXEC state */ 
 270 typedef struct multiCmd 
{ 
 273     struct redisCommand 
*cmd
; 
 276 typedef struct multiState 
{ 
 277     multiCmd 
*commands
;     /* Array of MULTI commands */ 
 278     int count
;              /* Total number of MULTI commands */ 
 281 /* With multiplexing we need to keep per-client state. 
 282  * Clients are kept in a linked list. */ 
 283 typedef struct redisClient 
{ 
 288     robj 
**argv
, **mbargv
; 
 290     int bulklen
;            /* bulk read len. -1 if not in bulk read mode */ 
 291     int multibulk
;          /* multi bulk command format active */ 
 294     time_t lastinteraction
; /* time of the last interaction, used for timeout */ 
 295     int flags
;              /* REDIS_CLOSE | REDIS_SLAVE | REDIS_MONITOR */ 
 297     int slaveseldb
;         /* slave selected db, if this client is a slave */ 
 298     int authenticated
;      /* when requirepass is non-NULL */ 
 299     int replstate
;          /* replication state if this is a slave */ 
 300     int repldbfd
;           /* replication DB file descriptor */ 
 301     long repldboff
;         /* replication DB file offset */ 
 302     off_t repldbsize
;       /* replication DB file size */ 
 303     multiState mstate
;      /* MULTI/EXEC state */ 
 304     robj 
**blockingkeys
;    /* The key we waiting to terminate a blocking 
 305                              * operation such as BLPOP. Otherwise NULL. */ 
 306     int blockingkeysnum
;    /* Number of blocking keys */ 
 307     time_t blockingto
;      /* Blocking operation timeout. If UNIX current time 
 308                              * is >= blockingto then the operation timed out. */ 
 309     list 
*io_keys
;          /* Keys this client is waiting to be loaded from the 
 310                              * swap file in order to continue. */ 
 318 /* Global server state structure */ 
 323     dict 
*sharingpool
;          /* Poll used for object sharing */ 
 324     unsigned int sharingpoolsize
; 
 325     long long dirty
;            /* changes to DB from the last save */ 
 327     list 
*slaves
, *monitors
; 
 328     char neterr
[ANET_ERR_LEN
]; 
 330     int cronloops
;              /* number of times the cron function run */ 
 331     list 
*objfreelist
;          /* A list of freed objects to avoid malloc() */ 
 332     time_t lastsave
;            /* Unix time of last save succeeede */ 
 333     size_t usedmemory
;             /* Used memory in megabytes */ 
 334     /* Fields used only for stats */ 
 335     time_t stat_starttime
;         /* server start time */ 
 336     long long stat_numcommands
;    /* number of processed commands */ 
 337     long long stat_numconnections
; /* number of connections received */ 
 350     pid_t bgsavechildpid
; 
 351     pid_t bgrewritechildpid
; 
 352     sds bgrewritebuf
; /* buffer taken by parent during oppend only rewrite */ 
 353     struct saveparam 
*saveparams
; 
 358     char *appendfilename
; 
 362     /* Replication related */ 
 367     redisClient 
*master
;    /* client that is master for this slave */ 
 369     unsigned int maxclients
; 
 370     unsigned long long maxmemory
; 
 371     unsigned int blockedclients
; 
 372     /* Sort parameters - qsort_r() is only available under BSD so we 
 373      * have to keep this state global, in order to pass it to sortCompare() */ 
 377     /* Virtual memory configuration */ 
 381     unsigned long long vm_max_memory
; 
 382     /* Virtual memory state */ 
 385     off_t vm_next_page
; /* Next probably empty page */ 
 386     off_t vm_near_pages
; /* Number of pages allocated sequentially */ 
 387     unsigned char *vm_bitmap
; /* Bitmap of free/used pages */ 
 388     time_t unixtime
;    /* Unix time sampled every second. */ 
 389     /* Virtual memory I/O threads stuff */ 
 390     /* An I/O thread processes an element taken from the io_newjobs queue and 
 391      * puts the result of the operation in the io_processed list. While the 
 392      * job is being processed, it's put on the io_processing queue. */ 
 393     list 
*io_newjobs
; /* List of VM I/O jobs yet to be processed */ 
 394     list 
*io_processing
; /* List of VM I/O jobs being processed */ 
 395     list 
*io_processed
; /* List of VM I/O jobs already processed */ 
 396     list 
*io_clients
; /* All the clients waiting for SWAP I/O operations */ 
 397     pthread_mutex_t io_mutex
; /* lock to access io_jobs/io_done/io_thread_job */ 
 398     int io_active_threads
; /* Number of running I/O threads */ 
 399     int vm_max_threads
; /* Max number of I/O threads running at the same time */ 
 400     /* Our main thread is blocked on the event loop, looking for sockets ready 
 401      * to be read or written, so when a threaded I/O operation is ready to be 
 402      * processed by the main thread, the I/O thread will use a unix pipe to 
 403      * awake the main thread. The following are the two pipe FDs. */ 
 404     int io_ready_pipe_read
; 
 405     int io_ready_pipe_write
; 
 406     /* Virtual memory stats */ 
 407     unsigned long long vm_stats_used_pages
; 
 408     unsigned long long vm_stats_swapped_objects
; 
 409     unsigned long long vm_stats_swapouts
; 
 410     unsigned long long vm_stats_swapins
; 
 414 typedef void redisCommandProc(redisClient 
*c
); 
 415 struct redisCommand 
{ 
 417     redisCommandProc 
*proc
; 
 422 struct redisFunctionSym 
{ 
 424     unsigned long pointer
; 
 427 typedef struct _redisSortObject 
{ 
 435 typedef struct _redisSortOperation 
{ 
 438 } redisSortOperation
; 
 440 /* ZSETs use a specialized version of Skiplists */ 
 442 typedef struct zskiplistNode 
{ 
 443     struct zskiplistNode 
**forward
; 
 444     struct zskiplistNode 
*backward
; 
 449 typedef struct zskiplist 
{ 
 450     struct zskiplistNode 
*header
, *tail
; 
 451     unsigned long length
; 
 455 typedef struct zset 
{ 
 460 /* Our shared "common" objects */ 
 462 struct sharedObjectsStruct 
{ 
 463     robj 
*crlf
, *ok
, *err
, *emptybulk
, *czero
, *cone
, *pong
, *space
, 
 464     *colon
, *nullbulk
, *nullmultibulk
, *queued
, 
 465     *emptymultibulk
, *wrongtypeerr
, *nokeyerr
, *syntaxerr
, *sameobjecterr
, 
 466     *outofrangeerr
, *plus
, 
 467     *select0
, *select1
, *select2
, *select3
, *select4
, 
 468     *select5
, *select6
, *select7
, *select8
, *select9
; 
 471 /* Global vars that are actually used as constants. The following double 
 472  * values are used for double on-disk serialization, and are initialized 
 473  * at runtime to avoid strange compiler optimizations. */ 
 475 static double R_Zero
, R_PosInf
, R_NegInf
, R_Nan
; 
 477 /* VM threaded I/O request message */ 
 /* Request type codes; the 'type' field of the iojob structure holds one of
  * these REDIS_IOJOB_* values. */
 478 #define REDIS_IOJOB_LOAD 0          /* Load from disk to memory */ 
 479 #define REDIS_IOJOB_PREPARE_SWAP 1  /* Compute needed pages */ 
 480 #define REDIS_IOJOB_DO_SWAP 2       /* Swap from memory to disk */ 
 481 typedef struct iojon 
{ 
 482     int type
;   /* Request type, REDIS_IOJOB_* */ 
 483     redisDb 
*db
;/* Redis database */ 
 484     robj 
*key
;  /* This I/O request is about swapping this key */ 
 485     robj 
*val
;  /* the value to swap for REDIS_IOREQ_*_SWAP, otherwise this 
 486                  * field is populated by the I/O thread for REDIS_IOREQ_LOAD. */ 
 487     off_t page
; /* Swap page where to read/write the object */ 
 488     off_t pages
; /* Swap pages needed to safe object. PREPARE_SWAP return val */ 
 489     int canceled
; /* True if this command was canceled by blocking side of VM */ 
 490     pthread_t thread
; /* ID of the thread processing this entry */ 
 493 /*================================ Prototypes =============================== */ 
 /* Forward declarations of file-local (static) helpers, so that they can be
  * referenced before their definitions appear later in the file. */
 495 static void freeStringObject(robj 
*o
); 
 496 static void freeListObject(robj 
*o
); 
 497 static void freeSetObject(robj 
*o
); 
 /* Note: decrRefCount() takes a void pointer while incrRefCount() takes an
  * robj pointer. NOTE(review): presumably so it can be installed directly
  * as a generic free callback -- confirm. */
 498 static void decrRefCount(void *o
); 
 499 static robj 
*createObject(int type
, void *ptr
); 
 500 static void freeClient(redisClient 
*c
); 
 501 static int rdbLoad(char *filename
); 
 502 static void addReply(redisClient 
*c
, robj 
*obj
); 
 503 static void addReplySds(redisClient 
*c
, sds s
); 
 504 static void incrRefCount(robj 
*o
); 
 505 static int rdbSaveBackground(char *filename
); 
 506 static robj 
*createStringObject(char *ptr
, size_t len
); 
 507 static robj 
*dupStringObject(robj 
*o
); 
 508 static void replicationFeedSlaves(list 
*slaves
, struct redisCommand 
*cmd
, int dictid
, robj 
**argv
, int argc
); 
 509 static void feedAppendOnlyFile(struct redisCommand 
*cmd
, int dictid
, robj 
**argv
, int argc
); 
 510 static int syncWithMaster(void); 
 511 static robj 
*tryObjectSharing(robj 
*o
); 
 512 static int tryObjectEncoding(robj 
*o
); 
 513 static robj 
*getDecodedObject(robj 
*o
); 
 514 static int removeExpire(redisDb 
*db
, robj 
*key
); 
 515 static int expireIfNeeded(redisDb 
*db
, robj 
*key
); 
 516 static int deleteIfVolatile(redisDb 
*db
, robj 
*key
); 
 517 static int deleteIfSwapped(redisDb 
*db
, robj 
*key
); 
 518 static int deleteKey(redisDb 
*db
, robj 
*key
); 
 519 static time_t getExpire(redisDb 
*db
, robj 
*key
); 
 520 static int setExpire(redisDb 
*db
, robj 
*key
, time_t when
); 
 521 static void updateSlavesWaitingBgsave(int bgsaveerr
); 
 522 static void freeMemoryIfNeeded(void); 
 523 static int processCommand(redisClient 
*c
); 
 524 static void setupSigSegvAction(void); 
 525 static void rdbRemoveTempFile(pid_t childpid
); 
 526 static void aofRemoveTempFile(pid_t childpid
); 
 527 static size_t stringObjectLen(robj 
*o
); 
 528 static void processInputBuffer(redisClient 
*c
); 
 529 static zskiplist 
*zslCreate(void); 
 530 static void zslFree(zskiplist 
*zsl
); 
 531 static void zslInsert(zskiplist 
*zsl
, double score
, robj 
*obj
); 
 532 static void sendReplyToClientWritev(aeEventLoop 
*el
, int fd
, void *privdata
, int mask
); 
 533 static void initClientMultiState(redisClient 
*c
); 
 534 static void freeClientMultiState(redisClient 
*c
); 
 535 static void queueMultiCommand(redisClient 
*c
, struct redisCommand 
*cmd
); 
 536 static void unblockClient(redisClient 
*c
); 
 537 static int handleClientsWaitingListPush(redisClient 
*c
, robj 
*key
, robj 
*ele
); 
 /* Forward declarations, mostly for the virtual memory (vm*) and threaded
  * I/O job helpers. acceptHandler() and vmThreadedIOCompletedJob() share the
  * same (aeEventLoop *, int fd, void *privdata, int mask) parameter list as
  * sendReplyToClientWritev(). */
 538 static void vmInit(void); 
 539 static void vmMarkPagesFree(off_t page
, off_t count
); 
 540 static robj 
*vmLoadObject(robj 
*key
); 
 541 static robj 
*vmPreviewObject(robj 
*key
); 
 542 static int vmSwapOneObjectBlocking(void); 
 543 static int vmSwapOneObjectThreaded(void); 
 544 static int vmCanSwapOut(void); 
 545 static void freeOneObjectFromFreelist(void); 
 546 static void acceptHandler(aeEventLoop 
*el
, int fd
, void *privdata
, int mask
); 
 547 static void vmThreadedIOCompletedJob(aeEventLoop 
*el
, int fd
, void *privdata
, int mask
); 
 548 static void vmCancelThreadedIOJob(robj 
*o
); 
 549 static void lockThreadedIO(void); 
 550 static void unlockThreadedIO(void); 
 551 static int vmSwapObjectThreaded(robj 
*key
, robj 
*val
, redisDb 
*db
); 
 552 static void freeIOJob(iojob 
*j
); 
 553 static void queueIOJob(iojob 
*j
); 
 /* Forward declarations of the command implementations. Every one has the
  * redisCommandProc shape (returns void, takes the redisClient pointer) and
  * is referenced by name from the cmdTable initializer.
  * Note that rpoplpushcommand is spelled with a lowercase 'c', unlike its
  * siblings; the cmdTable entry uses the same spelling. */
 555 static void authCommand(redisClient 
*c
); 
 556 static void pingCommand(redisClient 
*c
); 
 557 static void echoCommand(redisClient 
*c
); 
 558 static void setCommand(redisClient 
*c
); 
 559 static void setnxCommand(redisClient 
*c
); 
 560 static void getCommand(redisClient 
*c
); 
 561 static void delCommand(redisClient 
*c
); 
 562 static void existsCommand(redisClient 
*c
); 
 563 static void incrCommand(redisClient 
*c
); 
 564 static void decrCommand(redisClient 
*c
); 
 565 static void incrbyCommand(redisClient 
*c
); 
 566 static void decrbyCommand(redisClient 
*c
); 
 567 static void selectCommand(redisClient 
*c
); 
 568 static void randomkeyCommand(redisClient 
*c
); 
 569 static void keysCommand(redisClient 
*c
); 
 570 static void dbsizeCommand(redisClient 
*c
); 
 571 static void lastsaveCommand(redisClient 
*c
); 
 572 static void saveCommand(redisClient 
*c
); 
 573 static void bgsaveCommand(redisClient 
*c
); 
 574 static void bgrewriteaofCommand(redisClient 
*c
); 
 575 static void shutdownCommand(redisClient 
*c
); 
 576 static void moveCommand(redisClient 
*c
); 
 577 static void renameCommand(redisClient 
*c
); 
 578 static void renamenxCommand(redisClient 
*c
); 
 579 static void lpushCommand(redisClient 
*c
); 
 580 static void rpushCommand(redisClient 
*c
); 
 581 static void lpopCommand(redisClient 
*c
); 
 582 static void rpopCommand(redisClient 
*c
); 
 583 static void llenCommand(redisClient 
*c
); 
 584 static void lindexCommand(redisClient 
*c
); 
 585 static void lrangeCommand(redisClient 
*c
); 
 586 static void ltrimCommand(redisClient 
*c
); 
 587 static void typeCommand(redisClient 
*c
); 
 588 static void lsetCommand(redisClient 
*c
); 
 589 static void saddCommand(redisClient 
*c
); 
 590 static void sremCommand(redisClient 
*c
); 
 591 static void smoveCommand(redisClient 
*c
); 
 592 static void sismemberCommand(redisClient 
*c
); 
 593 static void scardCommand(redisClient 
*c
); 
 594 static void spopCommand(redisClient 
*c
); 
 595 static void srandmemberCommand(redisClient 
*c
); 
 596 static void sinterCommand(redisClient 
*c
); 
 597 static void sinterstoreCommand(redisClient 
*c
); 
 598 static void sunionCommand(redisClient 
*c
); 
 599 static void sunionstoreCommand(redisClient 
*c
); 
 600 static void sdiffCommand(redisClient 
*c
); 
 601 static void sdiffstoreCommand(redisClient 
*c
); 
 602 static void syncCommand(redisClient 
*c
); 
 603 static void flushdbCommand(redisClient 
*c
); 
 604 static void flushallCommand(redisClient 
*c
); 
 605 static void sortCommand(redisClient 
*c
); 
 606 static void lremCommand(redisClient 
*c
); 
 607 static void rpoplpushcommand(redisClient 
*c
); 
 608 static void infoCommand(redisClient 
*c
); 
 609 static void mgetCommand(redisClient 
*c
); 
 610 static void monitorCommand(redisClient 
*c
); 
 611 static void expireCommand(redisClient 
*c
); 
 612 static void expireatCommand(redisClient 
*c
); 
 613 static void getsetCommand(redisClient 
*c
); 
 614 static void ttlCommand(redisClient 
*c
); 
 615 static void slaveofCommand(redisClient 
*c
); 
 616 static void debugCommand(redisClient 
*c
); 
 617 static void msetCommand(redisClient 
*c
); 
 618 static void msetnxCommand(redisClient 
*c
); 
 619 static void zaddCommand(redisClient 
*c
); 
 620 static void zincrbyCommand(redisClient 
*c
); 
 621 static void zrangeCommand(redisClient 
*c
); 
 622 static void zrangebyscoreCommand(redisClient 
*c
); 
 623 static void zrevrangeCommand(redisClient 
*c
); 
 624 static void zcardCommand(redisClient 
*c
); 
 625 static void zremCommand(redisClient 
*c
); 
 626 static void zscoreCommand(redisClient 
*c
); 
 627 static void zremrangebyscoreCommand(redisClient 
*c
); 
 628 static void multiCommand(redisClient 
*c
); 
 629 static void execCommand(redisClient 
*c
); 
 630 static void blpopCommand(redisClient 
*c
); 
 631 static void brpopCommand(redisClient 
*c
); 
 633 /*================================= Globals ================================= */ 
 636 static struct redisServer server
; /* server global state */ 
 637 static struct redisCommand cmdTable
[] = { 
 638     {"get",getCommand
,2,REDIS_CMD_INLINE
}, 
 639     {"set",setCommand
,3,REDIS_CMD_BULK
|REDIS_CMD_DENYOOM
}, 
 640     {"setnx",setnxCommand
,3,REDIS_CMD_BULK
|REDIS_CMD_DENYOOM
}, 
 641     {"del",delCommand
,-2,REDIS_CMD_INLINE
}, 
 642     {"exists",existsCommand
,2,REDIS_CMD_INLINE
}, 
 643     {"incr",incrCommand
,2,REDIS_CMD_INLINE
|REDIS_CMD_DENYOOM
}, 
 644     {"decr",decrCommand
,2,REDIS_CMD_INLINE
|REDIS_CMD_DENYOOM
}, 
 645     {"mget",mgetCommand
,-2,REDIS_CMD_INLINE
}, 
 646     {"rpush",rpushCommand
,3,REDIS_CMD_BULK
|REDIS_CMD_DENYOOM
}, 
 647     {"lpush",lpushCommand
,3,REDIS_CMD_BULK
|REDIS_CMD_DENYOOM
}, 
 648     {"rpop",rpopCommand
,2,REDIS_CMD_INLINE
}, 
 649     {"lpop",lpopCommand
,2,REDIS_CMD_INLINE
}, 
 650     {"brpop",brpopCommand
,-3,REDIS_CMD_INLINE
}, 
 651     {"blpop",blpopCommand
,-3,REDIS_CMD_INLINE
}, 
 652     {"llen",llenCommand
,2,REDIS_CMD_INLINE
}, 
 653     {"lindex",lindexCommand
,3,REDIS_CMD_INLINE
}, 
 654     {"lset",lsetCommand
,4,REDIS_CMD_BULK
|REDIS_CMD_DENYOOM
}, 
 655     {"lrange",lrangeCommand
,4,REDIS_CMD_INLINE
}, 
 656     {"ltrim",ltrimCommand
,4,REDIS_CMD_INLINE
}, 
 657     {"lrem",lremCommand
,4,REDIS_CMD_BULK
}, 
 658     {"rpoplpush",rpoplpushcommand
,3,REDIS_CMD_BULK
|REDIS_CMD_DENYOOM
}, 
 659     {"sadd",saddCommand
,3,REDIS_CMD_BULK
|REDIS_CMD_DENYOOM
}, 
 660     {"srem",sremCommand
,3,REDIS_CMD_BULK
}, 
 661     {"smove",smoveCommand
,4,REDIS_CMD_BULK
}, 
 662     {"sismember",sismemberCommand
,3,REDIS_CMD_BULK
}, 
 663     {"scard",scardCommand
,2,REDIS_CMD_INLINE
}, 
 664     {"spop",spopCommand
,2,REDIS_CMD_INLINE
}, 
 665     {"srandmember",srandmemberCommand
,2,REDIS_CMD_INLINE
}, 
 666     {"sinter",sinterCommand
,-2,REDIS_CMD_INLINE
|REDIS_CMD_DENYOOM
}, 
 667     {"sinterstore",sinterstoreCommand
,-3,REDIS_CMD_INLINE
|REDIS_CMD_DENYOOM
}, 
 668     {"sunion",sunionCommand
,-2,REDIS_CMD_INLINE
|REDIS_CMD_DENYOOM
}, 
 669     {"sunionstore",sunionstoreCommand
,-3,REDIS_CMD_INLINE
|REDIS_CMD_DENYOOM
}, 
 670     {"sdiff",sdiffCommand
,-2,REDIS_CMD_INLINE
|REDIS_CMD_DENYOOM
}, 
 671     {"sdiffstore",sdiffstoreCommand
,-3,REDIS_CMD_INLINE
|REDIS_CMD_DENYOOM
}, 
 672     {"smembers",sinterCommand
,2,REDIS_CMD_INLINE
}, 
 673     {"zadd",zaddCommand
,4,REDIS_CMD_BULK
|REDIS_CMD_DENYOOM
}, 
 674     {"zincrby",zincrbyCommand
,4,REDIS_CMD_BULK
|REDIS_CMD_DENYOOM
}, 
 675     {"zrem",zremCommand
,3,REDIS_CMD_BULK
}, 
 676     {"zremrangebyscore",zremrangebyscoreCommand
,4,REDIS_CMD_INLINE
}, 
 677     {"zrange",zrangeCommand
,-4,REDIS_CMD_INLINE
}, 
 678     {"zrangebyscore",zrangebyscoreCommand
,-4,REDIS_CMD_INLINE
}, 
 679     {"zrevrange",zrevrangeCommand
,-4,REDIS_CMD_INLINE
}, 
 680     {"zcard",zcardCommand
,2,REDIS_CMD_INLINE
}, 
 681     {"zscore",zscoreCommand
,3,REDIS_CMD_BULK
|REDIS_CMD_DENYOOM
}, 
 682     {"incrby",incrbyCommand
,3,REDIS_CMD_INLINE
|REDIS_CMD_DENYOOM
}, 
 683     {"decrby",decrbyCommand
,3,REDIS_CMD_INLINE
|REDIS_CMD_DENYOOM
}, 
 684     {"getset",getsetCommand
,3,REDIS_CMD_BULK
|REDIS_CMD_DENYOOM
}, 
 685     {"mset",msetCommand
,-3,REDIS_CMD_BULK
|REDIS_CMD_DENYOOM
}, 
 686     {"msetnx",msetnxCommand
,-3,REDIS_CMD_BULK
|REDIS_CMD_DENYOOM
}, 
 687     {"randomkey",randomkeyCommand
,1,REDIS_CMD_INLINE
}, 
 688     {"select",selectCommand
,2,REDIS_CMD_INLINE
}, 
 689     {"move",moveCommand
,3,REDIS_CMD_INLINE
}, 
 690     {"rename",renameCommand
,3,REDIS_CMD_INLINE
}, 
 691     {"renamenx",renamenxCommand
,3,REDIS_CMD_INLINE
}, 
 692     {"expire",expireCommand
,3,REDIS_CMD_INLINE
}, 
 693     {"expireat",expireatCommand
,3,REDIS_CMD_INLINE
}, 
 694     {"keys",keysCommand
,2,REDIS_CMD_INLINE
}, 
 695     {"dbsize",dbsizeCommand
,1,REDIS_CMD_INLINE
}, 
 696     {"auth",authCommand
,2,REDIS_CMD_INLINE
}, 
 697     {"ping",pingCommand
,1,REDIS_CMD_INLINE
}, 
 698     {"echo",echoCommand
,2,REDIS_CMD_BULK
}, 
 699     {"save",saveCommand
,1,REDIS_CMD_INLINE
}, 
 700     {"bgsave",bgsaveCommand
,1,REDIS_CMD_INLINE
}, 
 701     {"bgrewriteaof",bgrewriteaofCommand
,1,REDIS_CMD_INLINE
}, 
 702     {"shutdown",shutdownCommand
,1,REDIS_CMD_INLINE
}, 
 703     {"lastsave",lastsaveCommand
,1,REDIS_CMD_INLINE
}, 
 704     {"type",typeCommand
,2,REDIS_CMD_INLINE
}, 
 705     {"multi",multiCommand
,1,REDIS_CMD_INLINE
}, 
 706     {"exec",execCommand
,1,REDIS_CMD_INLINE
}, 
 707     {"sync",syncCommand
,1,REDIS_CMD_INLINE
}, 
 708     {"flushdb",flushdbCommand
,1,REDIS_CMD_INLINE
}, 
 709     {"flushall",flushallCommand
,1,REDIS_CMD_INLINE
}, 
 710     {"sort",sortCommand
,-2,REDIS_CMD_INLINE
|REDIS_CMD_DENYOOM
}, 
 711     {"info",infoCommand
,1,REDIS_CMD_INLINE
}, 
 712     {"monitor",monitorCommand
,1,REDIS_CMD_INLINE
}, 
 713     {"ttl",ttlCommand
,2,REDIS_CMD_INLINE
}, 
 714     {"slaveof",slaveofCommand
,3,REDIS_CMD_INLINE
}, 
 715     {"debug",debugCommand
,-2,REDIS_CMD_INLINE
}, 
 719 /*============================ Utility functions ============================ */ 
 721 /* Glob-style pattern matching. */ 
 722 int stringmatchlen(const char *pattern
, int patternLen
, 
 723         const char *string
, int stringLen
, int nocase
) 
 728             while (pattern
[1] == '*') { 
 733                 return 1; /* match */ 
 735                 if (stringmatchlen(pattern
+1, patternLen
-1, 
 736                             string
, stringLen
, nocase
)) 
 737                     return 1; /* match */ 
 741             return 0; /* no match */ 
 745                 return 0; /* no match */ 
 755             not = pattern
[0] == '^'; 
 762                 if (pattern
[0] == '\\') { 
 765                     if (pattern
[0] == string
[0]) 
 767                 } else if (pattern
[0] == ']') { 
 769                 } else if (patternLen 
== 0) { 
 773                 } else if (pattern
[1] == '-' && patternLen 
>= 3) { 
 774                     int start 
= pattern
[0]; 
 775                     int end 
= pattern
[2]; 
 783                         start 
= tolower(start
); 
 789                     if (c 
>= start 
&& c 
<= end
) 
 793                         if (pattern
[0] == string
[0]) 
 796                         if (tolower((int)pattern
[0]) == tolower((int)string
[0])) 
 806                 return 0; /* no match */ 
 812             if (patternLen 
>= 2) { 
 819                 if (pattern
[0] != string
[0]) 
 820                     return 0; /* no match */ 
 822                 if (tolower((int)pattern
[0]) != tolower((int)string
[0])) 
 823                     return 0; /* no match */ 
 831         if (stringLen 
== 0) { 
 832             while(*pattern 
== '*') { 
 839     if (patternLen 
== 0 && stringLen 
== 0) 
/* Log a printf-style message at the given level.
 * The destination is stdout when server.logfile is NULL; otherwise the log
 * file is opened in append mode for this call and closed once the message
 * has been handled. The message is written only when level is at least
 * server.verbosity, prefixed with a "%d %b %H:%M:%S" timestamp and the
 * marker character c[level]. */
 844 static void redisLog(int level
, const char *fmt
, ...) { 
 848     fp 
= (server
.logfile 
== NULL
) ? stdout 
: fopen(server
.logfile
,"a"); 
 852     if (level 
>= server
.verbosity
) { 
 858         strftime(buf
,64,"%d %b %H:%M:%S",localtime(&now
)); 
 859         fprintf(fp
,"%s %c ",buf
,c
[level
]); 
 860         vfprintf(fp
, fmt
, ap
); 
 866     if (server
.logfile
) fclose(fp
); 
 869 /*====================== Hash table type implementation  ==================== */ 
 871 /* This is a hash table type that uses the SDS dynamic strings library as 
 872  * keys and Redis objects as values (objects can hold SDS strings, 
/* Dict value destructor. It is registered in zsetDictType, whose values are
 * malloc(sizeof(double)) allocations. privdata is unused. */
 875 static void dictVanillaFree(void *privdata
, void *val
) 
 877     DICT_NOTUSED(privdata
); 
 881 static void dictListDestructor(void *privdata
, void *val
) 
 883     DICT_NOTUSED(privdata
); 
 884     listRelease((list
*)val
); 
 887 static int sdsDictKeyCompare(void *privdata
, const void *key1
, 
 891     DICT_NOTUSED(privdata
); 
 893     l1 
= sdslen((sds
)key1
); 
 894     l2 
= sdslen((sds
)key2
); 
 895     if (l1 
!= l2
) return 0; 
 896     return memcmp(key1
, key2
, l1
) == 0; 
/* Dict key/value destructor for Redis objects. privdata is unused.
 * A NULL val is ignored, because the values of swapped out keys are stored
 * as NULL. */
 899 static void dictRedisObjectDestructor(void *privdata
, void *val
) 
 901     DICT_NOTUSED(privdata
); 
 903     if (val 
== NULL
) return; /* Values of swapped out keys are set to NULL */ 
 907 static int dictObjKeyCompare(void *privdata
, const void *key1
, 
 910     const robj 
*o1 
= key1
, *o2 
= key2
; 
 911     return sdsDictKeyCompare(privdata
,o1
->ptr
,o2
->ptr
); 
 914 static unsigned int dictObjHash(const void *key
) { 
 916     return dictGenHashFunction(o
->ptr
, sdslen((sds
)o
->ptr
)); 
/* Key comparison for object keys: both keys are first passed through
 * getDecodedObject(), then their ->ptr strings are compared with
 * sdsDictKeyCompare(). */
 919 static int dictEncObjKeyCompare(void *privdata
, const void *key1
, 
 922     robj 
*o1 
= (robj
*) key1
, *o2 
= (robj
*) key2
; 
 925     o1 
= getDecodedObject(o1
); 
 926     o2 
= getDecodedObject(o2
); 
 927     cmp 
= sdsDictKeyCompare(privdata
,o1
->ptr
,o2
->ptr
); 
/* Hash function for object keys: the key is passed through
 * getDecodedObject() and the hash is dictGenHashFunction() computed over the
 * sdslen() bytes of the resulting ->ptr string. */
 933 static unsigned int dictEncObjHash(const void *key
) { 
 934     robj 
*o 
= (robj
*) key
; 
 936     o 
= getDecodedObject(o
); 
 937     unsigned int hash 
= dictGenHashFunction(o
->ptr
, sdslen((sds
)o
->ptr
)); 
/* Dict type whose keys are objects hashed and compared through
 * getDecodedObject() (dictEncObjHash / dictEncObjKeyCompare) and released
 * with dictRedisObjectDestructor. There is no value destructor. In this file
 * it backs server.sharingpool and the per-database 'expires' table. */
 942 static dictType setDictType 
= { 
 943     dictEncObjHash
,            /* hash function */ 
 946     dictEncObjKeyCompare
,      /* key compare */ 
 947     dictRedisObjectDestructor
, /* key destructor */ 
 948     NULL                       
/* val destructor */ 
/* Dict type with the same key callbacks as setDictType; values are released
 * with dictVanillaFree. */
 951 static dictType zsetDictType 
= { 
 952     dictEncObjHash
,            /* hash function */ 
 955     dictEncObjKeyCompare
,      /* key compare */ 
 956     dictRedisObjectDestructor
, /* key destructor */ 
 957     dictVanillaFree            
/* val destructor of malloc(sizeof(double)) */ 
/* Dict type whose keys are objects hashed and compared directly on their
 * ->ptr sds string (dictObjHash / dictObjKeyCompare). Both keys and values
 * are released with dictRedisObjectDestructor. It is used for the main
 * 'dict' of every database. */
 960 static dictType hashDictType 
= { 
 961     dictObjHash
,                /* hash function */ 
 964     dictObjKeyCompare
,          /* key compare */ 
 965     dictRedisObjectDestructor
,  /* key destructor */ 
 966     dictRedisObjectDestructor   
/* val destructor */ 
 969 /* Keylist hash table type has unencoded redis objects as keys and 
 970  * lists as values. It's used for blocking operations (BLPOP) */ 
 971 static dictType keylistDictType 
= { 
 972     dictObjHash
,                /* hash function */ 
 975     dictObjKeyCompare
,          /* key compare */ 
 976     dictRedisObjectDestructor
,  /* key destructor */ 
 977     dictListDestructor          
/* val destructor */ 
 980 /* ========================= Random utility functions ======================= */ 
 982 /* Redis generally does not try to recover from out of memory conditions 
 983  * when allocating objects or strings, it is not clear if it will be possible 
 984  * to report this condition to the client since the networking layer itself 
 985  * is based on heap allocation for send buffers, so we simply abort. 
 986  * At least the code will be simpler to read... */ 
 987 static void oom(const char *msg
) { 
 988     redisLog(REDIS_WARNING
, "%s: Out of memory\n",msg
); 
 993 /* ====================== Redis server networking stuff ===================== */ 
/* Walk server.clients once, checking each client against 'now'.
 * - When server.maxidletime is non-zero, a client that is neither a slave
 *   nor a master and whose lastinteraction is more than maxidletime seconds
 *   old is handled as idle (logged as "Closing idle client").
 * - Otherwise, a REDIS_BLOCKED client whose blockingto deadline is set
 *   (non-zero) and already passed is sent shared.nullmultibulk. */
 994 static void closeTimedoutClients(void) { 
 997     time_t now 
= time(NULL
); 
 999     listRewind(server
.clients
); 
1000     while ((ln 
= listYield(server
.clients
)) != NULL
) { 
1001         c 
= listNodeValue(ln
); 
1002         if (server
.maxidletime 
&& 
1003             !(c
->flags 
& REDIS_SLAVE
) &&    /* no timeout for slaves */ 
1004             !(c
->flags 
& REDIS_MASTER
) &&   /* no timeout for masters */ 
1005              (now 
- c
->lastinteraction 
> server
.maxidletime
)) 
1007             redisLog(REDIS_VERBOSE
,"Closing idle client"); 
1009         } else if (c
->flags 
& REDIS_BLOCKED
) { 
1010             if (c
->blockingto 
!= 0 && c
->blockingto 
< now
) { 
1011                 addReply(c
,shared
.nullmultibulk
); 
1018 static int htNeedsResize(dict 
*dict
) { 
1019     long long size
, used
; 
1021     size 
= dictSlots(dict
); 
1022     used 
= dictSize(dict
); 
1023     return (size 
&& used 
&& size 
> DICT_HT_INITIAL_SIZE 
&& 
1024             (used
*100/size 
< REDIS_HT_MINFILL
)); 
1027 /* If the percentage of used slots in the HT reaches REDIS_HT_MINFILL 
1028  * we resize the hash table to save memory */ 
1029 static void tryResizeHashTables(void) { 
1032     for (j 
= 0; j 
< server
.dbnum
; j
++) { 
1033         if (htNeedsResize(server
.db
[j
].dict
)) { 
1034             redisLog(REDIS_VERBOSE
,"The hash table %d is too sparse, resize it...",j
); 
1035             dictResize(server
.db
[j
].dict
); 
1036             redisLog(REDIS_VERBOSE
,"Hash table %d resized.",j
); 
1038         if (htNeedsResize(server
.db
[j
].expires
)) 
1039             dictResize(server
.db
[j
].expires
); 
1043 /* A background saving child (BGSAVE) terminated its work. Handle this. */ 
/* 'statloc' is the wait status of the child. A clean exit with code 0 is
 * logged as success and refreshes server.lastsave; a non-zero exit code is
 * logged as an error; a termination by signal is logged and the temp file of
 * the child is removed. In every case server.bgsavechildpid is reset to -1
 * and the slaves waiting for this BGSAVE are notified.
 * NOTE(review): the status handed to updateSlavesWaitingBgsave() depends on
 * exitcode alone, so a child killed by a signal may be reported as REDIS_OK
 * if WEXITSTATUS() yields 0 for it -- confirm whether bysignal should be
 * checked there as well. */
1044 void backgroundSaveDoneHandler(int statloc
) { 
1045     int exitcode 
= WEXITSTATUS(statloc
); 
1046     int bysignal 
= WIFSIGNALED(statloc
); 
1048     if (!bysignal 
&& exitcode 
== 0) { 
1049         redisLog(REDIS_NOTICE
, 
1050             "Background saving terminated with success"); 
1052         server
.lastsave 
= time(NULL
); 
1053     } else if (!bysignal 
&& exitcode 
!= 0) { 
1054         redisLog(REDIS_WARNING
, "Background saving error"); 
1056         redisLog(REDIS_WARNING
, 
1057             "Background saving terminated by signal"); 
1058         rdbRemoveTempFile(server
.bgsavechildpid
); 
1060     server
.bgsavechildpid 
= -1; 
1061     /* Possibly there are slaves waiting for a BGSAVE in order to be served 
1062      * (the first stage of SYNC is a bulk transfer of dump.rdb) */ 
1063     updateSlavesWaitingBgsave(exitcode 
== 0 ? REDIS_OK 
: REDIS_ERR
); 
1066 /* A background append only file rewriting (BGREWRITEAOF) terminated its work. 
1068 void backgroundRewriteDoneHandler(int statloc
) { 
1069     int exitcode 
= WEXITSTATUS(statloc
); 
1070     int bysignal 
= WIFSIGNALED(statloc
); 
1072     if (!bysignal 
&& exitcode 
== 0) { 
1076         redisLog(REDIS_NOTICE
, 
1077             "Background append only file rewriting terminated with success"); 
1078         /* Now it's time to flush the differences accumulated by the parent */ 
1079         snprintf(tmpfile
,256,"temp-rewriteaof-bg-%d.aof", (int) server
.bgrewritechildpid
); 
1080         fd 
= open(tmpfile
,O_WRONLY
|O_APPEND
); 
1082             redisLog(REDIS_WARNING
, "Not able to open the temp append only file produced by the child: %s", strerror(errno
)); 
1085         /* Flush our data... */ 
1086         if (write(fd
,server
.bgrewritebuf
,sdslen(server
.bgrewritebuf
)) != 
1087                 (signed) sdslen(server
.bgrewritebuf
)) { 
1088             redisLog(REDIS_WARNING
, "Error or short write trying to flush the parent diff of the append log file in the child temp file: %s", strerror(errno
)); 
1092         redisLog(REDIS_NOTICE
,"Parent diff flushed into the new append log file with success (%lu bytes)",sdslen(server
.bgrewritebuf
)); 
1093         /* Now our work is to rename the temp file into the stable file. And 
1094          * switch the file descriptor used by the server for append only. */ 
1095         if (rename(tmpfile
,server
.appendfilename
) == -1) { 
1096             redisLog(REDIS_WARNING
,"Can't rename the temp append only file into the stable one: %s", strerror(errno
)); 
1100         /* Mission completed... almost */ 
1101         redisLog(REDIS_NOTICE
,"Append only file successfully rewritten."); 
1102         if (server
.appendfd 
!= -1) { 
1103             /* If append only is actually enabled... */ 
1104             close(server
.appendfd
); 
1105             server
.appendfd 
= fd
; 
1107             server
.appendseldb 
= -1; /* Make sure it will issue SELECT */ 
1108             redisLog(REDIS_NOTICE
,"The new append only file was selected for future appends."); 
1110             /* If append only is disabled we just generate a dump in this 
1111              * format. Why not? */ 
1114     } else if (!bysignal 
&& exitcode 
!= 0) { 
1115         redisLog(REDIS_WARNING
, "Background append only file rewriting error"); 
1117         redisLog(REDIS_WARNING
, 
1118             "Background append only file rewriting terminated by signal"); 
1121     sdsfree(server
.bgrewritebuf
); 
1122     server
.bgrewritebuf 
= sdsempty(); 
1123     aofRemoveTempFile(server
.bgrewritechildpid
); 
1124     server
.bgrewritechildpid 
= -1; 
1127 static int serverCron(struct aeEventLoop 
*eventLoop
, long long id
, void *clientData
) { 
1128     int j
, loops 
= server
.cronloops
++; 
1129     REDIS_NOTUSED(eventLoop
); 
1131     REDIS_NOTUSED(clientData
); 
1133     /* We take a cached value of the unix time in the global state because 
1134      * with virtual memory and aging we need to store the current time 
1135      * in objects at every object access, and accuracy is not needed. 
1136      * Accessing a global var is faster than calling time(NULL) */ 
1137     server
.unixtime 
= time(NULL
); 
1139     /* Update the global state with the amount of used memory */ 
1140     server
.usedmemory 
= zmalloc_used_memory(); 
1142     /* Show some info about non-empty databases */ 
1143     for (j 
= 0; j 
< server
.dbnum
; j
++) { 
1144         long long size
, used
, vkeys
; 
1146         size 
= dictSlots(server
.db
[j
].dict
); 
1147         used 
= dictSize(server
.db
[j
].dict
); 
1148         vkeys 
= dictSize(server
.db
[j
].expires
); 
1149         if (!(loops 
% 5) && (used 
|| vkeys
)) { 
1150             redisLog(REDIS_VERBOSE
,"DB %d: %lld keys (%lld volatile) in %lld slots HT.",j
,used
,vkeys
,size
); 
1151             /* dictPrintStats(server.dict); */ 
1155     /* We don't want to resize the hash tables while a background saving 
1156      * is in progress: the saving child is created using fork() that is 
1157      * implemented with a copy-on-write semantic in most modern systems, so 
1158      * if we resize the HT while there is the saving child at work actually 
1159      * a lot of memory movements in the parent will cause a lot of pages 
1161     if (server
.bgsavechildpid 
== -1) tryResizeHashTables(); 
1163     /* Show information about connected clients */ 
1165         redisLog(REDIS_VERBOSE
,"%d clients connected (%d slaves), %zu bytes in use, %d shared objects", 
1166             listLength(server
.clients
)-listLength(server
.slaves
), 
1167             listLength(server
.slaves
), 
1169             dictSize(server
.sharingpool
)); 
1172     /* Close connections of timedout clients */ 
1173     if ((server
.maxidletime 
&& !(loops 
% 10)) || server
.blockedclients
) 
1174         closeTimedoutClients(); 
1176     /* Check if a background saving or AOF rewrite in progress terminated */ 
1177     if (server
.bgsavechildpid 
!= -1 || server
.bgrewritechildpid 
!= -1) { 
1181         if ((pid 
= wait3(&statloc
,WNOHANG
,NULL
)) != 0) { 
1182             if (pid 
== server
.bgsavechildpid
) { 
1183                 backgroundSaveDoneHandler(statloc
); 
1185                 backgroundRewriteDoneHandler(statloc
); 
1189         /* If there is not a background saving in progress check if 
1190          * we have to save now */ 
1191          time_t now 
= time(NULL
); 
1192          for (j 
= 0; j 
< server
.saveparamslen
; j
++) { 
1193             struct saveparam 
*sp 
= server
.saveparams
+j
; 
1195             if (server
.dirty 
>= sp
->changes 
&& 
1196                 now
-server
.lastsave 
> sp
->seconds
) { 
1197                 redisLog(REDIS_NOTICE
,"%d changes in %d seconds. Saving...", 
1198                     sp
->changes
, sp
->seconds
); 
1199                 rdbSaveBackground(server
.dbfilename
); 
1205     /* Try to expire a few timed out keys. The algorithm used is adaptive and 
1206      * will use few CPU cycles if there are few expiring keys, otherwise 
1207      * it will get more aggressive to avoid that too much memory is used by 
1208      * keys that can be removed from the keyspace. */ 
1209     for (j 
= 0; j 
< server
.dbnum
; j
++) { 
1211         redisDb 
*db 
= server
.db
+j
; 
1213         /* Continue to expire if at the end of the cycle more than 25% 
1214          * of the keys were expired. */ 
1216             long num 
= dictSize(db
->expires
); 
1217             time_t now 
= time(NULL
); 
1220             if (num 
> REDIS_EXPIRELOOKUPS_PER_CRON
) 
1221                 num 
= REDIS_EXPIRELOOKUPS_PER_CRON
; 
1226                 if ((de 
= dictGetRandomKey(db
->expires
)) == NULL
) break; 
1227                 t 
= (time_t) dictGetEntryVal(de
); 
1229                     deleteKey(db
,dictGetEntryKey(de
)); 
1233         } while (expired 
> REDIS_EXPIRELOOKUPS_PER_CRON
/4); 
1236     /* Swap a few keys on disk if we are over the memory limit and VM 
1237      * is enabled. Try to free objects from the free list first. */ 
1238     if (vmCanSwapOut()) { 
1239         while (server
.vm_enabled 
&& zmalloc_used_memory() > 
1240                 server
.vm_max_memory
) 
1242             if (listLength(server
.objfreelist
)) { 
1243                 freeOneObjectFromFreelist(); 
1245                 if (vmSwapOneObjectThreaded() == REDIS_ERR
) { 
1246                     if ((loops 
% 30) == 0 && zmalloc_used_memory() > 
1247                         (server
.vm_max_memory
+server
.vm_max_memory
/10)) { 
1248                         redisLog(REDIS_WARNING
,"WARNING: vm-max-memory limit exceeded by more than 10%% but unable to swap more objects out!"); 
1251                 /* Note that we freed just one object, because anyway when 
1252                  * the I/O thread in charge to swap this object out will 
1253                  * do its work, the handler of completed jobs will try to swap 
1254                  * more objects if we are out of memory. */ 
1260     /* Check if we should connect to a MASTER */ 
1261     if (server
.replstate 
== REDIS_REPL_CONNECT
) { 
1262         redisLog(REDIS_NOTICE
,"Connecting to MASTER..."); 
1263         if (syncWithMaster() == REDIS_OK
) { 
1264             redisLog(REDIS_NOTICE
,"MASTER <-> SLAVE sync succeeded"); 
1270 static void createSharedObjects(void) { 
1271     shared
.crlf 
= createObject(REDIS_STRING
,sdsnew("\r\n")); 
1272     shared
.ok 
= createObject(REDIS_STRING
,sdsnew("+OK\r\n")); 
1273     shared
.err 
= createObject(REDIS_STRING
,sdsnew("-ERR\r\n")); 
1274     shared
.emptybulk 
= createObject(REDIS_STRING
,sdsnew("$0\r\n\r\n")); 
1275     shared
.czero 
= createObject(REDIS_STRING
,sdsnew(":0\r\n")); 
1276     shared
.cone 
= createObject(REDIS_STRING
,sdsnew(":1\r\n")); 
1277     shared
.nullbulk 
= createObject(REDIS_STRING
,sdsnew("$-1\r\n")); 
1278     shared
.nullmultibulk 
= createObject(REDIS_STRING
,sdsnew("*-1\r\n")); 
1279     shared
.emptymultibulk 
= createObject(REDIS_STRING
,sdsnew("*0\r\n")); 
1280     shared
.pong 
= createObject(REDIS_STRING
,sdsnew("+PONG\r\n")); 
1281     shared
.queued 
= createObject(REDIS_STRING
,sdsnew("+QUEUED\r\n")); 
1282     shared
.wrongtypeerr 
= createObject(REDIS_STRING
,sdsnew( 
1283         "-ERR Operation against a key holding the wrong kind of value\r\n")); 
1284     shared
.nokeyerr 
= createObject(REDIS_STRING
,sdsnew( 
1285         "-ERR no such key\r\n")); 
1286     shared
.syntaxerr 
= createObject(REDIS_STRING
,sdsnew( 
1287         "-ERR syntax error\r\n")); 
1288     shared
.sameobjecterr 
= createObject(REDIS_STRING
,sdsnew( 
1289         "-ERR source and destination objects are the same\r\n")); 
1290     shared
.outofrangeerr 
= createObject(REDIS_STRING
,sdsnew( 
1291         "-ERR index out of range\r\n")); 
1292     shared
.space 
= createObject(REDIS_STRING
,sdsnew(" ")); 
1293     shared
.colon 
= createObject(REDIS_STRING
,sdsnew(":")); 
1294     shared
.plus 
= createObject(REDIS_STRING
,sdsnew("+")); 
1295     shared
.select0 
= createStringObject("select 0\r\n",10); 
1296     shared
.select1 
= createStringObject("select 1\r\n",10); 
1297     shared
.select2 
= createStringObject("select 2\r\n",10); 
1298     shared
.select3 
= createStringObject("select 3\r\n",10); 
1299     shared
.select4 
= createStringObject("select 4\r\n",10); 
1300     shared
.select5 
= createStringObject("select 5\r\n",10); 
1301     shared
.select6 
= createStringObject("select 6\r\n",10); 
1302     shared
.select7 
= createStringObject("select 7\r\n",10); 
1303     shared
.select8 
= createStringObject("select 8\r\n",10); 
1304     shared
.select9 
= createStringObject("select 9\r\n",10); 
1307 static void appendServerSaveParams(time_t seconds
, int changes
) { 
1308     server
.saveparams 
= zrealloc(server
.saveparams
,sizeof(struct saveparam
)*(server
.saveparamslen
+1)); 
1309     server
.saveparams
[server
.saveparamslen
].seconds 
= seconds
; 
1310     server
.saveparams
[server
.saveparamslen
].changes 
= changes
; 
1311     server
.saveparamslen
++; 
1314 static void resetServerSaveParams() { 
1315     zfree(server
.saveparams
); 
1316     server
.saveparams 
= NULL
; 
1317     server
.saveparamslen 
= 0; 
1320 static void initServerConfig() { 
1321     server
.dbnum 
= REDIS_DEFAULT_DBNUM
; 
1322     server
.port 
= REDIS_SERVERPORT
; 
1323     server
.verbosity 
= REDIS_VERBOSE
; 
1324     server
.maxidletime 
= REDIS_MAXIDLETIME
; 
1325     server
.saveparams 
= NULL
; 
1326     server
.logfile 
= NULL
; /* NULL = log on standard output */ 
1327     server
.bindaddr 
= NULL
; 
1328     server
.glueoutputbuf 
= 1; 
1329     server
.daemonize 
= 0; 
1330     server
.appendonly 
= 0; 
1331     server
.appendfsync 
= APPENDFSYNC_ALWAYS
; 
1332     server
.lastfsync 
= time(NULL
); 
1333     server
.appendfd 
= -1; 
1334     server
.appendseldb 
= -1; /* Make sure the first time will not match */ 
1335     server
.pidfile 
= "/var/run/redis.pid"; 
1336     server
.dbfilename 
= "dump.rdb"; 
1337     server
.appendfilename 
= "appendonly.aof"; 
1338     server
.requirepass 
= NULL
; 
1339     server
.shareobjects 
= 0; 
1340     server
.rdbcompression 
= 1; 
1341     server
.sharingpoolsize 
= 1024; 
1342     server
.maxclients 
= 0; 
1343     server
.blockedclients 
= 0; 
1344     server
.maxmemory 
= 0; 
1345     server
.vm_enabled 
= 0; 
1346     server
.vm_page_size 
= 256;          /* 256 bytes per page */ 
1347     server
.vm_pages 
= 1024*1024*100;    /* 104 millions of pages */ 
1348     server
.vm_max_memory 
= 1024LL*1024*1024*1; /* 1 GB of RAM */ 
1349     server
.vm_max_threads 
= 4; 
1351     resetServerSaveParams(); 
1353     appendServerSaveParams(60*60,1);  /* save after 1 hour and 1 change */ 
1354     appendServerSaveParams(300,100);  /* save after 5 minutes and 100 changes */ 
1355     appendServerSaveParams(60,10000); /* save after 1 minute and 10000 changes */ 
1356     /* Replication related */ 
1358     server
.masterauth 
= NULL
; 
1359     server
.masterhost 
= NULL
; 
1360     server
.masterport 
= 6379; 
1361     server
.master 
= NULL
; 
1362     server
.replstate 
= REDIS_REPL_NONE
; 
1364     /* Double constants initialization */ 
1366     R_PosInf 
= 1.0/R_Zero
; 
1367     R_NegInf 
= -1.0/R_Zero
; 
1368     R_Nan 
= R_Zero
/R_Zero
; 
1371 static void initServer() { 
1374     signal(SIGHUP
, SIG_IGN
); 
1375     signal(SIGPIPE
, SIG_IGN
); 
1376     setupSigSegvAction(); 
1378     server
.devnull 
= fopen("/dev/null","w"); 
1379     if (server
.devnull 
== NULL
) { 
1380         redisLog(REDIS_WARNING
, "Can't open /dev/null: %s", server
.neterr
); 
1383     server
.clients 
= listCreate(); 
1384     server
.slaves 
= listCreate(); 
1385     server
.monitors 
= listCreate(); 
1386     server
.objfreelist 
= listCreate(); 
1387     createSharedObjects(); 
1388     server
.el 
= aeCreateEventLoop(); 
1389     server
.db 
= zmalloc(sizeof(redisDb
)*server
.dbnum
); 
1390     server
.sharingpool 
= dictCreate(&setDictType
,NULL
); 
1391     server
.fd 
= anetTcpServer(server
.neterr
, server
.port
, server
.bindaddr
); 
1392     if (server
.fd 
== -1) { 
1393         redisLog(REDIS_WARNING
, "Opening TCP port: %s", server
.neterr
); 
1396     for (j 
= 0; j 
< server
.dbnum
; j
++) { 
1397         server
.db
[j
].dict 
= dictCreate(&hashDictType
,NULL
); 
1398         server
.db
[j
].expires 
= dictCreate(&setDictType
,NULL
); 
1399         server
.db
[j
].blockingkeys 
= dictCreate(&keylistDictType
,NULL
); 
1400         server
.db
[j
].id 
= j
; 
1402     server
.cronloops 
= 0; 
1403     server
.bgsavechildpid 
= -1; 
1404     server
.bgrewritechildpid 
= -1; 
1405     server
.bgrewritebuf 
= sdsempty(); 
1406     server
.lastsave 
= time(NULL
); 
1408     server
.usedmemory 
= 0; 
1409     server
.stat_numcommands 
= 0; 
1410     server
.stat_numconnections 
= 0; 
1411     server
.stat_starttime 
= time(NULL
); 
1412     server
.unixtime 
= time(NULL
); 
1413     aeCreateTimeEvent(server
.el
, 1, serverCron
, NULL
, NULL
); 
1414     if (aeCreateFileEvent(server
.el
, server
.fd
, AE_READABLE
, 
1415         acceptHandler
, NULL
) == AE_ERR
) oom("creating file event"); 
1417     if (server
.appendonly
) { 
1418         server
.appendfd 
= open(server
.appendfilename
,O_WRONLY
|O_APPEND
|O_CREAT
,0644); 
1419         if (server
.appendfd 
== -1) { 
1420             redisLog(REDIS_WARNING
, "Can't open the append-only file: %s", 
1426     if (server
.vm_enabled
) vmInit(); 
1429 /* Empty the whole database */ 
1430 static long long emptyDb() { 
1432     long long removed 
= 0; 
1434     for (j 
= 0; j 
< server
.dbnum
; j
++) { 
1435         removed 
+= dictSize(server
.db
[j
].dict
); 
1436         dictEmpty(server
.db
[j
].dict
); 
1437         dictEmpty(server
.db
[j
].expires
); 
/* Convert a "yes"/"no" configuration argument (matched case-insensitively)
 * into 1 or 0. Any other string yields -1, which callers treat as an
 * invalid argument. */
static int yesnotoi(char *s) {
    if (strcasecmp(s,"yes") == 0) return 1;
    if (strcasecmp(s,"no") == 0) return 0;
    return -1;
}
1448 /* I agree, this is a very rudimentary way to load a configuration... 
1449    will improve later if the config gets more complex */ 
1450 static void loadServerConfig(char *filename
) { 
1452     char buf
[REDIS_CONFIGLINE_MAX
+1], *err 
= NULL
; 
1456     if (filename
[0] == '-' && filename
[1] == '\0') 
1459         if ((fp 
= fopen(filename
,"r")) == NULL
) { 
1460             redisLog(REDIS_WARNING
,"Fatal error, can't open config file"); 
1465     while(fgets(buf
,REDIS_CONFIGLINE_MAX
+1,fp
) != NULL
) { 
1471         line 
= sdstrim(line
," \t\r\n"); 
1473         /* Skip comments and blank lines*/ 
1474         if (line
[0] == '#' || line
[0] == '\0') { 
1479         /* Split into arguments */ 
1480         argv 
= sdssplitlen(line
,sdslen(line
)," ",1,&argc
); 
1481         sdstolower(argv
[0]); 
1483         /* Execute config directives */ 
1484         if (!strcasecmp(argv
[0],"timeout") && argc 
== 2) { 
1485             server
.maxidletime 
= atoi(argv
[1]); 
1486             if (server
.maxidletime 
< 0) { 
1487                 err 
= "Invalid timeout value"; goto loaderr
; 
1489         } else if (!strcasecmp(argv
[0],"port") && argc 
== 2) { 
1490             server
.port 
= atoi(argv
[1]); 
1491             if (server
.port 
< 1 || server
.port 
> 65535) { 
1492                 err 
= "Invalid port"; goto loaderr
; 
1494         } else if (!strcasecmp(argv
[0],"bind") && argc 
== 2) { 
1495             server
.bindaddr 
= zstrdup(argv
[1]); 
1496         } else if (!strcasecmp(argv
[0],"save") && argc 
== 3) { 
1497             int seconds 
= atoi(argv
[1]); 
1498             int changes 
= atoi(argv
[2]); 
1499             if (seconds 
< 1 || changes 
< 0) { 
1500                 err 
= "Invalid save parameters"; goto loaderr
; 
1502             appendServerSaveParams(seconds
,changes
); 
1503         } else if (!strcasecmp(argv
[0],"dir") && argc 
== 2) { 
1504             if (chdir(argv
[1]) == -1) { 
1505                 redisLog(REDIS_WARNING
,"Can't chdir to '%s': %s", 
1506                     argv
[1], strerror(errno
)); 
1509         } else if (!strcasecmp(argv
[0],"loglevel") && argc 
== 2) { 
1510             if (!strcasecmp(argv
[1],"debug")) server
.verbosity 
= REDIS_DEBUG
; 
1511             else if (!strcasecmp(argv
[1],"verbose")) server
.verbosity 
= REDIS_VERBOSE
; 
1512             else if (!strcasecmp(argv
[1],"notice")) server
.verbosity 
= REDIS_NOTICE
; 
1513             else if (!strcasecmp(argv
[1],"warning")) server
.verbosity 
= REDIS_WARNING
; 
1515                 err 
= "Invalid log level. Must be one of debug, notice, warning"; 
1518         } else if (!strcasecmp(argv
[0],"logfile") && argc 
== 2) { 
1521             server
.logfile 
= zstrdup(argv
[1]); 
1522             if (!strcasecmp(server
.logfile
,"stdout")) { 
1523                 zfree(server
.logfile
); 
1524                 server
.logfile 
= NULL
; 
1526             if (server
.logfile
) { 
1527                 /* Test if we are able to open the file. The server will not 
1528                  * be able to abort just for this problem later... */ 
1529                 logfp 
= fopen(server
.logfile
,"a"); 
1530                 if (logfp 
== NULL
) { 
1531                     err 
= sdscatprintf(sdsempty(), 
1532                         "Can't open the log file: %s", strerror(errno
)); 
1537         } else if (!strcasecmp(argv
[0],"databases") && argc 
== 2) { 
1538             server
.dbnum 
= atoi(argv
[1]); 
1539             if (server
.dbnum 
< 1) { 
1540                 err 
= "Invalid number of databases"; goto loaderr
; 
1542         } else if (!strcasecmp(argv
[0],"maxclients") && argc 
== 2) { 
1543             server
.maxclients 
= atoi(argv
[1]); 
1544         } else if (!strcasecmp(argv
[0],"maxmemory") && argc 
== 2) { 
1545             server
.maxmemory 
= strtoll(argv
[1], NULL
, 10); 
1546         } else if (!strcasecmp(argv
[0],"slaveof") && argc 
== 3) { 
1547             server
.masterhost 
= sdsnew(argv
[1]); 
1548             server
.masterport 
= atoi(argv
[2]); 
1549             server
.replstate 
= REDIS_REPL_CONNECT
; 
1550         } else if (!strcasecmp(argv
[0],"masterauth") && argc 
== 2) { 
1551                 server
.masterauth 
= zstrdup(argv
[1]); 
1552         } else if (!strcasecmp(argv
[0],"glueoutputbuf") && argc 
== 2) { 
1553             if ((server
.glueoutputbuf 
= yesnotoi(argv
[1])) == -1) { 
1554                 err 
= "argument must be 'yes' or 'no'"; goto loaderr
; 
1556         } else if (!strcasecmp(argv
[0],"shareobjects") && argc 
== 2) { 
1557             if ((server
.shareobjects 
= yesnotoi(argv
[1])) == -1) { 
1558                 err 
= "argument must be 'yes' or 'no'"; goto loaderr
; 
1560         } else if (!strcasecmp(argv
[0],"rdbcompression") && argc 
== 2) { 
1561             if ((server
.rdbcompression 
= yesnotoi(argv
[1])) == -1) { 
1562                 err 
= "argument must be 'yes' or 'no'"; goto loaderr
; 
1564         } else if (!strcasecmp(argv
[0],"shareobjectspoolsize") && argc 
== 2) { 
1565             server
.sharingpoolsize 
= atoi(argv
[1]); 
1566             if (server
.sharingpoolsize 
< 1) { 
1567                 err 
= "invalid object sharing pool size"; goto loaderr
; 
1569         } else if (!strcasecmp(argv
[0],"daemonize") && argc 
== 2) { 
1570             if ((server
.daemonize 
= yesnotoi(argv
[1])) == -1) { 
1571                 err 
= "argument must be 'yes' or 'no'"; goto loaderr
; 
1573         } else if (!strcasecmp(argv
[0],"appendonly") && argc 
== 2) { 
1574             if ((server
.appendonly 
= yesnotoi(argv
[1])) == -1) { 
1575                 err 
= "argument must be 'yes' or 'no'"; goto loaderr
; 
1577         } else if (!strcasecmp(argv
[0],"appendfsync") && argc 
== 2) { 
1578             if (!strcasecmp(argv
[1],"no")) { 
1579                 server
.appendfsync 
= APPENDFSYNC_NO
; 
1580             } else if (!strcasecmp(argv
[1],"always")) { 
1581                 server
.appendfsync 
= APPENDFSYNC_ALWAYS
; 
1582             } else if (!strcasecmp(argv
[1],"everysec")) { 
1583                 server
.appendfsync 
= APPENDFSYNC_EVERYSEC
; 
1585                 err 
= "argument must be 'no', 'always' or 'everysec'"; 
1588         } else if (!strcasecmp(argv
[0],"requirepass") && argc 
== 2) { 
1589           server
.requirepass 
= zstrdup(argv
[1]); 
1590         } else if (!strcasecmp(argv
[0],"pidfile") && argc 
== 2) { 
1591           server
.pidfile 
= zstrdup(argv
[1]); 
1592         } else if (!strcasecmp(argv
[0],"dbfilename") && argc 
== 2) { 
1593           server
.dbfilename 
= zstrdup(argv
[1]); 
1594         } else if (!strcasecmp(argv
[0],"vm-enabled") && argc 
== 2) { 
1595             if ((server
.vm_enabled 
= yesnotoi(argv
[1])) == -1) { 
1596                 err 
= "argument must be 'yes' or 'no'"; goto loaderr
; 
1598         } else if (!strcasecmp(argv
[0],"vm-max-memory") && argc 
== 2) { 
1599             server
.vm_max_memory 
= strtoll(argv
[1], NULL
, 10); 
1600         } else if (!strcasecmp(argv
[0],"vm-page-size") && argc 
== 2) { 
1601             server
.vm_page_size 
= strtoll(argv
[1], NULL
, 10); 
1602         } else if (!strcasecmp(argv
[0],"vm-pages") && argc 
== 2) { 
1603             server
.vm_pages 
= strtoll(argv
[1], NULL
, 10); 
1604         } else if (!strcasecmp(argv
[0],"vm-max-threads") && argc 
== 2) { 
1605             server
.vm_max_threads 
= strtoll(argv
[1], NULL
, 10); 
1607             err 
= "Bad directive or wrong number of arguments"; goto loaderr
; 
1609         for (j 
= 0; j 
< argc
; j
++) 
1614     if (fp 
!= stdin
) fclose(fp
); 
1618     fprintf(stderr
, "\n*** FATAL CONFIG FILE ERROR ***\n"); 
1619     fprintf(stderr
, "Reading the configuration file, at line %d\n", linenum
); 
1620     fprintf(stderr
, ">>> '%s'\n", line
); 
1621     fprintf(stderr
, "%s\n", err
); 
/* Drop one reference (decrRefCount) on each of the c->argc objects stored in
 * c->argv and on each of the c->mbargc objects stored in c->mbargv. */
1625 static void freeClientArgv(redisClient 
*c
) { 
1628     for (j 
= 0; j 
< c
->argc
; j
++) 
1629         decrRefCount(c
->argv
[j
]); 
1630     for (j 
= 0; j 
< c
->mbargc
; j
++) 
1631         decrRefCount(c
->mbargv
[j
]); 
1636 static void freeClient(redisClient 
*c
) { 
1639     /* Note that if the client we are freeing is blocked into a blocking 
1640      * call, we have to set querybuf to NULL *before* to call unblockClient() 
1641      * to avoid processInputBuffer() will get called. Also it is important 
1642      * to remove the file events after this, because this call adds 
1643      * the READABLE event. */ 
1644     sdsfree(c
->querybuf
); 
1646     if (c
->flags 
& REDIS_BLOCKED
) 
1649     aeDeleteFileEvent(server
.el
,c
->fd
,AE_READABLE
); 
1650     aeDeleteFileEvent(server
.el
,c
->fd
,AE_WRITABLE
); 
1651     listRelease(c
->reply
); 
1654     /* Remove from the list of clients */ 
1655     ln 
= listSearchKey(server
.clients
,c
); 
1656     redisAssert(ln 
!= NULL
); 
1657     listDelNode(server
.clients
,ln
); 
1658     /* Remove from the list of clients waiting for VM operations */ 
1659     if (server
.vm_enabled 
&& listLength(c
->io_keys
)) { 
1660         ln 
= listSearchKey(server
.io_clients
,c
); 
1661         if (ln
) listDelNode(server
.io_clients
,ln
); 
1662         listRelease(c
->io_keys
); 
1664     listRelease(c
->io_keys
); 
1666     if (c
->flags 
& REDIS_SLAVE
) { 
1667         if (c
->replstate 
== REDIS_REPL_SEND_BULK 
&& c
->repldbfd 
!= -1) 
1669         list 
*l 
= (c
->flags 
& REDIS_MONITOR
) ? server
.monitors 
: server
.slaves
; 
1670         ln 
= listSearchKey(l
,c
); 
1671         redisAssert(ln 
!= NULL
); 
1674     if (c
->flags 
& REDIS_MASTER
) { 
1675         server
.master 
= NULL
; 
1676         server
.replstate 
= REDIS_REPL_CONNECT
; 
1680     freeClientMultiState(c
); 
1684 #define GLUEREPLY_UP_TO (1024) 
1685 static void glueReplyBuffersIfNeeded(redisClient 
*c
) { 
1687     char buf
[GLUEREPLY_UP_TO
]; 
1691     listRewind(c
->reply
); 
1692     while((ln 
= listYield(c
->reply
))) { 
1696         objlen 
= sdslen(o
->ptr
); 
1697         if (copylen 
+ objlen 
<= GLUEREPLY_UP_TO
) { 
1698             memcpy(buf
+copylen
,o
->ptr
,objlen
); 
1700             listDelNode(c
->reply
,ln
); 
1702             if (copylen 
== 0) return; 
1706     /* Now the output buffer is empty, add the new single element */ 
1707     o 
= createObject(REDIS_STRING
,sdsnewlen(buf
,copylen
)); 
1708     listAddNodeHead(c
->reply
,o
); 
1711 static void sendReplyToClient(aeEventLoop 
*el
, int fd
, void *privdata
, int mask
) { 
1712     redisClient 
*c 
= privdata
; 
1713     int nwritten 
= 0, totwritten 
= 0, objlen
; 
1716     REDIS_NOTUSED(mask
); 
1718     /* Use writev() if we have enough buffers to send */ 
1719     if (!server
.glueoutputbuf 
&& 
1720         listLength(c
->reply
) > REDIS_WRITEV_THRESHOLD 
&&  
1721         !(c
->flags 
& REDIS_MASTER
)) 
1723         sendReplyToClientWritev(el
, fd
, privdata
, mask
); 
1727     while(listLength(c
->reply
)) { 
1728         if (server
.glueoutputbuf 
&& listLength(c
->reply
) > 1) 
1729             glueReplyBuffersIfNeeded(c
); 
1731         o 
= listNodeValue(listFirst(c
->reply
)); 
1732         objlen 
= sdslen(o
->ptr
); 
1735             listDelNode(c
->reply
,listFirst(c
->reply
)); 
1739         if (c
->flags 
& REDIS_MASTER
) { 
1740             /* Don't reply to a master */ 
1741             nwritten 
= objlen 
- c
->sentlen
; 
1743             nwritten 
= write(fd
, ((char*)o
->ptr
)+c
->sentlen
, objlen 
- c
->sentlen
); 
1744             if (nwritten 
<= 0) break; 
1746         c
->sentlen 
+= nwritten
; 
1747         totwritten 
+= nwritten
; 
1748         /* If we fully sent the object on head go to the next one */ 
1749         if (c
->sentlen 
== objlen
) { 
1750             listDelNode(c
->reply
,listFirst(c
->reply
)); 
1753         /* Note that we avoid to send more thank REDIS_MAX_WRITE_PER_EVENT 
1754          * bytes, in a single threaded server it's a good idea to serve 
1755          * other clients as well, even if a very large request comes from 
1756          * super fast link that is always able to accept data (in real world 
1757          * scenario think about 'KEYS *' against the loopback interfae) */ 
1758         if (totwritten 
> REDIS_MAX_WRITE_PER_EVENT
) break; 
1760     if (nwritten 
== -1) { 
1761         if (errno 
== EAGAIN
) { 
1764             redisLog(REDIS_VERBOSE
, 
1765                 "Error writing to client: %s", strerror(errno
)); 
1770     if (totwritten 
> 0) c
->lastinteraction 
= time(NULL
); 
1771     if (listLength(c
->reply
) == 0) { 
1773         aeDeleteFileEvent(server
.el
,c
->fd
,AE_WRITABLE
); 
1777 static void sendReplyToClientWritev(aeEventLoop 
*el
, int fd
, void *privdata
, int mask
) 
1779     redisClient 
*c 
= privdata
; 
1780     int nwritten 
= 0, totwritten 
= 0, objlen
, willwrite
; 
1782     struct iovec iov
[REDIS_WRITEV_IOVEC_COUNT
]; 
1783     int offset
, ion 
= 0; 
1785     REDIS_NOTUSED(mask
); 
1788     while (listLength(c
->reply
)) { 
1789         offset 
= c
->sentlen
; 
1793         /* fill-in the iov[] array */ 
1794         for(node 
= listFirst(c
->reply
); node
; node 
= listNextNode(node
)) { 
1795             o 
= listNodeValue(node
); 
1796             objlen 
= sdslen(o
->ptr
); 
1798             if (totwritten 
+ objlen 
- offset 
> REDIS_MAX_WRITE_PER_EVENT
)  
1801             if(ion 
== REDIS_WRITEV_IOVEC_COUNT
) 
1802                 break; /* no more iovecs */ 
1804             iov
[ion
].iov_base 
= ((char*)o
->ptr
) + offset
; 
1805             iov
[ion
].iov_len 
= objlen 
- offset
; 
1806             willwrite 
+= objlen 
- offset
; 
1807             offset 
= 0; /* just for the first item */ 
1814         /* write all collected blocks at once */ 
1815         if((nwritten 
= writev(fd
, iov
, ion
)) < 0) { 
1816             if (errno 
!= EAGAIN
) { 
1817                 redisLog(REDIS_VERBOSE
, 
1818                          "Error writing to client: %s", strerror(errno
)); 
1825         totwritten 
+= nwritten
; 
1826         offset 
= c
->sentlen
; 
1828         /* remove written robjs from c->reply */ 
1829         while (nwritten 
&& listLength(c
->reply
)) { 
1830             o 
= listNodeValue(listFirst(c
->reply
)); 
1831             objlen 
= sdslen(o
->ptr
); 
1833             if(nwritten 
>= objlen 
- offset
) { 
1834                 listDelNode(c
->reply
, listFirst(c
->reply
)); 
1835                 nwritten 
-= objlen 
- offset
; 
1839                 c
->sentlen 
+= nwritten
; 
1847         c
->lastinteraction 
= time(NULL
); 
1849     if (listLength(c
->reply
) == 0) { 
1851         aeDeleteFileEvent(server
.el
,c
->fd
,AE_WRITABLE
); 
1855 static struct redisCommand 
*lookupCommand(char *name
) { 
1857     while(cmdTable
[j
].name 
!= NULL
) { 
1858         if (!strcasecmp(name
,cmdTable
[j
].name
)) return &cmdTable
[j
]; 
1864 /* resetClient prepare the client to process the next command */ 
1865 static void resetClient(redisClient 
*c
) { 
1871 /* Call() is the core of Redis execution of a command */ 
1872 static void call(redisClient 
*c
, struct redisCommand 
*cmd
) { 
1875     dirty 
= server
.dirty
; 
1877     if (server
.appendonly 
&& server
.dirty
-dirty
) 
1878         feedAppendOnlyFile(cmd
,c
->db
->id
,c
->argv
,c
->argc
); 
1879     if (server
.dirty
-dirty 
&& listLength(server
.slaves
)) 
1880         replicationFeedSlaves(server
.slaves
,cmd
,c
->db
->id
,c
->argv
,c
->argc
); 
1881     if (listLength(server
.monitors
)) 
1882         replicationFeedSlaves(server
.monitors
,cmd
,c
->db
->id
,c
->argv
,c
->argc
); 
1883     server
.stat_numcommands
++; 
1886 /* If this function gets called we already read a whole 
1887  * command, argments are in the client argv/argc fields. 
1888  * processCommand() execute the command or prepare the 
1889  * server for a bulk read from the client. 
1891  * If 1 is returned the client is still alive and valid and 
1892  * and other operations can be performed by the caller. Otherwise 
1893  * if 0 is returned the client was destroied (i.e. after QUIT). */ 
1894 static int processCommand(redisClient 
*c
) { 
1895     struct redisCommand 
*cmd
; 
1897     /* Free some memory if needed (maxmemory setting) */ 
1898     if (server
.maxmemory
) freeMemoryIfNeeded(); 
1900     /* Handle the multi bulk command type. This is an alternative protocol 
1901      * supported by Redis in order to receive commands that are composed of 
1902      * multiple binary-safe "bulk" arguments. The latency of processing is 
1903      * a bit higher but this allows things like multi-sets, so if this 
1904      * protocol is used only for MSET and similar commands this is a big win. */ 
1905     if (c
->multibulk 
== 0 && c
->argc 
== 1 && ((char*)(c
->argv
[0]->ptr
))[0] == '*') { 
1906         c
->multibulk 
= atoi(((char*)c
->argv
[0]->ptr
)+1); 
1907         if (c
->multibulk 
<= 0) { 
1911             decrRefCount(c
->argv
[c
->argc
-1]); 
1915     } else if (c
->multibulk
) { 
1916         if (c
->bulklen 
== -1) { 
1917             if (((char*)c
->argv
[0]->ptr
)[0] != '$') { 
1918                 addReplySds(c
,sdsnew("-ERR multi bulk protocol error\r\n")); 
1922                 int bulklen 
= atoi(((char*)c
->argv
[0]->ptr
)+1); 
1923                 decrRefCount(c
->argv
[0]); 
1924                 if (bulklen 
< 0 || bulklen 
> 1024*1024*1024) { 
1926                     addReplySds(c
,sdsnew("-ERR invalid bulk write count\r\n")); 
1931                 c
->bulklen 
= bulklen
+2; /* add two bytes for CR+LF */ 
1935             c
->mbargv 
= zrealloc(c
->mbargv
,(sizeof(robj
*))*(c
->mbargc
+1)); 
1936             c
->mbargv
[c
->mbargc
] = c
->argv
[0]; 
1940             if (c
->multibulk 
== 0) { 
1944                 /* Here we need to swap the multi-bulk argc/argv with the 
1945                  * normal argc/argv of the client structure. */ 
1947                 c
->argv 
= c
->mbargv
; 
1948                 c
->mbargv 
= auxargv
; 
1951                 c
->argc 
= c
->mbargc
; 
1952                 c
->mbargc 
= auxargc
; 
1954                 /* We need to set bulklen to something different than -1 
1955                  * in order for the code below to process the command without 
1956                  * to try to read the last argument of a bulk command as 
1957                  * a special argument. */ 
1959                 /* continue below and process the command */ 
1966     /* -- end of multi bulk commands processing -- */ 
1968     /* The QUIT command is handled as a special case. Normal command 
1969      * procs are unable to close the client connection safely */ 
1970     if (!strcasecmp(c
->argv
[0]->ptr
,"quit")) { 
1974     cmd 
= lookupCommand(c
->argv
[0]->ptr
); 
1977             sdscatprintf(sdsempty(), "-ERR unknown command '%s'\r\n", 
1978                 (char*)c
->argv
[0]->ptr
)); 
1981     } else if ((cmd
->arity 
> 0 && cmd
->arity 
!= c
->argc
) || 
1982                (c
->argc 
< -cmd
->arity
)) { 
1984             sdscatprintf(sdsempty(), 
1985                 "-ERR wrong number of arguments for '%s' command\r\n", 
1989     } else if (server
.maxmemory 
&& cmd
->flags 
& REDIS_CMD_DENYOOM 
&& zmalloc_used_memory() > server
.maxmemory
) { 
1990         addReplySds(c
,sdsnew("-ERR command not allowed when used memory > 'maxmemory'\r\n")); 
1993     } else if (cmd
->flags 
& REDIS_CMD_BULK 
&& c
->bulklen 
== -1) { 
1994         int bulklen 
= atoi(c
->argv
[c
->argc
-1]->ptr
); 
1996         decrRefCount(c
->argv
[c
->argc
-1]); 
1997         if (bulklen 
< 0 || bulklen 
> 1024*1024*1024) { 
1999             addReplySds(c
,sdsnew("-ERR invalid bulk write count\r\n")); 
2004         c
->bulklen 
= bulklen
+2; /* add two bytes for CR+LF */ 
2005         /* It is possible that the bulk read is already in the 
2006          * buffer. Check this condition and handle it accordingly. 
2007          * This is just a fast path, alternative to call processInputBuffer(). 
2008          * It's a good idea since the code is small and this condition 
2009          * happens most of the times. */ 
2010         if ((signed)sdslen(c
->querybuf
) >= c
->bulklen
) { 
2011             c
->argv
[c
->argc
] = createStringObject(c
->querybuf
,c
->bulklen
-2); 
2013             c
->querybuf 
= sdsrange(c
->querybuf
,c
->bulklen
,-1); 
2018     /* Let's try to share objects on the command arguments vector */ 
2019     if (server
.shareobjects
) { 
2021         for(j 
= 1; j 
< c
->argc
; j
++) 
2022             c
->argv
[j
] = tryObjectSharing(c
->argv
[j
]); 
2024     /* Let's try to encode the bulk object to save space. */ 
2025     if (cmd
->flags 
& REDIS_CMD_BULK
) 
2026         tryObjectEncoding(c
->argv
[c
->argc
-1]); 
2028     /* Check if the user is authenticated */ 
2029     if (server
.requirepass 
&& !c
->authenticated 
&& cmd
->proc 
!= authCommand
) { 
2030         addReplySds(c
,sdsnew("-ERR operation not permitted\r\n")); 
2035     /* Exec the command */ 
2036     if (c
->flags 
& REDIS_MULTI 
&& cmd
->proc 
!= execCommand
) { 
2037         queueMultiCommand(c
,cmd
); 
2038         addReply(c
,shared
.queued
); 
2043     /* Prepare the client for the next command */ 
2044     if (c
->flags 
& REDIS_CLOSE
) { 
2052 static void replicationFeedSlaves(list 
*slaves
, struct redisCommand 
*cmd
, int dictid
, robj 
**argv
, int argc
) { 
2056     /* (args*2)+1 is enough room for args, spaces, newlines */ 
2057     robj 
*static_outv
[REDIS_STATIC_ARGS
*2+1]; 
2059     if (argc 
<= REDIS_STATIC_ARGS
) { 
2062         outv 
= zmalloc(sizeof(robj
*)*(argc
*2+1)); 
2065     for (j 
= 0; j 
< argc
; j
++) { 
2066         if (j 
!= 0) outv
[outc
++] = shared
.space
; 
2067         if ((cmd
->flags 
& REDIS_CMD_BULK
) && j 
== argc
-1) { 
2070             lenobj 
= createObject(REDIS_STRING
, 
2071                 sdscatprintf(sdsempty(),"%lu\r\n", 
2072                     (unsigned long) stringObjectLen(argv
[j
]))); 
2073             lenobj
->refcount 
= 0; 
2074             outv
[outc
++] = lenobj
; 
2076         outv
[outc
++] = argv
[j
]; 
2078     outv
[outc
++] = shared
.crlf
; 
2080     /* Increment all the refcounts at start and decrement at end in order to 
2081      * be sure to free objects if there is no slave in a replication state 
2082      * able to be feed with commands */ 
2083     for (j 
= 0; j 
< outc
; j
++) incrRefCount(outv
[j
]); 
2085     while((ln 
= listYield(slaves
))) { 
2086         redisClient 
*slave 
= ln
->value
; 
2088         /* Don't feed slaves that are still waiting for BGSAVE to start */ 
2089         if (slave
->replstate 
== REDIS_REPL_WAIT_BGSAVE_START
) continue; 
2091         /* Feed all the other slaves, MONITORs and so on */ 
2092         if (slave
->slaveseldb 
!= dictid
) { 
2096             case 0: selectcmd 
= shared
.select0
; break; 
2097             case 1: selectcmd 
= shared
.select1
; break; 
2098             case 2: selectcmd 
= shared
.select2
; break; 
2099             case 3: selectcmd 
= shared
.select3
; break; 
2100             case 4: selectcmd 
= shared
.select4
; break; 
2101             case 5: selectcmd 
= shared
.select5
; break; 
2102             case 6: selectcmd 
= shared
.select6
; break; 
2103             case 7: selectcmd 
= shared
.select7
; break; 
2104             case 8: selectcmd 
= shared
.select8
; break; 
2105             case 9: selectcmd 
= shared
.select9
; break; 
2107                 selectcmd 
= createObject(REDIS_STRING
, 
2108                     sdscatprintf(sdsempty(),"select %d\r\n",dictid
)); 
2109                 selectcmd
->refcount 
= 0; 
2112             addReply(slave
,selectcmd
); 
2113             slave
->slaveseldb 
= dictid
; 
2115         for (j 
= 0; j 
< outc
; j
++) addReply(slave
,outv
[j
]); 
2117     for (j 
= 0; j 
< outc
; j
++) decrRefCount(outv
[j
]); 
2118     if (outv 
!= static_outv
) zfree(outv
); 
2121 static void processInputBuffer(redisClient 
*c
) { 
2123     /* Before to process the input buffer, make sure the client is not 
2124      * waitig for a blocking operation such as BLPOP. Note that the first 
2125      * iteration the client is never blocked, otherwise the processInputBuffer 
2126      * would not be called at all, but after the execution of the first commands 
2127      * in the input buffer the client may be blocked, and the "goto again" 
2128      * will try to reiterate. The following line will make it return asap. */ 
2129     if (c
->flags 
& REDIS_BLOCKED 
|| c
->flags 
& REDIS_IO_WAIT
) return; 
2130     if (c
->bulklen 
== -1) { 
2131         /* Read the first line of the query */ 
2132         char *p 
= strchr(c
->querybuf
,'\n'); 
2139             query 
= c
->querybuf
; 
2140             c
->querybuf 
= sdsempty(); 
2141             querylen 
= 1+(p
-(query
)); 
2142             if (sdslen(query
) > querylen
) { 
2143                 /* leave data after the first line of the query in the buffer */ 
2144                 c
->querybuf 
= sdscatlen(c
->querybuf
,query
+querylen
,sdslen(query
)-querylen
); 
2146             *p 
= '\0'; /* remove "\n" */ 
2147             if (*(p
-1) == '\r') *(p
-1) = '\0'; /* and "\r" if any */ 
2148             sdsupdatelen(query
); 
2150             /* Now we can split the query in arguments */ 
2151             argv 
= sdssplitlen(query
,sdslen(query
)," ",1,&argc
); 
2154             if (c
->argv
) zfree(c
->argv
); 
2155             c
->argv 
= zmalloc(sizeof(robj
*)*argc
); 
2157             for (j 
= 0; j 
< argc
; j
++) { 
2158                 if (sdslen(argv
[j
])) { 
2159                     c
->argv
[c
->argc
] = createObject(REDIS_STRING
,argv
[j
]); 
2167                 /* Execute the command. If the client is still valid 
2168                  * after processCommand() return and there is something 
2169                  * on the query buffer try to process the next command. */ 
2170                 if (processCommand(c
) && sdslen(c
->querybuf
)) goto again
; 
2172                 /* Nothing to process, argc == 0. Just process the query 
2173                  * buffer if it's not empty or return to the caller */ 
2174                 if (sdslen(c
->querybuf
)) goto again
; 
2177         } else if (sdslen(c
->querybuf
) >= REDIS_REQUEST_MAX_SIZE
) { 
2178             redisLog(REDIS_VERBOSE
, "Client protocol error"); 
2183         /* Bulk read handling. Note that if we are at this point 
2184            the client already sent a command terminated with a newline, 
2185            we are reading the bulk data that is actually the last 
2186            argument of the command. */ 
2187         int qbl 
= sdslen(c
->querybuf
); 
2189         if (c
->bulklen 
<= qbl
) { 
2190             /* Copy everything but the final CRLF as final argument */ 
2191             c
->argv
[c
->argc
] = createStringObject(c
->querybuf
,c
->bulklen
-2); 
2193             c
->querybuf 
= sdsrange(c
->querybuf
,c
->bulklen
,-1); 
2194             /* Process the command. If the client is still valid after 
2195              * the processing and there is more data in the buffer 
2196              * try to parse it. */ 
2197             if (processCommand(c
) && sdslen(c
->querybuf
)) goto again
; 
2203 static void readQueryFromClient(aeEventLoop 
*el
, int fd
, void *privdata
, int mask
) { 
2204     redisClient 
*c 
= (redisClient
*) privdata
; 
2205     char buf
[REDIS_IOBUF_LEN
]; 
2208     REDIS_NOTUSED(mask
); 
2210     nread 
= read(fd
, buf
, REDIS_IOBUF_LEN
); 
2212         if (errno 
== EAGAIN
) { 
2215             redisLog(REDIS_VERBOSE
, "Reading from client: %s",strerror(errno
)); 
2219     } else if (nread 
== 0) { 
2220         redisLog(REDIS_VERBOSE
, "Client closed connection"); 
2225         c
->querybuf 
= sdscatlen(c
->querybuf
, buf
, nread
); 
2226         c
->lastinteraction 
= time(NULL
); 
2230     processInputBuffer(c
); 
2233 static int selectDb(redisClient 
*c
, int id
) { 
2234     if (id 
< 0 || id 
>= server
.dbnum
) 
2236     c
->db 
= &server
.db
[id
]; 
2240 static void *dupClientReplyValue(void *o
) { 
2241     incrRefCount((robj
*)o
); 
2245 static redisClient 
*createClient(int fd
) { 
2246     redisClient 
*c 
= zmalloc(sizeof(*c
)); 
2248     anetNonBlock(NULL
,fd
); 
2249     anetTcpNoDelay(NULL
,fd
); 
2250     if (!c
) return NULL
; 
2253     c
->querybuf 
= sdsempty(); 
2262     c
->lastinteraction 
= time(NULL
); 
2263     c
->authenticated 
= 0; 
2264     c
->replstate 
= REDIS_REPL_NONE
; 
2265     c
->reply 
= listCreate(); 
2266     listSetFreeMethod(c
->reply
,decrRefCount
); 
2267     listSetDupMethod(c
->reply
,dupClientReplyValue
); 
2268     c
->blockingkeys 
= NULL
; 
2269     c
->blockingkeysnum 
= 0; 
2270     c
->io_keys 
= listCreate(); 
2271     listSetFreeMethod(c
->io_keys
,decrRefCount
); 
2272     if (aeCreateFileEvent(server
.el
, c
->fd
, AE_READABLE
, 
2273         readQueryFromClient
, c
) == AE_ERR
) { 
2277     listAddNodeTail(server
.clients
,c
); 
2278     initClientMultiState(c
); 
2282 static void addReply(redisClient 
*c
, robj 
*obj
) { 
2283     if (listLength(c
->reply
) == 0 && 
2284         (c
->replstate 
== REDIS_REPL_NONE 
|| 
2285          c
->replstate 
== REDIS_REPL_ONLINE
) && 
2286         aeCreateFileEvent(server
.el
, c
->fd
, AE_WRITABLE
, 
2287         sendReplyToClient
, c
) == AE_ERR
) return; 
2289     if (server
.vm_enabled 
&& obj
->storage 
!= REDIS_VM_MEMORY
) { 
2290         obj 
= dupStringObject(obj
); 
2291         obj
->refcount 
= 0; /* getDecodedObject() will increment the refcount */ 
2293     listAddNodeTail(c
->reply
,getDecodedObject(obj
)); 
2296 static void addReplySds(redisClient 
*c
, sds s
) { 
2297     robj 
*o 
= createObject(REDIS_STRING
,s
); 
2302 static void addReplyDouble(redisClient 
*c
, double d
) { 
2305     snprintf(buf
,sizeof(buf
),"%.17g",d
); 
2306     addReplySds(c
,sdscatprintf(sdsempty(),"$%lu\r\n%s\r\n", 
2307         (unsigned long) strlen(buf
),buf
)); 
2310 static void addReplyBulkLen(redisClient 
*c
, robj 
*obj
) { 
2313     if (obj
->encoding 
== REDIS_ENCODING_RAW
) { 
2314         len 
= sdslen(obj
->ptr
); 
2316         long n 
= (long)obj
->ptr
; 
2318         /* Compute how many bytes will take this integer as a radix 10 string */ 
2324         while((n 
= n
/10) != 0) { 
2328     addReplySds(c
,sdscatprintf(sdsempty(),"$%lu\r\n",(unsigned long)len
)); 
2331 static void acceptHandler(aeEventLoop 
*el
, int fd
, void *privdata
, int mask
) { 
2336     REDIS_NOTUSED(mask
); 
2337     REDIS_NOTUSED(privdata
); 
2339     cfd 
= anetAccept(server
.neterr
, fd
, cip
, &cport
); 
2340     if (cfd 
== AE_ERR
) { 
2341         redisLog(REDIS_VERBOSE
,"Accepting client connection: %s", server
.neterr
); 
2344     redisLog(REDIS_VERBOSE
,"Accepted %s:%d", cip
, cport
); 
2345     if ((c 
= createClient(cfd
)) == NULL
) { 
2346         redisLog(REDIS_WARNING
,"Error allocating resoures for the client"); 
2347         close(cfd
); /* May be already closed, just ingore errors */ 
2350     /* If maxclient directive is set and this is one client more... close the 
2351      * connection. Note that we create the client instead to check before 
2352      * for this condition, since now the socket is already set in nonblocking 
2353      * mode and we can send an error for free using the Kernel I/O */ 
2354     if (server
.maxclients 
&& listLength(server
.clients
) > server
.maxclients
) { 
2355         char *err 
= "-ERR max number of clients reached\r\n"; 
2357         /* That's a best effort error message, don't check write errors */ 
2358         if (write(c
->fd
,err
,strlen(err
)) == -1) { 
2359             /* Nothing to do, Just to avoid the warning... */ 
2364     server
.stat_numconnections
++; 
2367 /* ======================= Redis objects implementation ===================== */ 
2369 static robj 
*createObject(int type
, void *ptr
) { 
2372     if (listLength(server
.objfreelist
)) { 
2373         listNode 
*head 
= listFirst(server
.objfreelist
); 
2374         o 
= listNodeValue(head
); 
2375         listDelNode(server
.objfreelist
,head
); 
2377         if (server
.vm_enabled
) { 
2378             o 
= zmalloc(sizeof(*o
)); 
2380             o 
= zmalloc(sizeof(*o
)-sizeof(struct redisObjectVM
)); 
2384     o
->encoding 
= REDIS_ENCODING_RAW
; 
2387     if (server
.vm_enabled
) { 
2388         o
->vm
.atime 
= server
.unixtime
; 
2389         o
->storage 
= REDIS_VM_MEMORY
; 
2394 static robj 
*createStringObject(char *ptr
, size_t len
) { 
2395     return createObject(REDIS_STRING
,sdsnewlen(ptr
,len
)); 
2398 static robj 
*dupStringObject(robj 
*o
) { 
2399     assert(o
->encoding 
== REDIS_ENCODING_RAW
); 
2400     return createStringObject(o
->ptr
,sdslen(o
->ptr
)); 
2403 static robj 
*createListObject(void) { 
2404     list 
*l 
= listCreate(); 
2406     listSetFreeMethod(l
,decrRefCount
); 
2407     return createObject(REDIS_LIST
,l
); 
2410 static robj 
*createSetObject(void) { 
2411     dict 
*d 
= dictCreate(&setDictType
,NULL
); 
2412     return createObject(REDIS_SET
,d
); 
2415 static robj 
*createZsetObject(void) { 
2416     zset 
*zs 
= zmalloc(sizeof(*zs
)); 
2418     zs
->dict 
= dictCreate(&zsetDictType
,NULL
); 
2419     zs
->zsl 
= zslCreate(); 
2420     return createObject(REDIS_ZSET
,zs
); 
2423 static void freeStringObject(robj 
*o
) { 
2424     if (o
->encoding 
== REDIS_ENCODING_RAW
) { 
2429 static void freeListObject(robj 
*o
) { 
2430     listRelease((list
*) o
->ptr
); 
2433 static void freeSetObject(robj 
*o
) { 
2434     dictRelease((dict
*) o
->ptr
); 
2437 static void freeZsetObject(robj 
*o
) { 
2440     dictRelease(zs
->dict
); 
2445 static void freeHashObject(robj 
*o
) { 
2446     dictRelease((dict
*) o
->ptr
); 
2449 static void incrRefCount(robj 
*o
) { 
2450     redisAssert(!server
.vm_enabled 
|| o
->storage 
== REDIS_VM_MEMORY
); 
2454 static void decrRefCount(void *obj
) { 
2457     /* Object is swapped out, or in the process of being loaded. */ 
2458     if (server
.vm_enabled 
&& 
2459         (o
->storage 
== REDIS_VM_SWAPPED 
|| o
->storage 
== REDIS_VM_LOADING
)) 
2461         if (o
->storage 
== REDIS_VM_SWAPPED 
|| o
->storage 
== REDIS_VM_LOADING
) { 
2462             redisAssert(o
->refcount 
== 1); 
2464         if (o
->storage 
== REDIS_VM_LOADING
) vmCancelThreadedIOJob(obj
); 
2465         redisAssert(o
->type 
== REDIS_STRING
); 
2466         freeStringObject(o
); 
2467         vmMarkPagesFree(o
->vm
.page
,o
->vm
.usedpages
); 
2468         if (listLength(server
.objfreelist
) > REDIS_OBJFREELIST_MAX 
|| 
2469             !listAddNodeHead(server
.objfreelist
,o
)) 
2471         server
.vm_stats_swapped_objects
--; 
2474     /* Object is in memory, or in the process of being swapped out. */ 
2475     if (--(o
->refcount
) == 0) { 
2476         if (server
.vm_enabled 
&& o
->storage 
== REDIS_VM_SWAPPING
) 
2477             vmCancelThreadedIOJob(obj
); 
2479         case REDIS_STRING
: freeStringObject(o
); break; 
2480         case REDIS_LIST
: freeListObject(o
); break; 
2481         case REDIS_SET
: freeSetObject(o
); break; 
2482         case REDIS_ZSET
: freeZsetObject(o
); break; 
2483         case REDIS_HASH
: freeHashObject(o
); break; 
2484         default: redisAssert(0 != 0); break; 
2486         if (listLength(server
.objfreelist
) > REDIS_OBJFREELIST_MAX 
|| 
2487             !listAddNodeHead(server
.objfreelist
,o
)) 
2492 static robj 
*lookupKey(redisDb 
*db
, robj 
*key
) { 
2493     dictEntry 
*de 
= dictFind(db
->dict
,key
); 
2495         robj 
*key 
= dictGetEntryKey(de
); 
2496         robj 
*val 
= dictGetEntryVal(de
); 
2498         if (server
.vm_enabled
) { 
2499             if (key
->storage 
== REDIS_VM_MEMORY 
|| 
2500                 key
->storage 
== REDIS_VM_SWAPPING
) 
2502                 /* If we were swapping the object out, stop it, this key 
2504                 if (key
->storage 
== REDIS_VM_SWAPPING
) 
2505                     vmCancelThreadedIOJob(key
); 
2506                 /* Update the access time of the key for the aging algorithm. */ 
2507                 key
->vm
.atime 
= server
.unixtime
; 
2509                 /* Our value was swapped on disk. Bring it at home. */ 
2510                 redisAssert(val 
== NULL
); 
2511                 val 
= vmLoadObject(key
); 
2512                 dictGetEntryVal(de
) = val
; 
2521 static robj 
*lookupKeyRead(redisDb 
*db
, robj 
*key
) { 
2522     expireIfNeeded(db
,key
); 
2523     return lookupKey(db
,key
); 
2526 static robj 
*lookupKeyWrite(redisDb 
*db
, robj 
*key
) { 
2527     deleteIfVolatile(db
,key
); 
2528     return lookupKey(db
,key
); 
2531 static int deleteKey(redisDb 
*db
, robj 
*key
) { 
2534     /* We need to protect key from destruction: after the first dictDelete() 
2535      * it may happen that 'key' is no longer valid if we don't increment 
2536      * it's count. This may happen when we get the object reference directly 
2537      * from the hash table with dictRandomKey() or dict iterators */ 
2539     if (dictSize(db
->expires
)) dictDelete(db
->expires
,key
); 
2540     retval 
= dictDelete(db
->dict
,key
); 
2543     return retval 
== DICT_OK
; 
2546 /* Try to share an object against the shared objects pool */ 
2547 static robj 
*tryObjectSharing(robj 
*o
) { 
2548     struct dictEntry 
*de
; 
2551     if (o 
== NULL 
|| server
.shareobjects 
== 0) return o
; 
2553     redisAssert(o
->type 
== REDIS_STRING
); 
2554     de 
= dictFind(server
.sharingpool
,o
); 
2556         robj 
*shared 
= dictGetEntryKey(de
); 
2558         c 
= ((unsigned long) dictGetEntryVal(de
))+1; 
2559         dictGetEntryVal(de
) = (void*) c
; 
2560         incrRefCount(shared
); 
2564         /* Here we are using a stream algorihtm: Every time an object is 
2565          * shared we increment its count, everytime there is a miss we 
2566          * recrement the counter of a random object. If this object reaches 
2567          * zero we remove the object and put the current object instead. */ 
2568         if (dictSize(server
.sharingpool
) >= 
2569                 server
.sharingpoolsize
) { 
2570             de 
= dictGetRandomKey(server
.sharingpool
); 
2571             redisAssert(de 
!= NULL
); 
2572             c 
= ((unsigned long) dictGetEntryVal(de
))-1; 
2573             dictGetEntryVal(de
) = (void*) c
; 
2575                 dictDelete(server
.sharingpool
,de
->key
); 
2578             c 
= 0; /* If the pool is empty we want to add this object */ 
2583             retval 
= dictAdd(server
.sharingpool
,o
,(void*)1); 
2584             redisAssert(retval 
== DICT_OK
); 
2591 /* Check if the nul-terminated string 's' can be represented by a long 
2592  * (that is, is a number that fits into long without any other space or 
2593  * character before or after the digits). 
2595  * If so, the function returns REDIS_OK and *longval is set to the value 
2596  * of the number. Otherwise REDIS_ERR is returned */ 
2597 static int isStringRepresentableAsLong(sds s
, long *longval
) { 
2598     char buf
[32], *endptr
; 
2602     value 
= strtol(s
, &endptr
, 10); 
2603     if (endptr
[0] != '\0') return REDIS_ERR
; 
2604     slen 
= snprintf(buf
,32,"%ld",value
); 
2606     /* If the number converted back into a string is not identical 
2607      * then it's not possible to encode the string as integer */ 
2608     if (sdslen(s
) != (unsigned)slen 
|| memcmp(buf
,s
,slen
)) return REDIS_ERR
; 
2609     if (longval
) *longval 
= value
; 
2613 /* Try to encode a string object in order to save space */ 
2614 static int tryObjectEncoding(robj 
*o
) { 
2618     if (o
->encoding 
!= REDIS_ENCODING_RAW
) 
2619         return REDIS_ERR
; /* Already encoded */ 
2621     /* It's not save to encode shared objects: shared objects can be shared 
2622      * everywhere in the "object space" of Redis. Encoded objects can only 
2623      * appear as "values" (and not, for instance, as keys) */ 
2624      if (o
->refcount 
> 1) return REDIS_ERR
; 
2626     /* Currently we try to encode only strings */ 
2627     redisAssert(o
->type 
== REDIS_STRING
); 
2629     /* Check if we can represent this string as a long integer */ 
2630     if (isStringRepresentableAsLong(s
,&value
) == REDIS_ERR
) return REDIS_ERR
; 
2632     /* Ok, this object can be encoded */ 
2633     o
->encoding 
= REDIS_ENCODING_INT
; 
2635     o
->ptr 
= (void*) value
; 
2639 /* Get a decoded version of an encoded object (returned as a new object). 
2640  * If the object is already raw-encoded just increment the ref count. */ 
2641 static robj 
*getDecodedObject(robj 
*o
) { 
2644     if (o
->encoding 
== REDIS_ENCODING_RAW
) { 
2648     if (o
->type 
== REDIS_STRING 
&& o
->encoding 
== REDIS_ENCODING_INT
) { 
2651         snprintf(buf
,32,"%ld",(long)o
->ptr
); 
2652         dec 
= createStringObject(buf
,strlen(buf
)); 
2655         redisAssert(1 != 1); 
2659 /* Compare two string objects via strcmp() or alike. 
2660  * Note that the objects may be integer-encoded. In such a case we 
2661  * use snprintf() to get a string representation of the numbers on the stack 
2662  * and compare the strings, it's much faster than calling getDecodedObject(). 
2664  * Important note: if objects are not integer encoded, but binary-safe strings, 
2665  * sdscmp() from sds.c will apply memcmp() so this function ca be considered 
2667 static int compareStringObjects(robj 
*a
, robj 
*b
) { 
2668     redisAssert(a
->type 
== REDIS_STRING 
&& b
->type 
== REDIS_STRING
); 
2669     char bufa
[128], bufb
[128], *astr
, *bstr
; 
2672     if (a 
== b
) return 0; 
2673     if (a
->encoding 
!= REDIS_ENCODING_RAW
) { 
2674         snprintf(bufa
,sizeof(bufa
),"%ld",(long) a
->ptr
); 
2680     if (b
->encoding 
!= REDIS_ENCODING_RAW
) { 
2681         snprintf(bufb
,sizeof(bufb
),"%ld",(long) b
->ptr
); 
2687     return bothsds 
? sdscmp(astr
,bstr
) : strcmp(astr
,bstr
); 
2690 static size_t stringObjectLen(robj 
*o
) { 
2691     redisAssert(o
->type 
== REDIS_STRING
); 
2692     if (o
->encoding 
== REDIS_ENCODING_RAW
) { 
2693         return sdslen(o
->ptr
); 
2697         return snprintf(buf
,32,"%ld",(long)o
->ptr
); 
2701 /*============================ RDB saving/loading =========================== */ 
/* Write the single byte 'type' to 'fp'. Returns 0 on success, -1 when
 * fwrite() reports that nothing was written. */
static int rdbSaveType(FILE *fp, unsigned char type) {
    size_t written = fwrite(&type,1,1,fp);

    return (written == 0) ? -1 : 0;
}
/* Write 't' to 'fp' as a 4 byte signed integer (the value is narrowed to
 * int32_t first). Returns 0 on success, -1 on write error. */
static int rdbSaveTime(FILE *fp, time_t t) {
    int32_t stamp = (int32_t) t;

    return (fwrite(&stamp,4,1,fp) == 0) ? -1 : 0;
}
2714 /* check rdbLoadLen() comments for more info */ 
2715 static int rdbSaveLen(FILE *fp
, uint32_t len
) { 
2716     unsigned char buf
[2]; 
2719         /* Save a 6 bit len */ 
2720         buf
[0] = (len
&0xFF)|(REDIS_RDB_6BITLEN
<<6); 
2721         if (fwrite(buf
,1,1,fp
) == 0) return -1; 
2722     } else if (len 
< (1<<14)) { 
2723         /* Save a 14 bit len */ 
2724         buf
[0] = ((len
>>8)&0xFF)|(REDIS_RDB_14BITLEN
<<6); 
2726         if (fwrite(buf
,2,1,fp
) == 0) return -1; 
2728         /* Save a 32 bit len */ 
2729         buf
[0] = (REDIS_RDB_32BITLEN
<<6); 
2730         if (fwrite(buf
,1,1,fp
) == 0) return -1; 
2732         if (fwrite(&len
,4,1,fp
) == 0) return -1; 
2737 /* String objects in the form "2391" "-100" without any space and with a 
2738  * range of values that can fit in an 8, 16 or 32 bit signed value can be 
2739  * encoded as integers to save space */ 
2740 static int rdbTryIntegerEncoding(sds s
, unsigned char *enc
) { 
2742     char *endptr
, buf
[32]; 
2744     /* Check if it's possible to encode this value as a number */ 
2745     value 
= strtoll(s
, &endptr
, 10); 
2746     if (endptr
[0] != '\0') return 0; 
2747     snprintf(buf
,32,"%lld",value
); 
2749     /* If the number converted back into a string is not identical 
2750      * then it's not possible to encode the string as integer */ 
2751     if (strlen(buf
) != sdslen(s
) || memcmp(buf
,s
,sdslen(s
))) return 0; 
2753     /* Finally check if it fits in our ranges */ 
2754     if (value 
>= -(1<<7) && value 
<= (1<<7)-1) { 
2755         enc
[0] = (REDIS_RDB_ENCVAL
<<6)|REDIS_RDB_ENC_INT8
; 
2756         enc
[1] = value
&0xFF; 
2758     } else if (value 
>= -(1<<15) && value 
<= (1<<15)-1) { 
2759         enc
[0] = (REDIS_RDB_ENCVAL
<<6)|REDIS_RDB_ENC_INT16
; 
2760         enc
[1] = value
&0xFF; 
2761         enc
[2] = (value
>>8)&0xFF; 
2763     } else if (value 
>= -((long long)1<<31) && value 
<= ((long long)1<<31)-1) { 
2764         enc
[0] = (REDIS_RDB_ENCVAL
<<6)|REDIS_RDB_ENC_INT32
; 
2765         enc
[1] = value
&0xFF; 
2766         enc
[2] = (value
>>8)&0xFF; 
2767         enc
[3] = (value
>>16)&0xFF; 
2768         enc
[4] = (value
>>24)&0xFF; 
2775 static int rdbSaveLzfStringObject(FILE *fp
, robj 
*obj
) { 
2776     unsigned int comprlen
, outlen
; 
2780     /* We require at least four bytes compression for this to be worth it */ 
2781     outlen 
= sdslen(obj
->ptr
)-4; 
2782     if (outlen 
<= 0) return 0; 
2783     if ((out 
= zmalloc(outlen
+1)) == NULL
) return 0; 
2784     comprlen 
= lzf_compress(obj
->ptr
, sdslen(obj
->ptr
), out
, outlen
); 
2785     if (comprlen 
== 0) { 
2789     /* Data compressed! Let's save it on disk */ 
2790     byte 
= (REDIS_RDB_ENCVAL
<<6)|REDIS_RDB_ENC_LZF
; 
2791     if (fwrite(&byte
,1,1,fp
) == 0) goto writeerr
; 
2792     if (rdbSaveLen(fp
,comprlen
) == -1) goto writeerr
; 
2793     if (rdbSaveLen(fp
,sdslen(obj
->ptr
)) == -1) goto writeerr
; 
2794     if (fwrite(out
,comprlen
,1,fp
) == 0) goto writeerr
; 
2803 /* Save a string objet as [len][data] on disk. If the object is a string 
2804  * representation of an integer value we try to safe it in a special form */ 
2805 static int rdbSaveStringObjectRaw(FILE *fp
, robj 
*obj
) { 
2809     len 
= sdslen(obj
->ptr
); 
2811     /* Try integer encoding */ 
2813         unsigned char buf
[5]; 
2814         if ((enclen 
= rdbTryIntegerEncoding(obj
->ptr
,buf
)) > 0) { 
2815             if (fwrite(buf
,enclen
,1,fp
) == 0) return -1; 
2820     /* Try LZF compression - under 20 bytes it's unable to compress even 
2821      * aaaaaaaaaaaaaaaaaa so skip it */ 
2822     if (server
.rdbcompression 
&& len 
> 20) { 
2825         retval 
= rdbSaveLzfStringObject(fp
,obj
); 
2826         if (retval 
== -1) return -1; 
2827         if (retval 
> 0) return 0; 
2828         /* retval == 0 means data can't be compressed, save the old way */ 
2831     /* Store verbatim */ 
2832     if (rdbSaveLen(fp
,len
) == -1) return -1; 
2833     if (len 
&& fwrite(obj
->ptr
,len
,1,fp
) == 0) return -1; 
2837 /* Like rdbSaveStringObjectRaw() but handle encoded objects */ 
2838 static int rdbSaveStringObject(FILE *fp
, robj 
*obj
) { 
2841     if (obj
->storage 
== REDIS_VM_MEMORY 
&& 
2842        obj
->encoding 
!= REDIS_ENCODING_RAW
) 
2844         obj 
= getDecodedObject(obj
); 
2845         retval 
= rdbSaveStringObjectRaw(fp
,obj
); 
2848         /* This is a fast path when we are sure the object is not encoded. 
2849          * Note that's any *faster* actually as we needed to add the conditional 
2850          * but because this may happen in a background process we don't want 
2851          * to touch the object fields with incr/decrRefCount in order to 
2852          * preveny copy on write of pages. 
2854          * Also incrRefCount() will have a failing assert() if we try to call 
2855          * it against an object with storage != REDIS_VM_MEMORY. */ 
2856         retval 
= rdbSaveStringObjectRaw(fp
,obj
); 
/* Save a double value. Doubles are saved as strings prefixed by an unsigned
 * 8 bit integer specifying the length of the representation.
 * This 8 bit integer has special values in order to specify the following
 * conditions:
 * 253: not a number
 * 254: + inf
 * 255: - inf
 *
 * Returns 0 on success, -1 on write error. */
static int rdbSaveDoubleValue(FILE *fp, double val) {
    unsigned char out[128];
    int outlen;

    if (isnan(val)) {
        out[0] = 253;
        outlen = 1;
    } else if (!isfinite(val)) {
        out[0] = (val < 0) ? 255 : 254;
        outlen = 1;
    } else {
        /* Text form goes after the length byte. */
        snprintf((char*)out+1,sizeof(out)-1,"%.17g",val);
        out[0] = strlen((char*)out+1);
        outlen = out[0]+1;
    }
    return (fwrite(out,outlen,1,fp) == 0) ? -1 : 0;
}
2888 /* Save a Redis object. */ 
2889 static int rdbSaveObject(FILE *fp
, robj 
*o
) { 
2890     if (o
->type 
== REDIS_STRING
) { 
2891         /* Save a string value */ 
2892         if (rdbSaveStringObject(fp
,o
) == -1) return -1; 
2893     } else if (o
->type 
== REDIS_LIST
) { 
2894         /* Save a list value */ 
2895         list 
*list 
= o
->ptr
; 
2899         if (rdbSaveLen(fp
,listLength(list
)) == -1) return -1; 
2900         while((ln 
= listYield(list
))) { 
2901             robj 
*eleobj 
= listNodeValue(ln
); 
2903             if (rdbSaveStringObject(fp
,eleobj
) == -1) return -1; 
2905     } else if (o
->type 
== REDIS_SET
) { 
2906         /* Save a set value */ 
2908         dictIterator 
*di 
= dictGetIterator(set
); 
2911         if (rdbSaveLen(fp
,dictSize(set
)) == -1) return -1; 
2912         while((de 
= dictNext(di
)) != NULL
) { 
2913             robj 
*eleobj 
= dictGetEntryKey(de
); 
2915             if (rdbSaveStringObject(fp
,eleobj
) == -1) return -1; 
2917         dictReleaseIterator(di
); 
2918     } else if (o
->type 
== REDIS_ZSET
) { 
2919         /* Save a set value */ 
2921         dictIterator 
*di 
= dictGetIterator(zs
->dict
); 
2924         if (rdbSaveLen(fp
,dictSize(zs
->dict
)) == -1) return -1; 
2925         while((de 
= dictNext(di
)) != NULL
) { 
2926             robj 
*eleobj 
= dictGetEntryKey(de
); 
2927             double *score 
= dictGetEntryVal(de
); 
2929             if (rdbSaveStringObject(fp
,eleobj
) == -1) return -1; 
2930             if (rdbSaveDoubleValue(fp
,*score
) == -1) return -1; 
2932         dictReleaseIterator(di
); 
2934         redisAssert(0 != 0); 
2939 /* Return the length the object will have on disk if saved with 
2940  * the rdbSaveObject() function. Currently we use a trick to get 
2941  * this length with very little changes to the code. In the future 
2942  * we could switch to a faster solution. */ 
2943 static off_t 
rdbSavedObjectLen(robj 
*o
, FILE *fp
) { 
2944     if (fp 
== NULL
) fp 
= server
.devnull
; 
2946     assert(rdbSaveObject(fp
,o
) != 1); 
2950 /* Return the number of pages required to save this object in the swap file */ 
2951 static off_t 
rdbSavedObjectPages(robj 
*o
, FILE *fp
) { 
2952     off_t bytes 
= rdbSavedObjectLen(o
,fp
); 
2954     return (bytes
+(server
.vm_page_size
-1))/server
.vm_page_size
; 
2957 /* Save the DB on disk. Return REDIS_ERR on error, REDIS_OK on success */ 
2958 static int rdbSave(char *filename
) { 
2959     dictIterator 
*di 
= NULL
; 
2964     time_t now 
= time(NULL
); 
2966     snprintf(tmpfile
,256,"temp-%d.rdb", (int) getpid()); 
2967     fp 
= fopen(tmpfile
,"w"); 
2969         redisLog(REDIS_WARNING
, "Failed saving the DB: %s", strerror(errno
)); 
2972     if (fwrite("REDIS0001",9,1,fp
) == 0) goto werr
; 
2973     for (j 
= 0; j 
< server
.dbnum
; j
++) { 
2974         redisDb 
*db 
= server
.db
+j
; 
2976         if (dictSize(d
) == 0) continue; 
2977         di 
= dictGetIterator(d
); 
2983         /* Write the SELECT DB opcode */ 
2984         if (rdbSaveType(fp
,REDIS_SELECTDB
) == -1) goto werr
; 
2985         if (rdbSaveLen(fp
,j
) == -1) goto werr
; 
2987         /* Iterate this DB writing every entry */ 
2988         while((de 
= dictNext(di
)) != NULL
) { 
2989             robj 
*key 
= dictGetEntryKey(de
); 
2990             robj 
*o 
= dictGetEntryVal(de
); 
2991             time_t expiretime 
= getExpire(db
,key
); 
2993             /* Save the expire time */ 
2994             if (expiretime 
!= -1) { 
2995                 /* If this key is already expired skip it */ 
2996                 if (expiretime 
< now
) continue; 
2997                 if (rdbSaveType(fp
,REDIS_EXPIRETIME
) == -1) goto werr
; 
2998                 if (rdbSaveTime(fp
,expiretime
) == -1) goto werr
; 
3000             /* Save the key and associated value. This requires special 
3001              * handling if the value is swapped out. */ 
3002             if (!server
.vm_enabled 
|| key
->storage 
== REDIS_VM_MEMORY 
|| 
3003                                       key
->storage 
== REDIS_VM_SWAPPING
) { 
3004                 /* Save type, key, value */ 
3005                 if (rdbSaveType(fp
,o
->type
) == -1) goto werr
; 
3006                 if (rdbSaveStringObject(fp
,key
) == -1) goto werr
; 
3007                 if (rdbSaveObject(fp
,o
) == -1) goto werr
; 
3009                 /* REDIS_VM_SWAPPED or REDIS_VM_LOADING */ 
3011                 /* Get a preview of the object in memory */ 
3012                 po 
= vmPreviewObject(key
); 
3013                 /* Save type, key, value */ 
3014                 if (rdbSaveType(fp
,key
->vtype
) == -1) goto werr
; 
3015                 if (rdbSaveStringObject(fp
,key
) == -1) goto werr
; 
3016                 if (rdbSaveObject(fp
,po
) == -1) goto werr
; 
3017                 /* Remove the loaded object from memory */ 
3021         dictReleaseIterator(di
); 
3024     if (rdbSaveType(fp
,REDIS_EOF
) == -1) goto werr
; 
3026     /* Make sure data will not remain on the OS's output buffers */ 
3031     /* Use RENAME to make sure the DB file is changed atomically only 
3032      * if the generate DB file is ok. */ 
3033     if (rename(tmpfile
,filename
) == -1) { 
3034         redisLog(REDIS_WARNING
,"Error moving temp DB file on the final destination: %s", strerror(errno
)); 
3038     redisLog(REDIS_NOTICE
,"DB saved on disk"); 
3040     server
.lastsave 
= time(NULL
); 
3046     redisLog(REDIS_WARNING
,"Write error saving DB on disk: %s", strerror(errno
)); 
3047     if (di
) dictReleaseIterator(di
); 
3051 static int rdbSaveBackground(char *filename
) { 
3054     if (server
.bgsavechildpid 
!= -1) return REDIS_ERR
; 
3055     if ((childpid 
= fork()) == 0) { 
3058         if (rdbSave(filename
) == REDIS_OK
) { 
3065         if (childpid 
== -1) { 
3066             redisLog(REDIS_WARNING
,"Can't save in background: fork: %s", 
3070         redisLog(REDIS_NOTICE
,"Background saving started by pid %d",childpid
); 
3071         server
.bgsavechildpid 
= childpid
; 
3074     return REDIS_OK
; /* unreached */ 
/* Remove the temporary dump file "temp-<pid>.rdb" belonging to the given
 * child pid. */
static void rdbRemoveTempFile(pid_t childpid) {
    char path[256];

    snprintf(path,256,"temp-%d.rdb", (int) childpid);
    unlink(path);
}
/* Read one byte from 'fp' and return it as an int in the 0-255 range, or
 * -1 if nothing could be read. */
static int rdbLoadType(FILE *fp) {
    unsigned char byte;

    return (fread(&byte,1,1,fp) == 0) ? -1 : byte;
}
/* Read a 4 byte signed integer from 'fp' and return it as a time_t, or -1
 * if the read fails. */
static time_t rdbLoadTime(FILE *fp) {
    int32_t stamp;

    if (fread(&stamp,4,1,fp) == 0) return -1;
    return (time_t) stamp;
}
3096 /* Load an encoded length from the DB, see the REDIS_RDB_* defines on the top 
3097  * of this file for a description of how this are stored on disk. 
3099  * isencoded is set to 1 if the readed length is not actually a length but 
3100  * an "encoding type", check the above comments for more info */ 
3101 static uint32_t rdbLoadLen(FILE *fp
, int *isencoded
) { 
3102     unsigned char buf
[2]; 
3106     if (isencoded
) *isencoded 
= 0; 
3107     if (fread(buf
,1,1,fp
) == 0) return REDIS_RDB_LENERR
; 
3108     type 
= (buf
[0]&0xC0)>>6; 
3109     if (type 
== REDIS_RDB_6BITLEN
) { 
3110         /* Read a 6 bit len */ 
3112     } else if (type 
== REDIS_RDB_ENCVAL
) { 
3113         /* Read a 6 bit len encoding type */ 
3114         if (isencoded
) *isencoded 
= 1; 
3116     } else if (type 
== REDIS_RDB_14BITLEN
) { 
3117         /* Read a 14 bit len */ 
3118         if (fread(buf
+1,1,1,fp
) == 0) return REDIS_RDB_LENERR
; 
3119         return ((buf
[0]&0x3F)<<8)|buf
[1]; 
3121         /* Read a 32 bit len */ 
3122         if (fread(&len
,4,1,fp
) == 0) return REDIS_RDB_LENERR
; 
3127 static robj 
*rdbLoadIntegerObject(FILE *fp
, int enctype
) { 
3128     unsigned char enc
[4]; 
3131     if (enctype 
== REDIS_RDB_ENC_INT8
) { 
3132         if (fread(enc
,1,1,fp
) == 0) return NULL
; 
3133         val 
= (signed char)enc
[0]; 
3134     } else if (enctype 
== REDIS_RDB_ENC_INT16
) { 
3136         if (fread(enc
,2,1,fp
) == 0) return NULL
; 
3137         v 
= enc
[0]|(enc
[1]<<8); 
3139     } else if (enctype 
== REDIS_RDB_ENC_INT32
) { 
3141         if (fread(enc
,4,1,fp
) == 0) return NULL
; 
3142         v 
= enc
[0]|(enc
[1]<<8)|(enc
[2]<<16)|(enc
[3]<<24); 
3145         val 
= 0; /* anti-warning */ 
3148     return createObject(REDIS_STRING
,sdscatprintf(sdsempty(),"%lld",val
)); 
3151 static robj 
*rdbLoadLzfStringObject(FILE*fp
) { 
3152     unsigned int len
, clen
; 
3153     unsigned char *c 
= NULL
; 
3156     if ((clen 
= rdbLoadLen(fp
,NULL
)) == REDIS_RDB_LENERR
) return NULL
; 
3157     if ((len 
= rdbLoadLen(fp
,NULL
)) == REDIS_RDB_LENERR
) return NULL
; 
3158     if ((c 
= zmalloc(clen
)) == NULL
) goto err
; 
3159     if ((val 
= sdsnewlen(NULL
,len
)) == NULL
) goto err
; 
3160     if (fread(c
,clen
,1,fp
) == 0) goto err
; 
3161     if (lzf_decompress(c
,clen
,val
,len
) == 0) goto err
; 
3163     return createObject(REDIS_STRING
,val
); 
3170 static robj 
*rdbLoadStringObject(FILE*fp
) { 
3175     len 
= rdbLoadLen(fp
,&isencoded
); 
3178         case REDIS_RDB_ENC_INT8
: 
3179         case REDIS_RDB_ENC_INT16
: 
3180         case REDIS_RDB_ENC_INT32
: 
3181             return tryObjectSharing(rdbLoadIntegerObject(fp
,len
)); 
3182         case REDIS_RDB_ENC_LZF
: 
3183             return tryObjectSharing(rdbLoadLzfStringObject(fp
)); 
3189     if (len 
== REDIS_RDB_LENERR
) return NULL
; 
3190     val 
= sdsnewlen(NULL
,len
); 
3191     if (len 
&& fread(val
,len
,1,fp
) == 0) { 
3195     return tryObjectSharing(createObject(REDIS_STRING
,val
)); 
3198 /* For information about double serialization check rdbSaveDoubleValue() */ 
3199 static int rdbLoadDoubleValue(FILE *fp
, double *val
) { 
3203     if (fread(&len
,1,1,fp
) == 0) return -1; 
3205     case 255: *val 
= R_NegInf
; return 0; 
3206     case 254: *val 
= R_PosInf
; return 0; 
3207     case 253: *val 
= R_Nan
; return 0; 
3209         if (fread(buf
,len
,1,fp
) == 0) return -1; 
3211         sscanf(buf
, "%lg", val
); 
3216 /* Load a Redis object of the specified type from the specified file. 
3217  * On success a newly allocated object is returned, otherwise NULL. */ 
3218 static robj 
*rdbLoadObject(int type
, FILE *fp
) { 
3221     if (type 
== REDIS_STRING
) { 
3222         /* Read string value */ 
3223         if ((o 
= rdbLoadStringObject(fp
)) == NULL
) return NULL
; 
3224         tryObjectEncoding(o
); 
3225     } else if (type 
== REDIS_LIST 
|| type 
== REDIS_SET
) { 
3226         /* Read list/set value */ 
3229         if ((listlen 
= rdbLoadLen(fp
,NULL
)) == REDIS_RDB_LENERR
) return NULL
; 
3230         o 
= (type 
== REDIS_LIST
) ? createListObject() : createSetObject(); 
3231         /* Load every single element of the list/set */ 
3235             if ((ele 
= rdbLoadStringObject(fp
)) == NULL
) return NULL
; 
3236             tryObjectEncoding(ele
); 
3237             if (type 
== REDIS_LIST
) { 
3238                 listAddNodeTail((list
*)o
->ptr
,ele
); 
3240                 dictAdd((dict
*)o
->ptr
,ele
,NULL
); 
3243     } else if (type 
== REDIS_ZSET
) { 
3244         /* Read list/set value */ 
3248         if ((zsetlen 
= rdbLoadLen(fp
,NULL
)) == REDIS_RDB_LENERR
) return NULL
; 
3249         o 
= createZsetObject(); 
3251         /* Load every single element of the list/set */ 
3254             double *score 
= zmalloc(sizeof(double)); 
3256             if ((ele 
= rdbLoadStringObject(fp
)) == NULL
) return NULL
; 
3257             tryObjectEncoding(ele
); 
3258             if (rdbLoadDoubleValue(fp
,score
) == -1) return NULL
; 
3259             dictAdd(zs
->dict
,ele
,score
); 
3260             zslInsert(zs
->zsl
,*score
,ele
); 
3261             incrRefCount(ele
); /* added to skiplist */ 
3264         redisAssert(0 != 0); 
3269 static int rdbLoad(char *filename
) { 
3271     robj 
*keyobj 
= NULL
; 
3273     int type
, retval
, rdbver
; 
3274     dict 
*d 
= server
.db
[0].dict
; 
3275     redisDb 
*db 
= server
.db
+0; 
3277     time_t expiretime 
= -1, now 
= time(NULL
); 
3278     long long loadedkeys 
= 0; 
3280     fp 
= fopen(filename
,"r"); 
3281     if (!fp
) return REDIS_ERR
; 
3282     if (fread(buf
,9,1,fp
) == 0) goto eoferr
; 
3284     if (memcmp(buf
,"REDIS",5) != 0) { 
3286         redisLog(REDIS_WARNING
,"Wrong signature trying to load DB from file"); 
3289     rdbver 
= atoi(buf
+5); 
3292         redisLog(REDIS_WARNING
,"Can't handle RDB format version %d",rdbver
); 
3299         if ((type 
= rdbLoadType(fp
)) == -1) goto eoferr
; 
3300         if (type 
== REDIS_EXPIRETIME
) { 
3301             if ((expiretime 
= rdbLoadTime(fp
)) == -1) goto eoferr
; 
3302             /* We read the time so we need to read the object type again */ 
3303             if ((type 
= rdbLoadType(fp
)) == -1) goto eoferr
; 
3305         if (type 
== REDIS_EOF
) break; 
3306         /* Handle SELECT DB opcode as a special case */ 
3307         if (type 
== REDIS_SELECTDB
) { 
3308             if ((dbid 
= rdbLoadLen(fp
,NULL
)) == REDIS_RDB_LENERR
) 
3310             if (dbid 
>= (unsigned)server
.dbnum
) { 
3311                 redisLog(REDIS_WARNING
,"FATAL: Data file was created with a Redis server configured to handle more than %d databases. Exiting\n", server
.dbnum
); 
3314             db 
= server
.db
+dbid
; 
3319         if ((keyobj 
= rdbLoadStringObject(fp
)) == NULL
) goto eoferr
; 
3321         if ((o 
= rdbLoadObject(type
,fp
)) == NULL
) goto eoferr
; 
3322         /* Add the new object in the hash table */ 
3323         retval 
= dictAdd(d
,keyobj
,o
); 
3324         if (retval 
== DICT_ERR
) { 
3325             redisLog(REDIS_WARNING
,"Loading DB, duplicated key (%s) found! Unrecoverable error, exiting now.", keyobj
->ptr
); 
3328         /* Set the expire time if needed */ 
3329         if (expiretime 
!= -1) { 
3330             setExpire(db
,keyobj
,expiretime
); 
3331             /* Delete this key if already expired */ 
3332             if (expiretime 
< now
) deleteKey(db
,keyobj
); 
3336         /* Handle swapping while loading big datasets when VM is on */ 
3338         if (server
.vm_enabled 
&& (loadedkeys 
% 5000) == 0) { 
3339             while (zmalloc_used_memory() > server
.vm_max_memory
) { 
3340                 if (vmSwapOneObjectBlocking() == REDIS_ERR
) break; 
3347 eoferr
: /* unexpected end of file is handled here with a fatal exit */ 
3348     if (keyobj
) decrRefCount(keyobj
); 
3349     redisLog(REDIS_WARNING
,"Short read or OOM loading DB. Unrecoverable error, aborting now."); 
3351     return REDIS_ERR
; /* Just to avoid warning */ 
3354 /*================================== Commands =============================== */ 
3356 static void authCommand(redisClient 
*c
) { 
3357     if (!server
.requirepass 
|| !strcmp(c
->argv
[1]->ptr
, server
.requirepass
)) { 
3358       c
->authenticated 
= 1; 
3359       addReply(c
,shared
.ok
); 
3361       c
->authenticated 
= 0; 
3362       addReplySds(c
,sdscatprintf(sdsempty(),"-ERR invalid password\r\n")); 
3366 static void pingCommand(redisClient 
*c
) { 
3367     addReply(c
,shared
.pong
); 
3370 static void echoCommand(redisClient 
*c
) { 
3371     addReplyBulkLen(c
,c
->argv
[1]); 
3372     addReply(c
,c
->argv
[1]); 
3373     addReply(c
,shared
.crlf
); 
3376 /*=================================== Strings =============================== */ 
3378 static void setGenericCommand(redisClient 
*c
, int nx
) { 
3381     if (nx
) deleteIfVolatile(c
->db
,c
->argv
[1]); 
3382     retval 
= dictAdd(c
->db
->dict
,c
->argv
[1],c
->argv
[2]); 
3383     if (retval 
== DICT_ERR
) { 
3385             /* If the key is about a swapped value, we want a new key object 
3386              * to overwrite the old. So we delete the old key in the database. 
3387              * This will also make sure that swap pages about the old object 
3388              * will be marked as free. */ 
3389             if (deleteIfSwapped(c
->db
,c
->argv
[1])) 
3390                 incrRefCount(c
->argv
[1]); 
3391             dictReplace(c
->db
->dict
,c
->argv
[1],c
->argv
[2]); 
3392             incrRefCount(c
->argv
[2]); 
3394             addReply(c
,shared
.czero
); 
3398         incrRefCount(c
->argv
[1]); 
3399         incrRefCount(c
->argv
[2]); 
3402     removeExpire(c
->db
,c
->argv
[1]); 
3403     addReply(c
, nx 
? shared
.cone 
: shared
.ok
); 
3406 static void setCommand(redisClient 
*c
) { 
3407     setGenericCommand(c
,0); 
3410 static void setnxCommand(redisClient 
*c
) { 
3411     setGenericCommand(c
,1); 
3414 static int getGenericCommand(redisClient 
*c
) { 
3415     robj 
*o 
= lookupKeyRead(c
->db
,c
->argv
[1]); 
3418         addReply(c
,shared
.nullbulk
); 
3421         if (o
->type 
!= REDIS_STRING
) { 
3422             addReply(c
,shared
.wrongtypeerr
); 
3425             addReplyBulkLen(c
,o
); 
3427             addReply(c
,shared
.crlf
); 
3433 static void getCommand(redisClient 
*c
) { 
3434     getGenericCommand(c
); 
3437 static void getsetCommand(redisClient 
*c
) { 
3438     if (getGenericCommand(c
) == REDIS_ERR
) return; 
3439     if (dictAdd(c
->db
->dict
,c
->argv
[1],c
->argv
[2]) == DICT_ERR
) { 
3440         dictReplace(c
->db
->dict
,c
->argv
[1],c
->argv
[2]); 
3442         incrRefCount(c
->argv
[1]); 
3444     incrRefCount(c
->argv
[2]); 
3446     removeExpire(c
->db
,c
->argv
[1]); 
3449 static void mgetCommand(redisClient 
*c
) { 
3452     addReplySds(c
,sdscatprintf(sdsempty(),"*%d\r\n",c
->argc
-1)); 
3453     for (j 
= 1; j 
< c
->argc
; j
++) { 
3454         robj 
*o 
= lookupKeyRead(c
->db
,c
->argv
[j
]); 
3456             addReply(c
,shared
.nullbulk
); 
3458             if (o
->type 
!= REDIS_STRING
) { 
3459                 addReply(c
,shared
.nullbulk
); 
3461                 addReplyBulkLen(c
,o
); 
3463                 addReply(c
,shared
.crlf
); 
3469 static void msetGenericCommand(redisClient 
*c
, int nx
) { 
3470     int j
, busykeys 
= 0; 
3472     if ((c
->argc 
% 2) == 0) { 
3473         addReplySds(c
,sdsnew("-ERR wrong number of arguments for MSET\r\n")); 
3476     /* Handle the NX flag. The MSETNX semantic is to return zero and don't 
3477      * set nothing at all if at least one already key exists. */ 
3479         for (j 
= 1; j 
< c
->argc
; j 
+= 2) { 
3480             if (lookupKeyWrite(c
->db
,c
->argv
[j
]) != NULL
) { 
3486         addReply(c
, shared
.czero
); 
3490     for (j 
= 1; j 
< c
->argc
; j 
+= 2) { 
3493         tryObjectEncoding(c
->argv
[j
+1]); 
3494         retval 
= dictAdd(c
->db
->dict
,c
->argv
[j
],c
->argv
[j
+1]); 
3495         if (retval 
== DICT_ERR
) { 
3496             dictReplace(c
->db
->dict
,c
->argv
[j
],c
->argv
[j
+1]); 
3497             incrRefCount(c
->argv
[j
+1]); 
3499             incrRefCount(c
->argv
[j
]); 
3500             incrRefCount(c
->argv
[j
+1]); 
3502         removeExpire(c
->db
,c
->argv
[j
]); 
3504     server
.dirty 
+= (c
->argc
-1)/2; 
3505     addReply(c
, nx 
? shared
.cone 
: shared
.ok
); 
3508 static void msetCommand(redisClient 
*c
) { 
3509     msetGenericCommand(c
,0); 
3512 static void msetnxCommand(redisClient 
*c
) { 
3513     msetGenericCommand(c
,1); 
3516 static void incrDecrCommand(redisClient 
*c
, long long incr
) { 
3521     o 
= lookupKeyWrite(c
->db
,c
->argv
[1]); 
3525         if (o
->type 
!= REDIS_STRING
) { 
3530             if (o
->encoding 
== REDIS_ENCODING_RAW
) 
3531                 value 
= strtoll(o
->ptr
, &eptr
, 10); 
3532             else if (o
->encoding 
== REDIS_ENCODING_INT
) 
3533                 value 
= (long)o
->ptr
; 
3535                 redisAssert(1 != 1); 
3540     o 
= createObject(REDIS_STRING
,sdscatprintf(sdsempty(),"%lld",value
)); 
3541     tryObjectEncoding(o
); 
3542     retval 
= dictAdd(c
->db
->dict
,c
->argv
[1],o
); 
3543     if (retval 
== DICT_ERR
) { 
3544         dictReplace(c
->db
->dict
,c
->argv
[1],o
); 
3545         removeExpire(c
->db
,c
->argv
[1]); 
3547         incrRefCount(c
->argv
[1]); 
3550     addReply(c
,shared
.colon
); 
3552     addReply(c
,shared
.crlf
); 
3555 static void incrCommand(redisClient 
*c
) { 
3556     incrDecrCommand(c
,1); 
3559 static void decrCommand(redisClient 
*c
) { 
3560     incrDecrCommand(c
,-1); 
3563 static void incrbyCommand(redisClient 
*c
) { 
3564     long long incr 
= strtoll(c
->argv
[2]->ptr
, NULL
, 10); 
3565     incrDecrCommand(c
,incr
); 
3568 static void decrbyCommand(redisClient 
*c
) { 
3569     long long incr 
= strtoll(c
->argv
[2]->ptr
, NULL
, 10); 
3570     incrDecrCommand(c
,-incr
); 
3573 /* ========================= Type agnostic commands ========================= */ 
3575 static void delCommand(redisClient 
*c
) { 
3578     for (j 
= 1; j 
< c
->argc
; j
++) { 
3579         if (deleteKey(c
->db
,c
->argv
[j
])) { 
3586         addReply(c
,shared
.czero
); 
3589         addReply(c
,shared
.cone
); 
3592         addReplySds(c
,sdscatprintf(sdsempty(),":%d\r\n",deleted
)); 
3597 static void existsCommand(redisClient 
*c
) { 
3598     addReply(c
,lookupKeyRead(c
->db
,c
->argv
[1]) ? shared
.cone 
: shared
.czero
); 
3601 static void selectCommand(redisClient 
*c
) { 
3602     int id 
= atoi(c
->argv
[1]->ptr
); 
3604     if (selectDb(c
,id
) == REDIS_ERR
) { 
3605         addReplySds(c
,sdsnew("-ERR invalid DB index\r\n")); 
3607         addReply(c
,shared
.ok
); 
3611 static void randomkeyCommand(redisClient 
*c
) { 
3615         de 
= dictGetRandomKey(c
->db
->dict
); 
3616         if (!de 
|| expireIfNeeded(c
->db
,dictGetEntryKey(de
)) == 0) break; 
3619         addReply(c
,shared
.plus
); 
3620         addReply(c
,shared
.crlf
); 
3622         addReply(c
,shared
.plus
); 
3623         addReply(c
,dictGetEntryKey(de
)); 
3624         addReply(c
,shared
.crlf
); 
3628 static void keysCommand(redisClient 
*c
) { 
3631     sds pattern 
= c
->argv
[1]->ptr
; 
3632     int plen 
= sdslen(pattern
); 
3633     unsigned long numkeys 
= 0, keyslen 
= 0; 
3634     robj 
*lenobj 
= createObject(REDIS_STRING
,NULL
); 
3636     di 
= dictGetIterator(c
->db
->dict
); 
3638     decrRefCount(lenobj
); 
3639     while((de 
= dictNext(di
)) != NULL
) { 
3640         robj 
*keyobj 
= dictGetEntryKey(de
); 
3642         sds key 
= keyobj
->ptr
; 
3643         if ((pattern
[0] == '*' && pattern
[1] == '\0') || 
3644             stringmatchlen(pattern
,plen
,key
,sdslen(key
),0)) { 
3645             if (expireIfNeeded(c
->db
,keyobj
) == 0) { 
3647                     addReply(c
,shared
.space
); 
3650                 keyslen 
+= sdslen(key
); 
3654     dictReleaseIterator(di
); 
3655     lenobj
->ptr 
= sdscatprintf(sdsempty(),"$%lu\r\n",keyslen
+(numkeys 
? (numkeys
-1) : 0)); 
3656     addReply(c
,shared
.crlf
); 
3659 static void dbsizeCommand(redisClient 
*c
) { 
3661         sdscatprintf(sdsempty(),":%lu\r\n",dictSize(c
->db
->dict
))); 
3664 static void lastsaveCommand(redisClient 
*c
) { 
3666         sdscatprintf(sdsempty(),":%lu\r\n",server
.lastsave
)); 
3669 static void typeCommand(redisClient 
*c
) { 
3673     o 
= lookupKeyRead(c
->db
,c
->argv
[1]); 
3678         case REDIS_STRING
: type 
= "+string"; break; 
3679         case REDIS_LIST
: type 
= "+list"; break; 
3680         case REDIS_SET
: type 
= "+set"; break; 
3681         case REDIS_ZSET
: type 
= "+zset"; break; 
3682         default: type 
= "unknown"; break; 
3685     addReplySds(c
,sdsnew(type
)); 
3686     addReply(c
,shared
.crlf
); 
3689 static void saveCommand(redisClient 
*c
) { 
3690     if (server
.bgsavechildpid 
!= -1) { 
3691         addReplySds(c
,sdsnew("-ERR background save in progress\r\n")); 
3694     if (rdbSave(server
.dbfilename
) == REDIS_OK
) { 
3695         addReply(c
,shared
.ok
); 
3697         addReply(c
,shared
.err
); 
3701 static void bgsaveCommand(redisClient 
*c
) { 
3702     if (server
.bgsavechildpid 
!= -1) { 
3703         addReplySds(c
,sdsnew("-ERR background save already in progress\r\n")); 
3706     if (rdbSaveBackground(server
.dbfilename
) == REDIS_OK
) { 
3707         char *status 
= "+Background saving started\r\n"; 
3708         addReplySds(c
,sdsnew(status
)); 
3710         addReply(c
,shared
.err
); 
/* SHUTDOWN command.
 * Logs the request, then:
 *  - if a background save child exists (server.bgsavechildpid != -1) it is
 *    killed with SIGKILL and its temp file is removed;
 *  - if server.appendonly is set the append only file descriptor is
 *    fsync()ed (NOTE(review): the fsync() return value is not checked);
 *  - on the snapshotting path rdbSave() is run synchronously. On REDIS_OK
 *    the pid file is unlinked when daemonized and the memory usage and a
 *    goodbye message are logged; on failure a warning is logged and the
 *    client receives "-ERR can't quit, problems saving the DB".
 * The process exit calls themselves are not visible in this listing. */
3714 static void shutdownCommand(redisClient 
*c
) { 
3715     redisLog(REDIS_WARNING
,"User requested shutdown, saving DB..."); 
3716     /* Kill the saving child if there is a background saving in progress. 
3717        We want to avoid race conditions, for instance our saving child may 
3718        overwrite the synchronous saving done by SHUTDOWN. */ 
3719     if (server
.bgsavechildpid 
!= -1) { 
3720         redisLog(REDIS_WARNING
,"There is a live saving child. Killing it!"); 
3721         kill(server
.bgsavechildpid
,SIGKILL
); 
3722         rdbRemoveTempFile(server
.bgsavechildpid
); 
3724     if (server
.appendonly
) { 
3725         /* Append only file: fsync() the AOF and exit */ 
3726         fsync(server
.appendfd
); 
3729         /* Snapshotting. Perform a SYNC SAVE and exit */ 
3730         if (rdbSave(server
.dbfilename
) == REDIS_OK
) { 
3731             if (server
.daemonize
) 
3732                 unlink(server
.pidfile
); 
3733             redisLog(REDIS_WARNING
,"%zu bytes used at exit",zmalloc_used_memory()); 
3734             redisLog(REDIS_WARNING
,"Server exit now, bye bye..."); 
3737             /* Ooops.. error saving! The best we can do is to continue operating. 
3738              * Note that if there was a background saving process, in the next 
3739              * cron() Redis will be notified that the background saving aborted, 
3740              * handling special stuff like slaves pending for synchronization... */ 
3741             redisLog(REDIS_WARNING
,"Error trying to save the DB, can't exit");  
3742             addReplySds(c
,sdsnew("-ERR can't quit, problems saving the DB\r\n")); 
3747 static void renameGenericCommand(redisClient 
*c
, int nx
) { 
3750     /* To use the same key as src and dst is probably an error */ 
3751     if (sdscmp(c
->argv
[1]->ptr
,c
->argv
[2]->ptr
) == 0) { 
3752         addReply(c
,shared
.sameobjecterr
); 
3756     o 
= lookupKeyWrite(c
->db
,c
->argv
[1]); 
3758         addReply(c
,shared
.nokeyerr
); 
3762     deleteIfVolatile(c
->db
,c
->argv
[2]); 
3763     if (dictAdd(c
->db
->dict
,c
->argv
[2],o
) == DICT_ERR
) { 
3766             addReply(c
,shared
.czero
); 
3769         dictReplace(c
->db
->dict
,c
->argv
[2],o
); 
3771         incrRefCount(c
->argv
[2]); 
3773     deleteKey(c
->db
,c
->argv
[1]); 
3775     addReply(c
,nx 
? shared
.cone 
: shared
.ok
); 
/* RENAME command: thin wrapper calling renameGenericCommand() with nx = 0. */
3778 static void renameCommand(redisClient 
*c
) { 
3779     renameGenericCommand(c
,0); 
/* RENAMENX command: thin wrapper calling renameGenericCommand() with nx = 1. */
3782 static void renamenxCommand(redisClient 
*c
) { 
3783     renameGenericCommand(c
,1); 
3786 static void moveCommand(redisClient 
*c
) { 
3791     /* Obtain source and target DB pointers */ 
3794     if (selectDb(c
,atoi(c
->argv
[2]->ptr
)) == REDIS_ERR
) { 
3795         addReply(c
,shared
.outofrangeerr
); 
3799     selectDb(c
,srcid
); /* Back to the source DB */ 
3801     /* If the user is moving using as target the same 
3802      * DB as the source DB it is probably an error. */ 
3804         addReply(c
,shared
.sameobjecterr
); 
3808     /* Check if the element exists and get a reference */ 
3809     o 
= lookupKeyWrite(c
->db
,c
->argv
[1]); 
3811         addReply(c
,shared
.czero
); 
3815     /* Try to add the element to the target DB */ 
3816     deleteIfVolatile(dst
,c
->argv
[1]); 
3817     if (dictAdd(dst
->dict
,c
->argv
[1],o
) == DICT_ERR
) { 
3818         addReply(c
,shared
.czero
); 
3821     incrRefCount(c
->argv
[1]); 
3824     /* OK! key moved, free the entry in the source DB */ 
3825     deleteKey(src
,c
->argv
[1]); 
3827     addReply(c
,shared
.cone
); 
3830 /* =================================== Lists ================================ */ 
3831 static void pushGenericCommand(redisClient 
*c
, int where
) { 
3835     lobj 
= lookupKeyWrite(c
->db
,c
->argv
[1]); 
3837         if (handleClientsWaitingListPush(c
,c
->argv
[1],c
->argv
[2])) { 
3838             addReply(c
,shared
.ok
); 
3841         lobj 
= createListObject(); 
3843         if (where 
== REDIS_HEAD
) { 
3844             listAddNodeHead(list
,c
->argv
[2]); 
3846             listAddNodeTail(list
,c
->argv
[2]); 
3848         dictAdd(c
->db
->dict
,c
->argv
[1],lobj
); 
3849         incrRefCount(c
->argv
[1]); 
3850         incrRefCount(c
->argv
[2]); 
3852         if (lobj
->type 
!= REDIS_LIST
) { 
3853             addReply(c
,shared
.wrongtypeerr
); 
3856         if (handleClientsWaitingListPush(c
,c
->argv
[1],c
->argv
[2])) { 
3857             addReply(c
,shared
.ok
); 
3861         if (where 
== REDIS_HEAD
) { 
3862             listAddNodeHead(list
,c
->argv
[2]); 
3864             listAddNodeTail(list
,c
->argv
[2]); 
3866         incrRefCount(c
->argv
[2]); 
3869     addReply(c
,shared
.ok
); 
/* LPUSH command: pushGenericCommand() with where = REDIS_HEAD. */
3872 static void lpushCommand(redisClient 
*c
) { 
3873     pushGenericCommand(c
,REDIS_HEAD
); 
/* RPUSH command: pushGenericCommand() with where = REDIS_TAIL. */
3876 static void rpushCommand(redisClient 
*c
) { 
3877     pushGenericCommand(c
,REDIS_TAIL
); 
/* LLEN command: replies with the length of the list stored at c->argv[1].
 * The key is fetched with lookupKeyRead(). The visible replies are
 * shared.czero (presumably for a missing key -- the condition line is not
 * visible in this listing), shared.wrongtypeerr when the object is not a
 * REDIS_LIST, and otherwise an integer reply built from listLength().
 * NOTE(review): the length is formatted with "%d"; confirm that this matches
 * the type listLength() evaluates to. */
3880 static void llenCommand(redisClient 
*c
) { 
3884     o 
= lookupKeyRead(c
->db
,c
->argv
[1]); 
3886         addReply(c
,shared
.czero
); 
3889         if (o
->type 
!= REDIS_LIST
) { 
3890             addReply(c
,shared
.wrongtypeerr
); 
3893             addReplySds(c
,sdscatprintf(sdsempty(),":%d\r\n",listLength(l
))); 
3898 static void lindexCommand(redisClient 
*c
) { 
3900     int index 
= atoi(c
->argv
[2]->ptr
); 
3902     o 
= lookupKeyRead(c
->db
,c
->argv
[1]); 
3904         addReply(c
,shared
.nullbulk
); 
3906         if (o
->type 
!= REDIS_LIST
) { 
3907             addReply(c
,shared
.wrongtypeerr
); 
3909             list 
*list 
= o
->ptr
; 
3912             ln 
= listIndex(list
, index
); 
3914                 addReply(c
,shared
.nullbulk
); 
3916                 robj 
*ele 
= listNodeValue(ln
); 
3917                 addReplyBulkLen(c
,ele
); 
3919                 addReply(c
,shared
.crlf
); 
3925 static void lsetCommand(redisClient 
*c
) { 
3927     int index 
= atoi(c
->argv
[2]->ptr
); 
3929     o 
= lookupKeyWrite(c
->db
,c
->argv
[1]); 
3931         addReply(c
,shared
.nokeyerr
); 
3933         if (o
->type 
!= REDIS_LIST
) { 
3934             addReply(c
,shared
.wrongtypeerr
); 
3936             list 
*list 
= o
->ptr
; 
3939             ln 
= listIndex(list
, index
); 
3941                 addReply(c
,shared
.outofrangeerr
); 
3943                 robj 
*ele 
= listNodeValue(ln
); 
3946                 listNodeValue(ln
) = c
->argv
[3]; 
3947                 incrRefCount(c
->argv
[3]); 
3948                 addReply(c
,shared
.ok
); 
3955 static void popGenericCommand(redisClient 
*c
, int where
) { 
3958     o 
= lookupKeyWrite(c
->db
,c
->argv
[1]); 
3960         addReply(c
,shared
.nullbulk
); 
3962         if (o
->type 
!= REDIS_LIST
) { 
3963             addReply(c
,shared
.wrongtypeerr
); 
3965             list 
*list 
= o
->ptr
; 
3968             if (where 
== REDIS_HEAD
) 
3969                 ln 
= listFirst(list
); 
3971                 ln 
= listLast(list
); 
3974                 addReply(c
,shared
.nullbulk
); 
3976                 robj 
*ele 
= listNodeValue(ln
); 
3977                 addReplyBulkLen(c
,ele
); 
3979                 addReply(c
,shared
.crlf
); 
3980                 listDelNode(list
,ln
); 
/* LPOP command: popGenericCommand() with where = REDIS_HEAD. */
3987 static void lpopCommand(redisClient 
*c
) { 
3988     popGenericCommand(c
,REDIS_HEAD
); 
/* RPOP command: popGenericCommand() with where = REDIS_TAIL. */
3991 static void rpopCommand(redisClient 
*c
) { 
3992     popGenericCommand(c
,REDIS_TAIL
); 
3995 static void lrangeCommand(redisClient 
*c
) { 
3997     int start 
= atoi(c
->argv
[2]->ptr
); 
3998     int end 
= atoi(c
->argv
[3]->ptr
); 
4000     o 
= lookupKeyRead(c
->db
,c
->argv
[1]); 
4002         addReply(c
,shared
.nullmultibulk
); 
4004         if (o
->type 
!= REDIS_LIST
) { 
4005             addReply(c
,shared
.wrongtypeerr
); 
4007             list 
*list 
= o
->ptr
; 
4009             int llen 
= listLength(list
); 
4013             /* convert negative indexes */ 
4014             if (start 
< 0) start 
= llen
+start
; 
4015             if (end 
< 0) end 
= llen
+end
; 
4016             if (start 
< 0) start 
= 0; 
4017             if (end 
< 0) end 
= 0; 
4019             /* indexes sanity checks */ 
4020             if (start 
> end 
|| start 
>= llen
) { 
4021                 /* Out of range start or start > end result in empty list */ 
4022                 addReply(c
,shared
.emptymultibulk
); 
4025             if (end 
>= llen
) end 
= llen
-1; 
4026             rangelen 
= (end
-start
)+1; 
4028             /* Return the result in form of a multi-bulk reply */ 
4029             ln 
= listIndex(list
, start
); 
4030             addReplySds(c
,sdscatprintf(sdsempty(),"*%d\r\n",rangelen
)); 
4031             for (j 
= 0; j 
< rangelen
; j
++) { 
4032                 ele 
= listNodeValue(ln
); 
4033                 addReplyBulkLen(c
,ele
); 
4035                 addReply(c
,shared
.crlf
); 
4042 static void ltrimCommand(redisClient 
*c
) { 
4044     int start 
= atoi(c
->argv
[2]->ptr
); 
4045     int end 
= atoi(c
->argv
[3]->ptr
); 
4047     o 
= lookupKeyWrite(c
->db
,c
->argv
[1]); 
4049         addReply(c
,shared
.ok
); 
4051         if (o
->type 
!= REDIS_LIST
) { 
4052             addReply(c
,shared
.wrongtypeerr
); 
4054             list 
*list 
= o
->ptr
; 
4056             int llen 
= listLength(list
); 
4057             int j
, ltrim
, rtrim
; 
4059             /* convert negative indexes */ 
4060             if (start 
< 0) start 
= llen
+start
; 
4061             if (end 
< 0) end 
= llen
+end
; 
4062             if (start 
< 0) start 
= 0; 
4063             if (end 
< 0) end 
= 0; 
4065             /* indexes sanity checks */ 
4066             if (start 
> end 
|| start 
>= llen
) { 
4067                 /* Out of range start or start > end result in empty list */ 
4071                 if (end 
>= llen
) end 
= llen
-1; 
4076             /* Remove list elements to perform the trim */ 
4077             for (j 
= 0; j 
< ltrim
; j
++) { 
4078                 ln 
= listFirst(list
); 
4079                 listDelNode(list
,ln
); 
4081             for (j 
= 0; j 
< rtrim
; j
++) { 
4082                 ln 
= listLast(list
); 
4083                 listDelNode(list
,ln
); 
4086             addReply(c
,shared
.ok
); 
4091 static void lremCommand(redisClient 
*c
) { 
4094     o 
= lookupKeyWrite(c
->db
,c
->argv
[1]); 
4096         addReply(c
,shared
.czero
); 
4098         if (o
->type 
!= REDIS_LIST
) { 
4099             addReply(c
,shared
.wrongtypeerr
); 
4101             list 
*list 
= o
->ptr
; 
4102             listNode 
*ln
, *next
; 
4103             int toremove 
= atoi(c
->argv
[2]->ptr
); 
4108                 toremove 
= -toremove
; 
4111             ln 
= fromtail 
? list
->tail 
: list
->head
; 
4113                 robj 
*ele 
= listNodeValue(ln
); 
4115                 next 
= fromtail 
? ln
->prev 
: ln
->next
; 
4116                 if (compareStringObjects(ele
,c
->argv
[3]) == 0) { 
4117                     listDelNode(list
,ln
); 
4120                     if (toremove 
&& removed 
== toremove
) break; 
4124             addReplySds(c
,sdscatprintf(sdsempty(),":%d\r\n",removed
)); 
4129 /* This is the semantic of this command: 
4130  *  RPOPLPUSH srclist dstlist: 
4131  *   IF LLEN(srclist) > 0 
4132  *     element = RPOP srclist 
4133  *     LPUSH dstlist element 
4140  * The idea is to be able to get an element from a list in a reliable way 
4141  * since the element is not just returned but pushed against another list 
4142  * as well. This command was originally proposed by Ezra Zygmuntowicz. */ 
4144 static void rpoplpushcommand(redisClient 
*c
) { 
4147     sobj 
= lookupKeyWrite(c
->db
,c
->argv
[1]); 
4149         addReply(c
,shared
.nullbulk
); 
4151         if (sobj
->type 
!= REDIS_LIST
) { 
4152             addReply(c
,shared
.wrongtypeerr
); 
4154             list 
*srclist 
= sobj
->ptr
; 
4155             listNode 
*ln 
= listLast(srclist
); 
4158                 addReply(c
,shared
.nullbulk
); 
4160                 robj 
*dobj 
= lookupKeyWrite(c
->db
,c
->argv
[2]); 
4161                 robj 
*ele 
= listNodeValue(ln
); 
4164                 if (dobj 
&& dobj
->type 
!= REDIS_LIST
) { 
4165                     addReply(c
,shared
.wrongtypeerr
); 
4169                 /* Add the element to the target list (unless it's directly 
4170                  * passed to some BLPOP-ing client) */ 
4171                 if (!handleClientsWaitingListPush(c
,c
->argv
[2],ele
)) { 
4173                         /* Create the list if the key does not exist */ 
4174                         dobj 
= createListObject(); 
4175                         dictAdd(c
->db
->dict
,c
->argv
[2],dobj
); 
4176                         incrRefCount(c
->argv
[2]); 
4178                     dstlist 
= dobj
->ptr
; 
4179                     listAddNodeHead(dstlist
,ele
); 
4183                 /* Send the element to the client as reply as well */ 
4184                 addReplyBulkLen(c
,ele
); 
4186                 addReply(c
,shared
.crlf
); 
4188                 /* Finally remove the element from the source list */ 
4189                 listDelNode(srclist
,ln
); 
4197 /* ==================================== Sets ================================ */ 
/* SADD command: adds c->argv[2] to the set stored at c->argv[1].
 * The key is fetched with lookupKeyWrite(). The visible lines create a new
 * set object and register it in the database dictionary (taking a reference
 * on the key), or reply shared.wrongtypeerr when the existing object is not
 * a REDIS_SET. The member is then inserted with a NULL value: when dictAdd()
 * returns DICT_OK a reference on the member is taken and shared.cone is
 * sent; shared.czero is the reply on the other branch. */
4199 static void saddCommand(redisClient 
*c
) { 
4202     set 
= lookupKeyWrite(c
->db
,c
->argv
[1]); 
4204         set 
= createSetObject(); 
4205         dictAdd(c
->db
->dict
,c
->argv
[1],set
); 
4206         incrRefCount(c
->argv
[1]); 
4208         if (set
->type 
!= REDIS_SET
) { 
4209             addReply(c
,shared
.wrongtypeerr
); 
4213     if (dictAdd(set
->ptr
,c
->argv
[2],NULL
) == DICT_OK
) { 
4214         incrRefCount(c
->argv
[2]); 
4216         addReply(c
,shared
.cone
); 
4218         addReply(c
,shared
.czero
); 
/* SREM command: removes c->argv[2] from the set stored at c->argv[1].
 * The key is fetched with lookupKeyWrite(). Visible replies: shared.czero
 * (presumably for a missing key -- the condition line is not visible in this
 * listing), shared.wrongtypeerr when the object is not a REDIS_SET,
 * shared.cone when dictDelete() returns DICT_OK, and shared.czero on the
 * other branch. After a successful delete the hash table is shrunk when
 * htNeedsResize() says so. */
4222 static void sremCommand(redisClient 
*c
) { 
4225     set 
= lookupKeyWrite(c
->db
,c
->argv
[1]); 
4227         addReply(c
,shared
.czero
); 
4229         if (set
->type 
!= REDIS_SET
) { 
4230             addReply(c
,shared
.wrongtypeerr
); 
4233         if (dictDelete(set
->ptr
,c
->argv
[2]) == DICT_OK
) { 
4235             if (htNeedsResize(set
->ptr
)) dictResize(set
->ptr
); 
4236             addReply(c
,shared
.cone
); 
4238             addReply(c
,shared
.czero
); 
4243 static void smoveCommand(redisClient 
*c
) { 
4244     robj 
*srcset
, *dstset
; 
4246     srcset 
= lookupKeyWrite(c
->db
,c
->argv
[1]); 
4247     dstset 
= lookupKeyWrite(c
->db
,c
->argv
[2]); 
4249     /* If the source key does not exist return 0, if it's of the wrong type reply with an error */ 
4251     if (srcset 
== NULL 
|| srcset
->type 
!= REDIS_SET
) { 
4252         addReply(c
, srcset 
? shared
.wrongtypeerr 
: shared
.czero
); 
4255     /* Error if the destination key is not a set as well */ 
4256     if (dstset 
&& dstset
->type 
!= REDIS_SET
) { 
4257         addReply(c
,shared
.wrongtypeerr
); 
4260     /* Remove the element from the source set */ 
4261     if (dictDelete(srcset
->ptr
,c
->argv
[3]) == DICT_ERR
) { 
4262         /* Key not found in the src set! return zero */ 
4263         addReply(c
,shared
.czero
); 
4267     /* Add the element to the destination set */ 
4269         dstset 
= createSetObject(); 
4270         dictAdd(c
->db
->dict
,c
->argv
[2],dstset
); 
4271         incrRefCount(c
->argv
[2]); 
4273     if (dictAdd(dstset
->ptr
,c
->argv
[3],NULL
) == DICT_OK
) 
4274         incrRefCount(c
->argv
[3]); 
4275     addReply(c
,shared
.cone
); 
/* SISMEMBER command: tests whether c->argv[2] belongs to the set stored at
 * c->argv[1]. The key is fetched with lookupKeyRead(). Visible replies:
 * shared.czero (presumably for a missing key -- the condition line is not
 * visible in this listing), shared.wrongtypeerr when the object is not a
 * REDIS_SET, shared.cone when dictFind() locates the member, and
 * shared.czero on the other branch. */
4278 static void sismemberCommand(redisClient 
*c
) { 
4281     set 
= lookupKeyRead(c
->db
,c
->argv
[1]); 
4283         addReply(c
,shared
.czero
); 
4285         if (set
->type 
!= REDIS_SET
) { 
4286             addReply(c
,shared
.wrongtypeerr
); 
4289         if (dictFind(set
->ptr
,c
->argv
[2])) 
4290             addReply(c
,shared
.cone
); 
4292             addReply(c
,shared
.czero
); 
4296 static void scardCommand(redisClient 
*c
) { 
4300     o 
= lookupKeyRead(c
->db
,c
->argv
[1]); 
4302         addReply(c
,shared
.czero
); 
4305         if (o
->type 
!= REDIS_SET
) { 
4306             addReply(c
,shared
.wrongtypeerr
); 
4309             addReplySds(c
,sdscatprintf(sdsempty(),":%lu\r\n", 
4315 static void spopCommand(redisClient 
*c
) { 
4319     set 
= lookupKeyWrite(c
->db
,c
->argv
[1]); 
4321         addReply(c
,shared
.nullbulk
); 
4323         if (set
->type 
!= REDIS_SET
) { 
4324             addReply(c
,shared
.wrongtypeerr
); 
4327         de 
= dictGetRandomKey(set
->ptr
); 
4329             addReply(c
,shared
.nullbulk
); 
4331             robj 
*ele 
= dictGetEntryKey(de
); 
4333             addReplyBulkLen(c
,ele
); 
4335             addReply(c
,shared
.crlf
); 
4336             dictDelete(set
->ptr
,ele
); 
4337             if (htNeedsResize(set
->ptr
)) dictResize(set
->ptr
); 
4343 static void srandmemberCommand(redisClient 
*c
) { 
4347     set 
= lookupKeyRead(c
->db
,c
->argv
[1]); 
4349         addReply(c
,shared
.nullbulk
); 
4351         if (set
->type 
!= REDIS_SET
) { 
4352             addReply(c
,shared
.wrongtypeerr
); 
4355         de 
= dictGetRandomKey(set
->ptr
); 
4357             addReply(c
,shared
.nullbulk
); 
4359             robj 
*ele 
= dictGetEntryKey(de
); 
4361             addReplyBulkLen(c
,ele
); 
4363             addReply(c
,shared
.crlf
); 
4368 static int qsortCompareSetsByCardinality(const void *s1
, const void *s2
) { 
4369     dict 
**d1 
= (void*) s1
, **d2 
= (void*) s2
; 
4371     return dictSize(*d1
)-dictSize(*d2
); 
4374 static void sinterGenericCommand(redisClient 
*c
, robj 
**setskeys
, unsigned long setsnum
, robj 
*dstkey
) { 
4375     dict 
**dv 
= zmalloc(sizeof(dict
*)*setsnum
); 
4378     robj 
*lenobj 
= NULL
, *dstset 
= NULL
; 
4379     unsigned long j
, cardinality 
= 0; 
4381     for (j 
= 0; j 
< setsnum
; j
++) { 
4385                     lookupKeyWrite(c
->db
,setskeys
[j
]) : 
4386                     lookupKeyRead(c
->db
,setskeys
[j
]); 
4390                 if (deleteKey(c
->db
,dstkey
)) 
4392                 addReply(c
,shared
.czero
); 
4394                 addReply(c
,shared
.nullmultibulk
); 
4398         if (setobj
->type 
!= REDIS_SET
) { 
4400             addReply(c
,shared
.wrongtypeerr
); 
4403         dv
[j
] = setobj
->ptr
; 
4405     /* Sort sets from the smallest to largest, this will improve our 
4406      * algorithm's performance */ 
4407     qsort(dv
,setsnum
,sizeof(dict
*),qsortCompareSetsByCardinality
); 
4409     /* The first thing we should output is the total number of elements... 
4410      * since this is a multi-bulk write, but at this stage we don't know 
4411      * the intersection set size, so we use a trick, append an empty object 
4412      * to the output list and save the pointer to later modify it with the actual element count */ 
4415         lenobj 
= createObject(REDIS_STRING
,NULL
); 
4417         decrRefCount(lenobj
); 
4419         /* If we have a target key where to store the resulting set 
4420          * create this key with an empty set inside */ 
4421         dstset 
= createSetObject(); 
4424     /* Iterate all the elements of the first (smallest) set, and test 
4425      * the element against all the other sets, if at least one set does 
4426      * not include the element it is discarded */ 
4427     di 
= dictGetIterator(dv
[0]); 
4429     while((de 
= dictNext(di
)) != NULL
) { 
4432         for (j 
= 1; j 
< setsnum
; j
++) 
4433             if (dictFind(dv
[j
],dictGetEntryKey(de
)) == NULL
) break; 
4435             continue; /* at least one set does not contain the member */ 
4436         ele 
= dictGetEntryKey(de
); 
4438             addReplyBulkLen(c
,ele
); 
4440             addReply(c
,shared
.crlf
); 
4443             dictAdd(dstset
->ptr
,ele
,NULL
); 
4447     dictReleaseIterator(di
); 
4450         /* Store the resulting set into the target */ 
4451         deleteKey(c
->db
,dstkey
); 
4452         dictAdd(c
->db
->dict
,dstkey
,dstset
); 
4453         incrRefCount(dstkey
); 
4457         lenobj
->ptr 
= sdscatprintf(sdsempty(),"*%lu\r\n",cardinality
); 
4459         addReplySds(c
,sdscatprintf(sdsempty(),":%lu\r\n", 
4460             dictSize((dict
*)dstset
->ptr
))); 
/* SINTER command: runs sinterGenericCommand() over the keys that follow the
 * command name (c->argv+1, c->argc-1 of them) with a NULL destination key. */
4466 static void sinterCommand(redisClient 
*c
) { 
4467     sinterGenericCommand(c
,c
->argv
+1,c
->argc
-1,NULL
); 
/* SINTERSTORE command: runs sinterGenericCommand() with c->argv[1] as the
 * destination key and the remaining arguments (c->argv+2, c->argc-2 of them)
 * as the source set keys. */
4470 static void sinterstoreCommand(redisClient 
*c
) { 
4471     sinterGenericCommand(c
,c
->argv
+2,c
->argc
-2,c
->argv
[1]); 
4474 #define REDIS_OP_UNION 0 
4475 #define REDIS_OP_DIFF 1 
4477 static void sunionDiffGenericCommand(redisClient 
*c
, robj 
**setskeys
, int setsnum
, robj 
*dstkey
, int op
) { 
4478     dict 
**dv 
= zmalloc(sizeof(dict
*)*setsnum
); 
4481     robj 
*dstset 
= NULL
; 
4482     int j
, cardinality 
= 0; 
4484     for (j 
= 0; j 
< setsnum
; j
++) { 
4488                     lookupKeyWrite(c
->db
,setskeys
[j
]) : 
4489                     lookupKeyRead(c
->db
,setskeys
[j
]); 
4494         if (setobj
->type 
!= REDIS_SET
) { 
4496             addReply(c
,shared
.wrongtypeerr
); 
4499         dv
[j
] = setobj
->ptr
; 
4502     /* We need a temp set object to store our union. If the dstkey 
4503      * is not NULL (that is, we are inside an SUNIONSTORE operation) then 
4504      * this set object will be the resulting object to set into the target key*/ 
4505     dstset 
= createSetObject(); 
4507     /* Iterate all the elements of all the sets, add every element a single 
4508      * time to the result set */ 
4509     for (j 
= 0; j 
< setsnum
; j
++) { 
4510         if (op 
== REDIS_OP_DIFF 
&& j 
== 0 && !dv
[j
]) break; /* result set is empty */ 
4511         if (!dv
[j
]) continue; /* non existing keys are like empty sets */ 
4513         di 
= dictGetIterator(dv
[j
]); 
4515         while((de 
= dictNext(di
)) != NULL
) { 
4518             /* dictAdd will not add the same element multiple times */ 
4519             ele 
= dictGetEntryKey(de
); 
4520             if (op 
== REDIS_OP_UNION 
|| j 
== 0) { 
4521                 if (dictAdd(dstset
->ptr
,ele
,NULL
) == DICT_OK
) { 
4525             } else if (op 
== REDIS_OP_DIFF
) { 
4526                 if (dictDelete(dstset
->ptr
,ele
) == DICT_OK
) { 
4531         dictReleaseIterator(di
); 
4533         if (op 
== REDIS_OP_DIFF 
&& cardinality 
== 0) break; /* result set is empty */ 
4536     /* Output the content of the resulting set, if not in STORE mode */ 
4538         addReplySds(c
,sdscatprintf(sdsempty(),"*%d\r\n",cardinality
)); 
4539         di 
= dictGetIterator(dstset
->ptr
); 
4540         while((de 
= dictNext(di
)) != NULL
) { 
4543             ele 
= dictGetEntryKey(de
); 
4544             addReplyBulkLen(c
,ele
); 
4546             addReply(c
,shared
.crlf
); 
4548         dictReleaseIterator(di
); 
4550         /* If we have a target key where to store the resulting set 
4551          * create this key with the result set inside */ 
4552         deleteKey(c
->db
,dstkey
); 
4553         dictAdd(c
->db
->dict
,dstkey
,dstset
); 
4554         incrRefCount(dstkey
); 
4559         decrRefCount(dstset
); 
4561         addReplySds(c
,sdscatprintf(sdsempty(),":%lu\r\n", 
4562             dictSize((dict
*)dstset
->ptr
))); 
/* SUNION command: sunionDiffGenericCommand() over the keys following the
 * command name, with a NULL destination key and op = REDIS_OP_UNION. */
4568 static void sunionCommand(redisClient 
*c
) { 
4569     sunionDiffGenericCommand(c
,c
->argv
+1,c
->argc
-1,NULL
,REDIS_OP_UNION
); 
/* SUNIONSTORE command: sunionDiffGenericCommand() with c->argv[1] as the
 * destination key, the remaining arguments as source set keys and
 * op = REDIS_OP_UNION. */
4572 static void sunionstoreCommand(redisClient 
*c
) { 
4573     sunionDiffGenericCommand(c
,c
->argv
+2,c
->argc
-2,c
->argv
[1],REDIS_OP_UNION
); 
/* SDIFF command: sunionDiffGenericCommand() over the keys following the
 * command name, with a NULL destination key and op = REDIS_OP_DIFF. */
4576 static void sdiffCommand(redisClient 
*c
) { 
4577     sunionDiffGenericCommand(c
,c
->argv
+1,c
->argc
-1,NULL
,REDIS_OP_DIFF
); 
/* SDIFFSTORE command: sunionDiffGenericCommand() with c->argv[1] as the
 * destination key, the remaining arguments as source set keys and
 * op = REDIS_OP_DIFF. */
4580 static void sdiffstoreCommand(redisClient 
*c
) { 
4581     sunionDiffGenericCommand(c
,c
->argv
+2,c
->argc
-2,c
->argv
[1],REDIS_OP_DIFF
); 
4584 /* ==================================== ZSets =============================== */ 
4586 /* ZSETs are ordered sets using two data structures to hold the same elements 
4587  * in order to get O(log(N)) INSERT and REMOVE operations into a sorted data structure. 
4590  * The elements are added to a hash table mapping Redis objects to scores. 
4591  * At the same time the elements are added to a skip list mapping scores 
4592  * to Redis objects (so objects are sorted by scores in this "view"). */ 
4594 /* This skiplist implementation is almost a C translation of the original 
4595  * algorithm described by William Pugh in "Skip Lists: A Probabilistic 
4596  * Alternative to Balanced Trees", modified in three ways: 
4597  * a) this implementation allows for repeated values. 
4598  * b) the comparison is not just by key (our 'score') but by satellite data. 
4599  * c) there is a back pointer, so it's a doubly linked list with the back 
4600  * pointers being only at "level 1". This allows to traverse the list 
4601  * from tail to head, useful for ZREVRANGE. */ 
/* Allocate a skiplist node.
 * level: number of forward pointers the node gets; score, obj: the values
 * the node is created for (the lines storing them are not visible in this
 * listing).
 * Both the node itself and its 'forward' array of 'level' pointers are
 * obtained from zmalloc(). */
4603 static zskiplistNode 
*zslCreateNode(int level
, double score
, robj 
*obj
) { 
4604     zskiplistNode 
*zn 
= zmalloc(sizeof(*zn
)); 
4606     zn
->forward 
= zmalloc(sizeof(zskiplistNode
*) * level
); 
/* Allocate a new skiplist.
 * The header is a node created with ZSKIPLIST_MAXLEVEL forward pointers,
 * score 0 and a NULL object; every forward pointer of the header and its
 * backward pointer are initialised to NULL. */
4612 static zskiplist 
*zslCreate(void) { 
4616     zsl 
= zmalloc(sizeof(*zsl
)); 
4619     zsl
->header 
= zslCreateNode(ZSKIPLIST_MAXLEVEL
,0,NULL
); 
4620     for (j 
= 0; j 
< ZSKIPLIST_MAXLEVEL
; j
++) 
4621         zsl
->header
->forward
[j
] = NULL
; 
4622     zsl
->header
->backward 
= NULL
; 
/* Release a skiplist node: drops the node's reference on its object with
 * decrRefCount() and frees the 'forward' pointer array. */
4627 static void zslFreeNode(zskiplistNode 
*node
) { 
4628     decrRefCount(node
->obj
); 
4629     zfree(node
->forward
); 
/* Release a whole skiplist.
 * Iteration starts from the first level-0 node after the header; the
 * header's own 'forward' array is freed directly with zfree(). Inside the
 * walk the level-0 successor is saved in 'next' (presumably so the current
 * node can be released before moving on -- the release call is not visible
 * in this listing). */
4633 static void zslFree(zskiplist 
*zsl
) { 
4634     zskiplistNode 
*node 
= zsl
->header
->forward
[0], *next
; 
4636     zfree(zsl
->header
->forward
); 
4639         next 
= node
->forward
[0]; 
/* Pick a random level for a new skiplist node.
 * The loop keeps going while the low 16 bits of random() are below
 * ZSKIPLIST_P * 0xFFFF (the loop body and the returned value are not visible
 * in this listing). */
4646 static int zslRandomLevel(void) { 
4648     while ((random()&0xFFFF) < (ZSKIPLIST_P 
* 0xFFFF)) 
4653 static void zslInsert(zskiplist 
*zsl
, double score
, robj 
*obj
) { 
4654     zskiplistNode 
*update
[ZSKIPLIST_MAXLEVEL
], *x
; 
4658     for (i 
= zsl
->level
-1; i 
>= 0; i
--) { 
4659         while (x
->forward
[i
] && 
4660             (x
->forward
[i
]->score 
< score 
|| 
4661                 (x
->forward
[i
]->score 
== score 
&& 
4662                 compareStringObjects(x
->forward
[i
]->obj
,obj
) < 0))) 
4666     /* we assume the key is not already inside, since we allow duplicated 
4667      * scores, and the re-insertion of score and redis object should never 
4668      * happen since the caller of zslInsert() should test in the hash table 
4669      * if the element is already inside or not. */ 
4670     level 
= zslRandomLevel(); 
4671     if (level 
> zsl
->level
) { 
4672         for (i 
= zsl
->level
; i 
< level
; i
++) 
4673             update
[i
] = zsl
->header
; 
4676     x 
= zslCreateNode(level
,score
,obj
); 
4677     for (i 
= 0; i 
< level
; i
++) { 
4678         x
->forward
[i
] = update
[i
]->forward
[i
]; 
4679         update
[i
]->forward
[i
] = x
; 
4681     x
->backward 
= (update
[0] == zsl
->header
) ? NULL 
: update
[0]; 
4683         x
->forward
[0]->backward 
= x
; 
4689 /* Delete an element with matching score/object from the skiplist. */ 
4690 static int zslDelete(zskiplist 
*zsl
, double score
, robj 
*obj
) { 
4691     zskiplistNode 
*update
[ZSKIPLIST_MAXLEVEL
], *x
; 
4695     for (i 
= zsl
->level
-1; i 
>= 0; i
--) { 
4696         while (x
->forward
[i
] && 
4697             (x
->forward
[i
]->score 
< score 
|| 
4698                 (x
->forward
[i
]->score 
== score 
&& 
4699                 compareStringObjects(x
->forward
[i
]->obj
,obj
) < 0))) 
4703     /* We may have multiple elements with the same score, what we need 
4704      * is to find the element with both the right score and object. */ 
4706     if (x 
&& score 
== x
->score 
&& compareStringObjects(x
->obj
,obj
) == 0) { 
4707         for (i 
= 0; i 
< zsl
->level
; i
++) { 
4708             if (update
[i
]->forward
[i
] != x
) break; 
4709             update
[i
]->forward
[i
] = x
->forward
[i
]; 
4711         if (x
->forward
[0]) { 
4712             x
->forward
[0]->backward 
= (x
->backward 
== zsl
->header
) ? 
4715             zsl
->tail 
= x
->backward
; 
4718         while(zsl
->level 
> 1 && zsl
->header
->forward
[zsl
->level
-1] == NULL
) 
4723         return 0; /* not found */ 
4725     return 0; /* not found */ 
4728 /* Delete all the elements with score between min and max from the skiplist. 
4729  * Min and max are inclusive, so an element with score >= min && score <= max is deleted. 
4730  * Note that this function takes the reference to the hash table view of the 
4731  * sorted set, in order to remove the elements from the hash table too. 
 * Returns the value of the 'removed' counter. */ 
4732 static unsigned long zslDeleteRange(zskiplist 
*zsl
, double min
, double max
, dict 
*dict
) { 
4733     zskiplistNode 
*update
[ZSKIPLIST_MAXLEVEL
], *x
; 
4734     unsigned long removed 
= 0; 
/* Search from the top level down, skipping every node with score < min. */
4738     for (i 
= zsl
->level
-1; i 
>= 0; i
--) { 
4739         while (x
->forward
[i
] && x
->forward
[i
]->score 
< min
) 
4743     /* Every node skipped by the search above has a score lower than min. 
4744      * The loop below deletes nodes for as long as their score is <= max. */ 
4746     while (x 
&& x
->score 
<= max
) { 
4747         zskiplistNode 
*next
; 
/* Unlink x from each level where update[i] points directly at it. */
4749         for (i 
= 0; i 
< zsl
->level
; i
++) { 
4750             if (update
[i
]->forward
[i
] != x
) break; 
4751             update
[i
]->forward
[i
] = x
->forward
[i
]; 
/* Fix the backward link of the successor, or the tail when x was last. */
4753         if (x
->forward
[0]) { 
4754             x
->forward
[0]->backward 
= (x
->backward 
== zsl
->header
) ? 
4757             zsl
->tail 
= x
->backward
; 
/* Remember the successor and drop the element from the hash table view. */
4759         next 
= x
->forward
[0]; 
4760         dictDelete(dict
,x
->obj
); 
/* Loop over top levels whose header forward pointer has become NULL. */
4762         while(zsl
->level 
> 1 && zsl
->header
->forward
[zsl
->level
-1] == NULL
) 
4768     return removed
; /* the 'removed' counter (starts at 0) */ 
4771 /* Find the first node having a score equal or greater than the specified one. 
4772  * Returns NULL if there is no match. */ 
4773 static zskiplistNode 
*zslFirstWithScore(zskiplist 
*zsl
, double score
) { 
/* Search from the top level down, skipping nodes with a lower score. */
4778     for (i 
= zsl
->level
-1; i 
>= 0; i
--) { 
4779         while (x
->forward
[i
] && x
->forward
[i
]->score 
< score
) 
4782     /* The search only skips nodes whose score is lower than 'score', so the 
4783      * level-0 successor of x is the result (NULL when there is none). */ 
4784     return x
->forward
[0]; 
4787 /* The actual Z-commands implementations */ 
4789 /* This generic command implements both ZADD and ZINCRBY. 
4790  * scoreval is the score if the operation is a ZADD (doincrement == 0) or 
4791  * the increment if the operation is a ZINCRBY (doincrement == 1). */ 
4792 static void zaddGenericCommand(redisClient 
*c
, robj 
*key
, robj 
*ele
, double scoreval
, int doincrement
) { 
4797     zsetobj 
= lookupKeyWrite(c
->db
,key
); 
4798     if (zsetobj 
== NULL
) { 
4799         zsetobj 
= createZsetObject(); 
4800         dictAdd(c
->db
->dict
,key
,zsetobj
); 
4803         if (zsetobj
->type 
!= REDIS_ZSET
) { 
4804             addReply(c
,shared
.wrongtypeerr
); 
4810     /* Ok now since we implement both ZADD and ZINCRBY here the code 
4811      * needs to handle the two different conditions. It's all about setting 
4812      * '*score', that is, the new score to set, to the right value. */ 
4813     score 
= zmalloc(sizeof(double)); 
4817         /* Read the old score. If the element was not present starts from 0 */ 
4818         de 
= dictFind(zs
->dict
,ele
); 
4820             double *oldscore 
= dictGetEntryVal(de
); 
4821             *score 
= *oldscore 
+ scoreval
; 
4829     /* What follows is a simple remove and re-insert operation that is common 
4830      * to both ZADD and ZINCRBY... */ 
4831     if (dictAdd(zs
->dict
,ele
,score
) == DICT_OK
) { 
4832         /* case 1: New element */ 
4833         incrRefCount(ele
); /* added to hash */ 
4834         zslInsert(zs
->zsl
,*score
,ele
); 
4835         incrRefCount(ele
); /* added to skiplist */ 
4838             addReplyDouble(c
,*score
); 
4840             addReply(c
,shared
.cone
); 
4845         /* case 2: Score update operation */ 
4846         de 
= dictFind(zs
->dict
,ele
); 
4847         redisAssert(de 
!= NULL
); 
4848         oldscore 
= dictGetEntryVal(de
); 
4849         if (*score 
!= *oldscore
) { 
4852             /* Remove and insert the element in the skip list with new score */ 
4853             deleted 
= zslDelete(zs
->zsl
,*oldscore
,ele
); 
4854             redisAssert(deleted 
!= 0); 
4855             zslInsert(zs
->zsl
,*score
,ele
); 
4857             /* Update the score in the hash table */ 
4858             dictReplace(zs
->dict
,ele
,score
); 
4864             addReplyDouble(c
,*score
); 
4866             addReply(c
,shared
.czero
); 
/* ZADD command: parses c->argv[2] as the score with strtod() and calls
 * zaddGenericCommand() with key c->argv[1], element c->argv[3] and
 * doincrement = 0.
 * NOTE(review): strtod() is called with a NULL end pointer, so text that is
 * not a valid number is not detected here. */
4870 static void zaddCommand(redisClient 
*c
) { 
4873     scoreval 
= strtod(c
->argv
[2]->ptr
,NULL
); 
4874     zaddGenericCommand(c
,c
->argv
[1],c
->argv
[3],scoreval
,0); 
/* ZINCRBY command: parses c->argv[2] as the increment with strtod() and
 * calls zaddGenericCommand() with key c->argv[1], element c->argv[3] and
 * doincrement = 1.
 * NOTE(review): strtod() is called with a NULL end pointer, so text that is
 * not a valid number is not detected here. */
4877 static void zincrbyCommand(redisClient 
*c
) { 
4880     scoreval 
= strtod(c
->argv
[2]->ptr
,NULL
); 
4881     zaddGenericCommand(c
,c
->argv
[1],c
->argv
[3],scoreval
,1); 
4884 static void zremCommand(redisClient 
*c
) { 
4888     zsetobj 
= lookupKeyWrite(c
->db
,c
->argv
[1]); 
4889     if (zsetobj 
== NULL
) { 
4890         addReply(c
,shared
.czero
); 
4896         if (zsetobj
->type 
!= REDIS_ZSET
) { 
4897             addReply(c
,shared
.wrongtypeerr
); 
4901         de 
= dictFind(zs
->dict
,c
->argv
[2]); 
4903             addReply(c
,shared
.czero
); 
4906         /* Delete from the skiplist */ 
4907         oldscore 
= dictGetEntryVal(de
); 
4908         deleted 
= zslDelete(zs
->zsl
,*oldscore
,c
->argv
[2]); 
4909         redisAssert(deleted 
!= 0); 
4911         /* Delete from the hash table */ 
4912         dictDelete(zs
->dict
,c
->argv
[2]); 
4913         if (htNeedsResize(zs
->dict
)) dictResize(zs
->dict
); 
4915         addReply(c
,shared
.cone
); 
4919 static void zremrangebyscoreCommand(redisClient 
*c
) { 
4920     double min 
= strtod(c
->argv
[2]->ptr
,NULL
); 
4921     double max 
= strtod(c
->argv
[3]->ptr
,NULL
); 
4925     zsetobj 
= lookupKeyWrite(c
->db
,c
->argv
[1]); 
4926     if (zsetobj 
== NULL
) { 
4927         addReply(c
,shared
.czero
); 
4931         if (zsetobj
->type 
!= REDIS_ZSET
) { 
4932             addReply(c
,shared
.wrongtypeerr
); 
4936         deleted 
= zslDeleteRange(zs
->zsl
,min
,max
,zs
->dict
); 
4937         if (htNeedsResize(zs
->dict
)) dictResize(zs
->dict
); 
4938         server
.dirty 
+= deleted
; 
4939         addReplySds(c
,sdscatprintf(sdsempty(),":%lu\r\n",deleted
)); 
4943 static void zrangeGenericCommand(redisClient 
*c
, int reverse
) { 
4945     int start 
= atoi(c
->argv
[2]->ptr
); 
4946     int end 
= atoi(c
->argv
[3]->ptr
); 
4949     if (c
->argc 
== 5 && !strcasecmp(c
->argv
[4]->ptr
,"withscores")) { 
4951     } else if (c
->argc 
>= 5) { 
4952         addReply(c
,shared
.syntaxerr
); 
4956     o 
= lookupKeyRead(c
->db
,c
->argv
[1]); 
4958         addReply(c
,shared
.nullmultibulk
); 
4960         if (o
->type 
!= REDIS_ZSET
) { 
4961             addReply(c
,shared
.wrongtypeerr
); 
4963             zset 
*zsetobj 
= o
->ptr
; 
4964             zskiplist 
*zsl 
= zsetobj
->zsl
; 
4967             int llen 
= zsl
->length
; 
4971             /* convert negative indexes */ 
4972             if (start 
< 0) start 
= llen
+start
; 
4973             if (end 
< 0) end 
= llen
+end
; 
4974             if (start 
< 0) start 
= 0; 
4975             if (end 
< 0) end 
= 0; 
4977             /* indexes sanity checks */ 
4978             if (start 
> end 
|| start 
>= llen
) { 
4979                 /* Out of range start or start > end result in empty list */ 
4980                 addReply(c
,shared
.emptymultibulk
); 
4983             if (end 
>= llen
) end 
= llen
-1; 
4984             rangelen 
= (end
-start
)+1; 
4986             /* Return the result in form of a multi-bulk reply */ 
4992                 ln 
= zsl
->header
->forward
[0]; 
4994                     ln 
= ln
->forward
[0]; 
4997             addReplySds(c
,sdscatprintf(sdsempty(),"*%d\r\n", 
4998                 withscores 
? (rangelen
*2) : rangelen
)); 
4999             for (j 
= 0; j 
< rangelen
; j
++) { 
5001                 addReplyBulkLen(c
,ele
); 
5003                 addReply(c
,shared
.crlf
); 
5005                     addReplyDouble(c
,ln
->score
); 
5006                 ln 
= reverse 
? ln
->backward 
: ln
->forward
[0]; 
5012 static void zrangeCommand(redisClient 
*c
) { 
5013     zrangeGenericCommand(c
,0); 
5016 static void zrevrangeCommand(redisClient 
*c
) { 
5017     zrangeGenericCommand(c
,1); 
5020 static void zrangebyscoreCommand(redisClient 
*c
) { 
5022     double min 
= strtod(c
->argv
[2]->ptr
,NULL
); 
5023     double max 
= strtod(c
->argv
[3]->ptr
,NULL
); 
5024     int offset 
= 0, limit 
= -1; 
5026     if (c
->argc 
!= 4 && c
->argc 
!= 7) { 
5028             sdsnew("-ERR wrong number of arguments for ZRANGEBYSCORE\r\n")); 
5030     } else if (c
->argc 
== 7 && strcasecmp(c
->argv
[4]->ptr
,"limit")) { 
5031         addReply(c
,shared
.syntaxerr
); 
5033     } else if (c
->argc 
== 7) { 
5034         offset 
= atoi(c
->argv
[5]->ptr
); 
5035         limit 
= atoi(c
->argv
[6]->ptr
); 
5036         if (offset 
< 0) offset 
= 0; 
5039     o 
= lookupKeyRead(c
->db
,c
->argv
[1]); 
5041         addReply(c
,shared
.nullmultibulk
); 
5043         if (o
->type 
!= REDIS_ZSET
) { 
5044             addReply(c
,shared
.wrongtypeerr
); 
5046             zset 
*zsetobj 
= o
->ptr
; 
5047             zskiplist 
*zsl 
= zsetobj
->zsl
; 
5050             unsigned int rangelen 
= 0; 
5052             /* Get the first node with the score >= min */ 
5053             ln 
= zslFirstWithScore(zsl
,min
); 
5055                 /* No element matching the speciifed interval */ 
5056                 addReply(c
,shared
.emptymultibulk
); 
5060             /* We don't know in advance how many matching elements there 
5061              * are in the list, so we push this object that will represent 
5062              * the multi-bulk length in the output buffer, and will "fix" 
5064             lenobj 
= createObject(REDIS_STRING
,NULL
); 
5066             decrRefCount(lenobj
); 
5068             while(ln 
&& ln
->score 
<= max
) { 
5071                     ln 
= ln
->forward
[0]; 
5074                 if (limit 
== 0) break; 
5076                 addReplyBulkLen(c
,ele
); 
5078                 addReply(c
,shared
.crlf
); 
5079                 ln 
= ln
->forward
[0]; 
5081                 if (limit 
> 0) limit
--; 
5083             lenobj
->ptr 
= sdscatprintf(sdsempty(),"*%d\r\n",rangelen
); 
5088 static void zcardCommand(redisClient 
*c
) { 
5092     o 
= lookupKeyRead(c
->db
,c
->argv
[1]); 
5094         addReply(c
,shared
.czero
); 
5097         if (o
->type 
!= REDIS_ZSET
) { 
5098             addReply(c
,shared
.wrongtypeerr
); 
5101             addReplySds(c
,sdscatprintf(sdsempty(),":%lu\r\n",zs
->zsl
->length
)); 
5106 static void zscoreCommand(redisClient 
*c
) { 
5110     o 
= lookupKeyRead(c
->db
,c
->argv
[1]); 
5112         addReply(c
,shared
.nullbulk
); 
5115         if (o
->type 
!= REDIS_ZSET
) { 
5116             addReply(c
,shared
.wrongtypeerr
); 
5121             de 
= dictFind(zs
->dict
,c
->argv
[2]); 
5123                 addReply(c
,shared
.nullbulk
); 
5125                 double *score 
= dictGetEntryVal(de
); 
5127                 addReplyDouble(c
,*score
); 
5133 /* ========================= Non type-specific commands  ==================== */ 
5135 static void flushdbCommand(redisClient 
*c
) { 
5136     server
.dirty 
+= dictSize(c
->db
->dict
); 
5137     dictEmpty(c
->db
->dict
); 
5138     dictEmpty(c
->db
->expires
); 
5139     addReply(c
,shared
.ok
); 
5142 static void flushallCommand(redisClient 
*c
) { 
5143     server
.dirty 
+= emptyDb(); 
5144     addReply(c
,shared
.ok
); 
5145     rdbSave(server
.dbfilename
); 
5149 static redisSortOperation 
*createSortOperation(int type
, robj 
*pattern
) { 
5150     redisSortOperation 
*so 
= zmalloc(sizeof(*so
)); 
5152     so
->pattern 
= pattern
; 
5156 /* Return the value associated to the key with a name obtained 
5157  * substituting the first occurence of '*' in 'pattern' with 'subst' */ 
5158 static robj 
*lookupKeyByPattern(redisDb 
*db
, robj 
*pattern
, robj 
*subst
) { 
5162     int prefixlen
, sublen
, postfixlen
; 
5163     /* Expoit the internal sds representation to create a sds string allocated on the stack in order to make this function faster */ 
5167         char buf
[REDIS_SORTKEY_MAX
+1]; 
5170     /* If the pattern is "#" return the substitution object itself in order 
5171      * to implement the "SORT ... GET #" feature. */ 
5172     spat 
= pattern
->ptr
; 
5173     if (spat
[0] == '#' && spat
[1] == '\0') { 
5177     /* The substitution object may be specially encoded. If so we create 
5178      * a decoded object on the fly. Otherwise getDecodedObject will just 
5179      * increment the ref count, that we'll decrement later. */ 
5180     subst 
= getDecodedObject(subst
); 
5183     if (sdslen(spat
)+sdslen(ssub
)-1 > REDIS_SORTKEY_MAX
) return NULL
; 
5184     p 
= strchr(spat
,'*'); 
5186         decrRefCount(subst
); 
5191     sublen 
= sdslen(ssub
); 
5192     postfixlen 
= sdslen(spat
)-(prefixlen
+1); 
5193     memcpy(keyname
.buf
,spat
,prefixlen
); 
5194     memcpy(keyname
.buf
+prefixlen
,ssub
,sublen
); 
5195     memcpy(keyname
.buf
+prefixlen
+sublen
,p
+1,postfixlen
); 
5196     keyname
.buf
[prefixlen
+sublen
+postfixlen
] = '\0'; 
5197     keyname
.len 
= prefixlen
+sublen
+postfixlen
; 
5199     initStaticStringObject(keyobj
,((char*)&keyname
)+(sizeof(long)*2)) 
5200     decrRefCount(subst
); 
5202     /* printf("lookup '%s' => %p\n", keyname.buf,de); */ 
5203     return lookupKeyRead(db
,&keyobj
); 
5206 /* sortCompare() is used by qsort in sortCommand(). Given that qsort_r with 
5207  * the additional parameter is not standard but a BSD-specific we have to 
5208  * pass sorting parameters via the global 'server' structure */ 
5209 static int sortCompare(const void *s1
, const void *s2
) { 
5210     const redisSortObject 
*so1 
= s1
, *so2 
= s2
; 
5213     if (!server
.sort_alpha
) { 
5214         /* Numeric sorting. Here it's trivial as we precomputed scores */ 
5215         if (so1
->u
.score 
> so2
->u
.score
) { 
5217         } else if (so1
->u
.score 
< so2
->u
.score
) { 
5223         /* Alphanumeric sorting */ 
5224         if (server
.sort_bypattern
) { 
5225             if (!so1
->u
.cmpobj 
|| !so2
->u
.cmpobj
) { 
5226                 /* At least one compare object is NULL */ 
5227                 if (so1
->u
.cmpobj 
== so2
->u
.cmpobj
) 
5229                 else if (so1
->u
.cmpobj 
== NULL
) 
5234                 /* We have both the objects, use strcoll */ 
5235                 cmp 
= strcoll(so1
->u
.cmpobj
->ptr
,so2
->u
.cmpobj
->ptr
); 
5238             /* Compare elements directly */ 
5241             dec1 
= getDecodedObject(so1
->obj
); 
5242             dec2 
= getDecodedObject(so2
->obj
); 
5243             cmp 
= strcoll(dec1
->ptr
,dec2
->ptr
); 
5248     return server
.sort_desc 
? -cmp 
: cmp
; 
5251 /* The SORT command is the most complex command in Redis. Warning: this code 
5252  * is optimized for speed and a bit less for readability */ 
5253 static void sortCommand(redisClient 
*c
) { 
5256     int desc 
= 0, alpha 
= 0; 
5257     int limit_start 
= 0, limit_count 
= -1, start
, end
; 
5258     int j
, dontsort 
= 0, vectorlen
; 
5259     int getop 
= 0; /* GET operation counter */ 
5260     robj 
*sortval
, *sortby 
= NULL
, *storekey 
= NULL
; 
5261     redisSortObject 
*vector
; /* Resulting vector to sort */ 
5263     /* Lookup the key to sort. It must be of the right types */ 
5264     sortval 
= lookupKeyRead(c
->db
,c
->argv
[1]); 
5265     if (sortval 
== NULL
) { 
5266         addReply(c
,shared
.nullmultibulk
); 
5269     if (sortval
->type 
!= REDIS_SET 
&& sortval
->type 
!= REDIS_LIST 
&& 
5270         sortval
->type 
!= REDIS_ZSET
) 
5272         addReply(c
,shared
.wrongtypeerr
); 
5276     /* Create a list of operations to perform for every sorted element. 
5277      * Operations can be GET/DEL/INCR/DECR */ 
5278     operations 
= listCreate(); 
5279     listSetFreeMethod(operations
,zfree
); 
5282     /* Now we need to protect sortval incrementing its count, in the future 
5283      * SORT may have options able to overwrite/delete keys during the sorting 
5284      * and the sorted key itself may get destroied */ 
5285     incrRefCount(sortval
); 
5287     /* The SORT command has an SQL-alike syntax, parse it */ 
5288     while(j 
< c
->argc
) { 
5289         int leftargs 
= c
->argc
-j
-1; 
5290         if (!strcasecmp(c
->argv
[j
]->ptr
,"asc")) { 
5292         } else if (!strcasecmp(c
->argv
[j
]->ptr
,"desc")) { 
5294         } else if (!strcasecmp(c
->argv
[j
]->ptr
,"alpha")) { 
5296         } else if (!strcasecmp(c
->argv
[j
]->ptr
,"limit") && leftargs 
>= 2) { 
5297             limit_start 
= atoi(c
->argv
[j
+1]->ptr
); 
5298             limit_count 
= atoi(c
->argv
[j
+2]->ptr
); 
5300         } else if (!strcasecmp(c
->argv
[j
]->ptr
,"store") && leftargs 
>= 1) { 
5301             storekey 
= c
->argv
[j
+1]; 
5303         } else if (!strcasecmp(c
->argv
[j
]->ptr
,"by") && leftargs 
>= 1) { 
5304             sortby 
= c
->argv
[j
+1]; 
5305             /* If the BY pattern does not contain '*', i.e. it is constant, 
5306              * we don't need to sort nor to lookup the weight keys. */ 
5307             if (strchr(c
->argv
[j
+1]->ptr
,'*') == NULL
) dontsort 
= 1; 
5309         } else if (!strcasecmp(c
->argv
[j
]->ptr
,"get") && leftargs 
>= 1) { 
5310             listAddNodeTail(operations
,createSortOperation( 
5311                 REDIS_SORT_GET
,c
->argv
[j
+1])); 
5315             decrRefCount(sortval
); 
5316             listRelease(operations
); 
5317             addReply(c
,shared
.syntaxerr
); 
5323     /* Load the sorting vector with all the objects to sort */ 
5324     switch(sortval
->type
) { 
5325     case REDIS_LIST
: vectorlen 
= listLength((list
*)sortval
->ptr
); break; 
5326     case REDIS_SET
: vectorlen 
=  dictSize((dict
*)sortval
->ptr
); break; 
5327     case REDIS_ZSET
: vectorlen 
= dictSize(((zset
*)sortval
->ptr
)->dict
); break; 
5328     default: vectorlen 
= 0; redisAssert(0); /* Avoid GCC warning */ 
5330     vector 
= zmalloc(sizeof(redisSortObject
)*vectorlen
); 
5333     if (sortval
->type 
== REDIS_LIST
) { 
5334         list 
*list 
= sortval
->ptr
; 
5338         while((ln 
= listYield(list
))) { 
5339             robj 
*ele 
= ln
->value
; 
5340             vector
[j
].obj 
= ele
; 
5341             vector
[j
].u
.score 
= 0; 
5342             vector
[j
].u
.cmpobj 
= NULL
; 
5350         if (sortval
->type 
== REDIS_SET
) { 
5353             zset 
*zs 
= sortval
->ptr
; 
5357         di 
= dictGetIterator(set
); 
5358         while((setele 
= dictNext(di
)) != NULL
) { 
5359             vector
[j
].obj 
= dictGetEntryKey(setele
); 
5360             vector
[j
].u
.score 
= 0; 
5361             vector
[j
].u
.cmpobj 
= NULL
; 
5364         dictReleaseIterator(di
); 
5366     redisAssert(j 
== vectorlen
); 
5368     /* Now it's time to load the right scores in the sorting vector */ 
5369     if (dontsort 
== 0) { 
5370         for (j 
= 0; j 
< vectorlen
; j
++) { 
5374                 byval 
= lookupKeyByPattern(c
->db
,sortby
,vector
[j
].obj
); 
5375                 if (!byval 
|| byval
->type 
!= REDIS_STRING
) continue; 
5377                     vector
[j
].u
.cmpobj 
= getDecodedObject(byval
); 
5379                     if (byval
->encoding 
== REDIS_ENCODING_RAW
) { 
5380                         vector
[j
].u
.score 
= strtod(byval
->ptr
,NULL
); 
5382                         /* Don't need to decode the object if it's 
5383                          * integer-encoded (the only encoding supported) so 
5384                          * far. We can just cast it */ 
5385                         if (byval
->encoding 
== REDIS_ENCODING_INT
) { 
5386                             vector
[j
].u
.score 
= (long)byval
->ptr
; 
5388                             redisAssert(1 != 1); 
5393                     if (vector
[j
].obj
->encoding 
== REDIS_ENCODING_RAW
) 
5394                         vector
[j
].u
.score 
= strtod(vector
[j
].obj
->ptr
,NULL
); 
5396                         if (vector
[j
].obj
->encoding 
== REDIS_ENCODING_INT
) 
5397                             vector
[j
].u
.score 
= (long) vector
[j
].obj
->ptr
; 
5399                             redisAssert(1 != 1); 
5406     /* We are ready to sort the vector... perform a bit of sanity check 
5407      * on the LIMIT option too. We'll use a partial version of quicksort. */ 
5408     start 
= (limit_start 
< 0) ? 0 : limit_start
; 
5409     end 
= (limit_count 
< 0) ? vectorlen
-1 : start
+limit_count
-1; 
5410     if (start 
>= vectorlen
) { 
5411         start 
= vectorlen
-1; 
5414     if (end 
>= vectorlen
) end 
= vectorlen
-1; 
5416     if (dontsort 
== 0) { 
5417         server
.sort_desc 
= desc
; 
5418         server
.sort_alpha 
= alpha
; 
5419         server
.sort_bypattern 
= sortby 
? 1 : 0; 
5420         if (sortby 
&& (start 
!= 0 || end 
!= vectorlen
-1)) 
5421             pqsort(vector
,vectorlen
,sizeof(redisSortObject
),sortCompare
, start
,end
); 
5423             qsort(vector
,vectorlen
,sizeof(redisSortObject
),sortCompare
); 
5426     /* Send command output to the output buffer, performing the specified 
5427      * GET/DEL/INCR/DECR operations if any. */ 
5428     outputlen 
= getop 
? getop
*(end
-start
+1) : end
-start
+1; 
5429     if (storekey 
== NULL
) { 
5430         /* STORE option not specified, sent the sorting result to client */ 
5431         addReplySds(c
,sdscatprintf(sdsempty(),"*%d\r\n",outputlen
)); 
5432         for (j 
= start
; j 
<= end
; j
++) { 
5435                 addReplyBulkLen(c
,vector
[j
].obj
); 
5436                 addReply(c
,vector
[j
].obj
); 
5437                 addReply(c
,shared
.crlf
); 
5439             listRewind(operations
); 
5440             while((ln 
= listYield(operations
))) { 
5441                 redisSortOperation 
*sop 
= ln
->value
; 
5442                 robj 
*val 
= lookupKeyByPattern(c
->db
,sop
->pattern
, 
5445                 if (sop
->type 
== REDIS_SORT_GET
) { 
5446                     if (!val 
|| val
->type 
!= REDIS_STRING
) { 
5447                         addReply(c
,shared
.nullbulk
); 
5449                         addReplyBulkLen(c
,val
); 
5451                         addReply(c
,shared
.crlf
); 
5454                     redisAssert(sop
->type 
== REDIS_SORT_GET
); /* always fails */ 
5459         robj 
*listObject 
= createListObject(); 
5460         list 
*listPtr 
= (list
*) listObject
->ptr
; 
5462         /* STORE option specified, set the sorting result as a List object */ 
5463         for (j 
= start
; j 
<= end
; j
++) { 
5466                 listAddNodeTail(listPtr
,vector
[j
].obj
); 
5467                 incrRefCount(vector
[j
].obj
); 
5469             listRewind(operations
); 
5470             while((ln 
= listYield(operations
))) { 
5471                 redisSortOperation 
*sop 
= ln
->value
; 
5472                 robj 
*val 
= lookupKeyByPattern(c
->db
,sop
->pattern
, 
5475                 if (sop
->type 
== REDIS_SORT_GET
) { 
5476                     if (!val 
|| val
->type 
!= REDIS_STRING
) { 
5477                         listAddNodeTail(listPtr
,createStringObject("",0)); 
5479                         listAddNodeTail(listPtr
,val
); 
5483                     redisAssert(sop
->type 
== REDIS_SORT_GET
); /* always fails */ 
5487         if (dictReplace(c
->db
->dict
,storekey
,listObject
)) { 
5488             incrRefCount(storekey
); 
5490         /* Note: we add 1 because the DB is dirty anyway since even if the 
5491          * SORT result is empty a new key is set and maybe the old content 
5493         server
.dirty 
+= 1+outputlen
; 
5494         addReplySds(c
,sdscatprintf(sdsempty(),":%d\r\n",outputlen
)); 
5498     decrRefCount(sortval
); 
5499     listRelease(operations
); 
5500     for (j 
= 0; j 
< vectorlen
; j
++) { 
5501         if (sortby 
&& alpha 
&& vector
[j
].u
.cmpobj
) 
5502             decrRefCount(vector
[j
].u
.cmpobj
); 
/* Convert an amount of bytes into a human readable string in the form
 * of 100B, 2G, 100M, 4K, and so forth.
 *
 * Values below 1024 are printed as an integer followed by 'B'. Larger
 * values are divided by the largest power of 1024 that keeps the integer
 * part below 1024 and printed with two decimals followed by one of the
 * suffixes K, M, G, T, P or E, so every possible 'n' produces output.
 *
 * 's' must point to a buffer large enough for the result: 22 bytes are
 * always sufficient. */
static void bytesToHuman(char *s, unsigned long long n) {
    static const char suffix[] = "KMGTPE";
    unsigned long long unit = 1024ULL;
    int i;

    if (n < 1024) {
        /* Bytes */
        sprintf(s,"%lluB",n);
        return;
    }
    for (i = 0; ; i++) {
        /* The last suffix is a catch-all: 2^64 bytes is only 16E, and
         * stopping there also keeps 'unit' from overflowing. */
        if (suffix[i+1] == '\0' || n/unit < 1024) {
            sprintf(s,"%.2f%c",(double)n/(double)unit,suffix[i]);
            return;
        }
        unit *= 1024;
    }
}
5528 /* Create the string returned by the INFO command. This is decoupled 
5529  * by the INFO command itself as we need to report the same information 
5530  * on memory corruption problems. */ 
5531 static sds 
genRedisInfoString(void) { 
5533     time_t uptime 
= time(NULL
)-server
.stat_starttime
; 
5537     bytesToHuman(hmem
,server
.usedmemory
); 
5538     info 
= sdscatprintf(sdsempty(), 
5539         "redis_version:%s\r\n" 
5541         "multiplexing_api:%s\r\n" 
5542         "process_id:%ld\r\n" 
5543         "uptime_in_seconds:%ld\r\n" 
5544         "uptime_in_days:%ld\r\n" 
5545         "connected_clients:%d\r\n" 
5546         "connected_slaves:%d\r\n" 
5547         "blocked_clients:%d\r\n" 
5548         "used_memory:%zu\r\n" 
5549         "used_memory_human:%s\r\n" 
5550         "changes_since_last_save:%lld\r\n" 
5551         "bgsave_in_progress:%d\r\n" 
5552         "last_save_time:%ld\r\n" 
5553         "bgrewriteaof_in_progress:%d\r\n" 
5554         "total_connections_received:%lld\r\n" 
5555         "total_commands_processed:%lld\r\n" 
5559         (sizeof(long) == 8) ? "64" : "32", 
5564         listLength(server
.clients
)-listLength(server
.slaves
), 
5565         listLength(server
.slaves
), 
5566         server
.blockedclients
, 
5570         server
.bgsavechildpid 
!= -1, 
5572         server
.bgrewritechildpid 
!= -1, 
5573         server
.stat_numconnections
, 
5574         server
.stat_numcommands
, 
5575         server
.vm_enabled 
!= 0, 
5576         server
.masterhost 
== NULL 
? "master" : "slave" 
5578     if (server
.masterhost
) { 
5579         info 
= sdscatprintf(info
, 
5580             "master_host:%s\r\n" 
5581             "master_port:%d\r\n" 
5582             "master_link_status:%s\r\n" 
5583             "master_last_io_seconds_ago:%d\r\n" 
5586             (server
.replstate 
== REDIS_REPL_CONNECTED
) ? 
5588             server
.master 
? ((int)(time(NULL
)-server
.master
->lastinteraction
)) : -1 
5591     if (server
.vm_enabled
) { 
5592         info 
= sdscatprintf(info
, 
5593             "vm_conf_max_memory:%llu\r\n" 
5594             "vm_conf_page_size:%llu\r\n" 
5595             "vm_conf_pages:%llu\r\n" 
5596             "vm_stats_used_pages:%llu\r\n" 
5597             "vm_stats_swapped_objects:%llu\r\n" 
5598             "vm_stats_swappin_count:%llu\r\n" 
5599             "vm_stats_swappout_count:%llu\r\n" 
5600             "vm_stats_io_newjobs_len:%lu\r\n" 
5601             "vm_stats_io_processing_len:%lu\r\n" 
5602             "vm_stats_io_processed_len:%lu\r\n" 
5603             "vm_stats_io_waiting_clients:%lu\r\n" 
5604             ,(unsigned long long) server
.vm_max_memory
, 
5605             (unsigned long long) server
.vm_page_size
, 
5606             (unsigned long long) server
.vm_pages
, 
5607             (unsigned long long) server
.vm_stats_used_pages
, 
5608             (unsigned long long) server
.vm_stats_swapped_objects
, 
5609             (unsigned long long) server
.vm_stats_swapins
, 
5610             (unsigned long long) server
.vm_stats_swapouts
, 
5611             (unsigned long) listLength(server
.io_newjobs
), 
5612             (unsigned long) listLength(server
.io_processing
), 
5613             (unsigned long) listLength(server
.io_processed
), 
5614             (unsigned long) listLength(server
.io_clients
) 
5617     for (j 
= 0; j 
< server
.dbnum
; j
++) { 
5618         long long keys
, vkeys
; 
5620         keys 
= dictSize(server
.db
[j
].dict
); 
5621         vkeys 
= dictSize(server
.db
[j
].expires
); 
5622         if (keys 
|| vkeys
) { 
5623             info 
= sdscatprintf(info
, "db%d:keys=%lld,expires=%lld\r\n", 
5630 static void infoCommand(redisClient 
*c
) { 
5631     sds info 
= genRedisInfoString(); 
5632     addReplySds(c
,sdscatprintf(sdsempty(),"$%lu\r\n", 
5633         (unsigned long)sdslen(info
))); 
5634     addReplySds(c
,info
); 
5635     addReply(c
,shared
.crlf
); 
5638 static void monitorCommand(redisClient 
*c
) { 
5639     /* ignore MONITOR if aleady slave or in monitor mode */ 
5640     if (c
->flags 
& REDIS_SLAVE
) return; 
5642     c
->flags 
|= (REDIS_SLAVE
|REDIS_MONITOR
); 
5644     listAddNodeTail(server
.monitors
,c
); 
5645     addReply(c
,shared
.ok
); 
5648 /* ================================= Expire ================================= */ 
5649 static int removeExpire(redisDb 
*db
, robj 
*key
) { 
5650     if (dictDelete(db
->expires
,key
) == DICT_OK
) { 
5657 static int setExpire(redisDb 
*db
, robj 
*key
, time_t when
) { 
5658     if (dictAdd(db
->expires
,key
,(void*)when
) == DICT_ERR
) { 
5666 /* Return the expire time of the specified key, or -1 if no expire 
5667  * is associated with this key (i.e. the key is non volatile) */ 
5668 static time_t getExpire(redisDb 
*db
, robj 
*key
) { 
5671     /* No expire? return ASAP */ 
5672     if (dictSize(db
->expires
) == 0 || 
5673        (de 
= dictFind(db
->expires
,key
)) == NULL
) return -1; 
5675     return (time_t) dictGetEntryVal(de
); 
5678 static int expireIfNeeded(redisDb 
*db
, robj 
*key
) { 
5682     /* No expire? return ASAP */ 
5683     if (dictSize(db
->expires
) == 0 || 
5684        (de 
= dictFind(db
->expires
,key
)) == NULL
) return 0; 
5686     /* Lookup the expire */ 
5687     when 
= (time_t) dictGetEntryVal(de
); 
5688     if (time(NULL
) <= when
) return 0; 
5690     /* Delete the key */ 
5691     dictDelete(db
->expires
,key
); 
5692     return dictDelete(db
->dict
,key
) == DICT_OK
; 
5695 static int deleteIfVolatile(redisDb 
*db
, robj 
*key
) { 
5698     /* No expire? return ASAP */ 
5699     if (dictSize(db
->expires
) == 0 || 
5700        (de 
= dictFind(db
->expires
,key
)) == NULL
) return 0; 
5702     /* Delete the key */ 
5704     dictDelete(db
->expires
,key
); 
5705     return dictDelete(db
->dict
,key
) == DICT_OK
; 
5708 static void expireGenericCommand(redisClient 
*c
, robj 
*key
, time_t seconds
) { 
5711     de 
= dictFind(c
->db
->dict
,key
); 
5713         addReply(c
,shared
.czero
); 
5717         if (deleteKey(c
->db
,key
)) server
.dirty
++; 
5718         addReply(c
, shared
.cone
); 
5721         time_t when 
= time(NULL
)+seconds
; 
5722         if (setExpire(c
->db
,key
,when
)) { 
5723             addReply(c
,shared
.cone
); 
5726             addReply(c
,shared
.czero
); 
5732 static void expireCommand(redisClient 
*c
) { 
5733     expireGenericCommand(c
,c
->argv
[1],strtol(c
->argv
[2]->ptr
,NULL
,10)); 
5736 static void expireatCommand(redisClient 
*c
) { 
5737     expireGenericCommand(c
,c
->argv
[1],strtol(c
->argv
[2]->ptr
,NULL
,10)-time(NULL
)); 
5740 static void ttlCommand(redisClient 
*c
) { 
5744     expire 
= getExpire(c
->db
,c
->argv
[1]); 
5746         ttl 
= (int) (expire
-time(NULL
)); 
5747         if (ttl 
< 0) ttl 
= -1; 
5749     addReplySds(c
,sdscatprintf(sdsempty(),":%d\r\n",ttl
)); 
5752 /* ================================ MULTI/EXEC ============================== */ 
5754 /* Client state initialization for MULTI/EXEC */ 
5755 static void initClientMultiState(redisClient 
*c
) { 
5756     c
->mstate
.commands 
= NULL
; 
5757     c
->mstate
.count 
= 0; 
5760 /* Release all the resources associated with MULTI/EXEC state */ 
5761 static void freeClientMultiState(redisClient 
*c
) { 
5764     for (j 
= 0; j 
< c
->mstate
.count
; j
++) { 
5766         multiCmd 
*mc 
= c
->mstate
.commands
+j
; 
5768         for (i 
= 0; i 
< mc
->argc
; i
++) 
5769             decrRefCount(mc
->argv
[i
]); 
5772     zfree(c
->mstate
.commands
); 
5775 /* Add a new command into the MULTI commands queue */ 
5776 static void queueMultiCommand(redisClient 
*c
, struct redisCommand 
*cmd
) { 
5780     c
->mstate
.commands 
= zrealloc(c
->mstate
.commands
, 
5781             sizeof(multiCmd
)*(c
->mstate
.count
+1)); 
5782     mc 
= c
->mstate
.commands
+c
->mstate
.count
; 
5785     mc
->argv 
= zmalloc(sizeof(robj
*)*c
->argc
); 
5786     memcpy(mc
->argv
,c
->argv
,sizeof(robj
*)*c
->argc
); 
5787     for (j 
= 0; j 
< c
->argc
; j
++) 
5788         incrRefCount(mc
->argv
[j
]); 
5792 static void multiCommand(redisClient 
*c
) { 
5793     c
->flags 
|= REDIS_MULTI
; 
5794     addReply(c
,shared
.ok
); 
5797 static void execCommand(redisClient 
*c
) { 
5802     if (!(c
->flags 
& REDIS_MULTI
)) { 
5803         addReplySds(c
,sdsnew("-ERR EXEC without MULTI\r\n")); 
5807     orig_argv 
= c
->argv
; 
5808     orig_argc 
= c
->argc
; 
5809     addReplySds(c
,sdscatprintf(sdsempty(),"*%d\r\n",c
->mstate
.count
)); 
5810     for (j 
= 0; j 
< c
->mstate
.count
; j
++) { 
5811         c
->argc 
= c
->mstate
.commands
[j
].argc
; 
5812         c
->argv 
= c
->mstate
.commands
[j
].argv
; 
5813         call(c
,c
->mstate
.commands
[j
].cmd
); 
5815     c
->argv 
= orig_argv
; 
5816     c
->argc 
= orig_argc
; 
5817     freeClientMultiState(c
); 
5818     initClientMultiState(c
); 
5819     c
->flags 
&= (~REDIS_MULTI
); 
5822 /* =========================== Blocking Operations  ========================= */ 
/* Currently Redis blocking operations support is limited to list POP ops,
 * so the current implementation is not fully generic, but it is also not
 * completely specific, so it will not require a rewrite to support new
 * kinds of blocking operations in the future.
 *
 * Still it's important to note that list blocking operations can already be
 * used as a notification mechanism in order to implement other blocking
 * operations at application level, so there must be very strong evidence
 * of usefulness and generality before new blocking operations are implemented.
 *
 * This is how the current blocking POP works, using BLPOP as an example:
 * - If the user calls BLPOP and the key exists and contains a non empty list
 *   then LPOP is called instead. So BLPOP is semantically the same as LPOP
 *   when there is no need to block.
 * - If instead BLPOP is called and the key does not exist or the list is
 *   empty we need to block. In order to do so we remove the notification for
 *   new data to read in the client socket (so that we'll not serve new
 *   requests while the blocking request is not served). We also put the
 *   client in a dictionary (db->blockingkeys) mapping keys to a list of
 *   clients blocking for these keys.
 * - If a PUSH operation against a key with blocked clients waiting is
 *   performed, we serve the first in the list: basically instead of pushing
 *   the new element inside the list we return it to the (first / oldest)
 *   blocking client, unblock the client, and remove it from the list.
 *
 * The above comment and the source code should be enough in order to
 * understand the implementation and modify / fix it later.
 */
5853 /* Set a client in blocking mode for the specified key, with the specified 
5855 static void blockForKeys(redisClient 
*c
, robj 
**keys
, int numkeys
, time_t timeout
) { 
5860     c
->blockingkeys 
= zmalloc(sizeof(robj
*)*numkeys
); 
5861     c
->blockingkeysnum 
= numkeys
; 
5862     c
->blockingto 
= timeout
; 
5863     for (j 
= 0; j 
< numkeys
; j
++) { 
5864         /* Add the key in the client structure, to map clients -> keys */ 
5865         c
->blockingkeys
[j
] = keys
[j
]; 
5866         incrRefCount(keys
[j
]); 
5868         /* And in the other "side", to map keys -> clients */ 
5869         de 
= dictFind(c
->db
->blockingkeys
,keys
[j
]); 
5873             /* For every key we take a list of clients blocked for it */ 
5875             retval 
= dictAdd(c
->db
->blockingkeys
,keys
[j
],l
); 
5876             incrRefCount(keys
[j
]); 
5877             assert(retval 
== DICT_OK
); 
5879             l 
= dictGetEntryVal(de
); 
5881         listAddNodeTail(l
,c
); 
5883     /* Mark the client as a blocked client */ 
5884     c
->flags 
|= REDIS_BLOCKED
; 
5885     aeDeleteFileEvent(server
.el
,c
->fd
,AE_READABLE
); 
5886     server
.blockedclients
++; 
5889 /* Unblock a client that's waiting in a blocking operation such as BLPOP */ 
5890 static void unblockClient(redisClient 
*c
) { 
5895     assert(c
->blockingkeys 
!= NULL
); 
5896     /* The client may wait for multiple keys, so unblock it for every key. */ 
5897     for (j 
= 0; j 
< c
->blockingkeysnum
; j
++) { 
5898         /* Remove this client from the list of clients waiting for this key. */ 
5899         de 
= dictFind(c
->db
->blockingkeys
,c
->blockingkeys
[j
]); 
5901         l 
= dictGetEntryVal(de
); 
5902         listDelNode(l
,listSearchKey(l
,c
)); 
5903         /* If the list is empty we need to remove it to avoid wasting memory */ 
5904         if (listLength(l
) == 0) 
5905             dictDelete(c
->db
->blockingkeys
,c
->blockingkeys
[j
]); 
5906         decrRefCount(c
->blockingkeys
[j
]); 
5908     /* Cleanup the client structure */ 
5909     zfree(c
->blockingkeys
); 
5910     c
->blockingkeys 
= NULL
; 
5911     c
->flags 
&= (~REDIS_BLOCKED
); 
5912     server
.blockedclients
--; 
5913     /* Ok now we are ready to get read events from socket, note that we 
5914      * can't trap errors here as it's possible that unblockClients() is 
5915      * called from freeClient() itself, and the only thing we can do 
5916      * if we failed to register the READABLE event is to kill the client. 
5917      * Still the following function should never fail in the real world as 
5918      * we are sure the file descriptor is sane, and we exit on out of mem. */ 
5919     aeCreateFileEvent(server
.el
, c
->fd
, AE_READABLE
, readQueryFromClient
, c
); 
5920     /* As a final step we want to process data if there is some command waiting 
5921      * in the input buffer. Note that this is safe even if unblockClient() 
5922      * gets called from freeClient() because freeClient() will be smart 
5923      * enough to call this function *after* c->querybuf was set to NULL. */ 
5924     if (c
->querybuf 
&& sdslen(c
->querybuf
) > 0) processInputBuffer(c
); 
5927 /* This should be called from any function PUSHing into lists. 
5928  * 'c' is the "pushing client", 'key' is the key it is pushing data against, 
5929  * 'ele' is the element pushed. 
5931  * If the function returns 0 there was no client waiting for a list push 
5934  * If the function returns 1 there was a client waiting for a list push 
5935  * against this key, the element was passed to this client thus it's not 
5936  * needed to actually add it to the list and the caller should return asap. */ 
5937 static int handleClientsWaitingListPush(redisClient 
*c
, robj 
*key
, robj 
*ele
) { 
5938     struct dictEntry 
*de
; 
5939     redisClient 
*receiver
; 
5943     de 
= dictFind(c
->db
->blockingkeys
,key
); 
5944     if (de 
== NULL
) return 0; 
5945     l 
= dictGetEntryVal(de
); 
5948     receiver 
= ln
->value
; 
5950     addReplySds(receiver
,sdsnew("*2\r\n")); 
5951     addReplyBulkLen(receiver
,key
); 
5952     addReply(receiver
,key
); 
5953     addReply(receiver
,shared
.crlf
); 
5954     addReplyBulkLen(receiver
,ele
); 
5955     addReply(receiver
,ele
); 
5956     addReply(receiver
,shared
.crlf
); 
5957     unblockClient(receiver
); 
5961 /* Blocking RPOP/LPOP */ 
5962 static void blockingPopGenericCommand(redisClient 
*c
, int where
) { 
5967     for (j 
= 1; j 
< c
->argc
-1; j
++) { 
5968         o 
= lookupKeyWrite(c
->db
,c
->argv
[j
]); 
5970             if (o
->type 
!= REDIS_LIST
) { 
5971                 addReply(c
,shared
.wrongtypeerr
); 
5974                 list 
*list 
= o
->ptr
; 
5975                 if (listLength(list
) != 0) { 
5976                     /* If the list contains elements fall back to the usual 
5977                      * non-blocking POP operation */ 
5978                     robj 
*argv
[2], **orig_argv
; 
5981                     /* We need to alter the command arguments before to call 
5982                      * popGenericCommand() as the command takes a single key. */ 
5983                     orig_argv 
= c
->argv
; 
5984                     orig_argc 
= c
->argc
; 
5985                     argv
[1] = c
->argv
[j
]; 
5989                     /* Also the return value is different, we need to output 
5990                      * the multi bulk reply header and the key name. The 
5991                      * "real" command will add the last element (the value) 
5992                      * for us. If this souds like an hack to you it's just 
5993                      * because it is... */ 
5994                     addReplySds(c
,sdsnew("*2\r\n")); 
5995                     addReplyBulkLen(c
,argv
[1]); 
5996                     addReply(c
,argv
[1]); 
5997                     addReply(c
,shared
.crlf
); 
5998                     popGenericCommand(c
,where
); 
6000                     /* Fix the client structure with the original stuff */ 
6001                     c
->argv 
= orig_argv
; 
6002                     c
->argc 
= orig_argc
; 
6008     /* If the list is empty or the key does not exists we must block */ 
6009     timeout 
= strtol(c
->argv
[c
->argc
-1]->ptr
,NULL
,10); 
6010     if (timeout 
> 0) timeout 
+= time(NULL
); 
6011     blockForKeys(c
,c
->argv
+1,c
->argc
-2,timeout
); 
6014 static void blpopCommand(redisClient 
*c
) { 
6015     blockingPopGenericCommand(c
,REDIS_HEAD
); 
6018 static void brpopCommand(redisClient 
*c
) { 
6019     blockingPopGenericCommand(c
,REDIS_TAIL
); 
6022 /* =============================== Replication  ============================= */ 
6024 static int syncWrite(int fd
, char *ptr
, ssize_t size
, int timeout
) { 
6025     ssize_t nwritten
, ret 
= size
; 
6026     time_t start 
= time(NULL
); 
6030         if (aeWait(fd
,AE_WRITABLE
,1000) & AE_WRITABLE
) { 
6031             nwritten 
= write(fd
,ptr
,size
); 
6032             if (nwritten 
== -1) return -1; 
6036         if ((time(NULL
)-start
) > timeout
) { 
6044 static int syncRead(int fd
, char *ptr
, ssize_t size
, int timeout
) { 
6045     ssize_t nread
, totread 
= 0; 
6046     time_t start 
= time(NULL
); 
6050         if (aeWait(fd
,AE_READABLE
,1000) & AE_READABLE
) { 
6051             nread 
= read(fd
,ptr
,size
); 
6052             if (nread 
== -1) return -1; 
6057         if ((time(NULL
)-start
) > timeout
) { 
6065 static int syncReadLine(int fd
, char *ptr
, ssize_t size
, int timeout
) { 
6072         if (syncRead(fd
,&c
,1,timeout
) == -1) return -1; 
6075             if (nread 
&& *(ptr
-1) == '\r') *(ptr
-1) = '\0'; 
6086 static void syncCommand(redisClient 
*c
) { 
6087     /* ignore SYNC if aleady slave or in monitor mode */ 
6088     if (c
->flags 
& REDIS_SLAVE
) return; 
6090     /* SYNC can't be issued when the server has pending data to send to 
6091      * the client about already issued commands. We need a fresh reply 
6092      * buffer registering the differences between the BGSAVE and the current 
6093      * dataset, so that we can copy to other slaves if needed. */ 
6094     if (listLength(c
->reply
) != 0) { 
6095         addReplySds(c
,sdsnew("-ERR SYNC is invalid with pending input\r\n")); 
6099     redisLog(REDIS_NOTICE
,"Slave ask for synchronization"); 
6100     /* Here we need to check if there is a background saving operation 
6101      * in progress, or if it is required to start one */ 
6102     if (server
.bgsavechildpid 
!= -1) { 
6103         /* Ok a background save is in progress. Let's check if it is a good 
6104          * one for replication, i.e. if there is another slave that is 
6105          * registering differences since the server forked to save */ 
6109         listRewind(server
.slaves
); 
6110         while((ln 
= listYield(server
.slaves
))) { 
6112             if (slave
->replstate 
== REDIS_REPL_WAIT_BGSAVE_END
) break; 
6115             /* Perfect, the server is already registering differences for 
6116              * another slave. Set the right state, and copy the buffer. */ 
6117             listRelease(c
->reply
); 
6118             c
->reply 
= listDup(slave
->reply
); 
6119             c
->replstate 
= REDIS_REPL_WAIT_BGSAVE_END
; 
6120             redisLog(REDIS_NOTICE
,"Waiting for end of BGSAVE for SYNC"); 
6122             /* No way, we need to wait for the next BGSAVE in order to 
6123              * register differences */ 
6124             c
->replstate 
= REDIS_REPL_WAIT_BGSAVE_START
; 
6125             redisLog(REDIS_NOTICE
,"Waiting for next BGSAVE for SYNC"); 
6128         /* Ok we don't have a BGSAVE in progress, let's start one */ 
6129         redisLog(REDIS_NOTICE
,"Starting BGSAVE for SYNC"); 
6130         if (rdbSaveBackground(server
.dbfilename
) != REDIS_OK
) { 
6131             redisLog(REDIS_NOTICE
,"Replication failed, can't BGSAVE"); 
6132             addReplySds(c
,sdsnew("-ERR Unalbe to perform background save\r\n")); 
6135         c
->replstate 
= REDIS_REPL_WAIT_BGSAVE_END
; 
6138     c
->flags 
|= REDIS_SLAVE
; 
6140     listAddNodeTail(server
.slaves
,c
); 
6144 static void sendBulkToSlave(aeEventLoop 
*el
, int fd
, void *privdata
, int mask
) { 
6145     redisClient 
*slave 
= privdata
; 
6147     REDIS_NOTUSED(mask
); 
6148     char buf
[REDIS_IOBUF_LEN
]; 
6149     ssize_t nwritten
, buflen
; 
6151     if (slave
->repldboff 
== 0) { 
6152         /* Write the bulk write count before to transfer the DB. In theory here 
6153          * we don't know how much room there is in the output buffer of the 
6154          * socket, but in pratice SO_SNDLOWAT (the minimum count for output 
6155          * operations) will never be smaller than the few bytes we need. */ 
6158         bulkcount 
= sdscatprintf(sdsempty(),"$%lld\r\n",(unsigned long long) 
6160         if (write(fd
,bulkcount
,sdslen(bulkcount
)) != (signed)sdslen(bulkcount
)) 
6168     lseek(slave
->repldbfd
,slave
->repldboff
,SEEK_SET
); 
6169     buflen 
= read(slave
->repldbfd
,buf
,REDIS_IOBUF_LEN
); 
6171         redisLog(REDIS_WARNING
,"Read error sending DB to slave: %s", 
6172             (buflen 
== 0) ? "premature EOF" : strerror(errno
)); 
6176     if ((nwritten 
= write(fd
,buf
,buflen
)) == -1) { 
6177         redisLog(REDIS_VERBOSE
,"Write error sending DB to slave: %s", 
6182     slave
->repldboff 
+= nwritten
; 
6183     if (slave
->repldboff 
== slave
->repldbsize
) { 
6184         close(slave
->repldbfd
); 
6185         slave
->repldbfd 
= -1; 
6186         aeDeleteFileEvent(server
.el
,slave
->fd
,AE_WRITABLE
); 
6187         slave
->replstate 
= REDIS_REPL_ONLINE
; 
6188         if (aeCreateFileEvent(server
.el
, slave
->fd
, AE_WRITABLE
, 
6189             sendReplyToClient
, slave
) == AE_ERR
) { 
6193         addReplySds(slave
,sdsempty()); 
6194         redisLog(REDIS_NOTICE
,"Synchronization with slave succeeded"); 
6198 /* This function is called at the end of every background saving. 
6199  * The argument bgsaveerr is REDIS_OK if the background saving succeeded 
6200  * otherwise REDIS_ERR is passed to the function. 
6202  * The goal of this function is to handle slaves waiting for a successful 
6203  * background saving in order to perform non-blocking synchronization. */ 
6204 static void updateSlavesWaitingBgsave(int bgsaveerr
) { 
6206     int startbgsave 
= 0; 
6208     listRewind(server
.slaves
); 
6209     while((ln 
= listYield(server
.slaves
))) { 
6210         redisClient 
*slave 
= ln
->value
; 
6212         if (slave
->replstate 
== REDIS_REPL_WAIT_BGSAVE_START
) { 
6214             slave
->replstate 
= REDIS_REPL_WAIT_BGSAVE_END
; 
6215         } else if (slave
->replstate 
== REDIS_REPL_WAIT_BGSAVE_END
) { 
6216             struct redis_stat buf
; 
6218             if (bgsaveerr 
!= REDIS_OK
) { 
6220                 redisLog(REDIS_WARNING
,"SYNC failed. BGSAVE child returned an error"); 
6223             if ((slave
->repldbfd 
= open(server
.dbfilename
,O_RDONLY
)) == -1 || 
6224                 redis_fstat(slave
->repldbfd
,&buf
) == -1) { 
6226                 redisLog(REDIS_WARNING
,"SYNC failed. Can't open/stat DB after BGSAVE: %s", strerror(errno
)); 
6229             slave
->repldboff 
= 0; 
6230             slave
->repldbsize 
= buf
.st_size
; 
6231             slave
->replstate 
= REDIS_REPL_SEND_BULK
; 
6232             aeDeleteFileEvent(server
.el
,slave
->fd
,AE_WRITABLE
); 
6233             if (aeCreateFileEvent(server
.el
, slave
->fd
, AE_WRITABLE
, sendBulkToSlave
, slave
) == AE_ERR
) { 
6240         if (rdbSaveBackground(server
.dbfilename
) != REDIS_OK
) { 
6241             listRewind(server
.slaves
); 
6242             redisLog(REDIS_WARNING
,"SYNC failed. BGSAVE failed"); 
6243             while((ln 
= listYield(server
.slaves
))) { 
6244                 redisClient 
*slave 
= ln
->value
; 
6246                 if (slave
->replstate 
== REDIS_REPL_WAIT_BGSAVE_START
) 
6253 static int syncWithMaster(void) { 
6254     char buf
[1024], tmpfile
[256], authcmd
[1024]; 
6256     int fd 
= anetTcpConnect(NULL
,server
.masterhost
,server
.masterport
); 
6260         redisLog(REDIS_WARNING
,"Unable to connect to MASTER: %s", 
6265     /* AUTH with the master if required. */ 
6266     if(server
.masterauth
) { 
6267         snprintf(authcmd
, 1024, "AUTH %s\r\n", server
.masterauth
); 
6268         if (syncWrite(fd
, authcmd
, strlen(server
.masterauth
)+7, 5) == -1) { 
6270             redisLog(REDIS_WARNING
,"Unable to AUTH to MASTER: %s", 
6274         /* Read the AUTH result.  */ 
6275         if (syncReadLine(fd
,buf
,1024,3600) == -1) { 
6277             redisLog(REDIS_WARNING
,"I/O error reading auth result from MASTER: %s", 
6281         if (buf
[0] != '+') { 
6283             redisLog(REDIS_WARNING
,"Cannot AUTH to MASTER, is the masterauth password correct?"); 
6288     /* Issue the SYNC command */ 
6289     if (syncWrite(fd
,"SYNC \r\n",7,5) == -1) { 
6291         redisLog(REDIS_WARNING
,"I/O error writing to MASTER: %s", 
6295     /* Read the bulk write count */ 
6296     if (syncReadLine(fd
,buf
,1024,3600) == -1) { 
6298         redisLog(REDIS_WARNING
,"I/O error reading bulk count from MASTER: %s", 
6302     if (buf
[0] != '$') { 
6304         redisLog(REDIS_WARNING
,"Bad protocol from MASTER, the first byte is not '$', are you sure the host and port are right?"); 
6307     dumpsize 
= atoi(buf
+1); 
6308     redisLog(REDIS_NOTICE
,"Receiving %d bytes data dump from MASTER",dumpsize
); 
6309     /* Read the bulk write data on a temp file */ 
6310     snprintf(tmpfile
,256,"temp-%d.%ld.rdb",(int)time(NULL
),(long int)random()); 
6311     dfd 
= open(tmpfile
,O_CREAT
|O_WRONLY
,0644); 
6314         redisLog(REDIS_WARNING
,"Opening the temp file needed for MASTER <-> SLAVE synchronization: %s",strerror(errno
)); 
6318         int nread
, nwritten
; 
6320         nread 
= read(fd
,buf
,(dumpsize 
< 1024)?dumpsize
:1024); 
6322             redisLog(REDIS_WARNING
,"I/O error trying to sync with MASTER: %s", 
6328         nwritten 
= write(dfd
,buf
,nread
); 
6329         if (nwritten 
== -1) { 
6330             redisLog(REDIS_WARNING
,"Write error writing to the DB dump file needed for MASTER <-> SLAVE synchrnonization: %s", strerror(errno
)); 
6338     if (rename(tmpfile
,server
.dbfilename
) == -1) { 
6339         redisLog(REDIS_WARNING
,"Failed trying to rename the temp DB into dump.rdb in MASTER <-> SLAVE synchronization: %s", strerror(errno
)); 
6345     if (rdbLoad(server
.dbfilename
) != REDIS_OK
) { 
6346         redisLog(REDIS_WARNING
,"Failed trying to load the MASTER synchronization DB from disk"); 
6350     server
.master 
= createClient(fd
); 
6351     server
.master
->flags 
|= REDIS_MASTER
; 
6352     server
.master
->authenticated 
= 1; 
6353     server
.replstate 
= REDIS_REPL_CONNECTED
; 
6357 static void slaveofCommand(redisClient 
*c
) { 
6358     if (!strcasecmp(c
->argv
[1]->ptr
,"no") && 
6359         !strcasecmp(c
->argv
[2]->ptr
,"one")) { 
6360         if (server
.masterhost
) { 
6361             sdsfree(server
.masterhost
); 
6362             server
.masterhost 
= NULL
; 
6363             if (server
.master
) freeClient(server
.master
); 
6364             server
.replstate 
= REDIS_REPL_NONE
; 
6365             redisLog(REDIS_NOTICE
,"MASTER MODE enabled (user request)"); 
6368         sdsfree(server
.masterhost
); 
6369         server
.masterhost 
= sdsdup(c
->argv
[1]->ptr
); 
6370         server
.masterport 
= atoi(c
->argv
[2]->ptr
); 
6371         if (server
.master
) freeClient(server
.master
); 
6372         server
.replstate 
= REDIS_REPL_CONNECT
; 
6373         redisLog(REDIS_NOTICE
,"SLAVE OF %s:%d enabled (user request)", 
6374             server
.masterhost
, server
.masterport
); 
6376     addReply(c
,shared
.ok
); 
6379 /* ============================ Maxmemory directive  ======================== */ 
6381 /* Free one object form the pre-allocated objects free list. This is useful 
6382  * under low mem conditions as by default we take 1 million free objects 
6384 static void freeOneObjectFromFreelist(void) { 
6387     listNode 
*head 
= listFirst(server
.objfreelist
); 
6388     o 
= listNodeValue(head
); 
6389     listDelNode(server
.objfreelist
,head
); 
6393 /* This function gets called when 'maxmemory' is set on the config file to limit 
6394  * the max memory used by the server, and we are out of memory. 
6395  * This function will try to, in order: 
6397  * - Free objects from the free list 
6398  * - Try to remove keys with an EXPIRE set 
6400  * It is not possible to free enough memory to reach used-memory < maxmemory 
6401  * the server will start refusing commands that will enlarge even more the 
6404 static void freeMemoryIfNeeded(void) { 
6405     while (server
.maxmemory 
&& zmalloc_used_memory() > server
.maxmemory
) { 
6406         if (listLength(server
.objfreelist
)) { 
6407             freeOneObjectFromFreelist(); 
6409             int j
, k
, freed 
= 0; 
6411             for (j 
= 0; j 
< server
.dbnum
; j
++) { 
6413                 robj 
*minkey 
= NULL
; 
6414                 struct dictEntry 
*de
; 
6416                 if (dictSize(server
.db
[j
].expires
)) { 
6418                     /* From a sample of three keys drop the one nearest to 
6419                      * the natural expire */ 
6420                     for (k 
= 0; k 
< 3; k
++) { 
6423                         de 
= dictGetRandomKey(server
.db
[j
].expires
); 
6424                         t 
= (time_t) dictGetEntryVal(de
); 
6425                         if (minttl 
== -1 || t 
< minttl
) { 
6426                             minkey 
= dictGetEntryKey(de
); 
6430                     deleteKey(server
.db
+j
,minkey
); 
6433             if (!freed
) return; /* nothing to free... */ 
6438 /* ============================== Append Only file ========================== */ 
6440 static void feedAppendOnlyFile(struct redisCommand 
*cmd
, int dictid
, robj 
**argv
, int argc
) { 
6441     sds buf 
= sdsempty(); 
6447     /* The DB this command was targetting is not the same as the last command 
6448      * we appendend. To issue a SELECT command is needed. */ 
6449     if (dictid 
!= server
.appendseldb
) { 
6452         snprintf(seldb
,sizeof(seldb
),"%d",dictid
); 
6453         buf 
= sdscatprintf(buf
,"*2\r\n$6\r\nSELECT\r\n$%lu\r\n%s\r\n", 
6454             (unsigned long)strlen(seldb
),seldb
); 
6455         server
.appendseldb 
= dictid
; 
6458     /* "Fix" the argv vector if the command is EXPIRE. We want to translate 
6459      * EXPIREs into EXPIREATs calls */ 
6460     if (cmd
->proc 
== expireCommand
) { 
6463         tmpargv
[0] = createStringObject("EXPIREAT",8); 
6464         tmpargv
[1] = argv
[1]; 
6465         incrRefCount(argv
[1]); 
6466         when 
= time(NULL
)+strtol(argv
[2]->ptr
,NULL
,10); 
6467         tmpargv
[2] = createObject(REDIS_STRING
, 
6468             sdscatprintf(sdsempty(),"%ld",when
)); 
6472     /* Append the actual command */ 
6473     buf 
= sdscatprintf(buf
,"*%d\r\n",argc
); 
6474     for (j 
= 0; j 
< argc
; j
++) { 
6477         o 
= getDecodedObject(o
); 
6478         buf 
= sdscatprintf(buf
,"$%lu\r\n",(unsigned long)sdslen(o
->ptr
)); 
6479         buf 
= sdscatlen(buf
,o
->ptr
,sdslen(o
->ptr
)); 
6480         buf 
= sdscatlen(buf
,"\r\n",2); 
6484     /* Free the objects from the modified argv for EXPIREAT */ 
6485     if (cmd
->proc 
== expireCommand
) { 
6486         for (j 
= 0; j 
< 3; j
++) 
6487             decrRefCount(argv
[j
]); 
6490     /* We want to perform a single write. This should be guaranteed atomic 
6491      * at least if the filesystem we are writing is a real physical one. 
6492      * While this will save us against the server being killed I don't think 
6493      * there is much to do about the whole server stopping for power problems 
6495      nwritten 
= write(server
.appendfd
,buf
,sdslen(buf
)); 
6496      if (nwritten 
!= (signed)sdslen(buf
)) { 
6497         /* Ooops, we are in troubles. The best thing to do for now is 
6498          * to simply exit instead to give the illusion that everything is 
6499          * working as expected. */ 
6500          if (nwritten 
== -1) { 
6501             redisLog(REDIS_WARNING
,"Exiting on error writing to the append-only file: %s",strerror(errno
)); 
6503             redisLog(REDIS_WARNING
,"Exiting on short write while writing to the append-only file: %s",strerror(errno
)); 
6507     /* If a background append only file rewriting is in progress we want to 
6508      * accumulate the differences between the child DB and the current one 
6509      * in a buffer, so that when the child process will do its work we 
6510      * can append the differences to the new append only file. */ 
6511     if (server
.bgrewritechildpid 
!= -1) 
6512         server
.bgrewritebuf 
= sdscatlen(server
.bgrewritebuf
,buf
,sdslen(buf
)); 
6516     if (server
.appendfsync 
== APPENDFSYNC_ALWAYS 
|| 
6517         (server
.appendfsync 
== APPENDFSYNC_EVERYSEC 
&& 
6518          now
-server
.lastfsync 
> 1)) 
6520         fsync(server
.appendfd
); /* Let's try to get this data on the disk */ 
6521         server
.lastfsync 
= now
; 
6525 /* In Redis commands are always executed in the context of a client, so in 
6526  * order to load the append only file we need to create a fake client. */ 
6527 static struct redisClient 
*createFakeClient(void) { 
6528     struct redisClient 
*c 
= zmalloc(sizeof(*c
)); 
6532     c
->querybuf 
= sdsempty(); 
6536     /* We set the fake client as a slave waiting for the synchronization 
6537      * so that Redis will not try to send replies to this client. */ 
6538     c
->replstate 
= REDIS_REPL_WAIT_BGSAVE_START
; 
6539     c
->reply 
= listCreate(); 
6540     listSetFreeMethod(c
->reply
,decrRefCount
); 
6541     listSetDupMethod(c
->reply
,dupClientReplyValue
); 
6545 static void freeFakeClient(struct redisClient 
*c
) { 
6546     sdsfree(c
->querybuf
); 
6547     listRelease(c
->reply
); 
6551 /* Replay the append log file. On success REDIS_OK is returned. On non fatal 
6552  * error (the append only file is zero-length) REDIS_ERR is returned. On 
6553  * fatal error an error message is logged and the program exits. */ 
6554 int loadAppendOnlyFile(char *filename
) { 
6555     struct redisClient 
*fakeClient
; 
6556     FILE *fp 
= fopen(filename
,"r"); 
6557     struct redis_stat sb
; 
6558     unsigned long long loadedkeys 
= 0; 
6560     if (redis_fstat(fileno(fp
),&sb
) != -1 && sb
.st_size 
== 0) 
6564         redisLog(REDIS_WARNING
,"Fatal error: can't open the append log file for reading: %s",strerror(errno
)); 
6568     fakeClient 
= createFakeClient(); 
6575         struct redisCommand 
*cmd
; 
6577         if (fgets(buf
,sizeof(buf
),fp
) == NULL
) { 
6583         if (buf
[0] != '*') goto fmterr
; 
6585         argv 
= zmalloc(sizeof(robj
*)*argc
); 
6586         for (j 
= 0; j 
< argc
; j
++) { 
6587             if (fgets(buf
,sizeof(buf
),fp
) == NULL
) goto readerr
; 
6588             if (buf
[0] != '$') goto fmterr
; 
6589             len 
= strtol(buf
+1,NULL
,10); 
6590             argsds 
= sdsnewlen(NULL
,len
); 
6591             if (len 
&& fread(argsds
,len
,1,fp
) == 0) goto fmterr
; 
6592             argv
[j
] = createObject(REDIS_STRING
,argsds
); 
6593             if (fread(buf
,2,1,fp
) == 0) goto fmterr
; /* discard CRLF */ 
6596         /* Command lookup */ 
6597         cmd 
= lookupCommand(argv
[0]->ptr
); 
6599             redisLog(REDIS_WARNING
,"Unknown command '%s' reading the append only file", argv
[0]->ptr
); 
6602         /* Try object sharing and encoding */ 
6603         if (server
.shareobjects
) { 
6605             for(j 
= 1; j 
< argc
; j
++) 
6606                 argv
[j
] = tryObjectSharing(argv
[j
]); 
6608         if (cmd
->flags 
& REDIS_CMD_BULK
) 
6609             tryObjectEncoding(argv
[argc
-1]); 
6610         /* Run the command in the context of a fake client */ 
6611         fakeClient
->argc 
= argc
; 
6612         fakeClient
->argv 
= argv
; 
6613         cmd
->proc(fakeClient
); 
6614         /* Discard the reply objects list from the fake client */ 
6615         while(listLength(fakeClient
->reply
)) 
6616             listDelNode(fakeClient
->reply
,listFirst(fakeClient
->reply
)); 
6617         /* Clean up, ready for the next command */ 
6618         for (j 
= 0; j 
< argc
; j
++) decrRefCount(argv
[j
]); 
6620         /* Handle swapping while loading big datasets when VM is on */ 
6622         if (server
.vm_enabled 
&& (loadedkeys 
% 5000) == 0) { 
6623             while (zmalloc_used_memory() > server
.vm_max_memory
) { 
6624                 if (vmSwapOneObjectBlocking() == REDIS_ERR
) break; 
6629     freeFakeClient(fakeClient
); 
6634         redisLog(REDIS_WARNING
,"Unexpected end of file reading the append only file"); 
6636         redisLog(REDIS_WARNING
,"Unrecoverable error reading the append only file: %s", strerror(errno
)); 
6640     redisLog(REDIS_WARNING
,"Bad file format reading the append only file"); 
6644 /* Write an object into a file in the bulk format $<count>\r\n<payload>\r\n */ 
6645 static int fwriteBulk(FILE *fp
, robj 
*obj
) { 
6649     if (obj
->storage 
== REDIS_VM_MEMORY 
&& obj
->encoding 
!= REDIS_ENCODING_RAW
){ 
6650         obj 
= getDecodedObject(obj
); 
6653     snprintf(buf
,sizeof(buf
),"$%ld\r\n",(long)sdslen(obj
->ptr
)); 
6654     if (fwrite(buf
,strlen(buf
),1,fp
) == 0) goto err
; 
6655     if (sdslen(obj
->ptr
) && fwrite(obj
->ptr
,sdslen(obj
->ptr
),1,fp
) == 0) 
6657     if (fwrite("\r\n",2,1,fp
) == 0) goto err
; 
6658     if (decrrc
) decrRefCount(obj
); 
6661     if (decrrc
) decrRefCount(obj
); 
/* Write a double value in bulk format $<count>\r\n<payload>\r\n
 *
 * The value is formatted with "%.17g". Returns 1 on success, 0 if any
 * fwrite() fails. */
static int fwriteBulkDouble(FILE *fp, double d) {
    char buf[128], dbuf[128];
    size_t dlen;

    snprintf(dbuf,sizeof(dbuf),"%.17g\r\n",d);
    dlen = strlen(dbuf);
    /* The announced count excludes the trailing CRLF. */
    snprintf(buf,sizeof(buf),"$%lu\r\n",(unsigned long)(dlen-2));
    if (fwrite(buf,strlen(buf),1,fp) != 1) return 0;
    if (fwrite(dbuf,dlen,1,fp) != 1) return 0;
    return 1;
}
/* Write a long value in bulk format $<count>\r\n<payload>\r\n
 *
 * Returns 1 on success, 0 if any fwrite() fails. */
static int fwriteBulkLong(FILE *fp, long l) {
    char buf[128], lbuf[128];
    size_t llen;

    snprintf(lbuf,sizeof(lbuf),"%ld\r\n",l);
    llen = strlen(lbuf);
    /* The announced count excludes the trailing CRLF. */
    snprintf(buf,sizeof(buf),"$%lu\r\n",(unsigned long)(llen-2));
    if (fwrite(buf,strlen(buf),1,fp) != 1) return 0;
    if (fwrite(lbuf,llen,1,fp) != 1) return 0;
    return 1;
}
6687 /* Write a sequence of commands able to fully rebuild the dataset into 
6688  * "filename". Used both by REWRITEAOF and BGREWRITEAOF. */ 
6689 static int rewriteAppendOnlyFile(char *filename
) { 
6690     dictIterator 
*di 
= NULL
; 
6695     time_t now 
= time(NULL
); 
6697     /* Note that we have to use a different temp name here compared to the 
6698      * one used by rewriteAppendOnlyFileBackground() function. */ 
6699     snprintf(tmpfile
,256,"temp-rewriteaof-%d.aof", (int) getpid()); 
6700     fp 
= fopen(tmpfile
,"w"); 
6702         redisLog(REDIS_WARNING
, "Failed rewriting the append only file: %s", strerror(errno
)); 
6705     for (j 
= 0; j 
< server
.dbnum
; j
++) { 
6706         char selectcmd
[] = "*2\r\n$6\r\nSELECT\r\n"; 
6707         redisDb 
*db 
= server
.db
+j
; 
6709         if (dictSize(d
) == 0) continue; 
6710         di 
= dictGetIterator(d
); 
6716         /* SELECT the new DB */ 
6717         if (fwrite(selectcmd
,sizeof(selectcmd
)-1,1,fp
) == 0) goto werr
; 
6718         if (fwriteBulkLong(fp
,j
) == 0) goto werr
; 
6720         /* Iterate this DB writing every entry */ 
6721         while((de 
= dictNext(di
)) != NULL
) { 
6726             key 
= dictGetEntryKey(de
); 
6727             /* If the value for this key is swapped, load a preview in memory. 
6728              * We use a "swapped" flag to remember if we need to free the 
6729              * value object instead to just increment the ref count anyway 
6730              * in order to avoid copy-on-write of pages if we are forked() */ 
6731             if (!server
.vm_enabled 
|| key
->storage 
== REDIS_VM_MEMORY 
|| 
6732                 key
->storage 
== REDIS_VM_SWAPPING
) { 
6733                 o 
= dictGetEntryVal(de
); 
6736                 o 
= vmPreviewObject(key
); 
6739             expiretime 
= getExpire(db
,key
); 
6741             /* Save the key and associated value */ 
6742             if (o
->type 
== REDIS_STRING
) { 
6743                 /* Emit a SET command */ 
6744                 char cmd
[]="*3\r\n$3\r\nSET\r\n"; 
6745                 if (fwrite(cmd
,sizeof(cmd
)-1,1,fp
) == 0) goto werr
; 
6747                 if (fwriteBulk(fp
,key
) == 0) goto werr
; 
6748                 if (fwriteBulk(fp
,o
) == 0) goto werr
; 
6749             } else if (o
->type 
== REDIS_LIST
) { 
6750                 /* Emit the RPUSHes needed to rebuild the list */ 
6751                 list 
*list 
= o
->ptr
; 
6755                 while((ln 
= listYield(list
))) { 
6756                     char cmd
[]="*3\r\n$5\r\nRPUSH\r\n"; 
6757                     robj 
*eleobj 
= listNodeValue(ln
); 
6759                     if (fwrite(cmd
,sizeof(cmd
)-1,1,fp
) == 0) goto werr
; 
6760                     if (fwriteBulk(fp
,key
) == 0) goto werr
; 
6761                     if (fwriteBulk(fp
,eleobj
) == 0) goto werr
; 
6763             } else if (o
->type 
== REDIS_SET
) { 
6764                 /* Emit the SADDs needed to rebuild the set */ 
6766                 dictIterator 
*di 
= dictGetIterator(set
); 
6769                 while((de 
= dictNext(di
)) != NULL
) { 
6770                     char cmd
[]="*3\r\n$4\r\nSADD\r\n"; 
6771                     robj 
*eleobj 
= dictGetEntryKey(de
); 
6773                     if (fwrite(cmd
,sizeof(cmd
)-1,1,fp
) == 0) goto werr
; 
6774                     if (fwriteBulk(fp
,key
) == 0) goto werr
; 
6775                     if (fwriteBulk(fp
,eleobj
) == 0) goto werr
; 
6777                 dictReleaseIterator(di
); 
6778             } else if (o
->type 
== REDIS_ZSET
) { 
6779                 /* Emit the ZADDs needed to rebuild the sorted set */ 
6781                 dictIterator 
*di 
= dictGetIterator(zs
->dict
); 
6784                 while((de 
= dictNext(di
)) != NULL
) { 
6785                     char cmd
[]="*4\r\n$4\r\nZADD\r\n"; 
6786                     robj 
*eleobj 
= dictGetEntryKey(de
); 
6787                     double *score 
= dictGetEntryVal(de
); 
6789                     if (fwrite(cmd
,sizeof(cmd
)-1,1,fp
) == 0) goto werr
; 
6790                     if (fwriteBulk(fp
,key
) == 0) goto werr
; 
6791                     if (fwriteBulkDouble(fp
,*score
) == 0) goto werr
; 
6792                     if (fwriteBulk(fp
,eleobj
) == 0) goto werr
; 
6794                 dictReleaseIterator(di
); 
6796                 redisAssert(0 != 0); 
6798             /* Save the expire time */ 
6799             if (expiretime 
!= -1) { 
6800                 char cmd
[]="*3\r\n$8\r\nEXPIREAT\r\n"; 
6801                 /* If this key is already expired skip it */ 
6802                 if (expiretime 
< now
) continue; 
6803                 if (fwrite(cmd
,sizeof(cmd
)-1,1,fp
) == 0) goto werr
; 
6804                 if (fwriteBulk(fp
,key
) == 0) goto werr
; 
6805                 if (fwriteBulkLong(fp
,expiretime
) == 0) goto werr
; 
6807             if (swapped
) decrRefCount(o
); 
6809         dictReleaseIterator(di
); 
6812     /* Make sure data will not remain on the OS's output buffers */ 
6817     /* Use RENAME to make sure the DB file is changed atomically only 
6818      * if the generate DB file is ok. */ 
6819     if (rename(tmpfile
,filename
) == -1) { 
6820         redisLog(REDIS_WARNING
,"Error moving temp append only file on the final destination: %s", strerror(errno
)); 
6824     redisLog(REDIS_NOTICE
,"SYNC append only file rewrite performed"); 
6830     redisLog(REDIS_WARNING
,"Write error writing append only file on disk: %s", strerror(errno
)); 
6831     if (di
) dictReleaseIterator(di
); 
6835 /* This is how rewriting of the append only file in background works: 
6837  * 1) The user calls BGREWRITEAOF 
6838  * 2) Redis calls this function, that forks(): 
6839  *    2a) the child rewrite the append only file in a temp file. 
6840  *    2b) the parent accumulates differences in server.bgrewritebuf. 
6841  * 3) When the child has finished '2a' it exits. 
6842  * 4) The parent will trap the exit code, if it's OK, will append the 
6843  *    data accumulated into server.bgrewritebuf into the temp file, and 
6844  *    finally will rename(2) the temp file in the actual file name. 
6845  *    Then the new file is reopened as the new append only file. Profit! 
6847 static int rewriteAppendOnlyFileBackground(void) { 
6850     if (server
.bgrewritechildpid 
!= -1) return REDIS_ERR
; 
6851     if ((childpid 
= fork()) == 0) { 
6856         snprintf(tmpfile
,256,"temp-rewriteaof-bg-%d.aof", (int) getpid()); 
6857         if (rewriteAppendOnlyFile(tmpfile
) == REDIS_OK
) { 
6864         if (childpid 
== -1) { 
6865             redisLog(REDIS_WARNING
, 
6866                 "Can't rewrite append only file in background: fork: %s", 
6870         redisLog(REDIS_NOTICE
, 
6871             "Background append only file rewriting started by pid %d",childpid
); 
6872         server
.bgrewritechildpid 
= childpid
; 
6873         /* We set appendseldb to -1 in order to force the next call to the 
6874          * feedAppendOnlyFile() to issue a SELECT command, so the differences 
6875          * accumulated by the parent into server.bgrewritebuf will start 
6876          * with a SELECT statement and it will be safe to merge. */ 
6877         server
.appendseldb 
= -1; 
6880     return REDIS_OK
; /* unreached */ 
6883 static void bgrewriteaofCommand(redisClient 
*c
) { 
6884     if (server
.bgrewritechildpid 
!= -1) { 
6885         addReplySds(c
,sdsnew("-ERR background append only file rewriting already in progress\r\n")); 
6888     if (rewriteAppendOnlyFileBackground() == REDIS_OK
) { 
6889         char *status 
= "+Background append only file rewriting started\r\n"; 
6890         addReplySds(c
,sdsnew(status
)); 
6892         addReply(c
,shared
.err
); 
/* Remove the temp file used by the background rewrite child whose pid is
 * 'childpid'. The name is built with the same pattern the child uses when
 * it creates the file. */
static void aofRemoveTempFile(pid_t childpid) {
    char path[256];

    snprintf(path,sizeof(path),"temp-rewriteaof-bg-%d.aof",(int) childpid);
    unlink(path);
}
6903 /* Virtual Memory is composed mainly of two subsystems: 
6904  * - Blocking Virtual Memory 
6905  * - Threaded Virtual Memory I/O 
6906  * The two parts are not fully decoupled, but functions are split among two 
6907  * different sections of the source code (delimited by comments) in order to 
6908  * make more clear what functionality is about the blocking VM and what about 
6909  * the threaded (not blocking) VM. 
6913  * Redis VM is a blocking VM (one that blocks reading swapped values from 
6914  * disk into memory when a value swapped out is needed in memory) that is made 
6915  * unblocking by trying to examine the command argument vector in order to 
6916  * load in background values that will likely be needed in order to exec 
6917  * the command. The command is executed only once all the relevant keys 
6918  * are loaded into memory. 
6920  * This is basically almost as simple as a blocking VM, but almost as parallel 
6921  * as a fully non-blocking VM. 
6924 /* =================== Virtual Memory - Blocking Side  ====================== */ 
6925 static void vmInit(void) { 
6929     server
.vm_fp 
= fopen("/tmp/redisvm","w+b"); 
6930     if (server
.vm_fp 
== NULL
) { 
6931         redisLog(REDIS_WARNING
,"Impossible to open the swap file. Exiting."); 
6934     server
.vm_fd 
= fileno(server
.vm_fp
); 
6935     server
.vm_next_page 
= 0; 
6936     server
.vm_near_pages 
= 0; 
6937     server
.vm_stats_used_pages 
= 0; 
6938     server
.vm_stats_swapped_objects 
= 0; 
6939     server
.vm_stats_swapouts 
= 0; 
6940     server
.vm_stats_swapins 
= 0; 
6941     totsize 
= server
.vm_pages
*server
.vm_page_size
; 
6942     redisLog(REDIS_NOTICE
,"Allocating %lld bytes of swap file",totsize
); 
6943     if (ftruncate(server
.vm_fd
,totsize
) == -1) { 
6944         redisLog(REDIS_WARNING
,"Can't ftruncate swap file: %s. Exiting.", 
6948         redisLog(REDIS_NOTICE
,"Swap file allocated with success"); 
6950     server
.vm_bitmap 
= zmalloc((server
.vm_pages
+7)/8); 
6951     redisLog(REDIS_VERBOSE
,"Allocated %lld bytes page table for %lld pages", 
6952         (long long) (server
.vm_pages
+7)/8, server
.vm_pages
); 
6953     memset(server
.vm_bitmap
,0,(server
.vm_pages
+7)/8); 
6954     /* Try to remove the swap file, so the OS will really delete it from the 
6955      * file system when Redis exists. */ 
6956     unlink("/tmp/redisvm"); 
6958     /* Initialize threaded I/O (used by Virtual Memory) */ 
6959     server
.io_newjobs 
= listCreate(); 
6960     server
.io_processing 
= listCreate(); 
6961     server
.io_processed 
= listCreate(); 
6962     server
.io_clients 
= listCreate(); 
6963     pthread_mutex_init(&server
.io_mutex
,NULL
); 
6964     server
.io_active_threads 
= 0; 
6965     if (pipe(pipefds
) == -1) { 
6966         redisLog(REDIS_WARNING
,"Unable to intialized VM: pipe(2): %s. Exiting." 
6970     server
.io_ready_pipe_read 
= pipefds
[0]; 
6971     server
.io_ready_pipe_write 
= pipefds
[1]; 
6972     redisAssert(anetNonBlock(NULL
,server
.io_ready_pipe_read
) != ANET_ERR
); 
6973     /* Listen for events in the threaded I/O pipe */ 
6974     if (aeCreateFileEvent(server
.el
, server
.io_ready_pipe_read
, AE_READABLE
, 
6975         vmThreadedIOCompletedJob
, NULL
) == AE_ERR
) 
6976         oom("creating file event"); 
6979 /* Mark the page as used */ 
6980 static void vmMarkPageUsed(off_t page
) { 
6981     off_t byte 
= page
/8; 
6983     server
.vm_bitmap
[byte
] |= 1<<bit
; 
6984     redisLog(REDIS_DEBUG
,"Mark used: %lld (byte:%lld bit:%d)\n", 
6985         (long long)page
, (long long)byte
, bit
); 
6988 /* Mark N contiguous pages as used, with 'page' being the first. */ 
6989 static void vmMarkPagesUsed(off_t page
, off_t count
) { 
6992     for (j 
= 0; j 
< count
; j
++) 
6993         vmMarkPageUsed(page
+j
); 
6994     server
.vm_stats_used_pages 
+= count
; 
6997 /* Mark the page as free */ 
6998 static void vmMarkPageFree(off_t page
) { 
6999     off_t byte 
= page
/8; 
7001     server
.vm_bitmap
[byte
] &= ~(1<<bit
); 
7004 /* Mark N contiguous pages as free, with 'page' being the first. */ 
7005 static void vmMarkPagesFree(off_t page
, off_t count
) { 
7008     for (j 
= 0; j 
< count
; j
++) 
7009         vmMarkPageFree(page
+j
); 
7010     server
.vm_stats_used_pages 
-= count
; 
7013 /* Test if the page is free */ 
7014 static int vmFreePage(off_t page
) { 
7015     off_t byte 
= page
/8; 
7017     return (server
.vm_bitmap
[byte
] & (1<<bit
)) == 0; 
7020 /* Find N contiguous free pages storing the first page of the cluster in *first. 
7021  * Returns REDIS_OK if it was able to find N contiguous pages, otherwise  
7022  * REDIS_ERR is returned. 
7024  * This function uses a simple algorithm: we try to allocate 
7025  * REDIS_VM_MAX_NEAR_PAGES sequentially, when we reach this limit we start 
7026  * again from the start of the swap file searching for free spaces. 
7028  * If it looks pretty clear that there are no free pages near our offset 
7029  * we try to find less populated places doing a forward jump of 
7030  * REDIS_VM_MAX_RANDOM_JUMP, then we start scanning again a few pages 
7031  * without hurry, and then we jump again and so forth... 
7033  * This function can be improved using a free list to avoid to guess 
7034  * too much, since we could collect data about freed pages. 
7036  * note: I implemented this function just after watching an episode of 
7037  * Battlestar Galactica, where the hybrid was continuing to say "JUMP!" 
7039 static int vmFindContiguousPages(off_t 
*first
, int n
) { 
7040     off_t base
, offset 
= 0, since_jump 
= 0, numfree 
= 0; 
7042     if (server
.vm_near_pages 
== REDIS_VM_MAX_NEAR_PAGES
) { 
7043         server
.vm_near_pages 
= 0; 
7044         server
.vm_next_page 
= 0; 
7046     server
.vm_near_pages
++; /* Yet another try for pages near to the old ones */ 
7047     base 
= server
.vm_next_page
; 
7049     while(offset 
< server
.vm_pages
) { 
7050         off_t 
this = base
+offset
; 
7052         redisLog(REDIS_DEBUG
, "THIS: %lld (%c)\n", (long long) this, vmFreePage(this) ? 'F' : 'X'); 
7053         /* If we overflow, restart from page zero */ 
7054         if (this >= server
.vm_pages
) { 
7055             this -= server
.vm_pages
; 
7057                 /* Just overflowed, what we found on tail is no longer 
7058                  * interesting, as it's no longer contiguous. */ 
7062         if (vmFreePage(this)) { 
7063             /* This is a free page */ 
7065             /* Already got N free pages? Return to the caller, with success */ 
7067                 *first 
= this-(n
-1); 
7068                 server
.vm_next_page 
= this+1; 
7072             /* The current one is not a free page */ 
7076         /* Fast-forward if the current page is not free and we already 
7077          * searched enough near this place. */ 
7079         if (!numfree 
&& since_jump 
>= REDIS_VM_MAX_RANDOM_JUMP
/4) { 
7080             offset 
+= random() % REDIS_VM_MAX_RANDOM_JUMP
; 
7082             /* Note that even if we rewind after the jump, we are don't need 
7083              * to make sure numfree is set to zero as we only jump *if* it 
7084              * is set to zero. */ 
7086             /* Otherwise just check the next page */ 
7093 /* Swap the 'val' object relative to 'key' into disk. Store all the information 
7094  * needed to later retrieve the object into the key object. 
7095  * If we can't find enough contiguous empty pages to swap the object on disk 
7096  * REDIS_ERR is returned. */ 
7097 static int vmSwapObjectBlocking(robj 
*key
, robj 
*val
) { 
7098     off_t pages 
= rdbSavedObjectPages(val
,NULL
); 
7101     assert(key
->storage 
== REDIS_VM_MEMORY
); 
7102     assert(key
->refcount 
== 1); 
7103     if (vmFindContiguousPages(&page
,pages
) == REDIS_ERR
) return REDIS_ERR
; 
7104     if (fseeko(server
.vm_fp
,page
*server
.vm_page_size
,SEEK_SET
) == -1) { 
7105         redisLog(REDIS_WARNING
, 
7106             "Critical VM problem in vmSwapObjectBlocking(): can't seek: %s", 
7110     rdbSaveObject(server
.vm_fp
,val
); 
7111     key
->vm
.page 
= page
; 
7112     key
->vm
.usedpages 
= pages
; 
7113     key
->storage 
= REDIS_VM_SWAPPED
; 
7114     key
->vtype 
= val
->type
; 
7115     decrRefCount(val
); /* Deallocate the object from memory. */ 
7116     vmMarkPagesUsed(page
,pages
); 
7117     redisLog(REDIS_DEBUG
,"VM: object %s swapped out at %lld (%lld pages)", 
7118         (unsigned char*) key
->ptr
, 
7119         (unsigned long long) page
, (unsigned long long) pages
); 
7120     server
.vm_stats_swapped_objects
++; 
7121     server
.vm_stats_swapouts
++; 
7122     fflush(server
.vm_fp
); 
7126 /* Load the value object relative to the 'key' object from swap to memory. 
7127  * The newly allocated object is returned. 
7129  * If preview is true the unserialized object is returned to the caller but 
7130  * no changes are made to the key object, nor the pages are marked as freed */ 
7131 static robj 
*vmGenericLoadObject(robj 
*key
, int preview
) { 
7134     redisAssert(key
->storage 
== REDIS_VM_SWAPPED
); 
7135     if (fseeko(server
.vm_fp
,key
->vm
.page
*server
.vm_page_size
,SEEK_SET
) == -1) { 
7136         redisLog(REDIS_WARNING
, 
7137             "Unrecoverable VM problem in vmLoadObject(): can't seek: %s", 
7141     val 
= rdbLoadObject(key
->vtype
,server
.vm_fp
); 
7143         redisLog(REDIS_WARNING
, "Unrecoverable VM problem in vmLoadObject(): can't load object from swap file: %s", strerror(errno
)); 
7147         key
->storage 
= REDIS_VM_MEMORY
; 
7148         key
->vm
.atime 
= server
.unixtime
; 
7149         vmMarkPagesFree(key
->vm
.page
,key
->vm
.usedpages
); 
7150         redisLog(REDIS_DEBUG
, "VM: object %s loaded from disk", 
7151             (unsigned char*) key
->ptr
); 
7152         server
.vm_stats_swapped_objects
--; 
7154         redisLog(REDIS_DEBUG
, "VM: object %s previewed from disk", 
7155             (unsigned char*) key
->ptr
); 
7157     server
.vm_stats_swapins
++; 
7161 /* Plain object loading, from swap to memory */ 
7162 static robj 
*vmLoadObject(robj 
*key
) { 
7163     /* If we are loading the object in background, stop it, we 
7164      * need to load this object synchronously ASAP. */ 
7165     if (key
->storage 
== REDIS_VM_LOADING
) 
7166         vmCancelThreadedIOJob(key
); 
7167     return vmGenericLoadObject(key
,0); 
7170 /* Just load the value on disk, without to modify the key. 
7171  * This is useful when we want to perform some operation on the value 
7172  * without to really bring it from swap to memory, like while saving the 
7173  * dataset or rewriting the append only log. */ 
7174 static robj 
*vmPreviewObject(robj 
*key
) { 
7175     return vmGenericLoadObject(key
,1); 
7178 /* How a good candidate is this object for swapping? 
7179  * The better candidate it is, the greater the returned value. 
7181  * Currently we try to perform a fast estimation of the object size in 
7182  * memory, and combine it with aging informations. 
7184  * Basically swappability = idle-time * log(estimated size) 
7186  * Bigger objects are preferred over smaller objects, but not 
7187  * proportionally, this is why we use the logarithm. This algorithm is 
7188  * just a first try and will probably be tuned later. */ 
7189 static double computeObjectSwappability(robj 
*o
) { 
7190     time_t age 
= server
.unixtime 
- o
->vm
.atime
; 
7194     struct dictEntry 
*de
; 
7197     if (age 
<= 0) return 0; 
7200         if (o
->encoding 
!= REDIS_ENCODING_RAW
) { 
7203             asize 
= sdslen(o
->ptr
)+sizeof(*o
)+sizeof(long)*2; 
7208         listNode 
*ln 
= listFirst(l
); 
7210         asize 
= sizeof(list
); 
7212             robj 
*ele 
= ln
->value
; 
7215             elesize 
= (ele
->encoding 
== REDIS_ENCODING_RAW
) ? 
7216                             (sizeof(*o
)+sdslen(ele
->ptr
)) : 
7218             asize 
+= (sizeof(listNode
)+elesize
)*listLength(l
); 
7223         z 
= (o
->type 
== REDIS_ZSET
); 
7224         d 
= z 
? ((zset
*)o
->ptr
)->dict 
: o
->ptr
; 
7226         asize 
= sizeof(dict
)+(sizeof(struct dictEntry
*)*dictSlots(d
)); 
7227         if (z
) asize 
+= sizeof(zset
)-sizeof(dict
); 
7232             de 
= dictGetRandomKey(d
); 
7233             ele 
= dictGetEntryKey(de
); 
7234             elesize 
= (ele
->encoding 
== REDIS_ENCODING_RAW
) ? 
7235                             (sizeof(*o
)+sdslen(ele
->ptr
)) : 
7237             asize 
+= (sizeof(struct dictEntry
)+elesize
)*dictSize(d
); 
7238             if (z
) asize 
+= sizeof(zskiplistNode
)*dictSize(d
); 
7242     return (double)asize
*log(1+asize
); 
7245 /* Try to swap an object that's a good candidate for swapping. 
7246  * Returns REDIS_OK if the object was swapped, REDIS_ERR if it's not possible 
7247  * to swap any object at all. 
7249  * If 'usethreaded' is true, Redis will try to swap the object in background 
7250  * using I/O threads. */ 
7251 static int vmSwapOneObject(int usethreads
) { 
7253     struct dictEntry 
*best 
= NULL
; 
7254     double best_swappability 
= 0; 
7255     redisDb 
*best_db 
= NULL
; 
7258     for (j 
= 0; j 
< server
.dbnum
; j
++) { 
7259         redisDb 
*db 
= server
.db
+j
; 
7260         int maxtries 
= 1000; 
7262         if (dictSize(db
->dict
) == 0) continue; 
7263         for (i 
= 0; i 
< 5; i
++) { 
7265             double swappability
; 
7267             if (maxtries
) maxtries
--; 
7268             de 
= dictGetRandomKey(db
->dict
); 
7269             key 
= dictGetEntryKey(de
); 
7270             val 
= dictGetEntryVal(de
); 
7271             if (key
->storage 
!= REDIS_VM_MEMORY
) { 
7272                 if (maxtries
) i
--; /* don't count this try */ 
7275             swappability 
= computeObjectSwappability(val
); 
7276             if (!best 
|| swappability 
> best_swappability
) { 
7278                 best_swappability 
= swappability
; 
7284         redisLog(REDIS_DEBUG
,"No swappable key found!"); 
7287     key 
= dictGetEntryKey(best
); 
7288     val 
= dictGetEntryVal(best
); 
7290     redisLog(REDIS_DEBUG
,"Key with best swappability: %s, %f", 
7291         key
->ptr
, best_swappability
); 
7293     /* Unshare the key if needed */ 
7294     if (key
->refcount 
> 1) { 
7295         robj 
*newkey 
= dupStringObject(key
); 
7297         key 
= dictGetEntryKey(best
) = newkey
; 
7301         vmSwapObjectThreaded(key
,val
,best_db
); 
7304         if (vmSwapObjectBlocking(key
,val
) == REDIS_OK
) { 
7305             dictGetEntryVal(best
) = NULL
; 
/* Swap out one object synchronously. See vmSwapOneObject() for the
 * return value. */
static int vmSwapOneObjectBlocking() {
    const int usethreads = 0;

    return vmSwapOneObject(usethreads);
}
/* Swap out one object using the I/O threads. See vmSwapOneObject() for
 * the return value. */
static int vmSwapOneObjectThreaded() {
    const int usethreads = 1;

    return vmSwapOneObject(usethreads);
}
7321 /* Return true if it's safe to swap out objects in a given moment. 
7322  * Basically we don't want to swap objects out while there is a BGSAVE 
7323  * or a BGAEOREWRITE running in backgroud. */ 
7324 static int vmCanSwapOut(void) { 
7325     return (server
.bgsavechildpid 
== -1 && server
.bgrewritechildpid 
== -1); 
7328 /* Delete a key if swapped. Returns 1 if the key was found, was swapped 
7329  * and was deleted. Otherwise 0 is returned. */ 
7330 static int deleteIfSwapped(redisDb 
*db
, robj 
*key
) { 
7334     if ((de 
= dictFind(db
->dict
,key
)) == NULL
) return 0; 
7335     foundkey 
= dictGetEntryKey(de
); 
7336     if (foundkey
->storage 
== REDIS_VM_MEMORY
) return 0; 
7341 /* =================== Virtual Memory - Threaded I/O  ======================= */ 
7343 static void freeIOJob(iojob 
*j
) { 
7344     if (j
->type 
== REDIS_IOJOB_PREPARE_SWAP 
|| 
7345         j
->type 
== REDIS_IOJOB_DO_SWAP
) 
7346         decrRefCount(j
->val
); 
7347     decrRefCount(j
->key
); 
7351 /* Every time a thread finished a Job, it writes a byte into the write side 
7352  * of an unix pipe in order to "awake" the main thread, and this function 
7354 static void vmThreadedIOCompletedJob(aeEventLoop 
*el
, int fd
, void *privdata
, 
7360     REDIS_NOTUSED(mask
); 
7361     REDIS_NOTUSED(privdata
); 
7363     /* For every byte we read in the read side of the pipe, there is one 
7364      * I/O job completed to process. */ 
7365     while((retval 
= read(fd
,buf
,1)) == 1) { 
7369         struct dictEntry 
*de
; 
7371         redisLog(REDIS_DEBUG
,"Processing I/O completed job"); 
7372         assert(listLength(server
.io_processed
) != 0); 
7374         /* Get the processed element (the oldest one) */ 
7376         ln 
= listFirst(server
.io_processed
); 
7378         listDelNode(server
.io_processed
,ln
); 
7380         /* If this job is marked as canceled, just ignore it */ 
7385         /* Post process it in the main thread, as there are things we 
7386          * can do just here to avoid race conditions and/or invasive locks */ 
7387         redisLog(REDIS_DEBUG
,"Job type: %d, key at %p (%s) refcount: %d\n", j
->type
, (void*)j
->key
, (char*)j
->key
->ptr
, j
->key
->refcount
); 
7388         if (j
->key
->refcount 
<= 0) { 
7389             printf("Ooops ref count is <= 0!\n"); 
7392         de 
= dictFind(j
->db
->dict
,j
->key
); 
7394         key 
= dictGetEntryKey(de
); 
7395         if (j
->type 
== REDIS_IOJOB_LOAD
) { 
7396             /* Key loaded, bring it at home */ 
7397             key
->storage 
= REDIS_VM_MEMORY
; 
7398             key
->vm
.atime 
= server
.unixtime
; 
7399             vmMarkPagesFree(key
->vm
.page
,key
->vm
.usedpages
); 
7400             redisLog(REDIS_DEBUG
, "VM: object %s loaded from disk (threaded)", 
7401                 (unsigned char*) key
->ptr
); 
7402             server
.vm_stats_swapped_objects
--; 
7403             server
.vm_stats_swapins
++; 
7405         } else if (j
->type 
== REDIS_IOJOB_PREPARE_SWAP
) { 
7406             /* Now we know the amount of pages required to swap this object. 
7407              * Let's find some space for it, and queue this task again 
7408              * rebranded as REDIS_IOJOB_DO_SWAP. */ 
7409             if (vmFindContiguousPages(&j
->page
,j
->pages
) == REDIS_ERR
) { 
7410                 /* Ooops... no space! */ 
7413                 j
->type 
= REDIS_IOJOB_DO_SWAP
; 
7418         } else if (j
->type 
== REDIS_IOJOB_DO_SWAP
) { 
7421             /* Key swapped. We can finally free some memory. */ 
7422             val 
= dictGetEntryVal(de
); 
7423             key
->vm
.page 
= j
->page
; 
7424             key
->vm
.usedpages 
= j
->pages
; 
7425             key
->storage 
= REDIS_VM_SWAPPED
; 
7426             key
->vtype 
= j
->val
->type
; 
7427             decrRefCount(val
); /* Deallocate the object from memory. */ 
7428             dictGetEntryVal(de
) = NULL
; 
7429             vmMarkPagesUsed(j
->page
,j
->pages
); 
7430             redisLog(REDIS_DEBUG
, 
7431                 "VM: object %s swapped out at %lld (%lld pages) (threaded)", 
7432                 (unsigned char*) key
->ptr
, 
7433                 (unsigned long long) j
->page
, (unsigned long long) j
->pages
); 
7434             server
.vm_stats_swapped_objects
++; 
7435             server
.vm_stats_swapouts
++; 
7437             /* Put a few more swap requests in queue if we are still 
7439             if (zmalloc_used_memory() > server
.vm_max_memory
) { 
7443                     more 
= listLength(server
.io_newjobs
) < 
7444                             (unsigned) server
.vm_max_threads
; 
7446                     /* Don't waste CPU time if swappable objects are rare. */ 
7447                     if (vmSwapOneObjectThreaded() == REDIS_ERR
) break; 
7452     if (retval 
< 0 && errno 
!= EAGAIN
) { 
7453         redisLog(REDIS_WARNING
, 
7454             "WARNING: read(2) error in vmThreadedIOCompletedJob() %s", 
7459 static void lockThreadedIO(void) { 
7460     pthread_mutex_lock(&server
.io_mutex
); 
7463 static void unlockThreadedIO(void) { 
7464     pthread_mutex_unlock(&server
.io_mutex
); 
7467 /* Remove the specified object from the threaded I/O queue if still not 
7468  * processed, otherwise make sure to flag it as canceled. */ 
7469 static void vmCancelThreadedIOJob(robj 
*o
) { 
7471         server
.io_newjobs
, server
.io_processing
, server
.io_processed
 
7475     assert(o
->storage 
== REDIS_VM_LOADING 
|| o
->storage 
== REDIS_VM_SWAPPING
); 
7477     /* Search for a matching key in one of the queues */ 
7478     for (i 
= 0; i 
< 3; i
++) { 
7481         listRewind(lists
[i
]); 
7482         while ((ln 
= listYield(lists
[i
])) != NULL
) { 
7483             iojob 
*job 
= ln
->value
; 
7485             if (compareStringObjects(job
->key
,o
) == 0) { 
7487                 case 0: /* io_newjobs */ 
7488                     /* If the job was not yet processed the best thing to do 
7489                      * is to remove it from the queue at all */ 
7490                     decrRefCount(job
->key
); 
7491                     if (job
->type 
== REDIS_IOJOB_PREPARE_SWAP 
|| 
7492                         job
->type 
== REDIS_IOJOB_DO_SWAP
) 
7493                         decrRefCount(job
->val
); 
7494                     listDelNode(lists
[i
],ln
); 
7497                 case 1: /* io_processing */ 
7498                 case 2: /* io_processed */ 
7502                 if (o
->storage 
== REDIS_VM_LOADING
) 
7503                     o
->storage 
= REDIS_VM_SWAPPED
; 
7504                 else if (o
->storage 
== REDIS_VM_SWAPPING
) 
7505                     o
->storage 
= REDIS_VM_MEMORY
; 
7512     assert(1 != 1); /* We should never reach this */ 
/* Entry point of an I/O worker thread.
 *
 * From the visible lines: the thread detaches itself, takes the first job
 * from server.io_newjobs, appends it to server.io_processing while it works
 * on it, then moves it to server.io_processed and writes one byte to
 * server.io_ready_pipe_write so that the main thread is notified. When the
 * new jobs queue is empty it decrements server.io_active_threads.
 *
 * NOTE(review): some lines of this function are not visible in this
 * excerpt (e.g. where the I/O mutex is taken and released, and what the
 * DO_SWAP branch does) -- confirm against the full source. */
7515 static void *IOThreadEntryPoint(void *arg
) { 
7520     pthread_detach(pthread_self()); 
7522         /* Get a new job to process */ 
7524         if (listLength(server
.io_newjobs
) == 0) { 
7525             /* No new jobs in queue, exit. */ 
/* NOTE(review): pthread_t is an opaque type; casting it to long long for
 * printing (here and in the printf calls below) is not portable. */
7526             printf("Thread %lld exiting, nothing to do\n", 
7527                 (long long) pthread_self()); 
7528             server
.io_active_threads
--; 
7532         ln 
= listFirst(server
.io_newjobs
); 
7534         listDelNode(server
.io_newjobs
,ln
); 
7535         /* Add the job in the processing queue */ 
7536         j
->thread 
= pthread_self(); 
7537         listAddNodeTail(server
.io_processing
,j
); 
7538         ln 
= listLast(server
.io_processing
); /* We use ln later to remove it */ 
7540         printf("Thread %lld got a new job: %p about key '%s'\n", 
7541             (long long) pthread_self(), (void*)j
, (char*)j
->key
->ptr
); 
7543         /* Process the Job */ 
/* The LOAD branch below has no body in this version. */
7544         if (j
->type 
== REDIS_IOJOB_LOAD
) { 
7545         } else if (j
->type 
== REDIS_IOJOB_PREPARE_SWAP
) { 
/* The number of pages is computed by passing a stream opened on /dev/null
 * to rdbSavedObjectPages().
 * NOTE(review): in the visible lines the fopen() result is used without a
 * NULL check -- confirm. */
7546             FILE *fp 
= fopen("/dev/null","w+"); 
7547             j
->pages 
= rdbSavedObjectPages(j
->val
,fp
); 
7549         } else if (j
->type 
== REDIS_IOJOB_DO_SWAP
) { 
7552         /* Done: insert the job into the processed queue */ 
7553         printf("Thread %lld completed the job: %p\n", 
7554             (long long) pthread_self(), (void*)j
); 
7556         listDelNode(server
.io_processing
,ln
); 
7557         listAddNodeTail(server
.io_processed
,j
); 
7560         /* Signal the main thread there is new stuff to process */ 
/* NOTE(review): the write() call is the argument of assert(). If this is
 * the <assert.h> macro and the build defines NDEBUG, the call is compiled
 * out and the main thread is never notified -- TODO confirm and move the
 * write() out of the assertion. */
7561         assert(write(server
.io_ready_pipe_write
,"x",1) == 1); 
7563     return NULL
; /* never reached */ 
7566 static void spawnIOThread(void) { 
7569     pthread_create(&thread
,NULL
,IOThreadEntryPoint
,NULL
); 
7570     server
.io_active_threads
++; 
7573 /* This function must be called while with threaded IO locked */ 
7574 static void queueIOJob(iojob 
*j
) { 
7575     listAddNodeTail(server
.io_newjobs
,j
); 
7576     if (server
.io_active_threads 
< server
.vm_max_threads
) 
7580 static int vmSwapObjectThreaded(robj 
*key
, robj 
*val
, redisDb 
*db
) { 
7583     assert(key
->storage 
== REDIS_VM_MEMORY
); 
7584     assert(key
->refcount 
== 1); 
7586     j 
= zmalloc(sizeof(*j
)); 
7587     j
->type 
= REDIS_IOJOB_PREPARE_SWAP
; 
7589     j
->key 
= dupStringObject(key
); 
7593     j
->thread 
= (pthread_t
) -1; 
7594     key
->storage 
= REDIS_VM_SWAPPING
; 
7602 /* ================================= Debugging ============================== */ 
7604 static void debugCommand(redisClient 
*c
) { 
7605     if (!strcasecmp(c
->argv
[1]->ptr
,"segfault")) { 
7607     } else if (!strcasecmp(c
->argv
[1]->ptr
,"reload")) { 
7608         if (rdbSave(server
.dbfilename
) != REDIS_OK
) { 
7609             addReply(c
,shared
.err
); 
7613         if (rdbLoad(server
.dbfilename
) != REDIS_OK
) { 
7614             addReply(c
,shared
.err
); 
7617         redisLog(REDIS_WARNING
,"DB reloaded by DEBUG RELOAD"); 
7618         addReply(c
,shared
.ok
); 
7619     } else if (!strcasecmp(c
->argv
[1]->ptr
,"loadaof")) { 
7621         if (loadAppendOnlyFile(server
.appendfilename
) != REDIS_OK
) { 
7622             addReply(c
,shared
.err
); 
7625         redisLog(REDIS_WARNING
,"Append Only File loaded by DEBUG LOADAOF"); 
7626         addReply(c
,shared
.ok
); 
7627     } else if (!strcasecmp(c
->argv
[1]->ptr
,"object") && c
->argc 
== 3) { 
7628         dictEntry 
*de 
= dictFind(c
->db
->dict
,c
->argv
[2]); 
7632             addReply(c
,shared
.nokeyerr
); 
7635         key 
= dictGetEntryKey(de
); 
7636         val 
= dictGetEntryVal(de
); 
7637         if (server
.vm_enabled 
&& (key
->storage 
== REDIS_VM_MEMORY 
|| 
7638                                   key
->storage 
== REDIS_VM_SWAPPING
)) { 
7639             addReplySds(c
,sdscatprintf(sdsempty(), 
7640                 "+Key at:%p refcount:%d, value at:%p refcount:%d " 
7641                 "encoding:%d serializedlength:%lld\r\n", 
7642                 (void*)key
, key
->refcount
, (void*)val
, val
->refcount
, 
7643                 val
->encoding
, rdbSavedObjectLen(val
,NULL
))); 
7645             addReplySds(c
,sdscatprintf(sdsempty(), 
7646                 "+Key at:%p refcount:%d, value swapped at: page %llu " 
7647                 "using %llu pages\r\n", 
7648                 (void*)key
, key
->refcount
, (unsigned long long) key
->vm
.page
, 
7649                 (unsigned long long) key
->vm
.usedpages
)); 
7651     } else if (!strcasecmp(c
->argv
[1]->ptr
,"swapout") && c
->argc 
== 3) { 
7652         dictEntry 
*de 
= dictFind(c
->db
->dict
,c
->argv
[2]); 
7655         if (!server
.vm_enabled
) { 
7656             addReplySds(c
,sdsnew("-ERR Virtual Memory is disabled\r\n")); 
7660             addReply(c
,shared
.nokeyerr
); 
7663         key 
= dictGetEntryKey(de
); 
7664         val 
= dictGetEntryVal(de
); 
7665         /* If the key is shared we want to create a copy */ 
7666         if (key
->refcount 
> 1) { 
7667             robj 
*newkey 
= dupStringObject(key
); 
7669             key 
= dictGetEntryKey(de
) = newkey
; 
7672         if (key
->storage 
!= REDIS_VM_MEMORY
) { 
7673             addReplySds(c
,sdsnew("-ERR This key is not in memory\r\n")); 
7674         } else if (vmSwapObjectBlocking(key
,val
) == REDIS_OK
) { 
7675             dictGetEntryVal(de
) = NULL
; 
7676             addReply(c
,shared
.ok
); 
7678             addReply(c
,shared
.err
); 
7681         addReplySds(c
,sdsnew( 
7682             "-ERR Syntax error, try DEBUG [SEGFAULT|OBJECT <key>|SWAPOUT <key>|RELOAD]\r\n")); 
7686 static void _redisAssert(char *estr
) { 
7687     redisLog(REDIS_WARNING
,"=== ASSERTION FAILED ==="); 
7688     redisLog(REDIS_WARNING
,"==> %s\n",estr
); 
7689 #ifdef HAVE_BACKTRACE 
7690     redisLog(REDIS_WARNING
,"(forcing SIGSEGV in order to print the stack trace)"); 
7695 /* =================================== Main! ================================ */ 
/* Return the value found in /proc/sys/vm/overcommit_memory converted to an
 * integer, or -1 if the file can't be opened or read. */
int linuxOvercommitMemoryValue(void) {
    char line[64];
    int value = -1;
    FILE *fp = fopen("/proc/sys/vm/overcommit_memory","r");

    if (fp == NULL) return -1;
    if (fgets(line,sizeof(line),fp) != NULL) value = atoi(line);
    fclose(fp);
    return value;
}
7712 void linuxOvercommitMemoryWarning(void) { 
7713     if (linuxOvercommitMemoryValue() == 0) { 
7714         redisLog(REDIS_WARNING
,"WARNING overcommit_memory is set to 0! Background save may fail under low condition memory. To fix this issue add 'vm.overcommit_memory = 1' to /etc/sysctl.conf and then reboot or run the command 'sysctl vm.overcommit_memory=1' for this to take effect."); 
7717 #endif /* __linux__ */ 
7719 static void daemonize(void) { 
7723     if (fork() != 0) exit(0); /* parent exits */ 
7724     printf("New pid: %d\n", getpid()); 
7725     setsid(); /* create a new session */ 
7727     /* Every output goes to /dev/null. If Redis is daemonized but 
7728      * the 'logfile' is set to 'stdout' in the configuration file 
7729      * it will not log at all. */ 
7730     if ((fd 
= open("/dev/null", O_RDWR
, 0)) != -1) { 
7731         dup2(fd
, STDIN_FILENO
); 
7732         dup2(fd
, STDOUT_FILENO
); 
7733         dup2(fd
, STDERR_FILENO
); 
7734         if (fd 
> STDERR_FILENO
) close(fd
); 
7736     /* Try to write the pid file */ 
7737     fp 
= fopen(server
.pidfile
,"w"); 
7739         fprintf(fp
,"%d\n",getpid()); 
7744 int main(int argc
, char **argv
) { 
7747         resetServerSaveParams(); 
7748         loadServerConfig(argv
[1]); 
7749     } else if (argc 
> 2) { 
7750         fprintf(stderr
,"Usage: ./redis-server [/path/to/redis.conf]\n"); 
7753         redisLog(REDIS_WARNING
,"Warning: no config file specified, using the default config. In order to specify a config file use 'redis-server /path/to/redis.conf'"); 
7755     if (server
.daemonize
) daemonize(); 
7757     redisLog(REDIS_NOTICE
,"Server started, Redis version " REDIS_VERSION
); 
7759     linuxOvercommitMemoryWarning(); 
7761     if (server
.appendonly
) { 
7762         if (loadAppendOnlyFile(server
.appendfilename
) == REDIS_OK
) 
7763             redisLog(REDIS_NOTICE
,"DB loaded from append only file"); 
7765         if (rdbLoad(server
.dbfilename
) == REDIS_OK
) 
7766             redisLog(REDIS_NOTICE
,"DB loaded from disk"); 
7768     redisLog(REDIS_NOTICE
,"The server is now ready to accept connections on port %d", server
.port
); 
7770     aeDeleteEventLoop(server
.el
); 
7774 /* ============================= Backtrace support ========================= */ 
7776 #ifdef HAVE_BACKTRACE 
7777 static char *findFuncName(void *pointer
, unsigned long *offset
); 
7779 static void *getMcontextEip(ucontext_t 
*uc
) { 
7780 #if defined(__FreeBSD__) 
7781     return (void*) uc
->uc_mcontext
.mc_eip
; 
7782 #elif defined(__dietlibc__) 
7783     return (void*) uc
->uc_mcontext
.eip
; 
7784 #elif defined(__APPLE__) && !defined(MAC_OS_X_VERSION_10_6) 
7786     return (void*) uc
->uc_mcontext
->__ss
.__rip
; 
7788     return (void*) uc
->uc_mcontext
->__ss
.__eip
; 
7790 #elif defined(__APPLE__) && defined(MAC_OS_X_VERSION_10_6) 
7791   #if defined(_STRUCT_X86_THREAD_STATE64) && !defined(__i386__) 
7792     return (void*) uc
->uc_mcontext
->__ss
.__rip
; 
7794     return (void*) uc
->uc_mcontext
->__ss
.__eip
; 
7796 #elif defined(__i386__) || defined(__X86_64__)  || defined(__x86_64__) 
7797     return (void*) uc
->uc_mcontext
.gregs
[REG_EIP
]; /* Linux 32/64 bit */ 
7798 #elif defined(__ia64__) /* Linux IA64 */ 
7799     return (void*) uc
->uc_mcontext
.sc_ip
; 
7805 static void segvHandler(int sig
, siginfo_t 
*info
, void *secret
) { 
7807     char **messages 
= NULL
; 
7808     int i
, trace_size 
= 0; 
7809     unsigned long offset
=0; 
7810     ucontext_t 
*uc 
= (ucontext_t
*) secret
; 
7812     REDIS_NOTUSED(info
); 
7814     redisLog(REDIS_WARNING
, 
7815         "======= Ooops! Redis %s got signal: -%d- =======", REDIS_VERSION
, sig
); 
7816     infostring 
= genRedisInfoString(); 
7817     redisLog(REDIS_WARNING
, "%s",infostring
); 
7818     /* It's not safe to sdsfree() the returned string under memory 
7819      * corruption conditions. Let it leak as we are going to abort */ 
7821     trace_size 
= backtrace(trace
, 100); 
7822     /* overwrite sigaction with caller's address */ 
7823     if (getMcontextEip(uc
) != NULL
) { 
7824         trace
[1] = getMcontextEip(uc
); 
7826     messages 
= backtrace_symbols(trace
, trace_size
); 
7828     for (i
=1; i
<trace_size
; ++i
) { 
7829         char *fn 
= findFuncName(trace
[i
], &offset
), *p
; 
7831         p 
= strchr(messages
[i
],'+'); 
7832         if (!fn 
|| (p 
&& ((unsigned long)strtol(p
+1,NULL
,10)) < offset
)) { 
7833             redisLog(REDIS_WARNING
,"%s", messages
[i
]); 
7835             redisLog(REDIS_WARNING
,"%d redis-server %p %s + %d", i
, trace
[i
], fn
, (unsigned int)offset
); 
7838     /* free(messages); Don't call free() with possibly corrupted memory. */ 
7842 static void setupSigSegvAction(void) { 
7843     struct sigaction act
; 
7845     sigemptyset (&act
.sa_mask
); 
7846     /* When the SA_SIGINFO flag is set in sa_flags then sa_sigaction 
7847      * is used. Otherwise, sa_handler is used */ 
7848     act
.sa_flags 
= SA_NODEFER 
| SA_ONSTACK 
| SA_RESETHAND 
| SA_SIGINFO
; 
7849     act
.sa_sigaction 
= segvHandler
; 
7850     sigaction (SIGSEGV
, &act
, NULL
); 
7851     sigaction (SIGBUS
, &act
, NULL
); 
7852     sigaction (SIGFPE
, &act
, NULL
); 
7853     sigaction (SIGILL
, &act
, NULL
); 
7854     sigaction (SIGBUS
, &act
, NULL
); 
7858 #include "staticsymbols.h" 
7859 /* This function try to convert a pointer into a function name. It's used in 
7860  * oreder to provide a backtrace under segmentation fault that's able to 
7861  * display functions declared as static (otherwise the backtrace is useless). */ 
7862 static char *findFuncName(void *pointer
, unsigned long *offset
){ 
7864     unsigned long off
, minoff 
= 0; 
7866     /* Try to match against the Symbol with the smallest offset */ 
7867     for (i
=0; symsTable
[i
].pointer
; i
++) { 
7868         unsigned long lp 
= (unsigned long) pointer
; 
7870         if (lp 
!= (unsigned long)-1 && lp 
>= symsTable
[i
].pointer
) { 
7871             off
=lp
-symsTable
[i
].pointer
; 
7872             if (ret 
< 0 || off 
< minoff
) { 
7878     if (ret 
== -1) return NULL
; 
7880     return symsTable
[ret
].name
; 
7882 #else /* HAVE_BACKTRACE */ 
/* Built without HAVE_BACKTRACE: there is no crash handler to install, so
 * this is intentionally a no-op. */
static void setupSigSegvAction(void) {
}
7885 #endif /* HAVE_BACKTRACE */