2  * Copyright (c) 2009-2010, Salvatore Sanfilippo <antirez at gmail dot com> 
   5  * Redistribution and use in source and binary forms, with or without 
   6  * modification, are permitted provided that the following conditions are met: 
   8  *   * Redistributions of source code must retain the above copyright notice, 
   9  *     this list of conditions and the following disclaimer. 
  10  *   * Redistributions in binary form must reproduce the above copyright 
  11  *     notice, this list of conditions and the following disclaimer in the 
  12  *     documentation and/or other materials provided with the distribution. 
  13  *   * Neither the name of Redis nor the names of its contributors may be used 
  14  *     to endorse or promote products derived from this software without 
  15  *     specific prior written permission. 
  17  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 
  18  * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 
  19  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 
  20  * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE 
  21  * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 
  22  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 
  23  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 
  24  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 
  25  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 
  26  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 
  27  * POSSIBILITY OF SUCH DAMAGE. 
  30 #define REDIS_VERSION "1.3.4" 
  40 #define __USE_POSIX199309 
  47 #endif /* HAVE_BACKTRACE */ 
  55 #include <arpa/inet.h> 
  59 #include <sys/resource.h> 
  66 #include "solarisfixes.h" 
  70 #include "ae.h"     /* Event driven programming library */ 
  71 #include "sds.h"    /* Dynamic safe strings */ 
  72 #include "anet.h"   /* Networking the easy way */ 
  73 #include "dict.h"   /* Hash tables */ 
  74 #include "adlist.h" /* Linked lists */ 
  75 #include "zmalloc.h" /* total memory usage aware version of malloc/free */ 
  76 #include "lzf.h"    /* LZF compression library */ 
  77 #include "pqsort.h" /* Partial qsort for SORT+LIMIT */ 
  84 /* Static server configuration */ 
  85 #define REDIS_SERVERPORT        6379    /* TCP port */ 
  86 #define REDIS_MAXIDLETIME       (60*5)  /* default client timeout */ 
  87 #define REDIS_IOBUF_LEN         1024 
  88 #define REDIS_LOADBUF_LEN       1024 
  89 #define REDIS_STATIC_ARGS       4 
  90 #define REDIS_DEFAULT_DBNUM     16 
  91 #define REDIS_CONFIGLINE_MAX    1024 
  92 #define REDIS_OBJFREELIST_MAX   1000000 /* Max number of objects to cache */ 
  93 #define REDIS_MAX_SYNC_TIME     60      /* Slave can't take more to sync */ 
  94 #define REDIS_EXPIRELOOKUPS_PER_CRON    100 /* try to expire 100 keys/second */ 
  95 #define REDIS_MAX_WRITE_PER_EVENT (1024*64) 
  96 #define REDIS_REQUEST_MAX_SIZE (1024*1024*256) /* max bytes in inline command */ 
  98 /* If more than REDIS_WRITEV_THRESHOLD write packets are pending, use writev */ 
  99 #define REDIS_WRITEV_THRESHOLD      3 
 100 /* Max number of iovecs used for each writev call */ 
 101 #define REDIS_WRITEV_IOVEC_COUNT    256 
 103 /* Hash table parameters */ 
 104 #define REDIS_HT_MINFILL        10      /* Minimal hash table fill 10% */ 
 107 #define REDIS_CMD_BULK          1       /* Bulk write command */ 
 108 #define REDIS_CMD_INLINE        2       /* Inline command */ 
 109 /* REDIS_CMD_DENYOOM deserves a longer comment: all the commands marked with 
 110    this flag will return an error when the 'maxmemory' option is set in the 
 111    config file and the server is using more than maxmemory bytes of memory. 
 112    In short, these commands are denied on low memory conditions. */ 
 113 #define REDIS_CMD_DENYOOM       4 
 116 #define REDIS_STRING 0 
 122 /* Object encodings. Some kinds of objects, like Strings and Hashes, can be 
 123  * internally represented in multiple ways. The 'encoding' field of the object 
 124  * is set to one of these values for this object. */ 
 125 #define REDIS_ENCODING_RAW 0    /* Raw representation */ 
 126 #define REDIS_ENCODING_INT 1    /* Encoded as integer */ 
 127 #define REDIS_ENCODING_ZIPMAP 2 /* Encoded as zipmap */ 
 128 #define REDIS_ENCODING_HT 3     /* Encoded as an hash table */ 
 130 /* Object types only used for dumping to disk */ 
 131 #define REDIS_EXPIRETIME 253 
 132 #define REDIS_SELECTDB 254 
 133 #define REDIS_EOF 255 
 135 /* Defines related to the dump file format. Storing 32 bit lengths for short 
 136  * keys requires a lot of space, so we check the most significant 2 bits of 
 137  * the first byte to interpret the length: 
 139  * 00|000000 => if the two MSB are 00 the len is the 6 bits of this byte 
 140  * 01|000000 00000000 => if it's 01, the len is 14 bits, 6 bits + 8 bits of next byte 
 141  * 10|000000 [32 bit integer] => if it's 10, a full 32 bit len will follow 
 142  * 11|000000 this means: specially encoded object will follow. The six bit 
 143  *           number specifies the kind of object that follows. 
 144  *           See the REDIS_RDB_ENC_* defines. 
 146  * Lengths up to 63 are stored using a single byte; most DB keys, and many 
 147  * values, will fit inside. */ 
 148 #define REDIS_RDB_6BITLEN 0 
 149 #define REDIS_RDB_14BITLEN 1 
 150 #define REDIS_RDB_32BITLEN 2 
 151 #define REDIS_RDB_ENCVAL 3 
 152 #define REDIS_RDB_LENERR UINT_MAX 
 154 /* When a length of a string object stored on disk has the first two bits 
 155  * set (REDIS_RDB_ENCVAL), the remaining six bits specify a special encoding 
 156  * for the object according to the following defines: */ 
 157 #define REDIS_RDB_ENC_INT8 0        /* 8 bit signed integer */ 
 158 #define REDIS_RDB_ENC_INT16 1       /* 16 bit signed integer */ 
 159 #define REDIS_RDB_ENC_INT32 2       /* 32 bit signed integer */ 
 160 #define REDIS_RDB_ENC_LZF 3         /* string compressed with LZF (see lzf.h) */ 
 162 /* Virtual memory object->where field. */ 
 163 #define REDIS_VM_MEMORY 0       /* The object is on memory */ 
 164 #define REDIS_VM_SWAPPED 1      /* The object is on disk */ 
 165 #define REDIS_VM_SWAPPING 2     /* Redis is swapping this object on disk */ 
 166 #define REDIS_VM_LOADING 3      /* Redis is loading this object from disk */ 
 168 /* Virtual memory static configuration stuff. 
 169  * Check vmFindContiguousPages() to know more about these magic numbers. */ 
 170 #define REDIS_VM_MAX_NEAR_PAGES 65536 
 171 #define REDIS_VM_MAX_RANDOM_JUMP 4096 
 172 #define REDIS_VM_MAX_THREADS 32 
 173 #define REDIS_THREAD_STACK_SIZE (1024*1024*4) 
 174 /* The following is the *percentage* of completed I/O jobs to process when the 
 175  * handler is called. While Virtual Memory I/O operations are performed by 
 176  * threads, these operations must be processed by the main thread when completed 
 177  * in order to take effect. */ 
 178 #define REDIS_MAX_COMPLETED_JOBS_PROCESSED 1 
 181 #define REDIS_SLAVE 1       /* This client is a slave server */ 
 182 #define REDIS_MASTER 2      /* This client is a master server */ 
 183 #define REDIS_MONITOR 4     /* This client is a slave monitor, see MONITOR */ 
 184 #define REDIS_MULTI 8       /* This client is in a MULTI context */ 
 185 #define REDIS_BLOCKED 16    /* The client is waiting in a blocking operation */ 
 186 #define REDIS_IO_WAIT 32    /* The client is waiting for Virtual Memory I/O */ 
 188 /* Slave replication state - slave side */ 
 189 #define REDIS_REPL_NONE 0   /* No active replication */ 
 190 #define REDIS_REPL_CONNECT 1    /* Must connect to master */ 
 191 #define REDIS_REPL_CONNECTED 2  /* Connected to master */ 
 193 /* Slave replication state - from the point of view of the master. 
 194  * Note that in the SEND_BULK and ONLINE states the slave receives new updates 
 195  * in its output queue. In the WAIT_BGSAVE_START state instead the server is 
 196  * waiting to start the next background saving in order to send updates to it. 
 *
 * The values continue after the slave-side REDIS_REPL_* constants (0-2), so
 * the two sets of states never share a number. */ 
 197 #define REDIS_REPL_WAIT_BGSAVE_START 3 /* master waits for a bgsave to start before feeding it */ 
 198 #define REDIS_REPL_WAIT_BGSAVE_END 4 /* master waits for the bgsave to end, then starts the bulk DB transmission */ 
 199 #define REDIS_REPL_SEND_BULK 5 /* master is sending the bulk DB */ 
 200 #define REDIS_REPL_ONLINE 6 /* bulk DB already transmitted, receive updates */ 
 202 /* List related stuff */ 
 206 /* Sort operations */ 
 207 #define REDIS_SORT_GET 0 
 208 #define REDIS_SORT_ASC 1 
 209 #define REDIS_SORT_DESC 2 
 210 #define REDIS_SORTKEY_MAX 1024 
 213 #define REDIS_DEBUG 0 
 214 #define REDIS_VERBOSE 1 
 215 #define REDIS_NOTICE 2 
 216 #define REDIS_WARNING 3 
 218 /* Anti-warning macro... */ 
 219 #define REDIS_NOTUSED(V) ((void) V) 
 221 #define ZSKIPLIST_MAXLEVEL 32 /* Should be enough for 2^32 elements */ 
 222 #define ZSKIPLIST_P 0.25      /* Skiplist P = 1/4 */ 
 224 /* Append only defines */ 
 225 #define APPENDFSYNC_NO 0 
 226 #define APPENDFSYNC_ALWAYS 1 
 227 #define APPENDFSYNC_EVERYSEC 2 
 229 /* Hashes related defaults */ 
 230 #define REDIS_HASH_MAX_ZIPMAP_ENTRIES 64 
 231 #define REDIS_HASH_MAX_ZIPMAP_VALUE 512 
 233 /* We can print the stacktrace, so our assert is defined this way: 
 *
 * redisAssert(_e) evaluates _e. When it is true the macro is a no-op of type
 * void. When it is false the macro calls _redisAssert() with the stringified
 * expression, __FILE__ and __LINE__, and then calls _exit(1). */ 
 234 #define redisAssert(_e) ((_e)?(void)0 : (_redisAssert(#_e,__FILE__,__LINE__),_exit(1))) 
/* Failure reporter invoked by redisAssert(): estr is the text of the failed
 * expression, file and line are the location of the assertion. */
 235 static void _redisAssert(char *estr
, char *file
, int line
); 
 237 /*================================= Data types ============================== */ 
 239 /* A redis object, that is a type able to hold a string / list / set */ 
 241 /* The VM object structure */ 
 242 struct redisObjectVM 
{ 
 243     off_t page
;         /* the page at which the object is stored on disk */ 
 244     off_t usedpages
;    /* number of pages used on disk */ 
 245     time_t atime
;       /* Last access time */ 
 248 /* The actual Redis Object */ 
 249 typedef struct redisObject 
{ 
 252     unsigned char encoding
; 
 253     unsigned char storage
;  /* If this object is a key, where is the value? 
 254                              * REDIS_VM_MEMORY, REDIS_VM_SWAPPED, ... */ 
 255     unsigned char vtype
; /* If this object is a key, and value is swapped out, 
 256                           * this is the type of the swapped out object. */ 
 258     /* VM fields, these are only allocated if VM is active, otherwise the 
 259      * object allocation function will just allocate 
 260      * sizeof(redisObject) minus sizeof(redisObjectVM), so using 
 261      * Redis without VM active will not have any overhead. */ 
 262     struct redisObjectVM vm
; 
 265 /* Macro used to initialize a Redis object allocated on the stack. 
 266  * Note that this macro is kept near the structure definition to make sure 
 267  * we'll update it when the structure is changed, to avoid bugs like 
 268  * bug #85, which was introduced exactly in this way. */ 
 269 #define initStaticStringObject(_var,_ptr) do { \ 
 271     _var.type = REDIS_STRING; \ 
 272     _var.encoding = REDIS_ENCODING_RAW; \ 
 274     if (server.vm_enabled) _var.storage = REDIS_VM_MEMORY; \ 
 277 typedef struct redisDb 
{ 
 278     dict 
*dict
;                 /* The keyspace for this DB */ 
 279     dict 
*expires
;              /* Timeout of keys with a timeout set */ 
 280     dict 
*blockingkeys
;         /* Keys with clients waiting for data (BLPOP) */ 
 281     dict 
*io_keys
;              /* Keys with clients waiting for VM I/O */ 
 285 /* Client MULTI/EXEC state */ 
 286 typedef struct multiCmd 
{ 
 289     struct redisCommand 
*cmd
; 
 292 typedef struct multiState 
{ 
 293     multiCmd 
*commands
;     /* Array of MULTI commands */ 
 294     int count
;              /* Total number of MULTI commands */ 
 297 /* With multiplexing we need to keep per-client state. 
 298  * Clients are kept in a linked list. */ 
 299 typedef struct redisClient 
{ 
 304     robj 
**argv
, **mbargv
; 
 306     int bulklen
;            /* bulk read len. -1 if not in bulk read mode */ 
 307     int multibulk
;          /* multi bulk command format active */ 
 310     time_t lastinteraction
; /* time of the last interaction, used for timeout */ 
 311     int flags
;              /* REDIS_SLAVE | REDIS_MONITOR | REDIS_MULTI ... */ 
 312     int slaveseldb
;         /* slave selected db, if this client is a slave */ 
 313     int authenticated
;      /* when requirepass is non-NULL */ 
 314     int replstate
;          /* replication state if this is a slave */ 
 315     int repldbfd
;           /* replication DB file descriptor */ 
 316     long repldboff
;         /* replication DB file offset */ 
 317     off_t repldbsize
;       /* replication DB file size */ 
 318     multiState mstate
;      /* MULTI/EXEC state */ 
 319     robj 
**blockingkeys
;    /* The key we are waiting to terminate a blocking 
 320                              * operation such as BLPOP. Otherwise NULL. */ 
 321     int blockingkeysnum
;    /* Number of blocking keys */ 
 322     time_t blockingto
;      /* Blocking operation timeout. If UNIX current time 
 323                              * is >= blockingto then the operation timed out. */ 
 324     list 
*io_keys
;          /* Keys this client is waiting to be loaded from the 
 325                              * swap file in order to continue. */ 
 333 /* Global server state structure */ 
 338     dict 
*sharingpool
;          /* Pool used for object sharing */ 
 339     unsigned int sharingpoolsize
; 
 340     long long dirty
;            /* changes to DB from the last save */ 
 342     list 
*slaves
, *monitors
; 
 343     char neterr
[ANET_ERR_LEN
]; 
 345     int cronloops
;              /* number of times the cron function run */ 
 346     list 
*objfreelist
;          /* A list of freed objects to avoid malloc() */ 
 347     time_t lastsave
;            /* Unix time of the last successful save */ 
 348     /* Fields used only for stats */ 
 349     time_t stat_starttime
;         /* server start time */ 
 350     long long stat_numcommands
;    /* number of processed commands */ 
 351     long long stat_numconnections
; /* number of connections received */ 
 364     pid_t bgsavechildpid
; 
 365     pid_t bgrewritechildpid
; 
 366     sds bgrewritebuf
; /* buffer taken by parent during append only rewrite */ 
 367     struct saveparam 
*saveparams
; 
 372     char *appendfilename
; 
 376     /* Replication related */ 
 381     redisClient 
*master
;    /* client that is master for this slave */ 
 383     unsigned int maxclients
; 
 384     unsigned long long maxmemory
; 
 385     unsigned int blpop_blocked_clients
; 
 386     unsigned int vm_blocked_clients
; 
 387     /* Sort parameters - qsort_r() is only available under BSD so we 
 388      * have to take this state global, in order to pass it to sortCompare() */ 
 392     /* Virtual memory configuration */ 
 397     unsigned long long vm_max_memory
; 
 399     size_t hash_max_zipmap_entries
; 
 400     size_t hash_max_zipmap_value
; 
 401     /* Virtual memory state */ 
 404     off_t vm_next_page
; /* Next probably empty page */ 
 405     off_t vm_near_pages
; /* Number of pages allocated sequentially */ 
 406     unsigned char *vm_bitmap
; /* Bitmap of free/used pages */ 
 407     time_t unixtime
;    /* Unix time sampled every second. */ 
 408     /* Virtual memory I/O threads stuff */ 
 409     /* An I/O thread process an element taken from the io_jobs queue and 
 410      * put the result of the operation in the io_done list. While the 
 411      * job is being processed, it's put on io_processing queue. */ 
 412     list 
*io_newjobs
; /* List of VM I/O jobs yet to be processed */ 
 413     list 
*io_processing
; /* List of VM I/O jobs being processed */ 
 414     list 
*io_processed
; /* List of VM I/O jobs already processed */ 
 415     list 
*io_ready_clients
; /* Clients ready to be unblocked. All keys loaded */ 
 416     pthread_mutex_t io_mutex
; /* lock to access io_jobs/io_done/io_thread_job */ 
 417     pthread_mutex_t obj_freelist_mutex
; /* safe redis objects creation/free */ 
 418     pthread_mutex_t io_swapfile_mutex
; /* So we can lseek + write */ 
 419     pthread_attr_t io_threads_attr
; /* attributes for threads creation */ 
 420     int io_active_threads
; /* Number of running I/O threads */ 
 421     int vm_max_threads
; /* Max number of I/O threads running at the same time */ 
 422     /* Our main thread is blocked on the event loop, looking for sockets ready 
 423      * to be read or written, so when a threaded I/O operation is ready to be 
 424      * processed by the main thread, the I/O thread will use a unix pipe to 
 425      * awake the main thread. The following are the two pipe FDs. */ 
 426     int io_ready_pipe_read
; 
 427     int io_ready_pipe_write
; 
 428     /* Virtual memory stats */ 
 429     unsigned long long vm_stats_used_pages
; 
 430     unsigned long long vm_stats_swapped_objects
; 
 431     unsigned long long vm_stats_swapouts
; 
 432     unsigned long long vm_stats_swapins
; 
 /* Signature of a command implementation: it takes a redisClient pointer and
  * returns nothing. struct redisCommand stores a pointer to such a function
  * in its 'proc' field. */
 436 typedef void redisCommandProc(redisClient 
*c
); 
 437 struct redisCommand 
{ 
 439     redisCommandProc 
*proc
; 
 442     /* What keys should be loaded in background when calling this command? */ 
 443     int vm_firstkey
; /* The first argument that's a key (0 = no keys) */ 
 444     int vm_lastkey
;  /* The last argument that's a key */ 
 445     int vm_keystep
;  /* The step between first and last key */ 
 448 struct redisFunctionSym 
{ 
 450     unsigned long pointer
; 
 453 typedef struct _redisSortObject 
{ 
 461 typedef struct _redisSortOperation 
{ 
 464 } redisSortOperation
; 
 466 /* ZSETs use a specialized version of Skiplists */ 
 468 typedef struct zskiplistNode 
{ 
 469     struct zskiplistNode 
**forward
; 
 470     struct zskiplistNode 
*backward
; 
 476 typedef struct zskiplist 
{ 
 477     struct zskiplistNode 
*header
, *tail
; 
 478     unsigned long length
; 
 482 typedef struct zset 
{ 
 487 /* Our shared "common" objects */ 
 489 struct sharedObjectsStruct 
{ 
 490     robj 
*crlf
, *ok
, *err
, *emptybulk
, *czero
, *cone
, *pong
, *space
, 
 491     *colon
, *nullbulk
, *nullmultibulk
, *queued
, 
 492     *emptymultibulk
, *wrongtypeerr
, *nokeyerr
, *syntaxerr
, *sameobjecterr
, 
 493     *outofrangeerr
, *plus
, 
 494     *select0
, *select1
, *select2
, *select3
, *select4
, 
 495     *select5
, *select6
, *select7
, *select8
, *select9
; 
 498 /* Global vars that are actually used as constants. The following double 
 499  * values are used for double on-disk serialization, and are initialized 
 500  * at runtime to avoid strange compiler optimizations. */ 
 502 static double R_Zero
, R_PosInf
, R_NegInf
, R_Nan
; 
 504 /* VM threaded I/O request message */ 
 505 #define REDIS_IOJOB_LOAD 0          /* Load from disk to memory */ 
 506 #define REDIS_IOJOB_PREPARE_SWAP 1  /* Compute needed pages */ 
 507 #define REDIS_IOJOB_DO_SWAP 2       /* Swap from memory to disk */ 
 508 typedef struct iojob 
{ 
 509     int type
;   /* Request type, REDIS_IOJOB_* */ 
 510     redisDb 
*db
;/* Redis database */ 
 511     robj 
*key
;  /* This I/O request is about swapping this key */ 
 512     robj 
*val
;  /* the value to swap for REDIS_IOJOB_*_SWAP, otherwise this 
 513                  * field is populated by the I/O thread for REDIS_IOJOB_LOAD. */ 
 514     off_t page
; /* Swap page where to read/write the object */ 
 515     off_t pages
; /* Swap pages needed to save the object. PREPARE_SWAP return val */ 
 516     int canceled
; /* True if this command was canceled by blocking side of VM */ 
 517     pthread_t thread
; /* ID of the thread processing this entry */ 
 520 /*================================ Prototypes =============================== */ 
/* Forward declarations (prototypes) of static, file-local functions. */
 522 static void freeStringObject(robj 
*o
); 
 523 static void freeListObject(robj 
*o
); 
 524 static void freeSetObject(robj 
*o
); 
 525 static void decrRefCount(void *o
); 
 526 static robj 
*createObject(int type
, void *ptr
); 
 527 static void freeClient(redisClient 
*c
); 
 528 static int rdbLoad(char *filename
); 
 529 static void addReply(redisClient 
*c
, robj 
*obj
); 
 530 static void addReplySds(redisClient 
*c
, sds s
); 
 531 static void incrRefCount(robj 
*o
); 
 532 static int rdbSaveBackground(char *filename
); 
 533 static robj 
*createStringObject(char *ptr
, size_t len
); 
 534 static robj 
*dupStringObject(robj 
*o
); 
 535 static void replicationFeedSlaves(list 
*slaves
, struct redisCommand 
*cmd
, int dictid
, robj 
**argv
, int argc
); 
 536 static void feedAppendOnlyFile(struct redisCommand 
*cmd
, int dictid
, robj 
**argv
, int argc
); 
 537 static int syncWithMaster(void); 
 538 static robj 
*tryObjectSharing(robj 
*o
); 
 539 static int tryObjectEncoding(robj 
*o
); 
 540 static robj 
*getDecodedObject(robj 
*o
); 
 541 static int removeExpire(redisDb 
*db
, robj 
*key
); 
 542 static int expireIfNeeded(redisDb 
*db
, robj 
*key
); 
 543 static int deleteIfVolatile(redisDb 
*db
, robj 
*key
); 
 544 static int deleteIfSwapped(redisDb 
*db
, robj 
*key
); 
 545 static int deleteKey(redisDb 
*db
, robj 
*key
); 
 546 static time_t getExpire(redisDb 
*db
, robj 
*key
); 
 547 static int setExpire(redisDb 
*db
, robj 
*key
, time_t when
); 
 548 static void updateSlavesWaitingBgsave(int bgsaveerr
); 
 549 static void freeMemoryIfNeeded(void); 
 550 static int processCommand(redisClient 
*c
); 
 551 static void setupSigSegvAction(void); 
 552 static void rdbRemoveTempFile(pid_t childpid
); 
 553 static void aofRemoveTempFile(pid_t childpid
); 
 554 static size_t stringObjectLen(robj 
*o
); 
 555 static void processInputBuffer(redisClient 
*c
); 
 556 static zskiplist 
*zslCreate(void); 
 557 static void zslFree(zskiplist 
*zsl
); 
 558 static void zslInsert(zskiplist 
*zsl
, double score
, robj 
*obj
); 
 559 static void sendReplyToClientWritev(aeEventLoop 
*el
, int fd
, void *privdata
, int mask
); 
 560 static void initClientMultiState(redisClient 
*c
); 
 561 static void freeClientMultiState(redisClient 
*c
); 
 562 static void queueMultiCommand(redisClient 
*c
, struct redisCommand 
*cmd
); 
 563 static void unblockClientWaitingData(redisClient 
*c
); 
 564 static int handleClientsWaitingListPush(redisClient 
*c
, robj 
*key
, robj 
*ele
); 
 /* Prototypes of the static vm* (virtual memory) functions, the event-loop
  * callbacks taking (aeEventLoop *el, int fd, void *privdata, int mask), and
  * the command lookup/dispatch functions. */
 565 static void vmInit(void); 
 566 static void vmMarkPagesFree(off_t page
, off_t count
); 
 567 static robj 
*vmLoadObject(robj 
*key
); 
 568 static robj 
*vmPreviewObject(robj 
*key
); 
 569 static int vmSwapOneObjectBlocking(void); 
 570 static int vmSwapOneObjectThreaded(void); 
 571 static int vmCanSwapOut(void); 
 572 static int tryFreeOneObjectFromFreelist(void); 
 573 static void acceptHandler(aeEventLoop 
*el
, int fd
, void *privdata
, int mask
); 
 574 static void vmThreadedIOCompletedJob(aeEventLoop 
*el
, int fd
, void *privdata
, int mask
); 
 575 static void vmCancelThreadedIOJob(robj 
*o
); 
 576 static void lockThreadedIO(void); 
 577 static void unlockThreadedIO(void); 
 578 static int vmSwapObjectThreaded(robj 
*key
, robj 
*val
, redisDb 
*db
); 
 579 static void freeIOJob(iojob 
*j
); 
 580 static void queueIOJob(iojob 
*j
); 
 581 static int vmWriteObjectOnSwap(robj 
*o
, off_t page
); 
 582 static robj 
*vmReadObjectFromSwap(off_t page
, int type
); 
 583 static void waitEmptyIOJobsQueue(void); 
 584 static void vmReopenSwapFile(void); 
 585 static int vmFreePage(off_t page
); 
 586 static int blockClientOnSwappedKeys(struct redisCommand 
*cmd
, redisClient 
*c
); 
 587 static int dontWaitForSwappedKey(redisClient 
*c
, robj 
*key
); 
 588 static void handleClientsBlockedOnSwappedKey(redisDb 
*db
, robj 
*key
); 
 589 static void readQueryFromClient(aeEventLoop 
*el
, int fd
, void *privdata
, int mask
); 
 590 static struct redisCommand 
*lookupCommand(char *name
); 
 591 static void call(redisClient 
*c
, struct redisCommand 
*cmd
); 
 592 static void resetClient(redisClient 
*c
); 
 /* Prototypes of the command implementations. Every one has the
  * redisCommandProc signature, void fn(redisClient *c), and the cmdTable
  * entries refer to these functions by name. */
 594 static void authCommand(redisClient 
*c
); 
 595 static void pingCommand(redisClient 
*c
); 
 596 static void echoCommand(redisClient 
*c
); 
 597 static void setCommand(redisClient 
*c
); 
 598 static void setnxCommand(redisClient 
*c
); 
 599 static void getCommand(redisClient 
*c
); 
 600 static void delCommand(redisClient 
*c
); 
 601 static void existsCommand(redisClient 
*c
); 
 602 static void incrCommand(redisClient 
*c
); 
 603 static void decrCommand(redisClient 
*c
); 
 604 static void incrbyCommand(redisClient 
*c
); 
 605 static void decrbyCommand(redisClient 
*c
); 
 606 static void selectCommand(redisClient 
*c
); 
 607 static void randomkeyCommand(redisClient 
*c
); 
 608 static void keysCommand(redisClient 
*c
); 
 609 static void dbsizeCommand(redisClient 
*c
); 
 610 static void lastsaveCommand(redisClient 
*c
); 
 611 static void saveCommand(redisClient 
*c
); 
 612 static void bgsaveCommand(redisClient 
*c
); 
 613 static void bgrewriteaofCommand(redisClient 
*c
); 
 614 static void shutdownCommand(redisClient 
*c
); 
 615 static void moveCommand(redisClient 
*c
); 
 616 static void renameCommand(redisClient 
*c
); 
 617 static void renamenxCommand(redisClient 
*c
); 
 618 static void lpushCommand(redisClient 
*c
); 
 619 static void rpushCommand(redisClient 
*c
); 
 620 static void lpopCommand(redisClient 
*c
); 
 621 static void rpopCommand(redisClient 
*c
); 
 622 static void llenCommand(redisClient 
*c
); 
 623 static void lindexCommand(redisClient 
*c
); 
 624 static void lrangeCommand(redisClient 
*c
); 
 625 static void ltrimCommand(redisClient 
*c
); 
 626 static void typeCommand(redisClient 
*c
); 
 627 static void lsetCommand(redisClient 
*c
); 
 628 static void saddCommand(redisClient 
*c
); 
 629 static void sremCommand(redisClient 
*c
); 
 630 static void smoveCommand(redisClient 
*c
); 
 631 static void sismemberCommand(redisClient 
*c
); 
 632 static void scardCommand(redisClient 
*c
); 
 633 static void spopCommand(redisClient 
*c
); 
 634 static void srandmemberCommand(redisClient 
*c
); 
 635 static void sinterCommand(redisClient 
*c
); 
 636 static void sinterstoreCommand(redisClient 
*c
); 
 637 static void sunionCommand(redisClient 
*c
); 
 638 static void sunionstoreCommand(redisClient 
*c
); 
 639 static void sdiffCommand(redisClient 
*c
); 
 640 static void sdiffstoreCommand(redisClient 
*c
); 
 641 static void syncCommand(redisClient 
*c
); 
 642 static void flushdbCommand(redisClient 
*c
); 
 643 static void flushallCommand(redisClient 
*c
); 
 644 static void sortCommand(redisClient 
*c
); 
 645 static void lremCommand(redisClient 
*c
); 
 /* NOTE(review): spelled with a lowercase 'c', unlike every other *Command
  * prototype here. cmdTable uses the same spelling, so a rename would have
  * to change every site together. */
 646 static void rpoplpushcommand(redisClient 
*c
); 
 647 static void infoCommand(redisClient 
*c
); 
 648 static void mgetCommand(redisClient 
*c
); 
 649 static void monitorCommand(redisClient 
*c
); 
 650 static void expireCommand(redisClient 
*c
); 
 651 static void expireatCommand(redisClient 
*c
); 
 652 static void getsetCommand(redisClient 
*c
); 
 653 static void ttlCommand(redisClient 
*c
); 
 654 static void slaveofCommand(redisClient 
*c
); 
 655 static void debugCommand(redisClient 
*c
); 
 656 static void msetCommand(redisClient 
*c
); 
 657 static void msetnxCommand(redisClient 
*c
); 
 658 static void zaddCommand(redisClient 
*c
); 
 659 static void zincrbyCommand(redisClient 
*c
); 
 660 static void zrangeCommand(redisClient 
*c
); 
 661 static void zrangebyscoreCommand(redisClient 
*c
); 
 662 static void zcountCommand(redisClient 
*c
); 
 663 static void zrevrangeCommand(redisClient 
*c
); 
 664 static void zcardCommand(redisClient 
*c
); 
 665 static void zremCommand(redisClient 
*c
); 
 666 static void zscoreCommand(redisClient 
*c
); 
 667 static void zremrangebyscoreCommand(redisClient 
*c
); 
 668 static void multiCommand(redisClient 
*c
); 
 669 static void execCommand(redisClient 
*c
); 
 670 static void discardCommand(redisClient 
*c
); 
 671 static void blpopCommand(redisClient 
*c
); 
 672 static void brpopCommand(redisClient 
*c
); 
 673 static void appendCommand(redisClient 
*c
); 
 674 static void substrCommand(redisClient 
*c
); 
 675 static void zrankCommand(redisClient 
*c
); 
 676 static void hsetCommand(redisClient 
*c
); 
 677 static void hgetCommand(redisClient 
*c
); 
 678 static void zunionCommand(redisClient 
*c
); 
 679 static void zinterCommand(redisClient 
*c
); 
 681 /*================================= Globals ================================= */ 
 684 static struct redisServer server
; /* server global state */ 
 685 static struct redisCommand cmdTable
[] = { 
 686     {"get",getCommand
,2,REDIS_CMD_INLINE
,1,1,1}, 
 687     {"set",setCommand
,3,REDIS_CMD_BULK
|REDIS_CMD_DENYOOM
,0,0,0}, 
 688     {"setnx",setnxCommand
,3,REDIS_CMD_BULK
|REDIS_CMD_DENYOOM
,0,0,0}, 
 689     {"append",appendCommand
,3,REDIS_CMD_BULK
|REDIS_CMD_DENYOOM
,1,1,1}, 
 690     {"substr",substrCommand
,4,REDIS_CMD_INLINE
,1,1,1}, 
 691     {"del",delCommand
,-2,REDIS_CMD_INLINE
,0,0,0}, 
 692     {"exists",existsCommand
,2,REDIS_CMD_INLINE
,1,1,1}, 
 693     {"incr",incrCommand
,2,REDIS_CMD_INLINE
|REDIS_CMD_DENYOOM
,1,1,1}, 
 694     {"decr",decrCommand
,2,REDIS_CMD_INLINE
|REDIS_CMD_DENYOOM
,1,1,1}, 
 695     {"mget",mgetCommand
,-2,REDIS_CMD_INLINE
,1,-1,1}, 
 696     {"rpush",rpushCommand
,3,REDIS_CMD_BULK
|REDIS_CMD_DENYOOM
,1,1,1}, 
 697     {"lpush",lpushCommand
,3,REDIS_CMD_BULK
|REDIS_CMD_DENYOOM
,1,1,1}, 
 698     {"rpop",rpopCommand
,2,REDIS_CMD_INLINE
,1,1,1}, 
 699     {"lpop",lpopCommand
,2,REDIS_CMD_INLINE
,1,1,1}, 
 700     {"brpop",brpopCommand
,-3,REDIS_CMD_INLINE
,1,1,1}, 
 701     {"blpop",blpopCommand
,-3,REDIS_CMD_INLINE
,1,1,1}, 
 702     {"llen",llenCommand
,2,REDIS_CMD_INLINE
,1,1,1}, 
 703     {"lindex",lindexCommand
,3,REDIS_CMD_INLINE
,1,1,1}, 
 704     {"lset",lsetCommand
,4,REDIS_CMD_BULK
|REDIS_CMD_DENYOOM
,1,1,1}, 
 705     {"lrange",lrangeCommand
,4,REDIS_CMD_INLINE
,1,1,1}, 
 706     {"ltrim",ltrimCommand
,4,REDIS_CMD_INLINE
,1,1,1}, 
 707     {"lrem",lremCommand
,4,REDIS_CMD_BULK
,1,1,1}, 
 708     {"rpoplpush",rpoplpushcommand
,3,REDIS_CMD_INLINE
|REDIS_CMD_DENYOOM
,1,2,1}, 
 709     {"sadd",saddCommand
,3,REDIS_CMD_BULK
|REDIS_CMD_DENYOOM
,1,1,1}, 
 710     {"srem",sremCommand
,3,REDIS_CMD_BULK
,1,1,1}, 
 711     {"smove",smoveCommand
,4,REDIS_CMD_BULK
,1,2,1}, 
 712     {"sismember",sismemberCommand
,3,REDIS_CMD_BULK
,1,1,1}, 
 713     {"scard",scardCommand
,2,REDIS_CMD_INLINE
,1,1,1}, 
 714     {"spop",spopCommand
,2,REDIS_CMD_INLINE
,1,1,1}, 
 715     {"srandmember",srandmemberCommand
,2,REDIS_CMD_INLINE
,1,1,1}, 
 716     {"sinter",sinterCommand
,-2,REDIS_CMD_INLINE
|REDIS_CMD_DENYOOM
,1,-1,1}, 
 717     {"sinterstore",sinterstoreCommand
,-3,REDIS_CMD_INLINE
|REDIS_CMD_DENYOOM
,2,-1,1}, 
 718     {"sunion",sunionCommand
,-2,REDIS_CMD_INLINE
|REDIS_CMD_DENYOOM
,1,-1,1}, 
 719     {"sunionstore",sunionstoreCommand
,-3,REDIS_CMD_INLINE
|REDIS_CMD_DENYOOM
,2,-1,1}, 
 720     {"sdiff",sdiffCommand
,-2,REDIS_CMD_INLINE
|REDIS_CMD_DENYOOM
,1,-1,1}, 
 721     {"sdiffstore",sdiffstoreCommand
,-3,REDIS_CMD_INLINE
|REDIS_CMD_DENYOOM
,2,-1,1}, 
 722     {"smembers",sinterCommand
,2,REDIS_CMD_INLINE
,1,1,1}, 
 723     {"zadd",zaddCommand
,4,REDIS_CMD_BULK
|REDIS_CMD_DENYOOM
,1,1,1}, 
 724     {"zincrby",zincrbyCommand
,4,REDIS_CMD_BULK
|REDIS_CMD_DENYOOM
,1,1,1}, 
 725     {"zrem",zremCommand
,3,REDIS_CMD_BULK
,1,1,1}, 
 726     {"zremrangebyscore",zremrangebyscoreCommand
,4,REDIS_CMD_INLINE
,1,1,1}, 
 727     {"zunion",zunionCommand
,-4,REDIS_CMD_INLINE
|REDIS_CMD_DENYOOM
,0,0,0}, 
 728     {"zinter",zinterCommand
,-4,REDIS_CMD_INLINE
|REDIS_CMD_DENYOOM
,0,0,0}, 
 729     {"zrange",zrangeCommand
,-4,REDIS_CMD_INLINE
,1,1,1}, 
 730     {"zrangebyscore",zrangebyscoreCommand
,-4,REDIS_CMD_INLINE
,1,1,1}, 
 731     {"zcount",zcountCommand
,4,REDIS_CMD_INLINE
,1,1,1}, 
 732     {"zrevrange",zrevrangeCommand
,-4,REDIS_CMD_INLINE
,1,1,1}, 
 733     {"zcard",zcardCommand
,2,REDIS_CMD_INLINE
,1,1,1}, 
 734     {"zscore",zscoreCommand
,3,REDIS_CMD_BULK
|REDIS_CMD_DENYOOM
,1,1,1}, 
 735     {"zrank",zrankCommand
,3,REDIS_CMD_INLINE
,1,1,1}, 
 736     {"hset",hsetCommand
,4,REDIS_CMD_BULK
|REDIS_CMD_DENYOOM
,1,1,1}, 
 737     {"hget",hgetCommand
,3,REDIS_CMD_BULK
,1,1,1}, 
 738     {"incrby",incrbyCommand
,3,REDIS_CMD_INLINE
|REDIS_CMD_DENYOOM
,1,1,1}, 
 739     {"decrby",decrbyCommand
,3,REDIS_CMD_INLINE
|REDIS_CMD_DENYOOM
,1,1,1}, 
 740     {"getset",getsetCommand
,3,REDIS_CMD_BULK
|REDIS_CMD_DENYOOM
,1,1,1}, 
 741     {"mset",msetCommand
,-3,REDIS_CMD_BULK
|REDIS_CMD_DENYOOM
,1,-1,2}, 
 742     {"msetnx",msetnxCommand
,-3,REDIS_CMD_BULK
|REDIS_CMD_DENYOOM
,1,-1,2}, 
 743     {"randomkey",randomkeyCommand
,1,REDIS_CMD_INLINE
,0,0,0}, 
 744     {"select",selectCommand
,2,REDIS_CMD_INLINE
,0,0,0}, 
 745     {"move",moveCommand
,3,REDIS_CMD_INLINE
,1,1,1}, 
 746     {"rename",renameCommand
,3,REDIS_CMD_INLINE
,1,1,1}, 
 747     {"renamenx",renamenxCommand
,3,REDIS_CMD_INLINE
,1,1,1}, 
 748     {"expire",expireCommand
,3,REDIS_CMD_INLINE
,0,0,0}, 
 749     {"expireat",expireatCommand
,3,REDIS_CMD_INLINE
,0,0,0}, 
 750     {"keys",keysCommand
,2,REDIS_CMD_INLINE
,0,0,0}, 
 751     {"dbsize",dbsizeCommand
,1,REDIS_CMD_INLINE
,0,0,0}, 
 752     {"auth",authCommand
,2,REDIS_CMD_INLINE
,0,0,0}, 
 753     {"ping",pingCommand
,1,REDIS_CMD_INLINE
,0,0,0}, 
 754     {"echo",echoCommand
,2,REDIS_CMD_BULK
,0,0,0}, 
 755     {"save",saveCommand
,1,REDIS_CMD_INLINE
,0,0,0}, 
 756     {"bgsave",bgsaveCommand
,1,REDIS_CMD_INLINE
,0,0,0}, 
 757     {"bgrewriteaof",bgrewriteaofCommand
,1,REDIS_CMD_INLINE
,0,0,0}, 
 758     {"shutdown",shutdownCommand
,1,REDIS_CMD_INLINE
,0,0,0}, 
 759     {"lastsave",lastsaveCommand
,1,REDIS_CMD_INLINE
,0,0,0}, 
 760     {"type",typeCommand
,2,REDIS_CMD_INLINE
,1,1,1}, 
 761     {"multi",multiCommand
,1,REDIS_CMD_INLINE
,0,0,0}, 
 762     {"exec",execCommand
,1,REDIS_CMD_INLINE
,0,0,0}, 
 763     {"discard",discardCommand
,1,REDIS_CMD_INLINE
,0,0,0}, 
 764     {"sync",syncCommand
,1,REDIS_CMD_INLINE
,0,0,0}, 
 765     {"flushdb",flushdbCommand
,1,REDIS_CMD_INLINE
,0,0,0}, 
 766     {"flushall",flushallCommand
,1,REDIS_CMD_INLINE
,0,0,0}, 
 767     {"sort",sortCommand
,-2,REDIS_CMD_INLINE
|REDIS_CMD_DENYOOM
,1,1,1}, 
 768     {"info",infoCommand
,1,REDIS_CMD_INLINE
,0,0,0}, 
 769     {"monitor",monitorCommand
,1,REDIS_CMD_INLINE
,0,0,0}, 
 770     {"ttl",ttlCommand
,2,REDIS_CMD_INLINE
,1,1,1}, 
 771     {"slaveof",slaveofCommand
,3,REDIS_CMD_INLINE
,0,0,0}, 
 772     {"debug",debugCommand
,-2,REDIS_CMD_INLINE
,0,0,0}, 
 773     {NULL
,NULL
,0,0,0,0,0} 
 776 /*============================ Utility functions ============================ */ 
 778 /* Glob-style pattern matching. */ 
 779 int stringmatchlen(const char *pattern
, int patternLen
, 
 780         const char *string
, int stringLen
, int nocase
) 
 785             while (pattern
[1] == '*') { 
 790                 return 1; /* match */ 
 792                 if (stringmatchlen(pattern
+1, patternLen
-1, 
 793                             string
, stringLen
, nocase
)) 
 794                     return 1; /* match */ 
 798             return 0; /* no match */ 
 802                 return 0; /* no match */ 
 812             not = pattern
[0] == '^'; 
 819                 if (pattern
[0] == '\\') { 
 822                     if (pattern
[0] == string
[0]) 
 824                 } else if (pattern
[0] == ']') { 
 826                 } else if (patternLen 
== 0) { 
 830                 } else if (pattern
[1] == '-' && patternLen 
>= 3) { 
 831                     int start 
= pattern
[0]; 
 832                     int end 
= pattern
[2]; 
 840                         start 
= tolower(start
); 
 846                     if (c 
>= start 
&& c 
<= end
) 
 850                         if (pattern
[0] == string
[0]) 
 853                         if (tolower((int)pattern
[0]) == tolower((int)string
[0])) 
 863                 return 0; /* no match */ 
 869             if (patternLen 
>= 2) { 
 876                 if (pattern
[0] != string
[0]) 
 877                     return 0; /* no match */ 
 879                 if (tolower((int)pattern
[0]) != tolower((int)string
[0])) 
 880                     return 0; /* no match */ 
 888         if (stringLen 
== 0) { 
 889             while(*pattern 
== '*') { 
 896     if (patternLen 
== 0 && stringLen 
== 0) 
 901 static void redisLog(int level
, const char *fmt
, ...) { 
 905     fp 
= (server
.logfile 
== NULL
) ? stdout 
: fopen(server
.logfile
,"a"); 
 909     if (level 
>= server
.verbosity
) { 
 915         strftime(buf
,64,"%d %b %H:%M:%S",localtime(&now
)); 
 916         fprintf(fp
,"[%d] %s %c ",(int)getpid(),buf
,c
[level
]); 
 917         vfprintf(fp
, fmt
, ap
); 
 923     if (server
.logfile
) fclose(fp
); 
 926 /*====================== Hash table type implementation  ==================== */ 
 928 /* This is a hash table type that uses the SDS dynamic strings library as
 929  * keys and redis objects as values (objects can hold SDS strings,
 932 static void dictVanillaFree(void *privdata
, void *val
) 
 934     DICT_NOTUSED(privdata
); 
 938 static void dictListDestructor(void *privdata
, void *val
) 
 940     DICT_NOTUSED(privdata
); 
 941     listRelease((list
*)val
); 
 944 static int sdsDictKeyCompare(void *privdata
, const void *key1
, 
 948     DICT_NOTUSED(privdata
); 
 950     l1 
= sdslen((sds
)key1
); 
 951     l2 
= sdslen((sds
)key2
); 
 952     if (l1 
!= l2
) return 0; 
 953     return memcmp(key1
, key2
, l1
) == 0; 
 956 static void dictRedisObjectDestructor(void *privdata
, void *val
) 
 958     DICT_NOTUSED(privdata
); 
 960     if (val 
== NULL
) return; /* Values of swapped out keys as set to NULL */ 
 964 static int dictObjKeyCompare(void *privdata
, const void *key1
, 
 967     const robj 
*o1 
= key1
, *o2 
= key2
; 
 968     return sdsDictKeyCompare(privdata
,o1
->ptr
,o2
->ptr
); 
 971 static unsigned int dictObjHash(const void *key
) { 
 973     return dictGenHashFunction(o
->ptr
, sdslen((sds
)o
->ptr
)); 
 976 static int dictEncObjKeyCompare(void *privdata
, const void *key1
, 
 979     robj 
*o1 
= (robj
*) key1
, *o2 
= (robj
*) key2
; 
 982     o1 
= getDecodedObject(o1
); 
 983     o2 
= getDecodedObject(o2
); 
 984     cmp 
= sdsDictKeyCompare(privdata
,o1
->ptr
,o2
->ptr
); 
 990 static unsigned int dictEncObjHash(const void *key
) { 
 991     robj 
*o 
= (robj
*) key
; 
 993     if (o
->encoding 
== REDIS_ENCODING_RAW
) { 
 994         return dictGenHashFunction(o
->ptr
, sdslen((sds
)o
->ptr
)); 
 996         if (o
->encoding 
== REDIS_ENCODING_INT
) { 
1000             len 
= snprintf(buf
,32,"%ld",(long)o
->ptr
); 
1001             return dictGenHashFunction((unsigned char*)buf
, len
); 
1005             o 
= getDecodedObject(o
); 
1006             hash 
= dictGenHashFunction(o
->ptr
, sdslen((sds
)o
->ptr
)); 
1013 /* Sets type and expires */ 
1014 static dictType setDictType 
= { 
1015     dictEncObjHash
,            /* hash function */ 
1018     dictEncObjKeyCompare
,      /* key compare */ 
1019     dictRedisObjectDestructor
, /* key destructor */ 
1020     NULL                       
/* val destructor */ 
1023 /* Sorted sets hash (note: a skiplist is used in addition to the hash table) */ 
1024 static dictType zsetDictType 
= { 
1025     dictEncObjHash
,            /* hash function */ 
1028     dictEncObjKeyCompare
,      /* key compare */ 
1029     dictRedisObjectDestructor
, /* key destructor */ 
1030     dictVanillaFree            
/* val destructor of malloc(sizeof(double)) */ 
1034 static dictType dbDictType 
= { 
1035     dictObjHash
,                /* hash function */ 
1038     dictObjKeyCompare
,          /* key compare */ 
1039     dictRedisObjectDestructor
,  /* key destructor */ 
1040     dictRedisObjectDestructor   
/* val destructor */ 
1044 static dictType keyptrDictType 
= { 
1045     dictObjHash
,               /* hash function */ 
1048     dictObjKeyCompare
,         /* key compare */ 
1049     dictRedisObjectDestructor
, /* key destructor */ 
1050     NULL                       
/* val destructor */ 
1053 /* Hash type hash table (note that small hashes are represented with zipmaps) */
1054 static dictType hashDictType 
= { 
1055     dictEncObjHash
,             /* hash function */ 
1058     dictEncObjKeyCompare
,       /* key compare */ 
1059     dictRedisObjectDestructor
,  /* key destructor */ 
1060     dictRedisObjectDestructor   
/* val destructor */ 
1063 /* Keylist hash table type has unencoded redis objects as keys and 
1064  * lists as values. It's used for blocking operations (BLPOP) and to 
1065  * map swapped keys to a list of clients waiting for these keys to be loaded. */
1066 static dictType keylistDictType 
= { 
1067     dictObjHash
,                /* hash function */ 
1070     dictObjKeyCompare
,          /* key compare */ 
1071     dictRedisObjectDestructor
,  /* key destructor */ 
1072     dictListDestructor          
/* val destructor */ 
1075 /* ========================= Random utility functions ======================= */ 
1077 /* Redis generally does not try to recover from out of memory conditions 
1078  * when allocating objects or strings, it is not clear if it will be possible 
1079  * to report this condition to the client since the networking layer itself 
1080  * is based on heap allocation for send buffers, so we simply abort. 
1081  * At least the code will be simpler to read... */ 
1082 static void oom(const char *msg
) { 
1083     redisLog(REDIS_WARNING
, "%s: Out of memory\n",msg
); 
1088 /* ====================== Redis server networking stuff ===================== */ 
1089 static void closeTimedoutClients(void) { 
1092     time_t now 
= time(NULL
); 
1095     listRewind(server
.clients
,&li
); 
1096     while ((ln 
= listNext(&li
)) != NULL
) { 
1097         c 
= listNodeValue(ln
); 
1098         if (server
.maxidletime 
&& 
1099             !(c
->flags 
& REDIS_SLAVE
) &&    /* no timeout for slaves */ 
1100             !(c
->flags 
& REDIS_MASTER
) &&   /* no timeout for masters */ 
1101              (now 
- c
->lastinteraction 
> server
.maxidletime
)) 
1103             redisLog(REDIS_VERBOSE
,"Closing idle client"); 
1105         } else if (c
->flags 
& REDIS_BLOCKED
) { 
1106             if (c
->blockingto 
!= 0 && c
->blockingto 
< now
) { 
1107                 addReply(c
,shared
.nullmultibulk
); 
1108                 unblockClientWaitingData(c
); 
1114 static int htNeedsResize(dict 
*dict
) { 
1115     long long size
, used
; 
1117     size 
= dictSlots(dict
); 
1118     used 
= dictSize(dict
); 
1119     return (size 
&& used 
&& size 
> DICT_HT_INITIAL_SIZE 
&& 
1120             (used
*100/size 
< REDIS_HT_MINFILL
)); 
1123 /* If the percentage of used slots in the HT reaches REDIS_HT_MINFILL 
1124  * we resize the hash table to save memory */ 
1125 static void tryResizeHashTables(void) { 
1128     for (j 
= 0; j 
< server
.dbnum
; j
++) { 
1129         if (htNeedsResize(server
.db
[j
].dict
)) { 
1130             redisLog(REDIS_VERBOSE
,"The hash table %d is too sparse, resize it...",j
); 
1131             dictResize(server
.db
[j
].dict
); 
1132             redisLog(REDIS_VERBOSE
,"Hash table %d resized.",j
); 
1134         if (htNeedsResize(server
.db
[j
].expires
)) 
1135             dictResize(server
.db
[j
].expires
); 
1139 /* A background saving child (BGSAVE) terminated its work. Handle this. */ 
1140 void backgroundSaveDoneHandler(int statloc
) { 
1141     int exitcode 
= WEXITSTATUS(statloc
); 
1142     int bysignal 
= WIFSIGNALED(statloc
); 
1144     if (!bysignal 
&& exitcode 
== 0) { 
1145         redisLog(REDIS_NOTICE
, 
1146             "Background saving terminated with success"); 
1148         server
.lastsave 
= time(NULL
); 
1149     } else if (!bysignal 
&& exitcode 
!= 0) { 
1150         redisLog(REDIS_WARNING
, "Background saving error"); 
1152         redisLog(REDIS_WARNING
, 
1153             "Background saving terminated by signal"); 
1154         rdbRemoveTempFile(server
.bgsavechildpid
); 
1156     server
.bgsavechildpid 
= -1; 
1157     /* Possibly there are slaves waiting for a BGSAVE in order to be served 
1158      * (the first stage of SYNC is a bulk transfer of dump.rdb) */ 
1159     updateSlavesWaitingBgsave(exitcode 
== 0 ? REDIS_OK 
: REDIS_ERR
); 
1162 /* A background append only file rewriting (BGREWRITEAOF) terminated its work. 
1164 void backgroundRewriteDoneHandler(int statloc
) { 
1165     int exitcode 
= WEXITSTATUS(statloc
); 
1166     int bysignal 
= WIFSIGNALED(statloc
); 
1168     if (!bysignal 
&& exitcode 
== 0) { 
1172         redisLog(REDIS_NOTICE
, 
1173             "Background append only file rewriting terminated with success"); 
1174         /* Now it's time to flush the differences accumulated by the parent */ 
1175         snprintf(tmpfile
,256,"temp-rewriteaof-bg-%d.aof", (int) server
.bgrewritechildpid
); 
1176         fd 
= open(tmpfile
,O_WRONLY
|O_APPEND
); 
1178             redisLog(REDIS_WARNING
, "Not able to open the temp append only file produced by the child: %s", strerror(errno
)); 
1181         /* Flush our data... */ 
1182         if (write(fd
,server
.bgrewritebuf
,sdslen(server
.bgrewritebuf
)) != 
1183                 (signed) sdslen(server
.bgrewritebuf
)) { 
1184             redisLog(REDIS_WARNING
, "Error or short write trying to flush the parent diff of the append log file in the child temp file: %s", strerror(errno
)); 
1188         redisLog(REDIS_NOTICE
,"Parent diff flushed into the new append log file with success (%lu bytes)",sdslen(server
.bgrewritebuf
)); 
1189         /* Now our work is to rename the temp file into the stable file. And 
1190          * switch the file descriptor used by the server for append only. */ 
1191         if (rename(tmpfile
,server
.appendfilename
) == -1) { 
1192             redisLog(REDIS_WARNING
,"Can't rename the temp append only file into the stable one: %s", strerror(errno
)); 
1196         /* Mission completed... almost */ 
1197         redisLog(REDIS_NOTICE
,"Append only file successfully rewritten."); 
1198         if (server
.appendfd 
!= -1) { 
1199             /* If append only is actually enabled... */ 
1200             close(server
.appendfd
); 
1201             server
.appendfd 
= fd
; 
1203             server
.appendseldb 
= -1; /* Make sure it will issue SELECT */ 
1204             redisLog(REDIS_NOTICE
,"The new append only file was selected for future appends."); 
1206             /* If append only is disabled we just generate a dump in this 
1207              * format. Why not? */ 
1210     } else if (!bysignal 
&& exitcode 
!= 0) { 
1211         redisLog(REDIS_WARNING
, "Background append only file rewriting error"); 
1213         redisLog(REDIS_WARNING
, 
1214             "Background append only file rewriting terminated by signal"); 
1217     sdsfree(server
.bgrewritebuf
); 
1218     server
.bgrewritebuf 
= sdsempty(); 
1219     aofRemoveTempFile(server
.bgrewritechildpid
); 
1220     server
.bgrewritechildpid 
= -1; 
1223 static int serverCron(struct aeEventLoop 
*eventLoop
, long long id
, void *clientData
) { 
1224     int j
, loops 
= server
.cronloops
++; 
1225     REDIS_NOTUSED(eventLoop
); 
1227     REDIS_NOTUSED(clientData
); 
1229     /* We take a cached value of the unix time in the global state because 
1230      * with virtual memory and aging there is the need to store the current time
1231      * in objects at every object access, and accuracy is not needed. 
1232      * To access a global var is faster than calling time(NULL) */ 
1233     server
.unixtime 
= time(NULL
); 
1235     /* Show some info about non-empty databases */ 
1236     for (j 
= 0; j 
< server
.dbnum
; j
++) { 
1237         long long size
, used
, vkeys
; 
1239         size 
= dictSlots(server
.db
[j
].dict
); 
1240         used 
= dictSize(server
.db
[j
].dict
); 
1241         vkeys 
= dictSize(server
.db
[j
].expires
); 
1242         if (!(loops 
% 5) && (used 
|| vkeys
)) { 
1243             redisLog(REDIS_VERBOSE
,"DB %d: %lld keys (%lld volatile) in %lld slots HT.",j
,used
,vkeys
,size
); 
1244             /* dictPrintStats(server.dict); */ 
1248     /* We don't want to resize the hash tables while a background saving
1249      * is in progress: the saving child is created using fork() that is 
1250      * implemented with a copy-on-write semantic in most modern systems, so 
1251      * if we resize the HT while there is the saving child at work actually 
1252      * a lot of memory movements in the parent will cause a lot of pages 
1254     if (server
.bgsavechildpid 
== -1) tryResizeHashTables(); 
1256     /* Show information about connected clients */ 
1258         redisLog(REDIS_VERBOSE
,"%d clients connected (%d slaves), %zu bytes in use, %d shared objects", 
1259             listLength(server
.clients
)-listLength(server
.slaves
), 
1260             listLength(server
.slaves
), 
1261             zmalloc_used_memory(), 
1262             dictSize(server
.sharingpool
)); 
1265     /* Close connections of timedout clients */ 
1266     if ((server
.maxidletime 
&& !(loops 
% 10)) || server
.blpop_blocked_clients
) 
1267         closeTimedoutClients(); 
1269     /* Check if a background saving or AOF rewrite in progress terminated */ 
1270     if (server
.bgsavechildpid 
!= -1 || server
.bgrewritechildpid 
!= -1) { 
1274         if ((pid 
= wait3(&statloc
,WNOHANG
,NULL
)) != 0) { 
1275             if (pid 
== server
.bgsavechildpid
) { 
1276                 backgroundSaveDoneHandler(statloc
); 
1278                 backgroundRewriteDoneHandler(statloc
); 
1282         /* If there is not a background saving in progress check if 
1283          * we have to save now */ 
1284          time_t now 
= time(NULL
); 
1285          for (j 
= 0; j 
< server
.saveparamslen
; j
++) { 
1286             struct saveparam 
*sp 
= server
.saveparams
+j
; 
1288             if (server
.dirty 
>= sp
->changes 
&& 
1289                 now
-server
.lastsave 
> sp
->seconds
) { 
1290                 redisLog(REDIS_NOTICE
,"%d changes in %d seconds. Saving...", 
1291                     sp
->changes
, sp
->seconds
); 
1292                 rdbSaveBackground(server
.dbfilename
); 
1298     /* Try to expire a few timed out keys. The algorithm used is adaptive and 
1299      * will use few CPU cycles if there are few expiring keys, otherwise 
1300      * it will get more aggressive to avoid that too much memory is used by 
1301      * keys that can be removed from the keyspace. */ 
1302     for (j 
= 0; j 
< server
.dbnum
; j
++) { 
1304         redisDb 
*db 
= server
.db
+j
; 
1306         /* Continue to expire if at the end of the cycle more than 25% 
1307          * of the keys were expired. */ 
1309             long num 
= dictSize(db
->expires
); 
1310             time_t now 
= time(NULL
); 
1313             if (num 
> REDIS_EXPIRELOOKUPS_PER_CRON
) 
1314                 num 
= REDIS_EXPIRELOOKUPS_PER_CRON
; 
1319                 if ((de 
= dictGetRandomKey(db
->expires
)) == NULL
) break; 
1320                 t 
= (time_t) dictGetEntryVal(de
); 
1322                     deleteKey(db
,dictGetEntryKey(de
)); 
1326         } while (expired 
> REDIS_EXPIRELOOKUPS_PER_CRON
/4); 
1329     /* Swap a few keys on disk if we are over the memory limit and VM 
1330      * is enabled. Try to free objects from the free list first. */
1331     if (vmCanSwapOut()) { 
1332         while (server
.vm_enabled 
&& zmalloc_used_memory() > 
1333                 server
.vm_max_memory
) 
1337             if (tryFreeOneObjectFromFreelist() == REDIS_OK
) continue; 
1338             retval 
= (server
.vm_max_threads 
== 0) ? 
1339                         vmSwapOneObjectBlocking() : 
1340                         vmSwapOneObjectThreaded(); 
1341             if (retval 
== REDIS_ERR 
&& (loops 
% 30) == 0 && 
1342                 zmalloc_used_memory() > 
1343                 (server
.vm_max_memory
+server
.vm_max_memory
/10)) 
1345                 redisLog(REDIS_WARNING
,"WARNING: vm-max-memory limit exceeded by more than 10%% but unable to swap more objects out!"); 
1347             /* Note that when using threaded I/O we free just one object,
1348              * because anyway when the I/O thread in charge to swap this 
1349              * object out will finish, the handler of completed jobs 
1350              * will try to swap more objects if we are still out of memory. */ 
1351             if (retval 
== REDIS_ERR 
|| server
.vm_max_threads 
> 0) break; 
1355     /* Check if we should connect to a MASTER */ 
1356     if (server
.replstate 
== REDIS_REPL_CONNECT
) { 
1357         redisLog(REDIS_NOTICE
,"Connecting to MASTER..."); 
1358         if (syncWithMaster() == REDIS_OK
) { 
1359             redisLog(REDIS_NOTICE
,"MASTER <-> SLAVE sync succeeded"); 
1365 /* This function gets called every time Redis is entering the 
1366  * main loop of the event driven library, that is, before sleeping
1367  * for ready file descriptors. */ 
1368 static void beforeSleep(struct aeEventLoop 
*eventLoop
) { 
1369     REDIS_NOTUSED(eventLoop
); 
1371     if (server
.vm_enabled 
&& listLength(server
.io_ready_clients
)) { 
1375         listRewind(server
.io_ready_clients
,&li
); 
1376         while((ln 
= listNext(&li
))) { 
1377             redisClient 
*c 
= ln
->value
; 
1378             struct redisCommand 
*cmd
; 
1380             /* Resume the client. */ 
1381             listDelNode(server
.io_ready_clients
,ln
); 
1382             c
->flags 
&= (~REDIS_IO_WAIT
); 
1383             server
.vm_blocked_clients
--; 
1384             aeCreateFileEvent(server
.el
, c
->fd
, AE_READABLE
, 
1385                 readQueryFromClient
, c
); 
1386             cmd 
= lookupCommand(c
->argv
[0]->ptr
); 
1387             assert(cmd 
!= NULL
); 
1390             /* There may be more data to process in the input buffer. */ 
1391             if (c
->querybuf 
&& sdslen(c
->querybuf
) > 0) 
1392                 processInputBuffer(c
); 
1397 static void createSharedObjects(void) { 
1398     shared
.crlf 
= createObject(REDIS_STRING
,sdsnew("\r\n")); 
1399     shared
.ok 
= createObject(REDIS_STRING
,sdsnew("+OK\r\n")); 
1400     shared
.err 
= createObject(REDIS_STRING
,sdsnew("-ERR\r\n")); 
1401     shared
.emptybulk 
= createObject(REDIS_STRING
,sdsnew("$0\r\n\r\n")); 
1402     shared
.czero 
= createObject(REDIS_STRING
,sdsnew(":0\r\n")); 
1403     shared
.cone 
= createObject(REDIS_STRING
,sdsnew(":1\r\n")); 
1404     shared
.nullbulk 
= createObject(REDIS_STRING
,sdsnew("$-1\r\n")); 
1405     shared
.nullmultibulk 
= createObject(REDIS_STRING
,sdsnew("*-1\r\n")); 
1406     shared
.emptymultibulk 
= createObject(REDIS_STRING
,sdsnew("*0\r\n")); 
1407     shared
.pong 
= createObject(REDIS_STRING
,sdsnew("+PONG\r\n")); 
1408     shared
.queued 
= createObject(REDIS_STRING
,sdsnew("+QUEUED\r\n")); 
1409     shared
.wrongtypeerr 
= createObject(REDIS_STRING
,sdsnew( 
1410         "-ERR Operation against a key holding the wrong kind of value\r\n")); 
1411     shared
.nokeyerr 
= createObject(REDIS_STRING
,sdsnew( 
1412         "-ERR no such key\r\n")); 
1413     shared
.syntaxerr 
= createObject(REDIS_STRING
,sdsnew( 
1414         "-ERR syntax error\r\n")); 
1415     shared
.sameobjecterr 
= createObject(REDIS_STRING
,sdsnew( 
1416         "-ERR source and destination objects are the same\r\n")); 
1417     shared
.outofrangeerr 
= createObject(REDIS_STRING
,sdsnew( 
1418         "-ERR index out of range\r\n")); 
1419     shared
.space 
= createObject(REDIS_STRING
,sdsnew(" ")); 
1420     shared
.colon 
= createObject(REDIS_STRING
,sdsnew(":")); 
1421     shared
.plus 
= createObject(REDIS_STRING
,sdsnew("+")); 
1422     shared
.select0 
= createStringObject("select 0\r\n",10); 
1423     shared
.select1 
= createStringObject("select 1\r\n",10); 
1424     shared
.select2 
= createStringObject("select 2\r\n",10); 
1425     shared
.select3 
= createStringObject("select 3\r\n",10); 
1426     shared
.select4 
= createStringObject("select 4\r\n",10); 
1427     shared
.select5 
= createStringObject("select 5\r\n",10); 
1428     shared
.select6 
= createStringObject("select 6\r\n",10); 
1429     shared
.select7 
= createStringObject("select 7\r\n",10); 
1430     shared
.select8 
= createStringObject("select 8\r\n",10); 
1431     shared
.select9 
= createStringObject("select 9\r\n",10); 
1434 static void appendServerSaveParams(time_t seconds
, int changes
) { 
1435     server
.saveparams 
= zrealloc(server
.saveparams
,sizeof(struct saveparam
)*(server
.saveparamslen
+1)); 
1436     server
.saveparams
[server
.saveparamslen
].seconds 
= seconds
; 
1437     server
.saveparams
[server
.saveparamslen
].changes 
= changes
; 
1438     server
.saveparamslen
++; 
1441 static void resetServerSaveParams() { 
1442     zfree(server
.saveparams
); 
1443     server
.saveparams 
= NULL
; 
1444     server
.saveparamslen 
= 0; 
1447 static void initServerConfig() { 
1448     server
.dbnum 
= REDIS_DEFAULT_DBNUM
; 
1449     server
.port 
= REDIS_SERVERPORT
; 
1450     server
.verbosity 
= REDIS_VERBOSE
; 
1451     server
.maxidletime 
= REDIS_MAXIDLETIME
; 
1452     server
.saveparams 
= NULL
; 
1453     server
.logfile 
= NULL
; /* NULL = log on standard output */ 
1454     server
.bindaddr 
= NULL
; 
1455     server
.glueoutputbuf 
= 1; 
1456     server
.daemonize 
= 0; 
1457     server
.appendonly 
= 0; 
1458     server
.appendfsync 
= APPENDFSYNC_ALWAYS
; 
1459     server
.lastfsync 
= time(NULL
); 
1460     server
.appendfd 
= -1; 
1461     server
.appendseldb 
= -1; /* Make sure the first time will not match */ 
1462     server
.pidfile 
= "/var/run/redis.pid"; 
1463     server
.dbfilename 
= "dump.rdb"; 
1464     server
.appendfilename 
= "appendonly.aof"; 
1465     server
.requirepass 
= NULL
; 
1466     server
.shareobjects 
= 0; 
1467     server
.rdbcompression 
= 1; 
1468     server
.sharingpoolsize 
= 1024; 
1469     server
.maxclients 
= 0; 
1470     server
.blpop_blocked_clients 
= 0; 
1471     server
.maxmemory 
= 0; 
1472     server
.vm_enabled 
= 0; 
1473     server
.vm_swap_file 
= zstrdup("/tmp/redis-%p.vm"); 
1474     server
.vm_page_size 
= 256;          /* 256 bytes per page */ 
1475     server
.vm_pages 
= 1024*1024*100;    /* 104 millions of pages */ 
1476     server
.vm_max_memory 
= 1024LL*1024*1024*1; /* 1 GB of RAM */ 
1477     server
.vm_max_threads 
= 4; 
1478     server
.vm_blocked_clients 
= 0; 
1479     server
.hash_max_zipmap_entries 
= REDIS_HASH_MAX_ZIPMAP_ENTRIES
; 
1480     server
.hash_max_zipmap_value 
= REDIS_HASH_MAX_ZIPMAP_VALUE
; 
1482     resetServerSaveParams(); 
1484     appendServerSaveParams(60*60,1);  /* save after 1 hour and 1 change */ 
1485     appendServerSaveParams(300,100);  /* save after 5 minutes and 100 changes */ 
1486     appendServerSaveParams(60,10000); /* save after 1 minute and 10000 changes */ 
1487     /* Replication related */ 
1489     server
.masterauth 
= NULL
; 
1490     server
.masterhost 
= NULL
; 
1491     server
.masterport 
= 6379; 
1492     server
.master 
= NULL
; 
1493     server
.replstate 
= REDIS_REPL_NONE
; 
1495     /* Double constants initialization */ 
1497     R_PosInf 
= 1.0/R_Zero
; 
1498     R_NegInf 
= -1.0/R_Zero
; 
1499     R_Nan 
= R_Zero
/R_Zero
; 
1502 static void initServer() { 
1505     signal(SIGHUP
, SIG_IGN
); 
1506     signal(SIGPIPE
, SIG_IGN
); 
1507     setupSigSegvAction(); 
1509     server
.devnull 
= fopen("/dev/null","w"); 
1510     if (server
.devnull 
== NULL
) { 
1511         redisLog(REDIS_WARNING
, "Can't open /dev/null: %s", server
.neterr
); 
1514     server
.clients 
= listCreate(); 
1515     server
.slaves 
= listCreate(); 
1516     server
.monitors 
= listCreate(); 
1517     server
.objfreelist 
= listCreate(); 
1518     createSharedObjects(); 
1519     server
.el 
= aeCreateEventLoop(); 
1520     server
.db 
= zmalloc(sizeof(redisDb
)*server
.dbnum
); 
1521     server
.sharingpool 
= dictCreate(&setDictType
,NULL
); 
1522     server
.fd 
= anetTcpServer(server
.neterr
, server
.port
, server
.bindaddr
); 
1523     if (server
.fd 
== -1) { 
1524         redisLog(REDIS_WARNING
, "Opening TCP port: %s", server
.neterr
); 
1527     for (j 
= 0; j 
< server
.dbnum
; j
++) { 
1528         server
.db
[j
].dict 
= dictCreate(&dbDictType
,NULL
); 
1529         server
.db
[j
].expires 
= dictCreate(&keyptrDictType
,NULL
); 
1530         server
.db
[j
].blockingkeys 
= dictCreate(&keylistDictType
,NULL
); 
1531         if (server
.vm_enabled
) 
1532             server
.db
[j
].io_keys 
= dictCreate(&keylistDictType
,NULL
); 
1533         server
.db
[j
].id 
= j
; 
1535     server
.cronloops 
= 0; 
1536     server
.bgsavechildpid 
= -1; 
1537     server
.bgrewritechildpid 
= -1; 
1538     server
.bgrewritebuf 
= sdsempty(); 
1539     server
.lastsave 
= time(NULL
); 
1541     server
.stat_numcommands 
= 0; 
1542     server
.stat_numconnections 
= 0; 
1543     server
.stat_starttime 
= time(NULL
); 
1544     server
.unixtime 
= time(NULL
); 
1545     aeCreateTimeEvent(server
.el
, 1, serverCron
, NULL
, NULL
); 
1546     if (aeCreateFileEvent(server
.el
, server
.fd
, AE_READABLE
, 
1547         acceptHandler
, NULL
) == AE_ERR
) oom("creating file event"); 
1549     if (server
.appendonly
) { 
1550         server
.appendfd 
= open(server
.appendfilename
,O_WRONLY
|O_APPEND
|O_CREAT
,0644); 
1551         if (server
.appendfd 
== -1) { 
1552             redisLog(REDIS_WARNING
, "Can't open the append-only file: %s", 
1558     if (server
.vm_enabled
) vmInit(); 
1561 /* Empty the whole database */ 
1562 static long long emptyDb() { 
1564     long long removed 
= 0; 
1566     for (j 
= 0; j 
< server
.dbnum
; j
++) { 
1567         removed 
+= dictSize(server
.db
[j
].dict
); 
1568         dictEmpty(server
.db
[j
].dict
); 
1569         dictEmpty(server
.db
[j
].expires
); 
/* Convert a "yes" / "no" string (compared case insensitively) to an int.
 * Returns 1 for "yes", 0 for "no" and -1 for anything else. */
static int yesnotoi(char *s) {
    if (strcasecmp(s,"yes") == 0) return 1;
    return (strcasecmp(s,"no") == 0) ? 0 : -1;
}
1580 /* I agree, this is a very rudimentary way to load a configuration...
1581    will improve later if the config gets more complex */ 
1582 static void loadServerConfig(char *filename
) { 
1584     char buf
[REDIS_CONFIGLINE_MAX
+1], *err 
= NULL
; 
1588     if (filename
[0] == '-' && filename
[1] == '\0') 
1591         if ((fp 
= fopen(filename
,"r")) == NULL
) { 
1592             redisLog(REDIS_WARNING
,"Fatal error, can't open config file"); 
1597     while(fgets(buf
,REDIS_CONFIGLINE_MAX
+1,fp
) != NULL
) { 
1603         line 
= sdstrim(line
," \t\r\n"); 
1605         /* Skip comments and blank lines*/ 
1606         if (line
[0] == '#' || line
[0] == '\0') { 
1611         /* Split into arguments */ 
1612         argv 
= sdssplitlen(line
,sdslen(line
)," ",1,&argc
); 
1613         sdstolower(argv
[0]); 
1615         /* Execute config directives */ 
1616         if (!strcasecmp(argv
[0],"timeout") && argc 
== 2) { 
1617             server
.maxidletime 
= atoi(argv
[1]); 
1618             if (server
.maxidletime 
< 0) { 
1619                 err 
= "Invalid timeout value"; goto loaderr
; 
1621         } else if (!strcasecmp(argv
[0],"port") && argc 
== 2) { 
1622             server
.port 
= atoi(argv
[1]); 
1623             if (server
.port 
< 1 || server
.port 
> 65535) { 
1624                 err 
= "Invalid port"; goto loaderr
; 
1626         } else if (!strcasecmp(argv
[0],"bind") && argc 
== 2) { 
1627             server
.bindaddr 
= zstrdup(argv
[1]); 
1628         } else if (!strcasecmp(argv
[0],"save") && argc 
== 3) { 
1629             int seconds 
= atoi(argv
[1]); 
1630             int changes 
= atoi(argv
[2]); 
1631             if (seconds 
< 1 || changes 
< 0) { 
1632                 err 
= "Invalid save parameters"; goto loaderr
; 
1634             appendServerSaveParams(seconds
,changes
); 
1635         } else if (!strcasecmp(argv
[0],"dir") && argc 
== 2) { 
1636             if (chdir(argv
[1]) == -1) { 
1637                 redisLog(REDIS_WARNING
,"Can't chdir to '%s': %s", 
1638                     argv
[1], strerror(errno
)); 
1641         } else if (!strcasecmp(argv
[0],"loglevel") && argc 
== 2) { 
1642             if (!strcasecmp(argv
[1],"debug")) server
.verbosity 
= REDIS_DEBUG
; 
1643             else if (!strcasecmp(argv
[1],"verbose")) server
.verbosity 
= REDIS_VERBOSE
; 
1644             else if (!strcasecmp(argv
[1],"notice")) server
.verbosity 
= REDIS_NOTICE
; 
1645             else if (!strcasecmp(argv
[1],"warning")) server
.verbosity 
= REDIS_WARNING
; 
1647                 err 
= "Invalid log level. Must be one of debug, notice, warning"; 
1650         } else if (!strcasecmp(argv
[0],"logfile") && argc 
== 2) { 
1653             server
.logfile 
= zstrdup(argv
[1]); 
1654             if (!strcasecmp(server
.logfile
,"stdout")) { 
1655                 zfree(server
.logfile
); 
1656                 server
.logfile 
= NULL
; 
1658             if (server
.logfile
) { 
1659                 /* Test if we are able to open the file. The server will not 
1660                  * be able to abort just for this problem later... */ 
1661                 logfp 
= fopen(server
.logfile
,"a"); 
1662                 if (logfp 
== NULL
) { 
1663                     err 
= sdscatprintf(sdsempty(), 
1664                         "Can't open the log file: %s", strerror(errno
)); 
1669         } else if (!strcasecmp(argv
[0],"databases") && argc 
== 2) { 
1670             server
.dbnum 
= atoi(argv
[1]); 
1671             if (server
.dbnum 
< 1) { 
1672                 err 
= "Invalid number of databases"; goto loaderr
; 
1674         } else if (!strcasecmp(argv
[0],"maxclients") && argc 
== 2) { 
1675             server
.maxclients 
= atoi(argv
[1]); 
1676         } else if (!strcasecmp(argv
[0],"maxmemory") && argc 
== 2) { 
1677             server
.maxmemory 
= strtoll(argv
[1], NULL
, 10); 
1678         } else if (!strcasecmp(argv
[0],"slaveof") && argc 
== 3) { 
1679             server
.masterhost 
= sdsnew(argv
[1]); 
1680             server
.masterport 
= atoi(argv
[2]); 
1681             server
.replstate 
= REDIS_REPL_CONNECT
; 
1682         } else if (!strcasecmp(argv
[0],"masterauth") && argc 
== 2) { 
1683                 server
.masterauth 
= zstrdup(argv
[1]); 
1684         } else if (!strcasecmp(argv
[0],"glueoutputbuf") && argc 
== 2) { 
1685             if ((server
.glueoutputbuf 
= yesnotoi(argv
[1])) == -1) { 
1686                 err 
= "argument must be 'yes' or 'no'"; goto loaderr
; 
1688         } else if (!strcasecmp(argv
[0],"shareobjects") && argc 
== 2) { 
1689             if ((server
.shareobjects 
= yesnotoi(argv
[1])) == -1) { 
1690                 err 
= "argument must be 'yes' or 'no'"; goto loaderr
; 
1692         } else if (!strcasecmp(argv
[0],"rdbcompression") && argc 
== 2) { 
1693             if ((server
.rdbcompression 
= yesnotoi(argv
[1])) == -1) { 
1694                 err 
= "argument must be 'yes' or 'no'"; goto loaderr
; 
1696         } else if (!strcasecmp(argv
[0],"shareobjectspoolsize") && argc 
== 2) { 
1697             server
.sharingpoolsize 
= atoi(argv
[1]); 
1698             if (server
.sharingpoolsize 
< 1) { 
1699                 err 
= "invalid object sharing pool size"; goto loaderr
; 
1701         } else if (!strcasecmp(argv
[0],"daemonize") && argc 
== 2) { 
1702             if ((server
.daemonize 
= yesnotoi(argv
[1])) == -1) { 
1703                 err 
= "argument must be 'yes' or 'no'"; goto loaderr
; 
1705         } else if (!strcasecmp(argv
[0],"appendonly") && argc 
== 2) { 
1706             if ((server
.appendonly 
= yesnotoi(argv
[1])) == -1) { 
1707                 err 
= "argument must be 'yes' or 'no'"; goto loaderr
; 
1709         } else if (!strcasecmp(argv
[0],"appendfsync") && argc 
== 2) { 
1710             if (!strcasecmp(argv
[1],"no")) { 
1711                 server
.appendfsync 
= APPENDFSYNC_NO
; 
1712             } else if (!strcasecmp(argv
[1],"always")) { 
1713                 server
.appendfsync 
= APPENDFSYNC_ALWAYS
; 
1714             } else if (!strcasecmp(argv
[1],"everysec")) { 
1715                 server
.appendfsync 
= APPENDFSYNC_EVERYSEC
; 
1717                 err 
= "argument must be 'no', 'always' or 'everysec'"; 
1720         } else if (!strcasecmp(argv
[0],"requirepass") && argc 
== 2) { 
1721             server
.requirepass 
= zstrdup(argv
[1]); 
1722         } else if (!strcasecmp(argv
[0],"pidfile") && argc 
== 2) { 
1723             server
.pidfile 
= zstrdup(argv
[1]); 
1724         } else if (!strcasecmp(argv
[0],"dbfilename") && argc 
== 2) { 
1725             server
.dbfilename 
= zstrdup(argv
[1]); 
1726         } else if (!strcasecmp(argv
[0],"vm-enabled") && argc 
== 2) { 
1727             if ((server
.vm_enabled 
= yesnotoi(argv
[1])) == -1) { 
1728                 err 
= "argument must be 'yes' or 'no'"; goto loaderr
; 
1730         } else if (!strcasecmp(argv
[0],"vm-swap-file") && argc 
== 2) { 
1731             zfree(server
.vm_swap_file
); 
1732             server
.vm_swap_file 
= zstrdup(argv
[1]); 
1733         } else if (!strcasecmp(argv
[0],"vm-max-memory") && argc 
== 2) { 
1734             server
.vm_max_memory 
= strtoll(argv
[1], NULL
, 10); 
1735         } else if (!strcasecmp(argv
[0],"vm-page-size") && argc 
== 2) { 
1736             server
.vm_page_size 
= strtoll(argv
[1], NULL
, 10); 
1737         } else if (!strcasecmp(argv
[0],"vm-pages") && argc 
== 2) { 
1738             server
.vm_pages 
= strtoll(argv
[1], NULL
, 10); 
1739         } else if (!strcasecmp(argv
[0],"vm-max-threads") && argc 
== 2) { 
1740             server
.vm_max_threads 
= strtoll(argv
[1], NULL
, 10); 
1741         } else if (!strcasecmp(argv
[0],"hash-max-zipmap-entries") && argc 
== 2){ 
1742             server
.hash_max_zipmap_entries 
= strtol(argv
[1], NULL
, 10); 
1743         } else if (!strcasecmp(argv
[0],"hash-max-zipmap-value") && argc 
== 2){ 
1744             server
.hash_max_zipmap_value 
= strtol(argv
[1], NULL
, 10); 
1745         } else if (!strcasecmp(argv
[0],"vm-max-threads") && argc 
== 2) { 
1746             server
.vm_max_threads 
= strtoll(argv
[1], NULL
, 10); 
1748             err 
= "Bad directive or wrong number of arguments"; goto loaderr
; 
1750         for (j 
= 0; j 
< argc
; j
++) 
1755     if (fp 
!= stdin
) fclose(fp
); 
1759     fprintf(stderr
, "\n*** FATAL CONFIG FILE ERROR ***\n"); 
1760     fprintf(stderr
, "Reading the configuration file, at line %d\n", linenum
); 
1761     fprintf(stderr
, ">>> '%s'\n", line
); 
1762     fprintf(stderr
, "%s\n", err
); 
1766 static void freeClientArgv(redisClient 
*c
) { 
1769     for (j 
= 0; j 
< c
->argc
; j
++) 
1770         decrRefCount(c
->argv
[j
]); 
1771     for (j 
= 0; j 
< c
->mbargc
; j
++) 
1772         decrRefCount(c
->mbargv
[j
]); 
1777 static void freeClient(redisClient 
*c
) { 
1780     /* Note that if the client we are freeing is blocked into a blocking 
1781      * call, we have to set querybuf to NULL *before* to call 
1782      * unblockClientWaitingData() to avoid processInputBuffer() will get 
1783      * called. Also it is important to remove the file events after 
1784      * this, because this call adds the READABLE event. */ 
1785     sdsfree(c
->querybuf
); 
1787     if (c
->flags 
& REDIS_BLOCKED
) 
1788         unblockClientWaitingData(c
); 
1790     aeDeleteFileEvent(server
.el
,c
->fd
,AE_READABLE
); 
1791     aeDeleteFileEvent(server
.el
,c
->fd
,AE_WRITABLE
); 
1792     listRelease(c
->reply
); 
1795     /* Remove from the list of clients */ 
1796     ln 
= listSearchKey(server
.clients
,c
); 
1797     redisAssert(ln 
!= NULL
); 
1798     listDelNode(server
.clients
,ln
); 
1799     /* Remove from the list of clients waiting for swapped keys */ 
1800     if (c
->flags 
& REDIS_IO_WAIT 
&& listLength(c
->io_keys
) == 0) { 
1801         ln 
= listSearchKey(server
.io_ready_clients
,c
); 
1803             listDelNode(server
.io_ready_clients
,ln
); 
1804             server
.vm_blocked_clients
--; 
1807     while (server
.vm_enabled 
&& listLength(c
->io_keys
)) { 
1808         ln 
= listFirst(c
->io_keys
); 
1809         dontWaitForSwappedKey(c
,ln
->value
); 
1811     listRelease(c
->io_keys
); 
1813     if (c
->flags 
& REDIS_SLAVE
) { 
1814         if (c
->replstate 
== REDIS_REPL_SEND_BULK 
&& c
->repldbfd 
!= -1) 
1816         list 
*l 
= (c
->flags 
& REDIS_MONITOR
) ? server
.monitors 
: server
.slaves
; 
1817         ln 
= listSearchKey(l
,c
); 
1818         redisAssert(ln 
!= NULL
); 
1821     if (c
->flags 
& REDIS_MASTER
) { 
1822         server
.master 
= NULL
; 
1823         server
.replstate 
= REDIS_REPL_CONNECT
; 
1827     freeClientMultiState(c
); 
1831 #define GLUEREPLY_UP_TO (1024) 
1832 static void glueReplyBuffersIfNeeded(redisClient 
*c
) { 
1834     char buf
[GLUEREPLY_UP_TO
]; 
1839     listRewind(c
->reply
,&li
); 
1840     while((ln 
= listNext(&li
))) { 
1844         objlen 
= sdslen(o
->ptr
); 
1845         if (copylen 
+ objlen 
<= GLUEREPLY_UP_TO
) { 
1846             memcpy(buf
+copylen
,o
->ptr
,objlen
); 
1848             listDelNode(c
->reply
,ln
); 
1850             if (copylen 
== 0) return; 
1854     /* Now the output buffer is empty, add the new single element */ 
1855     o 
= createObject(REDIS_STRING
,sdsnewlen(buf
,copylen
)); 
1856     listAddNodeHead(c
->reply
,o
); 
1859 static void sendReplyToClient(aeEventLoop 
*el
, int fd
, void *privdata
, int mask
) { 
1860     redisClient 
*c 
= privdata
; 
1861     int nwritten 
= 0, totwritten 
= 0, objlen
; 
1864     REDIS_NOTUSED(mask
); 
1866     /* Use writev() if we have enough buffers to send */ 
1867     if (!server
.glueoutputbuf 
&& 
1868         listLength(c
->reply
) > REDIS_WRITEV_THRESHOLD 
&&  
1869         !(c
->flags 
& REDIS_MASTER
)) 
1871         sendReplyToClientWritev(el
, fd
, privdata
, mask
); 
1875     while(listLength(c
->reply
)) { 
1876         if (server
.glueoutputbuf 
&& listLength(c
->reply
) > 1) 
1877             glueReplyBuffersIfNeeded(c
); 
1879         o 
= listNodeValue(listFirst(c
->reply
)); 
1880         objlen 
= sdslen(o
->ptr
); 
1883             listDelNode(c
->reply
,listFirst(c
->reply
)); 
1887         if (c
->flags 
& REDIS_MASTER
) { 
1888             /* Don't reply to a master */ 
1889             nwritten 
= objlen 
- c
->sentlen
; 
1891             nwritten 
= write(fd
, ((char*)o
->ptr
)+c
->sentlen
, objlen 
- c
->sentlen
); 
1892             if (nwritten 
<= 0) break; 
1894         c
->sentlen 
+= nwritten
; 
1895         totwritten 
+= nwritten
; 
1896         /* If we fully sent the object on head go to the next one */ 
1897         if (c
->sentlen 
== objlen
) { 
1898             listDelNode(c
->reply
,listFirst(c
->reply
)); 
1901         /* Note that we avoid to send more thank REDIS_MAX_WRITE_PER_EVENT 
1902          * bytes, in a single threaded server it's a good idea to serve 
1903          * other clients as well, even if a very large request comes from 
1904          * super fast link that is always able to accept data (in real world 
1905          * scenario think about 'KEYS *' against the loopback interfae) */ 
1906         if (totwritten 
> REDIS_MAX_WRITE_PER_EVENT
) break; 
1908     if (nwritten 
== -1) { 
1909         if (errno 
== EAGAIN
) { 
1912             redisLog(REDIS_VERBOSE
, 
1913                 "Error writing to client: %s", strerror(errno
)); 
1918     if (totwritten 
> 0) c
->lastinteraction 
= time(NULL
); 
1919     if (listLength(c
->reply
) == 0) { 
1921         aeDeleteFileEvent(server
.el
,c
->fd
,AE_WRITABLE
); 
1925 static void sendReplyToClientWritev(aeEventLoop 
*el
, int fd
, void *privdata
, int mask
) 
1927     redisClient 
*c 
= privdata
; 
1928     int nwritten 
= 0, totwritten 
= 0, objlen
, willwrite
; 
1930     struct iovec iov
[REDIS_WRITEV_IOVEC_COUNT
]; 
1931     int offset
, ion 
= 0; 
1933     REDIS_NOTUSED(mask
); 
1936     while (listLength(c
->reply
)) { 
1937         offset 
= c
->sentlen
; 
1941         /* fill-in the iov[] array */ 
1942         for(node 
= listFirst(c
->reply
); node
; node 
= listNextNode(node
)) { 
1943             o 
= listNodeValue(node
); 
1944             objlen 
= sdslen(o
->ptr
); 
1946             if (totwritten 
+ objlen 
- offset 
> REDIS_MAX_WRITE_PER_EVENT
)  
1949             if(ion 
== REDIS_WRITEV_IOVEC_COUNT
) 
1950                 break; /* no more iovecs */ 
1952             iov
[ion
].iov_base 
= ((char*)o
->ptr
) + offset
; 
1953             iov
[ion
].iov_len 
= objlen 
- offset
; 
1954             willwrite 
+= objlen 
- offset
; 
1955             offset 
= 0; /* just for the first item */ 
1962         /* write all collected blocks at once */ 
1963         if((nwritten 
= writev(fd
, iov
, ion
)) < 0) { 
1964             if (errno 
!= EAGAIN
) { 
1965                 redisLog(REDIS_VERBOSE
, 
1966                          "Error writing to client: %s", strerror(errno
)); 
1973         totwritten 
+= nwritten
; 
1974         offset 
= c
->sentlen
; 
1976         /* remove written robjs from c->reply */ 
1977         while (nwritten 
&& listLength(c
->reply
)) { 
1978             o 
= listNodeValue(listFirst(c
->reply
)); 
1979             objlen 
= sdslen(o
->ptr
); 
1981             if(nwritten 
>= objlen 
- offset
) { 
1982                 listDelNode(c
->reply
, listFirst(c
->reply
)); 
1983                 nwritten 
-= objlen 
- offset
; 
1987                 c
->sentlen 
+= nwritten
; 
1995         c
->lastinteraction 
= time(NULL
); 
1997     if (listLength(c
->reply
) == 0) { 
1999         aeDeleteFileEvent(server
.el
,c
->fd
,AE_WRITABLE
); 
2003 static struct redisCommand 
*lookupCommand(char *name
) { 
2005     while(cmdTable
[j
].name 
!= NULL
) { 
2006         if (!strcasecmp(name
,cmdTable
[j
].name
)) return &cmdTable
[j
]; 
2012 /* resetClient prepare the client to process the next command */ 
2013 static void resetClient(redisClient 
*c
) { 
2019 /* Call() is the core of Redis execution of a command */ 
2020 static void call(redisClient 
*c
, struct redisCommand 
*cmd
) { 
2023     dirty 
= server
.dirty
; 
2025     if (server
.appendonly 
&& server
.dirty
-dirty
) 
2026         feedAppendOnlyFile(cmd
,c
->db
->id
,c
->argv
,c
->argc
); 
2027     if (server
.dirty
-dirty 
&& listLength(server
.slaves
)) 
2028         replicationFeedSlaves(server
.slaves
,cmd
,c
->db
->id
,c
->argv
,c
->argc
); 
2029     if (listLength(server
.monitors
)) 
2030         replicationFeedSlaves(server
.monitors
,cmd
,c
->db
->id
,c
->argv
,c
->argc
); 
2031     server
.stat_numcommands
++; 
2034 /* If this function gets called we already read a whole 
2035  * command, argments are in the client argv/argc fields. 
2036  * processCommand() execute the command or prepare the 
2037  * server for a bulk read from the client. 
2039  * If 1 is returned the client is still alive and valid and 
2040  * and other operations can be performed by the caller. Otherwise 
2041  * if 0 is returned the client was destroied (i.e. after QUIT). */ 
2042 static int processCommand(redisClient 
*c
) { 
2043     struct redisCommand 
*cmd
; 
2045     /* Free some memory if needed (maxmemory setting) */ 
2046     if (server
.maxmemory
) freeMemoryIfNeeded(); 
2048     /* Handle the multi bulk command type. This is an alternative protocol 
2049      * supported by Redis in order to receive commands that are composed of 
2050      * multiple binary-safe "bulk" arguments. The latency of processing is 
2051      * a bit higher but this allows things like multi-sets, so if this 
2052      * protocol is used only for MSET and similar commands this is a big win. */ 
2053     if (c
->multibulk 
== 0 && c
->argc 
== 1 && ((char*)(c
->argv
[0]->ptr
))[0] == '*') { 
2054         c
->multibulk 
= atoi(((char*)c
->argv
[0]->ptr
)+1); 
2055         if (c
->multibulk 
<= 0) { 
2059             decrRefCount(c
->argv
[c
->argc
-1]); 
2063     } else if (c
->multibulk
) { 
2064         if (c
->bulklen 
== -1) { 
2065             if (((char*)c
->argv
[0]->ptr
)[0] != '$') { 
2066                 addReplySds(c
,sdsnew("-ERR multi bulk protocol error\r\n")); 
2070                 int bulklen 
= atoi(((char*)c
->argv
[0]->ptr
)+1); 
2071                 decrRefCount(c
->argv
[0]); 
2072                 if (bulklen 
< 0 || bulklen 
> 1024*1024*1024) { 
2074                     addReplySds(c
,sdsnew("-ERR invalid bulk write count\r\n")); 
2079                 c
->bulklen 
= bulklen
+2; /* add two bytes for CR+LF */ 
2083             c
->mbargv 
= zrealloc(c
->mbargv
,(sizeof(robj
*))*(c
->mbargc
+1)); 
2084             c
->mbargv
[c
->mbargc
] = c
->argv
[0]; 
2088             if (c
->multibulk 
== 0) { 
2092                 /* Here we need to swap the multi-bulk argc/argv with the 
2093                  * normal argc/argv of the client structure. */ 
2095                 c
->argv 
= c
->mbargv
; 
2096                 c
->mbargv 
= auxargv
; 
2099                 c
->argc 
= c
->mbargc
; 
2100                 c
->mbargc 
= auxargc
; 
2102                 /* We need to set bulklen to something different than -1 
2103                  * in order for the code below to process the command without 
2104                  * to try to read the last argument of a bulk command as 
2105                  * a special argument. */ 
2107                 /* continue below and process the command */ 
2114     /* -- end of multi bulk commands processing -- */ 
2116     /* The QUIT command is handled as a special case. Normal command 
2117      * procs are unable to close the client connection safely */ 
2118     if (!strcasecmp(c
->argv
[0]->ptr
,"quit")) { 
2123     /* Now lookup the command and check ASAP about trivial error conditions 
2124      * such wrong arity, bad command name and so forth. */ 
2125     cmd 
= lookupCommand(c
->argv
[0]->ptr
); 
2128             sdscatprintf(sdsempty(), "-ERR unknown command '%s'\r\n", 
2129                 (char*)c
->argv
[0]->ptr
)); 
2132     } else if ((cmd
->arity 
> 0 && cmd
->arity 
!= c
->argc
) || 
2133                (c
->argc 
< -cmd
->arity
)) { 
2135             sdscatprintf(sdsempty(), 
2136                 "-ERR wrong number of arguments for '%s' command\r\n", 
2140     } else if (server
.maxmemory 
&& cmd
->flags 
& REDIS_CMD_DENYOOM 
&& zmalloc_used_memory() > server
.maxmemory
) { 
2141         addReplySds(c
,sdsnew("-ERR command not allowed when used memory > 'maxmemory'\r\n")); 
2144     } else if (cmd
->flags 
& REDIS_CMD_BULK 
&& c
->bulklen 
== -1) { 
2145         /* This is a bulk command, we have to read the last argument yet. */ 
2146         int bulklen 
= atoi(c
->argv
[c
->argc
-1]->ptr
); 
2148         decrRefCount(c
->argv
[c
->argc
-1]); 
2149         if (bulklen 
< 0 || bulklen 
> 1024*1024*1024) { 
2151             addReplySds(c
,sdsnew("-ERR invalid bulk write count\r\n")); 
2156         c
->bulklen 
= bulklen
+2; /* add two bytes for CR+LF */ 
2157         /* It is possible that the bulk read is already in the 
2158          * buffer. Check this condition and handle it accordingly. 
2159          * This is just a fast path, alternative to call processInputBuffer(). 
2160          * It's a good idea since the code is small and this condition 
2161          * happens most of the times. */ 
2162         if ((signed)sdslen(c
->querybuf
) >= c
->bulklen
) { 
2163             c
->argv
[c
->argc
] = createStringObject(c
->querybuf
,c
->bulklen
-2); 
2165             c
->querybuf 
= sdsrange(c
->querybuf
,c
->bulklen
,-1); 
2167             /* Otherwise return... there is to read the last argument 
2168              * from the socket. */ 
2172     /* Let's try to share objects on the command arguments vector */ 
2173     if (server
.shareobjects
) { 
2175         for(j 
= 1; j 
< c
->argc
; j
++) 
2176             c
->argv
[j
] = tryObjectSharing(c
->argv
[j
]); 
2178     /* Let's try to encode the bulk object to save space. */ 
2179     if (cmd
->flags 
& REDIS_CMD_BULK
) 
2180         tryObjectEncoding(c
->argv
[c
->argc
-1]); 
2182     /* Check if the user is authenticated */ 
2183     if (server
.requirepass 
&& !c
->authenticated 
&& cmd
->proc 
!= authCommand
) { 
2184         addReplySds(c
,sdsnew("-ERR operation not permitted\r\n")); 
2189     /* Exec the command */ 
2190     if (c
->flags 
& REDIS_MULTI 
&& cmd
->proc 
!= execCommand 
&& cmd
->proc 
!= discardCommand
) { 
2191         queueMultiCommand(c
,cmd
); 
2192         addReply(c
,shared
.queued
); 
2194         if (server
.vm_enabled 
&& server
.vm_max_threads 
> 0 && 
2195             blockClientOnSwappedKeys(cmd
,c
)) return 1; 
2199     /* Prepare the client for the next command */ 
2204 static void replicationFeedSlaves(list 
*slaves
, struct redisCommand 
*cmd
, int dictid
, robj 
**argv
, int argc
) { 
2209     /* (args*2)+1 is enough room for args, spaces, newlines */ 
2210     robj 
*static_outv
[REDIS_STATIC_ARGS
*2+1]; 
2212     if (argc 
<= REDIS_STATIC_ARGS
) { 
2215         outv 
= zmalloc(sizeof(robj
*)*(argc
*2+1)); 
2218     for (j 
= 0; j 
< argc
; j
++) { 
2219         if (j 
!= 0) outv
[outc
++] = shared
.space
; 
2220         if ((cmd
->flags 
& REDIS_CMD_BULK
) && j 
== argc
-1) { 
2223             lenobj 
= createObject(REDIS_STRING
, 
2224                 sdscatprintf(sdsempty(),"%lu\r\n", 
2225                     (unsigned long) stringObjectLen(argv
[j
]))); 
2226             lenobj
->refcount 
= 0; 
2227             outv
[outc
++] = lenobj
; 
2229         outv
[outc
++] = argv
[j
]; 
2231     outv
[outc
++] = shared
.crlf
; 
2233     /* Increment all the refcounts at start and decrement at end in order to 
2234      * be sure to free objects if there is no slave in a replication state 
2235      * able to be feed with commands */ 
2236     for (j 
= 0; j 
< outc
; j
++) incrRefCount(outv
[j
]); 
2237     listRewind(slaves
,&li
); 
2238     while((ln 
= listNext(&li
))) { 
2239         redisClient 
*slave 
= ln
->value
; 
2241         /* Don't feed slaves that are still waiting for BGSAVE to start */ 
2242         if (slave
->replstate 
== REDIS_REPL_WAIT_BGSAVE_START
) continue; 
2244         /* Feed all the other slaves, MONITORs and so on */ 
2245         if (slave
->slaveseldb 
!= dictid
) { 
2249             case 0: selectcmd 
= shared
.select0
; break; 
2250             case 1: selectcmd 
= shared
.select1
; break; 
2251             case 2: selectcmd 
= shared
.select2
; break; 
2252             case 3: selectcmd 
= shared
.select3
; break; 
2253             case 4: selectcmd 
= shared
.select4
; break; 
2254             case 5: selectcmd 
= shared
.select5
; break; 
2255             case 6: selectcmd 
= shared
.select6
; break; 
2256             case 7: selectcmd 
= shared
.select7
; break; 
2257             case 8: selectcmd 
= shared
.select8
; break; 
2258             case 9: selectcmd 
= shared
.select9
; break; 
2260                 selectcmd 
= createObject(REDIS_STRING
, 
2261                     sdscatprintf(sdsempty(),"select %d\r\n",dictid
)); 
2262                 selectcmd
->refcount 
= 0; 
2265             addReply(slave
,selectcmd
); 
2266             slave
->slaveseldb 
= dictid
; 
2268         for (j 
= 0; j 
< outc
; j
++) addReply(slave
,outv
[j
]); 
2270     for (j 
= 0; j 
< outc
; j
++) decrRefCount(outv
[j
]); 
2271     if (outv 
!= static_outv
) zfree(outv
); 
2274 static void processInputBuffer(redisClient 
*c
) { 
2276     /* Before to process the input buffer, make sure the client is not 
2277      * waitig for a blocking operation such as BLPOP. Note that the first 
2278      * iteration the client is never blocked, otherwise the processInputBuffer 
2279      * would not be called at all, but after the execution of the first commands 
2280      * in the input buffer the client may be blocked, and the "goto again" 
2281      * will try to reiterate. The following line will make it return asap. */ 
2282     if (c
->flags 
& REDIS_BLOCKED 
|| c
->flags 
& REDIS_IO_WAIT
) return; 
2283     if (c
->bulklen 
== -1) { 
2284         /* Read the first line of the query */ 
2285         char *p 
= strchr(c
->querybuf
,'\n'); 
2292             query 
= c
->querybuf
; 
2293             c
->querybuf 
= sdsempty(); 
2294             querylen 
= 1+(p
-(query
)); 
2295             if (sdslen(query
) > querylen
) { 
2296                 /* leave data after the first line of the query in the buffer */ 
2297                 c
->querybuf 
= sdscatlen(c
->querybuf
,query
+querylen
,sdslen(query
)-querylen
); 
2299             *p 
= '\0'; /* remove "\n" */ 
2300             if (*(p
-1) == '\r') *(p
-1) = '\0'; /* and "\r" if any */ 
2301             sdsupdatelen(query
); 
2303             /* Now we can split the query in arguments */ 
2304             argv 
= sdssplitlen(query
,sdslen(query
)," ",1,&argc
); 
2307             if (c
->argv
) zfree(c
->argv
); 
2308             c
->argv 
= zmalloc(sizeof(robj
*)*argc
); 
2310             for (j 
= 0; j 
< argc
; j
++) { 
2311                 if (sdslen(argv
[j
])) { 
2312                     c
->argv
[c
->argc
] = createObject(REDIS_STRING
,argv
[j
]); 
2320                 /* Execute the command. If the client is still valid 
2321                  * after processCommand() return and there is something 
2322                  * on the query buffer try to process the next command. */ 
2323                 if (processCommand(c
) && sdslen(c
->querybuf
)) goto again
; 
2325                 /* Nothing to process, argc == 0. Just process the query 
2326                  * buffer if it's not empty or return to the caller */ 
2327                 if (sdslen(c
->querybuf
)) goto again
; 
2330         } else if (sdslen(c
->querybuf
) >= REDIS_REQUEST_MAX_SIZE
) { 
2331             redisLog(REDIS_VERBOSE
, "Client protocol error"); 
2336         /* Bulk read handling. Note that if we are at this point 
2337            the client already sent a command terminated with a newline, 
2338            we are reading the bulk data that is actually the last 
2339            argument of the command. */ 
2340         int qbl 
= sdslen(c
->querybuf
); 
2342         if (c
->bulklen 
<= qbl
) { 
2343             /* Copy everything but the final CRLF as final argument */ 
2344             c
->argv
[c
->argc
] = createStringObject(c
->querybuf
,c
->bulklen
-2); 
2346             c
->querybuf 
= sdsrange(c
->querybuf
,c
->bulklen
,-1); 
2347             /* Process the command. If the client is still valid after 
2348              * the processing and there is more data in the buffer 
2349              * try to parse it. */ 
2350             if (processCommand(c
) && sdslen(c
->querybuf
)) goto again
; 
2356 static void readQueryFromClient(aeEventLoop 
*el
, int fd
, void *privdata
, int mask
) { 
2357     redisClient 
*c 
= (redisClient
*) privdata
; 
2358     char buf
[REDIS_IOBUF_LEN
]; 
2361     REDIS_NOTUSED(mask
); 
2363     nread 
= read(fd
, buf
, REDIS_IOBUF_LEN
); 
2365         if (errno 
== EAGAIN
) { 
2368             redisLog(REDIS_VERBOSE
, "Reading from client: %s",strerror(errno
)); 
2372     } else if (nread 
== 0) { 
2373         redisLog(REDIS_VERBOSE
, "Client closed connection"); 
2378         c
->querybuf 
= sdscatlen(c
->querybuf
, buf
, nread
); 
2379         c
->lastinteraction 
= time(NULL
); 
2383     if (!(c
->flags 
& REDIS_BLOCKED
)) 
2384         processInputBuffer(c
); 
2387 static int selectDb(redisClient 
*c
, int id
) { 
2388     if (id 
< 0 || id 
>= server
.dbnum
) 
2390     c
->db 
= &server
.db
[id
]; 
2394 static void *dupClientReplyValue(void *o
) { 
2395     incrRefCount((robj
*)o
); 
2399 static redisClient 
*createClient(int fd
) { 
2400     redisClient 
*c 
= zmalloc(sizeof(*c
)); 
2402     anetNonBlock(NULL
,fd
); 
2403     anetTcpNoDelay(NULL
,fd
); 
2404     if (!c
) return NULL
; 
2407     c
->querybuf 
= sdsempty(); 
2416     c
->lastinteraction 
= time(NULL
); 
2417     c
->authenticated 
= 0; 
2418     c
->replstate 
= REDIS_REPL_NONE
; 
2419     c
->reply 
= listCreate(); 
2420     listSetFreeMethod(c
->reply
,decrRefCount
); 
2421     listSetDupMethod(c
->reply
,dupClientReplyValue
); 
2422     c
->blockingkeys 
= NULL
; 
2423     c
->blockingkeysnum 
= 0; 
2424     c
->io_keys 
= listCreate(); 
2425     listSetFreeMethod(c
->io_keys
,decrRefCount
); 
2426     if (aeCreateFileEvent(server
.el
, c
->fd
, AE_READABLE
, 
2427         readQueryFromClient
, c
) == AE_ERR
) { 
2431     listAddNodeTail(server
.clients
,c
); 
2432     initClientMultiState(c
); 
2436 static void addReply(redisClient 
*c
, robj 
*obj
) { 
2437     if (listLength(c
->reply
) == 0 && 
2438         (c
->replstate 
== REDIS_REPL_NONE 
|| 
2439          c
->replstate 
== REDIS_REPL_ONLINE
) && 
2440         aeCreateFileEvent(server
.el
, c
->fd
, AE_WRITABLE
, 
2441         sendReplyToClient
, c
) == AE_ERR
) return; 
2443     if (server
.vm_enabled 
&& obj
->storage 
!= REDIS_VM_MEMORY
) { 
2444         obj 
= dupStringObject(obj
); 
2445         obj
->refcount 
= 0; /* getDecodedObject() will increment the refcount */ 
2447     listAddNodeTail(c
->reply
,getDecodedObject(obj
)); 
2450 static void addReplySds(redisClient 
*c
, sds s
) { 
2451     robj 
*o 
= createObject(REDIS_STRING
,s
); 
2456 static void addReplyDouble(redisClient 
*c
, double d
) { 
2459     snprintf(buf
,sizeof(buf
),"%.17g",d
); 
2460     addReplySds(c
,sdscatprintf(sdsempty(),"$%lu\r\n%s\r\n", 
2461         (unsigned long) strlen(buf
),buf
)); 
2464 static void addReplyLong(redisClient 
*c
, long l
) { 
2468     len 
= snprintf(buf
,sizeof(buf
),":%ld\r\n",l
); 
2469     addReplySds(c
,sdsnewlen(buf
,len
)); 
2472 static void addReplyBulkLen(redisClient 
*c
, robj 
*obj
) { 
2475     if (obj
->encoding 
== REDIS_ENCODING_RAW
) { 
2476         len 
= sdslen(obj
->ptr
); 
2478         long n 
= (long)obj
->ptr
; 
2480         /* Compute how many bytes will take this integer as a radix 10 string */ 
2486         while((n 
= n
/10) != 0) { 
2490     addReplySds(c
,sdscatprintf(sdsempty(),"$%lu\r\n",(unsigned long)len
)); 
2493 static void acceptHandler(aeEventLoop 
*el
, int fd
, void *privdata
, int mask
) { 
2498     REDIS_NOTUSED(mask
); 
2499     REDIS_NOTUSED(privdata
); 
2501     cfd 
= anetAccept(server
.neterr
, fd
, cip
, &cport
); 
2502     if (cfd 
== AE_ERR
) { 
2503         redisLog(REDIS_VERBOSE
,"Accepting client connection: %s", server
.neterr
); 
2506     redisLog(REDIS_VERBOSE
,"Accepted %s:%d", cip
, cport
); 
2507     if ((c 
= createClient(cfd
)) == NULL
) { 
2508         redisLog(REDIS_WARNING
,"Error allocating resoures for the client"); 
2509         close(cfd
); /* May be already closed, just ingore errors */ 
2512     /* If maxclient directive is set and this is one client more... close the 
2513      * connection. Note that we create the client instead to check before 
2514      * for this condition, since now the socket is already set in nonblocking 
2515      * mode and we can send an error for free using the Kernel I/O */ 
2516     if (server
.maxclients 
&& listLength(server
.clients
) > server
.maxclients
) { 
2517         char *err 
= "-ERR max number of clients reached\r\n"; 
2519         /* That's a best effort error message, don't check write errors */ 
2520         if (write(c
->fd
,err
,strlen(err
)) == -1) { 
2521             /* Nothing to do, Just to avoid the warning... */ 
2526     server
.stat_numconnections
++; 
2529 /* ======================= Redis objects implementation ===================== */ 
2531 static robj 
*createObject(int type
, void *ptr
) { 
2534     if (server
.vm_enabled
) pthread_mutex_lock(&server
.obj_freelist_mutex
); 
2535     if (listLength(server
.objfreelist
)) { 
2536         listNode 
*head 
= listFirst(server
.objfreelist
); 
2537         o 
= listNodeValue(head
); 
2538         listDelNode(server
.objfreelist
,head
); 
2539         if (server
.vm_enabled
) pthread_mutex_unlock(&server
.obj_freelist_mutex
); 
2541         if (server
.vm_enabled
) { 
2542             pthread_mutex_unlock(&server
.obj_freelist_mutex
); 
2543             o 
= zmalloc(sizeof(*o
)); 
2545             o 
= zmalloc(sizeof(*o
)-sizeof(struct redisObjectVM
)); 
2549     o
->encoding 
= REDIS_ENCODING_RAW
; 
2552     if (server
.vm_enabled
) { 
2553         /* Note that this code may run in the context of an I/O thread 
2554          * and accessing to server.unixtime in theory is an error 
2555          * (no locks). But in practice this is safe, and even if we read 
2556          * garbage Redis will not fail, as it's just a statistical info */ 
2557         o
->vm
.atime 
= server
.unixtime
; 
2558         o
->storage 
= REDIS_VM_MEMORY
; 
2563 static robj 
*createStringObject(char *ptr
, size_t len
) { 
2564     return createObject(REDIS_STRING
,sdsnewlen(ptr
,len
)); 
2567 static robj 
*dupStringObject(robj 
*o
) { 
2568     assert(o
->encoding 
== REDIS_ENCODING_RAW
); 
2569     return createStringObject(o
->ptr
,sdslen(o
->ptr
)); 
2572 static robj 
*createListObject(void) { 
2573     list 
*l 
= listCreate(); 
2575     listSetFreeMethod(l
,decrRefCount
); 
2576     return createObject(REDIS_LIST
,l
); 
2579 static robj 
*createSetObject(void) { 
2580     dict 
*d 
= dictCreate(&setDictType
,NULL
); 
2581     return createObject(REDIS_SET
,d
); 
2584 static robj 
*createHashObject(void) { 
2585     /* All the Hashes start as zipmaps. Will be automatically converted 
2586      * into hash tables if there are enough elements or big elements 
2588     unsigned char *zm 
= zipmapNew(); 
2589     robj 
*o 
= createObject(REDIS_HASH
,zm
); 
2590     o
->encoding 
= REDIS_ENCODING_ZIPMAP
; 
2594 static robj 
*createZsetObject(void) { 
2595     zset 
*zs 
= zmalloc(sizeof(*zs
)); 
2597     zs
->dict 
= dictCreate(&zsetDictType
,NULL
); 
2598     zs
->zsl 
= zslCreate(); 
2599     return createObject(REDIS_ZSET
,zs
); 
2602 static void freeStringObject(robj 
*o
) { 
2603     if (o
->encoding 
== REDIS_ENCODING_RAW
) { 
2608 static void freeListObject(robj 
*o
) { 
2609     listRelease((list
*) o
->ptr
); 
2612 static void freeSetObject(robj 
*o
) { 
2613     dictRelease((dict
*) o
->ptr
); 
2616 static void freeZsetObject(robj 
*o
) { 
2619     dictRelease(zs
->dict
); 
2624 static void freeHashObject(robj 
*o
) { 
2625     switch (o
->encoding
) { 
2626     case REDIS_ENCODING_HT
: 
2627         dictRelease((dict
*) o
->ptr
); 
2629     case REDIS_ENCODING_ZIPMAP
: 
2638 static void incrRefCount(robj 
*o
) { 
2639     redisAssert(!server
.vm_enabled 
|| o
->storage 
== REDIS_VM_MEMORY
); 
2643 static void decrRefCount(void *obj
) { 
2646     /* Object is a key of a swapped out value, or in the process of being 
2648     if (server
.vm_enabled 
&& 
2649         (o
->storage 
== REDIS_VM_SWAPPED 
|| o
->storage 
== REDIS_VM_LOADING
)) 
2651         if (o
->storage 
== REDIS_VM_SWAPPED 
|| o
->storage 
== REDIS_VM_LOADING
) { 
2652             redisAssert(o
->refcount 
== 1); 
2654         if (o
->storage 
== REDIS_VM_LOADING
) vmCancelThreadedIOJob(obj
); 
2655         redisAssert(o
->type 
== REDIS_STRING
); 
2656         freeStringObject(o
); 
2657         vmMarkPagesFree(o
->vm
.page
,o
->vm
.usedpages
); 
2658         pthread_mutex_lock(&server
.obj_freelist_mutex
); 
2659         if (listLength(server
.objfreelist
) > REDIS_OBJFREELIST_MAX 
|| 
2660             !listAddNodeHead(server
.objfreelist
,o
)) 
2662         pthread_mutex_unlock(&server
.obj_freelist_mutex
); 
2663         server
.vm_stats_swapped_objects
--; 
2666     /* Object is in memory, or in the process of being swapped out. */ 
2667     if (--(o
->refcount
) == 0) { 
2668         if (server
.vm_enabled 
&& o
->storage 
== REDIS_VM_SWAPPING
) 
2669             vmCancelThreadedIOJob(obj
); 
2671         case REDIS_STRING
: freeStringObject(o
); break; 
2672         case REDIS_LIST
: freeListObject(o
); break; 
2673         case REDIS_SET
: freeSetObject(o
); break; 
2674         case REDIS_ZSET
: freeZsetObject(o
); break; 
2675         case REDIS_HASH
: freeHashObject(o
); break; 
2676         default: redisAssert(0 != 0); break; 
2678         if (server
.vm_enabled
) pthread_mutex_lock(&server
.obj_freelist_mutex
); 
2679         if (listLength(server
.objfreelist
) > REDIS_OBJFREELIST_MAX 
|| 
2680             !listAddNodeHead(server
.objfreelist
,o
)) 
2682         if (server
.vm_enabled
) pthread_mutex_unlock(&server
.obj_freelist_mutex
); 
2686 static robj 
*lookupKey(redisDb 
*db
, robj 
*key
) { 
2687     dictEntry 
*de 
= dictFind(db
->dict
,key
); 
2689         robj 
*key 
= dictGetEntryKey(de
); 
2690         robj 
*val 
= dictGetEntryVal(de
); 
2692         if (server
.vm_enabled
) { 
2693             if (key
->storage 
== REDIS_VM_MEMORY 
|| 
2694                 key
->storage 
== REDIS_VM_SWAPPING
) 
2696                 /* If we were swapping the object out, stop it, this key 
2698                 if (key
->storage 
== REDIS_VM_SWAPPING
) 
2699                     vmCancelThreadedIOJob(key
); 
2700                 /* Update the access time of the key for the aging algorithm. */ 
2701                 key
->vm
.atime 
= server
.unixtime
; 
2703                 int notify 
= (key
->storage 
== REDIS_VM_LOADING
); 
2705                 /* Our value was swapped on disk. Bring it at home. */ 
2706                 redisAssert(val 
== NULL
); 
2707                 val 
= vmLoadObject(key
); 
2708                 dictGetEntryVal(de
) = val
; 
2710                 /* Clients blocked by the VM subsystem may be waiting for 
2712                 if (notify
) handleClientsBlockedOnSwappedKey(db
,key
); 
2721 static robj 
*lookupKeyRead(redisDb 
*db
, robj 
*key
) { 
2722     expireIfNeeded(db
,key
); 
2723     return lookupKey(db
,key
); 
2726 static robj 
*lookupKeyWrite(redisDb 
*db
, robj 
*key
) { 
2727     deleteIfVolatile(db
,key
); 
2728     return lookupKey(db
,key
); 
2731 static int deleteKey(redisDb 
*db
, robj 
*key
) { 
2734     /* We need to protect key from destruction: after the first dictDelete() 
2735      * it may happen that 'key' is no longer valid if we don't increment 
2736      * it's count. This may happen when we get the object reference directly 
2737      * from the hash table with dictRandomKey() or dict iterators */ 
2739     if (dictSize(db
->expires
)) dictDelete(db
->expires
,key
); 
2740     retval 
= dictDelete(db
->dict
,key
); 
2743     return retval 
== DICT_OK
; 
2746 /* Try to share an object against the shared objects pool */ 
2747 static robj 
*tryObjectSharing(robj 
*o
) { 
2748     struct dictEntry 
*de
; 
2751     if (o 
== NULL 
|| server
.shareobjects 
== 0) return o
; 
2753     redisAssert(o
->type 
== REDIS_STRING
); 
2754     de 
= dictFind(server
.sharingpool
,o
); 
2756         robj 
*shared 
= dictGetEntryKey(de
); 
2758         c 
= ((unsigned long) dictGetEntryVal(de
))+1; 
2759         dictGetEntryVal(de
) = (void*) c
; 
2760         incrRefCount(shared
); 
2764         /* Here we are using a stream algorihtm: Every time an object is 
2765          * shared we increment its count, everytime there is a miss we 
2766          * recrement the counter of a random object. If this object reaches 
2767          * zero we remove the object and put the current object instead. */ 
2768         if (dictSize(server
.sharingpool
) >= 
2769                 server
.sharingpoolsize
) { 
2770             de 
= dictGetRandomKey(server
.sharingpool
); 
2771             redisAssert(de 
!= NULL
); 
2772             c 
= ((unsigned long) dictGetEntryVal(de
))-1; 
2773             dictGetEntryVal(de
) = (void*) c
; 
2775                 dictDelete(server
.sharingpool
,de
->key
); 
2778             c 
= 0; /* If the pool is empty we want to add this object */ 
2783             retval 
= dictAdd(server
.sharingpool
,o
,(void*)1); 
2784             redisAssert(retval 
== DICT_OK
); 
2791 /* Check if the nul-terminated string 's' can be represented by a long 
2792  * (that is, is a number that fits into long without any other space or 
2793  * character before or after the digits). 
2795  * If so, the function returns REDIS_OK and *longval is set to the value 
2796  * of the number. Otherwise REDIS_ERR is returned */ 
2797 static int isStringRepresentableAsLong(sds s
, long *longval
) { 
2798     char buf
[32], *endptr
; 
2802     value 
= strtol(s
, &endptr
, 10); 
2803     if (endptr
[0] != '\0') return REDIS_ERR
; 
2804     slen 
= snprintf(buf
,32,"%ld",value
); 
2806     /* If the number converted back into a string is not identical 
2807      * then it's not possible to encode the string as integer */ 
2808     if (sdslen(s
) != (unsigned)slen 
|| memcmp(buf
,s
,slen
)) return REDIS_ERR
; 
2809     if (longval
) *longval 
= value
; 
2813 /* Try to encode a string object in order to save space */ 
2814 static int tryObjectEncoding(robj 
*o
) { 
2818     if (o
->encoding 
!= REDIS_ENCODING_RAW
) 
2819         return REDIS_ERR
; /* Already encoded */ 
2821     /* It's not save to encode shared objects: shared objects can be shared 
2822      * everywhere in the "object space" of Redis. Encoded objects can only 
2823      * appear as "values" (and not, for instance, as keys) */ 
2824      if (o
->refcount 
> 1) return REDIS_ERR
; 
2826     /* Currently we try to encode only strings */ 
2827     redisAssert(o
->type 
== REDIS_STRING
); 
2829     /* Check if we can represent this string as a long integer */ 
2830     if (isStringRepresentableAsLong(s
,&value
) == REDIS_ERR
) return REDIS_ERR
; 
2832     /* Ok, this object can be encoded */ 
2833     o
->encoding 
= REDIS_ENCODING_INT
; 
2835     o
->ptr 
= (void*) value
; 
2839 /* Get a decoded version of an encoded object (returned as a new object). 
2840  * If the object is already raw-encoded just increment the ref count. */ 
2841 static robj 
*getDecodedObject(robj 
*o
) { 
2844     if (o
->encoding 
== REDIS_ENCODING_RAW
) { 
2848     if (o
->type 
== REDIS_STRING 
&& o
->encoding 
== REDIS_ENCODING_INT
) { 
2851         snprintf(buf
,32,"%ld",(long)o
->ptr
); 
2852         dec 
= createStringObject(buf
,strlen(buf
)); 
2855         redisAssert(1 != 1); 
2859 /* Compare two string objects via strcmp() or alike. 
2860  * Note that the objects may be integer-encoded. In such a case we 
2861  * use snprintf() to get a string representation of the numbers on the stack 
2862  * and compare the strings, it's much faster than calling getDecodedObject(). 
2864  * Important note: if objects are not integer encoded, but binary-safe strings, 
2865  * sdscmp() from sds.c will apply memcmp() so this function ca be considered 
2867 static int compareStringObjects(robj 
*a
, robj 
*b
) { 
2868     redisAssert(a
->type 
== REDIS_STRING 
&& b
->type 
== REDIS_STRING
); 
2869     char bufa
[128], bufb
[128], *astr
, *bstr
; 
2872     if (a 
== b
) return 0; 
2873     if (a
->encoding 
!= REDIS_ENCODING_RAW
) { 
2874         snprintf(bufa
,sizeof(bufa
),"%ld",(long) a
->ptr
); 
2880     if (b
->encoding 
!= REDIS_ENCODING_RAW
) { 
2881         snprintf(bufb
,sizeof(bufb
),"%ld",(long) b
->ptr
); 
2887     return bothsds 
? sdscmp(astr
,bstr
) : strcmp(astr
,bstr
); 
2890 static size_t stringObjectLen(robj 
*o
) { 
2891     redisAssert(o
->type 
== REDIS_STRING
); 
2892     if (o
->encoding 
== REDIS_ENCODING_RAW
) { 
2893         return sdslen(o
->ptr
); 
2897         return snprintf(buf
,32,"%ld",(long)o
->ptr
); 
2901 /*============================ RDB saving/loading =========================== */ 
/* Write the one byte 'type' to 'fp'. Returns 0 on success, -1 if the
 * byte could not be written. */
static int rdbSaveType(FILE *fp, unsigned char type) {
    size_t written = fwrite(&type,1,1,fp);

    return (written == 0) ? -1 : 0;
}
/* Write the time 't' to 'fp' as a 4 byte int32_t in host byte order
 * (the value is narrowed to 32 bits). Returns 0 on success, -1 on a
 * write error. */
static int rdbSaveTime(FILE *fp, time_t t) {
    int32_t narrowed = (int32_t) t;

    if (fwrite(&narrowed,4,1,fp) == 0) {
        return -1;
    }
    return 0;
}
2914 /* check rdbLoadLen() comments for more info */ 
2915 static int rdbSaveLen(FILE *fp
, uint32_t len
) { 
2916     unsigned char buf
[2]; 
2919         /* Save a 6 bit len */ 
2920         buf
[0] = (len
&0xFF)|(REDIS_RDB_6BITLEN
<<6); 
2921         if (fwrite(buf
,1,1,fp
) == 0) return -1; 
2922     } else if (len 
< (1<<14)) { 
2923         /* Save a 14 bit len */ 
2924         buf
[0] = ((len
>>8)&0xFF)|(REDIS_RDB_14BITLEN
<<6); 
2926         if (fwrite(buf
,2,1,fp
) == 0) return -1; 
2928         /* Save a 32 bit len */ 
2929         buf
[0] = (REDIS_RDB_32BITLEN
<<6); 
2930         if (fwrite(buf
,1,1,fp
) == 0) return -1; 
2932         if (fwrite(&len
,4,1,fp
) == 0) return -1; 
2937 /* String objects in the form "2391" "-100" without any space and with a 
2938  * range of values that can fit in an 8, 16 or 32 bit signed value can be 
2939  * encoded as integers to save space */ 
2940 static int rdbTryIntegerEncoding(sds s
, unsigned char *enc
) { 
2942     char *endptr
, buf
[32]; 
2944     /* Check if it's possible to encode this value as a number */ 
2945     value 
= strtoll(s
, &endptr
, 10); 
2946     if (endptr
[0] != '\0') return 0; 
2947     snprintf(buf
,32,"%lld",value
); 
2949     /* If the number converted back into a string is not identical 
2950      * then it's not possible to encode the string as integer */ 
2951     if (strlen(buf
) != sdslen(s
) || memcmp(buf
,s
,sdslen(s
))) return 0; 
2953     /* Finally check if it fits in our ranges */ 
2954     if (value 
>= -(1<<7) && value 
<= (1<<7)-1) { 
2955         enc
[0] = (REDIS_RDB_ENCVAL
<<6)|REDIS_RDB_ENC_INT8
; 
2956         enc
[1] = value
&0xFF; 
2958     } else if (value 
>= -(1<<15) && value 
<= (1<<15)-1) { 
2959         enc
[0] = (REDIS_RDB_ENCVAL
<<6)|REDIS_RDB_ENC_INT16
; 
2960         enc
[1] = value
&0xFF; 
2961         enc
[2] = (value
>>8)&0xFF; 
2963     } else if (value 
>= -((long long)1<<31) && value 
<= ((long long)1<<31)-1) { 
2964         enc
[0] = (REDIS_RDB_ENCVAL
<<6)|REDIS_RDB_ENC_INT32
; 
2965         enc
[1] = value
&0xFF; 
2966         enc
[2] = (value
>>8)&0xFF; 
2967         enc
[3] = (value
>>16)&0xFF; 
2968         enc
[4] = (value
>>24)&0xFF; 
2975 static int rdbSaveLzfStringObject(FILE *fp
, robj 
*obj
) { 
2976     unsigned int comprlen
, outlen
; 
2980     /* We require at least four bytes compression for this to be worth it */ 
2981     outlen 
= sdslen(obj
->ptr
)-4; 
2982     if (outlen 
<= 0) return 0; 
2983     if ((out 
= zmalloc(outlen
+1)) == NULL
) return 0; 
2984     comprlen 
= lzf_compress(obj
->ptr
, sdslen(obj
->ptr
), out
, outlen
); 
2985     if (comprlen 
== 0) { 
2989     /* Data compressed! Let's save it on disk */ 
2990     byte 
= (REDIS_RDB_ENCVAL
<<6)|REDIS_RDB_ENC_LZF
; 
2991     if (fwrite(&byte
,1,1,fp
) == 0) goto writeerr
; 
2992     if (rdbSaveLen(fp
,comprlen
) == -1) goto writeerr
; 
2993     if (rdbSaveLen(fp
,sdslen(obj
->ptr
)) == -1) goto writeerr
; 
2994     if (fwrite(out
,comprlen
,1,fp
) == 0) goto writeerr
; 
3003 /* Save a string objet as [len][data] on disk. If the object is a string 
3004  * representation of an integer value we try to safe it in a special form */ 
3005 static int rdbSaveStringObjectRaw(FILE *fp
, robj 
*obj
) { 
3009     len 
= sdslen(obj
->ptr
); 
3011     /* Try integer encoding */ 
3013         unsigned char buf
[5]; 
3014         if ((enclen 
= rdbTryIntegerEncoding(obj
->ptr
,buf
)) > 0) { 
3015             if (fwrite(buf
,enclen
,1,fp
) == 0) return -1; 
3020     /* Try LZF compression - under 20 bytes it's unable to compress even 
3021      * aaaaaaaaaaaaaaaaaa so skip it */ 
3022     if (server
.rdbcompression 
&& len 
> 20) { 
3025         retval 
= rdbSaveLzfStringObject(fp
,obj
); 
3026         if (retval 
== -1) return -1; 
3027         if (retval 
> 0) return 0; 
3028         /* retval == 0 means data can't be compressed, save the old way */ 
3031     /* Store verbatim */ 
3032     if (rdbSaveLen(fp
,len
) == -1) return -1; 
3033     if (len 
&& fwrite(obj
->ptr
,len
,1,fp
) == 0) return -1; 
3037 /* Like rdbSaveStringObjectRaw() but handle encoded objects */ 
3038 static int rdbSaveStringObject(FILE *fp
, robj 
*obj
) { 
3041     /* Avoid incr/decr ref count business when possible. 
3042      * This plays well with copy-on-write given that we are probably 
3043      * in a child process (BGSAVE). Also this makes sure key objects 
3044      * of swapped objects are not incRefCount-ed (an assert does not allow 
3045      * this in order to avoid bugs) */ 
3046     if (obj
->encoding 
!= REDIS_ENCODING_RAW
) { 
3047         obj 
= getDecodedObject(obj
); 
3048         retval 
= rdbSaveStringObjectRaw(fp
,obj
); 
3051         retval 
= rdbSaveStringObjectRaw(fp
,obj
); 
/* Save a double value on 'fp'.
 *
 * A double is stored as one unsigned length byte followed by that many
 * characters of its "%.17g" text representation. Three values of the
 * length byte are reserved and are not followed by any text:
 *
 *   253: not a number
 *   254: positive infinity
 *   255: negative infinity
 *
 * Returns 0 on success, -1 on a write error. */
static int rdbSaveDoubleValue(FILE *fp, double val) {
    unsigned char buf[128];
    int towrite;

    if (isnan(val)) {
        buf[0] = 253;
        towrite = 1;
    } else if (!isfinite(val)) {
        buf[0] = (val < 0) ? 255 : 254;
        towrite = 1;
    } else {
        /* Text goes after the length byte, which is filled in afterwards. */
        snprintf((char*)buf+1,sizeof(buf)-1,"%.17g",val);
        buf[0] = strlen((char*)buf+1);
        towrite = buf[0]+1;
    }

    if (fwrite(buf,towrite,1,fp) == 0) return -1;
    return 0;
}
3083 /* Save a Redis object. */ 
3084 static int rdbSaveObject(FILE *fp
, robj 
*o
) { 
3085     if (o
->type 
== REDIS_STRING
) { 
3086         /* Save a string value */ 
3087         if (rdbSaveStringObject(fp
,o
) == -1) return -1; 
3088     } else if (o
->type 
== REDIS_LIST
) { 
3089         /* Save a list value */ 
3090         list 
*list 
= o
->ptr
; 
3094         if (rdbSaveLen(fp
,listLength(list
)) == -1) return -1; 
3095         listRewind(list
,&li
); 
3096         while((ln 
= listNext(&li
))) { 
3097             robj 
*eleobj 
= listNodeValue(ln
); 
3099             if (rdbSaveStringObject(fp
,eleobj
) == -1) return -1; 
3101     } else if (o
->type 
== REDIS_SET
) { 
3102         /* Save a set value */ 
3104         dictIterator 
*di 
= dictGetIterator(set
); 
3107         if (rdbSaveLen(fp
,dictSize(set
)) == -1) return -1; 
3108         while((de 
= dictNext(di
)) != NULL
) { 
3109             robj 
*eleobj 
= dictGetEntryKey(de
); 
3111             if (rdbSaveStringObject(fp
,eleobj
) == -1) return -1; 
3113         dictReleaseIterator(di
); 
3114     } else if (o
->type 
== REDIS_ZSET
) { 
3115         /* Save a set value */ 
3117         dictIterator 
*di 
= dictGetIterator(zs
->dict
); 
3120         if (rdbSaveLen(fp
,dictSize(zs
->dict
)) == -1) return -1; 
3121         while((de 
= dictNext(di
)) != NULL
) { 
3122             robj 
*eleobj 
= dictGetEntryKey(de
); 
3123             double *score 
= dictGetEntryVal(de
); 
3125             if (rdbSaveStringObject(fp
,eleobj
) == -1) return -1; 
3126             if (rdbSaveDoubleValue(fp
,*score
) == -1) return -1; 
3128         dictReleaseIterator(di
); 
3130         redisAssert(0 != 0); 
3135 /* Return the length the object will have on disk if saved with 
3136  * the rdbSaveObject() function. Currently we use a trick to get 
3137  * this length with very little changes to the code. In the future 
3138  * we could switch to a faster solution. */ 
3139 static off_t 
rdbSavedObjectLen(robj 
*o
, FILE *fp
) { 
3140     if (fp 
== NULL
) fp 
= server
.devnull
; 
3142     assert(rdbSaveObject(fp
,o
) != 1); 
3146 /* Return the number of pages required to save this object in the swap file */ 
3147 static off_t 
rdbSavedObjectPages(robj 
*o
, FILE *fp
) { 
3148     off_t bytes 
= rdbSavedObjectLen(o
,fp
); 
3150     return (bytes
+(server
.vm_page_size
-1))/server
.vm_page_size
; 
3153 /* Save the DB on disk. Return REDIS_ERR on error, REDIS_OK on success */ 
3154 static int rdbSave(char *filename
) { 
3155     dictIterator 
*di 
= NULL
; 
3160     time_t now 
= time(NULL
); 
3162     /* Wait for I/O therads to terminate, just in case this is a 
3163      * foreground-saving, to avoid seeking the swap file descriptor at the 
3165     if (server
.vm_enabled
) 
3166         waitEmptyIOJobsQueue(); 
3168     snprintf(tmpfile
,256,"temp-%d.rdb", (int) getpid()); 
3169     fp 
= fopen(tmpfile
,"w"); 
3171         redisLog(REDIS_WARNING
, "Failed saving the DB: %s", strerror(errno
)); 
3174     if (fwrite("REDIS0001",9,1,fp
) == 0) goto werr
; 
3175     for (j 
= 0; j 
< server
.dbnum
; j
++) { 
3176         redisDb 
*db 
= server
.db
+j
; 
3178         if (dictSize(d
) == 0) continue; 
3179         di 
= dictGetIterator(d
); 
3185         /* Write the SELECT DB opcode */ 
3186         if (rdbSaveType(fp
,REDIS_SELECTDB
) == -1) goto werr
; 
3187         if (rdbSaveLen(fp
,j
) == -1) goto werr
; 
3189         /* Iterate this DB writing every entry */ 
3190         while((de 
= dictNext(di
)) != NULL
) { 
3191             robj 
*key 
= dictGetEntryKey(de
); 
3192             robj 
*o 
= dictGetEntryVal(de
); 
3193             time_t expiretime 
= getExpire(db
,key
); 
3195             /* Save the expire time */ 
3196             if (expiretime 
!= -1) { 
3197                 /* If this key is already expired skip it */ 
3198                 if (expiretime 
< now
) continue; 
3199                 if (rdbSaveType(fp
,REDIS_EXPIRETIME
) == -1) goto werr
; 
3200                 if (rdbSaveTime(fp
,expiretime
) == -1) goto werr
; 
3202             /* Save the key and associated value. This requires special 
3203              * handling if the value is swapped out. */ 
3204             if (!server
.vm_enabled 
|| key
->storage 
== REDIS_VM_MEMORY 
|| 
3205                                       key
->storage 
== REDIS_VM_SWAPPING
) { 
3206                 /* Save type, key, value */ 
3207                 if (rdbSaveType(fp
,o
->type
) == -1) goto werr
; 
3208                 if (rdbSaveStringObject(fp
,key
) == -1) goto werr
; 
3209                 if (rdbSaveObject(fp
,o
) == -1) goto werr
; 
3211                 /* REDIS_VM_SWAPPED or REDIS_VM_LOADING */ 
3213                 /* Get a preview of the object in memory */ 
3214                 po 
= vmPreviewObject(key
); 
3215                 /* Save type, key, value */ 
3216                 if (rdbSaveType(fp
,key
->vtype
) == -1) goto werr
; 
3217                 if (rdbSaveStringObject(fp
,key
) == -1) goto werr
; 
3218                 if (rdbSaveObject(fp
,po
) == -1) goto werr
; 
3219                 /* Remove the loaded object from memory */ 
3223         dictReleaseIterator(di
); 
3226     if (rdbSaveType(fp
,REDIS_EOF
) == -1) goto werr
; 
3228     /* Make sure data will not remain on the OS's output buffers */ 
3233     /* Use RENAME to make sure the DB file is changed atomically only 
3234      * if the generate DB file is ok. */ 
3235     if (rename(tmpfile
,filename
) == -1) { 
3236         redisLog(REDIS_WARNING
,"Error moving temp DB file on the final destination: %s", strerror(errno
)); 
3240     redisLog(REDIS_NOTICE
,"DB saved on disk"); 
3242     server
.lastsave 
= time(NULL
); 
3248     redisLog(REDIS_WARNING
,"Write error saving DB on disk: %s", strerror(errno
)); 
3249     if (di
) dictReleaseIterator(di
); 
3253 static int rdbSaveBackground(char *filename
) { 
3256     if (server
.bgsavechildpid 
!= -1) return REDIS_ERR
; 
3257     if (server
.vm_enabled
) waitEmptyIOJobsQueue(); 
3258     if ((childpid 
= fork()) == 0) { 
3260         if (server
.vm_enabled
) vmReopenSwapFile(); 
3262         if (rdbSave(filename
) == REDIS_OK
) { 
3269         if (childpid 
== -1) { 
3270             redisLog(REDIS_WARNING
,"Can't save in background: fork: %s", 
3274         redisLog(REDIS_NOTICE
,"Background saving started by pid %d",childpid
); 
3275         server
.bgsavechildpid 
= childpid
; 
3278     return REDIS_OK
; /* unreached */ 
/* Remove the temporary RDB file "temp-<pid>.rdb" that a saving process
 * with pid 'childpid' writes to. */
static void rdbRemoveTempFile(pid_t childpid) {
    char path[256];

    snprintf(path,256,"temp-%d.rdb", (int) childpid);
    unlink(path);
}
/* Read a one byte type from 'fp'. Returns the byte as a non negative
 * integer, or -1 if it could not be read. */
static int rdbLoadType(FILE *fp) {
    unsigned char byte;

    if (fread(&byte,1,1,fp) == 0) {
        return -1;
    }
    return byte;
}
/* Read a time stored by rdbSaveTime(): a 4 byte int32_t in host byte
 * order. Returns the value as time_t, or -1 if it could not be read. */
static time_t rdbLoadTime(FILE *fp) {
    int32_t stored;

    if (fread(&stored,4,1,fp) == 0) {
        return -1;
    }
    return (time_t) stored;
}
3300 /* Load an encoded length from the DB, see the REDIS_RDB_* defines on the top 
3301  * of this file for a description of how this are stored on disk. 
3303  * isencoded is set to 1 if the readed length is not actually a length but 
3304  * an "encoding type", check the above comments for more info */ 
3305 static uint32_t rdbLoadLen(FILE *fp
, int *isencoded
) { 
3306     unsigned char buf
[2]; 
3310     if (isencoded
) *isencoded 
= 0; 
3311     if (fread(buf
,1,1,fp
) == 0) return REDIS_RDB_LENERR
; 
3312     type 
= (buf
[0]&0xC0)>>6; 
3313     if (type 
== REDIS_RDB_6BITLEN
) { 
3314         /* Read a 6 bit len */ 
3316     } else if (type 
== REDIS_RDB_ENCVAL
) { 
3317         /* Read a 6 bit len encoding type */ 
3318         if (isencoded
) *isencoded 
= 1; 
3320     } else if (type 
== REDIS_RDB_14BITLEN
) { 
3321         /* Read a 14 bit len */ 
3322         if (fread(buf
+1,1,1,fp
) == 0) return REDIS_RDB_LENERR
; 
3323         return ((buf
[0]&0x3F)<<8)|buf
[1]; 
3325         /* Read a 32 bit len */ 
3326         if (fread(&len
,4,1,fp
) == 0) return REDIS_RDB_LENERR
; 
3331 static robj 
*rdbLoadIntegerObject(FILE *fp
, int enctype
) { 
3332     unsigned char enc
[4]; 
3335     if (enctype 
== REDIS_RDB_ENC_INT8
) { 
3336         if (fread(enc
,1,1,fp
) == 0) return NULL
; 
3337         val 
= (signed char)enc
[0]; 
3338     } else if (enctype 
== REDIS_RDB_ENC_INT16
) { 
3340         if (fread(enc
,2,1,fp
) == 0) return NULL
; 
3341         v 
= enc
[0]|(enc
[1]<<8); 
3343     } else if (enctype 
== REDIS_RDB_ENC_INT32
) { 
3345         if (fread(enc
,4,1,fp
) == 0) return NULL
; 
3346         v 
= enc
[0]|(enc
[1]<<8)|(enc
[2]<<16)|(enc
[3]<<24); 
3349         val 
= 0; /* anti-warning */ 
3352     return createObject(REDIS_STRING
,sdscatprintf(sdsempty(),"%lld",val
)); 
3355 static robj 
*rdbLoadLzfStringObject(FILE*fp
) { 
3356     unsigned int len
, clen
; 
3357     unsigned char *c 
= NULL
; 
3360     if ((clen 
= rdbLoadLen(fp
,NULL
)) == REDIS_RDB_LENERR
) return NULL
; 
3361     if ((len 
= rdbLoadLen(fp
,NULL
)) == REDIS_RDB_LENERR
) return NULL
; 
3362     if ((c 
= zmalloc(clen
)) == NULL
) goto err
; 
3363     if ((val 
= sdsnewlen(NULL
,len
)) == NULL
) goto err
; 
3364     if (fread(c
,clen
,1,fp
) == 0) goto err
; 
3365     if (lzf_decompress(c
,clen
,val
,len
) == 0) goto err
; 
3367     return createObject(REDIS_STRING
,val
); 
3374 static robj 
*rdbLoadStringObject(FILE*fp
) { 
3379     len 
= rdbLoadLen(fp
,&isencoded
); 
3382         case REDIS_RDB_ENC_INT8
: 
3383         case REDIS_RDB_ENC_INT16
: 
3384         case REDIS_RDB_ENC_INT32
: 
3385             return tryObjectSharing(rdbLoadIntegerObject(fp
,len
)); 
3386         case REDIS_RDB_ENC_LZF
: 
3387             return tryObjectSharing(rdbLoadLzfStringObject(fp
)); 
3393     if (len 
== REDIS_RDB_LENERR
) return NULL
; 
3394     val 
= sdsnewlen(NULL
,len
); 
3395     if (len 
&& fread(val
,len
,1,fp
) == 0) { 
3399     return tryObjectSharing(createObject(REDIS_STRING
,val
)); 
3402 /* For information about double serialization check rdbSaveDoubleValue() */ 
3403 static int rdbLoadDoubleValue(FILE *fp
, double *val
) { 
3407     if (fread(&len
,1,1,fp
) == 0) return -1; 
3409     case 255: *val 
= R_NegInf
; return 0; 
3410     case 254: *val 
= R_PosInf
; return 0; 
3411     case 253: *val 
= R_Nan
; return 0; 
3413         if (fread(buf
,len
,1,fp
) == 0) return -1; 
3415         sscanf(buf
, "%lg", val
); 
3420 /* Load a Redis object of the specified type from the specified file. 
3421  * On success a newly allocated object is returned, otherwise NULL. */ 
3422 static robj 
*rdbLoadObject(int type
, FILE *fp
) { 
3425     if (type 
== REDIS_STRING
) { 
3426         /* Read string value */ 
3427         if ((o 
= rdbLoadStringObject(fp
)) == NULL
) return NULL
; 
3428         tryObjectEncoding(o
); 
3429     } else if (type 
== REDIS_LIST 
|| type 
== REDIS_SET
) { 
3430         /* Read list/set value */ 
3433         if ((listlen 
= rdbLoadLen(fp
,NULL
)) == REDIS_RDB_LENERR
) return NULL
; 
3434         o 
= (type 
== REDIS_LIST
) ? createListObject() : createSetObject(); 
3435         /* It's faster to expand the dict to the right size asap in order 
3436          * to avoid rehashing */ 
3437         if (type 
== REDIS_SET 
&& listlen 
> DICT_HT_INITIAL_SIZE
) 
3438             dictExpand(o
->ptr
,listlen
); 
3439         /* Load every single element of the list/set */ 
3443             if ((ele 
= rdbLoadStringObject(fp
)) == NULL
) return NULL
; 
3444             tryObjectEncoding(ele
); 
3445             if (type 
== REDIS_LIST
) { 
3446                 listAddNodeTail((list
*)o
->ptr
,ele
); 
3448                 dictAdd((dict
*)o
->ptr
,ele
,NULL
); 
3451     } else if (type 
== REDIS_ZSET
) { 
3452         /* Read list/set value */ 
3456         if ((zsetlen 
= rdbLoadLen(fp
,NULL
)) == REDIS_RDB_LENERR
) return NULL
; 
3457         o 
= createZsetObject(); 
3459         /* Load every single element of the list/set */ 
3462             double *score 
= zmalloc(sizeof(double)); 
3464             if ((ele 
= rdbLoadStringObject(fp
)) == NULL
) return NULL
; 
3465             tryObjectEncoding(ele
); 
3466             if (rdbLoadDoubleValue(fp
,score
) == -1) return NULL
; 
3467             dictAdd(zs
->dict
,ele
,score
); 
3468             zslInsert(zs
->zsl
,*score
,ele
); 
3469             incrRefCount(ele
); /* added to skiplist */ 
3472         redisAssert(0 != 0); 
3477 static int rdbLoad(char *filename
) { 
3479     robj 
*keyobj 
= NULL
; 
3481     int type
, retval
, rdbver
; 
3482     dict 
*d 
= server
.db
[0].dict
; 
3483     redisDb 
*db 
= server
.db
+0; 
3485     time_t expiretime 
= -1, now 
= time(NULL
); 
3486     long long loadedkeys 
= 0; 
3488     fp 
= fopen(filename
,"r"); 
3489     if (!fp
) return REDIS_ERR
; 
3490     if (fread(buf
,9,1,fp
) == 0) goto eoferr
; 
3492     if (memcmp(buf
,"REDIS",5) != 0) { 
3494         redisLog(REDIS_WARNING
,"Wrong signature trying to load DB from file"); 
3497     rdbver 
= atoi(buf
+5); 
3500         redisLog(REDIS_WARNING
,"Can't handle RDB format version %d",rdbver
); 
3507         if ((type 
= rdbLoadType(fp
)) == -1) goto eoferr
; 
3508         if (type 
== REDIS_EXPIRETIME
) { 
3509             if ((expiretime 
= rdbLoadTime(fp
)) == -1) goto eoferr
; 
3510             /* We read the time so we need to read the object type again */ 
3511             if ((type 
= rdbLoadType(fp
)) == -1) goto eoferr
; 
3513         if (type 
== REDIS_EOF
) break; 
3514         /* Handle SELECT DB opcode as a special case */ 
3515         if (type 
== REDIS_SELECTDB
) { 
3516             if ((dbid 
= rdbLoadLen(fp
,NULL
)) == REDIS_RDB_LENERR
) 
3518             if (dbid 
>= (unsigned)server
.dbnum
) { 
3519                 redisLog(REDIS_WARNING
,"FATAL: Data file was created with a Redis server configured to handle more than %d databases. Exiting\n", server
.dbnum
); 
3522             db 
= server
.db
+dbid
; 
3527         if ((keyobj 
= rdbLoadStringObject(fp
)) == NULL
) goto eoferr
; 
3529         if ((o 
= rdbLoadObject(type
,fp
)) == NULL
) goto eoferr
; 
3530         /* Add the new object in the hash table */ 
3531         retval 
= dictAdd(d
,keyobj
,o
); 
3532         if (retval 
== DICT_ERR
) { 
3533             redisLog(REDIS_WARNING
,"Loading DB, duplicated key (%s) found! Unrecoverable error, exiting now.", keyobj
->ptr
); 
3536         /* Set the expire time if needed */ 
3537         if (expiretime 
!= -1) { 
3538             setExpire(db
,keyobj
,expiretime
); 
3539             /* Delete this key if already expired */ 
3540             if (expiretime 
< now
) deleteKey(db
,keyobj
); 
3544         /* Handle swapping while loading big datasets when VM is on */ 
3546         if (server
.vm_enabled 
&& (loadedkeys 
% 5000) == 0) { 
3547             while (zmalloc_used_memory() > server
.vm_max_memory
) { 
3548                 if (vmSwapOneObjectBlocking() == REDIS_ERR
) break; 
3555 eoferr
: /* unexpected end of file is handled here with a fatal exit */ 
3556     if (keyobj
) decrRefCount(keyobj
); 
3557     redisLog(REDIS_WARNING
,"Short read or OOM loading DB. Unrecoverable error, aborting now."); 
3559     return REDIS_ERR
; /* Just to avoid warning */ 
3562 /*================================== Commands =============================== */ 
3564 static void authCommand(redisClient 
*c
) { 
3565     if (!server
.requirepass 
|| !strcmp(c
->argv
[1]->ptr
, server
.requirepass
)) { 
3566       c
->authenticated 
= 1; 
3567       addReply(c
,shared
.ok
); 
3569       c
->authenticated 
= 0; 
3570       addReplySds(c
,sdscatprintf(sdsempty(),"-ERR invalid password\r\n")); 
3574 static void pingCommand(redisClient 
*c
) { 
3575     addReply(c
,shared
.pong
); 
3578 static void echoCommand(redisClient 
*c
) { 
3579     addReplyBulkLen(c
,c
->argv
[1]); 
3580     addReply(c
,c
->argv
[1]); 
3581     addReply(c
,shared
.crlf
); 
3584 /*=================================== Strings =============================== */ 
3586 static void setGenericCommand(redisClient 
*c
, int nx
) { 
3589     if (nx
) deleteIfVolatile(c
->db
,c
->argv
[1]); 
3590     retval 
= dictAdd(c
->db
->dict
,c
->argv
[1],c
->argv
[2]); 
3591     if (retval 
== DICT_ERR
) { 
3593             /* If the key is about a swapped value, we want a new key object 
3594              * to overwrite the old. So we delete the old key in the database. 
3595              * This will also make sure that swap pages about the old object 
3596              * will be marked as free. */ 
3597             if (server
.vm_enabled 
&& deleteIfSwapped(c
->db
,c
->argv
[1])) 
3598                 incrRefCount(c
->argv
[1]); 
3599             dictReplace(c
->db
->dict
,c
->argv
[1],c
->argv
[2]); 
3600             incrRefCount(c
->argv
[2]); 
3602             addReply(c
,shared
.czero
); 
3606         incrRefCount(c
->argv
[1]); 
3607         incrRefCount(c
->argv
[2]); 
3610     removeExpire(c
->db
,c
->argv
[1]); 
3611     addReply(c
, nx 
? shared
.cone 
: shared
.ok
); 
3614 static void setCommand(redisClient 
*c
) { 
3615     setGenericCommand(c
,0); 
3618 static void setnxCommand(redisClient 
*c
) { 
3619     setGenericCommand(c
,1); 
3622 static int getGenericCommand(redisClient 
*c
) { 
3623     robj 
*o 
= lookupKeyRead(c
->db
,c
->argv
[1]); 
3626         addReply(c
,shared
.nullbulk
); 
3629         if (o
->type 
!= REDIS_STRING
) { 
3630             addReply(c
,shared
.wrongtypeerr
); 
3633             addReplyBulkLen(c
,o
); 
3635             addReply(c
,shared
.crlf
); 
3641 static void getCommand(redisClient 
*c
) { 
3642     getGenericCommand(c
); 
3645 static void getsetCommand(redisClient 
*c
) { 
3646     if (getGenericCommand(c
) == REDIS_ERR
) return; 
3647     if (dictAdd(c
->db
->dict
,c
->argv
[1],c
->argv
[2]) == DICT_ERR
) { 
3648         dictReplace(c
->db
->dict
,c
->argv
[1],c
->argv
[2]); 
3650         incrRefCount(c
->argv
[1]); 
3652     incrRefCount(c
->argv
[2]); 
3654     removeExpire(c
->db
,c
->argv
[1]); 
3657 static void mgetCommand(redisClient 
*c
) { 
3660     addReplySds(c
,sdscatprintf(sdsempty(),"*%d\r\n",c
->argc
-1)); 
3661     for (j 
= 1; j 
< c
->argc
; j
++) { 
3662         robj 
*o 
= lookupKeyRead(c
->db
,c
->argv
[j
]); 
3664             addReply(c
,shared
.nullbulk
); 
3666             if (o
->type 
!= REDIS_STRING
) { 
3667                 addReply(c
,shared
.nullbulk
); 
3669                 addReplyBulkLen(c
,o
); 
3671                 addReply(c
,shared
.crlf
); 
3677 static void msetGenericCommand(redisClient 
*c
, int nx
) { 
3678     int j
, busykeys 
= 0; 
3680     if ((c
->argc 
% 2) == 0) { 
3681         addReplySds(c
,sdsnew("-ERR wrong number of arguments for MSET\r\n")); 
3684     /* Handle the NX flag. The MSETNX semantic is to return zero and set 
3685      * nothing at all if at least one of the keys already exists. */ 
3687         for (j 
= 1; j 
< c
->argc
; j 
+= 2) { 
3688             if (lookupKeyWrite(c
->db
,c
->argv
[j
]) != NULL
) { 
3694         addReply(c
, shared
.czero
); 
3698     for (j 
= 1; j 
< c
->argc
; j 
+= 2) { 
3701         tryObjectEncoding(c
->argv
[j
+1]); 
3702         retval 
= dictAdd(c
->db
->dict
,c
->argv
[j
],c
->argv
[j
+1]); 
3703         if (retval 
== DICT_ERR
) { 
3704             dictReplace(c
->db
->dict
,c
->argv
[j
],c
->argv
[j
+1]); 
3705             incrRefCount(c
->argv
[j
+1]); 
3707             incrRefCount(c
->argv
[j
]); 
3708             incrRefCount(c
->argv
[j
+1]); 
3710         removeExpire(c
->db
,c
->argv
[j
]); 
3712     server
.dirty 
+= (c
->argc
-1)/2; 
3713     addReply(c
, nx 
? shared
.cone 
: shared
.ok
); 
3716 static void msetCommand(redisClient 
*c
) { 
3717     msetGenericCommand(c
,0); 
3720 static void msetnxCommand(redisClient 
*c
) { 
3721     msetGenericCommand(c
,1); 
3724 static void incrDecrCommand(redisClient 
*c
, long long incr
) { 
3729     o 
= lookupKeyWrite(c
->db
,c
->argv
[1]); 
3733         if (o
->type 
!= REDIS_STRING
) { 
3738             if (o
->encoding 
== REDIS_ENCODING_RAW
) 
3739                 value 
= strtoll(o
->ptr
, &eptr
, 10); 
3740             else if (o
->encoding 
== REDIS_ENCODING_INT
) 
3741                 value 
= (long)o
->ptr
; 
3743                 redisAssert(1 != 1); 
3748     o 
= createObject(REDIS_STRING
,sdscatprintf(sdsempty(),"%lld",value
)); 
3749     tryObjectEncoding(o
); 
3750     retval 
= dictAdd(c
->db
->dict
,c
->argv
[1],o
); 
3751     if (retval 
== DICT_ERR
) { 
3752         dictReplace(c
->db
->dict
,c
->argv
[1],o
); 
3753         removeExpire(c
->db
,c
->argv
[1]); 
3755         incrRefCount(c
->argv
[1]); 
3758     addReply(c
,shared
.colon
); 
3760     addReply(c
,shared
.crlf
); 
3763 static void incrCommand(redisClient 
*c
) { 
3764     incrDecrCommand(c
,1); 
3767 static void decrCommand(redisClient 
*c
) { 
3768     incrDecrCommand(c
,-1); 
3771 static void incrbyCommand(redisClient 
*c
) { 
3772     long long incr 
= strtoll(c
->argv
[2]->ptr
, NULL
, 10); 
3773     incrDecrCommand(c
,incr
); 
3776 static void decrbyCommand(redisClient 
*c
) { 
3777     long long incr 
= strtoll(c
->argv
[2]->ptr
, NULL
, 10); 
3778     incrDecrCommand(c
,-incr
); 
3781 static void appendCommand(redisClient 
*c
) { 
3786     o 
= lookupKeyWrite(c
->db
,c
->argv
[1]); 
3788         /* Create the key */ 
3789         retval 
= dictAdd(c
->db
->dict
,c
->argv
[1],c
->argv
[2]); 
3790         incrRefCount(c
->argv
[1]); 
3791         incrRefCount(c
->argv
[2]); 
3792         totlen 
= stringObjectLen(c
->argv
[2]); 
3796         de 
= dictFind(c
->db
->dict
,c
->argv
[1]); 
3799         o 
= dictGetEntryVal(de
); 
3800         if (o
->type 
!= REDIS_STRING
) { 
3801             addReply(c
,shared
.wrongtypeerr
); 
3804         /* If the object is specially encoded or shared we have to make 
3806         if (o
->refcount 
!= 1 || o
->encoding 
!= REDIS_ENCODING_RAW
) { 
3807             robj 
*decoded 
= getDecodedObject(o
); 
3809             o 
= createStringObject(decoded
->ptr
, sdslen(decoded
->ptr
)); 
3810             decrRefCount(decoded
); 
3811             dictReplace(c
->db
->dict
,c
->argv
[1],o
); 
3814         if (c
->argv
[2]->encoding 
== REDIS_ENCODING_RAW
) { 
3815             o
->ptr 
= sdscatlen(o
->ptr
, 
3816                 c
->argv
[2]->ptr
, sdslen(c
->argv
[2]->ptr
)); 
3818             o
->ptr 
= sdscatprintf(o
->ptr
, "%ld", 
3819                 (unsigned long) c
->argv
[2]->ptr
); 
3821         totlen 
= sdslen(o
->ptr
); 
3824     addReplySds(c
,sdscatprintf(sdsempty(),":%lu\r\n",(unsigned long)totlen
)); 
3827 static void substrCommand(redisClient 
*c
) { 
3829     long start 
= atoi(c
->argv
[2]->ptr
); 
3830     long end 
= atoi(c
->argv
[3]->ptr
); 
3832     o 
= lookupKeyRead(c
->db
,c
->argv
[1]); 
3834         addReply(c
,shared
.nullbulk
); 
3836         if (o
->type 
!= REDIS_STRING
) { 
3837             addReply(c
,shared
.wrongtypeerr
); 
3839             size_t rangelen
, strlen
; 
3842             o 
= getDecodedObject(o
); 
3843             strlen 
= sdslen(o
->ptr
); 
3845             /* convert negative indexes */ 
3846             if (start 
< 0) start 
= strlen
+start
; 
3847             if (end 
< 0) end 
= strlen
+end
; 
3848             if (start 
< 0) start 
= 0; 
3849             if (end 
< 0) end 
= 0; 
3851             /* indexes sanity checks */ 
3852             if (start 
> end 
|| (size_t)start 
>= strlen
) { 
3853                 /* Out of range start or start > end result in null reply */ 
3854                 addReply(c
,shared
.nullbulk
); 
3858             if ((size_t)end 
>= strlen
) end 
= strlen
-1; 
3859             rangelen 
= (end
-start
)+1; 
3861             /* Return the result */ 
3862             addReplySds(c
,sdscatprintf(sdsempty(),"$%lu\r\n",rangelen
)); 
3863             range 
= sdsnewlen((char*)o
->ptr
+start
,rangelen
); 
3864             addReplySds(c
,range
); 
3865             addReply(c
,shared
.crlf
); 
3871 /* ========================= Type agnostic commands ========================= */ 
3873 static void delCommand(redisClient 
*c
) { 
3876     for (j 
= 1; j 
< c
->argc
; j
++) { 
3877         if (deleteKey(c
->db
,c
->argv
[j
])) { 
3884         addReply(c
,shared
.czero
); 
3887         addReply(c
,shared
.cone
); 
3890         addReplySds(c
,sdscatprintf(sdsempty(),":%d\r\n",deleted
)); 
3895 static void existsCommand(redisClient 
*c
) { 
3896     addReply(c
,lookupKeyRead(c
->db
,c
->argv
[1]) ? shared
.cone 
: shared
.czero
); 
3899 static void selectCommand(redisClient 
*c
) { 
3900     int id 
= atoi(c
->argv
[1]->ptr
); 
3902     if (selectDb(c
,id
) == REDIS_ERR
) { 
3903         addReplySds(c
,sdsnew("-ERR invalid DB index\r\n")); 
3905         addReply(c
,shared
.ok
); 
3909 static void randomkeyCommand(redisClient 
*c
) { 
3913         de 
= dictGetRandomKey(c
->db
->dict
); 
3914         if (!de 
|| expireIfNeeded(c
->db
,dictGetEntryKey(de
)) == 0) break; 
3917         addReply(c
,shared
.plus
); 
3918         addReply(c
,shared
.crlf
); 
3920         addReply(c
,shared
.plus
); 
3921         addReply(c
,dictGetEntryKey(de
)); 
3922         addReply(c
,shared
.crlf
); 
3926 static void keysCommand(redisClient 
*c
) { 
3929     sds pattern 
= c
->argv
[1]->ptr
; 
3930     int plen 
= sdslen(pattern
); 
3931     unsigned long numkeys 
= 0; 
3932     robj 
*lenobj 
= createObject(REDIS_STRING
,NULL
); 
3934     di 
= dictGetIterator(c
->db
->dict
); 
3936     decrRefCount(lenobj
); 
3937     while((de 
= dictNext(di
)) != NULL
) { 
3938         robj 
*keyobj 
= dictGetEntryKey(de
); 
3940         sds key 
= keyobj
->ptr
; 
3941         if ((pattern
[0] == '*' && pattern
[1] == '\0') || 
3942             stringmatchlen(pattern
,plen
,key
,sdslen(key
),0)) { 
3943             if (expireIfNeeded(c
->db
,keyobj
) == 0) { 
3944                 addReplyBulkLen(c
,keyobj
); 
3946                 addReply(c
,shared
.crlf
); 
3951     dictReleaseIterator(di
); 
3952     lenobj
->ptr 
= sdscatprintf(sdsempty(),"*%lu\r\n",numkeys
); 
3955 static void dbsizeCommand(redisClient 
*c
) { 
3957         sdscatprintf(sdsempty(),":%lu\r\n",dictSize(c
->db
->dict
))); 
3960 static void lastsaveCommand(redisClient 
*c
) { 
3962         sdscatprintf(sdsempty(),":%lu\r\n",server
.lastsave
)); 
3965 static void typeCommand(redisClient 
*c
) { 
3969     o 
= lookupKeyRead(c
->db
,c
->argv
[1]); 
3974         case REDIS_STRING
: type 
= "+string"; break; 
3975         case REDIS_LIST
: type 
= "+list"; break; 
3976         case REDIS_SET
: type 
= "+set"; break; 
3977         case REDIS_ZSET
: type 
= "+zset"; break; 
3978         default: type 
= "unknown"; break; 
3981     addReplySds(c
,sdsnew(type
)); 
3982     addReply(c
,shared
.crlf
); 
3985 static void saveCommand(redisClient 
*c
) { 
3986     if (server
.bgsavechildpid 
!= -1) { 
3987         addReplySds(c
,sdsnew("-ERR background save in progress\r\n")); 
3990     if (rdbSave(server
.dbfilename
) == REDIS_OK
) { 
3991         addReply(c
,shared
.ok
); 
3993         addReply(c
,shared
.err
); 
3997 static void bgsaveCommand(redisClient 
*c
) { 
3998     if (server
.bgsavechildpid 
!= -1) { 
3999         addReplySds(c
,sdsnew("-ERR background save already in progress\r\n")); 
4002     if (rdbSaveBackground(server
.dbfilename
) == REDIS_OK
) { 
4003         char *status 
= "+Background saving started\r\n"; 
4004         addReplySds(c
,sdsnew(status
)); 
4006         addReply(c
,shared
.err
); 
4010 static void shutdownCommand(redisClient 
*c
) { 
4011     redisLog(REDIS_WARNING
,"User requested shutdown, saving DB..."); 
4012     /* Kill the saving child if there is a background save in progress. 
4013        We want to avoid race conditions, for instance our saving child may 
4014        overwrite the synchronous save done by SHUTDOWN. */ 
4015     if (server
.bgsavechildpid 
!= -1) { 
4016         redisLog(REDIS_WARNING
,"There is a live saving child. Killing it!"); 
4017         kill(server
.bgsavechildpid
,SIGKILL
); 
4018         rdbRemoveTempFile(server
.bgsavechildpid
); 
4020     if (server
.appendonly
) { 
4021         /* Append only file: fsync() the AOF and exit */ 
4022         fsync(server
.appendfd
); 
4023         if (server
.vm_enabled
) unlink(server
.vm_swap_file
); 
4026         /* Snapshotting. Perform a SYNC SAVE and exit */ 
4027         if (rdbSave(server
.dbfilename
) == REDIS_OK
) { 
4028             if (server
.daemonize
) 
4029                 unlink(server
.pidfile
); 
4030             redisLog(REDIS_WARNING
,"%zu bytes used at exit",zmalloc_used_memory()); 
4031             redisLog(REDIS_WARNING
,"Server exit now, bye bye..."); 
4032             if (server
.vm_enabled
) unlink(server
.vm_swap_file
); 
4035             /* Ooops.. error saving! The best we can do is to continue operating. 
4036              * Note that if there was a background saving process, in the next 
4037              * cron() Redis will be notified that the background saving aborted, 
4038              * handling special stuff like slaves pending for synchronization... */ 
4039             redisLog(REDIS_WARNING
,"Error trying to save the DB, can't exit");  
4040             addReplySds(c
,sdsnew("-ERR can't quit, problems saving the DB\r\n")); 
4045 static void renameGenericCommand(redisClient 
*c
, int nx
) { 
4048     /* To use the same key as src and dst is probably an error */ 
4049     if (sdscmp(c
->argv
[1]->ptr
,c
->argv
[2]->ptr
) == 0) { 
4050         addReply(c
,shared
.sameobjecterr
); 
4054     o 
= lookupKeyWrite(c
->db
,c
->argv
[1]); 
4056         addReply(c
,shared
.nokeyerr
); 
4060     deleteIfVolatile(c
->db
,c
->argv
[2]); 
4061     if (dictAdd(c
->db
->dict
,c
->argv
[2],o
) == DICT_ERR
) { 
4064             addReply(c
,shared
.czero
); 
4067         dictReplace(c
->db
->dict
,c
->argv
[2],o
); 
4069         incrRefCount(c
->argv
[2]); 
4071     deleteKey(c
->db
,c
->argv
[1]); 
4073     addReply(c
,nx 
? shared
.cone 
: shared
.ok
); 
4076 static void renameCommand(redisClient 
*c
) { 
4077     renameGenericCommand(c
,0); 
4080 static void renamenxCommand(redisClient 
*c
) { 
4081     renameGenericCommand(c
,1); 
4084 static void moveCommand(redisClient 
*c
) { 
4089     /* Obtain source and target DB pointers */ 
4092     if (selectDb(c
,atoi(c
->argv
[2]->ptr
)) == REDIS_ERR
) { 
4093         addReply(c
,shared
.outofrangeerr
); 
4097     selectDb(c
,srcid
); /* Back to the source DB */ 
4099     /* If the user is moving using as target the same 
4100      * DB as the source DB it is probably an error. */ 
4102         addReply(c
,shared
.sameobjecterr
); 
4106     /* Check if the element exists and get a reference */ 
4107     o 
= lookupKeyWrite(c
->db
,c
->argv
[1]); 
4109         addReply(c
,shared
.czero
); 
4113     /* Try to add the element to the target DB */ 
4114     deleteIfVolatile(dst
,c
->argv
[1]); 
4115     if (dictAdd(dst
->dict
,c
->argv
[1],o
) == DICT_ERR
) { 
4116         addReply(c
,shared
.czero
); 
4119     incrRefCount(c
->argv
[1]); 
4122     /* OK! key moved, free the entry in the source DB */ 
4123     deleteKey(src
,c
->argv
[1]); 
4125     addReply(c
,shared
.cone
); 
4128 /* =================================== Lists ================================ */ 
4129 static void pushGenericCommand(redisClient 
*c
, int where
) { 
4133     lobj 
= lookupKeyWrite(c
->db
,c
->argv
[1]); 
4135         if (handleClientsWaitingListPush(c
,c
->argv
[1],c
->argv
[2])) { 
4136             addReply(c
,shared
.cone
); 
4139         lobj 
= createListObject(); 
4141         if (where 
== REDIS_HEAD
) { 
4142             listAddNodeHead(list
,c
->argv
[2]); 
4144             listAddNodeTail(list
,c
->argv
[2]); 
4146         dictAdd(c
->db
->dict
,c
->argv
[1],lobj
); 
4147         incrRefCount(c
->argv
[1]); 
4148         incrRefCount(c
->argv
[2]); 
4150         if (lobj
->type 
!= REDIS_LIST
) { 
4151             addReply(c
,shared
.wrongtypeerr
); 
4154         if (handleClientsWaitingListPush(c
,c
->argv
[1],c
->argv
[2])) { 
4155             addReply(c
,shared
.cone
); 
4159         if (where 
== REDIS_HEAD
) { 
4160             listAddNodeHead(list
,c
->argv
[2]); 
4162             listAddNodeTail(list
,c
->argv
[2]); 
4164         incrRefCount(c
->argv
[2]); 
4167     addReplySds(c
,sdscatprintf(sdsempty(),":%d\r\n",listLength(list
))); 
4170 static void lpushCommand(redisClient 
*c
) { 
4171     pushGenericCommand(c
,REDIS_HEAD
); 
4174 static void rpushCommand(redisClient 
*c
) { 
4175     pushGenericCommand(c
,REDIS_TAIL
); 
4178 static void llenCommand(redisClient 
*c
) { 
4182     o 
= lookupKeyRead(c
->db
,c
->argv
[1]); 
4184         addReply(c
,shared
.czero
); 
4187         if (o
->type 
!= REDIS_LIST
) { 
4188             addReply(c
,shared
.wrongtypeerr
); 
4191             addReplySds(c
,sdscatprintf(sdsempty(),":%d\r\n",listLength(l
))); 
4196 static void lindexCommand(redisClient 
*c
) { 
4198     int index 
= atoi(c
->argv
[2]->ptr
); 
4200     o 
= lookupKeyRead(c
->db
,c
->argv
[1]); 
4202         addReply(c
,shared
.nullbulk
); 
4204         if (o
->type 
!= REDIS_LIST
) { 
4205             addReply(c
,shared
.wrongtypeerr
); 
4207             list 
*list 
= o
->ptr
; 
4210             ln 
= listIndex(list
, index
); 
4212                 addReply(c
,shared
.nullbulk
); 
4214                 robj 
*ele 
= listNodeValue(ln
); 
4215                 addReplyBulkLen(c
,ele
); 
4217                 addReply(c
,shared
.crlf
); 
4223 static void lsetCommand(redisClient 
*c
) { 
4225     int index 
= atoi(c
->argv
[2]->ptr
); 
4227     o 
= lookupKeyWrite(c
->db
,c
->argv
[1]); 
4229         addReply(c
,shared
.nokeyerr
); 
4231         if (o
->type 
!= REDIS_LIST
) { 
4232             addReply(c
,shared
.wrongtypeerr
); 
4234             list 
*list 
= o
->ptr
; 
4237             ln 
= listIndex(list
, index
); 
4239                 addReply(c
,shared
.outofrangeerr
); 
4241                 robj 
*ele 
= listNodeValue(ln
); 
4244                 listNodeValue(ln
) = c
->argv
[3]; 
4245                 incrRefCount(c
->argv
[3]); 
4246                 addReply(c
,shared
.ok
); 
4253 static void popGenericCommand(redisClient 
*c
, int where
) { 
4256     o 
= lookupKeyWrite(c
->db
,c
->argv
[1]); 
4258         addReply(c
,shared
.nullbulk
); 
4260         if (o
->type 
!= REDIS_LIST
) { 
4261             addReply(c
,shared
.wrongtypeerr
); 
4263             list 
*list 
= o
->ptr
; 
4266             if (where 
== REDIS_HEAD
) 
4267                 ln 
= listFirst(list
); 
4269                 ln 
= listLast(list
); 
4272                 addReply(c
,shared
.nullbulk
); 
4274                 robj 
*ele 
= listNodeValue(ln
); 
4275                 addReplyBulkLen(c
,ele
); 
4277                 addReply(c
,shared
.crlf
); 
4278                 listDelNode(list
,ln
); 
4285 static void lpopCommand(redisClient 
*c
) { 
4286     popGenericCommand(c
,REDIS_HEAD
); 
4289 static void rpopCommand(redisClient 
*c
) { 
4290     popGenericCommand(c
,REDIS_TAIL
); 
4293 static void lrangeCommand(redisClient 
*c
) { 
4295     int start 
= atoi(c
->argv
[2]->ptr
); 
4296     int end 
= atoi(c
->argv
[3]->ptr
); 
4298     o 
= lookupKeyRead(c
->db
,c
->argv
[1]); 
4300         addReply(c
,shared
.nullmultibulk
); 
4302         if (o
->type 
!= REDIS_LIST
) { 
4303             addReply(c
,shared
.wrongtypeerr
); 
4305             list 
*list 
= o
->ptr
; 
4307             int llen 
= listLength(list
); 
4311             /* convert negative indexes */ 
4312             if (start 
< 0) start 
= llen
+start
; 
4313             if (end 
< 0) end 
= llen
+end
; 
4314             if (start 
< 0) start 
= 0; 
4315             if (end 
< 0) end 
= 0; 
4317             /* indexes sanity checks */ 
4318             if (start 
> end 
|| start 
>= llen
) { 
4319                 /* Out of range start or start > end result in empty list */ 
4320                 addReply(c
,shared
.emptymultibulk
); 
4323             if (end 
>= llen
) end 
= llen
-1; 
4324             rangelen 
= (end
-start
)+1; 
4326             /* Return the result in form of a multi-bulk reply */ 
4327             ln 
= listIndex(list
, start
); 
4328             addReplySds(c
,sdscatprintf(sdsempty(),"*%d\r\n",rangelen
)); 
4329             for (j 
= 0; j 
< rangelen
; j
++) { 
4330                 ele 
= listNodeValue(ln
); 
4331                 addReplyBulkLen(c
,ele
); 
4333                 addReply(c
,shared
.crlf
); 
4340 static void ltrimCommand(redisClient 
*c
) { 
4342     int start 
= atoi(c
->argv
[2]->ptr
); 
4343     int end 
= atoi(c
->argv
[3]->ptr
); 
4345     o 
= lookupKeyWrite(c
->db
,c
->argv
[1]); 
4347         addReply(c
,shared
.ok
); 
4349         if (o
->type 
!= REDIS_LIST
) { 
4350             addReply(c
,shared
.wrongtypeerr
); 
4352             list 
*list 
= o
->ptr
; 
4354             int llen 
= listLength(list
); 
4355             int j
, ltrim
, rtrim
; 
4357             /* convert negative indexes */ 
4358             if (start 
< 0) start 
= llen
+start
; 
4359             if (end 
< 0) end 
= llen
+end
; 
4360             if (start 
< 0) start 
= 0; 
4361             if (end 
< 0) end 
= 0; 
4363             /* indexes sanity checks */ 
4364             if (start 
> end 
|| start 
>= llen
) { 
4365                 /* Out of range start or start > end result in empty list */ 
4369                 if (end 
>= llen
) end 
= llen
-1; 
4374             /* Remove list elements to perform the trim */ 
4375             for (j 
= 0; j 
< ltrim
; j
++) { 
4376                 ln 
= listFirst(list
); 
4377                 listDelNode(list
,ln
); 
4379             for (j 
= 0; j 
< rtrim
; j
++) { 
4380                 ln 
= listLast(list
); 
4381                 listDelNode(list
,ln
); 
4384             addReply(c
,shared
.ok
); 
4389 static void lremCommand(redisClient 
*c
) { 
4392     o 
= lookupKeyWrite(c
->db
,c
->argv
[1]); 
4394         addReply(c
,shared
.czero
); 
4396         if (o
->type 
!= REDIS_LIST
) { 
4397             addReply(c
,shared
.wrongtypeerr
); 
4399             list 
*list 
= o
->ptr
; 
4400             listNode 
*ln
, *next
; 
4401             int toremove 
= atoi(c
->argv
[2]->ptr
); 
4406                 toremove 
= -toremove
; 
4409             ln 
= fromtail 
? list
->tail 
: list
->head
; 
4411                 robj 
*ele 
= listNodeValue(ln
); 
4413                 next 
= fromtail 
? ln
->prev 
: ln
->next
; 
4414                 if (compareStringObjects(ele
,c
->argv
[3]) == 0) { 
4415                     listDelNode(list
,ln
); 
4418                     if (toremove 
&& removed 
== toremove
) break; 
4422             addReplySds(c
,sdscatprintf(sdsempty(),":%d\r\n",removed
)); 
4427 /* This is the semantic of this command: 
4428  *  RPOPLPUSH srclist dstlist: 
4429  *   IF LLEN(srclist) > 0 
4430  *     element = RPOP srclist 
4431  *     LPUSH dstlist element 
4438  * The idea is to be able to get an element from a list in a reliable way 
4439  * since the element is not just returned but pushed against another list 
4440  * as well. This command was originally proposed by Ezra Zygmuntowicz. 
4442 static void rpoplpushcommand(redisClient 
*c
) { 
4445     sobj 
= lookupKeyWrite(c
->db
,c
->argv
[1]); 
4447         addReply(c
,shared
.nullbulk
); 
4449         if (sobj
->type 
!= REDIS_LIST
) { 
4450             addReply(c
,shared
.wrongtypeerr
); 
4452             list 
*srclist 
= sobj
->ptr
; 
4453             listNode 
*ln 
= listLast(srclist
); 
4456                 addReply(c
,shared
.nullbulk
); 
4458                 robj 
*dobj 
= lookupKeyWrite(c
->db
,c
->argv
[2]); 
4459                 robj 
*ele 
= listNodeValue(ln
); 
4462                 if (dobj 
&& dobj
->type 
!= REDIS_LIST
) { 
4463                     addReply(c
,shared
.wrongtypeerr
); 
4467                 /* Add the element to the target list (unless it's directly 
4468                  * passed to some BLPOP-ing client) */ 
4469                 if (!handleClientsWaitingListPush(c
,c
->argv
[2],ele
)) { 
4471                         /* Create the list if the key does not exist */ 
4472                         dobj 
= createListObject(); 
4473                         dictAdd(c
->db
->dict
,c
->argv
[2],dobj
); 
4474                         incrRefCount(c
->argv
[2]); 
4476                     dstlist 
= dobj
->ptr
; 
4477                     listAddNodeHead(dstlist
,ele
); 
4481                 /* Send the element to the client as reply as well */ 
4482                 addReplyBulkLen(c
,ele
); 
4484                 addReply(c
,shared
.crlf
); 
4486                 /* Finally remove the element from the source list */ 
4487                 listDelNode(srclist
,ln
); 
4495 /* ==================================== Sets ================================ */ 
4497 static void saddCommand(redisClient 
*c
) { 
4500     set 
= lookupKeyWrite(c
->db
,c
->argv
[1]); 
4502         set 
= createSetObject(); 
4503         dictAdd(c
->db
->dict
,c
->argv
[1],set
); 
4504         incrRefCount(c
->argv
[1]); 
4506         if (set
->type 
!= REDIS_SET
) { 
4507             addReply(c
,shared
.wrongtypeerr
); 
4511     if (dictAdd(set
->ptr
,c
->argv
[2],NULL
) == DICT_OK
) { 
4512         incrRefCount(c
->argv
[2]); 
4514         addReply(c
,shared
.cone
); 
4516         addReply(c
,shared
.czero
); 
4520 static void sremCommand(redisClient 
*c
) { 
4523     set 
= lookupKeyWrite(c
->db
,c
->argv
[1]); 
4525         addReply(c
,shared
.czero
); 
4527         if (set
->type 
!= REDIS_SET
) { 
4528             addReply(c
,shared
.wrongtypeerr
); 
4531         if (dictDelete(set
->ptr
,c
->argv
[2]) == DICT_OK
) { 
4533             if (htNeedsResize(set
->ptr
)) dictResize(set
->ptr
); 
4534             addReply(c
,shared
.cone
); 
4536             addReply(c
,shared
.czero
); 
4541 static void smoveCommand(redisClient 
*c
) { 
4542     robj 
*srcset
, *dstset
; 
4544     srcset 
= lookupKeyWrite(c
->db
,c
->argv
[1]); 
4545     dstset 
= lookupKeyWrite(c
->db
,c
->argv
[2]); 
4547     /* If the source key does not exist return 0, if it's of the wrong type 
4549     if (srcset 
== NULL 
|| srcset
->type 
!= REDIS_SET
) { 
4550         addReply(c
, srcset 
? shared
.wrongtypeerr 
: shared
.czero
); 
4553     /* Error if the destination key is not a set as well */ 
4554     if (dstset 
&& dstset
->type 
!= REDIS_SET
) { 
4555         addReply(c
,shared
.wrongtypeerr
); 
4558     /* Remove the element from the source set */ 
4559     if (dictDelete(srcset
->ptr
,c
->argv
[3]) == DICT_ERR
) { 
4560         /* Key not found in the src set! return zero */ 
4561         addReply(c
,shared
.czero
); 
4565     /* Add the element to the destination set */ 
4567         dstset 
= createSetObject(); 
4568         dictAdd(c
->db
->dict
,c
->argv
[2],dstset
); 
4569         incrRefCount(c
->argv
[2]); 
4571     if (dictAdd(dstset
->ptr
,c
->argv
[3],NULL
) == DICT_OK
) 
4572         incrRefCount(c
->argv
[3]); 
4573     addReply(c
,shared
.cone
); 
4576 static void sismemberCommand(redisClient 
*c
) { 
4579     set 
= lookupKeyRead(c
->db
,c
->argv
[1]); 
4581         addReply(c
,shared
.czero
); 
4583         if (set
->type 
!= REDIS_SET
) { 
4584             addReply(c
,shared
.wrongtypeerr
); 
4587         if (dictFind(set
->ptr
,c
->argv
[2])) 
4588             addReply(c
,shared
.cone
); 
4590             addReply(c
,shared
.czero
); 
4594 static void scardCommand(redisClient 
*c
) { 
4598     o 
= lookupKeyRead(c
->db
,c
->argv
[1]); 
4600         addReply(c
,shared
.czero
); 
4603         if (o
->type 
!= REDIS_SET
) { 
4604             addReply(c
,shared
.wrongtypeerr
); 
4607             addReplySds(c
,sdscatprintf(sdsempty(),":%lu\r\n", 
4613 static void spopCommand(redisClient 
*c
) { 
4617     set 
= lookupKeyWrite(c
->db
,c
->argv
[1]); 
4619         addReply(c
,shared
.nullbulk
); 
4621         if (set
->type 
!= REDIS_SET
) { 
4622             addReply(c
,shared
.wrongtypeerr
); 
4625         de 
= dictGetRandomKey(set
->ptr
); 
4627             addReply(c
,shared
.nullbulk
); 
4629             robj 
*ele 
= dictGetEntryKey(de
); 
4631             addReplyBulkLen(c
,ele
); 
4633             addReply(c
,shared
.crlf
); 
4634             dictDelete(set
->ptr
,ele
); 
4635             if (htNeedsResize(set
->ptr
)) dictResize(set
->ptr
); 
4641 static void srandmemberCommand(redisClient 
*c
) { 
4645     set 
= lookupKeyRead(c
->db
,c
->argv
[1]); 
4647         addReply(c
,shared
.nullbulk
); 
4649         if (set
->type 
!= REDIS_SET
) { 
4650             addReply(c
,shared
.wrongtypeerr
); 
4653         de 
= dictGetRandomKey(set
->ptr
); 
4655             addReply(c
,shared
.nullbulk
); 
4657             robj 
*ele 
= dictGetEntryKey(de
); 
4659             addReplyBulkLen(c
,ele
); 
4661             addReply(c
,shared
.crlf
); 
4666 static int qsortCompareSetsByCardinality(const void *s1
, const void *s2
) { 
4667     dict 
**d1 
= (void*) s1
, **d2 
= (void*) s2
; 
4669     return dictSize(*d1
)-dictSize(*d2
); 
4672 static void sinterGenericCommand(redisClient 
*c
, robj 
**setskeys
, unsigned long setsnum
, robj 
*dstkey
) { 
4673     dict 
**dv 
= zmalloc(sizeof(dict
*)*setsnum
); 
4676     robj 
*lenobj 
= NULL
, *dstset 
= NULL
; 
4677     unsigned long j
, cardinality 
= 0; 
4679     for (j 
= 0; j 
< setsnum
; j
++) { 
4683                     lookupKeyWrite(c
->db
,setskeys
[j
]) : 
4684                     lookupKeyRead(c
->db
,setskeys
[j
]); 
4688                 if (deleteKey(c
->db
,dstkey
)) 
4690                 addReply(c
,shared
.czero
); 
4692                 addReply(c
,shared
.nullmultibulk
); 
4696         if (setobj
->type 
!= REDIS_SET
) { 
4698             addReply(c
,shared
.wrongtypeerr
); 
4701         dv
[j
] = setobj
->ptr
; 
4703     /* Sort sets from the smallest to the largest; this will improve our 
4704      * algorithm's performance */ 
4705     qsort(dv
,setsnum
,sizeof(dict
*),qsortCompareSetsByCardinality
); 
4707     /* The first thing we should output is the total number of elements... 
4708      * since this is a multi-bulk write, but at this stage we don't know 
4709      * the intersection set size, so we use a trick, append an empty object 
4710      * to the output list and save the pointer to later modify it with the 
4713         lenobj 
= createObject(REDIS_STRING
,NULL
); 
4715         decrRefCount(lenobj
); 
4717         /* If we have a target key where to store the resulting set 
4718          * create this key with an empty set inside */ 
4719         dstset 
= createSetObject(); 
4722     /* Iterate all the elements of the first (smallest) set, and test 
4723      * the element against all the other sets, if at least one set does 
4724      * not include the element it is discarded */ 
4725     di 
= dictGetIterator(dv
[0]); 
4727     while((de 
= dictNext(di
)) != NULL
) { 
4730         for (j 
= 1; j 
< setsnum
; j
++) 
4731             if (dictFind(dv
[j
],dictGetEntryKey(de
)) == NULL
) break; 
4733             continue; /* at least one set does not contain the member */ 
4734         ele 
= dictGetEntryKey(de
); 
4736             addReplyBulkLen(c
,ele
); 
4738             addReply(c
,shared
.crlf
); 
4741             dictAdd(dstset
->ptr
,ele
,NULL
); 
4745     dictReleaseIterator(di
); 
4748         /* Store the resulting set into the target */ 
4749         deleteKey(c
->db
,dstkey
); 
4750         dictAdd(c
->db
->dict
,dstkey
,dstset
); 
4751         incrRefCount(dstkey
); 
4755         lenobj
->ptr 
= sdscatprintf(sdsempty(),"*%lu\r\n",cardinality
); 
4757         addReplySds(c
,sdscatprintf(sdsempty(),":%lu\r\n", 
4758             dictSize((dict
*)dstset
->ptr
))); 
/* Command handler: hands every argument after the command name
 * (argv+1, argc-1) to sinterGenericCommand(), with NULL as the last
 * argument (presumably the destination key, as in sinterstoreCommand()). */
4764 static void sinterCommand(redisClient 
*c
) { 
4765     sinterGenericCommand(c
,c
->argv
+1,c
->argc
-1,NULL
); 
/* Command handler: argv[1] is passed as the last argument of
 * sinterGenericCommand() (presumably the destination key) and the
 * remaining arguments (argv+2, argc-2) are passed as the source keys. */
4768 static void sinterstoreCommand(redisClient 
*c
) { 
4769     sinterGenericCommand(c
,c
->argv
+2,c
->argc
-2,c
->argv
[1]); 
/* Operation selectors passed as the 'op' argument of
 * sunionDiffGenericCommand() and zunionInterGenericCommand(). */
4772 #define REDIS_OP_UNION 0 
4773 #define REDIS_OP_DIFF 1 
4774 #define REDIS_OP_INTER 2 
/* Shared implementation behind sunionCommand(), sunionstoreCommand(),
 * sdiffCommand() and sdiffstoreCommand().
 *
 * c        - client that issued the command; replies are queued on it
 * setskeys - array of 'setsnum' keys naming the source sets
 * setsnum  - number of entries in 'setskeys'
 * dstkey   - key that receives the result, or NULL when called by the
 *            non-STORE wrappers
 * op       - REDIS_OP_UNION or REDIS_OP_DIFF
 *
 * A source key that holds a non-set value is answered with
 * shared.wrongtypeerr. */
4776 static void sunionDiffGenericCommand(redisClient 
*c
, robj 
**setskeys
, int setsnum
, robj 
*dstkey
, int op
) { 
4777     dict 
**dv 
= zmalloc(sizeof(dict
*)*setsnum
); 
4780     robj 
*dstset 
= NULL
; 
4781     int j
, cardinality 
= 0; 
/* Resolve every source key and remember its dict in dv[j] */
4783     for (j 
= 0; j 
< setsnum
; j
++) { 
4787                     lookupKeyWrite(c
->db
,setskeys
[j
]) : 
4788                     lookupKeyRead(c
->db
,setskeys
[j
]); 
4793         if (setobj
->type 
!= REDIS_SET
) { 
4795             addReply(c
,shared
.wrongtypeerr
); 
4798         dv
[j
] = setobj
->ptr
; 
4801     /* We need a temp set object to store our union. If the dstkey 
4802      * is not NULL (that is, we are inside an SUNIONSTORE operation) then 
4803      * this set object will be the resulting object to set into the target key*/ 
4804     dstset 
= createSetObject(); 
4806     /* Iterate all the elements of all the sets, add every element a single 
4807      * time to the result set */ 
4808     for (j 
= 0; j 
< setsnum
; j
++) { 
4809         if (op 
== REDIS_OP_DIFF 
&& j 
== 0 && !dv
[j
]) break; /* result set is empty */ 
4810         if (!dv
[j
]) continue; /* non existing keys are like empty sets */ 
4812         di 
= dictGetIterator(dv
[j
]); 
4814         while((de 
= dictNext(di
)) != NULL
) { 
4817             /* dictAdd will not add the same element multiple times */ 
4818             ele 
= dictGetEntryKey(de
); 
            /* UNION adds the members of every set; DIFF adds only the
             * members of the first set (j == 0) ... */
4819             if (op 
== REDIS_OP_UNION 
|| j 
== 0) { 
4820                 if (dictAdd(dstset
->ptr
,ele
,NULL
) == DICT_OK
) { 
            /* ... and removes the members of every following set */
4824             } else if (op 
== REDIS_OP_DIFF
) { 
4825                 if (dictDelete(dstset
->ptr
,ele
) == DICT_OK
) { 
4830         dictReleaseIterator(di
); 
4832         if (op 
== REDIS_OP_DIFF 
&& cardinality 
== 0) break; /* result set is empty */ 
4835     /* Output the content of the resulting set, if not in STORE mode */ 
4837         addReplySds(c
,sdscatprintf(sdsempty(),"*%d\r\n",cardinality
)); 
4838         di 
= dictGetIterator(dstset
->ptr
); 
4839         while((de 
= dictNext(di
)) != NULL
) { 
4842             ele 
= dictGetEntryKey(de
); 
4843             addReplyBulkLen(c
,ele
); 
4845             addReply(c
,shared
.crlf
); 
4847         dictReleaseIterator(di
); 
4849         /* If we have a target key where to store the resulting set 
4850          * create this key with the result set inside */ 
4851         deleteKey(c
->db
,dstkey
); 
4852         dictAdd(c
->db
->dict
,dstkey
,dstset
); 
4853         incrRefCount(dstkey
); 
4858         decrRefCount(dstset
); 
4860         addReplySds(c
,sdscatprintf(sdsempty(),":%lu\r\n", 
4861             dictSize((dict
*)dstset
->ptr
))); 
/* Command handler: union of the sets named by argv[1..argc-1], with no
 * destination key (dstkey == NULL). */
4867 static void sunionCommand(redisClient 
*c
) { 
4868     sunionDiffGenericCommand(c
,c
->argv
+1,c
->argc
-1,NULL
,REDIS_OP_UNION
); 
/* Command handler: union of the sets named by argv[2..argc-1], with
 * argv[1] passed as the destination key. */
4871 static void sunionstoreCommand(redisClient 
*c
) { 
4872     sunionDiffGenericCommand(c
,c
->argv
+2,c
->argc
-2,c
->argv
[1],REDIS_OP_UNION
); 
/* Command handler: difference of the sets named by argv[1..argc-1], with
 * no destination key (dstkey == NULL). */
4875 static void sdiffCommand(redisClient 
*c
) { 
4876     sunionDiffGenericCommand(c
,c
->argv
+1,c
->argc
-1,NULL
,REDIS_OP_DIFF
); 
/* Command handler: difference of the sets named by argv[2..argc-1], with
 * argv[1] passed as the destination key. */
4879 static void sdiffstoreCommand(redisClient 
*c
) { 
4880     sunionDiffGenericCommand(c
,c
->argv
+2,c
->argc
-2,c
->argv
[1],REDIS_OP_DIFF
); 
4883 /* ==================================== ZSets =============================== */ 
4885 /* ZSETs are ordered sets using two data structures to hold the same elements 
4886  * in order to get O(log(N)) INSERT and REMOVE operations into a sorted 
4887  * data structure. 
4888  * 
4889  * The elements are added to a hash table mapping Redis objects to scores. 
4890  * At the same time the elements are added to a skip list mapping scores 
4891  * to Redis objects (so objects are sorted by scores in this "view"). */ 
4893 /* This skiplist implementation is almost a C translation of the original 
4894  * algorithm described by William Pugh in "Skip Lists: A Probabilistic 
4895  * Alternative to Balanced Trees", modified in three ways: 
4896  * a) this implementation allows for repeated values. 
4897  * b) the comparison is not just by key (our 'score') but by satellite data. 
4898  * c) there is a back pointer, so it's a doubly linked list with the back 
4899  * pointers being only at "level 1". This allows to traverse the list 
4900  * from tail to head, useful for ZREVRANGE. */ 
/* Allocate a skiplist node for 'obj' with the given 'score'.
 *
 * 'level' is the number of forward pointers the node gets. The span array
 * is allocated with level-1 entries: there is no slot for level 0, which
 * is why the rest of the skiplist code indexes it as span[i-1]. */
4902 static zskiplistNode 
*zslCreateNode(int level
, double score
, robj 
*obj
) { 
4903     zskiplistNode 
*zn 
= zmalloc(sizeof(*zn
)); 
4905     zn
->forward 
= zmalloc(sizeof(zskiplistNode
*) * level
); 
4907         zn
->span 
= zmalloc(sizeof(unsigned int) * (level 
- 1)); 
/* Allocate a new skiplist. The header node is created with
 * ZSKIPLIST_MAXLEVEL levels, score 0 and a NULL object; all of its forward
 * pointers are set to NULL, all of its spans to 0 and its backward pointer
 * to NULL. */
4913 static zskiplist 
*zslCreate(void) { 
4917     zsl 
= zmalloc(sizeof(*zsl
)); 
4920     zsl
->header 
= zslCreateNode(ZSKIPLIST_MAXLEVEL
,0,NULL
); 
4921     for (j 
= 0; j 
< ZSKIPLIST_MAXLEVEL
; j
++) { 
4922         zsl
->header
->forward
[j
] = NULL
; 
4924         /* span has space for ZSKIPLIST_MAXLEVEL-1 elements */ 
4925         if (j 
< ZSKIPLIST_MAXLEVEL
-1) 
4926             zsl
->header
->span
[j
] = 0; 
4928     zsl
->header
->backward 
= NULL
; 
/* Release a skiplist node: drops the node's reference on its object and
 * frees the forward-pointer array. */
4933 static void zslFreeNode(zskiplistNode 
*node
) { 
4934     decrRefCount(node
->obj
); 
4935     zfree(node
->forward
); 
/* Release a whole skiplist. The header's forward and span arrays are freed
 * directly (the header is not passed to zslFreeNode() here); the remaining
 * nodes are visited through the level-0 forward pointers, reading the
 * successor into 'next' on each step. */
4940 static void zslFree(zskiplist 
*zsl
) { 
4941     zskiplistNode 
*node 
= zsl
->header
->forward
[0], *next
; 
4943     zfree(zsl
->header
->forward
); 
4944     zfree(zsl
->header
->span
); 
4947         next 
= node
->forward
[0]; 
/* Pick the level for a new node (called by zslInsert()). The loop keeps
 * running while the low 16 bits of random() are below
 * ZSKIPLIST_P * 0xFFFF, i.e. with probability of about ZSKIPLIST_P per
 * iteration. */
4954 static int zslRandomLevel(void) { 
4956     while ((random()&0xFFFF) < (ZSKIPLIST_P 
* 0xFFFF)) 
/* Insert a new node holding 'obj' with the given 'score'.
 *
 * Nodes are ordered by score and, for equal scores, by
 * compareStringObjects() on their objects. While searching for the insert
 * position, rank[i] accumulates the number of nodes crossed (a level-0 step
 * counts as 1, a step at level i > 0 counts as span[i-1]); the ranks are
 * then used to fix the spans around the new node.
 *
 * The caller must make sure the element is not already in the list. */
4961 static void zslInsert(zskiplist 
*zsl
, double score
, robj 
*obj
) { 
4962     zskiplistNode 
*update
[ZSKIPLIST_MAXLEVEL
], *x
; 
4963     unsigned int rank
[ZSKIPLIST_MAXLEVEL
]; 
4967     for (i 
= zsl
->level
-1; i 
>= 0; i
--) { 
4968         /* store rank that is crossed to reach the insert position */ 
4969         rank
[i
] = i 
== (zsl
->level
-1) ? 0 : rank
[i
+1]; 
4971         while (x
->forward
[i
] && 
4972             (x
->forward
[i
]->score 
< score 
|| 
4973                 (x
->forward
[i
]->score 
== score 
&& 
4974                 compareStringObjects(x
->forward
[i
]->obj
,obj
) < 0))) { 
4975             rank
[i
] += i 
> 0 ? x
->span
[i
-1] : 1; 
4980     /* we assume the key is not already inside, since we allow duplicated 
4981      * scores, and the re-insertion of score and redis object should never 
4982      * happen since the caller of zslInsert() should test in the hash table 
4983      * if the element is already inside or not. */ 
4984     level 
= zslRandomLevel(); 
    /* Levels the list did not use so far start from the header, whose span
     * at those levels is set to the current list length */
4985     if (level 
> zsl
->level
) { 
4986         for (i 
= zsl
->level
; i 
< level
; i
++) { 
4988             update
[i
] = zsl
->header
; 
4989             update
[i
]->span
[i
-1] = zsl
->length
; 
4993     x 
= zslCreateNode(level
,score
,obj
); 
    /* Link the new node after update[i] at every level it takes part in */
4994     for (i 
= 0; i 
< level
; i
++) { 
4995         x
->forward
[i
] = update
[i
]->forward
[i
]; 
4996         update
[i
]->forward
[i
] = x
; 
4998         /* update span covered by update[i] as x is inserted here */ 
5000             x
->span
[i
-1] = update
[i
]->span
[i
-1] - (rank
[0] - rank
[i
]); 
5001             update
[i
]->span
[i
-1] = (rank
[0] - rank
[i
]) + 1; 
5005     /* increment span for untouched levels */ 
5006     for (i 
= level
; i 
< zsl
->level
; i
++) { 
5007         update
[i
]->span
[i
-1]++; 
    /* The header is never exposed as a backward neighbour: the first
     * element gets a NULL backward pointer instead */
5010     x
->backward 
= (update
[0] == zsl
->header
) ? NULL 
: update
[0]; 
5012         x
->forward
[0]->backward 
= x
; 
5018 /* Delete an element with matching score/object from the skiplist. 
 *
 * The search uses the same ordering as zslInsert() (score first, then
 * compareStringObjects() on the object). When the candidate node matches
 * both 'score' and 'obj' it is unlinked from every level and the spans of
 * its predecessors are adjusted. Returns 0 when no matching node exists;
 * the callers in this file assert a non-zero result after a successful
 * delete. */ 
5019 static int zslDelete(zskiplist 
*zsl
, double score
, robj 
*obj
) { 
5020     zskiplistNode 
*update
[ZSKIPLIST_MAXLEVEL
], *x
; 
5024     for (i 
= zsl
->level
-1; i 
>= 0; i
--) { 
5025         while (x
->forward
[i
] && 
5026             (x
->forward
[i
]->score 
< score 
|| 
5027                 (x
->forward
[i
]->score 
== score 
&& 
5028                 compareStringObjects(x
->forward
[i
]->obj
,obj
) < 0))) 
5032     /* We may have multiple elements with the same score, what we need 
5033      * is to find the element with both the right score and object. */ 
5035     if (x 
&& score 
== x
->score 
&& compareStringObjects(x
->obj
,obj
) == 0) { 
5036         for (i 
= 0; i 
< zsl
->level
; i
++) { 
            /* Levels where x is linked: the predecessor absorbs x's span
             * (minus x itself) and skips over x */
5037             if (update
[i
]->forward
[i
] == x
) { 
5039                     update
[i
]->span
[i
-1] += x
->span
[i
-1] - 1; 
5041                 update
[i
]->forward
[i
] = x
->forward
[i
]; 
5043                 /* invariant: i > 0, because update[0]->forward[0] 
5044                  * is always equal to x */ 
5045                 update
[i
]->span
[i
-1] -= 1; 
        /* Fix the level-0 back link of the successor, or the tail pointer */
5048         if (x
->forward
[0]) { 
5049             x
->forward
[0]->backward 
= x
->backward
; 
5051             zsl
->tail 
= x
->backward
; 
        /* Condition for lowering the list level: the topmost level has no
         * nodes left */
5054         while(zsl
->level 
> 1 && zsl
->header
->forward
[zsl
->level
-1] == NULL
) 
5059         return 0; /* not found */ 
5061     return 0; /* not found */ 
5064 /* Delete all the elements with score between min and max from the skiplist. 
5065  * Min and max are inclusive, so every element with score >= min && score <= max is deleted. 
5066  * Note that this function takes the reference to the hash table view of the 
5067  * sorted set, in order to remove the elements from the hash table too. 
 *
 * Returns 'removed', which the caller adds to server.dirty and sends back
 * as the integer reply. */ 
5068 static unsigned long zslDeleteRange(zskiplist 
*zsl
, double min
, double max
, dict 
*dict
) { 
5069     zskiplistNode 
*update
[ZSKIPLIST_MAXLEVEL
], *x
; 
5070     unsigned long removed 
= 0; 
    /* Skip, at every level, the nodes whose score is below min */
5074     for (i 
= zsl
->level
-1; i 
>= 0; i
--) { 
5075         while (x
->forward
[i
] && x
->forward
[i
]->score 
< min
) 
5079     /* We may have multiple elements with the same score, what we need 
5080      * is to find the element with both the right score and object. */ 
    /* NOTE(review): the comment above reads like a leftover from
     * zslDelete(); the loop below matches on score only (score <= max). */
5082     while (x 
&& x
->score 
<= max
) { 
5083         zskiplistNode 
*next
; 
5085         for (i 
= 0; i 
< zsl
->level
; i
++) { 
5086             if (update
[i
]->forward
[i
] == x
) { 
5088                     update
[i
]->span
[i
-1] += x
->span
[i
-1] - 1; 
5090                 update
[i
]->forward
[i
] = x
->forward
[i
]; 
5092                 /* invariant: i > 0, because update[0]->forward[0] 
5093                  * is always equal to x */ 
5094                 update
[i
]->span
[i
-1] -= 1; 
5097         if (x
->forward
[0]) { 
5098             x
->forward
[0]->backward 
= x
->backward
; 
5100             zsl
->tail 
= x
->backward
; 
        /* Read the successor and drop the element from the hash table view */
5102         next 
= x
->forward
[0]; 
5103         dictDelete(dict
,x
->obj
); 
5105         while(zsl
->level 
> 1 && zsl
->header
->forward
[zsl
->level
-1] == NULL
) 
5111     return removed
; /* count reported to the caller */ 
5114 /* Find the first node having a score equal or greater than the specified one. 
5115  * Returns NULL if there is no match. 
 *
 * The search skips, level by level, the nodes whose score is below
 * 'score' and returns the level-0 successor of the node it stopped at. */ 
5116 static zskiplistNode 
*zslFirstWithScore(zskiplist 
*zsl
, double score
) { 
5121     for (i 
= zsl
->level
-1; i 
>= 0; i
--) { 
5122         while (x
->forward
[i
] && x
->forward
[i
]->score 
< score
) 
5125     /* We may have multiple elements with the same score, what we need 
5126      * is to find the element with both the right score and object. */ 
5127     return x
->forward
[0]; 
5130 /* Find the rank for an element by both score and key. 
5131  * Returns 0 when the element cannot be found, rank otherwise. 
5132  * Note that the rank is 1-based due to the span of zsl->header to the 
5133  * first element. */ 
5134 static unsigned long zslGetRank(zskiplist 
*zsl
, double score
, robj 
*o
) { 
5136     unsigned long rank 
= 0; 
    /* Same ordering as zslInsert(), but with "<= 0" on the object
     * comparison so the walk also steps onto the node equal to 'o'; every
     * step adds the crossed distance (1 at level 0, span[i-1] above). */
5140     for (i 
= zsl
->level
-1; i 
>= 0; i
--) { 
5141         while (x
->forward
[i
] && 
5142             (x
->forward
[i
]->score 
< score 
|| 
5143                 (x
->forward
[i
]->score 
== score 
&& 
5144                 compareStringObjects(x
->forward
[i
]->obj
,o
) <= 0))) { 
5145             rank 
+= i 
> 0 ? x
->span
[i
-1] : 1; 
5149         /* x might be equal to zsl->header, so test if obj is non-NULL */ 
5150         if (x
->obj 
&& compareStringObjects(x
->obj
,o
) == 0) { 
5157 /* Finds an element by its rank. The rank argument needs to be 1-based. 
 *
 * Walks from the top level down, moving forward only while the distance
 * traversed so far plus the next step (1 at level 0, span[i-1] above)
 * does not exceed 'rank', and checks for traversed == rank at each level. */ 
5158 zskiplistNode
* zslGetElementByRank(zskiplist 
*zsl
, unsigned long rank
) { 
5160     unsigned long traversed 
= 0; 
5164     for (i 
= zsl
->level
-1; i 
>= 0; i
--) { 
5165         while (x
->forward
[i
] && (traversed 
+ (i 
> 0 ? x
->span
[i
-1] : 1)) <= rank
) { 
5166             traversed 
+= i 
> 0 ? x
->span
[i
-1] : 1; 
5170         if (traversed 
== rank
) { 
5177 /* The actual Z-commands implementations */ 
5179 /* This generic command implements both ZADD and ZINCRBY. 
5180  * scoreval is the score if the operation is a ZADD (doincrement == 0) or 
5181  * the increment if the operation is a ZINCRBY (doincrement == 1). 
 *
 * c   - client that issued the command; replies are queued on it
 * key - key of the sorted set; a new sorted set is created when the key
 *       does not exist, and a value of another type is answered with
 *       shared.wrongtypeerr
 * ele - member to add or update */ 
5182 static void zaddGenericCommand(redisClient 
*c
, robj 
*key
, robj 
*ele
, double scoreval
, int doincrement
) { 
5187     zsetobj 
= lookupKeyWrite(c
->db
,key
); 
5188     if (zsetobj 
== NULL
) { 
5189         zsetobj 
= createZsetObject(); 
5190         dictAdd(c
->db
->dict
,key
,zsetobj
); 
5193         if (zsetobj
->type 
!= REDIS_ZSET
) { 
5194             addReply(c
,shared
.wrongtypeerr
); 
5200     /* Ok now since we implement both ZADD and ZINCRBY here the code 
5201      * needs to handle the two different conditions. It's all about setting 
5202      * '*score', that is, the new score to set, to the right value. */ 
    /* The score is heap allocated because its pointer is stored as the
     * value of the hash table entry (dictAdd/dictReplace below) */
5203     score 
= zmalloc(sizeof(double)); 
5207         /* Read the old score. If the element was not present starts from 0 */ 
5208         de 
= dictFind(zs
->dict
,ele
); 
5210             double *oldscore 
= dictGetEntryVal(de
); 
5211             *score 
= *oldscore 
+ scoreval
; 
5219     /* What follows is a simple remove and re-insert operation that is common 
5220      * to both ZADD and ZINCRBY... */ 
5221     if (dictAdd(zs
->dict
,ele
,score
) == DICT_OK
) { 
5222         /* case 1: New element */ 
5223         incrRefCount(ele
); /* added to hash */ 
5224         zslInsert(zs
->zsl
,*score
,ele
); 
5225         incrRefCount(ele
); /* added to skiplist */ 
5228             addReplyDouble(c
,*score
); 
5230             addReply(c
,shared
.cone
); 
5235         /* case 2: Score update operation */ 
5236         de 
= dictFind(zs
->dict
,ele
); 
5237         redisAssert(de 
!= NULL
); 
5238         oldscore 
= dictGetEntryVal(de
); 
        /* The skiplist and the hash table are touched only when the score
         * really changes */
5239         if (*score 
!= *oldscore
) { 
5242             /* Remove and insert the element in the skip list with new score */ 
5243             deleted 
= zslDelete(zs
->zsl
,*oldscore
,ele
); 
5244             redisAssert(deleted 
!= 0); 
5245             zslInsert(zs
->zsl
,*score
,ele
); 
5247             /* Update the score in the hash table */ 
5248             dictReplace(zs
->dict
,ele
,score
); 
5254             addReplyDouble(c
,*score
); 
5256             addReply(c
,shared
.czero
); 
/* Command handler: parses argv[2] as the score with strtod() (endptr is
 * NULL, so malformed input is not detected here) and calls
 * zaddGenericCommand() with key argv[1], member argv[3] and
 * doincrement == 0. */
5260 static void zaddCommand(redisClient 
*c
) { 
5263     scoreval 
= strtod(c
->argv
[2]->ptr
,NULL
); 
5264     zaddGenericCommand(c
,c
->argv
[1],c
->argv
[3],scoreval
,0); 
/* Command handler: parses argv[2] as the increment with strtod() (endptr
 * is NULL, so malformed input is not detected here) and calls
 * zaddGenericCommand() with key argv[1], member argv[3] and
 * doincrement == 1. */
5267 static void zincrbyCommand(redisClient 
*c
) { 
5270     scoreval 
= strtod(c
->argv
[2]->ptr
,NULL
); 
5271     zaddGenericCommand(c
,c
->argv
[1],c
->argv
[3],scoreval
,1); 
/* Command handler: removes member argv[2] from the sorted set at argv[1].
 *
 * Replies shared.czero when the key does not exist or the member is not
 * in the set, shared.wrongtypeerr when the key holds another type, and
 * shared.cone after the member was removed from both the skiplist and the
 * hash table. */
5274 static void zremCommand(redisClient 
*c
) { 
5278     zsetobj 
= lookupKeyWrite(c
->db
,c
->argv
[1]); 
5279     if (zsetobj 
== NULL
) { 
5280         addReply(c
,shared
.czero
); 
5286         if (zsetobj
->type 
!= REDIS_ZSET
) { 
5287             addReply(c
,shared
.wrongtypeerr
); 
5291         de 
= dictFind(zs
->dict
,c
->argv
[2]); 
5293             addReply(c
,shared
.czero
); 
5296         /* Delete from the skiplist */ 
        /* The score stored in the hash table is needed to locate the node */
5297         oldscore 
= dictGetEntryVal(de
); 
5298         deleted 
= zslDelete(zs
->zsl
,*oldscore
,c
->argv
[2]); 
5299         redisAssert(deleted 
!= 0); 
5301         /* Delete from the hash table */ 
5302         dictDelete(zs
->dict
,c
->argv
[2]); 
5303         if (htNeedsResize(zs
->dict
)) dictResize(zs
->dict
); 
5305         addReply(c
,shared
.cone
); 
/* Command handler: removes from the sorted set at argv[1] every member
 * whose score lies between argv[2] (min) and argv[3] (max), both parsed
 * with strtod() without error checking.
 *
 * Replies shared.czero when the key does not exist, shared.wrongtypeerr
 * when it holds another type, otherwise the count returned by
 * zslDeleteRange() as an integer reply; the same count is added to
 * server.dirty. */
5309 static void zremrangebyscoreCommand(redisClient 
*c
) { 
5310     double min 
= strtod(c
->argv
[2]->ptr
,NULL
); 
5311     double max 
= strtod(c
->argv
[3]->ptr
,NULL
); 
5315     zsetobj 
= lookupKeyWrite(c
->db
,c
->argv
[1]); 
5316     if (zsetobj 
== NULL
) { 
5317         addReply(c
,shared
.czero
); 
5321         if (zsetobj
->type 
!= REDIS_ZSET
) { 
5322             addReply(c
,shared
.wrongtypeerr
); 
5326         deleted 
= zslDeleteRange(zs
->zsl
,min
,max
,zs
->dict
); 
5327         if (htNeedsResize(zs
->dict
)) dictResize(zs
->dict
); 
5328         server
.dirty 
+= deleted
; 
5329         addReplySds(c
,sdscatprintf(sdsempty(),":%lu\r\n",deleted
)); 
/* Shared implementation behind zunionCommand() and zinterCommand().
 *
 * c      - client that issued the command. argv[2] is the number of input
 *          keys (parsed with atoi()), argv[3..] are the input keys, and an
 *          optional "weights" keyword followed by one weight per input key
 *          may follow
 * dstkey - key that receives the resulting sorted set
 * op     - REDIS_OP_UNION or REDIS_OP_INTER (anything else trips the
 *          redisAssert() at the end of the dispatch)
 *
 * The score of a member in the result is the sum, over the input sets
 * that contain it, of weight * score. The reply is the length of the
 * resulting skiplist. */
5333 static void zunionInterGenericCommand(redisClient 
*c
, robj 
*dstkey
, int op
) { 
5334     int i
, j
, k
, zsetnum
; 
5342     /* expect zsetnum input keys to be given */ 
5343     zsetnum 
= atoi(c
->argv
[2]->ptr
); 
5345         addReplySds(c
,sdsnew("-ERR at least 1 input key is needed for ZUNION/ZINTER\r\n")); 
5349     /* test if the expected number of keys would overflow */ 
5350     if (3+zsetnum 
> c
->argc
) { 
5351         addReply(c
,shared
.syntaxerr
); 
5355     /* read keys to be used for input */ 
5356     srcdicts 
= zmalloc(sizeof(dict
*) * zsetnum
); 
5357     weights 
= zmalloc(sizeof(double) * zsetnum
); 
    /* i indexes srcdicts/weights, j indexes argv (the keys start at 3) */
5358     for (i 
= 0, j 
= 3; i 
< zsetnum
; i
++, j
++) { 
5359         robj 
*zsetobj 
= lookupKeyWrite(c
->db
,c
->argv
[j
]); 
5363             if (zsetobj
->type 
!= REDIS_ZSET
) { 
5366                 addReply(c
,shared
.wrongtypeerr
); 
5369             srcdicts
[i
] = ((zset
*)zsetobj
->ptr
)->dict
; 
5372         /* default all weights to 1 */ 
5376     /* parse optional extra arguments */ 
5378         int remaining 
= c
->argc
-j
; 
5381             if (!strcasecmp(c
->argv
[j
]->ptr
,"weights")) { 
5383                 if (remaining 
< zsetnum
) { 
5386                     addReplySds(c
,sdsnew("-ERR not enough weights for ZUNION/ZINTER\r\n")); 
5389                 for (i 
= 0; i 
< zsetnum
; i
++, j
++, remaining
--) { 
5390                     weights
[i
] = strtod(c
->argv
[j
]->ptr
, NULL
); 
5395                 addReply(c
,shared
.syntaxerr
); 
5401     dstobj 
= createZsetObject(); 
5402     dstzset 
= dstobj
->ptr
; 
5404     if (op 
== REDIS_OP_INTER
) { 
5405         /* store index of smallest zset in variable j */ 
5406         for (i 
= 0, j 
= 0; i 
< zsetnum
; i
++) { 
5407             if (!srcdicts
[i
] || dictSize(srcdicts
[i
]) == 0) { 
5410             if (dictSize(srcdicts
[i
]) < dictSize(srcdicts
[j
])) { 
5414         /* skip going over all entries if at least one dict was NULL or empty */ 
5416             /* precondition: all srcdicts are non-NULL and non-empty */ 
5417             di 
= dictGetIterator(srcdicts
[j
]); 
5418             while((de 
= dictNext(di
)) != NULL
) { 
5419                 double *score 
= zmalloc(sizeof(double)); 
                /* Look the member up in every input set; for the set being
                 * iterated (k == j) the current entry is reused */
5422                 for (k 
= 0; k 
< zsetnum
; k
++) { 
5423                     dictEntry 
*other 
= (k 
== j
) ? de 
: dictFind(srcdicts
[k
],dictGetEntryKey(de
)); 
5425                         *score 
= *score 
+ weights
[k
] * (*(double*)dictGetEntryVal(other
)); 
5431                 /* skip entry when not present in every source dict */ 
5435                     robj 
*o 
= dictGetEntryKey(de
); 
5436                     dictAdd(dstzset
->dict
,o
,score
); 
5437                     incrRefCount(o
); /* added to dictionary */ 
5438                     zslInsert(dstzset
->zsl
,*score
,o
); 
5439                     incrRefCount(o
); /* added to skiplist */ 
5442             dictReleaseIterator(di
); 
5444     } else if (op 
== REDIS_OP_UNION
) { 
5445         for (i 
= 0; i 
< zsetnum
; i
++) { 
5446             if (!srcdicts
[i
]) continue; 
5448             di 
= dictGetIterator(srcdicts
[i
]); 
5449             while((de 
= dictNext(di
)) != NULL
) { 
5450                 /* skip key when already processed */ 
5451                 if (dictFind(dstzset
->dict
,dictGetEntryKey(de
)) != NULL
) continue; 
5453                 double *score 
= zmalloc(sizeof(double)); 
                /* Sum the weighted score over the input sets that exist */
5455                 for (j 
= 0; j 
< zsetnum
; j
++) { 
5456                     if (!srcdicts
[j
]) continue; 
5458                     dictEntry 
*other 
= (i 
== j
) ? de 
: dictFind(srcdicts
[j
],dictGetEntryKey(de
)); 
5460                         *score 
= *score 
+ weights
[j
] * (*(double*)dictGetEntryVal(other
)); 
5464                 robj 
*o 
= dictGetEntryKey(de
); 
5465                 dictAdd(dstzset
->dict
,o
,score
); 
5466                 incrRefCount(o
); /* added to dictionary */ 
5467                 zslInsert(dstzset
->zsl
,*score
,o
); 
5468                 incrRefCount(o
); /* added to skiplist */ 
5470             dictReleaseIterator(di
); 
5473         /* unknown operator */ 
5474         redisAssert(op 
== REDIS_OP_INTER 
|| op 
== REDIS_OP_UNION
); 
    /* Replace whatever was stored at dstkey with the new sorted set */
5477     deleteKey(c
->db
,dstkey
); 
5478     dictAdd(c
->db
->dict
,dstkey
,dstobj
); 
5479     incrRefCount(dstkey
); 
5481     addReplyLong(c
, dstzset
->zsl
->length
); 
/* Command handler: weighted union of sorted sets, stored at argv[1]. */
5487 static void zunionCommand(redisClient 
*c
) { 
5488     zunionInterGenericCommand(c
,c
->argv
[1], REDIS_OP_UNION
); 
/* Command handler: weighted intersection of sorted sets, stored at
 * argv[1]. */
5491 static void zinterCommand(redisClient 
*c
) { 
5492     zunionInterGenericCommand(c
,c
->argv
[1], REDIS_OP_INTER
); 
/* Shared implementation behind zrangeCommand() (reverse == 0) and
 * zrevrangeCommand() (reverse == 1).
 *
 * argv[1] is the key, argv[2] and argv[3] are the start and end indexes
 * (parsed with atoi(); negative values count from the end of the set) and
 * an optional fifth argument "withscores" asks for the score of every
 * returned member. Any other argument layout with argc >= 5 is a syntax
 * error.
 *
 * Replies shared.nullmultibulk / shared.wrongtypeerr for the key lookup
 * failures shown below, shared.emptymultibulk for an empty range,
 * otherwise a multi-bulk reply with the selected members. */
5495 static void zrangeGenericCommand(redisClient 
*c
, int reverse
) { 
5497     int start 
= atoi(c
->argv
[2]->ptr
); 
5498     int end 
= atoi(c
->argv
[3]->ptr
); 
5501     if (c
->argc 
== 5 && !strcasecmp(c
->argv
[4]->ptr
,"withscores")) { 
5503     } else if (c
->argc 
>= 5) { 
5504         addReply(c
,shared
.syntaxerr
); 
5508     o 
= lookupKeyRead(c
->db
,c
->argv
[1]); 
5510         addReply(c
,shared
.nullmultibulk
); 
5512         if (o
->type 
!= REDIS_ZSET
) { 
5513             addReply(c
,shared
.wrongtypeerr
); 
5515             zset 
*zsetobj 
= o
->ptr
; 
5516             zskiplist 
*zsl 
= zsetobj
->zsl
; 
5519             int llen 
= zsl
->length
; 
5523             /* convert negative indexes */ 
5524             if (start 
< 0) start 
= llen
+start
; 
5525             if (end 
< 0) end 
= llen
+end
; 
5526             if (start 
< 0) start 
= 0; 
5527             if (end 
< 0) end 
= 0; 
5529             /* indexes sanity checks */ 
5530             if (start 
> end 
|| start 
>= llen
) { 
5531                 /* Out of range start or start > end result in empty list */ 
5532                 addReply(c
,shared
.emptymultibulk
); 
5535             if (end 
>= llen
) end 
= llen
-1; 
5536             rangelen 
= (end
-start
)+1; 
5538             /* check if starting point is trivial, before searching 
5539              * the element in log(N) time */ 
            /* Ranks are 1-based: index 'start' counted from the tail is rank
             * llen-start, counted from the head it is rank start+1 */
5541                 ln 
= start 
== 0 ? zsl
->tail 
: zslGetElementByRank(zsl
, llen 
- start
); 
5543                 ln 
= start 
== 0 ? zsl
->header
->forward
[0] : zslGetElementByRank(zsl
, start 
+ 1); 
5546             /* Return the result in form of a multi-bulk reply */ 
5547             addReplySds(c
,sdscatprintf(sdsempty(),"*%d\r\n", 
5548                 withscores 
? (rangelen
*2) : rangelen
)); 
5549             for (j 
= 0; j 
< rangelen
; j
++) { 
5551                 addReplyBulkLen(c
,ele
); 
5553                 addReply(c
,shared
.crlf
); 
5555                     addReplyDouble(c
,ln
->score
); 
5556                 ln 
= reverse 
? ln
->backward 
: ln
->forward
[0]; 
/* Command handler: zrangeGenericCommand() with reverse == 0. */
5562 static void zrangeCommand(redisClient 
*c
) { 
5563     zrangeGenericCommand(c
,0); 
/* Command handler: zrangeGenericCommand() with reverse == 1. */
5566 static void zrevrangeCommand(redisClient 
*c
) { 
5567     zrangeGenericCommand(c
,1); 
5570 /* This command implements both ZRANGEBYSCORE and ZCOUNT. 
5571  * If justcount is non-zero, just the count is returned. 
 *
 * argv[1] is the key, argv[2] and argv[3] are the min and max scores
 * (parsed with strtod(); a leading "(" marks the bound as exclusive).
 * The accepted argument counts are 4 and 7, plus one when a trailing
 * "withscores" is present; with 7 arguments argv[4] must be "limit" and
 * argv[5], argv[6] are the offset and the limit (parsed with atoi(), a
 * negative offset is treated as 0). */ 
5572 static void genericZrangebyscoreCommand(redisClient 
*c
, int justcount
) { 
5575     int minex 
= 0, maxex 
= 0; /* are min or max exclusive? */ 
5576     int offset 
= 0, limit 
= -1; 
5580     /* Parse the min-max interval. If one of the values is prefixed 
5581      * by the "(" character, it's considered "open". For instance 
5582      * ZRANGEBYSCORE zset (1.5 (2.5 will match min < x < max 
5583      * ZRANGEBYSCORE zset 1.5 2.5 will instead match min <= x <= max */ 
5584     if (((char*)c
->argv
[2]->ptr
)[0] == '(') { 
5585         min 
= strtod((char*)c
->argv
[2]->ptr
+1,NULL
); 
5588         min 
= strtod(c
->argv
[2]->ptr
,NULL
); 
5590     if (((char*)c
->argv
[3]->ptr
)[0] == '(') { 
5591         max 
= strtod((char*)c
->argv
[3]->ptr
+1,NULL
); 
5594         max 
= strtod(c
->argv
[3]->ptr
,NULL
); 
5597     /* Parse "WITHSCORES": note that if the command was called with 
5598      * the name ZCOUNT then we are sure that c->argc == 4, so we'll never 
5599      * enter the following paths to parse WITHSCORES and LIMIT. */ 
5600     if (c
->argc 
== 5 || c
->argc 
== 8) { 
5601         if (strcasecmp(c
->argv
[c
->argc
-1]->ptr
,"withscores") == 0) 
5606     if (c
->argc 
!= (4 + withscores
) && c
->argc 
!= (7 + withscores
)) 
5610             sdsnew("-ERR wrong number of arguments for ZRANGEBYSCORE\r\n")); 
5615     if (c
->argc 
== (7 + withscores
) && strcasecmp(c
->argv
[4]->ptr
,"limit")) { 
5616         addReply(c
,shared
.syntaxerr
); 
5618     } else if (c
->argc 
== (7 + withscores
)) { 
5619         offset 
= atoi(c
->argv
[5]->ptr
); 
5620         limit 
= atoi(c
->argv
[6]->ptr
); 
5621         if (offset 
< 0) offset 
= 0; 
5624     /* Ok, lookup the key and get the range */ 
5625     o 
= lookupKeyRead(c
->db
,c
->argv
[1]); 
5627         addReply(c
,justcount 
? shared
.czero 
: shared
.nullmultibulk
); 
5629         if (o
->type 
!= REDIS_ZSET
) { 
5630             addReply(c
,shared
.wrongtypeerr
); 
5632             zset 
*zsetobj 
= o
->ptr
; 
5633             zskiplist 
*zsl 
= zsetobj
->zsl
; 
5635             robj 
*ele
, *lenobj 
= NULL
; 
5636             unsigned long rangelen 
= 0; 
5638             /* Get the first node with the score >= min, or with 
5639              * score > min if 'minex' is true. */ 
5640             ln 
= zslFirstWithScore(zsl
,min
); 
5641             while (minex 
&& ln 
&& ln
->score 
== min
) ln 
= ln
->forward
[0]; 
5644                 /* No element matching the specified interval */ 
5645                 addReply(c
,justcount 
? shared
.czero 
: shared
.emptymultibulk
); 
5649             /* We don't know in advance how many matching elements there 
5650              * are in the list, so we push this object that will represent 
5651              * the multi-bulk length in the output buffer, and will "fix" 
5654                 lenobj 
= createObject(REDIS_STRING
,NULL
); 
5656                 decrRefCount(lenobj
); 
5659             while(ln 
&& (maxex 
? (ln
->score 
< max
) : (ln
->score 
<= max
))) { 
5662                     ln 
= ln
->forward
[0]; 
5665                 if (limit 
== 0) break; 
5668                     addReplyBulkLen(c
,ele
); 
5670                     addReply(c
,shared
.crlf
); 
5672                         addReplyDouble(c
,ln
->score
); 
5674                 ln 
= ln
->forward
[0]; 
5676                 if (limit 
> 0) limit
--; 
5679                 addReplyLong(c
,(long)rangelen
); 
5681                 lenobj
->ptr 
= sdscatprintf(sdsempty(),"*%lu\r\n", 
5682                      withscores 
? (rangelen
*2) : rangelen
); 
/* Command handler: genericZrangebyscoreCommand() with justcount == 0. */
5688 static void zrangebyscoreCommand(redisClient 
*c
) { 
5689     genericZrangebyscoreCommand(c
,0); 
/* Command handler: genericZrangebyscoreCommand() with justcount == 1. */
5692 static void zcountCommand(redisClient 
*c
) { 
5693     genericZrangebyscoreCommand(c
,1); 
/* Command handler: replies with the length of the skiplist of the sorted
 * set at argv[1] as an integer reply. The other replies shown below are
 * shared.czero and shared.wrongtypeerr (the latter when the key holds a
 * value that is not a sorted set). */
5696 static void zcardCommand(redisClient 
*c
) { 
5700     o 
= lookupKeyRead(c
->db
,c
->argv
[1]); 
5702         addReply(c
,shared
.czero
); 
5705         if (o
->type 
!= REDIS_ZSET
) { 
5706             addReply(c
,shared
.wrongtypeerr
); 
5709             addReplySds(c
,sdscatprintf(sdsempty(),":%lu\r\n",zs
->zsl
->length
)); 
/* Command handler: replies with the score stored in the hash table of the
 * sorted set at argv[1] for member argv[2]. The other replies shown below
 * are shared.nullbulk and shared.wrongtypeerr (the latter when the key
 * holds a value that is not a sorted set). */
5714 static void zscoreCommand(redisClient 
*c
) { 
5718     o 
= lookupKeyRead(c
->db
,c
->argv
[1]); 
5720         addReply(c
,shared
.nullbulk
); 
5723         if (o
->type 
!= REDIS_ZSET
) { 
5724             addReply(c
,shared
.wrongtypeerr
); 
5729             de 
= dictFind(zs
->dict
,c
->argv
[2]); 
5731                 addReply(c
,shared
.nullbulk
); 
5733                 double *score 
= dictGetEntryVal(de
); 
5735                 addReplyDouble(c
,*score
); 
/* Command handler: replies with the rank of member argv[2] in the sorted
 * set at argv[1]. The member's score is read from the hash table and
 * passed to zslGetRank(); the reply is rank-1, which turns the 1-based
 * rank into a 0-based one. The other replies shown below are
 * shared.nullbulk and shared.wrongtypeerr (the latter when the key holds
 * a value that is not a sorted set). */
5741 static void zrankCommand(redisClient 
*c
) { 
5743     o 
= lookupKeyRead(c
->db
,c
->argv
[1]); 
5745         addReply(c
,shared
.nullbulk
); 
5748     if (o
->type 
!= REDIS_ZSET
) { 
5749         addReply(c
,shared
.wrongtypeerr
); 
5752         zskiplist 
*zsl 
= zs
->zsl
; 
5756         de 
= dictFind(zs
->dict
,c
->argv
[2]); 
5758             addReply(c
,shared
.nullbulk
); 
5762         double *score 
= dictGetEntryVal(de
); 
5763         rank 
= zslGetRank(zsl
, *score
, c
->argv
[2]); 
5765             addReplyLong(c
, rank
-1); 
5767             addReply(c
,shared
.nullbulk
); 
5772 /* =================================== Hashes =============================== */ 
/* Command handler: sets field argv[2] to value argv[3] in the hash at
 * argv[1]. A new hash object is created and added to the db when the key
 * is missing; a value of another type is answered with
 * shared.wrongtypeerr.
 *
 * Two encodings are handled: REDIS_ENCODING_ZIPMAP stores the raw sds
 * buffers through zipmapSet(), the other branch stores the argv objects
 * themselves in a dict and takes references on them. The reply is the
 * integer value of (update == 0). */
5773 static void hsetCommand(redisClient 
*c
) { 
5775     robj 
*o 
= lookupKeyWrite(c
->db
,c
->argv
[1]); 
5778         o 
= createHashObject(); 
5779         dictAdd(c
->db
->dict
,c
->argv
[1],o
); 
5780         incrRefCount(c
->argv
[1]); 
5782         if (o
->type 
!= REDIS_HASH
) { 
5783             addReply(c
,shared
.wrongtypeerr
); 
5787     if (o
->encoding 
== REDIS_ENCODING_ZIPMAP
) { 
5788         unsigned char *zm 
= o
->ptr
; 
5790         zm 
= zipmapSet(zm
,c
->argv
[2]->ptr
,sdslen(c
->argv
[2]->ptr
), 
5791             c
->argv
[3]->ptr
,sdslen(c
->argv
[3]->ptr
),&update
); 
5794         if (dictAdd(o
->ptr
,c
->argv
[2],c
->argv
[3]) == DICT_OK
) { 
5795             incrRefCount(c
->argv
[2]); 
5799         incrRefCount(c
->argv
[3]); 
5802     addReplySds(c
,sdscatprintf(sdsempty(),":%d\r\n",update 
== 0)); 
/* Command handler: replies with the value of field argv[2] in the hash at
 * argv[1], or shared.nullbulk in the cases shown below.
 *
 * For REDIS_ENCODING_ZIPMAP the value is fetched with zipmapGet() and
 * sent as a "$<len>" header, the bytes and a CRLF; in the other branch
 * the field is looked up with dictFind() and the stored object is sent
 * as a bulk reply. */
5805 static void hgetCommand(redisClient 
*c
) { 
5806     robj 
*o 
= lookupKeyRead(c
->db
,c
->argv
[1]); 
5809         addReply(c
,shared
.nullbulk
); 
5812         if (o
->encoding 
== REDIS_ENCODING_ZIPMAP
) { 
5813             unsigned char *zm 
= o
->ptr
; 
5817             if (zipmapGet(zm
,c
->argv
[2]->ptr
,sdslen(c
->argv
[2]->ptr
), &val
,&vlen
)) { 
5818                 addReplySds(c
,sdscatprintf(sdsempty(),"$%u\r\n", vlen
)); 
5819                 addReplySds(c
,sdsnewlen(val
,vlen
)); 
5820                 addReply(c
,shared
.crlf
); 
5823                 addReply(c
,shared
.nullbulk
); 
5827             struct dictEntry 
*de
; 
5829             de 
= dictFind(o
->ptr
,c
->argv
[2]); 
5831                 addReply(c
,shared
.nullbulk
); 
5833                 robj 
*e 
= dictGetEntryVal(de
); 
5835                 addReplyBulkLen(c
,e
); 
5837                 addReply(c
,shared
.crlf
); 
5843 /* ========================= Non type-specific commands  ==================== */ 
/* Command handler: empties the key dict and the expires dict of the
 * client's current db and replies shared.ok. server.dirty is increased by
 * the number of keys the db held before it was emptied. */
5845 static void flushdbCommand(redisClient 
*c
) { 
5846     server
.dirty 
+= dictSize(c
->db
->dict
); 
5847     dictEmpty(c
->db
->dict
); 
5848     dictEmpty(c
->db
->expires
); 
5849     addReply(c
,shared
.ok
); 
/* Command handler: calls emptyDb(), adds its return value to
 * server.dirty, replies shared.ok and then calls rdbSave() on
 * server.dbfilename. The reply is queued before rdbSave() runs and the
 * result of rdbSave() is not checked here. */
5852 static void flushallCommand(redisClient 
*c
) { 
5853     server
.dirty 
+= emptyDb(); 
5854     addReply(c
,shared
.ok
); 
5855     rdbSave(server
.dbfilename
); 
5859 static redisSortOperation 
*createSortOperation(int type
, robj 
*pattern
) { 
5860     redisSortOperation 
*so 
= zmalloc(sizeof(*so
)); 
5862     so
->pattern 
= pattern
; 
5866 /* Return the value associated to the key with a name obtained 
5867  * substituting the first occurrence of '*' in 'pattern' with 'subst' */ 
5868 static robj 
*lookupKeyByPattern(redisDb 
*db
, robj 
*pattern
, robj 
*subst
) { 
5872     int prefixlen
, sublen
, postfixlen
; 
5873     /* Exploit the internal sds representation to create an sds string allocated on the stack in order to make this function faster */ 
5877         char buf
[REDIS_SORTKEY_MAX
+1]; 
5880     /* If the pattern is "#" return the substitution object itself in order 
5881      * to implement the "SORT ... GET #" feature. */ 
5882     spat 
= pattern
->ptr
; 
5883     if (spat
[0] == '#' && spat
[1] == '\0') { 
5887     /* The substitution object may be specially encoded. If so we create 
5888      * a decoded object on the fly. Otherwise getDecodedObject will just 
5889      * increment the ref count, that we'll decrement later. */ 
5890     subst 
= getDecodedObject(subst
); 
5893     if (sdslen(spat
)+sdslen(ssub
)-1 > REDIS_SORTKEY_MAX
) return NULL
; 
5894     p 
= strchr(spat
,'*'); 
5896         decrRefCount(subst
); 
5901     sublen 
= sdslen(ssub
); 
5902     postfixlen 
= sdslen(spat
)-(prefixlen
+1); 
5903     memcpy(keyname
.buf
,spat
,prefixlen
); 
5904     memcpy(keyname
.buf
+prefixlen
,ssub
,sublen
); 
5905     memcpy(keyname
.buf
+prefixlen
+sublen
,p
+1,postfixlen
); 
5906     keyname
.buf
[prefixlen
+sublen
+postfixlen
] = '\0'; 
5907     keyname
.len 
= prefixlen
+sublen
+postfixlen
; 
5909     initStaticStringObject(keyobj
,((char*)&keyname
)+(sizeof(long)*2)) 
5910     decrRefCount(subst
); 
5912     /* printf("lookup '%s' => %p\n", keyname.buf,de); */ 
5913     return lookupKeyRead(db
,&keyobj
); 
5916 /* sortCompare() is used by qsort in sortCommand(). Given that qsort_r with 
5917  * the additional parameter is not standard but a BSD-specific we have to 
5918  * pass sorting parameters via the global 'server' structure */ 
5919 static int sortCompare(const void *s1
, const void *s2
) { 
5920     const redisSortObject 
*so1 
= s1
, *so2 
= s2
; 
5923     if (!server
.sort_alpha
) { 
5924         /* Numeric sorting. Here it's trivial as we precomputed scores */ 
5925         if (so1
->u
.score 
> so2
->u
.score
) { 
5927         } else if (so1
->u
.score 
< so2
->u
.score
) { 
5933         /* Alphanumeric sorting */ 
5934         if (server
.sort_bypattern
) { 
5935             if (!so1
->u
.cmpobj 
|| !so2
->u
.cmpobj
) { 
5936                 /* At least one compare object is NULL */ 
5937                 if (so1
->u
.cmpobj 
== so2
->u
.cmpobj
) 
5939                 else if (so1
->u
.cmpobj 
== NULL
) 
5944                 /* We have both the objects, use strcoll */ 
5945                 cmp 
= strcoll(so1
->u
.cmpobj
->ptr
,so2
->u
.cmpobj
->ptr
); 
5948             /* Compare elements directly */ 
5951             dec1 
= getDecodedObject(so1
->obj
); 
5952             dec2 
= getDecodedObject(so2
->obj
); 
5953             cmp 
= strcoll(dec1
->ptr
,dec2
->ptr
); 
5958     return server
.sort_desc 
? -cmp 
: cmp
; 
5961 /* The SORT command is the most complex command in Redis. Warning: this code 
5962  * is optimized for speed and a bit less for readability */ 
5963 static void sortCommand(redisClient 
*c
) { 
5966     int desc 
= 0, alpha 
= 0; 
5967     int limit_start 
= 0, limit_count 
= -1, start
, end
; 
5968     int j
, dontsort 
= 0, vectorlen
; 
5969     int getop 
= 0; /* GET operation counter */ 
5970     robj 
*sortval
, *sortby 
= NULL
, *storekey 
= NULL
; 
5971     redisSortObject 
*vector
; /* Resulting vector to sort */ 
5973     /* Lookup the key to sort. It must be of the right types */ 
5974     sortval 
= lookupKeyRead(c
->db
,c
->argv
[1]); 
5975     if (sortval 
== NULL
) { 
5976         addReply(c
,shared
.nullmultibulk
); 
5979     if (sortval
->type 
!= REDIS_SET 
&& sortval
->type 
!= REDIS_LIST 
&& 
5980         sortval
->type 
!= REDIS_ZSET
) 
5982         addReply(c
,shared
.wrongtypeerr
); 
5986     /* Create a list of operations to perform for every sorted element. 
5987      * Operations can be GET/DEL/INCR/DECR */ 
5988     operations 
= listCreate(); 
5989     listSetFreeMethod(operations
,zfree
); 
5992     /* Now we need to protect sortval incrementing its count, in the future 
5993      * SORT may have options able to overwrite/delete keys during the sorting 
5994      * and the sorted key itself may get destroyed */ 
5995     incrRefCount(sortval
); 
5997     /* The SORT command has an SQL-alike syntax, parse it */ 
5998     while(j 
< c
->argc
) { 
5999         int leftargs 
= c
->argc
-j
-1; 
6000         if (!strcasecmp(c
->argv
[j
]->ptr
,"asc")) { 
6002         } else if (!strcasecmp(c
->argv
[j
]->ptr
,"desc")) { 
6004         } else if (!strcasecmp(c
->argv
[j
]->ptr
,"alpha")) { 
6006         } else if (!strcasecmp(c
->argv
[j
]->ptr
,"limit") && leftargs 
>= 2) { 
6007             limit_start 
= atoi(c
->argv
[j
+1]->ptr
); 
6008             limit_count 
= atoi(c
->argv
[j
+2]->ptr
); 
6010         } else if (!strcasecmp(c
->argv
[j
]->ptr
,"store") && leftargs 
>= 1) { 
6011             storekey 
= c
->argv
[j
+1]; 
6013         } else if (!strcasecmp(c
->argv
[j
]->ptr
,"by") && leftargs 
>= 1) { 
6014             sortby 
= c
->argv
[j
+1]; 
6015             /* If the BY pattern does not contain '*', i.e. it is constant, 
6016              * we don't need to sort nor to lookup the weight keys. */ 
6017             if (strchr(c
->argv
[j
+1]->ptr
,'*') == NULL
) dontsort 
= 1; 
6019         } else if (!strcasecmp(c
->argv
[j
]->ptr
,"get") && leftargs 
>= 1) { 
6020             listAddNodeTail(operations
,createSortOperation( 
6021                 REDIS_SORT_GET
,c
->argv
[j
+1])); 
6025             decrRefCount(sortval
); 
6026             listRelease(operations
); 
6027             addReply(c
,shared
.syntaxerr
); 
6033     /* Load the sorting vector with all the objects to sort */ 
6034     switch(sortval
->type
) { 
6035     case REDIS_LIST
: vectorlen 
= listLength((list
*)sortval
->ptr
); break; 
6036     case REDIS_SET
: vectorlen 
=  dictSize((dict
*)sortval
->ptr
); break; 
6037     case REDIS_ZSET
: vectorlen 
= dictSize(((zset
*)sortval
->ptr
)->dict
); break; 
6038     default: vectorlen 
= 0; redisAssert(0); /* Avoid GCC warning */ 
6040     vector 
= zmalloc(sizeof(redisSortObject
)*vectorlen
); 
6043     if (sortval
->type 
== REDIS_LIST
) { 
6044         list 
*list 
= sortval
->ptr
; 
6048         listRewind(list
,&li
); 
6049         while((ln 
= listNext(&li
))) { 
6050             robj 
*ele 
= ln
->value
; 
6051             vector
[j
].obj 
= ele
; 
6052             vector
[j
].u
.score 
= 0; 
6053             vector
[j
].u
.cmpobj 
= NULL
; 
6061         if (sortval
->type 
== REDIS_SET
) { 
6064             zset 
*zs 
= sortval
->ptr
; 
6068         di 
= dictGetIterator(set
); 
6069         while((setele 
= dictNext(di
)) != NULL
) { 
6070             vector
[j
].obj 
= dictGetEntryKey(setele
); 
6071             vector
[j
].u
.score 
= 0; 
6072             vector
[j
].u
.cmpobj 
= NULL
; 
6075         dictReleaseIterator(di
); 
6077     redisAssert(j 
== vectorlen
); 
6079     /* Now it's time to load the right scores in the sorting vector */ 
6080     if (dontsort 
== 0) { 
6081         for (j 
= 0; j 
< vectorlen
; j
++) { 
6085                 byval 
= lookupKeyByPattern(c
->db
,sortby
,vector
[j
].obj
); 
6086                 if (!byval 
|| byval
->type 
!= REDIS_STRING
) continue; 
6088                     vector
[j
].u
.cmpobj 
= getDecodedObject(byval
); 
6090                     if (byval
->encoding 
== REDIS_ENCODING_RAW
) { 
6091                         vector
[j
].u
.score 
= strtod(byval
->ptr
,NULL
); 
6093                         /* Don't need to decode the object if it's 
6094                          * integer-encoded (the only encoding supported) so 
6095                          * far. We can just cast it */ 
6096                         if (byval
->encoding 
== REDIS_ENCODING_INT
) { 
6097                             vector
[j
].u
.score 
= (long)byval
->ptr
; 
6099                             redisAssert(1 != 1); 
6104                     if (vector
[j
].obj
->encoding 
== REDIS_ENCODING_RAW
) 
6105                         vector
[j
].u
.score 
= strtod(vector
[j
].obj
->ptr
,NULL
); 
6107                         if (vector
[j
].obj
->encoding 
== REDIS_ENCODING_INT
) 
6108                             vector
[j
].u
.score 
= (long) vector
[j
].obj
->ptr
; 
6110                             redisAssert(1 != 1); 
6117     /* We are ready to sort the vector... perform a bit of sanity check 
6118      * on the LIMIT option too. We'll use a partial version of quicksort. */ 
6119     start 
= (limit_start 
< 0) ? 0 : limit_start
; 
6120     end 
= (limit_count 
< 0) ? vectorlen
-1 : start
+limit_count
-1; 
6121     if (start 
>= vectorlen
) { 
6122         start 
= vectorlen
-1; 
6125     if (end 
>= vectorlen
) end 
= vectorlen
-1; 
6127     if (dontsort 
== 0) { 
6128         server
.sort_desc 
= desc
; 
6129         server
.sort_alpha 
= alpha
; 
6130         server
.sort_bypattern 
= sortby 
? 1 : 0; 
6131         if (sortby 
&& (start 
!= 0 || end 
!= vectorlen
-1)) 
6132             pqsort(vector
,vectorlen
,sizeof(redisSortObject
),sortCompare
, start
,end
); 
6134             qsort(vector
,vectorlen
,sizeof(redisSortObject
),sortCompare
); 
6137     /* Send command output to the output buffer, performing the specified 
6138      * GET/DEL/INCR/DECR operations if any. */ 
6139     outputlen 
= getop 
? getop
*(end
-start
+1) : end
-start
+1; 
6140     if (storekey 
== NULL
) { 
6141         /* STORE option not specified, sent the sorting result to client */ 
6142         addReplySds(c
,sdscatprintf(sdsempty(),"*%d\r\n",outputlen
)); 
6143         for (j 
= start
; j 
<= end
; j
++) { 
6148                 addReplyBulkLen(c
,vector
[j
].obj
); 
6149                 addReply(c
,vector
[j
].obj
); 
6150                 addReply(c
,shared
.crlf
); 
6152             listRewind(operations
,&li
); 
6153             while((ln 
= listNext(&li
))) { 
6154                 redisSortOperation 
*sop 
= ln
->value
; 
6155                 robj 
*val 
= lookupKeyByPattern(c
->db
,sop
->pattern
, 
6158                 if (sop
->type 
== REDIS_SORT_GET
) { 
6159                     if (!val 
|| val
->type 
!= REDIS_STRING
) { 
6160                         addReply(c
,shared
.nullbulk
); 
6162                         addReplyBulkLen(c
,val
); 
6164                         addReply(c
,shared
.crlf
); 
6167                     redisAssert(sop
->type 
== REDIS_SORT_GET
); /* always fails */ 
6172         robj 
*listObject 
= createListObject(); 
6173         list 
*listPtr 
= (list
*) listObject
->ptr
; 
6175         /* STORE option specified, set the sorting result as a List object */ 
6176         for (j 
= start
; j 
<= end
; j
++) { 
6181                 listAddNodeTail(listPtr
,vector
[j
].obj
); 
6182                 incrRefCount(vector
[j
].obj
); 
6184             listRewind(operations
,&li
); 
6185             while((ln 
= listNext(&li
))) { 
6186                 redisSortOperation 
*sop 
= ln
->value
; 
6187                 robj 
*val 
= lookupKeyByPattern(c
->db
,sop
->pattern
, 
6190                 if (sop
->type 
== REDIS_SORT_GET
) { 
6191                     if (!val 
|| val
->type 
!= REDIS_STRING
) { 
6192                         listAddNodeTail(listPtr
,createStringObject("",0)); 
6194                         listAddNodeTail(listPtr
,val
); 
6198                     redisAssert(sop
->type 
== REDIS_SORT_GET
); /* always fails */ 
6202         if (dictReplace(c
->db
->dict
,storekey
,listObject
)) { 
6203             incrRefCount(storekey
); 
6205         /* Note: we add 1 because the DB is dirty anyway since even if the 
6206          * SORT result is empty a new key is set and maybe the old content 
6208         server
.dirty 
+= 1+outputlen
; 
6209         addReplySds(c
,sdscatprintf(sdsempty(),":%d\r\n",outputlen
)); 
6213     decrRefCount(sortval
); 
6214     listRelease(operations
); 
6215     for (j 
= 0; j 
< vectorlen
; j
++) { 
6216         if (sortby 
&& alpha 
&& vector
[j
].u
.cmpobj
) 
6217             decrRefCount(vector
[j
].u
.cmpobj
); 
/* Convert an amount of bytes into a human readable string in the form
 * of 100B, 2G, 100M, 4K, and so forth.
 *
 * 's' receives the NUL terminated result and must be large enough for it:
 * the longest output is the plain byte count followed by 'B' (22 bytes
 * with the terminator when unsigned long long is 64 bits wide).
 *
 * Amounts below 1024 are printed as an exact byte count. Larger amounts
 * are divided by the matching power of 1024 and printed with two decimals
 * and a K, M, G, T or P suffix. Something is always written to 's': values
 * of 1024^4 bytes and above, for which no branch used to exist, now get
 * the T or P suffix, and anything from 1024^6 upwards falls back to the
 * exact byte count. */
static void bytesToHuman(char *s, unsigned long long n) {
    static const char units[] = "KMGTP";
    unsigned long long scale = 1024;
    size_t i;

    if (n < 1024) {
        /* Bytes */
        sprintf(s,"%lluB",n);
        return;
    }
    for (i = 0; i < sizeof(units)-1; i++) {
        /* units[i] covers the range [scale, scale*1024). The largest
         * product computed here is 1024^6 = 2^60, so it cannot wrap. */
        if (n < scale*1024) {
            sprintf(s,"%.2f%c",(double)n/scale,units[i]);
            return;
        }
        scale *= 1024;
    }
    /* No unit left: report the exact number of bytes. */
    sprintf(s,"%lluB",n);
}
6243 /* Create the string returned by the INFO command. This is decoupled 
6244  * from the INFO command itself as we need to report the same information 
6245  * on memory corruption problems. */ 
6246 static sds 
genRedisInfoString(void) { 
6248     time_t uptime 
= time(NULL
)-server
.stat_starttime
; 
6252     bytesToHuman(hmem
,zmalloc_used_memory()); 
6253     info 
= sdscatprintf(sdsempty(), 
6254         "redis_version:%s\r\n" 
6256         "multiplexing_api:%s\r\n" 
6257         "process_id:%ld\r\n" 
6258         "uptime_in_seconds:%ld\r\n" 
6259         "uptime_in_days:%ld\r\n" 
6260         "connected_clients:%d\r\n" 
6261         "connected_slaves:%d\r\n" 
6262         "blocked_clients:%d\r\n" 
6263         "used_memory:%zu\r\n" 
6264         "used_memory_human:%s\r\n" 
6265         "changes_since_last_save:%lld\r\n" 
6266         "bgsave_in_progress:%d\r\n" 
6267         "last_save_time:%ld\r\n" 
6268         "bgrewriteaof_in_progress:%d\r\n" 
6269         "total_connections_received:%lld\r\n" 
6270         "total_commands_processed:%lld\r\n" 
6274         (sizeof(long) == 8) ? "64" : "32", 
6279         listLength(server
.clients
)-listLength(server
.slaves
), 
6280         listLength(server
.slaves
), 
6281         server
.blpop_blocked_clients
, 
6282         zmalloc_used_memory(), 
6285         server
.bgsavechildpid 
!= -1, 
6287         server
.bgrewritechildpid 
!= -1, 
6288         server
.stat_numconnections
, 
6289         server
.stat_numcommands
, 
6290         server
.vm_enabled 
!= 0, 
6291         server
.masterhost 
== NULL 
? "master" : "slave" 
6293     if (server
.masterhost
) { 
6294         info 
= sdscatprintf(info
, 
6295             "master_host:%s\r\n" 
6296             "master_port:%d\r\n" 
6297             "master_link_status:%s\r\n" 
6298             "master_last_io_seconds_ago:%d\r\n" 
6301             (server
.replstate 
== REDIS_REPL_CONNECTED
) ? 
6303             server
.master 
? ((int)(time(NULL
)-server
.master
->lastinteraction
)) : -1 
6306     if (server
.vm_enabled
) { 
6308         info 
= sdscatprintf(info
, 
6309             "vm_conf_max_memory:%llu\r\n" 
6310             "vm_conf_page_size:%llu\r\n" 
6311             "vm_conf_pages:%llu\r\n" 
6312             "vm_stats_used_pages:%llu\r\n" 
6313             "vm_stats_swapped_objects:%llu\r\n" 
6314             "vm_stats_swappin_count:%llu\r\n" 
6315             "vm_stats_swappout_count:%llu\r\n" 
6316             "vm_stats_io_newjobs_len:%lu\r\n" 
6317             "vm_stats_io_processing_len:%lu\r\n" 
6318             "vm_stats_io_processed_len:%lu\r\n" 
6319             "vm_stats_io_active_threads:%lu\r\n" 
6320             "vm_stats_blocked_clients:%lu\r\n" 
6321             ,(unsigned long long) server
.vm_max_memory
, 
6322             (unsigned long long) server
.vm_page_size
, 
6323             (unsigned long long) server
.vm_pages
, 
6324             (unsigned long long) server
.vm_stats_used_pages
, 
6325             (unsigned long long) server
.vm_stats_swapped_objects
, 
6326             (unsigned long long) server
.vm_stats_swapins
, 
6327             (unsigned long long) server
.vm_stats_swapouts
, 
6328             (unsigned long) listLength(server
.io_newjobs
), 
6329             (unsigned long) listLength(server
.io_processing
), 
6330             (unsigned long) listLength(server
.io_processed
), 
6331             (unsigned long) server
.io_active_threads
, 
6332             (unsigned long) server
.vm_blocked_clients
 
6336     for (j 
= 0; j 
< server
.dbnum
; j
++) { 
6337         long long keys
, vkeys
; 
6339         keys 
= dictSize(server
.db
[j
].dict
); 
6340         vkeys 
= dictSize(server
.db
[j
].expires
); 
6341         if (keys 
|| vkeys
) { 
6342             info 
= sdscatprintf(info
, "db%d:keys=%lld,expires=%lld\r\n", 
6349 static void infoCommand(redisClient 
*c
) { 
6350     sds info 
= genRedisInfoString(); 
6351     addReplySds(c
,sdscatprintf(sdsempty(),"$%lu\r\n", 
6352         (unsigned long)sdslen(info
))); 
6353     addReplySds(c
,info
); 
6354     addReply(c
,shared
.crlf
); 
6357 static void monitorCommand(redisClient 
*c
) { 
6358     /* ignore MONITOR if already slave or in monitor mode */ 
6359     if (c
->flags 
& REDIS_SLAVE
) return; 
6361     c
->flags 
|= (REDIS_SLAVE
|REDIS_MONITOR
); 
6363     listAddNodeTail(server
.monitors
,c
); 
6364     addReply(c
,shared
.ok
); 
6367 /* ================================= Expire ================================= */ 
6368 static int removeExpire(redisDb 
*db
, robj 
*key
) { 
6369     if (dictDelete(db
->expires
,key
) == DICT_OK
) { 
6376 static int setExpire(redisDb 
*db
, robj 
*key
, time_t when
) { 
6377     if (dictAdd(db
->expires
,key
,(void*)when
) == DICT_ERR
) { 
6385 /* Return the expire time of the specified key, or -1 if no expire 
6386  * is associated with this key (i.e. the key is non volatile) */ 
6387 static time_t getExpire(redisDb 
*db
, robj 
*key
) { 
6390     /* No expire? return ASAP */ 
6391     if (dictSize(db
->expires
) == 0 || 
6392        (de 
= dictFind(db
->expires
,key
)) == NULL
) return -1; 
6394     return (time_t) dictGetEntryVal(de
); 
6397 static int expireIfNeeded(redisDb 
*db
, robj 
*key
) { 
6401     /* No expire? return ASAP */ 
6402     if (dictSize(db
->expires
) == 0 || 
6403        (de 
= dictFind(db
->expires
,key
)) == NULL
) return 0; 
6405     /* Lookup the expire */ 
6406     when 
= (time_t) dictGetEntryVal(de
); 
6407     if (time(NULL
) <= when
) return 0; 
6409     /* Delete the key */ 
6410     dictDelete(db
->expires
,key
); 
6411     return dictDelete(db
->dict
,key
) == DICT_OK
; 
6414 static int deleteIfVolatile(redisDb 
*db
, robj 
*key
) { 
6417     /* No expire? return ASAP */ 
6418     if (dictSize(db
->expires
) == 0 || 
6419        (de 
= dictFind(db
->expires
,key
)) == NULL
) return 0; 
6421     /* Delete the key */ 
6423     dictDelete(db
->expires
,key
); 
6424     return dictDelete(db
->dict
,key
) == DICT_OK
; 
6427 static void expireGenericCommand(redisClient 
*c
, robj 
*key
, time_t seconds
) { 
6430     de 
= dictFind(c
->db
->dict
,key
); 
6432         addReply(c
,shared
.czero
); 
6436         if (deleteKey(c
->db
,key
)) server
.dirty
++; 
6437         addReply(c
, shared
.cone
); 
6440         time_t when 
= time(NULL
)+seconds
; 
6441         if (setExpire(c
->db
,key
,when
)) { 
6442             addReply(c
,shared
.cone
); 
6445             addReply(c
,shared
.czero
); 
6451 static void expireCommand(redisClient 
*c
) { 
6452     expireGenericCommand(c
,c
->argv
[1],strtol(c
->argv
[2]->ptr
,NULL
,10)); 
6455 static void expireatCommand(redisClient 
*c
) { 
6456     expireGenericCommand(c
,c
->argv
[1],strtol(c
->argv
[2]->ptr
,NULL
,10)-time(NULL
)); 
6459 static void ttlCommand(redisClient 
*c
) { 
6463     expire 
= getExpire(c
->db
,c
->argv
[1]); 
6465         ttl 
= (int) (expire
-time(NULL
)); 
6466         if (ttl 
< 0) ttl 
= -1; 
6468     addReplySds(c
,sdscatprintf(sdsempty(),":%d\r\n",ttl
)); 
6471 /* ================================ MULTI/EXEC ============================== */ 
6473 /* Client state initialization for MULTI/EXEC */ 
6474 static void initClientMultiState(redisClient 
*c
) { 
6475     c
->mstate
.commands 
= NULL
; 
6476     c
->mstate
.count 
= 0; 
6479 /* Release all the resources associated with MULTI/EXEC state */ 
6480 static void freeClientMultiState(redisClient 
*c
) { 
6483     for (j 
= 0; j 
< c
->mstate
.count
; j
++) { 
6485         multiCmd 
*mc 
= c
->mstate
.commands
+j
; 
6487         for (i 
= 0; i 
< mc
->argc
; i
++) 
6488             decrRefCount(mc
->argv
[i
]); 
6491     zfree(c
->mstate
.commands
); 
6494 /* Add a new command into the MULTI commands queue */ 
6495 static void queueMultiCommand(redisClient 
*c
, struct redisCommand 
*cmd
) { 
6499     c
->mstate
.commands 
= zrealloc(c
->mstate
.commands
, 
6500             sizeof(multiCmd
)*(c
->mstate
.count
+1)); 
6501     mc 
= c
->mstate
.commands
+c
->mstate
.count
; 
6504     mc
->argv 
= zmalloc(sizeof(robj
*)*c
->argc
); 
6505     memcpy(mc
->argv
,c
->argv
,sizeof(robj
*)*c
->argc
); 
6506     for (j 
= 0; j 
< c
->argc
; j
++) 
6507         incrRefCount(mc
->argv
[j
]); 
6511 static void multiCommand(redisClient 
*c
) { 
6512     c
->flags 
|= REDIS_MULTI
; 
6513     addReply(c
,shared
.ok
); 
6516 static void discardCommand(redisClient 
*c
) { 
6517     if (!(c
->flags 
& REDIS_MULTI
)) { 
6518         addReplySds(c
,sdsnew("-ERR DISCARD without MULTI\r\n")); 
6522     freeClientMultiState(c
); 
6523     initClientMultiState(c
); 
6524     c
->flags 
&= (~REDIS_MULTI
); 
6525     addReply(c
,shared
.ok
); 
6528 static void execCommand(redisClient 
*c
) { 
6533     if (!(c
->flags 
& REDIS_MULTI
)) { 
6534         addReplySds(c
,sdsnew("-ERR EXEC without MULTI\r\n")); 
6538     orig_argv 
= c
->argv
; 
6539     orig_argc 
= c
->argc
; 
6540     addReplySds(c
,sdscatprintf(sdsempty(),"*%d\r\n",c
->mstate
.count
)); 
6541     for (j 
= 0; j 
< c
->mstate
.count
; j
++) { 
6542         c
->argc 
= c
->mstate
.commands
[j
].argc
; 
6543         c
->argv 
= c
->mstate
.commands
[j
].argv
; 
6544         call(c
,c
->mstate
.commands
[j
].cmd
); 
6546     c
->argv 
= orig_argv
; 
6547     c
->argc 
= orig_argc
; 
6548     freeClientMultiState(c
); 
6549     initClientMultiState(c
); 
6550     c
->flags 
&= (~REDIS_MULTI
); 
6553 /* =========================== Blocking Operations  ========================= */ 
6555 /* Currently Redis blocking operations support is limited to list POP ops, 
6556  * so the current implementation is not fully generic, but it is also not 
6557  * completely specific so it will not require a rewrite to support new 
6558  * kind of blocking operations in the future. 
6560  * Still it's important to note that list blocking operations can be already 
6561  * used as a notification mechanism in order to implement other blocking 
6562  * operations at application level, so there must be a very strong evidence 
6563  * of usefulness and generality before new blocking operations are implemented. 
6565  * This is how the current blocking POP works, we use BLPOP as example: 
6566  * - If the user calls BLPOP and the key exists and contains a non empty list 
6567  *   then LPOP is called instead. So BLPOP is semantically the same as LPOP 
6568  *   if there is no need to block. 
6569  * - If instead BLPOP is called and the key does not exist or the list is 
6570  *   empty we need to block. In order to do so we remove the notification for 
6571  *   new data to read in the client socket (so that we'll not serve new 
6572  *   requests if the blocking request is not served). Also we put the client 
6573  *   in a dictionary (db->blockingkeys) mapping keys to a list of clients 
6574  *   blocking for these keys. 
6575  * - If a PUSH operation against a key with blocked clients waiting is 
6576  *   performed, we serve the first in the list: basically instead of pushing 
6577  *   the new element inside the list we return it to the (first / oldest) 
6578  *   blocking client, unblock the client, and remove it from the list. 
6580  * The above comment and the source code should be enough in order to understand 
6581  * the implementation and modify / fix it later. 
6584 /* Set a client in blocking mode for the specified key, with the specified 
6586 static void blockForKeys(redisClient 
*c
, robj 
**keys
, int numkeys
, time_t timeout
) { 
6591     c
->blockingkeys 
= zmalloc(sizeof(robj
*)*numkeys
); 
6592     c
->blockingkeysnum 
= numkeys
; 
6593     c
->blockingto 
= timeout
; 
6594     for (j 
= 0; j 
< numkeys
; j
++) { 
6595         /* Add the key in the client structure, to map clients -> keys */ 
6596         c
->blockingkeys
[j
] = keys
[j
]; 
6597         incrRefCount(keys
[j
]); 
6599         /* And in the other "side", to map keys -> clients */ 
6600         de 
= dictFind(c
->db
->blockingkeys
,keys
[j
]); 
6604             /* For every key we take a list of clients blocked for it */ 
6606             retval 
= dictAdd(c
->db
->blockingkeys
,keys
[j
],l
); 
6607             incrRefCount(keys
[j
]); 
6608             assert(retval 
== DICT_OK
); 
6610             l 
= dictGetEntryVal(de
); 
6612         listAddNodeTail(l
,c
); 
6614     /* Mark the client as a blocked client */ 
6615     c
->flags 
|= REDIS_BLOCKED
; 
6616     server
.blpop_blocked_clients
++; 
6619 /* Unblock a client that's waiting in a blocking operation such as BLPOP */ 
6620 static void unblockClientWaitingData(redisClient 
*c
) { 
6625     assert(c
->blockingkeys 
!= NULL
); 
6626     /* The client may wait for multiple keys, so unblock it for every key. */ 
6627     for (j 
= 0; j 
< c
->blockingkeysnum
; j
++) { 
6628         /* Remove this client from the list of clients waiting for this key. */ 
6629         de 
= dictFind(c
->db
->blockingkeys
,c
->blockingkeys
[j
]); 
6631         l 
= dictGetEntryVal(de
); 
6632         listDelNode(l
,listSearchKey(l
,c
)); 
6633         /* If the list is empty we need to remove it to avoid wasting memory */ 
6634         if (listLength(l
) == 0) 
6635             dictDelete(c
->db
->blockingkeys
,c
->blockingkeys
[j
]); 
6636         decrRefCount(c
->blockingkeys
[j
]); 
6638     /* Cleanup the client structure */ 
6639     zfree(c
->blockingkeys
); 
6640     c
->blockingkeys 
= NULL
; 
6641     c
->flags 
&= (~REDIS_BLOCKED
); 
6642     server
.blpop_blocked_clients
--; 
6643     /* We want to process data if there is some command waiting 
6644      * in the input buffer. Note that this is safe even if 
6645      * unblockClientWaitingData() gets called from freeClient() because 
6646      * freeClient() will be smart enough to call this function 
6647      * *after* c->querybuf was set to NULL. */ 
6648     if (c
->querybuf 
&& sdslen(c
->querybuf
) > 0) processInputBuffer(c
); 
6651 /* This should be called from any function PUSHing into lists. 
6652  * 'c' is the "pushing client", 'key' is the key it is pushing data against, 
6653  * 'ele' is the element pushed. 
6655  * If the function returns 0 there was no client waiting for a list push 
6658  * If the function returns 1 there was a client waiting for a list push 
6659  * against this key, the element was passed to this client thus it's not 
6660  * needed to actually add it to the list and the caller should return asap. */ 
6661 static int handleClientsWaitingListPush(redisClient 
*c
, robj 
*key
, robj 
*ele
) { 
6662     struct dictEntry 
*de
; 
6663     redisClient 
*receiver
; 
6667     de 
= dictFind(c
->db
->blockingkeys
,key
); 
6668     if (de 
== NULL
) return 0; 
6669     l 
= dictGetEntryVal(de
); 
6672     receiver 
= ln
->value
; 
6674     addReplySds(receiver
,sdsnew("*2\r\n")); 
6675     addReplyBulkLen(receiver
,key
); 
6676     addReply(receiver
,key
); 
6677     addReply(receiver
,shared
.crlf
); 
6678     addReplyBulkLen(receiver
,ele
); 
6679     addReply(receiver
,ele
); 
6680     addReply(receiver
,shared
.crlf
); 
6681     unblockClientWaitingData(receiver
); 
6685 /* Blocking RPOP/LPOP */ 
6686 static void blockingPopGenericCommand(redisClient 
*c
, int where
) { 
6691     for (j 
= 1; j 
< c
->argc
-1; j
++) { 
6692         o 
= lookupKeyWrite(c
->db
,c
->argv
[j
]); 
6694             if (o
->type 
!= REDIS_LIST
) { 
6695                 addReply(c
,shared
.wrongtypeerr
); 
6698                 list 
*list 
= o
->ptr
; 
6699                 if (listLength(list
) != 0) { 
6700                     /* If the list contains elements fall back to the usual 
6701                      * non-blocking POP operation */ 
6702                     robj 
*argv
[2], **orig_argv
; 
6705                     /* We need to alter the command arguments before to call 
6706                      * popGenericCommand() as the command takes a single key. */ 
6707                     orig_argv 
= c
->argv
; 
6708                     orig_argc 
= c
->argc
; 
6709                     argv
[1] = c
->argv
[j
]; 
6713                     /* Also the return value is different, we need to output 
6714                      * the multi bulk reply header and the key name. The 
6715                      * "real" command will add the last element (the value) 
6716                      * for us. If this sounds like a hack to you it's just 
6717                      * because it is... */ 
6718                     addReplySds(c
,sdsnew("*2\r\n")); 
6719                     addReplyBulkLen(c
,argv
[1]); 
6720                     addReply(c
,argv
[1]); 
6721                     addReply(c
,shared
.crlf
); 
6722                     popGenericCommand(c
,where
); 
6724                     /* Fix the client structure with the original stuff */ 
6725                     c
->argv 
= orig_argv
; 
6726                     c
->argc 
= orig_argc
; 
6732     /* If the list is empty or the key does not exists we must block */ 
6733     timeout 
= strtol(c
->argv
[c
->argc
-1]->ptr
,NULL
,10); 
6734     if (timeout 
> 0) timeout 
+= time(NULL
); 
6735     blockForKeys(c
,c
->argv
+1,c
->argc
-2,timeout
); 
6738 static void blpopCommand(redisClient 
*c
) { 
6739     blockingPopGenericCommand(c
,REDIS_HEAD
); 
6742 static void brpopCommand(redisClient 
*c
) { 
6743     blockingPopGenericCommand(c
,REDIS_TAIL
); 
6746 /* =============================== Replication  ============================= */ 
6748 static int syncWrite(int fd
, char *ptr
, ssize_t size
, int timeout
) { 
6749     ssize_t nwritten
, ret 
= size
; 
6750     time_t start 
= time(NULL
); 
6754         if (aeWait(fd
,AE_WRITABLE
,1000) & AE_WRITABLE
) { 
6755             nwritten 
= write(fd
,ptr
,size
); 
6756             if (nwritten 
== -1) return -1; 
6760         if ((time(NULL
)-start
) > timeout
) { 
6768 static int syncRead(int fd
, char *ptr
, ssize_t size
, int timeout
) { 
6769     ssize_t nread
, totread 
= 0; 
6770     time_t start 
= time(NULL
); 
6774         if (aeWait(fd
,AE_READABLE
,1000) & AE_READABLE
) { 
6775             nread 
= read(fd
,ptr
,size
); 
6776             if (nread 
== -1) return -1; 
6781         if ((time(NULL
)-start
) > timeout
) { 
6789 static int syncReadLine(int fd
, char *ptr
, ssize_t size
, int timeout
) { 
6796         if (syncRead(fd
,&c
,1,timeout
) == -1) return -1; 
6799             if (nread 
&& *(ptr
-1) == '\r') *(ptr
-1) = '\0'; 
6810 static void syncCommand(redisClient 
*c
) { 
6811     /* ignore SYNC if already slave or in monitor mode */ 
6812     if (c
->flags 
& REDIS_SLAVE
) return; 
6814     /* SYNC can't be issued when the server has pending data to send to 
6815      * the client about already issued commands. We need a fresh reply 
6816      * buffer registering the differences between the BGSAVE and the current 
6817      * dataset, so that we can copy to other slaves if needed. */ 
6818     if (listLength(c
->reply
) != 0) { 
6819         addReplySds(c
,sdsnew("-ERR SYNC is invalid with pending input\r\n")); 
6823     redisLog(REDIS_NOTICE
,"Slave ask for synchronization"); 
6824     /* Here we need to check if there is a background saving operation 
6825      * in progress, or if it is required to start one */ 
6826     if (server
.bgsavechildpid 
!= -1) { 
6827         /* Ok a background save is in progress. Let's check if it is a good 
6828          * one for replication, i.e. if there is another slave that is 
6829          * registering differences since the server forked to save */ 
6834         listRewind(server
.slaves
,&li
); 
6835         while((ln 
= listNext(&li
))) { 
6837             if (slave
->replstate 
== REDIS_REPL_WAIT_BGSAVE_END
) break; 
6840             /* Perfect, the server is already registering differences for 
6841              * another slave. Set the right state, and copy the buffer. */ 
6842             listRelease(c
->reply
); 
6843             c
->reply 
= listDup(slave
->reply
); 
6844             c
->replstate 
= REDIS_REPL_WAIT_BGSAVE_END
; 
6845             redisLog(REDIS_NOTICE
,"Waiting for end of BGSAVE for SYNC"); 
6847             /* No way, we need to wait for the next BGSAVE in order to 
6848              * register differences */ 
6849             c
->replstate 
= REDIS_REPL_WAIT_BGSAVE_START
; 
6850             redisLog(REDIS_NOTICE
,"Waiting for next BGSAVE for SYNC"); 
6853         /* Ok we don't have a BGSAVE in progress, let's start one */ 
6854         redisLog(REDIS_NOTICE
,"Starting BGSAVE for SYNC"); 
6855         if (rdbSaveBackground(server
.dbfilename
) != REDIS_OK
) { 
6856             redisLog(REDIS_NOTICE
,"Replication failed, can't BGSAVE"); 
6857             addReplySds(c
,sdsnew("-ERR Unalbe to perform background save\r\n")); 
6860         c
->replstate 
= REDIS_REPL_WAIT_BGSAVE_END
; 
6863     c
->flags 
|= REDIS_SLAVE
; 
6865     listAddNodeTail(server
.slaves
,c
); 
6869 static void sendBulkToSlave(aeEventLoop 
*el
, int fd
, void *privdata
, int mask
) { 
6870     redisClient 
*slave 
= privdata
; 
6872     REDIS_NOTUSED(mask
); 
6873     char buf
[REDIS_IOBUF_LEN
]; 
6874     ssize_t nwritten
, buflen
; 
6876     if (slave
->repldboff 
== 0) { 
6877         /* Write the bulk write count before to transfer the DB. In theory here 
6878          * we don't know how much room there is in the output buffer of the 
6879          * socket, but in pratice SO_SNDLOWAT (the minimum count for output 
6880          * operations) will never be smaller than the few bytes we need. */ 
6883         bulkcount 
= sdscatprintf(sdsempty(),"$%lld\r\n",(unsigned long long) 
6885         if (write(fd
,bulkcount
,sdslen(bulkcount
)) != (signed)sdslen(bulkcount
)) 
6893     lseek(slave
->repldbfd
,slave
->repldboff
,SEEK_SET
); 
6894     buflen 
= read(slave
->repldbfd
,buf
,REDIS_IOBUF_LEN
); 
6896         redisLog(REDIS_WARNING
,"Read error sending DB to slave: %s", 
6897             (buflen 
== 0) ? "premature EOF" : strerror(errno
)); 
6901     if ((nwritten 
= write(fd
,buf
,buflen
)) == -1) { 
6902         redisLog(REDIS_VERBOSE
,"Write error sending DB to slave: %s", 
6907     slave
->repldboff 
+= nwritten
; 
6908     if (slave
->repldboff 
== slave
->repldbsize
) { 
6909         close(slave
->repldbfd
); 
6910         slave
->repldbfd 
= -1; 
6911         aeDeleteFileEvent(server
.el
,slave
->fd
,AE_WRITABLE
); 
6912         slave
->replstate 
= REDIS_REPL_ONLINE
; 
6913         if (aeCreateFileEvent(server
.el
, slave
->fd
, AE_WRITABLE
, 
6914             sendReplyToClient
, slave
) == AE_ERR
) { 
6918         addReplySds(slave
,sdsempty()); 
6919         redisLog(REDIS_NOTICE
,"Synchronization with slave succeeded"); 
6923 /* This function is called at the end of every background saving. 
6924  * The argument bgsaveerr is REDIS_OK if the background saving succeeded 
6925  * otherwise REDIS_ERR is passed to the function. 
6927  * The goal of this function is to handle slaves waiting for a successful 
6928  * background saving in order to perform non-blocking synchronization. */ 
6929 static void updateSlavesWaitingBgsave(int bgsaveerr
) { 
6931     int startbgsave 
= 0; 
6934     listRewind(server
.slaves
,&li
); 
6935     while((ln 
= listNext(&li
))) { 
6936         redisClient 
*slave 
= ln
->value
; 
6938         if (slave
->replstate 
== REDIS_REPL_WAIT_BGSAVE_START
) { 
6940             slave
->replstate 
= REDIS_REPL_WAIT_BGSAVE_END
; 
6941         } else if (slave
->replstate 
== REDIS_REPL_WAIT_BGSAVE_END
) { 
6942             struct redis_stat buf
; 
6944             if (bgsaveerr 
!= REDIS_OK
) { 
6946                 redisLog(REDIS_WARNING
,"SYNC failed. BGSAVE child returned an error"); 
6949             if ((slave
->repldbfd 
= open(server
.dbfilename
,O_RDONLY
)) == -1 || 
6950                 redis_fstat(slave
->repldbfd
,&buf
) == -1) { 
6952                 redisLog(REDIS_WARNING
,"SYNC failed. Can't open/stat DB after BGSAVE: %s", strerror(errno
)); 
6955             slave
->repldboff 
= 0; 
6956             slave
->repldbsize 
= buf
.st_size
; 
6957             slave
->replstate 
= REDIS_REPL_SEND_BULK
; 
6958             aeDeleteFileEvent(server
.el
,slave
->fd
,AE_WRITABLE
); 
6959             if (aeCreateFileEvent(server
.el
, slave
->fd
, AE_WRITABLE
, sendBulkToSlave
, slave
) == AE_ERR
) { 
6966         if (rdbSaveBackground(server
.dbfilename
) != REDIS_OK
) { 
6969             listRewind(server
.slaves
,&li
); 
6970             redisLog(REDIS_WARNING
,"SYNC failed. BGSAVE failed"); 
6971             while((ln 
= listNext(&li
))) { 
6972                 redisClient 
*slave 
= ln
->value
; 
6974                 if (slave
->replstate 
== REDIS_REPL_WAIT_BGSAVE_START
) 
6981 static int syncWithMaster(void) { 
6982     char buf
[1024], tmpfile
[256], authcmd
[1024]; 
6984     int fd 
= anetTcpConnect(NULL
,server
.masterhost
,server
.masterport
); 
6988         redisLog(REDIS_WARNING
,"Unable to connect to MASTER: %s", 
6993     /* AUTH with the master if required. */ 
6994     if(server
.masterauth
) { 
6995         snprintf(authcmd
, 1024, "AUTH %s\r\n", server
.masterauth
); 
6996         if (syncWrite(fd
, authcmd
, strlen(server
.masterauth
)+7, 5) == -1) { 
6998             redisLog(REDIS_WARNING
,"Unable to AUTH to MASTER: %s", 
7002         /* Read the AUTH result.  */ 
7003         if (syncReadLine(fd
,buf
,1024,3600) == -1) { 
7005             redisLog(REDIS_WARNING
,"I/O error reading auth result from MASTER: %s", 
7009         if (buf
[0] != '+') { 
7011             redisLog(REDIS_WARNING
,"Cannot AUTH to MASTER, is the masterauth password correct?"); 
7016     /* Issue the SYNC command */ 
7017     if (syncWrite(fd
,"SYNC \r\n",7,5) == -1) { 
7019         redisLog(REDIS_WARNING
,"I/O error writing to MASTER: %s", 
7023     /* Read the bulk write count */ 
7024     if (syncReadLine(fd
,buf
,1024,3600) == -1) { 
7026         redisLog(REDIS_WARNING
,"I/O error reading bulk count from MASTER: %s", 
7030     if (buf
[0] != '$') { 
7032         redisLog(REDIS_WARNING
,"Bad protocol from MASTER, the first byte is not '$', are you sure the host and port are right?"); 
7035     dumpsize 
= strtol(buf
+1,NULL
,10); 
7036     redisLog(REDIS_NOTICE
,"Receiving %ld bytes data dump from MASTER",dumpsize
); 
7037     /* Read the bulk write data on a temp file */ 
7038     snprintf(tmpfile
,256,"temp-%d.%ld.rdb",(int)time(NULL
),(long int)random()); 
7039     dfd 
= open(tmpfile
,O_CREAT
|O_WRONLY
,0644); 
7042         redisLog(REDIS_WARNING
,"Opening the temp file needed for MASTER <-> SLAVE synchronization: %s",strerror(errno
)); 
7046         int nread
, nwritten
; 
7048         nread 
= read(fd
,buf
,(dumpsize 
< 1024)?dumpsize
:1024); 
7050             redisLog(REDIS_WARNING
,"I/O error trying to sync with MASTER: %s", 
7056         nwritten 
= write(dfd
,buf
,nread
); 
7057         if (nwritten 
== -1) { 
7058             redisLog(REDIS_WARNING
,"Write error writing to the DB dump file needed for MASTER <-> SLAVE synchrnonization: %s", strerror(errno
)); 
7066     if (rename(tmpfile
,server
.dbfilename
) == -1) { 
7067         redisLog(REDIS_WARNING
,"Failed trying to rename the temp DB into dump.rdb in MASTER <-> SLAVE synchronization: %s", strerror(errno
)); 
7073     if (rdbLoad(server
.dbfilename
) != REDIS_OK
) { 
7074         redisLog(REDIS_WARNING
,"Failed trying to load the MASTER synchronization DB from disk"); 
7078     server
.master 
= createClient(fd
); 
7079     server
.master
->flags 
|= REDIS_MASTER
; 
7080     server
.master
->authenticated 
= 1; 
7081     server
.replstate 
= REDIS_REPL_CONNECTED
; 
7085 static void slaveofCommand(redisClient 
*c
) { 
7086     if (!strcasecmp(c
->argv
[1]->ptr
,"no") && 
7087         !strcasecmp(c
->argv
[2]->ptr
,"one")) { 
7088         if (server
.masterhost
) { 
7089             sdsfree(server
.masterhost
); 
7090             server
.masterhost 
= NULL
; 
7091             if (server
.master
) freeClient(server
.master
); 
7092             server
.replstate 
= REDIS_REPL_NONE
; 
7093             redisLog(REDIS_NOTICE
,"MASTER MODE enabled (user request)"); 
7096         sdsfree(server
.masterhost
); 
7097         server
.masterhost 
= sdsdup(c
->argv
[1]->ptr
); 
7098         server
.masterport 
= atoi(c
->argv
[2]->ptr
); 
7099         if (server
.master
) freeClient(server
.master
); 
7100         server
.replstate 
= REDIS_REPL_CONNECT
; 
7101         redisLog(REDIS_NOTICE
,"SLAVE OF %s:%d enabled (user request)", 
7102             server
.masterhost
, server
.masterport
); 
7104     addReply(c
,shared
.ok
); 
7107 /* ============================ Maxmemory directive  ======================== */ 
7109 /* Try to free one object from the pre-allocated objects free list. 
7110  * This is useful under low mem conditions as by default we take 1 million 
7111  * free objects allocated. On success REDIS_OK is returned, otherwise 
7113 static int tryFreeOneObjectFromFreelist(void) { 
7116     if (server
.vm_enabled
) pthread_mutex_lock(&server
.obj_freelist_mutex
); 
7117     if (listLength(server
.objfreelist
)) { 
7118         listNode 
*head 
= listFirst(server
.objfreelist
); 
7119         o 
= listNodeValue(head
); 
7120         listDelNode(server
.objfreelist
,head
); 
7121         if (server
.vm_enabled
) pthread_mutex_unlock(&server
.obj_freelist_mutex
); 
7125         if (server
.vm_enabled
) pthread_mutex_unlock(&server
.obj_freelist_mutex
); 
7130 /* This function gets called when 'maxmemory' is set on the config file to limit 
7131  * the max memory used by the server, and we are out of memory. 
7132  * This function will try to, in order: 
7134  * - Free objects from the free list 
7135  * - Try to remove keys with an EXPIRE set 
7137  * If it is not possible to free enough memory to reach used-memory < maxmemory 
7138  * the server will start refusing commands that will enlarge even more the 
7141 static void freeMemoryIfNeeded(void) { 
7142     while (server
.maxmemory 
&& zmalloc_used_memory() > server
.maxmemory
) { 
7143         int j
, k
, freed 
= 0; 
7145         if (tryFreeOneObjectFromFreelist() == REDIS_OK
) continue; 
7146         for (j 
= 0; j 
< server
.dbnum
; j
++) { 
7148             robj 
*minkey 
= NULL
; 
7149             struct dictEntry 
*de
; 
7151             if (dictSize(server
.db
[j
].expires
)) { 
7153                 /* From a sample of three keys drop the one nearest to 
7154                  * the natural expire */ 
7155                 for (k 
= 0; k 
< 3; k
++) { 
7158                     de 
= dictGetRandomKey(server
.db
[j
].expires
); 
7159                     t 
= (time_t) dictGetEntryVal(de
); 
7160                     if (minttl 
== -1 || t 
< minttl
) { 
7161                         minkey 
= dictGetEntryKey(de
); 
7165                 deleteKey(server
.db
+j
,minkey
); 
7168         if (!freed
) return; /* nothing to free... */ 
7172 /* ============================== Append Only file ========================== */ 
7174 static void feedAppendOnlyFile(struct redisCommand 
*cmd
, int dictid
, robj 
**argv
, int argc
) { 
7175     sds buf 
= sdsempty(); 
7181     /* The DB this command was targetting is not the same as the last command 
7182      * we appendend. To issue a SELECT command is needed. */ 
7183     if (dictid 
!= server
.appendseldb
) { 
7186         snprintf(seldb
,sizeof(seldb
),"%d",dictid
); 
7187         buf 
= sdscatprintf(buf
,"*2\r\n$6\r\nSELECT\r\n$%lu\r\n%s\r\n", 
7188             (unsigned long)strlen(seldb
),seldb
); 
7189         server
.appendseldb 
= dictid
; 
7192     /* "Fix" the argv vector if the command is EXPIRE. We want to translate 
7193      * EXPIREs into EXPIREATs calls */ 
7194     if (cmd
->proc 
== expireCommand
) { 
7197         tmpargv
[0] = createStringObject("EXPIREAT",8); 
7198         tmpargv
[1] = argv
[1]; 
7199         incrRefCount(argv
[1]); 
7200         when 
= time(NULL
)+strtol(argv
[2]->ptr
,NULL
,10); 
7201         tmpargv
[2] = createObject(REDIS_STRING
, 
7202             sdscatprintf(sdsempty(),"%ld",when
)); 
7206     /* Append the actual command */ 
7207     buf 
= sdscatprintf(buf
,"*%d\r\n",argc
); 
7208     for (j 
= 0; j 
< argc
; j
++) { 
7211         o 
= getDecodedObject(o
); 
7212         buf 
= sdscatprintf(buf
,"$%lu\r\n",(unsigned long)sdslen(o
->ptr
)); 
7213         buf 
= sdscatlen(buf
,o
->ptr
,sdslen(o
->ptr
)); 
7214         buf 
= sdscatlen(buf
,"\r\n",2); 
7218     /* Free the objects from the modified argv for EXPIREAT */ 
7219     if (cmd
->proc 
== expireCommand
) { 
7220         for (j 
= 0; j 
< 3; j
++) 
7221             decrRefCount(argv
[j
]); 
7224     /* We want to perform a single write. This should be guaranteed atomic 
7225      * at least if the filesystem we are writing is a real physical one. 
7226      * While this will save us against the server being killed I don't think 
7227      * there is much to do about the whole server stopping for power problems 
7229      nwritten 
= write(server
.appendfd
,buf
,sdslen(buf
)); 
7230      if (nwritten 
!= (signed)sdslen(buf
)) { 
7231         /* Ooops, we are in troubles. The best thing to do for now is 
7232          * to simply exit instead to give the illusion that everything is 
7233          * working as expected. */ 
7234          if (nwritten 
== -1) { 
7235             redisLog(REDIS_WARNING
,"Exiting on error writing to the append-only file: %s",strerror(errno
)); 
7237             redisLog(REDIS_WARNING
,"Exiting on short write while writing to the append-only file: %s",strerror(errno
)); 
7241     /* If a background append only file rewriting is in progress we want to 
7242      * accumulate the differences between the child DB and the current one 
7243      * in a buffer, so that when the child process will do its work we 
7244      * can append the differences to the new append only file. */ 
7245     if (server
.bgrewritechildpid 
!= -1) 
7246         server
.bgrewritebuf 
= sdscatlen(server
.bgrewritebuf
,buf
,sdslen(buf
)); 
7250     if (server
.appendfsync 
== APPENDFSYNC_ALWAYS 
|| 
7251         (server
.appendfsync 
== APPENDFSYNC_EVERYSEC 
&& 
7252          now
-server
.lastfsync 
> 1)) 
7254         fsync(server
.appendfd
); /* Let's try to get this data on the disk */ 
7255         server
.lastfsync 
= now
; 
7259 /* In Redis commands are always executed in the context of a client, so in 
7260  * order to load the append only file we need to create a fake client. */ 
7261 static struct redisClient 
*createFakeClient(void) { 
7262     struct redisClient 
*c 
= zmalloc(sizeof(*c
)); 
7266     c
->querybuf 
= sdsempty(); 
7270     /* We set the fake client as a slave waiting for the synchronization 
7271      * so that Redis will not try to send replies to this client. */ 
7272     c
->replstate 
= REDIS_REPL_WAIT_BGSAVE_START
; 
7273     c
->reply 
= listCreate(); 
7274     listSetFreeMethod(c
->reply
,decrRefCount
); 
7275     listSetDupMethod(c
->reply
,dupClientReplyValue
); 
7279 static void freeFakeClient(struct redisClient 
*c
) { 
7280     sdsfree(c
->querybuf
); 
7281     listRelease(c
->reply
); 
7285 /* Replay the append log file. On success REDIS_OK is returned. On non fatal 
7286  * error (the append only file is zero-length) REDIS_ERR is returned. On 
7287  * fatal error an error message is logged and the program exits. */ 
7288 int loadAppendOnlyFile(char *filename
) { 
7289     struct redisClient 
*fakeClient
; 
7290     FILE *fp 
= fopen(filename
,"r"); 
7291     struct redis_stat sb
; 
7292     unsigned long long loadedkeys 
= 0; 
7294     if (redis_fstat(fileno(fp
),&sb
) != -1 && sb
.st_size 
== 0) 
7298         redisLog(REDIS_WARNING
,"Fatal error: can't open the append log file for reading: %s",strerror(errno
)); 
7302     fakeClient 
= createFakeClient(); 
7309         struct redisCommand 
*cmd
; 
7311         if (fgets(buf
,sizeof(buf
),fp
) == NULL
) { 
7317         if (buf
[0] != '*') goto fmterr
; 
7319         argv 
= zmalloc(sizeof(robj
*)*argc
); 
7320         for (j 
= 0; j 
< argc
; j
++) { 
7321             if (fgets(buf
,sizeof(buf
),fp
) == NULL
) goto readerr
; 
7322             if (buf
[0] != '$') goto fmterr
; 
7323             len 
= strtol(buf
+1,NULL
,10); 
7324             argsds 
= sdsnewlen(NULL
,len
); 
7325             if (len 
&& fread(argsds
,len
,1,fp
) == 0) goto fmterr
; 
7326             argv
[j
] = createObject(REDIS_STRING
,argsds
); 
7327             if (fread(buf
,2,1,fp
) == 0) goto fmterr
; /* discard CRLF */ 
7330         /* Command lookup */ 
7331         cmd 
= lookupCommand(argv
[0]->ptr
); 
7333             redisLog(REDIS_WARNING
,"Unknown command '%s' reading the append only file", argv
[0]->ptr
); 
7336         /* Try object sharing and encoding */ 
7337         if (server
.shareobjects
) { 
7339             for(j 
= 1; j 
< argc
; j
++) 
7340                 argv
[j
] = tryObjectSharing(argv
[j
]); 
7342         if (cmd
->flags 
& REDIS_CMD_BULK
) 
7343             tryObjectEncoding(argv
[argc
-1]); 
7344         /* Run the command in the context of a fake client */ 
7345         fakeClient
->argc 
= argc
; 
7346         fakeClient
->argv 
= argv
; 
7347         cmd
->proc(fakeClient
); 
7348         /* Discard the reply objects list from the fake client */ 
7349         while(listLength(fakeClient
->reply
)) 
7350             listDelNode(fakeClient
->reply
,listFirst(fakeClient
->reply
)); 
7351         /* Clean up, ready for the next command */ 
7352         for (j 
= 0; j 
< argc
; j
++) decrRefCount(argv
[j
]); 
7354         /* Handle swapping while loading big datasets when VM is on */ 
7356         if (server
.vm_enabled 
&& (loadedkeys 
% 5000) == 0) { 
7357             while (zmalloc_used_memory() > server
.vm_max_memory
) { 
7358                 if (vmSwapOneObjectBlocking() == REDIS_ERR
) break; 
7363     freeFakeClient(fakeClient
); 
7368         redisLog(REDIS_WARNING
,"Unexpected end of file reading the append only file"); 
7370         redisLog(REDIS_WARNING
,"Unrecoverable error reading the append only file: %s", strerror(errno
)); 
7374     redisLog(REDIS_WARNING
,"Bad file format reading the append only file"); 
7378 /* Write an object into a file in the bulk format $<count>\r\n<payload>\r\n */ 
7379 static int fwriteBulk(FILE *fp
, robj 
*obj
) { 
7383     /* Avoid the incr/decr ref count business if possible to help 
7384      * copy-on-write (we are often in a child process when this function 
7386      * Also makes sure that key objects don't get incrRefCount-ed when VM 
7388     if (obj
->encoding 
!= REDIS_ENCODING_RAW
) { 
7389         obj 
= getDecodedObject(obj
); 
7392     snprintf(buf
,sizeof(buf
),"$%ld\r\n",(long)sdslen(obj
->ptr
)); 
7393     if (fwrite(buf
,strlen(buf
),1,fp
) == 0) goto err
; 
7394     if (sdslen(obj
->ptr
) && fwrite(obj
->ptr
,sdslen(obj
->ptr
),1,fp
) == 0) 
7396     if (fwrite("\r\n",2,1,fp
) == 0) goto err
; 
7397     if (decrrc
) decrRefCount(obj
); 
7400     if (decrrc
) decrRefCount(obj
); 
7404 /* Write a double value in bulk format $<count>\r\n<payload>\r\n */ 
7405 static int fwriteBulkDouble(FILE *fp
, double d
) { 
7406     char buf
[128], dbuf
[128]; 
7408     snprintf(dbuf
,sizeof(dbuf
),"%.17g\r\n",d
); 
7409     snprintf(buf
,sizeof(buf
),"$%lu\r\n",(unsigned long)strlen(dbuf
)-2); 
7410     if (fwrite(buf
,strlen(buf
),1,fp
) == 0) return 0; 
7411     if (fwrite(dbuf
,strlen(dbuf
),1,fp
) == 0) return 0; 
7415 /* Write a long value in bulk format $<count>\r\n<payload>\r\n */ 
7416 static int fwriteBulkLong(FILE *fp
, long l
) { 
7417     char buf
[128], lbuf
[128]; 
7419     snprintf(lbuf
,sizeof(lbuf
),"%ld\r\n",l
); 
7420     snprintf(buf
,sizeof(buf
),"$%lu\r\n",(unsigned long)strlen(lbuf
)-2); 
7421     if (fwrite(buf
,strlen(buf
),1,fp
) == 0) return 0; 
7422     if (fwrite(lbuf
,strlen(lbuf
),1,fp
) == 0) return 0; 
7426 /* Write a sequence of commands able to fully rebuild the dataset into 
7427  * "filename". Used both by REWRITEAOF and BGREWRITEAOF. */ 
7428 static int rewriteAppendOnlyFile(char *filename
) { 
7429     dictIterator 
*di 
= NULL
; 
7434     time_t now 
= time(NULL
); 
7436     /* Note that we have to use a different temp name here compared to the 
7437      * one used by rewriteAppendOnlyFileBackground() function. */ 
7438     snprintf(tmpfile
,256,"temp-rewriteaof-%d.aof", (int) getpid()); 
7439     fp 
= fopen(tmpfile
,"w"); 
7441         redisLog(REDIS_WARNING
, "Failed rewriting the append only file: %s", strerror(errno
)); 
7444     for (j 
= 0; j 
< server
.dbnum
; j
++) { 
7445         char selectcmd
[] = "*2\r\n$6\r\nSELECT\r\n"; 
7446         redisDb 
*db 
= server
.db
+j
; 
7448         if (dictSize(d
) == 0) continue; 
7449         di 
= dictGetIterator(d
); 
7455         /* SELECT the new DB */ 
7456         if (fwrite(selectcmd
,sizeof(selectcmd
)-1,1,fp
) == 0) goto werr
; 
7457         if (fwriteBulkLong(fp
,j
) == 0) goto werr
; 
7459         /* Iterate this DB writing every entry */ 
7460         while((de 
= dictNext(di
)) != NULL
) { 
7465             key 
= dictGetEntryKey(de
); 
7466             /* If the value for this key is swapped, load a preview in memory. 
7467              * We use a "swapped" flag to remember if we need to free the 
7468              * value object instead to just increment the ref count anyway 
7469              * in order to avoid copy-on-write of pages if we are forked() */ 
7470             if (!server
.vm_enabled 
|| key
->storage 
== REDIS_VM_MEMORY 
|| 
7471                 key
->storage 
== REDIS_VM_SWAPPING
) { 
7472                 o 
= dictGetEntryVal(de
); 
7475                 o 
= vmPreviewObject(key
); 
7478             expiretime 
= getExpire(db
,key
); 
7480             /* Save the key and associated value */ 
7481             if (o
->type 
== REDIS_STRING
) { 
7482                 /* Emit a SET command */ 
7483                 char cmd
[]="*3\r\n$3\r\nSET\r\n"; 
7484                 if (fwrite(cmd
,sizeof(cmd
)-1,1,fp
) == 0) goto werr
; 
7486                 if (fwriteBulk(fp
,key
) == 0) goto werr
; 
7487                 if (fwriteBulk(fp
,o
) == 0) goto werr
; 
7488             } else if (o
->type 
== REDIS_LIST
) { 
7489                 /* Emit the RPUSHes needed to rebuild the list */ 
7490                 list 
*list 
= o
->ptr
; 
7494                 listRewind(list
,&li
); 
7495                 while((ln 
= listNext(&li
))) { 
7496                     char cmd
[]="*3\r\n$5\r\nRPUSH\r\n"; 
7497                     robj 
*eleobj 
= listNodeValue(ln
); 
7499                     if (fwrite(cmd
,sizeof(cmd
)-1,1,fp
) == 0) goto werr
; 
7500                     if (fwriteBulk(fp
,key
) == 0) goto werr
; 
7501                     if (fwriteBulk(fp
,eleobj
) == 0) goto werr
; 
7503             } else if (o
->type 
== REDIS_SET
) { 
7504                 /* Emit the SADDs needed to rebuild the set */ 
7506                 dictIterator 
*di 
= dictGetIterator(set
); 
7509                 while((de 
= dictNext(di
)) != NULL
) { 
7510                     char cmd
[]="*3\r\n$4\r\nSADD\r\n"; 
7511                     robj 
*eleobj 
= dictGetEntryKey(de
); 
7513                     if (fwrite(cmd
,sizeof(cmd
)-1,1,fp
) == 0) goto werr
; 
7514                     if (fwriteBulk(fp
,key
) == 0) goto werr
; 
7515                     if (fwriteBulk(fp
,eleobj
) == 0) goto werr
; 
7517                 dictReleaseIterator(di
); 
7518             } else if (o
->type 
== REDIS_ZSET
) { 
7519                 /* Emit the ZADDs needed to rebuild the sorted set */ 
7521                 dictIterator 
*di 
= dictGetIterator(zs
->dict
); 
7524                 while((de 
= dictNext(di
)) != NULL
) { 
7525                     char cmd
[]="*4\r\n$4\r\nZADD\r\n"; 
7526                     robj 
*eleobj 
= dictGetEntryKey(de
); 
7527                     double *score 
= dictGetEntryVal(de
); 
7529                     if (fwrite(cmd
,sizeof(cmd
)-1,1,fp
) == 0) goto werr
; 
7530                     if (fwriteBulk(fp
,key
) == 0) goto werr
; 
7531                     if (fwriteBulkDouble(fp
,*score
) == 0) goto werr
; 
7532                     if (fwriteBulk(fp
,eleobj
) == 0) goto werr
; 
7534                 dictReleaseIterator(di
); 
7536                 redisAssert(0 != 0); 
7538             /* Save the expire time */ 
7539             if (expiretime 
!= -1) { 
7540                 char cmd
[]="*3\r\n$8\r\nEXPIREAT\r\n"; 
7541                 /* If this key is already expired skip it */ 
7542                 if (expiretime 
< now
) continue; 
7543                 if (fwrite(cmd
,sizeof(cmd
)-1,1,fp
) == 0) goto werr
; 
7544                 if (fwriteBulk(fp
,key
) == 0) goto werr
; 
7545                 if (fwriteBulkLong(fp
,expiretime
) == 0) goto werr
; 
7547             if (swapped
) decrRefCount(o
); 
7549         dictReleaseIterator(di
); 
7552     /* Make sure data will not remain on the OS's output buffers */ 
7557     /* Use RENAME to make sure the DB file is changed atomically only 
7558      * if the generate DB file is ok. */ 
7559     if (rename(tmpfile
,filename
) == -1) { 
7560         redisLog(REDIS_WARNING
,"Error moving temp append only file on the final destination: %s", strerror(errno
)); 
7564     redisLog(REDIS_NOTICE
,"SYNC append only file rewrite performed"); 
7570     redisLog(REDIS_WARNING
,"Write error writing append only file on disk: %s", strerror(errno
)); 
7571     if (di
) dictReleaseIterator(di
); 
7575 /* This is how rewriting of the append only file in background works: 
7577  * 1) The user calls BGREWRITEAOF 
7578  * 2) Redis calls this function, that forks(): 
7579  *    2a) the child rewrite the append only file in a temp file. 
7580  *    2b) the parent accumulates differences in server.bgrewritebuf. 
7581  * 3) When the child has finished '2a' it exits. 
7582  * 4) The parent will trap the exit code, if it's OK, will append the 
7583  *    data accumulated into server.bgrewritebuf into the temp file, and 
7584  *    finally will rename(2) the temp file in the actual file name. 
7585  *    Then the new file is reopened as the new append only file. Profit! 
7587 static int rewriteAppendOnlyFileBackground(void) { 
7590     if (server
.bgrewritechildpid 
!= -1) return REDIS_ERR
; 
7591     if (server
.vm_enabled
) waitEmptyIOJobsQueue(); 
7592     if ((childpid 
= fork()) == 0) { 
7596         if (server
.vm_enabled
) vmReopenSwapFile(); 
7598         snprintf(tmpfile
,256,"temp-rewriteaof-bg-%d.aof", (int) getpid()); 
7599         if (rewriteAppendOnlyFile(tmpfile
) == REDIS_OK
) { 
7606         if (childpid 
== -1) { 
7607             redisLog(REDIS_WARNING
, 
7608                 "Can't rewrite append only file in background: fork: %s", 
7612         redisLog(REDIS_NOTICE
, 
7613             "Background append only file rewriting started by pid %d",childpid
); 
7614         server
.bgrewritechildpid 
= childpid
; 
7615         /* We set appendseldb to -1 in order to force the next call to the 
7616          * feedAppendOnlyFile() to issue a SELECT command, so the differences 
7617          * accumulated by the parent into server.bgrewritebuf will start 
7618          * with a SELECT statement and it will be safe to merge. */ 
7619         server
.appendseldb 
= -1; 
7622     return REDIS_OK
; /* unreached */ 
7625 static void bgrewriteaofCommand(redisClient 
*c
) { 
7626     if (server
.bgrewritechildpid 
!= -1) { 
7627         addReplySds(c
,sdsnew("-ERR background append only file rewriting already in progress\r\n")); 
7630     if (rewriteAppendOnlyFileBackground() == REDIS_OK
) { 
7631         char *status 
= "+Background append only file rewriting started\r\n"; 
7632         addReplySds(c
,sdsnew(status
)); 
7634         addReply(c
,shared
.err
); 
7638 static void aofRemoveTempFile(pid_t childpid
) { 
7641     snprintf(tmpfile
,256,"temp-rewriteaof-bg-%d.aof", (int) childpid
); 
7645 /* Virtual Memory is composed mainly of two subsystems: 
7646  * - Blocking Virtual Memory 
7647  * - Threaded Virtual Memory I/O 
7648  * The two parts are not fully decoupled, but functions are split among two 
7649  * different sections of the source code (delimited by comments) in order to 
7650  * make more clear what functionality is about the blocking VM and what about 
7651  * the threaded (not blocking) VM. 
7655  * Redis VM is a blocking VM (one that blocks reading swapped values from 
7656  * disk into memory when a value swapped out is needed in memory) that is made 
7657  * unblocking by trying to examine the command argument vector in order to 
7658  * load in background values that will likely be needed in order to exec 
7659  * the command. The command is executed only once all the relevant keys 
7660  * are loaded into memory. 
7662  * This basically is almost as simple as a blocking VM, but almost as parallel 
7663  * as a fully non-blocking VM. 
7666 /* =================== Virtual Memory - Blocking Side  ====================== */ 
7668 /* substitute the first occurrence of '%p' with the process pid in the 
7669  * swap file name. */ 
7670 static void expandVmSwapFilename(void) { 
7671     char *p 
= strstr(server
.vm_swap_file
,"%p"); 
7677     new = sdscat(new,server
.vm_swap_file
); 
7678     new = sdscatprintf(new,"%ld",(long) getpid()); 
7679     new = sdscat(new,p
+2); 
7680     zfree(server
.vm_swap_file
); 
7681     server
.vm_swap_file 
= new; 
7684 static void vmInit(void) { 
7689     if (server
.vm_max_threads 
!= 0) 
7690         zmalloc_enable_thread_safeness(); /* we need thread safe zmalloc() */ 
7692     expandVmSwapFilename(); 
7693     redisLog(REDIS_NOTICE
,"Using '%s' as swap file",server
.vm_swap_file
); 
7694     if ((server
.vm_fp 
= fopen(server
.vm_swap_file
,"r+b")) == NULL
) { 
7695         server
.vm_fp 
= fopen(server
.vm_swap_file
,"w+b"); 
7697     if (server
.vm_fp 
== NULL
) { 
7698         redisLog(REDIS_WARNING
, 
7699             "Impossible to open the swap file: %s. Exiting.", 
7703     server
.vm_fd 
= fileno(server
.vm_fp
); 
7704     server
.vm_next_page 
= 0; 
7705     server
.vm_near_pages 
= 0; 
7706     server
.vm_stats_used_pages 
= 0; 
7707     server
.vm_stats_swapped_objects 
= 0; 
7708     server
.vm_stats_swapouts 
= 0; 
7709     server
.vm_stats_swapins 
= 0; 
7710     totsize 
= server
.vm_pages
*server
.vm_page_size
; 
7711     redisLog(REDIS_NOTICE
,"Allocating %lld bytes of swap file",totsize
); 
7712     if (ftruncate(server
.vm_fd
,totsize
) == -1) { 
7713         redisLog(REDIS_WARNING
,"Can't ftruncate swap file: %s. Exiting.", 
7717         redisLog(REDIS_NOTICE
,"Swap file allocated with success"); 
7719     server
.vm_bitmap 
= zmalloc((server
.vm_pages
+7)/8); 
7720     redisLog(REDIS_VERBOSE
,"Allocated %lld bytes page table for %lld pages", 
7721         (long long) (server
.vm_pages
+7)/8, server
.vm_pages
); 
7722     memset(server
.vm_bitmap
,0,(server
.vm_pages
+7)/8); 
7724     /* Initialize threaded I/O (used by Virtual Memory) */ 
7725     server
.io_newjobs 
= listCreate(); 
7726     server
.io_processing 
= listCreate(); 
7727     server
.io_processed 
= listCreate(); 
7728     server
.io_ready_clients 
= listCreate(); 
7729     pthread_mutex_init(&server
.io_mutex
,NULL
); 
7730     pthread_mutex_init(&server
.obj_freelist_mutex
,NULL
); 
7731     pthread_mutex_init(&server
.io_swapfile_mutex
,NULL
); 
7732     server
.io_active_threads 
= 0; 
7733     if (pipe(pipefds
) == -1) { 
7734         redisLog(REDIS_WARNING
,"Unable to intialized VM: pipe(2): %s. Exiting." 
7738     server
.io_ready_pipe_read 
= pipefds
[0]; 
7739     server
.io_ready_pipe_write 
= pipefds
[1]; 
7740     redisAssert(anetNonBlock(NULL
,server
.io_ready_pipe_read
) != ANET_ERR
); 
7741     /* LZF requires a lot of stack */ 
7742     pthread_attr_init(&server
.io_threads_attr
); 
7743     pthread_attr_getstacksize(&server
.io_threads_attr
, &stacksize
); 
7744     while (stacksize 
< REDIS_THREAD_STACK_SIZE
) stacksize 
*= 2; 
7745     pthread_attr_setstacksize(&server
.io_threads_attr
, stacksize
); 
7746     /* Listen for events in the threaded I/O pipe */ 
7747     if (aeCreateFileEvent(server
.el
, server
.io_ready_pipe_read
, AE_READABLE
, 
7748         vmThreadedIOCompletedJob
, NULL
) == AE_ERR
) 
7749         oom("creating file event"); 
7752 /* Mark the page as used */ 
7753 static void vmMarkPageUsed(off_t page
) { 
7754     off_t byte 
= page
/8; 
7756     redisAssert(vmFreePage(page
) == 1); 
7757     server
.vm_bitmap
[byte
] |= 1<<bit
; 
7760 /* Mark N contiguous pages as used, with 'page' being the first. */ 
7761 static void vmMarkPagesUsed(off_t page
, off_t count
) { 
7764     for (j 
= 0; j 
< count
; j
++) 
7765         vmMarkPageUsed(page
+j
); 
7766     server
.vm_stats_used_pages 
+= count
; 
7767     redisLog(REDIS_DEBUG
,"Mark USED pages: %lld pages at %lld\n", 
7768         (long long)count
, (long long)page
); 
7771 /* Mark the page as free */ 
7772 static void vmMarkPageFree(off_t page
) { 
7773     off_t byte 
= page
/8; 
7775     redisAssert(vmFreePage(page
) == 0); 
7776     server
.vm_bitmap
[byte
] &= ~(1<<bit
); 
7779 /* Mark N contiguous pages as free, with 'page' being the first. */ 
7780 static void vmMarkPagesFree(off_t page
, off_t count
) { 
7783     for (j 
= 0; j 
< count
; j
++) 
7784         vmMarkPageFree(page
+j
); 
7785     server
.vm_stats_used_pages 
-= count
; 
7786     redisLog(REDIS_DEBUG
,"Mark FREE pages: %lld pages at %lld\n", 
7787         (long long)count
, (long long)page
); 
7790 /* Test if the page is free */ 
7791 static int vmFreePage(off_t page
) { 
7792     off_t byte 
= page
/8; 
7794     return (server
.vm_bitmap
[byte
] & (1<<bit
)) == 0; 
7797 /* Find N contiguous free pages storing the first page of the cluster in *first. 
7798  * Returns REDIS_OK if it was able to find N contiguous pages, otherwise  
7799  * REDIS_ERR is returned. 
7801  * This function uses a simple algorithm: we try to allocate 
7802  * REDIS_VM_MAX_NEAR_PAGES sequentially, when we reach this limit we start 
7803  * again from the start of the swap file searching for free spaces. 
7805  * If it looks pretty clear that there are no free pages near our offset 
7806  * we try to find less populated places doing a forward jump of 
7807  * REDIS_VM_MAX_RANDOM_JUMP, then we start scanning again a few pages 
7808  * without hurry, and then we jump again and so forth... 
7810  * This function can be improved using a free list to avoid guessing 
7811  * too much, since we could collect data about freed pages. 
7813  * note: I implemented this function just after watching an episode of 
7814  * Battlestar Galactica, where the hybrid was continuing to say "JUMP!" 
7816 static int vmFindContiguousPages(off_t 
*first
, off_t n
) { 
7817     off_t base
, offset 
= 0, since_jump 
= 0, numfree 
= 0; 
7819     if (server
.vm_near_pages 
== REDIS_VM_MAX_NEAR_PAGES
) { 
7820         server
.vm_near_pages 
= 0; 
7821         server
.vm_next_page 
= 0; 
7823     server
.vm_near_pages
++; /* Yet another try for pages near to the old ones */ 
7824     base 
= server
.vm_next_page
; 
7826     while(offset 
< server
.vm_pages
) { 
7827         off_t 
this = base
+offset
; 
7829         /* If we overflow, restart from page zero */ 
7830         if (this >= server
.vm_pages
) { 
7831             this -= server
.vm_pages
; 
7833                 /* Just overflowed, what we found on tail is no longer 
7834                  * interesting, as it's no longer contiguous. */ 
7838         if (vmFreePage(this)) { 
7839             /* This is a free page */ 
7841             /* Already got N free pages? Return to the caller, with success */ 
7843                 *first 
= this-(n
-1); 
7844                 server
.vm_next_page 
= this+1; 
7845                 redisLog(REDIS_DEBUG
, "FOUND CONTIGUOUS PAGES: %lld pages at %lld\n", (long long) n
, (long long) *first
); 
7849             /* The current one is not a free page */ 
7853         /* Fast-forward if the current page is not free and we already 
7854          * searched enough near this place. */ 
7856         if (!numfree 
&& since_jump 
>= REDIS_VM_MAX_RANDOM_JUMP
/4) { 
7857             offset 
+= random() % REDIS_VM_MAX_RANDOM_JUMP
; 
7859             /* Note that even if we rewind after the jump, we don't need 
7860              * to make sure numfree is set to zero as we only jump *if* it 
7861              * is set to zero. */ 
7863             /* Otherwise just check the next page */ 
7870 /* Write the specified object at the specified page of the swap file */ 
7871 static int vmWriteObjectOnSwap(robj 
*o
, off_t page
) { 
7872     if (server
.vm_enabled
) pthread_mutex_lock(&server
.io_swapfile_mutex
); 
7873     if (fseeko(server
.vm_fp
,page
*server
.vm_page_size
,SEEK_SET
) == -1) { 
7874         if (server
.vm_enabled
) pthread_mutex_unlock(&server
.io_swapfile_mutex
); 
7875         redisLog(REDIS_WARNING
, 
7876             "Critical VM problem in vmWriteObjectOnSwap(): can't seek: %s", 
7880     rdbSaveObject(server
.vm_fp
,o
); 
7881     fflush(server
.vm_fp
); 
7882     if (server
.vm_enabled
) pthread_mutex_unlock(&server
.io_swapfile_mutex
); 
7886 /* Swap the 'val' object relative to 'key' into disk. Store all the information 
7887  * needed to later retrieve the object into the key object. 
7888  * If we can't find enough contiguous empty pages to swap the object on disk 
7889  * REDIS_ERR is returned. */ 
7890 static int vmSwapObjectBlocking(robj 
*key
, robj 
*val
) { 
7891     off_t pages 
= rdbSavedObjectPages(val
,NULL
); 
7894     assert(key
->storage 
== REDIS_VM_MEMORY
); 
7895     assert(key
->refcount 
== 1); 
7896     if (vmFindContiguousPages(&page
,pages
) == REDIS_ERR
) return REDIS_ERR
; 
7897     if (vmWriteObjectOnSwap(val
,page
) == REDIS_ERR
) return REDIS_ERR
; 
7898     key
->vm
.page 
= page
; 
7899     key
->vm
.usedpages 
= pages
; 
7900     key
->storage 
= REDIS_VM_SWAPPED
; 
7901     key
->vtype 
= val
->type
; 
7902     decrRefCount(val
); /* Deallocate the object from memory. */ 
7903     vmMarkPagesUsed(page
,pages
); 
7904     redisLog(REDIS_DEBUG
,"VM: object %s swapped out at %lld (%lld pages)", 
7905         (unsigned char*) key
->ptr
, 
7906         (unsigned long long) page
, (unsigned long long) pages
); 
7907     server
.vm_stats_swapped_objects
++; 
7908     server
.vm_stats_swapouts
++; 
7912 static robj 
*vmReadObjectFromSwap(off_t page
, int type
) { 
7915     if (server
.vm_enabled
) pthread_mutex_lock(&server
.io_swapfile_mutex
); 
7916     if (fseeko(server
.vm_fp
,page
*server
.vm_page_size
,SEEK_SET
) == -1) { 
7917         redisLog(REDIS_WARNING
, 
7918             "Unrecoverable VM problem in vmReadObjectFromSwap(): can't seek: %s", 
7922     o 
= rdbLoadObject(type
,server
.vm_fp
); 
7924         redisLog(REDIS_WARNING
, "Unrecoverable VM problem in vmReadObjectFromSwap(): can't load object from swap file: %s", strerror(errno
)); 
7927     if (server
.vm_enabled
) pthread_mutex_unlock(&server
.io_swapfile_mutex
); 
7931 /* Load the value object relative to the 'key' object from swap to memory. 
7932  * The newly allocated object is returned. 
7934  * If preview is true the unserialized object is returned to the caller but 
7935  * no changes are made to the key object, nor are the pages marked as freed */ 
7936 static robj 
*vmGenericLoadObject(robj 
*key
, int preview
) { 
7939     redisAssert(key
->storage 
== REDIS_VM_SWAPPED 
|| key
->storage 
== REDIS_VM_LOADING
); 
7940     val 
= vmReadObjectFromSwap(key
->vm
.page
,key
->vtype
); 
7942         key
->storage 
= REDIS_VM_MEMORY
; 
7943         key
->vm
.atime 
= server
.unixtime
; 
7944         vmMarkPagesFree(key
->vm
.page
,key
->vm
.usedpages
); 
7945         redisLog(REDIS_DEBUG
, "VM: object %s loaded from disk", 
7946             (unsigned char*) key
->ptr
); 
7947         server
.vm_stats_swapped_objects
--; 
7949         redisLog(REDIS_DEBUG
, "VM: object %s previewed from disk", 
7950             (unsigned char*) key
->ptr
); 
7952     server
.vm_stats_swapins
++; 
7956 /* Plain object loading, from swap to memory */ 
7957 static robj 
*vmLoadObject(robj 
*key
) { 
7958     /* If we are loading the object in background, stop it, we 
7959      * need to load this object synchronously ASAP. */ 
7960     if (key
->storage 
== REDIS_VM_LOADING
) 
7961         vmCancelThreadedIOJob(key
); 
7962     return vmGenericLoadObject(key
,0); 
7965 /* Just load the value stored on disk, without modifying the key. 
7966  * This is useful when we want to perform some operation on the value 
7967  * without really bringing it from swap to memory, like while saving the 
7968  * dataset or rewriting the append only log. */ 
7969 static robj 
*vmPreviewObject(robj 
*key
) { 
7970     return vmGenericLoadObject(key
,1); 
7973 /* How good a candidate is this object for swapping? 
7974  * The better a candidate it is, the greater the returned value. 
7976  * Currently we try to perform a fast estimation of the object size in 
7977  * memory, and combine it with aging information. 
7979  * Basically swappability = idle-time * log(estimated size) 
7981  * Bigger objects are preferred over smaller objects, but not 
7982  * proportionally, this is why we use the logarithm. This algorithm is 
7983  * just a first try and will probably be tuned later. */ 
7984 static double computeObjectSwappability(robj 
*o
) { 
7985     time_t age 
= server
.unixtime 
- o
->vm
.atime
; 
7989     struct dictEntry 
*de
; 
7992     if (age 
<= 0) return 0; 
7995         if (o
->encoding 
!= REDIS_ENCODING_RAW
) { 
7998             asize 
= sdslen(o
->ptr
)+sizeof(*o
)+sizeof(long)*2; 
8003         listNode 
*ln 
= listFirst(l
); 
8005         asize 
= sizeof(list
); 
8007             robj 
*ele 
= ln
->value
; 
8010             elesize 
= (ele
->encoding 
== REDIS_ENCODING_RAW
) ? 
8011                             (sizeof(*o
)+sdslen(ele
->ptr
)) : 
8013             asize 
+= (sizeof(listNode
)+elesize
)*listLength(l
); 
8018         z 
= (o
->type 
== REDIS_ZSET
); 
8019         d 
= z 
? ((zset
*)o
->ptr
)->dict 
: o
->ptr
; 
8021         asize 
= sizeof(dict
)+(sizeof(struct dictEntry
*)*dictSlots(d
)); 
8022         if (z
) asize 
+= sizeof(zset
)-sizeof(dict
); 
8027             de 
= dictGetRandomKey(d
); 
8028             ele 
= dictGetEntryKey(de
); 
8029             elesize 
= (ele
->encoding 
== REDIS_ENCODING_RAW
) ? 
8030                             (sizeof(*o
)+sdslen(ele
->ptr
)) : 
8032             asize 
+= (sizeof(struct dictEntry
)+elesize
)*dictSize(d
); 
8033             if (z
) asize 
+= sizeof(zskiplistNode
)*dictSize(d
); 
8037     return (double)age
*log(1+asize
); 
8040 /* Try to swap an object that's a good candidate for swapping. 
8041  * Returns REDIS_OK if the object was swapped, REDIS_ERR if it's not possible 
8042  * to swap any object at all. 
8044  * If 'usethreads' is true, Redis will try to swap the object in background 
8045  * using I/O threads. */ 
8046 static int vmSwapOneObject(int usethreads
) { 
8048     struct dictEntry 
*best 
= NULL
; 
8049     double best_swappability 
= 0; 
8050     redisDb 
*best_db 
= NULL
; 
8053     for (j 
= 0; j 
< server
.dbnum
; j
++) { 
8054         redisDb 
*db 
= server
.db
+j
; 
8055         /* Why is maxtries set to 100? 
8056          * Because this way (usually) we'll find 1 object even if just 1% - 2% 
8057          * are swappable objects */ 
8060         if (dictSize(db
->dict
) == 0) continue; 
8061         for (i 
= 0; i 
< 5; i
++) { 
8063             double swappability
; 
8065             if (maxtries
) maxtries
--; 
8066             de 
= dictGetRandomKey(db
->dict
); 
8067             key 
= dictGetEntryKey(de
); 
8068             val 
= dictGetEntryVal(de
); 
8069             /* Only swap objects that are currently in memory. 
8071              * Also don't swap shared objects if threaded VM is on, as we 
8072              * try to ensure that the main thread does not touch the 
8073              * object while the I/O thread is using it, but we can't 
8074              * control other keys without adding additional mutex. */ 
8075             if (key
->storage 
!= REDIS_VM_MEMORY 
|| 
8076                 (server
.vm_max_threads 
!= 0 && val
->refcount 
!= 1)) { 
8077                 if (maxtries
) i
--; /* don't count this try */ 
8080             swappability 
= computeObjectSwappability(val
); 
8081             if (!best 
|| swappability 
> best_swappability
) { 
8083                 best_swappability 
= swappability
; 
8088     if (best 
== NULL
) return REDIS_ERR
; 
8089     key 
= dictGetEntryKey(best
); 
8090     val 
= dictGetEntryVal(best
); 
8092     redisLog(REDIS_DEBUG
,"Key with best swappability: %s, %f", 
8093         key
->ptr
, best_swappability
); 
8095     /* Unshare the key if needed */ 
8096     if (key
->refcount 
> 1) { 
8097         robj 
*newkey 
= dupStringObject(key
); 
8099         key 
= dictGetEntryKey(best
) = newkey
; 
8103         vmSwapObjectThreaded(key
,val
,best_db
); 
8106         if (vmSwapObjectBlocking(key
,val
) == REDIS_OK
) { 
8107             dictGetEntryVal(best
) = NULL
; 
8115 static int vmSwapOneObjectBlocking() { 
8116     return vmSwapOneObject(0); 
8119 static int vmSwapOneObjectThreaded() { 
8120     return vmSwapOneObject(1); 
8123 /* Return true if it's safe to swap out objects in a given moment. 
8124  * Basically we don't want to swap objects out while there is a BGSAVE 
8125  * or a BGREWRITEAOF running in background. */ 
8126 static int vmCanSwapOut(void) { 
8127     return (server
.bgsavechildpid 
== -1 && server
.bgrewritechildpid 
== -1); 
8130 /* Delete a key if swapped. Returns 1 if the key was found, was swapped 
8131  * and was deleted. Otherwise 0 is returned. */ 
8132 static int deleteIfSwapped(redisDb 
*db
, robj 
*key
) { 
8136     if ((de 
= dictFind(db
->dict
,key
)) == NULL
) return 0; 
8137     foundkey 
= dictGetEntryKey(de
); 
8138     if (foundkey
->storage 
== REDIS_VM_MEMORY
) return 0; 
8143 /* =================== Virtual Memory - Threaded I/O  ======================= */ 
8145 static void freeIOJob(iojob 
*j
) { 
8146     if ((j
->type 
== REDIS_IOJOB_PREPARE_SWAP 
|| 
8147         j
->type 
== REDIS_IOJOB_DO_SWAP 
|| 
8148         j
->type 
== REDIS_IOJOB_LOAD
) && j
->val 
!= NULL
) 
8149         decrRefCount(j
->val
); 
8150     decrRefCount(j
->key
); 
8154 /* Every time a thread finishes a job, it writes a byte into the write side 
8155  * of a unix pipe in order to "awake" the main thread, and this function 
8157 static void vmThreadedIOCompletedJob(aeEventLoop 
*el
, int fd
, void *privdata
, 
8161     int retval
, processed 
= 0, toprocess 
= -1, trytoswap 
= 1; 
8163     REDIS_NOTUSED(mask
); 
8164     REDIS_NOTUSED(privdata
); 
8166     /* For every byte we read in the read side of the pipe, there is one 
8167      * I/O job completed to process. */ 
8168     while((retval 
= read(fd
,buf
,1)) == 1) { 
8172         struct dictEntry 
*de
; 
8174         redisLog(REDIS_DEBUG
,"Processing I/O completed job"); 
8176         /* Get the processed element (the oldest one) */ 
8178         assert(listLength(server
.io_processed
) != 0); 
8179         if (toprocess 
== -1) { 
8180             toprocess 
= (listLength(server
.io_processed
)*REDIS_MAX_COMPLETED_JOBS_PROCESSED
)/100; 
8181             if (toprocess 
<= 0) toprocess 
= 1; 
8183         ln 
= listFirst(server
.io_processed
); 
8185         listDelNode(server
.io_processed
,ln
); 
8187         /* If this job is marked as canceled, just ignore it */ 
8192         /* Post process it in the main thread, as there are things we 
8193          * can do just here to avoid race conditions and/or invasive locks */ 
8194         redisLog(REDIS_DEBUG
,"Job %p type: %d, key at %p (%s) refcount: %d\n", (void*) j
, j
->type
, (void*)j
->key
, (char*)j
->key
->ptr
, j
->key
->refcount
); 
8195         de 
= dictFind(j
->db
->dict
,j
->key
); 
8197         key 
= dictGetEntryKey(de
); 
8198         if (j
->type 
== REDIS_IOJOB_LOAD
) { 
8201             /* Key loaded, bring it at home */ 
8202             key
->storage 
= REDIS_VM_MEMORY
; 
8203             key
->vm
.atime 
= server
.unixtime
; 
8204             vmMarkPagesFree(key
->vm
.page
,key
->vm
.usedpages
); 
8205             redisLog(REDIS_DEBUG
, "VM: object %s loaded from disk (threaded)", 
8206                 (unsigned char*) key
->ptr
); 
8207             server
.vm_stats_swapped_objects
--; 
8208             server
.vm_stats_swapins
++; 
8209             dictGetEntryVal(de
) = j
->val
; 
8210             incrRefCount(j
->val
); 
8213             /* Handle clients waiting for this key to be loaded. */ 
8214             handleClientsBlockedOnSwappedKey(db
,key
); 
8215         } else if (j
->type 
== REDIS_IOJOB_PREPARE_SWAP
) { 
8216             /* Now we know the amount of pages required to swap this object. 
8217              * Let's find some space for it, and queue this task again 
8218              * rebranded as REDIS_IOJOB_DO_SWAP. */ 
8219             if (!vmCanSwapOut() || 
8220                 vmFindContiguousPages(&j
->page
,j
->pages
) == REDIS_ERR
) 
8222                 /* Ooops... no space or we can't swap as there is 
8223                  * a fork()ed Redis trying to save stuff on disk. */ 
8225                 key
->storage 
= REDIS_VM_MEMORY
; /* undo operation */ 
8227                 /* Note that we need to mark these pages as used now, 
8228                  * if the job will be canceled, we'll mark them as freed 
8230                 vmMarkPagesUsed(j
->page
,j
->pages
); 
8231                 j
->type 
= REDIS_IOJOB_DO_SWAP
; 
8236         } else if (j
->type 
== REDIS_IOJOB_DO_SWAP
) { 
8239             /* Key swapped. We can finally free some memory. */ 
8240             if (key
->storage 
!= REDIS_VM_SWAPPING
) { 
8241                 printf("key->storage: %d\n",key
->storage
); 
8242                 printf("key->name: %s\n",(char*)key
->ptr
); 
8243                 printf("key->refcount: %d\n",key
->refcount
); 
8244                 printf("val: %p\n",(void*)j
->val
); 
8245                 printf("val->type: %d\n",j
->val
->type
); 
8246                 printf("val->ptr: %s\n",(char*)j
->val
->ptr
); 
8248             redisAssert(key
->storage 
== REDIS_VM_SWAPPING
); 
8249             val 
= dictGetEntryVal(de
); 
8250             key
->vm
.page 
= j
->page
; 
8251             key
->vm
.usedpages 
= j
->pages
; 
8252             key
->storage 
= REDIS_VM_SWAPPED
; 
8253             key
->vtype 
= j
->val
->type
; 
8254             decrRefCount(val
); /* Deallocate the object from memory. */ 
8255             dictGetEntryVal(de
) = NULL
; 
8256             redisLog(REDIS_DEBUG
, 
8257                 "VM: object %s swapped out at %lld (%lld pages) (threaded)", 
8258                 (unsigned char*) key
->ptr
, 
8259                 (unsigned long long) j
->page
, (unsigned long long) j
->pages
); 
8260             server
.vm_stats_swapped_objects
++; 
8261             server
.vm_stats_swapouts
++; 
8263             /* Put a few more swap requests in queue if we are still 
8265             if (trytoswap 
&& vmCanSwapOut() && 
8266                 zmalloc_used_memory() > server
.vm_max_memory
) 
8271                     more 
= listLength(server
.io_newjobs
) < 
8272                             (unsigned) server
.vm_max_threads
; 
8274                     /* Don't waste CPU time if swappable objects are rare. */ 
8275                     if (vmSwapOneObjectThreaded() == REDIS_ERR
) { 
8283         if (processed 
== toprocess
) return; 
8285     if (retval 
< 0 && errno 
!= EAGAIN
) { 
8286         redisLog(REDIS_WARNING
, 
8287             "WARNING: read(2) error in vmThreadedIOCompletedJob() %s", 
8292 static void lockThreadedIO(void) { 
8293     pthread_mutex_lock(&server
.io_mutex
); 
8296 static void unlockThreadedIO(void) { 
8297     pthread_mutex_unlock(&server
.io_mutex
); 
8300 /* Remove the specified object from the threaded I/O queue if still not 
8301  * processed, otherwise make sure to flag it as canceled. */ 
8302 static void vmCancelThreadedIOJob(robj 
*o
) { 
8304         server
.io_newjobs
,      /* 0 */ 
8305         server
.io_processing
,   /* 1 */ 
8306         server
.io_processed     
/* 2 */ 
8310     assert(o
->storage 
== REDIS_VM_LOADING 
|| o
->storage 
== REDIS_VM_SWAPPING
); 
8313     /* Search for a matching key in one of the queues */ 
8314     for (i 
= 0; i 
< 3; i
++) { 
8318         listRewind(lists
[i
],&li
); 
8319         while ((ln 
= listNext(&li
)) != NULL
) { 
8320             iojob 
*job 
= ln
->value
; 
8322             if (job
->canceled
) continue; /* Skip this, already canceled. */ 
8323             if (compareStringObjects(job
->key
,o
) == 0) { 
8324                 redisLog(REDIS_DEBUG
,"*** CANCELED %p (%s) (type %d) (LIST ID %d)\n", 
8325                     (void*)job
, (char*)o
->ptr
, job
->type
, i
); 
8326                 /* Mark the pages as free since the swap didn't happen 
8327                  * or happened but is now discarded. */ 
8328                 if (i 
!= 1 && job
->type 
== REDIS_IOJOB_DO_SWAP
) 
8329                     vmMarkPagesFree(job
->page
,job
->pages
); 
8330                 /* Cancel the job. It depends on the list the job is 
8333                 case 0: /* io_newjobs */ 
8334                     /* If the job was not yet processed the best thing to do 
8335                      * is to remove it from the queue altogether */ 
8337                     listDelNode(lists
[i
],ln
); 
8339                 case 1: /* io_processing */ 
8340                     /* Oh Shi- the thread is messing with the Job: 
8342                      * Probably it's accessing the object if this is a 
8343                      * PREPARE_SWAP or DO_SWAP job. 
8344                      * If it's a LOAD job it may be reading from disk and 
8345                      * if we don't wait for the job to terminate before we 
8346                      * cancel it, maybe in a few microseconds data can be 
8347                      * corrupted in these pages. So the short story is: 
8349                      * Better to wait for the job to move into the 
8350                      * next queue (processed)... */ 
8352                     /* We try again and again until the job is completed. */ 
8354                     /* But let's wait some time for the I/O thread 
8355                      * to finish with this job. After all this condition 
8356                      * should be very rare. */ 
8359                 case 2: /* io_processed */ 
8360                     /* The job was already processed, that's easy... 
8361                      * just mark it as canceled so that we'll ignore it 
8362                      * when processing completed jobs. */ 
8366                 /* Finally we have to adjust the storage type of the object 
8367                  * in order to "UNDO" the operation. */ 
8368                 if (o
->storage 
== REDIS_VM_LOADING
) 
8369                     o
->storage 
= REDIS_VM_SWAPPED
; 
8370                 else if (o
->storage 
== REDIS_VM_SWAPPING
) 
8371                     o
->storage 
= REDIS_VM_MEMORY
; 
8378     assert(1 != 1); /* We should never reach this */ 
8381 static void *IOThreadEntryPoint(void *arg
) { 
8386     pthread_detach(pthread_self()); 
8388         /* Get a new job to process */ 
8390         if (listLength(server
.io_newjobs
) == 0) { 
8391             /* No new jobs in queue, exit. */ 
8392             redisLog(REDIS_DEBUG
,"Thread %ld exiting, nothing to do", 
8393                 (long) pthread_self()); 
8394             server
.io_active_threads
--; 
8398         ln 
= listFirst(server
.io_newjobs
); 
8400         listDelNode(server
.io_newjobs
,ln
); 
8401         /* Add the job in the processing queue */ 
8402         j
->thread 
= pthread_self(); 
8403         listAddNodeTail(server
.io_processing
,j
); 
8404         ln 
= listLast(server
.io_processing
); /* We use ln later to remove it */ 
8406         redisLog(REDIS_DEBUG
,"Thread %ld got a new job (type %d): %p about key '%s'", 
8407             (long) pthread_self(), j
->type
, (void*)j
, (char*)j
->key
->ptr
); 
8409         /* Process the Job */ 
8410         if (j
->type 
== REDIS_IOJOB_LOAD
) { 
8411             j
->val 
= vmReadObjectFromSwap(j
->page
,j
->key
->vtype
); 
8412         } else if (j
->type 
== REDIS_IOJOB_PREPARE_SWAP
) { 
8413             FILE *fp 
= fopen("/dev/null","w+"); 
8414             j
->pages 
= rdbSavedObjectPages(j
->val
,fp
); 
8416         } else if (j
->type 
== REDIS_IOJOB_DO_SWAP
) { 
8417             if (vmWriteObjectOnSwap(j
->val
,j
->page
) == REDIS_ERR
) 
8421         /* Done: insert the job into the processed queue */ 
8422         redisLog(REDIS_DEBUG
,"Thread %ld completed the job: %p (key %s)", 
8423             (long) pthread_self(), (void*)j
, (char*)j
->key
->ptr
); 
8425         listDelNode(server
.io_processing
,ln
); 
8426         listAddNodeTail(server
.io_processed
,j
); 
8429         /* Signal the main thread there is new stuff to process */ 
8430         assert(write(server
.io_ready_pipe_write
,"x",1) == 1); 
8432     return NULL
; /* never reached */ 
8435 static void spawnIOThread(void) { 
8437     sigset_t mask
, omask
; 
8440     sigaddset(&mask
,SIGCHLD
); 
8441     sigaddset(&mask
,SIGHUP
); 
8442     sigaddset(&mask
,SIGPIPE
); 
8443     pthread_sigmask(SIG_SETMASK
, &mask
, &omask
); 
8444     pthread_create(&thread
,&server
.io_threads_attr
,IOThreadEntryPoint
,NULL
); 
8445     pthread_sigmask(SIG_SETMASK
, &omask
, NULL
); 
8446     server
.io_active_threads
++; 
8449 /* We need to wait for the last thread to exit before we are able to 
8450  * fork() in order to BGSAVE or BGREWRITEAOF. */ 
8451 static void waitEmptyIOJobsQueue(void) { 
8453         int io_processed_len
; 
8456         if (listLength(server
.io_newjobs
) == 0 && 
8457             listLength(server
.io_processing
) == 0 && 
8458             server
.io_active_threads 
== 0) 
8463         /* While waiting for the empty jobs queue condition we post-process some 
8464          * finished jobs, as I/O threads may be hanging trying to write against 
8465          * the io_ready_pipe_write FD but there are so many pending jobs that 
8467         io_processed_len 
= listLength(server
.io_processed
); 
8469         if (io_processed_len
) { 
8470             vmThreadedIOCompletedJob(NULL
,server
.io_ready_pipe_read
,NULL
,0); 
8471             usleep(1000); /* 1 millisecond */ 
8473             usleep(10000); /* 10 milliseconds */ 
8478 static void vmReopenSwapFile(void) { 
8479     /* Note: we don't close the old one as we are in the child process 
8480      * and don't want to mess at all with the original file object. */ 
8481     server
.vm_fp 
= fopen(server
.vm_swap_file
,"r+b"); 
8482     if (server
.vm_fp 
== NULL
) { 
8483         redisLog(REDIS_WARNING
,"Can't re-open the VM swap file: %s. Exiting.", 
8484             server
.vm_swap_file
); 
8487     server
.vm_fd 
= fileno(server
.vm_fp
); 
8490 /* This function must be called with threaded IO locked */ 
8491 static void queueIOJob(iojob 
*j
) { 
8492     redisLog(REDIS_DEBUG
,"Queued IO Job %p type %d about key '%s'\n", 
8493         (void*)j
, j
->type
, (char*)j
->key
->ptr
); 
8494     listAddNodeTail(server
.io_newjobs
,j
); 
8495     if (server
.io_active_threads 
< server
.vm_max_threads
) 
8499 static int vmSwapObjectThreaded(robj 
*key
, robj 
*val
, redisDb 
*db
) { 
8502     assert(key
->storage 
== REDIS_VM_MEMORY
); 
8503     assert(key
->refcount 
== 1); 
8505     j 
= zmalloc(sizeof(*j
)); 
8506     j
->type 
= REDIS_IOJOB_PREPARE_SWAP
; 
8508     j
->key 
= dupStringObject(key
); 
8512     j
->thread 
= (pthread_t
) -1; 
8513     key
->storage 
= REDIS_VM_SWAPPING
; 
8521 /* ============ Virtual Memory - Blocking clients on missing keys =========== */ 
8523 /* This function makes the client 'c' wait for the key 'key' to be loaded. 
8524  * If there is not already a job loading the key, one is created. 
8525  * The key is added to the io_keys list in the client structure, and also 
8526  * in the hash table mapping swapped keys to waiting clients, that is, 
8527  * server.io_waited_keys. */ 
8528 static int waitForSwappedKey(redisClient 
*c
, robj 
*key
) { 
8529     struct dictEntry 
*de
; 
8533     /* If the key does not exist or is already in RAM we don't need to 
8534      * block the client at all. */ 
8535     de 
= dictFind(c
->db
->dict
,key
); 
8536     if (de 
== NULL
) return 0; 
8537     o 
= dictGetEntryKey(de
); 
8538     if (o
->storage 
== REDIS_VM_MEMORY
) { 
8540     } else if (o
->storage 
== REDIS_VM_SWAPPING
) { 
8541         /* We were swapping the key, undo it! */ 
8542         vmCancelThreadedIOJob(o
); 
8546     /* OK: the key is either swapped, or being loaded just now. */ 
8548     /* Add the key to the list of keys this client is waiting for. 
8549      * This maps clients to keys they are waiting for. */ 
8550     listAddNodeTail(c
->io_keys
,key
); 
8553     /* Add the client to the swapped keys => clients waiting map. */ 
8554     de 
= dictFind(c
->db
->io_keys
,key
); 
8558         /* For every key we take a list of clients blocked for it */ 
8560         retval 
= dictAdd(c
->db
->io_keys
,key
,l
); 
8562         assert(retval 
== DICT_OK
); 
8564         l 
= dictGetEntryVal(de
); 
8566     listAddNodeTail(l
,c
); 
8568     /* Are we already loading the key from disk? If not create a job */ 
8569     if (o
->storage 
== REDIS_VM_SWAPPED
) { 
8572         o
->storage 
= REDIS_VM_LOADING
; 
8573         j 
= zmalloc(sizeof(*j
)); 
8574         j
->type 
= REDIS_IOJOB_LOAD
; 
8576         j
->key 
= dupStringObject(key
); 
8577         j
->key
->vtype 
= o
->vtype
; 
8578         j
->page 
= o
->vm
.page
; 
8581         j
->thread 
= (pthread_t
) -1; 
8589 /* Is this client attempting to run a command against swapped keys? 
8590  * If so, block it ASAP, load the keys in background, then resume it. 
8592  * The important idea about this function is that it can fail! If keys are 
8593  * still swapped when the client is resumed, the key lookups will 
8594  * just block while loading keys from disk. In practical terms this should only 
8595  * happen with SORT BY command or if there is a bug in this function. 
8597  * Return 1 if the client is marked as blocked, 0 if the client can 
8598  * continue as the keys it is going to access appear to be in memory. */ 
8599 static int blockClientOnSwappedKeys(struct redisCommand 
*cmd
, redisClient 
*c
) { 
8602     if (cmd
->vm_firstkey 
== 0) return 0; 
8603     last 
= cmd
->vm_lastkey
; 
8604     if (last 
< 0) last 
= c
->argc
+last
; 
8605     for (j 
= cmd
->vm_firstkey
; j 
<= last
; j 
+= cmd
->vm_keystep
) 
8606         waitForSwappedKey(c
,c
->argv
[j
]); 
8607     /* If the client was blocked for at least one key, mark it as blocked. */ 
8608     if (listLength(c
->io_keys
)) { 
8609         c
->flags 
|= REDIS_IO_WAIT
; 
8610         aeDeleteFileEvent(server
.el
,c
->fd
,AE_READABLE
); 
8611         server
.vm_blocked_clients
++; 
8618 /* Remove the 'key' from the list of blocked keys for a given client. 
 * Two structures are involved: the client's own c->io_keys list and the
 * per-database c->db->io_keys dictionary, whose values are lists of the
 * clients waiting for each key.
8620  * The function returns 1 when there are no longer blocking keys after 
8621  * the current one was removed (and the client can be unblocked). */ 
8622 static int dontWaitForSwappedKey(redisClient 
*c
, robj 
*key
) { 
8626     struct dictEntry 
*de
; 
8628     /* Remove the key from the list of keys this client is waiting for. */ 
8629     listRewind(c
->io_keys
,&li
); 
8630     while ((ln 
= listNext(&li
)) != NULL
) { 
8631         if (compareStringObjects(ln
->value
,key
) == 0) { 
8632             listDelNode(c
->io_keys
,ln
); 
8638     /* Remove the client from the key => waiting clients map. */ 
8639     de 
= dictFind(c
->db
->io_keys
,key
); 
8641     l 
= dictGetEntryVal(de
); 
8642     ln 
= listSearchKey(l
,c
); 
    /* Once no client is left in the list for this key, drop the dictionary
     * entry as well. */
8645     if (listLength(l
) == 0) 
8646         dictDelete(c
->db
->io_keys
,key
); 
8648     return listLength(c
->io_keys
) == 0; 
/* Given a 'key' of database 'db', look up in db->io_keys the list of clients
 * waiting for that key and stop each of them from waiting on it by calling
 * dontWaitForSwappedKey(). A client for which that call returns non-zero has
 * no other key to wait for and is appended to server.io_ready_clients. */
8651 static void handleClientsBlockedOnSwappedKey(redisDb 
*db
, robj 
*key
) { 
8652     struct dictEntry 
*de
; 
8657     de 
= dictFind(db
->io_keys
,key
); 
8660     l 
= dictGetEntryVal(de
); 
    /* The length is read once, up front: see the note right below. */
8661     len 
= listLength(l
); 
8662     /* Note: we can't use something like while(listLength(l)) as the list 
8663      * can be freed by the calling function when we remove the last element. */ 
8666         redisClient 
*c 
= ln
->value
; 
8668         if (dontWaitForSwappedKey(c
,key
)) { 
8669             /* Put the client in the list of clients ready to go as we 
8670              * loaded all the keys about it. */ 
8671             listAddNodeTail(server
.io_ready_clients
,c
); 
8676 /* ================================= Debugging ============================== */ 
/* Implementation of the DEBUG command. The subcommand is c->argv[1],
 * matched case-insensitively:
 *
 *   SEGFAULT       recognized as a subcommand.
 *   RELOAD         rdbSave() then rdbLoad() of server.dbfilename; replies
 *                  with shared.err if either step fails.
 *   LOADAOF        loadAppendOnlyFile() of server.appendfilename.
 *   OBJECT <key>   replies with addresses, refcounts, encoding and
 *                  serialized length of the key/value pair, or with the swap
 *                  page information when the value is swapped out.
 *   SWAPOUT <key>  swaps the value of the key out with
 *                  vmSwapObjectBlocking(); needs VM enabled and the key
 *                  in memory.
 *
 * OBJECT and SWAPOUT are only matched when c->argc is exactly 3. Anything
 * else gets the syntax error reply at the end of the function.
 * NOTE(review): the usage text in that error reply does not list LOADAOF. */
8678 static void debugCommand(redisClient 
*c
) { 
8679     if (!strcasecmp(c
->argv
[1]->ptr
,"segfault")) { 
8681     } else if (!strcasecmp(c
->argv
[1]->ptr
,"reload")) { 
8682         if (rdbSave(server
.dbfilename
) != REDIS_OK
) { 
8683             addReply(c
,shared
.err
); 
8687         if (rdbLoad(server
.dbfilename
) != REDIS_OK
) { 
8688             addReply(c
,shared
.err
); 
8691         redisLog(REDIS_WARNING
,"DB reloaded by DEBUG RELOAD"); 
8692         addReply(c
,shared
.ok
); 
8693     } else if (!strcasecmp(c
->argv
[1]->ptr
,"loadaof")) { 
8695         if (loadAppendOnlyFile(server
.appendfilename
) != REDIS_OK
) { 
8696             addReply(c
,shared
.err
); 
8699         redisLog(REDIS_WARNING
,"Append Only File loaded by DEBUG LOADAOF"); 
8700         addReply(c
,shared
.ok
); 
8701     } else if (!strcasecmp(c
->argv
[1]->ptr
,"object") && c
->argc 
== 3) { 
8702         dictEntry 
*de 
= dictFind(c
->db
->dict
,c
->argv
[2]); 
8706             addReply(c
,shared
.nokeyerr
); 
8709         key 
= dictGetEntryKey(de
); 
8710         val 
= dictGetEntryVal(de
); 
        /* The value can be inspected directly when VM is off, or when the
         * key is flagged as in memory or as being swapped; otherwise only
         * the swap page information stored in the key object is reported. */
8711         if (!server
.vm_enabled 
|| (key
->storage 
== REDIS_VM_MEMORY 
|| 
8712                                    key
->storage 
== REDIS_VM_SWAPPING
)) { 
8713             addReplySds(c
,sdscatprintf(sdsempty(), 
8714                 "+Key at:%p refcount:%d, value at:%p refcount:%d " 
8715                 "encoding:%d serializedlength:%lld\r\n", 
8716                 (void*)key
, key
->refcount
, (void*)val
, val
->refcount
, 
8717                 val
->encoding
, (long long) rdbSavedObjectLen(val
,NULL
))); 
8719             addReplySds(c
,sdscatprintf(sdsempty(), 
8720                 "+Key at:%p refcount:%d, value swapped at: page %llu " 
8721                 "using %llu pages\r\n", 
8722                 (void*)key
, key
->refcount
, (unsigned long long) key
->vm
.page
, 
8723                 (unsigned long long) key
->vm
.usedpages
)); 
8725     } else if (!strcasecmp(c
->argv
[1]->ptr
,"swapout") && c
->argc 
== 3) { 
8726         dictEntry 
*de 
= dictFind(c
->db
->dict
,c
->argv
[2]); 
8729         if (!server
.vm_enabled
) { 
8730             addReplySds(c
,sdsnew("-ERR Virtual Memory is disabled\r\n")); 
8734             addReply(c
,shared
.nokeyerr
); 
8737         key 
= dictGetEntryKey(de
); 
8738         val 
= dictGetEntryVal(de
); 
8739         /* If the key is shared we want to create a copy */ 
8740         if (key
->refcount 
> 1) { 
8741             robj 
*newkey 
= dupStringObject(key
); 
8743             key 
= dictGetEntryKey(de
) = newkey
; 
8746         if (key
->storage 
!= REDIS_VM_MEMORY
) { 
8747             addReplySds(c
,sdsnew("-ERR This key is not in memory\r\n")); 
8748         } else if (vmSwapObjectBlocking(key
,val
) == REDIS_OK
) { 
            /* After a successful swap out the dictionary entry no longer
             * points to an in-memory value. */
8749             dictGetEntryVal(de
) = NULL
; 
8750             addReply(c
,shared
.ok
); 
8752             addReply(c
,shared
.err
); 
8755         addReplySds(c
,sdsnew( 
8756             "-ERR Syntax error, try DEBUG [SEGFAULT|OBJECT <key>|SWAPOUT <key>|RELOAD]\r\n")); 
/* Report a failed assertion in the log at REDIS_WARNING level.
 * 'estr' is the text of the expression that was not true, 'file' and 'line'
 * the source location of the assertion.
 * When HAVE_BACKTRACE is defined one more message is logged, announcing that
 * a SIGSEGV is being forced in order to print the stack trace. */
8760 static void _redisAssert(char *estr
, char *file
, int line
) { 
8761     redisLog(REDIS_WARNING
,"=== ASSERTION FAILED ==="); 
8762     redisLog(REDIS_WARNING
,"==> %s:%d '%s' is not true\n",file
,line
,estr
); 
8763 #ifdef HAVE_BACKTRACE 
8764     redisLog(REDIS_WARNING
,"(forcing SIGSEGV in order to print the stack trace)"); 
8769 /* =================================== Main! ================================ */ 
/* Read the Linux overcommit policy from /proc/sys/vm/overcommit_memory.
 * The file is opened for reading and its first line (at most 63 characters)
 * is fetched into 'buf' with fgets().
 * NOTE(review): linuxOvercommitMemoryWarning() compares the returned int
 * against 0; presumably it is the parsed setting, with some other value on
 * error -- confirm against the full body. */
8772 int linuxOvercommitMemoryValue(void) { 
8773     FILE *fp 
= fopen("/proc/sys/vm/overcommit_memory","r"); 
8777     if (fgets(buf
,64,fp
) == NULL
) { 
8786 void linuxOvercommitMemoryWarning(void) { 
8787     if (linuxOvercommitMemoryValue() == 0) { 
8788         redisLog(REDIS_WARNING
,"WARNING overcommit_memory is set to 0! Background save may fail under low condition memory. To fix this issue add 'vm.overcommit_memory = 1' to /etc/sysctl.conf and then reboot or run the command 'sysctl vm.overcommit_memory=1' for this to take effect."); 
8791 #endif /* __linux__ */ 
8793 static void daemonize(void) { 
8797     if (fork() != 0) exit(0); /* parent exits */ 
8798     setsid(); /* create a new session */ 
8800     /* Every output goes to /dev/null. If Redis is daemonized but 
8801      * the 'logfile' is set to 'stdout' in the configuration file 
8802      * it will not log at all. */ 
8803     if ((fd 
= open("/dev/null", O_RDWR
, 0)) != -1) { 
8804         dup2(fd
, STDIN_FILENO
); 
8805         dup2(fd
, STDOUT_FILENO
); 
8806         dup2(fd
, STDERR_FILENO
); 
8807         if (fd 
> STDERR_FILENO
) close(fd
); 
8809     /* Try to write the pid file */ 
8810     fp 
= fopen(server
.pidfile
,"w"); 
8812         fprintf(fp
,"%d\n",getpid()); 
/* Server entry point. What is visible here, in order:
 * - when a configuration file is given, the save parameters are reset and
 *   argv[1] is loaded as the configuration; with more than one argument a
 *   usage message is printed on stderr; otherwise a warning is logged that
 *   the default configuration is in use;
 * - the process is daemonized if server.daemonize is set;
 * - the dataset is loaded with loadAppendOnlyFile() when server.appendonly
 *   is set, and rdbLoad() of server.dbfilename is the alternative path
 *   (presumably the else branch); the elapsed seconds are logged on success;
 * - beforeSleep is installed as the event loop before-sleep callback and
 *   the event loop is deleted at the end. */
8817 int main(int argc
, char **argv
) { 
8822         resetServerSaveParams(); 
8823         loadServerConfig(argv
[1]); 
8824     } else if (argc 
> 2) { 
8825         fprintf(stderr
,"Usage: ./redis-server [/path/to/redis.conf]\n"); 
8828         redisLog(REDIS_WARNING
,"Warning: no config file specified, using the default config. In order to specify a config file use 'redis-server /path/to/redis.conf'"); 
8830     if (server
.daemonize
) daemonize(); 
8832     redisLog(REDIS_NOTICE
,"Server started, Redis version " REDIS_VERSION
); 
8834     linuxOvercommitMemoryWarning(); 
    /* NOTE(review): the two "%ld seconds" messages below are passed
     * time(NULL)-start; this assumes that difference has the size of a
     * long -- confirm on every supported platform. */
8837     if (server
.appendonly
) { 
8838         if (loadAppendOnlyFile(server
.appendfilename
) == REDIS_OK
) 
8839             redisLog(REDIS_NOTICE
,"DB loaded from append only file: %ld seconds",time(NULL
)-start
); 
8841         if (rdbLoad(server
.dbfilename
) == REDIS_OK
) 
8842             redisLog(REDIS_NOTICE
,"DB loaded from disk: %ld seconds",time(NULL
)-start
); 
8844     redisLog(REDIS_NOTICE
,"The server is now ready to accept connections on port %d", server
.port
); 
8845     aeSetBeforeSleepProc(server
.el
,beforeSleep
); 
8847     aeDeleteEventLoop(server
.el
); 
8851 /* ============================= Backtrace support ========================= */ 
8853 #ifdef HAVE_BACKTRACE 
8854 static char *findFuncName(void *pointer
, unsigned long *offset
); 
/* Return, as a void pointer, the saved instruction pointer register found
 * in the machine context of 'uc'. The field to read is selected at compile
 * time: mc_eip on FreeBSD, eip with dietlibc, __ss.__rip or __ss.__eip on
 * Apple systems, gregs[REG_EIP] on Linux x86 and sc_ip on Linux IA64.
 * segvHandler() uses the result to patch the captured backtrace. */
8856 static void *getMcontextEip(ucontext_t 
*uc
) { 
8857 #if defined(__FreeBSD__) 
8858     return (void*) uc
->uc_mcontext
.mc_eip
; 
8859 #elif defined(__dietlibc__) 
8860     return (void*) uc
->uc_mcontext
.eip
; 
8861 #elif defined(__APPLE__) && !defined(MAC_OS_X_VERSION_10_6) 
8863     return (void*) uc
->uc_mcontext
->__ss
.__rip
; 
8865     return (void*) uc
->uc_mcontext
->__ss
.__eip
; 
8867 #elif defined(__APPLE__) && defined(MAC_OS_X_VERSION_10_6) 
8868   #if defined(_STRUCT_X86_THREAD_STATE64) && !defined(__i386__) 
8869     return (void*) uc
->uc_mcontext
->__ss
.__rip
; 
8871     return (void*) uc
->uc_mcontext
->__ss
.__eip
; 
8873 #elif defined(__i386__) || defined(__X86_64__) || defined(__x86_64__) 
8874     return (void*) uc
->uc_mcontext
.gregs
[REG_EIP
]; /* Linux 32/64 bit */ 
8875 #elif defined(__ia64__) /* Linux IA64 */ 
8876     return (void*) uc
->uc_mcontext
.sc_ip
; 
/* Signal handler that setupSigSegvAction() installs through sa_sigaction.
 * It logs, at REDIS_WARNING level:
 * - the Redis version and the number of the signal received ('sig');
 * - the string returned by genRedisInfoString();
 * - a stack trace of up to 100 frames captured with backtrace(), starting
 *   from frame 1. For each frame either the text from backtrace_symbols()
 *   is printed, or, when findFuncName() knows a symbol and its offset is not
 *   larger than the one parsed after the '+' in that text, a line built
 *   from the findFuncName() result.
 * 'secret' is used as a ucontext_t pointer; 'info' is unused.
 * Memory obtained here is intentionally never freed, see the comments in
 * the body. */
8882 static void segvHandler(int sig
, siginfo_t 
*info
, void *secret
) { 
8884     char **messages 
= NULL
; 
8885     int i
, trace_size 
= 0; 
8886     unsigned long offset
=0; 
8887     ucontext_t 
*uc 
= (ucontext_t
*) secret
; 
8889     REDIS_NOTUSED(info
); 
8891     redisLog(REDIS_WARNING
, 
8892         "======= Ooops! Redis %s got signal: -%d- =======", REDIS_VERSION
, sig
); 
8893     infostring 
= genRedisInfoString(); 
8894     redisLog(REDIS_WARNING
, "%s",infostring
); 
8895     /* It's not safe to sdsfree() the returned string under memory 
8896      * corruption conditions. Let it leak as we are going to abort */ 
8898     trace_size 
= backtrace(trace
, 100); 
8899     /* overwrite sigaction with caller's address */ 
8900     if (getMcontextEip(uc
) != NULL
) { 
8901         trace
[1] = getMcontextEip(uc
); 
8903     messages 
= backtrace_symbols(trace
, trace_size
); 
8905     for (i
=1; i
<trace_size
; ++i
) { 
8906         char *fn 
= findFuncName(trace
[i
], &offset
), *p
; 
8908         p 
= strchr(messages
[i
],'+'); 
        /* Prefer the backtrace_symbols() text when findFuncName() found
         * nothing, or when the offset printed after '+' in that text is
         * smaller than ours. */
8909         if (!fn 
|| (p 
&& ((unsigned long)strtol(p
+1,NULL
,10)) < offset
)) { 
8910             redisLog(REDIS_WARNING
,"%s", messages
[i
]); 
            /* NOTE(review): the last argument below is cast to unsigned int
             * but printed with "%d" -- confirm this is intended. */
8912             redisLog(REDIS_WARNING
,"%d redis-server %p %s + %d", i
, trace
[i
], fn
, (unsigned int)offset
); 
8915     /* free(messages); Don't call free() with possibly corrupted memory. */ 
8919 static void setupSigSegvAction(void) { 
8920     struct sigaction act
; 
8922     sigemptyset (&act
.sa_mask
); 
8923     /* When the SA_SIGINFO flag is set in sa_flags then sa_sigaction 
8924      * is used. Otherwise, sa_handler is used */ 
8925     act
.sa_flags 
= SA_NODEFER 
| SA_ONSTACK 
| SA_RESETHAND 
| SA_SIGINFO
; 
8926     act
.sa_sigaction 
= segvHandler
; 
8927     sigaction (SIGSEGV
, &act
, NULL
); 
8928     sigaction (SIGBUS
, &act
, NULL
); 
8929     sigaction (SIGFPE
, &act
, NULL
); 
8930     sigaction (SIGILL
, &act
, NULL
); 
8931     sigaction (SIGBUS
, &act
, NULL
); 
8935 #include "staticsymbols.h" 
8936 /* This function tries to convert a pointer into a function name. It's used in 
8937  * order to provide a backtrace under segmentation fault that's able to 
8938  * display functions declared as static (otherwise the backtrace is useless). 
 *
 * symsTable (from staticsymbols.h) is scanned up to its first entry with a
 * zero 'pointer'. Among the entries whose address is not greater than
 * 'pointer', the one at the smallest distance is selected.
 * Returns the name of that entry, or NULL when no entry qualifies.
 * NOTE(review): presumably the distance is stored in '*offset' on success
 * -- confirm against the full body. */ 
8939 static char *findFuncName(void *pointer
, unsigned long *offset
){ 
8941     unsigned long off
, minoff 
= 0; 
8943     /* Try to match against the Symbol with the smallest offset */ 
8944     for (i
=0; symsTable
[i
].pointer
; i
++) { 
8945         unsigned long lp 
= (unsigned long) pointer
; 
        /* A pointer equal to (unsigned long)-1 is never matched. */
8947         if (lp 
!= (unsigned long)-1 && lp 
>= symsTable
[i
].pointer
) { 
8948             off
=lp
-symsTable
[i
].pointer
; 
8949             if (ret 
< 0 || off 
< minoff
) { 
8955     if (ret 
== -1) return NULL
; 
8957     return symsTable
[ret
].name
; 
8959 #else /* HAVE_BACKTRACE */ 
/* Variant of setupSigSegvAction() compiled when HAVE_BACKTRACE is not
 * defined. NOTE(review): presumably an empty stub, so that callers need no
 * conditional code -- confirm against the full body. */
8960 static void setupSigSegvAction(void) { 
8962 #endif /* HAVE_BACKTRACE */