2  * Copyright (c) 2009-2010, Salvatore Sanfilippo <antirez at gmail dot com> 
   5  * Redistribution and use in source and binary forms, with or without 
   6  * modification, are permitted provided that the following conditions are met: 
   8  *   * Redistributions of source code must retain the above copyright notice, 
   9  *     this list of conditions and the following disclaimer. 
  10  *   * Redistributions in binary form must reproduce the above copyright 
  11  *     notice, this list of conditions and the following disclaimer in the 
  12  *     documentation and/or other materials provided with the distribution. 
  13  *   * Neither the name of Redis nor the names of its contributors may be used 
  14  *     to endorse or promote products derived from this software without 
  15  *     specific prior written permission. 
  17  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 
  18  * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 
  19  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 
  20  * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE 
  21  * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 
  22  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 
  23  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 
  24  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 
  25  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 
  26  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 
  27  * POSSIBILITY OF SUCH DAMAGE. 
  30 #define REDIS_VERSION "1.3.5" 
  40 #define __USE_POSIX199309 
  47 #endif /* HAVE_BACKTRACE */ 
  55 #include <arpa/inet.h> 
  59 #include <sys/resource.h> 
  66 #include "solarisfixes.h" 
  70 #include "ae.h"     /* Event driven programming library */ 
  71 #include "sds.h"    /* Dynamic safe strings */ 
  72 #include "anet.h"   /* Networking the easy way */ 
  73 #include "dict.h"   /* Hash tables */ 
  74 #include "adlist.h" /* Linked lists */ 
  75 #include "zmalloc.h" /* total memory usage aware version of malloc/free */ 
  76 #include "lzf.h"    /* LZF compression library */ 
  77 #include "pqsort.h" /* Partial qsort for SORT+LIMIT */ 
  84 /* Static server configuration */ 
  85 #define REDIS_SERVERPORT        6379    /* TCP port */ 
  86 #define REDIS_MAXIDLETIME       (60*5)  /* default client timeout */ 
  87 #define REDIS_IOBUF_LEN         1024 
  88 #define REDIS_LOADBUF_LEN       1024 
  89 #define REDIS_STATIC_ARGS       4 
  90 #define REDIS_DEFAULT_DBNUM     16 
  91 #define REDIS_CONFIGLINE_MAX    1024 
  92 #define REDIS_OBJFREELIST_MAX   1000000 /* Max number of objects to cache */ 
  93 #define REDIS_MAX_SYNC_TIME     60      /* Slave can't take more to sync */ 
  94 #define REDIS_EXPIRELOOKUPS_PER_CRON    100 /* try to expire 100 keys/second */ 
  95 #define REDIS_MAX_WRITE_PER_EVENT (1024*64) 
  96 #define REDIS_REQUEST_MAX_SIZE (1024*1024*256) /* max bytes in inline command */ 
  98 /* If more than REDIS_WRITEV_THRESHOLD write packets are pending use writev */ 
  99 #define REDIS_WRITEV_THRESHOLD      3 
 100 /* Max number of iovecs used for each writev call */ 
 101 #define REDIS_WRITEV_IOVEC_COUNT    256 
 103 /* Hash table parameters */ 
 104 #define REDIS_HT_MINFILL        10      /* Minimal hash table fill 10% */ 
 107 #define REDIS_CMD_BULK          1       /* Bulk write command */ 
 108 #define REDIS_CMD_INLINE        2       /* Inline command */ 
 109 /* REDIS_CMD_DENYOOM deserves a longer comment: all the commands marked with 
 110    this flag will return an error when the 'maxmemory' option is set in the 
 111    config file and the server is using more than maxmemory bytes of memory. 
 112    In short these commands are denied on low memory conditions. */ 
 113 #define REDIS_CMD_DENYOOM       4 
 116 #define REDIS_STRING 0 
 122 /* Objects encoding. Some kinds of objects like Strings and Hashes can be 
 123  * internally represented in multiple ways. The 'encoding' field of the object 
 124  * is set to one of these values for this object. */ 
 125 #define REDIS_ENCODING_RAW 0    /* Raw representation */ 
 126 #define REDIS_ENCODING_INT 1    /* Encoded as integer */ 
 127 #define REDIS_ENCODING_ZIPMAP 2 /* Encoded as zipmap */ 
 128 #define REDIS_ENCODING_HT 3     /* Encoded as an hash table */ 
 130 static char* strencoding
[] = { 
 131     "raw", "int", "zipmap", "hashtable" 
 134 /* Object types only used for dumping to disk */ 
 135 #define REDIS_EXPIRETIME 253 
 136 #define REDIS_SELECTDB 254 
 137 #define REDIS_EOF 255 
 139 /* Defines related to the dump file format. To store 32 bits lengths for short 
 140  * keys requires a lot of space, so we check the most significant 2 bits of 
 141  * the first byte to interpret the length: 
 143  * 00|000000 => if the two MSB are 00 the len is the 6 bits of this byte 
 144  * 01|000000 00000000 =>  01, the len is 14 bits, 6 bits + 8 bits of next byte 
 145  * 10|000000 [32 bit integer] => if it's 10, a full 32 bit len will follow 
 146  * 11|000000 this means: specially encoded object will follow. The six bits 
 147  *           number specify the kind of object that follows. 
 148  *           See the REDIS_RDB_ENC_* defines. 
 150  * Lengths up to 63 are stored using a single byte, most DB keys, and many 
 151  * values, will fit inside. */ 
 152 #define REDIS_RDB_6BITLEN 0 
 153 #define REDIS_RDB_14BITLEN 1 
 154 #define REDIS_RDB_32BITLEN 2 
 155 #define REDIS_RDB_ENCVAL 3 
 156 #define REDIS_RDB_LENERR UINT_MAX 
 158 /* When a length of a string object stored on disk has the first two bits 
 159  * set, the remaining six bits specify a special encoding for the object 
 160  * according to the following defines: */ 
 161 #define REDIS_RDB_ENC_INT8 0        /* 8 bit signed integer */ 
 162 #define REDIS_RDB_ENC_INT16 1       /* 16 bit signed integer */ 
 163 #define REDIS_RDB_ENC_INT32 2       /* 32 bit signed integer */ 
 164 #define REDIS_RDB_ENC_LZF 3         /* string compressed with LZF */ 
 166 /* Virtual memory object->where field. */ 
 167 #define REDIS_VM_MEMORY 0       /* The object is on memory */ 
 168 #define REDIS_VM_SWAPPED 1      /* The object is on disk */ 
 169 #define REDIS_VM_SWAPPING 2     /* Redis is swapping this object on disk */ 
 170 #define REDIS_VM_LOADING 3      /* Redis is loading this object from disk */ 
 172 /* Virtual memory static configuration stuff. 
 173  * Check vmFindContiguousPages() to know more about these magic numbers. */ 
 174 #define REDIS_VM_MAX_NEAR_PAGES 65536 
 175 #define REDIS_VM_MAX_RANDOM_JUMP 4096 
 176 #define REDIS_VM_MAX_THREADS 32 
 177 #define REDIS_THREAD_STACK_SIZE (1024*1024*4) 
 178 /* The following is the *percentage* of completed I/O jobs to process when the 
 179  * handler is called. While Virtual Memory I/O operations are performed by 
 180  * threads, these operations must be processed by the main thread when completed 
 181  * in order to take effect. */ 
 182 #define REDIS_MAX_COMPLETED_JOBS_PROCESSED 1 
 185 #define REDIS_SLAVE 1       /* This client is a slave server */ 
 186 #define REDIS_MASTER 2      /* This client is a master server */ 
 187 #define REDIS_MONITOR 4     /* This client is a slave monitor, see MONITOR */ 
 188 #define REDIS_MULTI 8       /* This client is in a MULTI context */ 
 189 #define REDIS_BLOCKED 16    /* The client is waiting in a blocking operation */ 
 190 #define REDIS_IO_WAIT 32    /* The client is waiting for Virtual Memory I/O */ 
 192 /* Slave replication state - slave side */ 
 193 #define REDIS_REPL_NONE 0   /* No active replication */ 
 194 #define REDIS_REPL_CONNECT 1    /* Must connect to master */ 
 195 #define REDIS_REPL_CONNECTED 2  /* Connected to master */ 
 197 /* Slave replication state - from the point of view of master 
 198  * Note that in SEND_BULK and ONLINE state the slave receives new updates 
 199  * in its output queue. In the WAIT_BGSAVE state instead the server is waiting 
 200  * to start the next background saving in order to send updates to it. */ 
 201 #define REDIS_REPL_WAIT_BGSAVE_START 3 /* master waits bgsave to start feeding it */ 
 202 #define REDIS_REPL_WAIT_BGSAVE_END 4 /* master waits bgsave to end to start bulk DB transmission */ 
 203 #define REDIS_REPL_SEND_BULK 5 /* master is sending the bulk DB */ 
 204 #define REDIS_REPL_ONLINE 6 /* bulk DB already transmitted, receive updates */ 
 206 /* List related stuff */ 
 210 /* Sort operations */ 
 211 #define REDIS_SORT_GET 0 
 212 #define REDIS_SORT_ASC 1 
 213 #define REDIS_SORT_DESC 2 
 214 #define REDIS_SORTKEY_MAX 1024 
 217 #define REDIS_DEBUG 0 
 218 #define REDIS_VERBOSE 1 
 219 #define REDIS_NOTICE 2 
 220 #define REDIS_WARNING 3 
 222 /* Anti-warning macro... */ 
 223 #define REDIS_NOTUSED(V) ((void) V) 
 225 #define ZSKIPLIST_MAXLEVEL 32 /* Should be enough for 2^32 elements */ 
 226 #define ZSKIPLIST_P 0.25      /* Skiplist P = 1/4 */ 
 228 /* Append only defines */ 
 229 #define APPENDFSYNC_NO 0 
 230 #define APPENDFSYNC_ALWAYS 1 
 231 #define APPENDFSYNC_EVERYSEC 2 
 233 /* Hashes related defaults */ 
 234 #define REDIS_HASH_MAX_ZIPMAP_ENTRIES 64 
 235 #define REDIS_HASH_MAX_ZIPMAP_VALUE 512 
 237 /* We can print the stacktrace, so our assert is defined this way: */ 
 238 #define redisAssert(_e) ((_e)?(void)0 : (_redisAssert(#_e,__FILE__,__LINE__),_exit(1))) 
 239 static void _redisAssert(char *estr
, char *file
, int line
); 
 241 /*================================= Data types ============================== */ 
 243 /* A redis object, that is a type able to hold a string / list / set */ 
 245 /* The VM object structure */ 
 246 struct redisObjectVM 
{ 
 247     off_t page
;         /* the page at which the object is stored on disk */ 
 248     off_t usedpages
;    /* number of pages used on disk */ 
 249     time_t atime
;       /* Last access time */ 
 252 /* The actual Redis Object */ 
 253 typedef struct redisObject 
{ 
 256     unsigned char encoding
; 
 257     unsigned char storage
;  /* If this object is a key, where is the value? 
 258                              * REDIS_VM_MEMORY, REDIS_VM_SWAPPED, ... */ 
 259     unsigned char vtype
; /* If this object is a key, and value is swapped out, 
 260                           * this is the type of the swapped out object. */ 
 262     /* VM fields, these are only allocated if VM is active, otherwise the 
 263      * object allocation function will just allocate 
 264      * sizeof(redisObject) minus sizeof(redisObjectVM), so using 
 265      * Redis without VM active will not have any overhead. */ 
 266     struct redisObjectVM vm
; 
 269 /* Macro used to initialize a Redis object allocated on the stack. 
 270  * Note that this macro is kept near the structure definition to make sure 
 271  * we'll update it when the structure is changed, to avoid bugs like 
 272  * bug #85 introduced exactly in this way. */ 
 273 #define initStaticStringObject(_var,_ptr) do { \ 
 275     _var.type = REDIS_STRING; \ 
 276     _var.encoding = REDIS_ENCODING_RAW; \ 
 278     if (server.vm_enabled) _var.storage = REDIS_VM_MEMORY; \ 
 281 typedef struct redisDb 
{ 
 282     dict 
*dict
;                 /* The keyspace for this DB */ 
 283     dict 
*expires
;              /* Timeout of keys with a timeout set */ 
 284     dict 
*blockingkeys
;         /* Keys with clients waiting for data (BLPOP) */ 
 285     dict 
*io_keys
;              /* Keys with clients waiting for VM I/O */ 
 289 /* Client MULTI/EXEC state */ 
 290 typedef struct multiCmd 
{ 
 293     struct redisCommand 
*cmd
; 
 296 typedef struct multiState 
{ 
 297     multiCmd 
*commands
;     /* Array of MULTI commands */ 
 298     int count
;              /* Total number of MULTI commands */ 
 301 /* With multiplexing we need to keep per-client state. 
 302  * Clients are kept in a linked list. */ 
 303 typedef struct redisClient 
{ 
 308     robj 
**argv
, **mbargv
; 
 310     int bulklen
;            /* bulk read len. -1 if not in bulk read mode */ 
 311     int multibulk
;          /* multi bulk command format active */ 
 314     time_t lastinteraction
; /* time of the last interaction, used for timeout */ 
 315     int flags
;              /* REDIS_SLAVE | REDIS_MONITOR | REDIS_MULTI ... */ 
 316     int slaveseldb
;         /* slave selected db, if this client is a slave */ 
 317     int authenticated
;      /* when requirepass is non-NULL */ 
 318     int replstate
;          /* replication state if this is a slave */ 
 319     int repldbfd
;           /* replication DB file descriptor */ 
 320     long repldboff
;         /* replication DB file offset */ 
 321     off_t repldbsize
;       /* replication DB file size */ 
 322     multiState mstate
;      /* MULTI/EXEC state */ 
 323     robj 
**blockingkeys
;    /* The key we are waiting to terminate a blocking 
 324                              * operation such as BLPOP. Otherwise NULL. */ 
 325     int blockingkeysnum
;    /* Number of blocking keys */ 
 326     time_t blockingto
;      /* Blocking operation timeout. If UNIX current time 
 327                              * is >= blockingto then the operation timed out. */ 
 328     list 
*io_keys
;          /* Keys this client is waiting to be loaded from the 
 329                              * swap file in order to continue. */ 
 337 /* Global server state structure */ 
 342     dict 
*sharingpool
;          /* Pool used for object sharing */ 
 343     unsigned int sharingpoolsize
; 
 344     long long dirty
;            /* changes to DB from the last save */ 
 346     list 
*slaves
, *monitors
; 
 347     char neterr
[ANET_ERR_LEN
]; 
 349     int cronloops
;              /* number of times the cron function run */ 
 350     list 
*objfreelist
;          /* A list of freed objects to avoid malloc() */ 
 351     time_t lastsave
;            /* Unix time of last successful save */ 
 352     /* Fields used only for stats */ 
 353     time_t stat_starttime
;         /* server start time */ 
 354     long long stat_numcommands
;    /* number of processed commands */ 
 355     long long stat_numconnections
; /* number of connections received */ 
 368     pid_t bgsavechildpid
; 
 369     pid_t bgrewritechildpid
; 
 370     sds bgrewritebuf
; /* buffer taken by parent during append only rewrite */ 
 371     struct saveparam 
*saveparams
; 
 376     char *appendfilename
; 
 380     /* Replication related */ 
 385     redisClient 
*master
;    /* client that is master for this slave */ 
 387     unsigned int maxclients
; 
 388     unsigned long long maxmemory
; 
 389     unsigned int blpop_blocked_clients
; 
 390     unsigned int vm_blocked_clients
; 
 391     /* Sort parameters - qsort_r() is only available under BSD so we 
 392      * have to take this state global, in order to pass it to sortCompare() */ 
 396     /* Virtual memory configuration */ 
 401     unsigned long long vm_max_memory
; 
 403     size_t hash_max_zipmap_entries
; 
 404     size_t hash_max_zipmap_value
; 
 405     /* Virtual memory state */ 
 408     off_t vm_next_page
; /* Next probably empty page */ 
 409     off_t vm_near_pages
; /* Number of pages allocated sequentially */ 
 410     unsigned char *vm_bitmap
; /* Bitmap of free/used pages */ 
 411     time_t unixtime
;    /* Unix time sampled every second. */ 
 412     /* Virtual memory I/O threads stuff */ 
 413     /* An I/O thread processes an element taken from the io_newjobs queue and 
 414      * puts the result of the operation in the io_processed list. While the 
 415      * job is being processed, it's put on the io_processing queue. */ 
 416     list 
*io_newjobs
; /* List of VM I/O jobs yet to be processed */ 
 417     list 
*io_processing
; /* List of VM I/O jobs being processed */ 
 418     list 
*io_processed
; /* List of VM I/O jobs already processed */ 
 419     list 
*io_ready_clients
; /* Clients ready to be unblocked. All keys loaded */ 
 420     pthread_mutex_t io_mutex
; /* lock to access io_jobs/io_done/io_thread_job */ 
 421     pthread_mutex_t obj_freelist_mutex
; /* safe redis objects creation/free */ 
 422     pthread_mutex_t io_swapfile_mutex
; /* So we can lseek + write */ 
 423     pthread_attr_t io_threads_attr
; /* attributes for threads creation */ 
 424     int io_active_threads
; /* Number of running I/O threads */ 
 425     int vm_max_threads
; /* Max number of I/O threads running at the same time */ 
 426     /* Our main thread is blocked on the event loop, looking for sockets ready 
 427      * to be read or written, so when a threaded I/O operation is ready to be 
 428      * processed by the main thread, the I/O thread will use a unix pipe to 
 429      * awake the main thread. The following are the two pipe FDs. */ 
 430     int io_ready_pipe_read
; 
 431     int io_ready_pipe_write
; 
 432     /* Virtual memory stats */ 
 433     unsigned long long vm_stats_used_pages
; 
 434     unsigned long long vm_stats_swapped_objects
; 
 435     unsigned long long vm_stats_swapouts
; 
 436     unsigned long long vm_stats_swapins
; 
 440 typedef void redisCommandProc(redisClient 
*c
); 
 441 struct redisCommand 
{ 
 443     redisCommandProc 
*proc
; 
 446     /* Use a function to determine which keys need to be loaded 
 447      * in the background prior to executing this command. Takes precedence 
 448      * over vm_firstkey and others, ignored when NULL */ 
 449     redisCommandProc 
*vm_preload_proc
; 
 450     /* What keys should be loaded in background when calling this command? */ 
 451     int vm_firstkey
; /* The first argument that's a key (0 = no keys) */ 
 452     int vm_lastkey
;  /* The last argument that's a key */ 
 453     int vm_keystep
;  /* The step between first and last key */ 
 456 struct redisFunctionSym 
{ 
 458     unsigned long pointer
; 
 461 typedef struct _redisSortObject 
{ 
 469 typedef struct _redisSortOperation 
{ 
 472 } redisSortOperation
; 
 474 /* ZSETs use a specialized version of Skiplists */ 
 476 typedef struct zskiplistNode 
{ 
 477     struct zskiplistNode 
**forward
; 
 478     struct zskiplistNode 
*backward
; 
 484 typedef struct zskiplist 
{ 
 485     struct zskiplistNode 
*header
, *tail
; 
 486     unsigned long length
; 
 490 typedef struct zset 
{ 
 495 /* Our shared "common" objects */ 
 497 struct sharedObjectsStruct 
{ 
 498     robj 
*crlf
, *ok
, *err
, *emptybulk
, *czero
, *cone
, *pong
, *space
, 
 499     *colon
, *nullbulk
, *nullmultibulk
, *queued
, 
 500     *emptymultibulk
, *wrongtypeerr
, *nokeyerr
, *syntaxerr
, *sameobjecterr
, 
 501     *outofrangeerr
, *plus
, 
 502     *select0
, *select1
, *select2
, *select3
, *select4
, 
 503     *select5
, *select6
, *select7
, *select8
, *select9
; 
 506 /* Global vars that are actually used as constants. The following double 
 507  * values are used for double on-disk serialization, and are initialized 
 508  * at runtime to avoid strange compiler optimizations. */ 
 510 static double R_Zero
, R_PosInf
, R_NegInf
, R_Nan
; 
 512 /* VM threaded I/O request message */ 
 513 #define REDIS_IOJOB_LOAD 0          /* Load from disk to memory */ 
 514 #define REDIS_IOJOB_PREPARE_SWAP 1  /* Compute needed pages */ 
 515 #define REDIS_IOJOB_DO_SWAP 2       /* Swap from memory to disk */ 
 516 typedef struct iojob 
{ 
 517     int type
;   /* Request type, REDIS_IOJOB_* */ 
 518     redisDb 
*db
;/* Redis database */ 
 519     robj 
*key
;  /* This I/O request is about swapping this key */ 
 520     robj 
*val
;  /* the value to swap for REDIS_IOJOB_*_SWAP, otherwise this 
 521                  * field is populated by the I/O thread for REDIS_IOJOB_LOAD. */ 
 522     off_t page
; /* Swap page where to read/write the object */ 
 523     off_t pages
; /* Swap pages needed to save the object. PREPARE_SWAP return val */ 
 524     int canceled
; /* True if this command was canceled by blocking side of VM */ 
 525     pthread_t thread
; /* ID of the thread processing this entry */ 
 528 /*================================ Prototypes =============================== */ 
 530 static void freeStringObject(robj 
*o
); 
 531 static void freeListObject(robj 
*o
); 
 532 static void freeSetObject(robj 
*o
); 
 533 static void decrRefCount(void *o
); 
 534 static robj 
*createObject(int type
, void *ptr
); 
 535 static void freeClient(redisClient 
*c
); 
 536 static int rdbLoad(char *filename
); 
 537 static void addReply(redisClient 
*c
, robj 
*obj
); 
 538 static void addReplySds(redisClient 
*c
, sds s
); 
 539 static void incrRefCount(robj 
*o
); 
 540 static int rdbSaveBackground(char *filename
); 
 541 static robj 
*createStringObject(char *ptr
, size_t len
); 
 542 static robj 
*dupStringObject(robj 
*o
); 
 543 static void replicationFeedSlaves(list 
*slaves
, struct redisCommand 
*cmd
, int dictid
, robj 
**argv
, int argc
); 
 544 static void feedAppendOnlyFile(struct redisCommand 
*cmd
, int dictid
, robj 
**argv
, int argc
); 
 545 static int syncWithMaster(void); 
 546 static robj 
*tryObjectSharing(robj 
*o
); 
 547 static int tryObjectEncoding(robj 
*o
); 
 548 static robj 
*getDecodedObject(robj 
*o
); 
 549 static int removeExpire(redisDb 
*db
, robj 
*key
); 
 550 static int expireIfNeeded(redisDb 
*db
, robj 
*key
); 
 551 static int deleteIfVolatile(redisDb 
*db
, robj 
*key
); 
 552 static int deleteIfSwapped(redisDb 
*db
, robj 
*key
); 
 553 static int deleteKey(redisDb 
*db
, robj 
*key
); 
 554 static time_t getExpire(redisDb 
*db
, robj 
*key
); 
 555 static int setExpire(redisDb 
*db
, robj 
*key
, time_t when
); 
 556 static void updateSlavesWaitingBgsave(int bgsaveerr
); 
 557 static void freeMemoryIfNeeded(void); 
 558 static int processCommand(redisClient 
*c
); 
 559 static void setupSigSegvAction(void); 
 560 static void rdbRemoveTempFile(pid_t childpid
); 
 561 static void aofRemoveTempFile(pid_t childpid
); 
 562 static size_t stringObjectLen(robj 
*o
); 
 563 static void processInputBuffer(redisClient 
*c
); 
 564 static zskiplist 
*zslCreate(void); 
 565 static void zslFree(zskiplist 
*zsl
); 
 566 static void zslInsert(zskiplist 
*zsl
, double score
, robj 
*obj
); 
 567 static void sendReplyToClientWritev(aeEventLoop 
*el
, int fd
, void *privdata
, int mask
); 
 568 static void initClientMultiState(redisClient 
*c
); 
 569 static void freeClientMultiState(redisClient 
*c
); 
 570 static void queueMultiCommand(redisClient 
*c
, struct redisCommand 
*cmd
); 
 571 static void unblockClientWaitingData(redisClient 
*c
); 
 572 static int handleClientsWaitingListPush(redisClient 
*c
, robj 
*key
, robj 
*ele
); 
 573 static void vmInit(void); 
 574 static void vmMarkPagesFree(off_t page
, off_t count
); 
 575 static robj 
*vmLoadObject(robj 
*key
); 
 576 static robj 
*vmPreviewObject(robj 
*key
); 
 577 static int vmSwapOneObjectBlocking(void); 
 578 static int vmSwapOneObjectThreaded(void); 
 579 static int vmCanSwapOut(void); 
 580 static int tryFreeOneObjectFromFreelist(void); 
 581 static void acceptHandler(aeEventLoop 
*el
, int fd
, void *privdata
, int mask
); 
 582 static void vmThreadedIOCompletedJob(aeEventLoop 
*el
, int fd
, void *privdata
, int mask
); 
 583 static void vmCancelThreadedIOJob(robj 
*o
); 
 584 static void lockThreadedIO(void); 
 585 static void unlockThreadedIO(void); 
 586 static int vmSwapObjectThreaded(robj 
*key
, robj 
*val
, redisDb 
*db
); 
 587 static void freeIOJob(iojob 
*j
); 
 588 static void queueIOJob(iojob 
*j
); 
 589 static int vmWriteObjectOnSwap(robj 
*o
, off_t page
); 
 590 static robj 
*vmReadObjectFromSwap(off_t page
, int type
); 
 591 static void waitEmptyIOJobsQueue(void); 
 592 static void vmReopenSwapFile(void); 
 593 static int vmFreePage(off_t page
); 
 594 static void zunionInterBlockClientOnSwappedKeys(redisClient 
*c
); 
 595 static int blockClientOnSwappedKeys(struct redisCommand 
*cmd
, redisClient 
*c
); 
 596 static int dontWaitForSwappedKey(redisClient 
*c
, robj 
*key
); 
 597 static void handleClientsBlockedOnSwappedKey(redisDb 
*db
, robj 
*key
); 
 598 static void readQueryFromClient(aeEventLoop 
*el
, int fd
, void *privdata
, int mask
); 
 599 static struct redisCommand 
*lookupCommand(char *name
); 
 600 static void call(redisClient 
*c
, struct redisCommand 
*cmd
); 
 601 static void resetClient(redisClient 
*c
); 
 602 static void convertToRealHash(robj 
*o
); 
 604 static void authCommand(redisClient 
*c
); 
 605 static void pingCommand(redisClient 
*c
); 
 606 static void echoCommand(redisClient 
*c
); 
 607 static void setCommand(redisClient 
*c
); 
 608 static void setnxCommand(redisClient 
*c
); 
 609 static void getCommand(redisClient 
*c
); 
 610 static void delCommand(redisClient 
*c
); 
 611 static void existsCommand(redisClient 
*c
); 
 612 static void incrCommand(redisClient 
*c
); 
 613 static void decrCommand(redisClient 
*c
); 
 614 static void incrbyCommand(redisClient 
*c
); 
 615 static void decrbyCommand(redisClient 
*c
); 
 616 static void selectCommand(redisClient 
*c
); 
 617 static void randomkeyCommand(redisClient 
*c
); 
 618 static void keysCommand(redisClient 
*c
); 
 619 static void dbsizeCommand(redisClient 
*c
); 
 620 static void lastsaveCommand(redisClient 
*c
); 
 621 static void saveCommand(redisClient 
*c
); 
 622 static void bgsaveCommand(redisClient 
*c
); 
 623 static void bgrewriteaofCommand(redisClient 
*c
); 
 624 static void shutdownCommand(redisClient 
*c
); 
 625 static void moveCommand(redisClient 
*c
); 
 626 static void renameCommand(redisClient 
*c
); 
 627 static void renamenxCommand(redisClient 
*c
); 
 628 static void lpushCommand(redisClient 
*c
); 
 629 static void rpushCommand(redisClient 
*c
); 
 630 static void lpopCommand(redisClient 
*c
); 
 631 static void rpopCommand(redisClient 
*c
); 
 632 static void llenCommand(redisClient 
*c
); 
 633 static void lindexCommand(redisClient 
*c
); 
 634 static void lrangeCommand(redisClient 
*c
); 
 635 static void ltrimCommand(redisClient 
*c
); 
 636 static void typeCommand(redisClient 
*c
); 
 637 static void lsetCommand(redisClient 
*c
); 
 638 static void saddCommand(redisClient 
*c
); 
 639 static void sremCommand(redisClient 
*c
); 
 640 static void smoveCommand(redisClient 
*c
); 
 641 static void sismemberCommand(redisClient 
*c
); 
 642 static void scardCommand(redisClient 
*c
); 
 643 static void spopCommand(redisClient 
*c
); 
 644 static void srandmemberCommand(redisClient 
*c
); 
 645 static void sinterCommand(redisClient 
*c
); 
 646 static void sinterstoreCommand(redisClient 
*c
); 
 647 static void sunionCommand(redisClient 
*c
); 
 648 static void sunionstoreCommand(redisClient 
*c
); 
 649 static void sdiffCommand(redisClient 
*c
); 
 650 static void sdiffstoreCommand(redisClient 
*c
); 
 651 static void syncCommand(redisClient 
*c
); 
 652 static void flushdbCommand(redisClient 
*c
); 
 653 static void flushallCommand(redisClient 
*c
); 
 654 static void sortCommand(redisClient 
*c
); 
 655 static void lremCommand(redisClient 
*c
); 
 656 static void rpoplpushcommand(redisClient 
*c
); 
 657 static void infoCommand(redisClient 
*c
); 
 658 static void mgetCommand(redisClient 
*c
); 
 659 static void monitorCommand(redisClient 
*c
); 
 660 static void expireCommand(redisClient 
*c
); 
 661 static void expireatCommand(redisClient 
*c
); 
 662 static void getsetCommand(redisClient 
*c
); 
 663 static void ttlCommand(redisClient 
*c
); 
 664 static void slaveofCommand(redisClient 
*c
); 
 665 static void debugCommand(redisClient 
*c
); 
 666 static void msetCommand(redisClient 
*c
); 
 667 static void msetnxCommand(redisClient 
*c
); 
 668 static void zaddCommand(redisClient 
*c
); 
 669 static void zincrbyCommand(redisClient 
*c
); 
 670 static void zrangeCommand(redisClient 
*c
); 
 671 static void zrangebyscoreCommand(redisClient 
*c
); 
 672 static void zcountCommand(redisClient 
*c
); 
 673 static void zrevrangeCommand(redisClient 
*c
); 
 674 static void zcardCommand(redisClient 
*c
); 
 675 static void zremCommand(redisClient 
*c
); 
 676 static void zscoreCommand(redisClient 
*c
); 
 677 static void zremrangebyscoreCommand(redisClient 
*c
); 
 678 static void multiCommand(redisClient 
*c
); 
 679 static void execCommand(redisClient 
*c
); 
 680 static void discardCommand(redisClient 
*c
); 
 681 static void blpopCommand(redisClient 
*c
); 
 682 static void brpopCommand(redisClient 
*c
); 
 683 static void appendCommand(redisClient 
*c
); 
 684 static void substrCommand(redisClient 
*c
); 
 685 static void zrankCommand(redisClient 
*c
); 
 686 static void zrevrankCommand(redisClient 
*c
); 
 687 static void hsetCommand(redisClient 
*c
); 
 688 static void hgetCommand(redisClient 
*c
); 
 689 static void hdelCommand(redisClient 
*c
); 
 690 static void hlenCommand(redisClient 
*c
); 
 691 static void zremrangebyrankCommand(redisClient 
*c
); 
 692 static void zunionCommand(redisClient 
*c
); 
 693 static void zinterCommand(redisClient 
*c
); 
 694 static void hkeysCommand(redisClient 
*c
); 
 695 static void hvalsCommand(redisClient 
*c
); 
 696 static void hgetallCommand(redisClient 
*c
); 
 697 static void hexistsCommand(redisClient 
*c
); 
 699 /*================================= Globals ================================= */ 
 702 static struct redisServer server
; /* server global state */ 
 703 static struct redisCommand cmdTable
[] = { 
 704     {"get",getCommand
,2,REDIS_CMD_INLINE
,NULL
,1,1,1}, 
 705     {"set",setCommand
,3,REDIS_CMD_BULK
|REDIS_CMD_DENYOOM
,NULL
,0,0,0}, 
 706     {"setnx",setnxCommand
,3,REDIS_CMD_BULK
|REDIS_CMD_DENYOOM
,NULL
,0,0,0}, 
 707     {"append",appendCommand
,3,REDIS_CMD_BULK
|REDIS_CMD_DENYOOM
,NULL
,1,1,1}, 
 708     {"substr",substrCommand
,4,REDIS_CMD_INLINE
,NULL
,1,1,1}, 
 709     {"del",delCommand
,-2,REDIS_CMD_INLINE
,NULL
,0,0,0}, 
 710     {"exists",existsCommand
,2,REDIS_CMD_INLINE
,NULL
,1,1,1}, 
 711     {"incr",incrCommand
,2,REDIS_CMD_INLINE
|REDIS_CMD_DENYOOM
,NULL
,1,1,1}, 
 712     {"decr",decrCommand
,2,REDIS_CMD_INLINE
|REDIS_CMD_DENYOOM
,NULL
,1,1,1}, 
 713     {"mget",mgetCommand
,-2,REDIS_CMD_INLINE
,NULL
,1,-1,1}, 
 714     {"rpush",rpushCommand
,3,REDIS_CMD_BULK
|REDIS_CMD_DENYOOM
,NULL
,1,1,1}, 
 715     {"lpush",lpushCommand
,3,REDIS_CMD_BULK
|REDIS_CMD_DENYOOM
,NULL
,1,1,1}, 
 716     {"rpop",rpopCommand
,2,REDIS_CMD_INLINE
,NULL
,1,1,1}, 
 717     {"lpop",lpopCommand
,2,REDIS_CMD_INLINE
,NULL
,1,1,1}, 
 718     {"brpop",brpopCommand
,-3,REDIS_CMD_INLINE
,NULL
,1,1,1}, 
 719     {"blpop",blpopCommand
,-3,REDIS_CMD_INLINE
,NULL
,1,1,1}, 
 720     {"llen",llenCommand
,2,REDIS_CMD_INLINE
,NULL
,1,1,1}, 
 721     {"lindex",lindexCommand
,3,REDIS_CMD_INLINE
,NULL
,1,1,1}, 
 722     {"lset",lsetCommand
,4,REDIS_CMD_BULK
|REDIS_CMD_DENYOOM
,NULL
,1,1,1}, 
 723     {"lrange",lrangeCommand
,4,REDIS_CMD_INLINE
,NULL
,1,1,1}, 
 724     {"ltrim",ltrimCommand
,4,REDIS_CMD_INLINE
,NULL
,1,1,1}, 
 725     {"lrem",lremCommand
,4,REDIS_CMD_BULK
,NULL
,1,1,1}, 
 726     {"rpoplpush",rpoplpushcommand
,3,REDIS_CMD_INLINE
|REDIS_CMD_DENYOOM
,NULL
,1,2,1}, 
 727     {"sadd",saddCommand
,3,REDIS_CMD_BULK
|REDIS_CMD_DENYOOM
,NULL
,1,1,1}, 
 728     {"srem",sremCommand
,3,REDIS_CMD_BULK
,NULL
,1,1,1}, 
 729     {"smove",smoveCommand
,4,REDIS_CMD_BULK
,NULL
,1,2,1}, 
 730     {"sismember",sismemberCommand
,3,REDIS_CMD_BULK
,NULL
,1,1,1}, 
 731     {"scard",scardCommand
,2,REDIS_CMD_INLINE
,NULL
,1,1,1}, 
 732     {"spop",spopCommand
,2,REDIS_CMD_INLINE
,NULL
,1,1,1}, 
 733     {"srandmember",srandmemberCommand
,2,REDIS_CMD_INLINE
,NULL
,1,1,1}, 
 734     {"sinter",sinterCommand
,-2,REDIS_CMD_INLINE
|REDIS_CMD_DENYOOM
,NULL
,1,-1,1}, 
 735     {"sinterstore",sinterstoreCommand
,-3,REDIS_CMD_INLINE
|REDIS_CMD_DENYOOM
,NULL
,2,-1,1}, 
 736     {"sunion",sunionCommand
,-2,REDIS_CMD_INLINE
|REDIS_CMD_DENYOOM
,NULL
,1,-1,1}, 
 737     {"sunionstore",sunionstoreCommand
,-3,REDIS_CMD_INLINE
|REDIS_CMD_DENYOOM
,NULL
,2,-1,1}, 
 738     {"sdiff",sdiffCommand
,-2,REDIS_CMD_INLINE
|REDIS_CMD_DENYOOM
,NULL
,1,-1,1}, 
 739     {"sdiffstore",sdiffstoreCommand
,-3,REDIS_CMD_INLINE
|REDIS_CMD_DENYOOM
,NULL
,2,-1,1}, 
 740     {"smembers",sinterCommand
,2,REDIS_CMD_INLINE
,NULL
,1,1,1}, 
 741     {"zadd",zaddCommand
,4,REDIS_CMD_BULK
|REDIS_CMD_DENYOOM
,NULL
,1,1,1}, 
 742     {"zincrby",zincrbyCommand
,4,REDIS_CMD_BULK
|REDIS_CMD_DENYOOM
,NULL
,1,1,1}, 
 743     {"zrem",zremCommand
,3,REDIS_CMD_BULK
,NULL
,1,1,1}, 
 744     {"zremrangebyscore",zremrangebyscoreCommand
,4,REDIS_CMD_INLINE
,NULL
,1,1,1}, 
 745     {"zremrangebyrank",zremrangebyrankCommand
,4,REDIS_CMD_INLINE
,NULL
,1,1,1}, 
 746     {"zunion",zunionCommand
,-4,REDIS_CMD_INLINE
|REDIS_CMD_DENYOOM
,zunionInterBlockClientOnSwappedKeys
,0,0,0}, 
 747     {"zinter",zinterCommand
,-4,REDIS_CMD_INLINE
|REDIS_CMD_DENYOOM
,zunionInterBlockClientOnSwappedKeys
,0,0,0}, 
 748     {"zrange",zrangeCommand
,-4,REDIS_CMD_INLINE
,NULL
,1,1,1}, 
 749     {"zrangebyscore",zrangebyscoreCommand
,-4,REDIS_CMD_INLINE
,NULL
,1,1,1}, 
 750     {"zcount",zcountCommand
,4,REDIS_CMD_INLINE
,NULL
,1,1,1}, 
 751     {"zrevrange",zrevrangeCommand
,-4,REDIS_CMD_INLINE
,NULL
,1,1,1}, 
 752     {"zcard",zcardCommand
,2,REDIS_CMD_INLINE
,NULL
,1,1,1}, 
 753     {"zscore",zscoreCommand
,3,REDIS_CMD_BULK
|REDIS_CMD_DENYOOM
,NULL
,1,1,1}, 
 754     {"zrank",zrankCommand
,3,REDIS_CMD_BULK
,NULL
,1,1,1}, 
 755     {"zrevrank",zrevrankCommand
,3,REDIS_CMD_BULK
,NULL
,1,1,1}, 
 756     {"hset",hsetCommand
,4,REDIS_CMD_BULK
|REDIS_CMD_DENYOOM
,NULL
,1,1,1}, 
 757     {"hget",hgetCommand
,3,REDIS_CMD_BULK
,NULL
,1,1,1}, 
 758     {"hdel",hdelCommand
,3,REDIS_CMD_BULK
,NULL
,1,1,1}, 
 759     {"hlen",hlenCommand
,2,REDIS_CMD_INLINE
,NULL
,1,1,1}, 
 760     {"hkeys",hkeysCommand
,2,REDIS_CMD_INLINE
,NULL
,1,1,1}, 
 761     {"hvals",hvalsCommand
,2,REDIS_CMD_INLINE
,NULL
,1,1,1}, 
 762     {"hgetall",hgetallCommand
,2,REDIS_CMD_INLINE
,NULL
,1,1,1}, 
 763     {"hexists",hexistsCommand
,3,REDIS_CMD_BULK
,NULL
,1,1,1}, 
 764     {"incrby",incrbyCommand
,3,REDIS_CMD_INLINE
|REDIS_CMD_DENYOOM
,NULL
,1,1,1}, 
 765     {"decrby",decrbyCommand
,3,REDIS_CMD_INLINE
|REDIS_CMD_DENYOOM
,NULL
,1,1,1}, 
 766     {"getset",getsetCommand
,3,REDIS_CMD_BULK
|REDIS_CMD_DENYOOM
,NULL
,1,1,1}, 
 767     {"mset",msetCommand
,-3,REDIS_CMD_BULK
|REDIS_CMD_DENYOOM
,NULL
,1,-1,2}, 
 768     {"msetnx",msetnxCommand
,-3,REDIS_CMD_BULK
|REDIS_CMD_DENYOOM
,NULL
,1,-1,2}, 
 769     {"randomkey",randomkeyCommand
,1,REDIS_CMD_INLINE
,NULL
,0,0,0}, 
 770     {"select",selectCommand
,2,REDIS_CMD_INLINE
,NULL
,0,0,0}, 
 771     {"move",moveCommand
,3,REDIS_CMD_INLINE
,NULL
,1,1,1}, 
 772     {"rename",renameCommand
,3,REDIS_CMD_INLINE
,NULL
,1,1,1}, 
 773     {"renamenx",renamenxCommand
,3,REDIS_CMD_INLINE
,NULL
,1,1,1}, 
 774     {"expire",expireCommand
,3,REDIS_CMD_INLINE
,NULL
,0,0,0}, 
 775     {"expireat",expireatCommand
,3,REDIS_CMD_INLINE
,NULL
,0,0,0}, 
 776     {"keys",keysCommand
,2,REDIS_CMD_INLINE
,NULL
,0,0,0}, 
 777     {"dbsize",dbsizeCommand
,1,REDIS_CMD_INLINE
,NULL
,0,0,0}, 
 778     {"auth",authCommand
,2,REDIS_CMD_INLINE
,NULL
,0,0,0}, 
 779     {"ping",pingCommand
,1,REDIS_CMD_INLINE
,NULL
,0,0,0}, 
 780     {"echo",echoCommand
,2,REDIS_CMD_BULK
,NULL
,0,0,0}, 
 781     {"save",saveCommand
,1,REDIS_CMD_INLINE
,NULL
,0,0,0}, 
 782     {"bgsave",bgsaveCommand
,1,REDIS_CMD_INLINE
,NULL
,0,0,0}, 
 783     {"bgrewriteaof",bgrewriteaofCommand
,1,REDIS_CMD_INLINE
,NULL
,0,0,0}, 
 784     {"shutdown",shutdownCommand
,1,REDIS_CMD_INLINE
,NULL
,0,0,0}, 
 785     {"lastsave",lastsaveCommand
,1,REDIS_CMD_INLINE
,NULL
,0,0,0}, 
 786     {"type",typeCommand
,2,REDIS_CMD_INLINE
,NULL
,1,1,1}, 
 787     {"multi",multiCommand
,1,REDIS_CMD_INLINE
,NULL
,0,0,0}, 
 788     {"exec",execCommand
,1,REDIS_CMD_INLINE
,NULL
,0,0,0}, 
 789     {"discard",discardCommand
,1,REDIS_CMD_INLINE
,NULL
,0,0,0}, 
 790     {"sync",syncCommand
,1,REDIS_CMD_INLINE
,NULL
,0,0,0}, 
 791     {"flushdb",flushdbCommand
,1,REDIS_CMD_INLINE
,NULL
,0,0,0}, 
 792     {"flushall",flushallCommand
,1,REDIS_CMD_INLINE
,NULL
,0,0,0}, 
 793     {"sort",sortCommand
,-2,REDIS_CMD_INLINE
|REDIS_CMD_DENYOOM
,NULL
,1,1,1}, 
 794     {"info",infoCommand
,1,REDIS_CMD_INLINE
,NULL
,0,0,0}, 
 795     {"monitor",monitorCommand
,1,REDIS_CMD_INLINE
,NULL
,0,0,0}, 
 796     {"ttl",ttlCommand
,2,REDIS_CMD_INLINE
,NULL
,1,1,1}, 
 797     {"slaveof",slaveofCommand
,3,REDIS_CMD_INLINE
,NULL
,0,0,0}, 
 798     {"debug",debugCommand
,-2,REDIS_CMD_INLINE
,NULL
,0,0,0}, 
 799     {NULL
,NULL
,0,0,NULL
,0,0,0} 
 802 /*============================ Utility functions ============================ */ 
 804 /* Glob-style pattern matching. */ 
 805 int stringmatchlen(const char *pattern
, int patternLen
, 
 806         const char *string
, int stringLen
, int nocase
) 
 811             while (pattern
[1] == '*') { 
 816                 return 1; /* match */ 
 818                 if (stringmatchlen(pattern
+1, patternLen
-1, 
 819                             string
, stringLen
, nocase
)) 
 820                     return 1; /* match */ 
 824             return 0; /* no match */ 
 828                 return 0; /* no match */ 
 838             not = pattern
[0] == '^'; 
 845                 if (pattern
[0] == '\\') { 
 848                     if (pattern
[0] == string
[0]) 
 850                 } else if (pattern
[0] == ']') { 
 852                 } else if (patternLen 
== 0) { 
 856                 } else if (pattern
[1] == '-' && patternLen 
>= 3) { 
 857                     int start 
= pattern
[0]; 
 858                     int end 
= pattern
[2]; 
 866                         start 
= tolower(start
); 
 872                     if (c 
>= start 
&& c 
<= end
) 
 876                         if (pattern
[0] == string
[0]) 
 879                         if (tolower((int)pattern
[0]) == tolower((int)string
[0])) 
 889                 return 0; /* no match */ 
 895             if (patternLen 
>= 2) { 
 902                 if (pattern
[0] != string
[0]) 
 903                     return 0; /* no match */ 
 905                 if (tolower((int)pattern
[0]) != tolower((int)string
[0])) 
 906                     return 0; /* no match */ 
 914         if (stringLen 
== 0) { 
 915             while(*pattern 
== '*') { 
 922     if (patternLen 
== 0 && stringLen 
== 0) 
 /* Write a printf-style message (fmt plus variadic arguments) to the log.
  *
  * The destination is stdout when server.logfile is NULL; otherwise the
  * log file is opened in append mode for this call and closed again at the
  * end. Messages whose level is lower than server.verbosity are not
  * written. Every message is prefixed with "[pid] <day> <month> HH:MM:SS"
  * followed by a one-character marker selected by the level. */
 927 static void redisLog(int level
, const char *fmt
, ...) { 
 931     fp 
= (server
.logfile 
== NULL
) ? stdout 
: fopen(server
.logfile
,"a"); 
 935     if (level 
>= server
.verbosity
) { 
 941         strftime(buf
,64,"%d %b %H:%M:%S",localtime(&now
)); 
 942         fprintf(fp
,"[%d] %s %c ",(int)getpid(),buf
,c
[level
]); 
 943         vfprintf(fp
, fmt
, ap
); 
         /* Only close what we opened ourselves: stdout must stay open. */
 949     if (server
.logfile
) fclose(fp
); 
 952 /*====================== Hash table type implementation  ==================== */ 
 954 /* This is a hash table type that uses the SDS dynamic strings library as 
 955  * keys and redis objects as values (objects can hold SDS strings, 
 958 static void dictVanillaFree(void *privdata
, void *val
) 
 960     DICT_NOTUSED(privdata
); 
 964 static void dictListDestructor(void *privdata
, void *val
) 
 966     DICT_NOTUSED(privdata
); 
 967     listRelease((list
*)val
); 
 /* Dict key-compare callback for sds keys.
  *
  * Returns 1 when both keys have the same sdslen() and identical bytes,
  * 0 otherwise. privdata is not used. */
 970 static int sdsDictKeyCompare(void *privdata
, const void *key1
, 
 974     DICT_NOTUSED(privdata
); 
 976     l1 
= sdslen((sds
)key1
); 
 977     l2 
= sdslen((sds
)key2
); 
         /* Keys of different length can never match: skip the memcmp(). */
 978     if (l1 
!= l2
) return 0; 
 979     return memcmp(key1
, key2
, l1
) == 0; 
 982 static void dictRedisObjectDestructor(void *privdata
, void *val
) 
 984     DICT_NOTUSED(privdata
); 
 986     if (val 
== NULL
) return; /* Values of swapped out keys as set to NULL */ 
 990 static int dictObjKeyCompare(void *privdata
, const void *key1
, 
 993     const robj 
*o1 
= key1
, *o2 
= key2
; 
 994     return sdsDictKeyCompare(privdata
,o1
->ptr
,o2
->ptr
); 
 997 static unsigned int dictObjHash(const void *key
) { 
 999     return dictGenHashFunction(o
->ptr
, sdslen((sds
)o
->ptr
)); 
/* Dict key-compare callback for keys that are robj pointers which may be
 * integer-encoded (REDIS_ENCODING_INT).
 *
 * When the fast path below does not apply, both keys are passed through
 * getDecodedObject() and their ptr fields are compared with
 * sdsDictKeyCompare(), whose result (1 = equal, 0 = different) is stored
 * in cmp.
 *
 * NOTE(review): the fast path returns 0 when both keys are integer-encoded
 * and carry the same ptr value, i.e. it reports two equal keys as
 * different, which is the opposite of what sdsDictKeyCompare() returns for
 * equal keys. This looks inverted (expected: return 1) -- confirm against
 * the dict key-compare contract before changing. */
1002 static int dictEncObjKeyCompare(void *privdata
, const void *key1
, 
1005     robj 
*o1 
= (robj
*) key1
, *o2 
= (robj
*) key2
; 
        /* Fast path: two integer-encoded objects store their value directly
         * in ptr, so no decoding is needed to compare them. */
1009     if (o1
->encoding 
== REDIS_ENCODING_INT 
&& 
1010         o2
->encoding 
== REDIS_ENCODING_INT 
&& 
1011         o1
->ptr 
== o2
->ptr
) return 0; 
        /* Slow path: compare the decoded representations. */
1014     o1 
= getDecodedObject(o1
); 
1015     o2 
= getDecodedObject(o2
); 
1016     cmp 
= sdsDictKeyCompare(privdata
,o1
->ptr
,o2
->ptr
); 
1022 static unsigned int dictEncObjHash(const void *key
) { 
1023     robj 
*o 
= (robj
*) key
; 
1025     if (o
->encoding 
== REDIS_ENCODING_RAW
) { 
1026         return dictGenHashFunction(o
->ptr
, sdslen((sds
)o
->ptr
)); 
1028         if (o
->encoding 
== REDIS_ENCODING_INT
) { 
1032             len 
= snprintf(buf
,32,"%ld",(long)o
->ptr
); 
1033             return dictGenHashFunction((unsigned char*)buf
, len
); 
1037             o 
= getDecodedObject(o
); 
1038             hash 
= dictGenHashFunction(o
->ptr
, sdslen((sds
)o
->ptr
)); 
1045 /* Sets type and expires */ 
1046 static dictType setDictType 
= { 
1047     dictEncObjHash
,            /* hash function */ 
1050     dictEncObjKeyCompare
,      /* key compare */ 
1051     dictRedisObjectDestructor
, /* key destructor */ 
1052     NULL                       
/* val destructor */ 
1055 /* Sorted sets hash (note: a skiplist is used in addition to the hash table) */ 
1056 static dictType zsetDictType 
= { 
1057     dictEncObjHash
,            /* hash function */ 
1060     dictEncObjKeyCompare
,      /* key compare */ 
1061     dictRedisObjectDestructor
, /* key destructor */ 
1062     dictVanillaFree            
/* val destructor of malloc(sizeof(double)) */ 
1066 static dictType dbDictType 
= { 
1067     dictObjHash
,                /* hash function */ 
1070     dictObjKeyCompare
,          /* key compare */ 
1071     dictRedisObjectDestructor
,  /* key destructor */ 
1072     dictRedisObjectDestructor   
/* val destructor */ 
1076 static dictType keyptrDictType 
= { 
1077     dictObjHash
,               /* hash function */ 
1080     dictObjKeyCompare
,         /* key compare */ 
1081     dictRedisObjectDestructor
, /* key destructor */ 
1082     NULL                       
/* val destructor */ 
1085 /* Hash type hash table (note that small hashes are represented with zipmaps) */ 
1086 static dictType hashDictType 
= { 
1087     dictEncObjHash
,             /* hash function */ 
1090     dictEncObjKeyCompare
,       /* key compare */ 
1091     dictRedisObjectDestructor
,  /* key destructor */ 
1092     dictRedisObjectDestructor   
/* val destructor */ 
1095 /* Keylist hash table type has unencoded redis objects as keys and 
1096  * lists as values. It's used for blocking operations (BLPOP) and to 
1097  * map swapped keys to a list of clients waiting for these keys to be loaded. */ 
1098 static dictType keylistDictType 
= { 
1099     dictObjHash
,                /* hash function */ 
1102     dictObjKeyCompare
,          /* key compare */ 
1103     dictRedisObjectDestructor
,  /* key destructor */ 
1104     dictListDestructor          
/* val destructor */ 
1107 /* ========================= Random utility functions ======================= */ 
1109 /* Redis generally does not try to recover from out of memory conditions 
1110  * when allocating objects or strings, it is not clear if it will be possible 
1111  * to report this condition to the client since the networking layer itself 
1112  * is based on heap allocation for send buffers, so we simply abort. 
1113  * At least the code will be simpler to read... */ 
1114 static void oom(const char *msg
) { 
1115     redisLog(REDIS_WARNING
, "%s: Out of memory\n",msg
); 
1120 /* ====================== Redis server networking stuff ===================== */ 
1121 static void closeTimedoutClients(void) { 
1124     time_t now 
= time(NULL
); 
1127     listRewind(server
.clients
,&li
); 
1128     while ((ln 
= listNext(&li
)) != NULL
) { 
1129         c 
= listNodeValue(ln
); 
1130         if (server
.maxidletime 
&& 
1131             !(c
->flags 
& REDIS_SLAVE
) &&    /* no timeout for slaves */ 
1132             !(c
->flags 
& REDIS_MASTER
) &&   /* no timeout for masters */ 
1133              (now 
- c
->lastinteraction 
> server
.maxidletime
)) 
1135             redisLog(REDIS_VERBOSE
,"Closing idle client"); 
1137         } else if (c
->flags 
& REDIS_BLOCKED
) { 
1138             if (c
->blockingto 
!= 0 && c
->blockingto 
< now
) { 
1139                 addReply(c
,shared
.nullmultibulk
); 
1140                 unblockClientWaitingData(c
); 
/* Return non-zero when the given dict is sparse enough to be worth
 * shrinking: it has slots and entries, it has more than
 * DICT_HT_INITIAL_SIZE slots, and its fill percentage (used*100/size) is
 * below REDIS_HT_MINFILL. */
1146 static int htNeedsResize(dict 
*dict
) { 
    /* long long gives the used*100 product below extra headroom. */
1147     long long size
, used
; 
1149     size 
= dictSlots(dict
); 
1150     used 
= dictSize(dict
); 
    /* The leading `size &&` test also guards the division that follows. */
1151     return (size 
&& used 
&& size 
> DICT_HT_INITIAL_SIZE 
&& 
1152             (used
*100/size 
< REDIS_HT_MINFILL
)); 
1155 /* If the percentage of used slots in the HT drops below REDIS_HT_MINFILL 
1156  * we resize the hash table to save memory */ 
/* Walk all server.dbnum databases and call dictResize() on the main key
 * dict and on the expires dict whenever htNeedsResize() reports them as
 * too sparse. Only the resize of the main dict is logged (at
 * REDIS_VERBOSE level). */
1157 static void tryResizeHashTables(void) { 
1160     for (j 
= 0; j 
< server
.dbnum
; j
++) { 
1161         if (htNeedsResize(server
.db
[j
].dict
)) { 
1162             redisLog(REDIS_VERBOSE
,"The hash table %d is too sparse, resize it...",j
); 
1163             dictResize(server
.db
[j
].dict
); 
1164             redisLog(REDIS_VERBOSE
,"Hash table %d resized.",j
); 
        /* The expires dict is checked independently of the main dict. */
1166         if (htNeedsResize(server
.db
[j
].expires
)) 
1167             dictResize(server
.db
[j
].expires
); 
1171 /* A background saving child (BGSAVE) terminated its work. Handle this. */ 
/* statloc is the wait status of the terminated child (serverCron passes
 * the value filled in by wait3()).
 *
 * On a clean exit with status 0 the success is logged and server.lastsave
 * is set to the current time; a non-zero exit status is logged as an
 * error; when the child was killed by a signal its temp file is removed
 * with rdbRemoveTempFile(). In every case server.bgsavechildpid is reset
 * to -1 and updateSlavesWaitingBgsave() is called with the outcome.
 *
 * NOTE(review): the outcome passed to updateSlavesWaitingBgsave() only
 * tests `exitcode == 0`. POSIX defines WEXITSTATUS() only when
 * WIFEXITED() is true, so for a child terminated by a signal exitcode is
 * not meaningful and may well be 0, in which case waiting slaves would be
 * told REDIS_OK after a failed save. Consider
 * `(!bysignal && exitcode == 0)` -- confirm. */
1172 void backgroundSaveDoneHandler(int statloc
) { 
1173     int exitcode 
= WEXITSTATUS(statloc
); 
1174     int bysignal 
= WIFSIGNALED(statloc
); 
1176     if (!bysignal 
&& exitcode 
== 0) { 
1177         redisLog(REDIS_NOTICE
, 
1178             "Background saving terminated with success"); 
1180         server
.lastsave 
= time(NULL
); 
1181     } else if (!bysignal 
&& exitcode 
!= 0) { 
1182         redisLog(REDIS_WARNING
, "Background saving error"); 
1184         redisLog(REDIS_WARNING
, 
1185             "Background saving terminated by signal"); 
1186         rdbRemoveTempFile(server
.bgsavechildpid
); 
1188     server
.bgsavechildpid 
= -1; 
1189     /* Possibly there are slaves waiting for a BGSAVE in order to be served 
1190      * (the first stage of SYNC is a bulk transfer of dump.rdb) */ 
1191     updateSlavesWaitingBgsave(exitcode 
== 0 ? REDIS_OK 
: REDIS_ERR
); 
1194 /* A background append only file rewriting (BGREWRITEAOF) terminated its work. 
1196 void backgroundRewriteDoneHandler(int statloc
) { 
1197     int exitcode 
= WEXITSTATUS(statloc
); 
1198     int bysignal 
= WIFSIGNALED(statloc
); 
1200     if (!bysignal 
&& exitcode 
== 0) { 
1204         redisLog(REDIS_NOTICE
, 
1205             "Background append only file rewriting terminated with success"); 
1206         /* Now it's time to flush the differences accumulated by the parent */ 
1207         snprintf(tmpfile
,256,"temp-rewriteaof-bg-%d.aof", (int) server
.bgrewritechildpid
); 
1208         fd 
= open(tmpfile
,O_WRONLY
|O_APPEND
); 
1210             redisLog(REDIS_WARNING
, "Not able to open the temp append only file produced by the child: %s", strerror(errno
)); 
1213         /* Flush our data... */ 
1214         if (write(fd
,server
.bgrewritebuf
,sdslen(server
.bgrewritebuf
)) != 
1215                 (signed) sdslen(server
.bgrewritebuf
)) { 
1216             redisLog(REDIS_WARNING
, "Error or short write trying to flush the parent diff of the append log file in the child temp file: %s", strerror(errno
)); 
1220         redisLog(REDIS_NOTICE
,"Parent diff flushed into the new append log file with success (%lu bytes)",sdslen(server
.bgrewritebuf
)); 
1221         /* Now our work is to rename the temp file into the stable file. And 
1222          * switch the file descriptor used by the server for append only. */ 
1223         if (rename(tmpfile
,server
.appendfilename
) == -1) { 
1224             redisLog(REDIS_WARNING
,"Can't rename the temp append only file into the stable one: %s", strerror(errno
)); 
1228         /* Mission completed... almost */ 
1229         redisLog(REDIS_NOTICE
,"Append only file successfully rewritten."); 
1230         if (server
.appendfd 
!= -1) { 
1231             /* If append only is actually enabled... */ 
1232             close(server
.appendfd
); 
1233             server
.appendfd 
= fd
; 
1235             server
.appendseldb 
= -1; /* Make sure it will issue SELECT */ 
1236             redisLog(REDIS_NOTICE
,"The new append only file was selected for future appends."); 
1238             /* If append only is disabled we just generate a dump in this 
1239              * format. Why not? */ 
1242     } else if (!bysignal 
&& exitcode 
!= 0) { 
1243         redisLog(REDIS_WARNING
, "Background append only file rewriting error"); 
1245         redisLog(REDIS_WARNING
, 
1246             "Background append only file rewriting terminated by signal"); 
1249     sdsfree(server
.bgrewritebuf
); 
1250     server
.bgrewritebuf 
= sdsempty(); 
1251     aofRemoveTempFile(server
.bgrewritechildpid
); 
1252     server
.bgrewritechildpid 
= -1; 
1255 static int serverCron(struct aeEventLoop 
*eventLoop
, long long id
, void *clientData
) { 
1256     int j
, loops 
= server
.cronloops
++; 
1257     REDIS_NOTUSED(eventLoop
); 
1259     REDIS_NOTUSED(clientData
); 
1261     /* We take a cached value of the unix time in the global state because 
1262      * with virtual memory and aging we need to store the current time 
1263      * in objects at every object access, and accuracy is not needed. 
1264      * To access a global var is faster than calling time(NULL) */ 
1265     server
.unixtime 
= time(NULL
); 
1267     /* Show some info about non-empty databases */ 
1268     for (j 
= 0; j 
< server
.dbnum
; j
++) { 
1269         long long size
, used
, vkeys
; 
1271         size 
= dictSlots(server
.db
[j
].dict
); 
1272         used 
= dictSize(server
.db
[j
].dict
); 
1273         vkeys 
= dictSize(server
.db
[j
].expires
); 
1274         if (!(loops 
% 5) && (used 
|| vkeys
)) { 
1275             redisLog(REDIS_VERBOSE
,"DB %d: %lld keys (%lld volatile) in %lld slots HT.",j
,used
,vkeys
,size
); 
1276             /* dictPrintStats(server.dict); */ 
1280     /* We don't want to resize the hash tables while a background saving 
1281      * is in progress: the saving child is created using fork() that is 
1282      * implemented with a copy-on-write semantic in most modern systems, so 
1283      * if we resize the HT while there is the saving child at work actually 
1284      * a lot of memory movements in the parent will cause a lot of pages 
1286     if (server
.bgsavechildpid 
== -1) tryResizeHashTables(); 
1288     /* Show information about connected clients */ 
1290         redisLog(REDIS_VERBOSE
,"%d clients connected (%d slaves), %zu bytes in use, %d shared objects", 
1291             listLength(server
.clients
)-listLength(server
.slaves
), 
1292             listLength(server
.slaves
), 
1293             zmalloc_used_memory(), 
1294             dictSize(server
.sharingpool
)); 
1297     /* Close connections of timedout clients */ 
1298     if ((server
.maxidletime 
&& !(loops 
% 10)) || server
.blpop_blocked_clients
) 
1299         closeTimedoutClients(); 
1301     /* Check if a background saving or AOF rewrite in progress terminated */ 
1302     if (server
.bgsavechildpid 
!= -1 || server
.bgrewritechildpid 
!= -1) { 
1306         if ((pid 
= wait3(&statloc
,WNOHANG
,NULL
)) != 0) { 
1307             if (pid 
== server
.bgsavechildpid
) { 
1308                 backgroundSaveDoneHandler(statloc
); 
1310                 backgroundRewriteDoneHandler(statloc
); 
1314         /* If there is not a background saving in progress check if 
1315          * we have to save now */ 
1316          time_t now 
= time(NULL
); 
1317          for (j 
= 0; j 
< server
.saveparamslen
; j
++) { 
1318             struct saveparam 
*sp 
= server
.saveparams
+j
; 
1320             if (server
.dirty 
>= sp
->changes 
&& 
1321                 now
-server
.lastsave 
> sp
->seconds
) { 
1322                 redisLog(REDIS_NOTICE
,"%d changes in %d seconds. Saving...", 
1323                     sp
->changes
, sp
->seconds
); 
1324                 rdbSaveBackground(server
.dbfilename
); 
1330     /* Try to expire a few timed out keys. The algorithm used is adaptive and 
1331      * will use few CPU cycles if there are few expiring keys, otherwise 
1332      * it will get more aggressive to avoid that too much memory is used by 
1333      * keys that can be removed from the keyspace. */ 
1334     for (j 
= 0; j 
< server
.dbnum
; j
++) { 
1336         redisDb 
*db 
= server
.db
+j
; 
1338         /* Continue to expire if at the end of the cycle more than 25% 
1339          * of the keys were expired. */ 
1341             long num 
= dictSize(db
->expires
); 
1342             time_t now 
= time(NULL
); 
1345             if (num 
> REDIS_EXPIRELOOKUPS_PER_CRON
) 
1346                 num 
= REDIS_EXPIRELOOKUPS_PER_CRON
; 
1351                 if ((de 
= dictGetRandomKey(db
->expires
)) == NULL
) break; 
1352                 t 
= (time_t) dictGetEntryVal(de
); 
1354                     deleteKey(db
,dictGetEntryKey(de
)); 
1358         } while (expired 
> REDIS_EXPIRELOOKUPS_PER_CRON
/4); 
1361     /* Swap a few keys on disk if we are over the memory limit and VM 
1362      * is enabled. Try to free objects from the free list first. */ 
1363     if (vmCanSwapOut()) { 
1364         while (server
.vm_enabled 
&& zmalloc_used_memory() > 
1365                 server
.vm_max_memory
) 
1369             if (tryFreeOneObjectFromFreelist() == REDIS_OK
) continue; 
1370             retval 
= (server
.vm_max_threads 
== 0) ? 
1371                         vmSwapOneObjectBlocking() : 
1372                         vmSwapOneObjectThreaded(); 
1373             if (retval 
== REDIS_ERR 
&& (loops 
% 30) == 0 && 
1374                 zmalloc_used_memory() > 
1375                 (server
.vm_max_memory
+server
.vm_max_memory
/10)) 
1377                 redisLog(REDIS_WARNING
,"WARNING: vm-max-memory limit exceeded by more than 10%% but unable to swap more objects out!"); 
1379             /* Note that when using threaded I/O we free just one object, 
1380              * because anyway when the I/O thread in charge to swap this 
1381              * object out will finish, the handler of completed jobs 
1382              * will try to swap more objects if we are still out of memory. */ 
1383             if (retval 
== REDIS_ERR 
|| server
.vm_max_threads 
> 0) break; 
1387     /* Check if we should connect to a MASTER */ 
1388     if (server
.replstate 
== REDIS_REPL_CONNECT
) { 
1389         redisLog(REDIS_NOTICE
,"Connecting to MASTER..."); 
1390         if (syncWithMaster() == REDIS_OK
) { 
1391             redisLog(REDIS_NOTICE
,"MASTER <-> SLAVE sync succeeded"); 
1397 /* This function gets called every time Redis is entering the 
1398  * main loop of the event driven library, that is, before to sleep 
1399  * for ready file descriptors. */ 
1400 static void beforeSleep(struct aeEventLoop 
*eventLoop
) { 
1401     REDIS_NOTUSED(eventLoop
); 
1403     if (server
.vm_enabled 
&& listLength(server
.io_ready_clients
)) { 
1407         listRewind(server
.io_ready_clients
,&li
); 
1408         while((ln 
= listNext(&li
))) { 
1409             redisClient 
*c 
= ln
->value
; 
1410             struct redisCommand 
*cmd
; 
1412             /* Resume the client. */ 
1413             listDelNode(server
.io_ready_clients
,ln
); 
1414             c
->flags 
&= (~REDIS_IO_WAIT
); 
1415             server
.vm_blocked_clients
--; 
1416             aeCreateFileEvent(server
.el
, c
->fd
, AE_READABLE
, 
1417                 readQueryFromClient
, c
); 
1418             cmd 
= lookupCommand(c
->argv
[0]->ptr
); 
1419             assert(cmd 
!= NULL
); 
1422             /* There may be more data to process in the input buffer. */ 
1423             if (c
->querybuf 
&& sdslen(c
->querybuf
) > 0) 
1424                 processInputBuffer(c
); 
1429 static void createSharedObjects(void) { 
1430     shared
.crlf 
= createObject(REDIS_STRING
,sdsnew("\r\n")); 
1431     shared
.ok 
= createObject(REDIS_STRING
,sdsnew("+OK\r\n")); 
1432     shared
.err 
= createObject(REDIS_STRING
,sdsnew("-ERR\r\n")); 
1433     shared
.emptybulk 
= createObject(REDIS_STRING
,sdsnew("$0\r\n\r\n")); 
1434     shared
.czero 
= createObject(REDIS_STRING
,sdsnew(":0\r\n")); 
1435     shared
.cone 
= createObject(REDIS_STRING
,sdsnew(":1\r\n")); 
1436     shared
.nullbulk 
= createObject(REDIS_STRING
,sdsnew("$-1\r\n")); 
1437     shared
.nullmultibulk 
= createObject(REDIS_STRING
,sdsnew("*-1\r\n")); 
1438     shared
.emptymultibulk 
= createObject(REDIS_STRING
,sdsnew("*0\r\n")); 
1439     shared
.pong 
= createObject(REDIS_STRING
,sdsnew("+PONG\r\n")); 
1440     shared
.queued 
= createObject(REDIS_STRING
,sdsnew("+QUEUED\r\n")); 
1441     shared
.wrongtypeerr 
= createObject(REDIS_STRING
,sdsnew( 
1442         "-ERR Operation against a key holding the wrong kind of value\r\n")); 
1443     shared
.nokeyerr 
= createObject(REDIS_STRING
,sdsnew( 
1444         "-ERR no such key\r\n")); 
1445     shared
.syntaxerr 
= createObject(REDIS_STRING
,sdsnew( 
1446         "-ERR syntax error\r\n")); 
1447     shared
.sameobjecterr 
= createObject(REDIS_STRING
,sdsnew( 
1448         "-ERR source and destination objects are the same\r\n")); 
1449     shared
.outofrangeerr 
= createObject(REDIS_STRING
,sdsnew( 
1450         "-ERR index out of range\r\n")); 
1451     shared
.space 
= createObject(REDIS_STRING
,sdsnew(" ")); 
1452     shared
.colon 
= createObject(REDIS_STRING
,sdsnew(":")); 
1453     shared
.plus 
= createObject(REDIS_STRING
,sdsnew("+")); 
1454     shared
.select0 
= createStringObject("select 0\r\n",10); 
1455     shared
.select1 
= createStringObject("select 1\r\n",10); 
1456     shared
.select2 
= createStringObject("select 2\r\n",10); 
1457     shared
.select3 
= createStringObject("select 3\r\n",10); 
1458     shared
.select4 
= createStringObject("select 4\r\n",10); 
1459     shared
.select5 
= createStringObject("select 5\r\n",10); 
1460     shared
.select6 
= createStringObject("select 6\r\n",10); 
1461     shared
.select7 
= createStringObject("select 7\r\n",10); 
1462     shared
.select8 
= createStringObject("select 8\r\n",10); 
1463     shared
.select9 
= createStringObject("select 9\r\n",10); 
/* Append one save point to server.saveparams.
 *
 * The array is grown by one element with zrealloc() and the pair
 * (seconds, changes) is stored in the new last slot; server.saveparamslen
 * is then incremented. serverCron compares each entry against
 * server.dirty and the time elapsed since server.lastsave to decide when
 * to start a background save. */
1466 static void appendServerSaveParams(time_t seconds
, int changes
) { 
1467     server
.saveparams 
= zrealloc(server
.saveparams
,sizeof(struct saveparam
)*(server
.saveparamslen
+1)); 
1468     server
.saveparams
[server
.saveparamslen
].seconds 
= seconds
; 
1469     server
.saveparams
[server
.saveparamslen
].changes 
= changes
; 
1470     server
.saveparamslen
++; 
/* Drop every configured save point: free the server.saveparams array and
 * put it back in the empty state (NULL pointer, length 0) so that
 * appendServerSaveParams() can start from scratch. */
1473 static void resetServerSaveParams() { 
1474     zfree(server
.saveparams
); 
1475     server
.saveparams 
= NULL
; 
1476     server
.saveparamslen 
= 0; 
1479 static void initServerConfig() { 
1480     server
.dbnum 
= REDIS_DEFAULT_DBNUM
; 
1481     server
.port 
= REDIS_SERVERPORT
; 
1482     server
.verbosity 
= REDIS_VERBOSE
; 
1483     server
.maxidletime 
= REDIS_MAXIDLETIME
; 
1484     server
.saveparams 
= NULL
; 
1485     server
.logfile 
= NULL
; /* NULL = log on standard output */ 
1486     server
.bindaddr 
= NULL
; 
1487     server
.glueoutputbuf 
= 1; 
1488     server
.daemonize 
= 0; 
1489     server
.appendonly 
= 0; 
1490     server
.appendfsync 
= APPENDFSYNC_ALWAYS
; 
1491     server
.lastfsync 
= time(NULL
); 
1492     server
.appendfd 
= -1; 
1493     server
.appendseldb 
= -1; /* Make sure the first time will not match */ 
1494     server
.pidfile 
= "/var/run/redis.pid"; 
1495     server
.dbfilename 
= "dump.rdb"; 
1496     server
.appendfilename 
= "appendonly.aof"; 
1497     server
.requirepass 
= NULL
; 
1498     server
.shareobjects 
= 0; 
1499     server
.rdbcompression 
= 1; 
1500     server
.sharingpoolsize 
= 1024; 
1501     server
.maxclients 
= 0; 
1502     server
.blpop_blocked_clients 
= 0; 
1503     server
.maxmemory 
= 0; 
1504     server
.vm_enabled 
= 0; 
1505     server
.vm_swap_file 
= zstrdup("/tmp/redis-%p.vm"); 
1506     server
.vm_page_size 
= 256;          /* 256 bytes per page */ 
1507     server
.vm_pages 
= 1024*1024*100;    /* 104 millions of pages */ 
1508     server
.vm_max_memory 
= 1024LL*1024*1024*1; /* 1 GB of RAM */ 
1509     server
.vm_max_threads 
= 4; 
1510     server
.vm_blocked_clients 
= 0; 
1511     server
.hash_max_zipmap_entries 
= REDIS_HASH_MAX_ZIPMAP_ENTRIES
; 
1512     server
.hash_max_zipmap_value 
= REDIS_HASH_MAX_ZIPMAP_VALUE
; 
1514     resetServerSaveParams(); 
1516     appendServerSaveParams(60*60,1);  /* save after 1 hour and 1 change */ 
1517     appendServerSaveParams(300,100);  /* save after 5 minutes and 100 changes */ 
1518     appendServerSaveParams(60,10000); /* save after 1 minute and 10000 changes */ 
1519     /* Replication related */ 
1521     server
.masterauth 
= NULL
; 
1522     server
.masterhost 
= NULL
; 
1523     server
.masterport 
= 6379; 
1524     server
.master 
= NULL
; 
1525     server
.replstate 
= REDIS_REPL_NONE
; 
1527     /* Double constants initialization */ 
1529     R_PosInf 
= 1.0/R_Zero
; 
1530     R_NegInf 
= -1.0/R_Zero
; 
1531     R_Nan 
= R_Zero
/R_Zero
; 
/* One-time runtime initialization of the global `server` state.
 *
 * In order: ignores SIGHUP and SIGPIPE and calls setupSigSegvAction();
 * opens /dev/null; creates the clients/slaves/monitors/objfreelist lists,
 * the shared reply objects and the event loop; allocates server.dbnum
 * databases; creates the sharing pool; opens the listening TCP socket;
 * creates the per-database dicts; resets counters and timestamps;
 * registers the serverCron timer and the accept handler; opens the
 * append-only file when server.appendonly is set; and calls vmInit()
 * when server.vm_enabled is set. */
1534 static void initServer() { 
1537     signal(SIGHUP
, SIG_IGN
); 
1538     signal(SIGPIPE
, SIG_IGN
); 
1539     setupSigSegvAction(); 
1541     server
.devnull 
= fopen("/dev/null","w"); 
1542     if (server
.devnull 
== NULL
) { 
        /* NOTE(review): server.neterr is the buffer handed to
         * anetTcpServer() further down; nothing visible here fills it for
         * the fopen() failure, so the logged reason is likely stale or
         * empty. strerror(errno) looks like the intended argument --
         * confirm. */
1543         redisLog(REDIS_WARNING
, "Can't open /dev/null: %s", server
.neterr
); 
1546     server
.clients 
= listCreate(); 
1547     server
.slaves 
= listCreate(); 
1548     server
.monitors 
= listCreate(); 
1549     server
.objfreelist 
= listCreate(); 
1550     createSharedObjects(); 
1551     server
.el 
= aeCreateEventLoop(); 
1552     server
.db 
= zmalloc(sizeof(redisDb
)*server
.dbnum
); 
1553     server
.sharingpool 
= dictCreate(&setDictType
,NULL
); 
1554     server
.fd 
= anetTcpServer(server
.neterr
, server
.port
, server
.bindaddr
); 
1555     if (server
.fd 
== -1) { 
1556         redisLog(REDIS_WARNING
, "Opening TCP port: %s", server
.neterr
); 
1559     for (j 
= 0; j 
< server
.dbnum
; j
++) { 
1560         server
.db
[j
].dict 
= dictCreate(&dbDictType
,NULL
); 
1561         server
.db
[j
].expires 
= dictCreate(&keyptrDictType
,NULL
); 
1562         server
.db
[j
].blockingkeys 
= dictCreate(&keylistDictType
,NULL
); 
        /* io_keys is only created when virtual memory is enabled. */
1563         if (server
.vm_enabled
) 
1564             server
.db
[j
].io_keys 
= dictCreate(&keylistDictType
,NULL
); 
1565         server
.db
[j
].id 
= j
; 
        /* -1 in the two child pid fields means "no child running": this is
         * the value serverCron tests before waiting for a child. */
1567     server
.cronloops 
= 0; 
1568     server
.bgsavechildpid 
= -1; 
1569     server
.bgrewritechildpid 
= -1; 
1570     server
.bgrewritebuf 
= sdsempty(); 
1571     server
.lastsave 
= time(NULL
); 
1573     server
.stat_numcommands 
= 0; 
1574     server
.stat_numconnections 
= 0; 
1575     server
.stat_starttime 
= time(NULL
); 
1576     server
.unixtime 
= time(NULL
); 
1577     aeCreateTimeEvent(server
.el
, 1, serverCron
, NULL
, NULL
); 
1578     if (aeCreateFileEvent(server
.el
, server
.fd
, AE_READABLE
, 
1579         acceptHandler
, NULL
) == AE_ERR
) oom("creating file event"); 
1581     if (server
.appendonly
) { 
1582         server
.appendfd 
= open(server
.appendfilename
,O_WRONLY
|O_APPEND
|O_CREAT
,0644); 
1583         if (server
.appendfd 
== -1) { 
1584             redisLog(REDIS_WARNING
, "Can't open the append-only file: %s", 
1590     if (server
.vm_enabled
) vmInit(); 
1593 /* Empty the whole database */ 
/* Empty the main key dict and the expires dict of every database.
 * `removed` accumulates the number of entries each main dict held just
 * before it was emptied; entries of the expires dicts are not counted. */
1594 static long long emptyDb() { 
1596     long long removed 
= 0; 
1598     for (j 
= 0; j 
< server
.dbnum
; j
++) { 
        /* Count first: after dictEmpty() the size is gone. */
1599         removed 
+= dictSize(server
.db
[j
].dict
); 
1600         dictEmpty(server
.db
[j
].dict
); 
1601         dictEmpty(server
.db
[j
].expires
); 
/* Translate a "yes" / "no" configuration argument (compared ignoring case)
 * into 1 / 0. Any other string yields -1 so that the caller can report a
 * configuration error. */
static int yesnotoi(char *s) {
    if (strcasecmp(s,"yes") == 0) return 1;
    return (strcasecmp(s,"no") == 0) ? 0 : -1;
}
1612 /* I agree, this is a very rudimental way to load a configuration... 
1613    will improve later if the config gets more complex */ 
1614 static void loadServerConfig(char *filename
) { 
1616     char buf
[REDIS_CONFIGLINE_MAX
+1], *err 
= NULL
; 
1620     if (filename
[0] == '-' && filename
[1] == '\0') 
1623         if ((fp 
= fopen(filename
,"r")) == NULL
) { 
1624             redisLog(REDIS_WARNING
,"Fatal error, can't open config file"); 
1629     while(fgets(buf
,REDIS_CONFIGLINE_MAX
+1,fp
) != NULL
) { 
1635         line 
= sdstrim(line
," \t\r\n"); 
1637         /* Skip comments and blank lines*/ 
1638         if (line
[0] == '#' || line
[0] == '\0') { 
1643         /* Split into arguments */ 
1644         argv 
= sdssplitlen(line
,sdslen(line
)," ",1,&argc
); 
1645         sdstolower(argv
[0]); 
1647         /* Execute config directives */ 
1648         if (!strcasecmp(argv
[0],"timeout") && argc 
== 2) { 
1649             server
.maxidletime 
= atoi(argv
[1]); 
1650             if (server
.maxidletime 
< 0) { 
1651                 err 
= "Invalid timeout value"; goto loaderr
; 
1653         } else if (!strcasecmp(argv
[0],"port") && argc 
== 2) { 
1654             server
.port 
= atoi(argv
[1]); 
1655             if (server
.port 
< 1 || server
.port 
> 65535) { 
1656                 err 
= "Invalid port"; goto loaderr
; 
1658         } else if (!strcasecmp(argv
[0],"bind") && argc 
== 2) { 
1659             server
.bindaddr 
= zstrdup(argv
[1]); 
1660         } else if (!strcasecmp(argv
[0],"save") && argc 
== 3) { 
1661             int seconds 
= atoi(argv
[1]); 
1662             int changes 
= atoi(argv
[2]); 
1663             if (seconds 
< 1 || changes 
< 0) { 
1664                 err 
= "Invalid save parameters"; goto loaderr
; 
1666             appendServerSaveParams(seconds
,changes
); 
1667         } else if (!strcasecmp(argv
[0],"dir") && argc 
== 2) { 
1668             if (chdir(argv
[1]) == -1) { 
1669                 redisLog(REDIS_WARNING
,"Can't chdir to '%s': %s", 
1670                     argv
[1], strerror(errno
)); 
1673         } else if (!strcasecmp(argv
[0],"loglevel") && argc 
== 2) { 
1674             if (!strcasecmp(argv
[1],"debug")) server
.verbosity 
= REDIS_DEBUG
; 
1675             else if (!strcasecmp(argv
[1],"verbose")) server
.verbosity 
= REDIS_VERBOSE
; 
1676             else if (!strcasecmp(argv
[1],"notice")) server
.verbosity 
= REDIS_NOTICE
; 
1677             else if (!strcasecmp(argv
[1],"warning")) server
.verbosity 
= REDIS_WARNING
; 
1679                 err 
= "Invalid log level. Must be one of debug, notice, warning"; 
1682         } else if (!strcasecmp(argv
[0],"logfile") && argc 
== 2) { 
1685             server
.logfile 
= zstrdup(argv
[1]); 
1686             if (!strcasecmp(server
.logfile
,"stdout")) { 
1687                 zfree(server
.logfile
); 
1688                 server
.logfile 
= NULL
; 
1690             if (server
.logfile
) { 
1691                 /* Test if we are able to open the file. The server will not 
1692                  * be able to abort just for this problem later... */ 
1693                 logfp 
= fopen(server
.logfile
,"a"); 
1694                 if (logfp 
== NULL
) { 
1695                     err 
= sdscatprintf(sdsempty(), 
1696                         "Can't open the log file: %s", strerror(errno
)); 
1701         } else if (!strcasecmp(argv
[0],"databases") && argc 
== 2) { 
1702             server
.dbnum 
= atoi(argv
[1]); 
1703             if (server
.dbnum 
< 1) { 
1704                 err 
= "Invalid number of databases"; goto loaderr
; 
1706         } else if (!strcasecmp(argv
[0],"maxclients") && argc 
== 2) { 
1707             server
.maxclients 
= atoi(argv
[1]); 
1708         } else if (!strcasecmp(argv
[0],"maxmemory") && argc 
== 2) { 
1709             server
.maxmemory 
= strtoll(argv
[1], NULL
, 10); 
1710         } else if (!strcasecmp(argv
[0],"slaveof") && argc 
== 3) { 
1711             server
.masterhost 
= sdsnew(argv
[1]); 
1712             server
.masterport 
= atoi(argv
[2]); 
1713             server
.replstate 
= REDIS_REPL_CONNECT
; 
1714         } else if (!strcasecmp(argv
[0],"masterauth") && argc 
== 2) { 
1715                 server
.masterauth 
= zstrdup(argv
[1]); 
1716         } else if (!strcasecmp(argv
[0],"glueoutputbuf") && argc 
== 2) { 
1717             if ((server
.glueoutputbuf 
= yesnotoi(argv
[1])) == -1) { 
1718                 err 
= "argument must be 'yes' or 'no'"; goto loaderr
; 
1720         } else if (!strcasecmp(argv
[0],"shareobjects") && argc 
== 2) { 
1721             if ((server
.shareobjects 
= yesnotoi(argv
[1])) == -1) { 
1722                 err 
= "argument must be 'yes' or 'no'"; goto loaderr
; 
1724         } else if (!strcasecmp(argv
[0],"rdbcompression") && argc 
== 2) { 
1725             if ((server
.rdbcompression 
= yesnotoi(argv
[1])) == -1) { 
1726                 err 
= "argument must be 'yes' or 'no'"; goto loaderr
; 
1728         } else if (!strcasecmp(argv
[0],"shareobjectspoolsize") && argc 
== 2) { 
1729             server
.sharingpoolsize 
= atoi(argv
[1]); 
1730             if (server
.sharingpoolsize 
< 1) { 
1731                 err 
= "invalid object sharing pool size"; goto loaderr
; 
1733         } else if (!strcasecmp(argv
[0],"daemonize") && argc 
== 2) { 
1734             if ((server
.daemonize 
= yesnotoi(argv
[1])) == -1) { 
1735                 err 
= "argument must be 'yes' or 'no'"; goto loaderr
; 
1737         } else if (!strcasecmp(argv
[0],"appendonly") && argc 
== 2) { 
1738             if ((server
.appendonly 
= yesnotoi(argv
[1])) == -1) { 
1739                 err 
= "argument must be 'yes' or 'no'"; goto loaderr
; 
1741         } else if (!strcasecmp(argv
[0],"appendfsync") && argc 
== 2) { 
1742             if (!strcasecmp(argv
[1],"no")) { 
1743                 server
.appendfsync 
= APPENDFSYNC_NO
; 
1744             } else if (!strcasecmp(argv
[1],"always")) { 
1745                 server
.appendfsync 
= APPENDFSYNC_ALWAYS
; 
1746             } else if (!strcasecmp(argv
[1],"everysec")) { 
1747                 server
.appendfsync 
= APPENDFSYNC_EVERYSEC
; 
1749                 err 
= "argument must be 'no', 'always' or 'everysec'"; 
1752         } else if (!strcasecmp(argv
[0],"requirepass") && argc 
== 2) { 
1753             server
.requirepass 
= zstrdup(argv
[1]); 
1754         } else if (!strcasecmp(argv
[0],"pidfile") && argc 
== 2) { 
1755             server
.pidfile 
= zstrdup(argv
[1]); 
1756         } else if (!strcasecmp(argv
[0],"dbfilename") && argc 
== 2) { 
1757             server
.dbfilename 
= zstrdup(argv
[1]); 
1758         } else if (!strcasecmp(argv
[0],"vm-enabled") && argc 
== 2) { 
1759             if ((server
.vm_enabled 
= yesnotoi(argv
[1])) == -1) { 
1760                 err 
= "argument must be 'yes' or 'no'"; goto loaderr
; 
1762         } else if (!strcasecmp(argv
[0],"vm-swap-file") && argc 
== 2) { 
1763             zfree(server
.vm_swap_file
); 
1764             server
.vm_swap_file 
= zstrdup(argv
[1]); 
1765         } else if (!strcasecmp(argv
[0],"vm-max-memory") && argc 
== 2) { 
1766             server
.vm_max_memory 
= strtoll(argv
[1], NULL
, 10); 
1767         } else if (!strcasecmp(argv
[0],"vm-page-size") && argc 
== 2) { 
1768             server
.vm_page_size 
= strtoll(argv
[1], NULL
, 10); 
1769         } else if (!strcasecmp(argv
[0],"vm-pages") && argc 
== 2) { 
1770             server
.vm_pages 
= strtoll(argv
[1], NULL
, 10); 
1771         } else if (!strcasecmp(argv
[0],"vm-max-threads") && argc 
== 2) { 
1772             server
.vm_max_threads 
= strtoll(argv
[1], NULL
, 10); 
1773         } else if (!strcasecmp(argv
[0],"hash-max-zipmap-entries") && argc 
== 2){ 
1774             server
.hash_max_zipmap_entries 
= strtol(argv
[1], NULL
, 10); 
1775         } else if (!strcasecmp(argv
[0],"hash-max-zipmap-value") && argc 
== 2){ 
1776             server
.hash_max_zipmap_value 
= strtol(argv
[1], NULL
, 10); 
1777         } else if (!strcasecmp(argv
[0],"vm-max-threads") && argc 
== 2) { 
1778             server
.vm_max_threads 
= strtoll(argv
[1], NULL
, 10); 
1780             err 
= "Bad directive or wrong number of arguments"; goto loaderr
; 
1782         for (j 
= 0; j 
< argc
; j
++) 
1787     if (fp 
!= stdin
) fclose(fp
); 
1791     fprintf(stderr
, "\n*** FATAL CONFIG FILE ERROR ***\n"); 
1792     fprintf(stderr
, "Reading the configuration file, at line %d\n", linenum
); 
1793     fprintf(stderr
, ">>> '%s'\n", line
); 
1794     fprintf(stderr
, "%s\n", err
); 
1798 static void freeClientArgv(redisClient 
*c
) { 
1801     for (j 
= 0; j 
< c
->argc
; j
++) 
1802         decrRefCount(c
->argv
[j
]); 
1803     for (j 
= 0; j 
< c
->mbargc
; j
++) 
1804         decrRefCount(c
->mbargv
[j
]); 
1809 static void freeClient(redisClient 
*c
) { 
1812     /* Note that if the client we are freeing is blocked into a blocking 
1813      * call, we have to set querybuf to NULL *before* to call 
1814      * unblockClientWaitingData() to avoid processInputBuffer() will get 
1815      * called. Also it is important to remove the file events after 
1816      * this, because this call adds the READABLE event. */ 
1817     sdsfree(c
->querybuf
); 
1819     if (c
->flags 
& REDIS_BLOCKED
) 
1820         unblockClientWaitingData(c
); 
1822     aeDeleteFileEvent(server
.el
,c
->fd
,AE_READABLE
); 
1823     aeDeleteFileEvent(server
.el
,c
->fd
,AE_WRITABLE
); 
1824     listRelease(c
->reply
); 
1827     /* Remove from the list of clients */ 
1828     ln 
= listSearchKey(server
.clients
,c
); 
1829     redisAssert(ln 
!= NULL
); 
1830     listDelNode(server
.clients
,ln
); 
1831     /* Remove from the list of clients waiting for swapped keys */ 
1832     if (c
->flags 
& REDIS_IO_WAIT 
&& listLength(c
->io_keys
) == 0) { 
1833         ln 
= listSearchKey(server
.io_ready_clients
,c
); 
1835             listDelNode(server
.io_ready_clients
,ln
); 
1836             server
.vm_blocked_clients
--; 
1839     while (server
.vm_enabled 
&& listLength(c
->io_keys
)) { 
1840         ln 
= listFirst(c
->io_keys
); 
1841         dontWaitForSwappedKey(c
,ln
->value
); 
1843     listRelease(c
->io_keys
); 
1845     if (c
->flags 
& REDIS_SLAVE
) { 
1846         if (c
->replstate 
== REDIS_REPL_SEND_BULK 
&& c
->repldbfd 
!= -1) 
1848         list 
*l 
= (c
->flags 
& REDIS_MONITOR
) ? server
.monitors 
: server
.slaves
; 
1849         ln 
= listSearchKey(l
,c
); 
1850         redisAssert(ln 
!= NULL
); 
1853     if (c
->flags 
& REDIS_MASTER
) { 
1854         server
.master 
= NULL
; 
1855         server
.replstate 
= REDIS_REPL_CONNECT
; 
1859     freeClientMultiState(c
); 
1863 #define GLUEREPLY_UP_TO (1024) 
1864 static void glueReplyBuffersIfNeeded(redisClient 
*c
) { 
1866     char buf
[GLUEREPLY_UP_TO
]; 
1871     listRewind(c
->reply
,&li
); 
1872     while((ln 
= listNext(&li
))) { 
1876         objlen 
= sdslen(o
->ptr
); 
1877         if (copylen 
+ objlen 
<= GLUEREPLY_UP_TO
) { 
1878             memcpy(buf
+copylen
,o
->ptr
,objlen
); 
1880             listDelNode(c
->reply
,ln
); 
1882             if (copylen 
== 0) return; 
1886     /* Now the output buffer is empty, add the new single element */ 
1887     o 
= createObject(REDIS_STRING
,sdsnewlen(buf
,copylen
)); 
1888     listAddNodeHead(c
->reply
,o
); 
1891 static void sendReplyToClient(aeEventLoop 
*el
, int fd
, void *privdata
, int mask
) { 
1892     redisClient 
*c 
= privdata
; 
1893     int nwritten 
= 0, totwritten 
= 0, objlen
; 
1896     REDIS_NOTUSED(mask
); 
1898     /* Use writev() if we have enough buffers to send */ 
1899     if (!server
.glueoutputbuf 
&& 
1900         listLength(c
->reply
) > REDIS_WRITEV_THRESHOLD 
&&  
1901         !(c
->flags 
& REDIS_MASTER
)) 
1903         sendReplyToClientWritev(el
, fd
, privdata
, mask
); 
1907     while(listLength(c
->reply
)) { 
1908         if (server
.glueoutputbuf 
&& listLength(c
->reply
) > 1) 
1909             glueReplyBuffersIfNeeded(c
); 
1911         o 
= listNodeValue(listFirst(c
->reply
)); 
1912         objlen 
= sdslen(o
->ptr
); 
1915             listDelNode(c
->reply
,listFirst(c
->reply
)); 
1919         if (c
->flags 
& REDIS_MASTER
) { 
1920             /* Don't reply to a master */ 
1921             nwritten 
= objlen 
- c
->sentlen
; 
1923             nwritten 
= write(fd
, ((char*)o
->ptr
)+c
->sentlen
, objlen 
- c
->sentlen
); 
1924             if (nwritten 
<= 0) break; 
1926         c
->sentlen 
+= nwritten
; 
1927         totwritten 
+= nwritten
; 
1928         /* If we fully sent the object on head go to the next one */ 
1929         if (c
->sentlen 
== objlen
) { 
1930             listDelNode(c
->reply
,listFirst(c
->reply
)); 
1933         /* Note that we avoid to send more thank REDIS_MAX_WRITE_PER_EVENT 
1934          * bytes, in a single threaded server it's a good idea to serve 
1935          * other clients as well, even if a very large request comes from 
1936          * super fast link that is always able to accept data (in real world 
1937          * scenario think about 'KEYS *' against the loopback interfae) */ 
1938         if (totwritten 
> REDIS_MAX_WRITE_PER_EVENT
) break; 
1940     if (nwritten 
== -1) { 
1941         if (errno 
== EAGAIN
) { 
1944             redisLog(REDIS_VERBOSE
, 
1945                 "Error writing to client: %s", strerror(errno
)); 
1950     if (totwritten 
> 0) c
->lastinteraction 
= time(NULL
); 
1951     if (listLength(c
->reply
) == 0) { 
1953         aeDeleteFileEvent(server
.el
,c
->fd
,AE_WRITABLE
); 
1957 static void sendReplyToClientWritev(aeEventLoop 
*el
, int fd
, void *privdata
, int mask
) 
1959     redisClient 
*c 
= privdata
; 
1960     int nwritten 
= 0, totwritten 
= 0, objlen
, willwrite
; 
1962     struct iovec iov
[REDIS_WRITEV_IOVEC_COUNT
]; 
1963     int offset
, ion 
= 0; 
1965     REDIS_NOTUSED(mask
); 
1968     while (listLength(c
->reply
)) { 
1969         offset 
= c
->sentlen
; 
1973         /* fill-in the iov[] array */ 
1974         for(node 
= listFirst(c
->reply
); node
; node 
= listNextNode(node
)) { 
1975             o 
= listNodeValue(node
); 
1976             objlen 
= sdslen(o
->ptr
); 
1978             if (totwritten 
+ objlen 
- offset 
> REDIS_MAX_WRITE_PER_EVENT
)  
1981             if(ion 
== REDIS_WRITEV_IOVEC_COUNT
) 
1982                 break; /* no more iovecs */ 
1984             iov
[ion
].iov_base 
= ((char*)o
->ptr
) + offset
; 
1985             iov
[ion
].iov_len 
= objlen 
- offset
; 
1986             willwrite 
+= objlen 
- offset
; 
1987             offset 
= 0; /* just for the first item */ 
1994         /* write all collected blocks at once */ 
1995         if((nwritten 
= writev(fd
, iov
, ion
)) < 0) { 
1996             if (errno 
!= EAGAIN
) { 
1997                 redisLog(REDIS_VERBOSE
, 
1998                          "Error writing to client: %s", strerror(errno
)); 
2005         totwritten 
+= nwritten
; 
2006         offset 
= c
->sentlen
; 
2008         /* remove written robjs from c->reply */ 
2009         while (nwritten 
&& listLength(c
->reply
)) { 
2010             o 
= listNodeValue(listFirst(c
->reply
)); 
2011             objlen 
= sdslen(o
->ptr
); 
2013             if(nwritten 
>= objlen 
- offset
) { 
2014                 listDelNode(c
->reply
, listFirst(c
->reply
)); 
2015                 nwritten 
-= objlen 
- offset
; 
2019                 c
->sentlen 
+= nwritten
; 
2027         c
->lastinteraction 
= time(NULL
); 
2029     if (listLength(c
->reply
) == 0) { 
2031         aeDeleteFileEvent(server
.el
,c
->fd
,AE_WRITABLE
); 
2035 static struct redisCommand 
*lookupCommand(char *name
) { 
2037     while(cmdTable
[j
].name 
!= NULL
) { 
2038         if (!strcasecmp(name
,cmdTable
[j
].name
)) return &cmdTable
[j
]; 
2044 /* resetClient prepare the client to process the next command */ 
2045 static void resetClient(redisClient 
*c
) { 
2051 /* Call() is the core of Redis execution of a command */ 
2052 static void call(redisClient 
*c
, struct redisCommand 
*cmd
) { 
2055     dirty 
= server
.dirty
; 
2057     if (server
.appendonly 
&& server
.dirty
-dirty
) 
2058         feedAppendOnlyFile(cmd
,c
->db
->id
,c
->argv
,c
->argc
); 
2059     if (server
.dirty
-dirty 
&& listLength(server
.slaves
)) 
2060         replicationFeedSlaves(server
.slaves
,cmd
,c
->db
->id
,c
->argv
,c
->argc
); 
2061     if (listLength(server
.monitors
)) 
2062         replicationFeedSlaves(server
.monitors
,cmd
,c
->db
->id
,c
->argv
,c
->argc
); 
2063     server
.stat_numcommands
++; 
2066 /* If this function gets called we already read a whole 
2067  * command, argments are in the client argv/argc fields. 
2068  * processCommand() execute the command or prepare the 
2069  * server for a bulk read from the client. 
2071  * If 1 is returned the client is still alive and valid and 
2072  * and other operations can be performed by the caller. Otherwise 
2073  * if 0 is returned the client was destroied (i.e. after QUIT). */ 
2074 static int processCommand(redisClient 
*c
) { 
2075     struct redisCommand 
*cmd
; 
2077     /* Free some memory if needed (maxmemory setting) */ 
2078     if (server
.maxmemory
) freeMemoryIfNeeded(); 
2080     /* Handle the multi bulk command type. This is an alternative protocol 
2081      * supported by Redis in order to receive commands that are composed of 
2082      * multiple binary-safe "bulk" arguments. The latency of processing is 
2083      * a bit higher but this allows things like multi-sets, so if this 
2084      * protocol is used only for MSET and similar commands this is a big win. */ 
2085     if (c
->multibulk 
== 0 && c
->argc 
== 1 && ((char*)(c
->argv
[0]->ptr
))[0] == '*') { 
2086         c
->multibulk 
= atoi(((char*)c
->argv
[0]->ptr
)+1); 
2087         if (c
->multibulk 
<= 0) { 
2091             decrRefCount(c
->argv
[c
->argc
-1]); 
2095     } else if (c
->multibulk
) { 
2096         if (c
->bulklen 
== -1) { 
2097             if (((char*)c
->argv
[0]->ptr
)[0] != '$') { 
2098                 addReplySds(c
,sdsnew("-ERR multi bulk protocol error\r\n")); 
2102                 int bulklen 
= atoi(((char*)c
->argv
[0]->ptr
)+1); 
2103                 decrRefCount(c
->argv
[0]); 
2104                 if (bulklen 
< 0 || bulklen 
> 1024*1024*1024) { 
2106                     addReplySds(c
,sdsnew("-ERR invalid bulk write count\r\n")); 
2111                 c
->bulklen 
= bulklen
+2; /* add two bytes for CR+LF */ 
2115             c
->mbargv 
= zrealloc(c
->mbargv
,(sizeof(robj
*))*(c
->mbargc
+1)); 
2116             c
->mbargv
[c
->mbargc
] = c
->argv
[0]; 
2120             if (c
->multibulk 
== 0) { 
2124                 /* Here we need to swap the multi-bulk argc/argv with the 
2125                  * normal argc/argv of the client structure. */ 
2127                 c
->argv 
= c
->mbargv
; 
2128                 c
->mbargv 
= auxargv
; 
2131                 c
->argc 
= c
->mbargc
; 
2132                 c
->mbargc 
= auxargc
; 
2134                 /* We need to set bulklen to something different than -1 
2135                  * in order for the code below to process the command without 
2136                  * to try to read the last argument of a bulk command as 
2137                  * a special argument. */ 
2139                 /* continue below and process the command */ 
2146     /* -- end of multi bulk commands processing -- */ 
2148     /* The QUIT command is handled as a special case. Normal command 
2149      * procs are unable to close the client connection safely */ 
2150     if (!strcasecmp(c
->argv
[0]->ptr
,"quit")) { 
2155     /* Now lookup the command and check ASAP about trivial error conditions 
2156      * such wrong arity, bad command name and so forth. */ 
2157     cmd 
= lookupCommand(c
->argv
[0]->ptr
); 
2160             sdscatprintf(sdsempty(), "-ERR unknown command '%s'\r\n", 
2161                 (char*)c
->argv
[0]->ptr
)); 
2164     } else if ((cmd
->arity 
> 0 && cmd
->arity 
!= c
->argc
) || 
2165                (c
->argc 
< -cmd
->arity
)) { 
2167             sdscatprintf(sdsempty(), 
2168                 "-ERR wrong number of arguments for '%s' command\r\n", 
2172     } else if (server
.maxmemory 
&& cmd
->flags 
& REDIS_CMD_DENYOOM 
&& zmalloc_used_memory() > server
.maxmemory
) { 
2173         addReplySds(c
,sdsnew("-ERR command not allowed when used memory > 'maxmemory'\r\n")); 
2176     } else if (cmd
->flags 
& REDIS_CMD_BULK 
&& c
->bulklen 
== -1) { 
2177         /* This is a bulk command, we have to read the last argument yet. */ 
2178         int bulklen 
= atoi(c
->argv
[c
->argc
-1]->ptr
); 
2180         decrRefCount(c
->argv
[c
->argc
-1]); 
2181         if (bulklen 
< 0 || bulklen 
> 1024*1024*1024) { 
2183             addReplySds(c
,sdsnew("-ERR invalid bulk write count\r\n")); 
2188         c
->bulklen 
= bulklen
+2; /* add two bytes for CR+LF */ 
2189         /* It is possible that the bulk read is already in the 
2190          * buffer. Check this condition and handle it accordingly. 
2191          * This is just a fast path, alternative to call processInputBuffer(). 
2192          * It's a good idea since the code is small and this condition 
2193          * happens most of the times. */ 
2194         if ((signed)sdslen(c
->querybuf
) >= c
->bulklen
) { 
2195             c
->argv
[c
->argc
] = createStringObject(c
->querybuf
,c
->bulklen
-2); 
2197             c
->querybuf 
= sdsrange(c
->querybuf
,c
->bulklen
,-1); 
2199             /* Otherwise return... there is to read the last argument 
2200              * from the socket. */ 
2204     /* Let's try to share objects on the command arguments vector */ 
2205     if (server
.shareobjects
) { 
2207         for(j 
= 1; j 
< c
->argc
; j
++) 
2208             c
->argv
[j
] = tryObjectSharing(c
->argv
[j
]); 
2210     /* Let's try to encode the bulk object to save space. */ 
2211     if (cmd
->flags 
& REDIS_CMD_BULK
) 
2212         tryObjectEncoding(c
->argv
[c
->argc
-1]); 
2214     /* Check if the user is authenticated */ 
2215     if (server
.requirepass 
&& !c
->authenticated 
&& cmd
->proc 
!= authCommand
) { 
2216         addReplySds(c
,sdsnew("-ERR operation not permitted\r\n")); 
2221     /* Exec the command */ 
2222     if (c
->flags 
& REDIS_MULTI 
&& cmd
->proc 
!= execCommand 
&& cmd
->proc 
!= discardCommand
) { 
2223         queueMultiCommand(c
,cmd
); 
2224         addReply(c
,shared
.queued
); 
2226         if (server
.vm_enabled 
&& server
.vm_max_threads 
> 0 && 
2227             blockClientOnSwappedKeys(cmd
,c
)) return 1; 
2231     /* Prepare the client for the next command */ 
2236 static void replicationFeedSlaves(list 
*slaves
, struct redisCommand 
*cmd
, int dictid
, robj 
**argv
, int argc
) { 
2241     /* (args*2)+1 is enough room for args, spaces, newlines */ 
2242     robj 
*static_outv
[REDIS_STATIC_ARGS
*2+1]; 
2244     if (argc 
<= REDIS_STATIC_ARGS
) { 
2247         outv 
= zmalloc(sizeof(robj
*)*(argc
*2+1)); 
2250     for (j 
= 0; j 
< argc
; j
++) { 
2251         if (j 
!= 0) outv
[outc
++] = shared
.space
; 
2252         if ((cmd
->flags 
& REDIS_CMD_BULK
) && j 
== argc
-1) { 
2255             lenobj 
= createObject(REDIS_STRING
, 
2256                 sdscatprintf(sdsempty(),"%lu\r\n", 
2257                     (unsigned long) stringObjectLen(argv
[j
]))); 
2258             lenobj
->refcount 
= 0; 
2259             outv
[outc
++] = lenobj
; 
2261         outv
[outc
++] = argv
[j
]; 
2263     outv
[outc
++] = shared
.crlf
; 
2265     /* Increment all the refcounts at start and decrement at end in order to 
2266      * be sure to free objects if there is no slave in a replication state 
2267      * able to be feed with commands */ 
2268     for (j 
= 0; j 
< outc
; j
++) incrRefCount(outv
[j
]); 
2269     listRewind(slaves
,&li
); 
2270     while((ln 
= listNext(&li
))) { 
2271         redisClient 
*slave 
= ln
->value
; 
2273         /* Don't feed slaves that are still waiting for BGSAVE to start */ 
2274         if (slave
->replstate 
== REDIS_REPL_WAIT_BGSAVE_START
) continue; 
2276         /* Feed all the other slaves, MONITORs and so on */ 
2277         if (slave
->slaveseldb 
!= dictid
) { 
2281             case 0: selectcmd 
= shared
.select0
; break; 
2282             case 1: selectcmd 
= shared
.select1
; break; 
2283             case 2: selectcmd 
= shared
.select2
; break; 
2284             case 3: selectcmd 
= shared
.select3
; break; 
2285             case 4: selectcmd 
= shared
.select4
; break; 
2286             case 5: selectcmd 
= shared
.select5
; break; 
2287             case 6: selectcmd 
= shared
.select6
; break; 
2288             case 7: selectcmd 
= shared
.select7
; break; 
2289             case 8: selectcmd 
= shared
.select8
; break; 
2290             case 9: selectcmd 
= shared
.select9
; break; 
2292                 selectcmd 
= createObject(REDIS_STRING
, 
2293                     sdscatprintf(sdsempty(),"select %d\r\n",dictid
)); 
2294                 selectcmd
->refcount 
= 0; 
2297             addReply(slave
,selectcmd
); 
2298             slave
->slaveseldb 
= dictid
; 
2300         for (j 
= 0; j 
< outc
; j
++) addReply(slave
,outv
[j
]); 
2302     for (j 
= 0; j 
< outc
; j
++) decrRefCount(outv
[j
]); 
2303     if (outv 
!= static_outv
) zfree(outv
); 
2306 static void processInputBuffer(redisClient 
*c
) { 
2308     /* Before to process the input buffer, make sure the client is not 
2309      * waitig for a blocking operation such as BLPOP. Note that the first 
2310      * iteration the client is never blocked, otherwise the processInputBuffer 
2311      * would not be called at all, but after the execution of the first commands 
2312      * in the input buffer the client may be blocked, and the "goto again" 
2313      * will try to reiterate. The following line will make it return asap. */ 
2314     if (c
->flags 
& REDIS_BLOCKED 
|| c
->flags 
& REDIS_IO_WAIT
) return; 
2315     if (c
->bulklen 
== -1) { 
2316         /* Read the first line of the query */ 
2317         char *p 
= strchr(c
->querybuf
,'\n'); 
2324             query 
= c
->querybuf
; 
2325             c
->querybuf 
= sdsempty(); 
2326             querylen 
= 1+(p
-(query
)); 
2327             if (sdslen(query
) > querylen
) { 
2328                 /* leave data after the first line of the query in the buffer */ 
2329                 c
->querybuf 
= sdscatlen(c
->querybuf
,query
+querylen
,sdslen(query
)-querylen
); 
2331             *p 
= '\0'; /* remove "\n" */ 
2332             if (*(p
-1) == '\r') *(p
-1) = '\0'; /* and "\r" if any */ 
2333             sdsupdatelen(query
); 
2335             /* Now we can split the query in arguments */ 
2336             argv 
= sdssplitlen(query
,sdslen(query
)," ",1,&argc
); 
2339             if (c
->argv
) zfree(c
->argv
); 
2340             c
->argv 
= zmalloc(sizeof(robj
*)*argc
); 
2342             for (j 
= 0; j 
< argc
; j
++) { 
2343                 if (sdslen(argv
[j
])) { 
2344                     c
->argv
[c
->argc
] = createObject(REDIS_STRING
,argv
[j
]); 
2352                 /* Execute the command. If the client is still valid 
2353                  * after processCommand() return and there is something 
2354                  * on the query buffer try to process the next command. */ 
2355                 if (processCommand(c
) && sdslen(c
->querybuf
)) goto again
; 
2357                 /* Nothing to process, argc == 0. Just process the query 
2358                  * buffer if it's not empty or return to the caller */ 
2359                 if (sdslen(c
->querybuf
)) goto again
; 
2362         } else if (sdslen(c
->querybuf
) >= REDIS_REQUEST_MAX_SIZE
) { 
2363             redisLog(REDIS_VERBOSE
, "Client protocol error"); 
2368         /* Bulk read handling. Note that if we are at this point 
2369            the client already sent a command terminated with a newline, 
2370            we are reading the bulk data that is actually the last 
2371            argument of the command. */ 
2372         int qbl 
= sdslen(c
->querybuf
); 
2374         if (c
->bulklen 
<= qbl
) { 
2375             /* Copy everything but the final CRLF as final argument */ 
2376             c
->argv
[c
->argc
] = createStringObject(c
->querybuf
,c
->bulklen
-2); 
2378             c
->querybuf 
= sdsrange(c
->querybuf
,c
->bulklen
,-1); 
2379             /* Process the command. If the client is still valid after 
2380              * the processing and there is more data in the buffer 
2381              * try to parse it. */ 
2382             if (processCommand(c
) && sdslen(c
->querybuf
)) goto again
; 
2388 static void readQueryFromClient(aeEventLoop 
*el
, int fd
, void *privdata
, int mask
) { 
2389     redisClient 
*c 
= (redisClient
*) privdata
; 
2390     char buf
[REDIS_IOBUF_LEN
]; 
2393     REDIS_NOTUSED(mask
); 
2395     nread 
= read(fd
, buf
, REDIS_IOBUF_LEN
); 
2397         if (errno 
== EAGAIN
) { 
2400             redisLog(REDIS_VERBOSE
, "Reading from client: %s",strerror(errno
)); 
2404     } else if (nread 
== 0) { 
2405         redisLog(REDIS_VERBOSE
, "Client closed connection"); 
2410         c
->querybuf 
= sdscatlen(c
->querybuf
, buf
, nread
); 
2411         c
->lastinteraction 
= time(NULL
); 
2415     if (!(c
->flags 
& REDIS_BLOCKED
)) 
2416         processInputBuffer(c
); 
2419 static int selectDb(redisClient 
*c
, int id
) { 
2420     if (id 
< 0 || id 
>= server
.dbnum
) 
2422     c
->db 
= &server
.db
[id
]; 
2426 static void *dupClientReplyValue(void *o
) { 
2427     incrRefCount((robj
*)o
); 
2431 static redisClient 
*createClient(int fd
) { 
2432     redisClient 
*c 
= zmalloc(sizeof(*c
)); 
2434     anetNonBlock(NULL
,fd
); 
2435     anetTcpNoDelay(NULL
,fd
); 
2436     if (!c
) return NULL
; 
2439     c
->querybuf 
= sdsempty(); 
2448     c
->lastinteraction 
= time(NULL
); 
2449     c
->authenticated 
= 0; 
2450     c
->replstate 
= REDIS_REPL_NONE
; 
2451     c
->reply 
= listCreate(); 
2452     listSetFreeMethod(c
->reply
,decrRefCount
); 
2453     listSetDupMethod(c
->reply
,dupClientReplyValue
); 
2454     c
->blockingkeys 
= NULL
; 
2455     c
->blockingkeysnum 
= 0; 
2456     c
->io_keys 
= listCreate(); 
2457     listSetFreeMethod(c
->io_keys
,decrRefCount
); 
2458     if (aeCreateFileEvent(server
.el
, c
->fd
, AE_READABLE
, 
2459         readQueryFromClient
, c
) == AE_ERR
) { 
2463     listAddNodeTail(server
.clients
,c
); 
2464     initClientMultiState(c
); 
2468 static void addReply(redisClient 
*c
, robj 
*obj
) { 
2469     if (listLength(c
->reply
) == 0 && 
2470         (c
->replstate 
== REDIS_REPL_NONE 
|| 
2471          c
->replstate 
== REDIS_REPL_ONLINE
) && 
2472         aeCreateFileEvent(server
.el
, c
->fd
, AE_WRITABLE
, 
2473         sendReplyToClient
, c
) == AE_ERR
) return; 
2475     if (server
.vm_enabled 
&& obj
->storage 
!= REDIS_VM_MEMORY
) { 
2476         obj 
= dupStringObject(obj
); 
2477         obj
->refcount 
= 0; /* getDecodedObject() will increment the refcount */ 
2479     listAddNodeTail(c
->reply
,getDecodedObject(obj
)); 
2482 static void addReplySds(redisClient 
*c
, sds s
) { 
2483     robj 
*o 
= createObject(REDIS_STRING
,s
); 
2488 static void addReplyDouble(redisClient 
*c
, double d
) { 
2491     snprintf(buf
,sizeof(buf
),"%.17g",d
); 
2492     addReplySds(c
,sdscatprintf(sdsempty(),"$%lu\r\n%s\r\n", 
2493         (unsigned long) strlen(buf
),buf
)); 
2496 static void addReplyLong(redisClient 
*c
, long l
) { 
2501         addReply(c
,shared
.czero
); 
2503     } else if (l 
== 1) { 
2504         addReply(c
,shared
.cone
); 
2507     len 
= snprintf(buf
,sizeof(buf
),":%ld\r\n",l
); 
2508     addReplySds(c
,sdsnewlen(buf
,len
)); 
2511 static void addReplyUlong(redisClient 
*c
, unsigned long ul
) { 
2516         addReply(c
,shared
.czero
); 
2518     } else if (ul 
== 1) { 
2519         addReply(c
,shared
.cone
); 
2522     len 
= snprintf(buf
,sizeof(buf
),":%lu\r\n",ul
); 
2523     addReplySds(c
,sdsnewlen(buf
,len
)); 
2526 static void addReplyBulkLen(redisClient 
*c
, robj 
*obj
) { 
2529     if (obj
->encoding 
== REDIS_ENCODING_RAW
) { 
2530         len 
= sdslen(obj
->ptr
); 
2532         long n 
= (long)obj
->ptr
; 
2534         /* Compute how many bytes will take this integer as a radix 10 string */ 
2540         while((n 
= n
/10) != 0) { 
2544     addReplySds(c
,sdscatprintf(sdsempty(),"$%lu\r\n",(unsigned long)len
)); 
2547 static void addReplyBulk(redisClient 
*c
, robj 
*obj
) { 
2548     addReplyBulkLen(c
,obj
); 
2550     addReply(c
,shared
.crlf
); 
2553 static void acceptHandler(aeEventLoop 
*el
, int fd
, void *privdata
, int mask
) { 
2558     REDIS_NOTUSED(mask
); 
2559     REDIS_NOTUSED(privdata
); 
2561     cfd 
= anetAccept(server
.neterr
, fd
, cip
, &cport
); 
2562     if (cfd 
== AE_ERR
) { 
2563         redisLog(REDIS_VERBOSE
,"Accepting client connection: %s", server
.neterr
); 
2566     redisLog(REDIS_VERBOSE
,"Accepted %s:%d", cip
, cport
); 
2567     if ((c 
= createClient(cfd
)) == NULL
) { 
2568         redisLog(REDIS_WARNING
,"Error allocating resoures for the client"); 
2569         close(cfd
); /* May be already closed, just ingore errors */ 
2572     /* If maxclient directive is set and this is one client more... close the 
2573      * connection. Note that we create the client instead to check before 
2574      * for this condition, since now the socket is already set in nonblocking 
2575      * mode and we can send an error for free using the Kernel I/O */ 
2576     if (server
.maxclients 
&& listLength(server
.clients
) > server
.maxclients
) { 
2577         char *err 
= "-ERR max number of clients reached\r\n"; 
2579         /* That's a best effort error message, don't check write errors */ 
2580         if (write(c
->fd
,err
,strlen(err
)) == -1) { 
2581             /* Nothing to do, Just to avoid the warning... */ 
2586     server
.stat_numconnections
++; 
2589 /* ======================= Redis objects implementation ===================== */ 
2591 static robj 
*createObject(int type
, void *ptr
) { 
2594     if (server
.vm_enabled
) pthread_mutex_lock(&server
.obj_freelist_mutex
); 
2595     if (listLength(server
.objfreelist
)) { 
2596         listNode 
*head 
= listFirst(server
.objfreelist
); 
2597         o 
= listNodeValue(head
); 
2598         listDelNode(server
.objfreelist
,head
); 
2599         if (server
.vm_enabled
) pthread_mutex_unlock(&server
.obj_freelist_mutex
); 
2601         if (server
.vm_enabled
) { 
2602             pthread_mutex_unlock(&server
.obj_freelist_mutex
); 
2603             o 
= zmalloc(sizeof(*o
)); 
2605             o 
= zmalloc(sizeof(*o
)-sizeof(struct redisObjectVM
)); 
2609     o
->encoding 
= REDIS_ENCODING_RAW
; 
2612     if (server
.vm_enabled
) { 
2613         /* Note that this code may run in the context of an I/O thread 
2614          * and accessing to server.unixtime in theory is an error 
2615          * (no locks). But in practice this is safe, and even if we read 
2616          * garbage Redis will not fail, as it's just a statistical info */ 
2617         o
->vm
.atime 
= server
.unixtime
; 
2618         o
->storage 
= REDIS_VM_MEMORY
; 
2623 static robj 
*createStringObject(char *ptr
, size_t len
) { 
2624     return createObject(REDIS_STRING
,sdsnewlen(ptr
,len
)); 
2627 static robj 
*dupStringObject(robj 
*o
) { 
2628     assert(o
->encoding 
== REDIS_ENCODING_RAW
); 
2629     return createStringObject(o
->ptr
,sdslen(o
->ptr
)); 
2632 static robj 
*createListObject(void) { 
2633     list 
*l 
= listCreate(); 
2635     listSetFreeMethod(l
,decrRefCount
); 
2636     return createObject(REDIS_LIST
,l
); 
2639 static robj 
*createSetObject(void) { 
2640     dict 
*d 
= dictCreate(&setDictType
,NULL
); 
2641     return createObject(REDIS_SET
,d
); 
2644 static robj 
*createHashObject(void) { 
2645     /* All the Hashes start as zipmaps. Will be automatically converted 
2646      * into hash tables if there are enough elements or big elements 
2648     unsigned char *zm 
= zipmapNew(); 
2649     robj 
*o 
= createObject(REDIS_HASH
,zm
); 
2650     o
->encoding 
= REDIS_ENCODING_ZIPMAP
; 
2654 static robj 
*createZsetObject(void) { 
2655     zset 
*zs 
= zmalloc(sizeof(*zs
)); 
2657     zs
->dict 
= dictCreate(&zsetDictType
,NULL
); 
2658     zs
->zsl 
= zslCreate(); 
2659     return createObject(REDIS_ZSET
,zs
); 
2662 static void freeStringObject(robj 
*o
) { 
2663     if (o
->encoding 
== REDIS_ENCODING_RAW
) { 
2668 static void freeListObject(robj 
*o
) { 
2669     listRelease((list
*) o
->ptr
); 
2672 static void freeSetObject(robj 
*o
) { 
2673     dictRelease((dict
*) o
->ptr
); 
2676 static void freeZsetObject(robj 
*o
) { 
2679     dictRelease(zs
->dict
); 
2684 static void freeHashObject(robj 
*o
) { 
2685     switch (o
->encoding
) { 
2686     case REDIS_ENCODING_HT
: 
2687         dictRelease((dict
*) o
->ptr
); 
2689     case REDIS_ENCODING_ZIPMAP
: 
2698 static void incrRefCount(robj 
*o
) { 
2699     redisAssert(!server
.vm_enabled 
|| o
->storage 
== REDIS_VM_MEMORY
); 
2703 static void decrRefCount(void *obj
) { 
2706     /* Object is a key of a swapped out value, or in the process of being 
2708     if (server
.vm_enabled 
&& 
2709         (o
->storage 
== REDIS_VM_SWAPPED 
|| o
->storage 
== REDIS_VM_LOADING
)) 
2711         if (o
->storage 
== REDIS_VM_SWAPPED 
|| o
->storage 
== REDIS_VM_LOADING
) { 
2712             redisAssert(o
->refcount 
== 1); 
2714         if (o
->storage 
== REDIS_VM_LOADING
) vmCancelThreadedIOJob(obj
); 
2715         redisAssert(o
->type 
== REDIS_STRING
); 
2716         freeStringObject(o
); 
2717         vmMarkPagesFree(o
->vm
.page
,o
->vm
.usedpages
); 
2718         pthread_mutex_lock(&server
.obj_freelist_mutex
); 
2719         if (listLength(server
.objfreelist
) > REDIS_OBJFREELIST_MAX 
|| 
2720             !listAddNodeHead(server
.objfreelist
,o
)) 
2722         pthread_mutex_unlock(&server
.obj_freelist_mutex
); 
2723         server
.vm_stats_swapped_objects
--; 
2726     /* Object is in memory, or in the process of being swapped out. */ 
2727     if (--(o
->refcount
) == 0) { 
2728         if (server
.vm_enabled 
&& o
->storage 
== REDIS_VM_SWAPPING
) 
2729             vmCancelThreadedIOJob(obj
); 
2731         case REDIS_STRING
: freeStringObject(o
); break; 
2732         case REDIS_LIST
: freeListObject(o
); break; 
2733         case REDIS_SET
: freeSetObject(o
); break; 
2734         case REDIS_ZSET
: freeZsetObject(o
); break; 
2735         case REDIS_HASH
: freeHashObject(o
); break; 
2736         default: redisAssert(0); break; 
2738         if (server
.vm_enabled
) pthread_mutex_lock(&server
.obj_freelist_mutex
); 
2739         if (listLength(server
.objfreelist
) > REDIS_OBJFREELIST_MAX 
|| 
2740             !listAddNodeHead(server
.objfreelist
,o
)) 
2742         if (server
.vm_enabled
) pthread_mutex_unlock(&server
.obj_freelist_mutex
); 
2746 static robj 
*lookupKey(redisDb 
*db
, robj 
*key
) { 
2747     dictEntry 
*de 
= dictFind(db
->dict
,key
); 
2749         robj 
*key 
= dictGetEntryKey(de
); 
2750         robj 
*val 
= dictGetEntryVal(de
); 
2752         if (server
.vm_enabled
) { 
2753             if (key
->storage 
== REDIS_VM_MEMORY 
|| 
2754                 key
->storage 
== REDIS_VM_SWAPPING
) 
2756                 /* If we were swapping the object out, stop it, this key 
2758                 if (key
->storage 
== REDIS_VM_SWAPPING
) 
2759                     vmCancelThreadedIOJob(key
); 
2760                 /* Update the access time of the key for the aging algorithm. */ 
2761                 key
->vm
.atime 
= server
.unixtime
; 
2763                 int notify 
= (key
->storage 
== REDIS_VM_LOADING
); 
2765                 /* Our value was swapped on disk. Bring it at home. */ 
2766                 redisAssert(val 
== NULL
); 
2767                 val 
= vmLoadObject(key
); 
2768                 dictGetEntryVal(de
) = val
; 
2770                 /* Clients blocked by the VM subsystem may be waiting for 
2772                 if (notify
) handleClientsBlockedOnSwappedKey(db
,key
); 
2781 static robj 
*lookupKeyRead(redisDb 
*db
, robj 
*key
) { 
2782     expireIfNeeded(db
,key
); 
2783     return lookupKey(db
,key
); 
2786 static robj 
*lookupKeyWrite(redisDb 
*db
, robj 
*key
) { 
2787     deleteIfVolatile(db
,key
); 
2788     return lookupKey(db
,key
); 
2791 static robj 
*lookupKeyReadOrReply(redisClient 
*c
, robj 
*key
, robj 
*reply
) { 
2792     robj 
*o 
= lookupKeyRead(c
->db
, key
); 
2793     if (!o
) addReply(c
,reply
); 
2797 static robj 
*lookupKeyWriteOrReply(redisClient 
*c
, robj 
*key
, robj 
*reply
) { 
2798     robj 
*o 
= lookupKeyWrite(c
->db
, key
); 
2799     if (!o
) addReply(c
,reply
); 
2803 static int checkType(redisClient 
*c
, robj 
*o
, int type
) { 
2804     if (o
->type 
!= type
) { 
2805         addReply(c
,shared
.wrongtypeerr
); 
2811 static int deleteKey(redisDb 
*db
, robj 
*key
) { 
2814     /* We need to protect key from destruction: after the first dictDelete() 
2815      * it may happen that 'key' is no longer valid if we don't increment 
2816      * it's count. This may happen when we get the object reference directly 
2817      * from the hash table with dictRandomKey() or dict iterators */ 
2819     if (dictSize(db
->expires
)) dictDelete(db
->expires
,key
); 
2820     retval 
= dictDelete(db
->dict
,key
); 
2823     return retval 
== DICT_OK
; 
2826 /* Try to share an object against the shared objects pool */ 
2827 static robj 
*tryObjectSharing(robj 
*o
) { 
2828     struct dictEntry 
*de
; 
2831     if (o 
== NULL 
|| server
.shareobjects 
== 0) return o
; 
2833     redisAssert(o
->type 
== REDIS_STRING
); 
2834     de 
= dictFind(server
.sharingpool
,o
); 
2836         robj 
*shared 
= dictGetEntryKey(de
); 
2838         c 
= ((unsigned long) dictGetEntryVal(de
))+1; 
2839         dictGetEntryVal(de
) = (void*) c
; 
2840         incrRefCount(shared
); 
2844         /* Here we are using a stream algorihtm: Every time an object is 
2845          * shared we increment its count, everytime there is a miss we 
2846          * recrement the counter of a random object. If this object reaches 
2847          * zero we remove the object and put the current object instead. */ 
2848         if (dictSize(server
.sharingpool
) >= 
2849                 server
.sharingpoolsize
) { 
2850             de 
= dictGetRandomKey(server
.sharingpool
); 
2851             redisAssert(de 
!= NULL
); 
2852             c 
= ((unsigned long) dictGetEntryVal(de
))-1; 
2853             dictGetEntryVal(de
) = (void*) c
; 
2855                 dictDelete(server
.sharingpool
,de
->key
); 
2858             c 
= 0; /* If the pool is empty we want to add this object */ 
2863             retval 
= dictAdd(server
.sharingpool
,o
,(void*)1); 
2864             redisAssert(retval 
== DICT_OK
); 
2871 /* Check if the nul-terminated string 's' can be represented by a long 
2872  * (that is, is a number that fits into long without any other space or 
2873  * character before or after the digits). 
2875  * If so, the function returns REDIS_OK and *longval is set to the value 
2876  * of the number. Otherwise REDIS_ERR is returned */ 
2877 static int isStringRepresentableAsLong(sds s
, long *longval
) { 
2878     char buf
[32], *endptr
; 
2882     value 
= strtol(s
, &endptr
, 10); 
2883     if (endptr
[0] != '\0') return REDIS_ERR
; 
2884     slen 
= snprintf(buf
,32,"%ld",value
); 
2886     /* If the number converted back into a string is not identical 
2887      * then it's not possible to encode the string as integer */ 
2888     if (sdslen(s
) != (unsigned)slen 
|| memcmp(buf
,s
,slen
)) return REDIS_ERR
; 
2889     if (longval
) *longval 
= value
; 
2893 /* Try to encode a string object in order to save space */ 
2894 static int tryObjectEncoding(robj 
*o
) { 
2898     if (o
->encoding 
!= REDIS_ENCODING_RAW
) 
2899         return REDIS_ERR
; /* Already encoded */ 
2901     /* It's not save to encode shared objects: shared objects can be shared 
2902      * everywhere in the "object space" of Redis. Encoded objects can only 
2903      * appear as "values" (and not, for instance, as keys) */ 
2904      if (o
->refcount 
> 1) return REDIS_ERR
; 
2906     /* Currently we try to encode only strings */ 
2907     redisAssert(o
->type 
== REDIS_STRING
); 
2909     /* Check if we can represent this string as a long integer */ 
2910     if (isStringRepresentableAsLong(s
,&value
) == REDIS_ERR
) return REDIS_ERR
; 
2912     /* Ok, this object can be encoded */ 
2913     o
->encoding 
= REDIS_ENCODING_INT
; 
2915     o
->ptr 
= (void*) value
; 
2919 /* Get a decoded version of an encoded object (returned as a new object). 
2920  * If the object is already raw-encoded just increment the ref count. */ 
2921 static robj 
*getDecodedObject(robj 
*o
) { 
2924     if (o
->encoding 
== REDIS_ENCODING_RAW
) { 
2928     if (o
->type 
== REDIS_STRING 
&& o
->encoding 
== REDIS_ENCODING_INT
) { 
2931         snprintf(buf
,32,"%ld",(long)o
->ptr
); 
2932         dec 
= createStringObject(buf
,strlen(buf
)); 
2935         redisAssert(1 != 1); 
2939 /* Compare two string objects via strcmp() or alike. 
2940  * Note that the objects may be integer-encoded. In such a case we 
2941  * use snprintf() to get a string representation of the numbers on the stack 
2942  * and compare the strings, it's much faster than calling getDecodedObject(). 
2944  * Important note: if objects are not integer encoded, but binary-safe strings, 
2945  * sdscmp() from sds.c will apply memcmp() so this function ca be considered 
2947 static int compareStringObjects(robj 
*a
, robj 
*b
) { 
2948     redisAssert(a
->type 
== REDIS_STRING 
&& b
->type 
== REDIS_STRING
); 
2949     char bufa
[128], bufb
[128], *astr
, *bstr
; 
2952     if (a 
== b
) return 0; 
2953     if (a
->encoding 
!= REDIS_ENCODING_RAW
) { 
2954         snprintf(bufa
,sizeof(bufa
),"%ld",(long) a
->ptr
); 
2960     if (b
->encoding 
!= REDIS_ENCODING_RAW
) { 
2961         snprintf(bufb
,sizeof(bufb
),"%ld",(long) b
->ptr
); 
2967     return bothsds 
? sdscmp(astr
,bstr
) : strcmp(astr
,bstr
); 
2970 static size_t stringObjectLen(robj 
*o
) { 
2971     redisAssert(o
->type 
== REDIS_STRING
); 
2972     if (o
->encoding 
== REDIS_ENCODING_RAW
) { 
2973         return sdslen(o
->ptr
); 
2977         return snprintf(buf
,32,"%ld",(long)o
->ptr
); 
2981 /*============================ RDB saving/loading =========================== */ 
/* Write the single byte 'type' to 'fp'.
 * Returns 0 on success, -1 if the byte could not be written. */
static int rdbSaveType(FILE *fp, unsigned char type) {
    /* fwrite() returns the number of items written; exactly one is expected. */
    if (fwrite(&type, 1, 1, fp) != 1) return -1;
    return 0;
}
/* Write the time 't' to 'fp' as a 32 bit signed integer in host byte order.
 * The value is truncated to int32_t before being written.
 * Returns 0 on success, -1 if the write failed. */
static int rdbSaveTime(FILE *fp, time_t t) {
    int32_t t32 = (int32_t) t;

    if (fwrite(&t32, sizeof(t32), 1, fp) != 1) return -1;
    return 0;
}
2994 /* check rdbLoadLen() comments for more info */ 
2995 static int rdbSaveLen(FILE *fp
, uint32_t len
) { 
2996     unsigned char buf
[2]; 
2999         /* Save a 6 bit len */ 
3000         buf
[0] = (len
&0xFF)|(REDIS_RDB_6BITLEN
<<6); 
3001         if (fwrite(buf
,1,1,fp
) == 0) return -1; 
3002     } else if (len 
< (1<<14)) { 
3003         /* Save a 14 bit len */ 
3004         buf
[0] = ((len
>>8)&0xFF)|(REDIS_RDB_14BITLEN
<<6); 
3006         if (fwrite(buf
,2,1,fp
) == 0) return -1; 
3008         /* Save a 32 bit len */ 
3009         buf
[0] = (REDIS_RDB_32BITLEN
<<6); 
3010         if (fwrite(buf
,1,1,fp
) == 0) return -1; 
3012         if (fwrite(&len
,4,1,fp
) == 0) return -1; 
3017 /* String objects in the form "2391" "-100" without any space and with a 
3018  * range of values that can fit in an 8, 16 or 32 bit signed value can be 
3019  * encoded as integers to save space */ 
3020 static int rdbTryIntegerEncoding(char *s
, size_t len
, unsigned char *enc
) { 
3022     char *endptr
, buf
[32]; 
3024     /* Check if it's possible to encode this value as a number */ 
3025     value 
= strtoll(s
, &endptr
, 10); 
3026     if (endptr
[0] != '\0') return 0; 
3027     snprintf(buf
,32,"%lld",value
); 
3029     /* If the number converted back into a string is not identical 
3030      * then it's not possible to encode the string as integer */ 
3031     if (strlen(buf
) != len 
|| memcmp(buf
,s
,len
)) return 0; 
3033     /* Finally check if it fits in our ranges */ 
3034     if (value 
>= -(1<<7) && value 
<= (1<<7)-1) { 
3035         enc
[0] = (REDIS_RDB_ENCVAL
<<6)|REDIS_RDB_ENC_INT8
; 
3036         enc
[1] = value
&0xFF; 
3038     } else if (value 
>= -(1<<15) && value 
<= (1<<15)-1) { 
3039         enc
[0] = (REDIS_RDB_ENCVAL
<<6)|REDIS_RDB_ENC_INT16
; 
3040         enc
[1] = value
&0xFF; 
3041         enc
[2] = (value
>>8)&0xFF; 
3043     } else if (value 
>= -((long long)1<<31) && value 
<= ((long long)1<<31)-1) { 
3044         enc
[0] = (REDIS_RDB_ENCVAL
<<6)|REDIS_RDB_ENC_INT32
; 
3045         enc
[1] = value
&0xFF; 
3046         enc
[2] = (value
>>8)&0xFF; 
3047         enc
[3] = (value
>>16)&0xFF; 
3048         enc
[4] = (value
>>24)&0xFF; 
3055 static int rdbSaveLzfStringObject(FILE *fp
, unsigned char *s
, size_t len
) { 
3056     size_t comprlen
, outlen
; 
3060     /* We require at least four bytes compression for this to be worth it */ 
3061     if (len 
<= 4) return 0; 
3063     if ((out 
= zmalloc(outlen
+1)) == NULL
) return 0; 
3064     comprlen 
= lzf_compress(s
, len
, out
, outlen
); 
3065     if (comprlen 
== 0) { 
3069     /* Data compressed! Let's save it on disk */ 
3070     byte 
= (REDIS_RDB_ENCVAL
<<6)|REDIS_RDB_ENC_LZF
; 
3071     if (fwrite(&byte
,1,1,fp
) == 0) goto writeerr
; 
3072     if (rdbSaveLen(fp
,comprlen
) == -1) goto writeerr
; 
3073     if (rdbSaveLen(fp
,len
) == -1) goto writeerr
; 
3074     if (fwrite(out
,comprlen
,1,fp
) == 0) goto writeerr
; 
3083 /* Save a string object as [len][data] on disk. If the object is a string 
3084  * representation of an integer value we try to save it in a special form */ 
3085 static int rdbSaveRawString(FILE *fp
, unsigned char *s
, size_t len
) { 
3088     /* Try integer encoding */ 
3090         unsigned char buf
[5]; 
3091         if ((enclen 
= rdbTryIntegerEncoding((char*)s
,len
,buf
)) > 0) { 
3092             if (fwrite(buf
,enclen
,1,fp
) == 0) return -1; 
3097     /* Try LZF compression - under 20 bytes it's unable to compress even 
3098      * aaaaaaaaaaaaaaaaaa so skip it */ 
3099     if (server
.rdbcompression 
&& len 
> 20) { 
3102         retval 
= rdbSaveLzfStringObject(fp
,s
,len
); 
3103         if (retval 
== -1) return -1; 
3104         if (retval 
> 0) return 0; 
3105         /* retval == 0 means data can't be compressed, save the old way */ 
3108     /* Store verbatim */ 
3109     if (rdbSaveLen(fp
,len
) == -1) return -1; 
3110     if (len 
&& fwrite(s
,len
,1,fp
) == 0) return -1; 
3114 /* Like rdbSaveRawString() but handle encoded objects */ 
3115 static int rdbSaveStringObject(FILE *fp
, robj 
*obj
) { 
3118     /* Avoid incr/decr ref count business when possible. 
3119      * This plays well with copy-on-write given that we are probably 
3120      * in a child process (BGSAVE). Also this makes sure key objects 
3121      * of swapped objects are not incRefCount-ed (an assert does not allow 
3122      * this in order to avoid bugs) */ 
3123     if (obj
->encoding 
!= REDIS_ENCODING_RAW
) { 
3124         obj 
= getDecodedObject(obj
); 
3125         retval 
= rdbSaveRawString(fp
,obj
->ptr
,sdslen(obj
->ptr
)); 
3128         retval 
= rdbSaveRawString(fp
,obj
->ptr
,sdslen(obj
->ptr
)); 
/* Save a double value. Doubles are saved as strings prefixed by an unsigned
 * 8 bit integer specifying the length of the representation.
 * This 8 bit integer has special values in order to specify the following
 * conditions:
 * 253: not a number
 * 254: + inf
 * 255: - inf
 *
 * Returns 0 on success, -1 on write error. */
static int rdbSaveDoubleValue(FILE *fp, double val) {
    unsigned char buf[128];
    int len;

    if (isnan(val)) {
        buf[0] = 253;
        len = 1;
    } else if (!isfinite(val)) {
        buf[0] = (val < 0) ? 255 : 254;
        len = 1;
    } else {
        /* The textual form goes right after the length byte. */
        int n = snprintf((char*)buf+1, sizeof(buf)-1, "%.17g", val);

        if (n < 0 || (size_t)n >= sizeof(buf)-1) return -1;
        buf[0] = (unsigned char)n;
        len = n+1;
    }
    if (fwrite(buf, len, 1, fp) != 1) return -1;
    return 0;
}
3160 /* Save a Redis object. */ 
3161 static int rdbSaveObject(FILE *fp
, robj 
*o
) { 
3162     if (o
->type 
== REDIS_STRING
) { 
3163         /* Save a string value */ 
3164         if (rdbSaveStringObject(fp
,o
) == -1) return -1; 
3165     } else if (o
->type 
== REDIS_LIST
) { 
3166         /* Save a list value */ 
3167         list 
*list 
= o
->ptr
; 
3171         if (rdbSaveLen(fp
,listLength(list
)) == -1) return -1; 
3172         listRewind(list
,&li
); 
3173         while((ln 
= listNext(&li
))) { 
3174             robj 
*eleobj 
= listNodeValue(ln
); 
3176             if (rdbSaveStringObject(fp
,eleobj
) == -1) return -1; 
3178     } else if (o
->type 
== REDIS_SET
) { 
3179         /* Save a set value */ 
3181         dictIterator 
*di 
= dictGetIterator(set
); 
3184         if (rdbSaveLen(fp
,dictSize(set
)) == -1) return -1; 
3185         while((de 
= dictNext(di
)) != NULL
) { 
3186             robj 
*eleobj 
= dictGetEntryKey(de
); 
3188             if (rdbSaveStringObject(fp
,eleobj
) == -1) return -1; 
3190         dictReleaseIterator(di
); 
3191     } else if (o
->type 
== REDIS_ZSET
) { 
3192         /* Save a set value */ 
3194         dictIterator 
*di 
= dictGetIterator(zs
->dict
); 
3197         if (rdbSaveLen(fp
,dictSize(zs
->dict
)) == -1) return -1; 
3198         while((de 
= dictNext(di
)) != NULL
) { 
3199             robj 
*eleobj 
= dictGetEntryKey(de
); 
3200             double *score 
= dictGetEntryVal(de
); 
3202             if (rdbSaveStringObject(fp
,eleobj
) == -1) return -1; 
3203             if (rdbSaveDoubleValue(fp
,*score
) == -1) return -1; 
3205         dictReleaseIterator(di
); 
3206     } else if (o
->type 
== REDIS_HASH
) { 
3207         /* Save a hash value */ 
3208         if (o
->encoding 
== REDIS_ENCODING_ZIPMAP
) { 
3209             unsigned char *p 
= zipmapRewind(o
->ptr
); 
3210             unsigned int count 
= zipmapLen(o
->ptr
); 
3211             unsigned char *key
, *val
; 
3212             unsigned int klen
, vlen
; 
3214             if (rdbSaveLen(fp
,count
) == -1) return -1; 
3215             while((p 
= zipmapNext(p
,&key
,&klen
,&val
,&vlen
)) != NULL
) { 
3216                 if (rdbSaveRawString(fp
,key
,klen
) == -1) return -1; 
3217                 if (rdbSaveRawString(fp
,val
,vlen
) == -1) return -1; 
3220             dictIterator 
*di 
= dictGetIterator(o
->ptr
); 
3223             if (rdbSaveLen(fp
,dictSize((dict
*)o
->ptr
)) == -1) return -1; 
3224             while((de 
= dictNext(di
)) != NULL
) { 
3225                 robj 
*key 
= dictGetEntryKey(de
); 
3226                 robj 
*val 
= dictGetEntryVal(de
); 
3228                 if (rdbSaveStringObject(fp
,key
) == -1) return -1; 
3229                 if (rdbSaveStringObject(fp
,val
) == -1) return -1; 
3231             dictReleaseIterator(di
); 
3239 /* Return the length the object will have on disk if saved with 
3240  * the rdbSaveObject() function. Currently we use a trick to get 
3241  * this length with very little changes to the code. In the future 
3242  * we could switch to a faster solution. */ 
3243 static off_t 
rdbSavedObjectLen(robj 
*o
, FILE *fp
) { 
3244     if (fp 
== NULL
) fp 
= server
.devnull
; 
3246     assert(rdbSaveObject(fp
,o
) != 1); 
3250 /* Return the number of pages required to save this object in the swap file */ 
3251 static off_t 
rdbSavedObjectPages(robj 
*o
, FILE *fp
) { 
3252     off_t bytes 
= rdbSavedObjectLen(o
,fp
); 
3254     return (bytes
+(server
.vm_page_size
-1))/server
.vm_page_size
; 
3257 /* Save the DB on disk. Return REDIS_ERR on error, REDIS_OK on success */ 
3258 static int rdbSave(char *filename
) { 
3259     dictIterator 
*di 
= NULL
; 
3264     time_t now 
= time(NULL
); 
3266     /* Wait for I/O therads to terminate, just in case this is a 
3267      * foreground-saving, to avoid seeking the swap file descriptor at the 
3269     if (server
.vm_enabled
) 
3270         waitEmptyIOJobsQueue(); 
3272     snprintf(tmpfile
,256,"temp-%d.rdb", (int) getpid()); 
3273     fp 
= fopen(tmpfile
,"w"); 
3275         redisLog(REDIS_WARNING
, "Failed saving the DB: %s", strerror(errno
)); 
3278     if (fwrite("REDIS0001",9,1,fp
) == 0) goto werr
; 
3279     for (j 
= 0; j 
< server
.dbnum
; j
++) { 
3280         redisDb 
*db 
= server
.db
+j
; 
3282         if (dictSize(d
) == 0) continue; 
3283         di 
= dictGetIterator(d
); 
3289         /* Write the SELECT DB opcode */ 
3290         if (rdbSaveType(fp
,REDIS_SELECTDB
) == -1) goto werr
; 
3291         if (rdbSaveLen(fp
,j
) == -1) goto werr
; 
3293         /* Iterate this DB writing every entry */ 
3294         while((de 
= dictNext(di
)) != NULL
) { 
3295             robj 
*key 
= dictGetEntryKey(de
); 
3296             robj 
*o 
= dictGetEntryVal(de
); 
3297             time_t expiretime 
= getExpire(db
,key
); 
3299             /* Save the expire time */ 
3300             if (expiretime 
!= -1) { 
3301                 /* If this key is already expired skip it */ 
3302                 if (expiretime 
< now
) continue; 
3303                 if (rdbSaveType(fp
,REDIS_EXPIRETIME
) == -1) goto werr
; 
3304                 if (rdbSaveTime(fp
,expiretime
) == -1) goto werr
; 
3306             /* Save the key and associated value. This requires special 
3307              * handling if the value is swapped out. */ 
3308             if (!server
.vm_enabled 
|| key
->storage 
== REDIS_VM_MEMORY 
|| 
3309                                       key
->storage 
== REDIS_VM_SWAPPING
) { 
3310                 /* Save type, key, value */ 
3311                 if (rdbSaveType(fp
,o
->type
) == -1) goto werr
; 
3312                 if (rdbSaveStringObject(fp
,key
) == -1) goto werr
; 
3313                 if (rdbSaveObject(fp
,o
) == -1) goto werr
; 
3315                 /* REDIS_VM_SWAPPED or REDIS_VM_LOADING */ 
3317                 /* Get a preview of the object in memory */ 
3318                 po 
= vmPreviewObject(key
); 
3319                 /* Save type, key, value */ 
3320                 if (rdbSaveType(fp
,key
->vtype
) == -1) goto werr
; 
3321                 if (rdbSaveStringObject(fp
,key
) == -1) goto werr
; 
3322                 if (rdbSaveObject(fp
,po
) == -1) goto werr
; 
3323                 /* Remove the loaded object from memory */ 
3327         dictReleaseIterator(di
); 
3330     if (rdbSaveType(fp
,REDIS_EOF
) == -1) goto werr
; 
3332     /* Make sure data will not remain on the OS's output buffers */ 
3337     /* Use RENAME to make sure the DB file is changed atomically only 
3338      * if the generate DB file is ok. */ 
3339     if (rename(tmpfile
,filename
) == -1) { 
3340         redisLog(REDIS_WARNING
,"Error moving temp DB file on the final destination: %s", strerror(errno
)); 
3344     redisLog(REDIS_NOTICE
,"DB saved on disk"); 
3346     server
.lastsave 
= time(NULL
); 
3352     redisLog(REDIS_WARNING
,"Write error saving DB on disk: %s", strerror(errno
)); 
3353     if (di
) dictReleaseIterator(di
); 
3357 static int rdbSaveBackground(char *filename
) { 
3360     if (server
.bgsavechildpid 
!= -1) return REDIS_ERR
; 
3361     if (server
.vm_enabled
) waitEmptyIOJobsQueue(); 
3362     if ((childpid 
= fork()) == 0) { 
3364         if (server
.vm_enabled
) vmReopenSwapFile(); 
3366         if (rdbSave(filename
) == REDIS_OK
) { 
3373         if (childpid 
== -1) { 
3374             redisLog(REDIS_WARNING
,"Can't save in background: fork: %s", 
3378         redisLog(REDIS_NOTICE
,"Background saving started by pid %d",childpid
); 
3379         server
.bgsavechildpid 
= childpid
; 
3382     return REDIS_OK
; /* unreached */ 
3385 static void rdbRemoveTempFile(pid_t childpid
) { 
3388     snprintf(tmpfile
,256,"temp-%d.rdb", (int) childpid
); 
/* Read a single type byte from 'fp'.
 * Returns the byte as a value in the range 0-255, or -1 if it could not
 * be read. */
static int rdbLoadType(FILE *fp) {
    unsigned char type;

    if (fread(&type, 1, 1, fp) != 1) return -1;
    return type;
}
/* Read a time stored as a 32 bit signed integer in host byte order from
 * 'fp'. Returns the value as a time_t, or -1 if it could not be read. */
static time_t rdbLoadTime(FILE *fp) {
    int32_t t32;

    if (fread(&t32, sizeof(t32), 1, fp) != 1) return -1;
    return (time_t) t32;
}
3404 /* Load an encoded length from the DB, see the REDIS_RDB_* defines on the top 
3405  * of this file for a description of how these are stored on disk. 
3407  * isencoded is set to 1 if the length read is not actually a length but 
3408  * an "encoding type", check the above comments for more info */ 
3409 static uint32_t rdbLoadLen(FILE *fp
, int *isencoded
) { 
3410     unsigned char buf
[2]; 
3414     if (isencoded
) *isencoded 
= 0; 
3415     if (fread(buf
,1,1,fp
) == 0) return REDIS_RDB_LENERR
; 
3416     type 
= (buf
[0]&0xC0)>>6; 
3417     if (type 
== REDIS_RDB_6BITLEN
) { 
3418         /* Read a 6 bit len */ 
3420     } else if (type 
== REDIS_RDB_ENCVAL
) { 
3421         /* Read a 6 bit len encoding type */ 
3422         if (isencoded
) *isencoded 
= 1; 
3424     } else if (type 
== REDIS_RDB_14BITLEN
) { 
3425         /* Read a 14 bit len */ 
3426         if (fread(buf
+1,1,1,fp
) == 0) return REDIS_RDB_LENERR
; 
3427         return ((buf
[0]&0x3F)<<8)|buf
[1]; 
3429         /* Read a 32 bit len */ 
3430         if (fread(&len
,4,1,fp
) == 0) return REDIS_RDB_LENERR
; 
3435 static robj 
*rdbLoadIntegerObject(FILE *fp
, int enctype
) { 
3436     unsigned char enc
[4]; 
3439     if (enctype 
== REDIS_RDB_ENC_INT8
) { 
3440         if (fread(enc
,1,1,fp
) == 0) return NULL
; 
3441         val 
= (signed char)enc
[0]; 
3442     } else if (enctype 
== REDIS_RDB_ENC_INT16
) { 
3444         if (fread(enc
,2,1,fp
) == 0) return NULL
; 
3445         v 
= enc
[0]|(enc
[1]<<8); 
3447     } else if (enctype 
== REDIS_RDB_ENC_INT32
) { 
3449         if (fread(enc
,4,1,fp
) == 0) return NULL
; 
3450         v 
= enc
[0]|(enc
[1]<<8)|(enc
[2]<<16)|(enc
[3]<<24); 
3453         val 
= 0; /* anti-warning */ 
3456     return createObject(REDIS_STRING
,sdscatprintf(sdsempty(),"%lld",val
)); 
3459 static robj 
*rdbLoadLzfStringObject(FILE*fp
) { 
3460     unsigned int len
, clen
; 
3461     unsigned char *c 
= NULL
; 
3464     if ((clen 
= rdbLoadLen(fp
,NULL
)) == REDIS_RDB_LENERR
) return NULL
; 
3465     if ((len 
= rdbLoadLen(fp
,NULL
)) == REDIS_RDB_LENERR
) return NULL
; 
3466     if ((c 
= zmalloc(clen
)) == NULL
) goto err
; 
3467     if ((val 
= sdsnewlen(NULL
,len
)) == NULL
) goto err
; 
3468     if (fread(c
,clen
,1,fp
) == 0) goto err
; 
3469     if (lzf_decompress(c
,clen
,val
,len
) == 0) goto err
; 
3471     return createObject(REDIS_STRING
,val
); 
3478 static robj 
*rdbLoadStringObject(FILE*fp
) { 
3483     len 
= rdbLoadLen(fp
,&isencoded
); 
3486         case REDIS_RDB_ENC_INT8
: 
3487         case REDIS_RDB_ENC_INT16
: 
3488         case REDIS_RDB_ENC_INT32
: 
3489             return tryObjectSharing(rdbLoadIntegerObject(fp
,len
)); 
3490         case REDIS_RDB_ENC_LZF
: 
3491             return tryObjectSharing(rdbLoadLzfStringObject(fp
)); 
3497     if (len 
== REDIS_RDB_LENERR
) return NULL
; 
3498     val 
= sdsnewlen(NULL
,len
); 
3499     if (len 
&& fread(val
,len
,1,fp
) == 0) { 
3503     return tryObjectSharing(createObject(REDIS_STRING
,val
)); 
3506 /* For information about double serialization check rdbSaveDoubleValue() */ 
3507 static int rdbLoadDoubleValue(FILE *fp
, double *val
) { 
3511     if (fread(&len
,1,1,fp
) == 0) return -1; 
3513     case 255: *val 
= R_NegInf
; return 0; 
3514     case 254: *val 
= R_PosInf
; return 0; 
3515     case 253: *val 
= R_Nan
; return 0; 
3517         if (fread(buf
,len
,1,fp
) == 0) return -1; 
3519         sscanf(buf
, "%lg", val
); 
3524 /* Load a Redis object of the specified type from the specified file. 
3525  * On success a newly allocated object is returned, otherwise NULL. */ 
3526 static robj 
*rdbLoadObject(int type
, FILE *fp
) { 
3529     redisLog(REDIS_DEBUG
,"LOADING OBJECT %d (at %d)\n",type
,ftell(fp
)); 
3530     if (type 
== REDIS_STRING
) { 
3531         /* Read string value */ 
3532         if ((o 
= rdbLoadStringObject(fp
)) == NULL
) return NULL
; 
3533         tryObjectEncoding(o
); 
3534     } else if (type 
== REDIS_LIST 
|| type 
== REDIS_SET
) { 
3535         /* Read list/set value */ 
3538         if ((listlen 
= rdbLoadLen(fp
,NULL
)) == REDIS_RDB_LENERR
) return NULL
; 
3539         o 
= (type 
== REDIS_LIST
) ? createListObject() : createSetObject(); 
3540         /* It's faster to expand the dict to the right size asap in order 
3541          * to avoid rehashing */ 
3542         if (type 
== REDIS_SET 
&& listlen 
> DICT_HT_INITIAL_SIZE
) 
3543             dictExpand(o
->ptr
,listlen
); 
3544         /* Load every single element of the list/set */ 
3548             if ((ele 
= rdbLoadStringObject(fp
)) == NULL
) return NULL
; 
3549             tryObjectEncoding(ele
); 
3550             if (type 
== REDIS_LIST
) { 
3551                 listAddNodeTail((list
*)o
->ptr
,ele
); 
3553                 dictAdd((dict
*)o
->ptr
,ele
,NULL
); 
3556     } else if (type 
== REDIS_ZSET
) { 
3557         /* Read list/set value */ 
3561         if ((zsetlen 
= rdbLoadLen(fp
,NULL
)) == REDIS_RDB_LENERR
) return NULL
; 
3562         o 
= createZsetObject(); 
3564         /* Load every single element of the list/set */ 
3567             double *score 
= zmalloc(sizeof(double)); 
3569             if ((ele 
= rdbLoadStringObject(fp
)) == NULL
) return NULL
; 
3570             tryObjectEncoding(ele
); 
3571             if (rdbLoadDoubleValue(fp
,score
) == -1) return NULL
; 
3572             dictAdd(zs
->dict
,ele
,score
); 
3573             zslInsert(zs
->zsl
,*score
,ele
); 
3574             incrRefCount(ele
); /* added to skiplist */ 
3576     } else if (type 
== REDIS_HASH
) { 
3579         if ((hashlen 
= rdbLoadLen(fp
,NULL
)) == REDIS_RDB_LENERR
) return NULL
; 
3580         o 
= createHashObject(); 
3581         /* Too many entries? Use an hash table. */ 
3582         if (hashlen 
> server
.hash_max_zipmap_entries
) 
3583             convertToRealHash(o
); 
3584         /* Load every key/value, then set it into the zipmap or hash 
3585          * table, as needed. */ 
3589             if ((key 
= rdbLoadStringObject(fp
)) == NULL
) return NULL
; 
3590             if ((val 
= rdbLoadStringObject(fp
)) == NULL
) return NULL
; 
3591             /* If we are using a zipmap and there are too big values 
3592              * the object is converted to real hash table encoding. */ 
3593             if (o
->encoding 
!= REDIS_ENCODING_HT 
&& 
3594                (sdslen(key
->ptr
) > server
.hash_max_zipmap_value 
|| 
3595                 sdslen(val
->ptr
) > server
.hash_max_zipmap_value
)) 
3597                     convertToRealHash(o
); 
3600             if (o
->encoding 
== REDIS_ENCODING_ZIPMAP
) { 
3601                 unsigned char *zm 
= o
->ptr
; 
3603                 zm 
= zipmapSet(zm
,key
->ptr
,sdslen(key
->ptr
), 
3604                                   val
->ptr
,sdslen(val
->ptr
),NULL
); 
3609                 tryObjectEncoding(key
); 
3610                 tryObjectEncoding(val
); 
3611                 dictAdd((dict
*)o
->ptr
,key
,val
); 
3620 static int rdbLoad(char *filename
) { 
3622     robj 
*keyobj 
= NULL
; 
3624     int type
, retval
, rdbver
; 
3625     dict 
*d 
= server
.db
[0].dict
; 
3626     redisDb 
*db 
= server
.db
+0; 
3628     time_t expiretime 
= -1, now 
= time(NULL
); 
3629     long long loadedkeys 
= 0; 
3631     fp 
= fopen(filename
,"r"); 
3632     if (!fp
) return REDIS_ERR
; 
3633     if (fread(buf
,9,1,fp
) == 0) goto eoferr
; 
3635     if (memcmp(buf
,"REDIS",5) != 0) { 
3637         redisLog(REDIS_WARNING
,"Wrong signature trying to load DB from file"); 
3640     rdbver 
= atoi(buf
+5); 
3643         redisLog(REDIS_WARNING
,"Can't handle RDB format version %d",rdbver
); 
3650         if ((type 
= rdbLoadType(fp
)) == -1) goto eoferr
; 
3651         if (type 
== REDIS_EXPIRETIME
) { 
3652             if ((expiretime 
= rdbLoadTime(fp
)) == -1) goto eoferr
; 
3653             /* We read the time so we need to read the object type again */ 
3654             if ((type 
= rdbLoadType(fp
)) == -1) goto eoferr
; 
3656         if (type 
== REDIS_EOF
) break; 
3657         /* Handle SELECT DB opcode as a special case */ 
3658         if (type 
== REDIS_SELECTDB
) { 
3659             if ((dbid 
= rdbLoadLen(fp
,NULL
)) == REDIS_RDB_LENERR
) 
3661             if (dbid 
>= (unsigned)server
.dbnum
) { 
3662                 redisLog(REDIS_WARNING
,"FATAL: Data file was created with a Redis server configured to handle more than %d databases. Exiting\n", server
.dbnum
); 
3665             db 
= server
.db
+dbid
; 
3670         if ((keyobj 
= rdbLoadStringObject(fp
)) == NULL
) goto eoferr
; 
3672         if ((o 
= rdbLoadObject(type
,fp
)) == NULL
) goto eoferr
; 
3673         /* Add the new object in the hash table */ 
3674         retval 
= dictAdd(d
,keyobj
,o
); 
3675         if (retval 
== DICT_ERR
) { 
3676             redisLog(REDIS_WARNING
,"Loading DB, duplicated key (%s) found! Unrecoverable error, exiting now.", keyobj
->ptr
); 
3679         /* Set the expire time if needed */ 
3680         if (expiretime 
!= -1) { 
3681             setExpire(db
,keyobj
,expiretime
); 
3682             /* Delete this key if already expired */ 
3683             if (expiretime 
< now
) deleteKey(db
,keyobj
); 
3687         /* Handle swapping while loading big datasets when VM is on */ 
3689         if (server
.vm_enabled 
&& (loadedkeys 
% 5000) == 0) { 
3690             while (zmalloc_used_memory() > server
.vm_max_memory
) { 
3691                 if (vmSwapOneObjectBlocking() == REDIS_ERR
) break; 
3698 eoferr
: /* unexpected end of file is handled here with a fatal exit */ 
3699     if (keyobj
) decrRefCount(keyobj
); 
3700     redisLog(REDIS_WARNING
,"Short read or OOM loading DB. Unrecoverable error, aborting now."); 
3702     return REDIS_ERR
; /* Just to avoid warning */ 
3705 /*================================== Commands =============================== */ 
3707 static void authCommand(redisClient 
*c
) { 
3708     if (!server
.requirepass 
|| !strcmp(c
->argv
[1]->ptr
, server
.requirepass
)) { 
3709       c
->authenticated 
= 1; 
3710       addReply(c
,shared
.ok
); 
3712       c
->authenticated 
= 0; 
3713       addReplySds(c
,sdscatprintf(sdsempty(),"-ERR invalid password\r\n")); 
3717 static void pingCommand(redisClient 
*c
) { 
3718     addReply(c
,shared
.pong
); 
3721 static void echoCommand(redisClient 
*c
) { 
3722     addReplyBulk(c
,c
->argv
[1]); 
3725 /*=================================== Strings =============================== */ 
3727 static void setGenericCommand(redisClient 
*c
, int nx
) { 
3730     if (nx
) deleteIfVolatile(c
->db
,c
->argv
[1]); 
3731     retval 
= dictAdd(c
->db
->dict
,c
->argv
[1],c
->argv
[2]); 
3732     if (retval 
== DICT_ERR
) { 
3734             /* If the key is about a swapped value, we want a new key object 
3735              * to overwrite the old. So we delete the old key in the database. 
3736              * This will also make sure that swap pages about the old object 
3737              * will be marked as free. */ 
3738             if (server
.vm_enabled 
&& deleteIfSwapped(c
->db
,c
->argv
[1])) 
3739                 incrRefCount(c
->argv
[1]); 
3740             dictReplace(c
->db
->dict
,c
->argv
[1],c
->argv
[2]); 
3741             incrRefCount(c
->argv
[2]); 
3743             addReply(c
,shared
.czero
); 
3747         incrRefCount(c
->argv
[1]); 
3748         incrRefCount(c
->argv
[2]); 
3751     removeExpire(c
->db
,c
->argv
[1]); 
3752     addReply(c
, nx 
? shared
.cone 
: shared
.ok
); 
3755 static void setCommand(redisClient 
*c
) { 
3756     setGenericCommand(c
,0); 
3759 static void setnxCommand(redisClient 
*c
) { 
3760     setGenericCommand(c
,1); 
3763 static int getGenericCommand(redisClient 
*c
) { 
3766     if ((o 
= lookupKeyReadOrReply(c
,c
->argv
[1],shared
.nullbulk
)) == NULL
) 
3769     if (o
->type 
!= REDIS_STRING
) { 
3770         addReply(c
,shared
.wrongtypeerr
); 
3778 static void getCommand(redisClient 
*c
) { 
3779     getGenericCommand(c
); 
3782 static void getsetCommand(redisClient 
*c
) { 
3783     if (getGenericCommand(c
) == REDIS_ERR
) return; 
3784     if (dictAdd(c
->db
->dict
,c
->argv
[1],c
->argv
[2]) == DICT_ERR
) { 
3785         dictReplace(c
->db
->dict
,c
->argv
[1],c
->argv
[2]); 
3787         incrRefCount(c
->argv
[1]); 
3789     incrRefCount(c
->argv
[2]); 
3791     removeExpire(c
->db
,c
->argv
[1]); 
3794 static void mgetCommand(redisClient 
*c
) { 
3797     addReplySds(c
,sdscatprintf(sdsempty(),"*%d\r\n",c
->argc
-1)); 
3798     for (j 
= 1; j 
< c
->argc
; j
++) { 
3799         robj 
*o 
= lookupKeyRead(c
->db
,c
->argv
[j
]); 
3801             addReply(c
,shared
.nullbulk
); 
3803             if (o
->type 
!= REDIS_STRING
) { 
3804                 addReply(c
,shared
.nullbulk
); 
3812 static void msetGenericCommand(redisClient 
*c
, int nx
) { 
3813     int j
, busykeys 
= 0; 
3815     if ((c
->argc 
% 2) == 0) { 
3816         addReplySds(c
,sdsnew("-ERR wrong number of arguments for MSET\r\n")); 
3819     /* Handle the NX flag. The MSETNX semantic is to return zero and don't 
3820      * set nothing at all if at least one already key exists. */ 
3822         for (j 
= 1; j 
< c
->argc
; j 
+= 2) { 
3823             if (lookupKeyWrite(c
->db
,c
->argv
[j
]) != NULL
) { 
3829         addReply(c
, shared
.czero
); 
3833     for (j 
= 1; j 
< c
->argc
; j 
+= 2) { 
3836         tryObjectEncoding(c
->argv
[j
+1]); 
3837         retval 
= dictAdd(c
->db
->dict
,c
->argv
[j
],c
->argv
[j
+1]); 
3838         if (retval 
== DICT_ERR
) { 
3839             dictReplace(c
->db
->dict
,c
->argv
[j
],c
->argv
[j
+1]); 
3840             incrRefCount(c
->argv
[j
+1]); 
3842             incrRefCount(c
->argv
[j
]); 
3843             incrRefCount(c
->argv
[j
+1]); 
3845         removeExpire(c
->db
,c
->argv
[j
]); 
3847     server
.dirty 
+= (c
->argc
-1)/2; 
3848     addReply(c
, nx 
? shared
.cone 
: shared
.ok
); 
3851 static void msetCommand(redisClient 
*c
) { 
3852     msetGenericCommand(c
,0); 
3855 static void msetnxCommand(redisClient 
*c
) { 
3856     msetGenericCommand(c
,1); 
3859 static void incrDecrCommand(redisClient 
*c
, long long incr
) { 
3864     o 
= lookupKeyWrite(c
->db
,c
->argv
[1]); 
3868         if (o
->type 
!= REDIS_STRING
) { 
3873             if (o
->encoding 
== REDIS_ENCODING_RAW
) 
3874                 value 
= strtoll(o
->ptr
, &eptr
, 10); 
3875             else if (o
->encoding 
== REDIS_ENCODING_INT
) 
3876                 value 
= (long)o
->ptr
; 
3878                 redisAssert(1 != 1); 
3883     o 
= createObject(REDIS_STRING
,sdscatprintf(sdsempty(),"%lld",value
)); 
3884     tryObjectEncoding(o
); 
3885     retval 
= dictAdd(c
->db
->dict
,c
->argv
[1],o
); 
3886     if (retval 
== DICT_ERR
) { 
3887         dictReplace(c
->db
->dict
,c
->argv
[1],o
); 
3888         removeExpire(c
->db
,c
->argv
[1]); 
3890         incrRefCount(c
->argv
[1]); 
3893     addReply(c
,shared
.colon
); 
3895     addReply(c
,shared
.crlf
); 
3898 static void incrCommand(redisClient 
*c
) { 
3899     incrDecrCommand(c
,1); 
3902 static void decrCommand(redisClient 
*c
) { 
3903     incrDecrCommand(c
,-1); 
3906 static void incrbyCommand(redisClient 
*c
) { 
3907     long long incr 
= strtoll(c
->argv
[2]->ptr
, NULL
, 10); 
3908     incrDecrCommand(c
,incr
); 
3911 static void decrbyCommand(redisClient 
*c
) { 
3912     long long incr 
= strtoll(c
->argv
[2]->ptr
, NULL
, 10); 
3913     incrDecrCommand(c
,-incr
); 
3916 static void appendCommand(redisClient 
*c
) { 
3921     o 
= lookupKeyWrite(c
->db
,c
->argv
[1]); 
3923         /* Create the key */ 
3924         retval 
= dictAdd(c
->db
->dict
,c
->argv
[1],c
->argv
[2]); 
3925         incrRefCount(c
->argv
[1]); 
3926         incrRefCount(c
->argv
[2]); 
3927         totlen 
= stringObjectLen(c
->argv
[2]); 
3931         de 
= dictFind(c
->db
->dict
,c
->argv
[1]); 
3934         o 
= dictGetEntryVal(de
); 
3935         if (o
->type 
!= REDIS_STRING
) { 
3936             addReply(c
,shared
.wrongtypeerr
); 
3939         /* If the object is specially encoded or shared we have to make 
3941         if (o
->refcount 
!= 1 || o
->encoding 
!= REDIS_ENCODING_RAW
) { 
3942             robj 
*decoded 
= getDecodedObject(o
); 
3944             o 
= createStringObject(decoded
->ptr
, sdslen(decoded
->ptr
)); 
3945             decrRefCount(decoded
); 
3946             dictReplace(c
->db
->dict
,c
->argv
[1],o
); 
3949         if (c
->argv
[2]->encoding 
== REDIS_ENCODING_RAW
) { 
3950             o
->ptr 
= sdscatlen(o
->ptr
, 
3951                 c
->argv
[2]->ptr
, sdslen(c
->argv
[2]->ptr
)); 
3953             o
->ptr 
= sdscatprintf(o
->ptr
, "%ld", 
3954                 (unsigned long) c
->argv
[2]->ptr
); 
3956         totlen 
= sdslen(o
->ptr
); 
3959     addReplySds(c
,sdscatprintf(sdsempty(),":%lu\r\n",(unsigned long)totlen
)); 
3962 static void substrCommand(redisClient 
*c
) { 
3964     long start 
= atoi(c
->argv
[2]->ptr
); 
3965     long end 
= atoi(c
->argv
[3]->ptr
); 
3966     size_t rangelen
, strlen
; 
3969     if ((o 
= lookupKeyReadOrReply(c
,c
->argv
[1],shared
.nullbulk
)) == NULL 
|| 
3970         checkType(c
,o
,REDIS_STRING
)) return; 
3972     o 
= getDecodedObject(o
); 
3973     strlen 
= sdslen(o
->ptr
); 
3975     /* convert negative indexes */ 
3976     if (start 
< 0) start 
= strlen
+start
; 
3977     if (end 
< 0) end 
= strlen
+end
; 
3978     if (start 
< 0) start 
= 0; 
3979     if (end 
< 0) end 
= 0; 
3981     /* indexes sanity checks */ 
3982     if (start 
> end 
|| (size_t)start 
>= strlen
) { 
3983         /* Out of range start or start > end result in null reply */ 
3984         addReply(c
,shared
.nullbulk
); 
3988     if ((size_t)end 
>= strlen
) end 
= strlen
-1; 
3989     rangelen 
= (end
-start
)+1; 
3991     /* Return the result */ 
3992     addReplySds(c
,sdscatprintf(sdsempty(),"$%zu\r\n",rangelen
)); 
3993     range 
= sdsnewlen((char*)o
->ptr
+start
,rangelen
); 
3994     addReplySds(c
,range
); 
3995     addReply(c
,shared
.crlf
); 
3999 /* ========================= Type agnostic commands ========================= */ 
4001 static void delCommand(redisClient 
*c
) { 
4004     for (j 
= 1; j 
< c
->argc
; j
++) { 
4005         if (deleteKey(c
->db
,c
->argv
[j
])) { 
4010     addReplyLong(c
,deleted
); 
4013 static void existsCommand(redisClient 
*c
) { 
4014     addReply(c
,lookupKeyRead(c
->db
,c
->argv
[1]) ? shared
.cone 
: shared
.czero
); 
4017 static void selectCommand(redisClient 
*c
) { 
4018     int id 
= atoi(c
->argv
[1]->ptr
); 
4020     if (selectDb(c
,id
) == REDIS_ERR
) { 
4021         addReplySds(c
,sdsnew("-ERR invalid DB index\r\n")); 
4023         addReply(c
,shared
.ok
); 
4027 static void randomkeyCommand(redisClient 
*c
) { 
4031         de 
= dictGetRandomKey(c
->db
->dict
); 
4032         if (!de 
|| expireIfNeeded(c
->db
,dictGetEntryKey(de
)) == 0) break; 
4035         addReply(c
,shared
.plus
); 
4036         addReply(c
,shared
.crlf
); 
4038         addReply(c
,shared
.plus
); 
4039         addReply(c
,dictGetEntryKey(de
)); 
4040         addReply(c
,shared
.crlf
); 
4044 static void keysCommand(redisClient 
*c
) { 
4047     sds pattern 
= c
->argv
[1]->ptr
; 
4048     int plen 
= sdslen(pattern
); 
4049     unsigned long numkeys 
= 0; 
4050     robj 
*lenobj 
= createObject(REDIS_STRING
,NULL
); 
4052     di 
= dictGetIterator(c
->db
->dict
); 
4054     decrRefCount(lenobj
); 
4055     while((de 
= dictNext(di
)) != NULL
) { 
4056         robj 
*keyobj 
= dictGetEntryKey(de
); 
4058         sds key 
= keyobj
->ptr
; 
4059         if ((pattern
[0] == '*' && pattern
[1] == '\0') || 
4060             stringmatchlen(pattern
,plen
,key
,sdslen(key
),0)) { 
4061             if (expireIfNeeded(c
->db
,keyobj
) == 0) { 
4062                 addReplyBulk(c
,keyobj
); 
4067     dictReleaseIterator(di
); 
4068     lenobj
->ptr 
= sdscatprintf(sdsempty(),"*%lu\r\n",numkeys
); 
4071 static void dbsizeCommand(redisClient 
*c
) { 
4073         sdscatprintf(sdsempty(),":%lu\r\n",dictSize(c
->db
->dict
))); 
4076 static void lastsaveCommand(redisClient 
*c
) { 
4078         sdscatprintf(sdsempty(),":%lu\r\n",server
.lastsave
)); 
4081 static void typeCommand(redisClient 
*c
) { 
4085     o 
= lookupKeyRead(c
->db
,c
->argv
[1]); 
4090         case REDIS_STRING
: type 
= "+string"; break; 
4091         case REDIS_LIST
: type 
= "+list"; break; 
4092         case REDIS_SET
: type 
= "+set"; break; 
4093         case REDIS_ZSET
: type 
= "+zset"; break; 
4094         case REDIS_HASH
: type 
= "+hash"; break; 
4095         default: type 
= "+unknown"; break; 
4098     addReplySds(c
,sdsnew(type
)); 
4099     addReply(c
,shared
.crlf
); 
4102 static void saveCommand(redisClient 
*c
) { 
4103     if (server
.bgsavechildpid 
!= -1) { 
4104         addReplySds(c
,sdsnew("-ERR background save in progress\r\n")); 
4107     if (rdbSave(server
.dbfilename
) == REDIS_OK
) { 
4108         addReply(c
,shared
.ok
); 
4110         addReply(c
,shared
.err
); 
4114 static void bgsaveCommand(redisClient 
*c
) { 
4115     if (server
.bgsavechildpid 
!= -1) { 
4116         addReplySds(c
,sdsnew("-ERR background save already in progress\r\n")); 
4119     if (rdbSaveBackground(server
.dbfilename
) == REDIS_OK
) { 
4120         char *status 
= "+Background saving started\r\n"; 
4121         addReplySds(c
,sdsnew(status
)); 
4123         addReply(c
,shared
.err
); 
4127 static void shutdownCommand(redisClient 
*c
) { 
4128     redisLog(REDIS_WARNING
,"User requested shutdown, saving DB..."); 
4129     /* Kill the saving child if there is a background saving in progress. 
4130        We want to avoid race conditions, for instance our saving child may 
4131        overwrite the synchronous saving did by SHUTDOWN. */ 
4132     if (server
.bgsavechildpid 
!= -1) { 
4133         redisLog(REDIS_WARNING
,"There is a live saving child. Killing it!"); 
4134         kill(server
.bgsavechildpid
,SIGKILL
); 
4135         rdbRemoveTempFile(server
.bgsavechildpid
); 
4137     if (server
.appendonly
) { 
4138         /* Append only file: fsync() the AOF and exit */ 
4139         fsync(server
.appendfd
); 
4140         if (server
.vm_enabled
) unlink(server
.vm_swap_file
); 
4143         /* Snapshotting. Perform a SYNC SAVE and exit */ 
4144         if (rdbSave(server
.dbfilename
) == REDIS_OK
) { 
4145             if (server
.daemonize
) 
4146                 unlink(server
.pidfile
); 
4147             redisLog(REDIS_WARNING
,"%zu bytes used at exit",zmalloc_used_memory()); 
4148             redisLog(REDIS_WARNING
,"Server exit now, bye bye..."); 
4149             if (server
.vm_enabled
) unlink(server
.vm_swap_file
); 
4152             /* Ooops.. error saving! The best we can do is to continue 
4153              * operating. Note that if there was a background saving process, 
4154              * in the next cron() Redis will be notified that the background 
4155              * saving aborted, handling special stuff like slaves pending for 
4156              * synchronization... */ 
4157             redisLog(REDIS_WARNING
,"Error trying to save the DB, can't exit");  
4159                 sdsnew("-ERR can't quit, problems saving the DB\r\n")); 
4164 static void renameGenericCommand(redisClient 
*c
, int nx
) { 
4167     /* To use the same key as src and dst is probably an error */ 
4168     if (sdscmp(c
->argv
[1]->ptr
,c
->argv
[2]->ptr
) == 0) { 
4169         addReply(c
,shared
.sameobjecterr
); 
4173     if ((o 
= lookupKeyWriteOrReply(c
,c
->argv
[1],shared
.nokeyerr
)) == NULL
) 
4177     deleteIfVolatile(c
->db
,c
->argv
[2]); 
4178     if (dictAdd(c
->db
->dict
,c
->argv
[2],o
) == DICT_ERR
) { 
4181             addReply(c
,shared
.czero
); 
4184         dictReplace(c
->db
->dict
,c
->argv
[2],o
); 
4186         incrRefCount(c
->argv
[2]); 
4188     deleteKey(c
->db
,c
->argv
[1]); 
4190     addReply(c
,nx 
? shared
.cone 
: shared
.ok
); 
4193 static void renameCommand(redisClient 
*c
) { 
4194     renameGenericCommand(c
,0); 
4197 static void renamenxCommand(redisClient 
*c
) { 
4198     renameGenericCommand(c
,1); 
4201 static void moveCommand(redisClient 
*c
) { 
4206     /* Obtain source and target DB pointers */ 
4209     if (selectDb(c
,atoi(c
->argv
[2]->ptr
)) == REDIS_ERR
) { 
4210         addReply(c
,shared
.outofrangeerr
); 
4214     selectDb(c
,srcid
); /* Back to the source DB */ 
4216     /* If the user is moving using as target the same 
4217      * DB as the source DB it is probably an error. */ 
4219         addReply(c
,shared
.sameobjecterr
); 
4223     /* Check if the element exists and get a reference */ 
4224     o 
= lookupKeyWrite(c
->db
,c
->argv
[1]); 
4226         addReply(c
,shared
.czero
); 
4230     /* Try to add the element to the target DB */ 
4231     deleteIfVolatile(dst
,c
->argv
[1]); 
4232     if (dictAdd(dst
->dict
,c
->argv
[1],o
) == DICT_ERR
) { 
4233         addReply(c
,shared
.czero
); 
4236     incrRefCount(c
->argv
[1]); 
4239     /* OK! key moved, free the entry in the source DB */ 
4240     deleteKey(src
,c
->argv
[1]); 
4242     addReply(c
,shared
.cone
); 
4245 /* =================================== Lists ================================ */ 
4246 static void pushGenericCommand(redisClient 
*c
, int where
) { 
4250     lobj 
= lookupKeyWrite(c
->db
,c
->argv
[1]); 
4252         if (handleClientsWaitingListPush(c
,c
->argv
[1],c
->argv
[2])) { 
4253             addReply(c
,shared
.cone
); 
4256         lobj 
= createListObject(); 
4258         if (where 
== REDIS_HEAD
) { 
4259             listAddNodeHead(list
,c
->argv
[2]); 
4261             listAddNodeTail(list
,c
->argv
[2]); 
4263         dictAdd(c
->db
->dict
,c
->argv
[1],lobj
); 
4264         incrRefCount(c
->argv
[1]); 
4265         incrRefCount(c
->argv
[2]); 
4267         if (lobj
->type 
!= REDIS_LIST
) { 
4268             addReply(c
,shared
.wrongtypeerr
); 
4271         if (handleClientsWaitingListPush(c
,c
->argv
[1],c
->argv
[2])) { 
4272             addReply(c
,shared
.cone
); 
4276         if (where 
== REDIS_HEAD
) { 
4277             listAddNodeHead(list
,c
->argv
[2]); 
4279             listAddNodeTail(list
,c
->argv
[2]); 
4281         incrRefCount(c
->argv
[2]); 
4284     addReplySds(c
,sdscatprintf(sdsempty(),":%d\r\n",listLength(list
))); 
4287 static void lpushCommand(redisClient 
*c
) { 
4288     pushGenericCommand(c
,REDIS_HEAD
); 
4291 static void rpushCommand(redisClient 
*c
) { 
4292     pushGenericCommand(c
,REDIS_TAIL
); 
4295 static void llenCommand(redisClient 
*c
) { 
4299     if ((o 
= lookupKeyReadOrReply(c
,c
->argv
[1],shared
.czero
)) == NULL 
|| 
4300         checkType(c
,o
,REDIS_LIST
)) return; 
4303     addReplyUlong(c
,listLength(l
)); 
4306 static void lindexCommand(redisClient 
*c
) { 
4308     int index 
= atoi(c
->argv
[2]->ptr
); 
4312     if ((o 
= lookupKeyReadOrReply(c
,c
->argv
[1],shared
.nullbulk
)) == NULL 
|| 
4313         checkType(c
,o
,REDIS_LIST
)) return; 
4316     ln 
= listIndex(list
, index
); 
4318         addReply(c
,shared
.nullbulk
); 
4320         robj 
*ele 
= listNodeValue(ln
); 
4321         addReplyBulk(c
,ele
); 
4325 static void lsetCommand(redisClient 
*c
) { 
4327     int index 
= atoi(c
->argv
[2]->ptr
); 
4331     if ((o 
= lookupKeyWriteOrReply(c
,c
->argv
[1],shared
.nokeyerr
)) == NULL 
|| 
4332         checkType(c
,o
,REDIS_LIST
)) return; 
4335     ln 
= listIndex(list
, index
); 
4337         addReply(c
,shared
.outofrangeerr
); 
4339         robj 
*ele 
= listNodeValue(ln
); 
4342         listNodeValue(ln
) = c
->argv
[3]; 
4343         incrRefCount(c
->argv
[3]); 
4344         addReply(c
,shared
.ok
); 
4349 static void popGenericCommand(redisClient 
*c
, int where
) { 
4354     if ((o 
= lookupKeyWriteOrReply(c
,c
->argv
[1],shared
.nullbulk
)) == NULL 
|| 
4355         checkType(c
,o
,REDIS_LIST
)) return; 
4358     if (where 
== REDIS_HEAD
) 
4359         ln 
= listFirst(list
); 
4361         ln 
= listLast(list
); 
4364         addReply(c
,shared
.nullbulk
); 
4366         robj 
*ele 
= listNodeValue(ln
); 
4367         addReplyBulk(c
,ele
); 
4368         listDelNode(list
,ln
); 
4373 static void lpopCommand(redisClient 
*c
) { 
4374     popGenericCommand(c
,REDIS_HEAD
); 
4377 static void rpopCommand(redisClient 
*c
) { 
4378     popGenericCommand(c
,REDIS_TAIL
); 
4381 static void lrangeCommand(redisClient 
*c
) { 
4383     int start 
= atoi(c
->argv
[2]->ptr
); 
4384     int end 
= atoi(c
->argv
[3]->ptr
); 
4391     if ((o 
= lookupKeyReadOrReply(c
,c
->argv
[1],shared
.nullmultibulk
)) == NULL 
|| 
4392         checkType(c
,o
,REDIS_LIST
)) return; 
4394     llen 
= listLength(list
); 
4396     /* convert negative indexes */ 
4397     if (start 
< 0) start 
= llen
+start
; 
4398     if (end 
< 0) end 
= llen
+end
; 
4399     if (start 
< 0) start 
= 0; 
4400     if (end 
< 0) end 
= 0; 
4402     /* indexes sanity checks */ 
4403     if (start 
> end 
|| start 
>= llen
) { 
4404         /* Out of range start or start > end result in empty list */ 
4405         addReply(c
,shared
.emptymultibulk
); 
4408     if (end 
>= llen
) end 
= llen
-1; 
4409     rangelen 
= (end
-start
)+1; 
4411     /* Return the result in form of a multi-bulk reply */ 
4412     ln 
= listIndex(list
, start
); 
4413     addReplySds(c
,sdscatprintf(sdsempty(),"*%d\r\n",rangelen
)); 
4414     for (j 
= 0; j 
< rangelen
; j
++) { 
4415         ele 
= listNodeValue(ln
); 
4416         addReplyBulk(c
,ele
); 
4421 static void ltrimCommand(redisClient 
*c
) { 
4423     int start 
= atoi(c
->argv
[2]->ptr
); 
4424     int end 
= atoi(c
->argv
[3]->ptr
); 
4426     int j
, ltrim
, rtrim
; 
4430     if ((o 
= lookupKeyWriteOrReply(c
,c
->argv
[1],shared
.ok
)) == NULL 
|| 
4431         checkType(c
,o
,REDIS_LIST
)) return; 
4433     llen 
= listLength(list
); 
4435     /* convert negative indexes */ 
4436     if (start 
< 0) start 
= llen
+start
; 
4437     if (end 
< 0) end 
= llen
+end
; 
4438     if (start 
< 0) start 
= 0; 
4439     if (end 
< 0) end 
= 0; 
4441     /* indexes sanity checks */ 
4442     if (start 
> end 
|| start 
>= llen
) { 
4443         /* Out of range start or start > end result in empty list */ 
4447         if (end 
>= llen
) end 
= llen
-1; 
4452     /* Remove list elements to perform the trim */ 
4453     for (j 
= 0; j 
< ltrim
; j
++) { 
4454         ln 
= listFirst(list
); 
4455         listDelNode(list
,ln
); 
4457     for (j 
= 0; j 
< rtrim
; j
++) { 
4458         ln 
= listLast(list
); 
4459         listDelNode(list
,ln
); 
4462     addReply(c
,shared
.ok
); 
4465 static void lremCommand(redisClient 
*c
) { 
4468     listNode 
*ln
, *next
; 
4469     int toremove 
= atoi(c
->argv
[2]->ptr
); 
4473     if ((o 
= lookupKeyWriteOrReply(c
,c
->argv
[1],shared
.czero
)) == NULL 
|| 
4474         checkType(c
,o
,REDIS_LIST
)) return; 
4478         toremove 
= -toremove
; 
4481     ln 
= fromtail 
? list
->tail 
: list
->head
; 
4483         robj 
*ele 
= listNodeValue(ln
); 
4485         next 
= fromtail 
? ln
->prev 
: ln
->next
; 
4486         if (compareStringObjects(ele
,c
->argv
[3]) == 0) { 
4487             listDelNode(list
,ln
); 
4490             if (toremove 
&& removed 
== toremove
) break; 
4494     addReplySds(c
,sdscatprintf(sdsempty(),":%d\r\n",removed
)); 
4497 /* This is the semantic of this command: 
4498  *  RPOPLPUSH srclist dstlist: 
4499  *   IF LLEN(srclist) > 0 
4500  *     element = RPOP srclist 
4501  *     LPUSH dstlist element 
4508  * The idea is to be able to get an element from a list in a reliable way 
4509  * since the element is not just returned but pushed against another list 
4510  * as well. This command was originally proposed by Ezra Zygmuntowicz. 
4512 static void rpoplpushcommand(redisClient 
*c
) { 
4517     if ((sobj 
= lookupKeyWriteOrReply(c
,c
->argv
[1],shared
.nullbulk
)) == NULL 
|| 
4518         checkType(c
,sobj
,REDIS_LIST
)) return; 
4519     srclist 
= sobj
->ptr
; 
4520     ln 
= listLast(srclist
); 
4523         addReply(c
,shared
.nullbulk
); 
4525         robj 
*dobj 
= lookupKeyWrite(c
->db
,c
->argv
[2]); 
4526         robj 
*ele 
= listNodeValue(ln
); 
4529         if (dobj 
&& dobj
->type 
!= REDIS_LIST
) { 
4530             addReply(c
,shared
.wrongtypeerr
); 
4534         /* Add the element to the target list (unless it's directly 
4535          * passed to some BLPOP-ing client */ 
4536         if (!handleClientsWaitingListPush(c
,c
->argv
[2],ele
)) { 
4538                 /* Create the list if the key does not exist */ 
4539                 dobj 
= createListObject(); 
4540                 dictAdd(c
->db
->dict
,c
->argv
[2],dobj
); 
4541                 incrRefCount(c
->argv
[2]); 
4543             dstlist 
= dobj
->ptr
; 
4544             listAddNodeHead(dstlist
,ele
); 
4548         /* Send the element to the client as reply as well */ 
4549         addReplyBulk(c
,ele
); 
4551         /* Finally remove the element from the source list */ 
4552         listDelNode(srclist
,ln
); 
4557 /* ==================================== Sets ================================ */ 
4559 static void saddCommand(redisClient 
*c
) { 
4562     set 
= lookupKeyWrite(c
->db
,c
->argv
[1]); 
4564         set 
= createSetObject(); 
4565         dictAdd(c
->db
->dict
,c
->argv
[1],set
); 
4566         incrRefCount(c
->argv
[1]); 
4568         if (set
->type 
!= REDIS_SET
) { 
4569             addReply(c
,shared
.wrongtypeerr
); 
4573     if (dictAdd(set
->ptr
,c
->argv
[2],NULL
) == DICT_OK
) { 
4574         incrRefCount(c
->argv
[2]); 
4576         addReply(c
,shared
.cone
); 
4578         addReply(c
,shared
.czero
); 
4582 static void sremCommand(redisClient 
*c
) { 
4585     if ((set 
= lookupKeyWriteOrReply(c
,c
->argv
[1],shared
.czero
)) == NULL 
|| 
4586         checkType(c
,set
,REDIS_SET
)) return; 
4588     if (dictDelete(set
->ptr
,c
->argv
[2]) == DICT_OK
) { 
4590         if (htNeedsResize(set
->ptr
)) dictResize(set
->ptr
); 
4591         addReply(c
,shared
.cone
); 
4593         addReply(c
,shared
.czero
); 
4597 static void smoveCommand(redisClient 
*c
) { 
4598     robj 
*srcset
, *dstset
; 
4600     srcset 
= lookupKeyWrite(c
->db
,c
->argv
[1]); 
4601     dstset 
= lookupKeyWrite(c
->db
,c
->argv
[2]); 
4603     /* If the source key does not exist return 0, if it's of the wrong type 
4605     if (srcset 
== NULL 
|| srcset
->type 
!= REDIS_SET
) { 
4606         addReply(c
, srcset 
? shared
.wrongtypeerr 
: shared
.czero
); 
4609     /* Error if the destination key is not a set as well */ 
4610     if (dstset 
&& dstset
->type 
!= REDIS_SET
) { 
4611         addReply(c
,shared
.wrongtypeerr
); 
4614     /* Remove the element from the source set */ 
4615     if (dictDelete(srcset
->ptr
,c
->argv
[3]) == DICT_ERR
) { 
4616         /* Key not found in the src set! return zero */ 
4617         addReply(c
,shared
.czero
); 
4621     /* Add the element to the destination set */ 
4623         dstset 
= createSetObject(); 
4624         dictAdd(c
->db
->dict
,c
->argv
[2],dstset
); 
4625         incrRefCount(c
->argv
[2]); 
4627     if (dictAdd(dstset
->ptr
,c
->argv
[3],NULL
) == DICT_OK
) 
4628         incrRefCount(c
->argv
[3]); 
4629     addReply(c
,shared
.cone
); 
4632 static void sismemberCommand(redisClient 
*c
) { 
4635     if ((set 
= lookupKeyReadOrReply(c
,c
->argv
[1],shared
.czero
)) == NULL 
|| 
4636         checkType(c
,set
,REDIS_SET
)) return; 
4638     if (dictFind(set
->ptr
,c
->argv
[2])) 
4639         addReply(c
,shared
.cone
); 
4641         addReply(c
,shared
.czero
); 
4644 static void scardCommand(redisClient 
*c
) { 
4648     if ((o 
= lookupKeyReadOrReply(c
,c
->argv
[1],shared
.czero
)) == NULL 
|| 
4649         checkType(c
,o
,REDIS_SET
)) return; 
4652     addReplyUlong(c
,dictSize(s
)); 
4655 static void spopCommand(redisClient 
*c
) { 
4659     if ((set 
= lookupKeyWriteOrReply(c
,c
->argv
[1],shared
.nullbulk
)) == NULL 
|| 
4660         checkType(c
,set
,REDIS_SET
)) return; 
4662     de 
= dictGetRandomKey(set
->ptr
); 
4664         addReply(c
,shared
.nullbulk
); 
4666         robj 
*ele 
= dictGetEntryKey(de
); 
4668         addReplyBulk(c
,ele
); 
4669         dictDelete(set
->ptr
,ele
); 
4670         if (htNeedsResize(set
->ptr
)) dictResize(set
->ptr
); 
4675 static void srandmemberCommand(redisClient 
*c
) { 
4679     if ((set 
= lookupKeyReadOrReply(c
,c
->argv
[1],shared
.nullbulk
)) == NULL 
|| 
4680         checkType(c
,set
,REDIS_SET
)) return; 
4682     de 
= dictGetRandomKey(set
->ptr
); 
4684         addReply(c
,shared
.nullbulk
); 
4686         robj 
*ele 
= dictGetEntryKey(de
); 
4688         addReplyBulk(c
,ele
); 
4692 static int qsortCompareSetsByCardinality(const void *s1
, const void *s2
) { 
4693     dict 
**d1 
= (void*) s1
, **d2 
= (void*) s2
; 
4695     return dictSize(*d1
)-dictSize(*d2
); 
4698 static void sinterGenericCommand(redisClient 
*c
, robj 
**setskeys
, unsigned long setsnum
, robj 
*dstkey
) { 
4699     dict 
**dv 
= zmalloc(sizeof(dict
*)*setsnum
); 
4702     robj 
*lenobj 
= NULL
, *dstset 
= NULL
; 
4703     unsigned long j
, cardinality 
= 0; 
4705     for (j 
= 0; j 
< setsnum
; j
++) { 
4709                     lookupKeyWrite(c
->db
,setskeys
[j
]) : 
4710                     lookupKeyRead(c
->db
,setskeys
[j
]); 
4714                 if (deleteKey(c
->db
,dstkey
)) 
4716                 addReply(c
,shared
.czero
); 
4718                 addReply(c
,shared
.nullmultibulk
); 
4722         if (setobj
->type 
!= REDIS_SET
) { 
4724             addReply(c
,shared
.wrongtypeerr
); 
4727         dv
[j
] = setobj
->ptr
; 
4729     /* Sort sets from the smallest to largest, this will improve our 
4730      * algorithm's performace */ 
4731     qsort(dv
,setsnum
,sizeof(dict
*),qsortCompareSetsByCardinality
); 
4733     /* The first thing we should output is the total number of elements... 
4734      * since this is a multi-bulk write, but at this stage we don't know 
4735      * the intersection set size, so we use a trick, append an empty object 
4736      * to the output list and save the pointer to later modify it with the 
4739         lenobj 
= createObject(REDIS_STRING
,NULL
); 
4741         decrRefCount(lenobj
); 
4743         /* If we have a target key where to store the resulting set 
4744          * create this key with an empty set inside */ 
4745         dstset 
= createSetObject(); 
4748     /* Iterate all the elements of the first (smallest) set, and test 
4749      * the element against all the other sets, if at least one set does 
4750      * not include the element it is discarded */ 
4751     di 
= dictGetIterator(dv
[0]); 
4753     while((de 
= dictNext(di
)) != NULL
) { 
4756         for (j 
= 1; j 
< setsnum
; j
++) 
4757             if (dictFind(dv
[j
],dictGetEntryKey(de
)) == NULL
) break; 
4759             continue; /* at least one set does not contain the member */ 
4760         ele 
= dictGetEntryKey(de
); 
4762             addReplyBulk(c
,ele
); 
4765             dictAdd(dstset
->ptr
,ele
,NULL
); 
4769     dictReleaseIterator(di
); 
4772         /* Store the resulting set into the target */ 
4773         deleteKey(c
->db
,dstkey
); 
4774         dictAdd(c
->db
->dict
,dstkey
,dstset
); 
4775         incrRefCount(dstkey
); 
4779         lenobj
->ptr 
= sdscatprintf(sdsempty(),"*%lu\r\n",cardinality
); 
4781         addReplySds(c
,sdscatprintf(sdsempty(),":%lu\r\n", 
4782             dictSize((dict
*)dstset
->ptr
))); 
4788 static void sinterCommand(redisClient 
*c
) { 
4789     sinterGenericCommand(c
,c
->argv
+1,c
->argc
-1,NULL
); 
4792 static void sinterstoreCommand(redisClient 
*c
) { 
4793     sinterGenericCommand(c
,c
->argv
+2,c
->argc
-2,c
->argv
[1]); 
/* Operation selectors passed as the 'op' argument of the generic set and
 * sorted set commands (sunionDiffGenericCommand, zunionInterGenericCommand). */
4796 #define REDIS_OP_UNION 0 
4797 #define REDIS_OP_DIFF 1 
4798 #define REDIS_OP_INTER 2 
4800 static void sunionDiffGenericCommand(redisClient 
*c
, robj 
**setskeys
, int setsnum
, robj 
*dstkey
, int op
) { 
4801     dict 
**dv 
= zmalloc(sizeof(dict
*)*setsnum
); 
4804     robj 
*dstset 
= NULL
; 
4805     int j
, cardinality 
= 0; 
4807     for (j 
= 0; j 
< setsnum
; j
++) { 
4811                     lookupKeyWrite(c
->db
,setskeys
[j
]) : 
4812                     lookupKeyRead(c
->db
,setskeys
[j
]); 
4817         if (setobj
->type 
!= REDIS_SET
) { 
4819             addReply(c
,shared
.wrongtypeerr
); 
4822         dv
[j
] = setobj
->ptr
; 
4825     /* We need a temp set object to store our union. If the dstkey 
4826      * is not NULL (that is, we are inside an SUNIONSTORE operation) then 
4827      * this set object will be the resulting object to set into the target key*/ 
4828     dstset 
= createSetObject(); 
4830     /* Iterate all the elements of all the sets, add every element a single 
4831      * time to the result set */ 
4832     for (j 
= 0; j 
< setsnum
; j
++) { 
4833         if (op 
== REDIS_OP_DIFF 
&& j 
== 0 && !dv
[j
]) break; /* result set is empty */ 
4834         if (!dv
[j
]) continue; /* non existing keys are like empty sets */ 
4836         di 
= dictGetIterator(dv
[j
]); 
4838         while((de 
= dictNext(di
)) != NULL
) { 
4841             /* dictAdd will not add the same element multiple times */ 
4842             ele 
= dictGetEntryKey(de
); 
4843             if (op 
== REDIS_OP_UNION 
|| j 
== 0) { 
4844                 if (dictAdd(dstset
->ptr
,ele
,NULL
) == DICT_OK
) { 
4848             } else if (op 
== REDIS_OP_DIFF
) { 
4849                 if (dictDelete(dstset
->ptr
,ele
) == DICT_OK
) { 
4854         dictReleaseIterator(di
); 
4856         if (op 
== REDIS_OP_DIFF 
&& cardinality 
== 0) break; /* result set is empty */ 
4859     /* Output the content of the resulting set, if not in STORE mode */ 
4861         addReplySds(c
,sdscatprintf(sdsempty(),"*%d\r\n",cardinality
)); 
4862         di 
= dictGetIterator(dstset
->ptr
); 
4863         while((de 
= dictNext(di
)) != NULL
) { 
4866             ele 
= dictGetEntryKey(de
); 
4867             addReplyBulk(c
,ele
); 
4869         dictReleaseIterator(di
); 
4871         /* If we have a target key where to store the resulting set 
4872          * create this key with the result set inside */ 
4873         deleteKey(c
->db
,dstkey
); 
4874         dictAdd(c
->db
->dict
,dstkey
,dstset
); 
4875         incrRefCount(dstkey
); 
4880         decrRefCount(dstset
); 
4882         addReplySds(c
,sdscatprintf(sdsempty(),":%lu\r\n", 
4883             dictSize((dict
*)dstset
->ptr
))); 
4889 static void sunionCommand(redisClient 
*c
) { 
4890     sunionDiffGenericCommand(c
,c
->argv
+1,c
->argc
-1,NULL
,REDIS_OP_UNION
); 
4893 static void sunionstoreCommand(redisClient 
*c
) { 
4894     sunionDiffGenericCommand(c
,c
->argv
+2,c
->argc
-2,c
->argv
[1],REDIS_OP_UNION
); 
4897 static void sdiffCommand(redisClient 
*c
) { 
4898     sunionDiffGenericCommand(c
,c
->argv
+1,c
->argc
-1,NULL
,REDIS_OP_DIFF
); 
4901 static void sdiffstoreCommand(redisClient 
*c
) { 
4902     sunionDiffGenericCommand(c
,c
->argv
+2,c
->argc
-2,c
->argv
[1],REDIS_OP_DIFF
); 
/* ==================================== ZSets =============================== */

/* ZSETs are ordered sets using two data structures to hold the same elements
 * in order to get O(log(N)) INSERT and REMOVE operations into a sorted
 * data structure.
 *
 * The elements are added to a hash table mapping Redis objects to scores.
 * At the same time the elements are added to a skip list mapping scores
 * to Redis objects (so objects are sorted by scores in this "view"). */

/* This skiplist implementation is almost a C translation of the original
 * algorithm described by William Pugh in "Skip Lists: A Probabilistic
 * Alternative to Balanced Trees", modified in three ways:
 * a) this implementation allows for repeated values.
 * b) the comparison is not just by key (our 'score') but by satellite data.
 * c) there is a back pointer, so it's a doubly linked list with the back
 * pointers being only at "level 1". This allows the list to be traversed
 * from tail to head, useful for ZREVRANGE. */
4924 static zskiplistNode 
*zslCreateNode(int level
, double score
, robj 
*obj
) { 
4925     zskiplistNode 
*zn 
= zmalloc(sizeof(*zn
)); 
4927     zn
->forward 
= zmalloc(sizeof(zskiplistNode
*) * level
); 
4929         zn
->span 
= zmalloc(sizeof(unsigned int) * (level 
- 1)); 
4935 static zskiplist 
*zslCreate(void) { 
4939     zsl 
= zmalloc(sizeof(*zsl
)); 
4942     zsl
->header 
= zslCreateNode(ZSKIPLIST_MAXLEVEL
,0,NULL
); 
4943     for (j 
= 0; j 
< ZSKIPLIST_MAXLEVEL
; j
++) { 
4944         zsl
->header
->forward
[j
] = NULL
; 
4946         /* span has space for ZSKIPLIST_MAXLEVEL-1 elements */ 
4947         if (j 
< ZSKIPLIST_MAXLEVEL
-1) 
4948             zsl
->header
->span
[j
] = 0; 
4950     zsl
->header
->backward 
= NULL
; 
4955 static void zslFreeNode(zskiplistNode 
*node
) { 
4956     decrRefCount(node
->obj
); 
4957     zfree(node
->forward
); 
4962 static void zslFree(zskiplist 
*zsl
) { 
4963     zskiplistNode 
*node 
= zsl
->header
->forward
[0], *next
; 
4965     zfree(zsl
->header
->forward
); 
4966     zfree(zsl
->header
->span
); 
4969         next 
= node
->forward
[0]; 
4976 static int zslRandomLevel(void) { 
4978     while ((random()&0xFFFF) < (ZSKIPLIST_P 
* 0xFFFF)) 
4983 static void zslInsert(zskiplist 
*zsl
, double score
, robj 
*obj
) { 
4984     zskiplistNode 
*update
[ZSKIPLIST_MAXLEVEL
], *x
; 
4985     unsigned int rank
[ZSKIPLIST_MAXLEVEL
]; 
4989     for (i 
= zsl
->level
-1; i 
>= 0; i
--) { 
4990         /* store rank that is crossed to reach the insert position */ 
4991         rank
[i
] = i 
== (zsl
->level
-1) ? 0 : rank
[i
+1]; 
4993         while (x
->forward
[i
] && 
4994             (x
->forward
[i
]->score 
< score 
|| 
4995                 (x
->forward
[i
]->score 
== score 
&& 
4996                 compareStringObjects(x
->forward
[i
]->obj
,obj
) < 0))) { 
4997             rank
[i
] += i 
> 0 ? x
->span
[i
-1] : 1; 
5002     /* we assume the key is not already inside, since we allow duplicated 
5003      * scores, and the re-insertion of score and redis object should never 
5004      * happpen since the caller of zslInsert() should test in the hash table 
5005      * if the element is already inside or not. */ 
5006     level 
= zslRandomLevel(); 
5007     if (level 
> zsl
->level
) { 
5008         for (i 
= zsl
->level
; i 
< level
; i
++) { 
5010             update
[i
] = zsl
->header
; 
5011             update
[i
]->span
[i
-1] = zsl
->length
; 
5015     x 
= zslCreateNode(level
,score
,obj
); 
5016     for (i 
= 0; i 
< level
; i
++) { 
5017         x
->forward
[i
] = update
[i
]->forward
[i
]; 
5018         update
[i
]->forward
[i
] = x
; 
5020         /* update span covered by update[i] as x is inserted here */ 
5022             x
->span
[i
-1] = update
[i
]->span
[i
-1] - (rank
[0] - rank
[i
]); 
5023             update
[i
]->span
[i
-1] = (rank
[0] - rank
[i
]) + 1; 
5027     /* increment span for untouched levels */ 
5028     for (i 
= level
; i 
< zsl
->level
; i
++) { 
5029         update
[i
]->span
[i
-1]++; 
5032     x
->backward 
= (update
[0] == zsl
->header
) ? NULL 
: update
[0]; 
5034         x
->forward
[0]->backward 
= x
; 
5040 /* Internal function used by zslDelete, zslDeleteByScore and zslDeleteByRank */ 
5041 void zslDeleteNode(zskiplist 
*zsl
, zskiplistNode 
*x
, zskiplistNode 
**update
) { 
5043     for (i 
= 0; i 
< zsl
->level
; i
++) { 
5044         if (update
[i
]->forward
[i
] == x
) { 
5046                 update
[i
]->span
[i
-1] += x
->span
[i
-1] - 1; 
5048             update
[i
]->forward
[i
] = x
->forward
[i
]; 
5050             /* invariant: i > 0, because update[0]->forward[0] 
5051              * is always equal to x */ 
5052             update
[i
]->span
[i
-1] -= 1; 
5055     if (x
->forward
[0]) { 
5056         x
->forward
[0]->backward 
= x
->backward
; 
5058         zsl
->tail 
= x
->backward
; 
5060     while(zsl
->level 
> 1 && zsl
->header
->forward
[zsl
->level
-1] == NULL
) 
5065 /* Delete an element with matching score/object from the skiplist. */ 
5066 static int zslDelete(zskiplist 
*zsl
, double score
, robj 
*obj
) { 
5067     zskiplistNode 
*update
[ZSKIPLIST_MAXLEVEL
], *x
; 
5071     for (i 
= zsl
->level
-1; i 
>= 0; i
--) { 
5072         while (x
->forward
[i
] && 
5073             (x
->forward
[i
]->score 
< score 
|| 
5074                 (x
->forward
[i
]->score 
== score 
&& 
5075                 compareStringObjects(x
->forward
[i
]->obj
,obj
) < 0))) 
5079     /* We may have multiple elements with the same score, what we need 
5080      * is to find the element with both the right score and object. */ 
5082     if (x 
&& score 
== x
->score 
&& compareStringObjects(x
->obj
,obj
) == 0) { 
5083         zslDeleteNode(zsl
, x
, update
); 
5087         return 0; /* not found */ 
5089     return 0; /* not found */ 
5092 /* Delete all the elements with score between min and max from the skiplist. 
5093  * Min and mx are inclusive, so a score >= min || score <= max is deleted. 
5094  * Note that this function takes the reference to the hash table view of the 
5095  * sorted set, in order to remove the elements from the hash table too. */ 
5096 static unsigned long zslDeleteRangeByScore(zskiplist 
*zsl
, double min
, double max
, dict 
*dict
) { 
5097     zskiplistNode 
*update
[ZSKIPLIST_MAXLEVEL
], *x
; 
5098     unsigned long removed 
= 0; 
5102     for (i 
= zsl
->level
-1; i 
>= 0; i
--) { 
5103         while (x
->forward
[i
] && x
->forward
[i
]->score 
< min
) 
5107     /* We may have multiple elements with the same score, what we need 
5108      * is to find the element with both the right score and object. */ 
5110     while (x 
&& x
->score 
<= max
) { 
5111         zskiplistNode 
*next 
= x
->forward
[0]; 
5112         zslDeleteNode(zsl
, x
, update
); 
5113         dictDelete(dict
,x
->obj
); 
5118     return removed
; /* not found */ 
5121 /* Delete all the elements with rank between start and end from the skiplist. 
5122  * Start and end are inclusive. Note that start and end need to be 1-based */ 
5123 static unsigned long zslDeleteRangeByRank(zskiplist 
*zsl
, unsigned int start
, unsigned int end
, dict 
*dict
) { 
5124     zskiplistNode 
*update
[ZSKIPLIST_MAXLEVEL
], *x
; 
5125     unsigned long traversed 
= 0, removed 
= 0; 
5129     for (i 
= zsl
->level
-1; i 
>= 0; i
--) { 
5130         while (x
->forward
[i
] && (traversed 
+ (i 
> 0 ? x
->span
[i
-1] : 1)) < start
) { 
5131             traversed 
+= i 
> 0 ? x
->span
[i
-1] : 1; 
5139     while (x 
&& traversed 
<= end
) { 
5140         zskiplistNode 
*next 
= x
->forward
[0]; 
5141         zslDeleteNode(zsl
, x
, update
); 
5142         dictDelete(dict
,x
->obj
); 
5151 /* Find the first node having a score equal or greater than the specified one. 
5152  * Returns NULL if there is no match. */ 
5153 static zskiplistNode 
*zslFirstWithScore(zskiplist 
*zsl
, double score
) { 
5158     for (i 
= zsl
->level
-1; i 
>= 0; i
--) { 
5159         while (x
->forward
[i
] && x
->forward
[i
]->score 
< score
) 
5162     /* We may have multiple elements with the same score, what we need 
5163      * is to find the element with both the right score and object. */ 
5164     return x
->forward
[0]; 
5167 /* Find the rank for an element by both score and key. 
5168  * Returns 0 when the element cannot be found, rank otherwise. 
5169  * Note that the rank is 1-based due to the span of zsl->header to the 
5171 static unsigned long zslGetRank(zskiplist 
*zsl
, double score
, robj 
*o
) { 
5173     unsigned long rank 
= 0; 
5177     for (i 
= zsl
->level
-1; i 
>= 0; i
--) { 
5178         while (x
->forward
[i
] && 
5179             (x
->forward
[i
]->score 
< score 
|| 
5180                 (x
->forward
[i
]->score 
== score 
&& 
5181                 compareStringObjects(x
->forward
[i
]->obj
,o
) <= 0))) { 
5182             rank 
+= i 
> 0 ? x
->span
[i
-1] : 1; 
5186         /* x might be equal to zsl->header, so test if obj is non-NULL */ 
5187         if (x
->obj 
&& compareStringObjects(x
->obj
,o
) == 0) { 
5194 /* Finds an element by its rank. The rank argument needs to be 1-based. */ 
5195 zskiplistNode
* zslGetElementByRank(zskiplist 
*zsl
, unsigned long rank
) { 
5197     unsigned long traversed 
= 0; 
5201     for (i 
= zsl
->level
-1; i 
>= 0; i
--) { 
5202         while (x
->forward
[i
] && (traversed 
+ (i
>0 ? x
->span
[i
-1] : 1)) <= rank
) 
5204             traversed 
+= i 
> 0 ? x
->span
[i
-1] : 1; 
5207         if (traversed 
== rank
) { 
5214 /* The actual Z-commands implementations */ 
5216 /* This generic command implements both ZADD and ZINCRBY. 
5217  * scoreval is the score if the operation is a ZADD (doincrement == 0) or 
5218  * the increment if the operation is a ZINCRBY (doincrement == 1). */ 
5219 static void zaddGenericCommand(redisClient 
*c
, robj 
*key
, robj 
*ele
, double scoreval
, int doincrement
) { 
5224     zsetobj 
= lookupKeyWrite(c
->db
,key
); 
5225     if (zsetobj 
== NULL
) { 
5226         zsetobj 
= createZsetObject(); 
5227         dictAdd(c
->db
->dict
,key
,zsetobj
); 
5230         if (zsetobj
->type 
!= REDIS_ZSET
) { 
5231             addReply(c
,shared
.wrongtypeerr
); 
5237     /* Ok now since we implement both ZADD and ZINCRBY here the code 
5238      * needs to handle the two different conditions. It's all about setting 
5239      * '*score', that is, the new score to set, to the right value. */ 
5240     score 
= zmalloc(sizeof(double)); 
5244         /* Read the old score. If the element was not present starts from 0 */ 
5245         de 
= dictFind(zs
->dict
,ele
); 
5247             double *oldscore 
= dictGetEntryVal(de
); 
5248             *score 
= *oldscore 
+ scoreval
; 
5256     /* What follows is a simple remove and re-insert operation that is common 
5257      * to both ZADD and ZINCRBY... */ 
5258     if (dictAdd(zs
->dict
,ele
,score
) == DICT_OK
) { 
5259         /* case 1: New element */ 
5260         incrRefCount(ele
); /* added to hash */ 
5261         zslInsert(zs
->zsl
,*score
,ele
); 
5262         incrRefCount(ele
); /* added to skiplist */ 
5265             addReplyDouble(c
,*score
); 
5267             addReply(c
,shared
.cone
); 
5272         /* case 2: Score update operation */ 
5273         de 
= dictFind(zs
->dict
,ele
); 
5274         redisAssert(de 
!= NULL
); 
5275         oldscore 
= dictGetEntryVal(de
); 
5276         if (*score 
!= *oldscore
) { 
5279             /* Remove and insert the element in the skip list with new score */ 
5280             deleted 
= zslDelete(zs
->zsl
,*oldscore
,ele
); 
5281             redisAssert(deleted 
!= 0); 
5282             zslInsert(zs
->zsl
,*score
,ele
); 
5284             /* Update the score in the hash table */ 
5285             dictReplace(zs
->dict
,ele
,score
); 
5291             addReplyDouble(c
,*score
); 
5293             addReply(c
,shared
.czero
); 
5297 static void zaddCommand(redisClient 
*c
) { 
5300     scoreval 
= strtod(c
->argv
[2]->ptr
,NULL
); 
5301     zaddGenericCommand(c
,c
->argv
[1],c
->argv
[3],scoreval
,0); 
5304 static void zincrbyCommand(redisClient 
*c
) { 
5307     scoreval 
= strtod(c
->argv
[2]->ptr
,NULL
); 
5308     zaddGenericCommand(c
,c
->argv
[1],c
->argv
[3],scoreval
,1); 
5311 static void zremCommand(redisClient 
*c
) { 
5318     if ((zsetobj 
= lookupKeyWriteOrReply(c
,c
->argv
[1],shared
.czero
)) == NULL 
|| 
5319         checkType(c
,zsetobj
,REDIS_ZSET
)) return; 
5322     de 
= dictFind(zs
->dict
,c
->argv
[2]); 
5324         addReply(c
,shared
.czero
); 
5327     /* Delete from the skiplist */ 
5328     oldscore 
= dictGetEntryVal(de
); 
5329     deleted 
= zslDelete(zs
->zsl
,*oldscore
,c
->argv
[2]); 
5330     redisAssert(deleted 
!= 0); 
5332     /* Delete from the hash table */ 
5333     dictDelete(zs
->dict
,c
->argv
[2]); 
5334     if (htNeedsResize(zs
->dict
)) dictResize(zs
->dict
); 
5336     addReply(c
,shared
.cone
); 
5339 static void zremrangebyscoreCommand(redisClient 
*c
) { 
5340     double min 
= strtod(c
->argv
[2]->ptr
,NULL
); 
5341     double max 
= strtod(c
->argv
[3]->ptr
,NULL
); 
5346     if ((zsetobj 
= lookupKeyWriteOrReply(c
,c
->argv
[1],shared
.czero
)) == NULL 
|| 
5347         checkType(c
,zsetobj
,REDIS_ZSET
)) return; 
5350     deleted 
= zslDeleteRangeByScore(zs
->zsl
,min
,max
,zs
->dict
); 
5351     if (htNeedsResize(zs
->dict
)) dictResize(zs
->dict
); 
5352     server
.dirty 
+= deleted
; 
5353     addReplyLong(c
,deleted
); 
5356 static void zremrangebyrankCommand(redisClient 
*c
) { 
5357     int start 
= atoi(c
->argv
[2]->ptr
); 
5358     int end 
= atoi(c
->argv
[3]->ptr
); 
5364     if ((zsetobj 
= lookupKeyWriteOrReply(c
,c
->argv
[1],shared
.czero
)) == NULL 
|| 
5365         checkType(c
,zsetobj
,REDIS_ZSET
)) return; 
5367     llen 
= zs
->zsl
->length
; 
5369     /* convert negative indexes */ 
5370     if (start 
< 0) start 
= llen
+start
; 
5371     if (end 
< 0) end 
= llen
+end
; 
5372     if (start 
< 0) start 
= 0; 
5373     if (end 
< 0) end 
= 0; 
5375     /* indexes sanity checks */ 
5376     if (start 
> end 
|| start 
>= llen
) { 
5377         addReply(c
,shared
.czero
); 
5380     if (end 
>= llen
) end 
= llen
-1; 
5382     /* increment start and end because zsl*Rank functions 
5383      * use 1-based rank */ 
5384     deleted 
= zslDeleteRangeByRank(zs
->zsl
,start
+1,end
+1,zs
->dict
); 
5385     if (htNeedsResize(zs
->dict
)) dictResize(zs
->dict
); 
5386     server
.dirty 
+= deleted
; 
5387     addReplyLong(c
, deleted
); 
5395 static int qsortCompareZsetopsrcByCardinality(const void *s1
, const void *s2
) { 
5396     zsetopsrc 
*d1 
= (void*) s1
, *d2 
= (void*) s2
; 
5397     unsigned long size1
, size2
; 
5398     size1 
= d1
->dict 
? dictSize(d1
->dict
) : 0; 
5399     size2 
= d2
->dict 
? dictSize(d2
->dict
) : 0; 
5400     return size1 
- size2
; 
/* Aggregation modes accepted by zunionInterAggregate(): how the weighted
 * scores of the same element found in several sorted sets are combined. */
5403 #define REDIS_AGGR_SUM 1 
5404 #define REDIS_AGGR_MIN 2 
5405 #define REDIS_AGGR_MAX 3 
5407 inline static void zunionInterAggregate(double *target
, double val
, int aggregate
) { 
5408     if (aggregate 
== REDIS_AGGR_SUM
) { 
5409         *target 
= *target 
+ val
; 
5410     } else if (aggregate 
== REDIS_AGGR_MIN
) { 
5411         *target 
= val 
< *target 
? val 
: *target
; 
5412     } else if (aggregate 
== REDIS_AGGR_MAX
) { 
5413         *target 
= val 
> *target 
? val 
: *target
; 
5416         redisAssert(0 != 0); 
5420 static void zunionInterGenericCommand(redisClient 
*c
, robj 
*dstkey
, int op
) { 
5422     int aggregate 
= REDIS_AGGR_SUM
; 
5429     /* expect zsetnum input keys to be given */ 
5430     zsetnum 
= atoi(c
->argv
[2]->ptr
); 
5432         addReplySds(c
,sdsnew("-ERR at least 1 input key is needed for ZUNION/ZINTER\r\n")); 
5436     /* test if the expected number of keys would overflow */ 
5437     if (3+zsetnum 
> c
->argc
) { 
5438         addReply(c
,shared
.syntaxerr
); 
5442     /* read keys to be used for input */ 
5443     src 
= zmalloc(sizeof(zsetopsrc
) * zsetnum
); 
5444     for (i 
= 0, j 
= 3; i 
< zsetnum
; i
++, j
++) { 
5445         robj 
*zsetobj 
= lookupKeyWrite(c
->db
,c
->argv
[j
]); 
5449             if (zsetobj
->type 
!= REDIS_ZSET
) { 
5451                 addReply(c
,shared
.wrongtypeerr
); 
5454             src
[i
].dict 
= ((zset
*)zsetobj
->ptr
)->dict
; 
5457         /* default all weights to 1 */ 
5458         src
[i
].weight 
= 1.0; 
5461     /* parse optional extra arguments */ 
5463         int remaining 
= c
->argc 
- j
; 
5466             if (remaining 
>= (zsetnum 
+ 1) && !strcasecmp(c
->argv
[j
]->ptr
,"weights")) { 
5468                 for (i 
= 0; i 
< zsetnum
; i
++, j
++, remaining
--) { 
5469                     src
[i
].weight 
= strtod(c
->argv
[j
]->ptr
, NULL
); 
5471             } else if (remaining 
>= 2 && !strcasecmp(c
->argv
[j
]->ptr
,"aggregate")) { 
5473                 if (!strcasecmp(c
->argv
[j
]->ptr
,"sum")) { 
5474                     aggregate 
= REDIS_AGGR_SUM
; 
5475                 } else if (!strcasecmp(c
->argv
[j
]->ptr
,"min")) { 
5476                     aggregate 
= REDIS_AGGR_MIN
; 
5477                 } else if (!strcasecmp(c
->argv
[j
]->ptr
,"max")) { 
5478                     aggregate 
= REDIS_AGGR_MAX
; 
5481                     addReply(c
,shared
.syntaxerr
); 
5487                 addReply(c
,shared
.syntaxerr
); 
5493     /* sort sets from the smallest to largest, this will improve our 
5494      * algorithm's performance */ 
5495     qsort(src
,zsetnum
,sizeof(zsetopsrc
), qsortCompareZsetopsrcByCardinality
); 
5497     dstobj 
= createZsetObject(); 
5498     dstzset 
= dstobj
->ptr
; 
5500     if (op 
== REDIS_OP_INTER
) { 
5501         /* skip going over all entries if the smallest zset is NULL or empty */ 
5502         if (src
[0].dict 
&& dictSize(src
[0].dict
) > 0) { 
5503             /* precondition: as src[0].dict is non-empty and the zsets are ordered 
5504              * from small to large, all src[i > 0].dict are non-empty too */ 
5505             di 
= dictGetIterator(src
[0].dict
); 
5506             while((de 
= dictNext(di
)) != NULL
) { 
5507                 double *score 
= zmalloc(sizeof(double)), value
; 
5508                 *score 
= src
[0].weight 
* (*(double*)dictGetEntryVal(de
)); 
5510                 for (j 
= 1; j 
< zsetnum
; j
++) { 
5511                     dictEntry 
*other 
= dictFind(src
[j
].dict
,dictGetEntryKey(de
)); 
5513                         value 
= src
[j
].weight 
* (*(double*)dictGetEntryVal(other
)); 
5514                         zunionInterAggregate(score
, value
, aggregate
); 
5520                 /* skip entry when not present in every source dict */ 
5524                     robj 
*o 
= dictGetEntryKey(de
); 
5525                     dictAdd(dstzset
->dict
,o
,score
); 
5526                     incrRefCount(o
); /* added to dictionary */ 
5527                     zslInsert(dstzset
->zsl
,*score
,o
); 
5528                     incrRefCount(o
); /* added to skiplist */ 
5531             dictReleaseIterator(di
); 
5533     } else if (op 
== REDIS_OP_UNION
) { 
5534         for (i 
= 0; i 
< zsetnum
; i
++) { 
5535             if (!src
[i
].dict
) continue; 
5537             di 
= dictGetIterator(src
[i
].dict
); 
5538             while((de 
= dictNext(di
)) != NULL
) { 
5539                 /* skip key when already processed */ 
5540                 if (dictFind(dstzset
->dict
,dictGetEntryKey(de
)) != NULL
) continue; 
5542                 double *score 
= zmalloc(sizeof(double)), value
; 
5543                 *score 
= src
[i
].weight 
* (*(double*)dictGetEntryVal(de
)); 
5545                 /* because the zsets are sorted by size, its only possible 
5546                  * for sets at larger indices to hold this entry */ 
5547                 for (j 
= (i
+1); j 
< zsetnum
; j
++) { 
5548                     dictEntry 
*other 
= dictFind(src
[j
].dict
,dictGetEntryKey(de
)); 
5550                         value 
= src
[j
].weight 
* (*(double*)dictGetEntryVal(other
)); 
5551                         zunionInterAggregate(score
, value
, aggregate
); 
5555                 robj 
*o 
= dictGetEntryKey(de
); 
5556                 dictAdd(dstzset
->dict
,o
,score
); 
5557                 incrRefCount(o
); /* added to dictionary */ 
5558                 zslInsert(dstzset
->zsl
,*score
,o
); 
5559                 incrRefCount(o
); /* added to skiplist */ 
5561             dictReleaseIterator(di
); 
5564         /* unknown operator */ 
5565         redisAssert(op 
== REDIS_OP_INTER 
|| op 
== REDIS_OP_UNION
); 
5568     deleteKey(c
->db
,dstkey
); 
5569     dictAdd(c
->db
->dict
,dstkey
,dstobj
); 
5570     incrRefCount(dstkey
); 
5572     addReplyLong(c
, dstzset
->zsl
->length
); 
5577 static void zunionCommand(redisClient 
*c
) { 
5578     zunionInterGenericCommand(c
,c
->argv
[1], REDIS_OP_UNION
); 
5581 static void zinterCommand(redisClient 
*c
) { 
5582     zunionInterGenericCommand(c
,c
->argv
[1], REDIS_OP_INTER
); 
5585 static void zrangeGenericCommand(redisClient 
*c
, int reverse
) { 
5587     int start 
= atoi(c
->argv
[2]->ptr
); 
5588     int end 
= atoi(c
->argv
[3]->ptr
); 
5597     if (c
->argc 
== 5 && !strcasecmp(c
->argv
[4]->ptr
,"withscores")) { 
5599     } else if (c
->argc 
>= 5) { 
5600         addReply(c
,shared
.syntaxerr
); 
5604     if ((o 
= lookupKeyReadOrReply(c
,c
->argv
[1],shared
.nullmultibulk
)) == NULL 
|| 
5605         checkType(c
,o
,REDIS_ZSET
)) return; 
5610     /* convert negative indexes */ 
5611     if (start 
< 0) start 
= llen
+start
; 
5612     if (end 
< 0) end 
= llen
+end
; 
5613     if (start 
< 0) start 
= 0; 
5614     if (end 
< 0) end 
= 0; 
5616     /* indexes sanity checks */ 
5617     if (start 
> end 
|| start 
>= llen
) { 
5618         /* Out of range start or start > end result in empty list */ 
5619         addReply(c
,shared
.emptymultibulk
); 
5622     if (end 
>= llen
) end 
= llen
-1; 
5623     rangelen 
= (end
-start
)+1; 
5625     /* check if starting point is trivial, before searching 
5626      * the element in log(N) time */ 
5628         ln 
= start 
== 0 ? zsl
->tail 
: zslGetElementByRank(zsl
, llen
-start
); 
5631             zsl
->header
->forward
[0] : zslGetElementByRank(zsl
, start
+1); 
5634     /* Return the result in form of a multi-bulk reply */ 
5635     addReplySds(c
,sdscatprintf(sdsempty(),"*%d\r\n", 
5636         withscores 
? (rangelen
*2) : rangelen
)); 
5637     for (j 
= 0; j 
< rangelen
; j
++) { 
5639         addReplyBulk(c
,ele
); 
5641             addReplyDouble(c
,ln
->score
); 
5642         ln 
= reverse 
? ln
->backward 
: ln
->forward
[0]; 
5646 static void zrangeCommand(redisClient 
*c
) { 
5647     zrangeGenericCommand(c
,0); 
5650 static void zrevrangeCommand(redisClient 
*c
) { 
5651     zrangeGenericCommand(c
,1); 
5654 /* This command implements both ZRANGEBYSCORE and ZCOUNT. 
5655  * If justcount is non-zero, just the count is returned. */ 
5656 static void genericZrangebyscoreCommand(redisClient 
*c
, int justcount
) { 
5659     int minex 
= 0, maxex 
= 0; /* are min or max exclusive? */ 
5660     int offset 
= 0, limit 
= -1; 
5664     /* Parse the min-max interval. If one of the values is prefixed 
5665      * by the "(" character, it's considered "open". For instance 
5666      * ZRANGEBYSCORE zset (1.5 (2.5 will match min < x < max 
5667      * ZRANGEBYSCORE zset 1.5 2.5 will instead match min <= x <= max */ 
5668     if (((char*)c
->argv
[2]->ptr
)[0] == '(') { 
5669         min 
= strtod((char*)c
->argv
[2]->ptr
+1,NULL
); 
5672         min 
= strtod(c
->argv
[2]->ptr
,NULL
); 
5674     if (((char*)c
->argv
[3]->ptr
)[0] == '(') { 
5675         max 
= strtod((char*)c
->argv
[3]->ptr
+1,NULL
); 
5678         max 
= strtod(c
->argv
[3]->ptr
,NULL
); 
5681     /* Parse "WITHSCORES": note that if the command was called with 
5682      * the name ZCOUNT then we are sure that c->argc == 4, so we'll never 
5683      * enter the following paths to parse WITHSCORES and LIMIT. */ 
5684     if (c
->argc 
== 5 || c
->argc 
== 8) { 
5685         if (strcasecmp(c
->argv
[c
->argc
-1]->ptr
,"withscores") == 0) 
5690     if (c
->argc 
!= (4 + withscores
) && c
->argc 
!= (7 + withscores
)) 
5694             sdsnew("-ERR wrong number of arguments for ZRANGEBYSCORE\r\n")); 
5699     if (c
->argc 
== (7 + withscores
) && strcasecmp(c
->argv
[4]->ptr
,"limit")) { 
5700         addReply(c
,shared
.syntaxerr
); 
5702     } else if (c
->argc 
== (7 + withscores
)) { 
5703         offset 
= atoi(c
->argv
[5]->ptr
); 
5704         limit 
= atoi(c
->argv
[6]->ptr
); 
5705         if (offset 
< 0) offset 
= 0; 
5708     /* Ok, lookup the key and get the range */ 
5709     o 
= lookupKeyRead(c
->db
,c
->argv
[1]); 
5711         addReply(c
,justcount 
? shared
.czero 
: shared
.nullmultibulk
); 
5713         if (o
->type 
!= REDIS_ZSET
) { 
5714             addReply(c
,shared
.wrongtypeerr
); 
5716             zset 
*zsetobj 
= o
->ptr
; 
5717             zskiplist 
*zsl 
= zsetobj
->zsl
; 
5719             robj 
*ele
, *lenobj 
= NULL
; 
5720             unsigned long rangelen 
= 0; 
5722             /* Get the first node with the score >= min, or with 
5723              * score > min if 'minex' is true. */ 
5724             ln 
= zslFirstWithScore(zsl
,min
); 
5725             while (minex 
&& ln 
&& ln
->score 
== min
) ln 
= ln
->forward
[0]; 
5728                 /* No element matching the specified interval */ 
5729                 addReply(c
,justcount 
? shared
.czero 
: shared
.emptymultibulk
); 
5733             /* We don't know in advance how many matching elements there 
5734              * are in the list, so we push this object that will represent 
5735              * the multi-bulk length in the output buffer, and will "fix" 
5738                 lenobj 
= createObject(REDIS_STRING
,NULL
); 
5740                 decrRefCount(lenobj
); 
5743             while(ln 
&& (maxex 
? (ln
->score 
< max
) : (ln
->score 
<= max
))) { 
5746                     ln 
= ln
->forward
[0]; 
5749                 if (limit 
== 0) break; 
5752                     addReplyBulk(c
,ele
); 
5754                         addReplyDouble(c
,ln
->score
); 
5756                 ln 
= ln
->forward
[0]; 
5758                 if (limit 
> 0) limit
--; 
5761                 addReplyLong(c
,(long)rangelen
); 
5763                 lenobj
->ptr 
= sdscatprintf(sdsempty(),"*%lu\r\n", 
5764                      withscores 
? (rangelen
*2) : rangelen
); 
5770 static void zrangebyscoreCommand(redisClient 
*c
) { 
5771     genericZrangebyscoreCommand(c
,0); 
5774 static void zcountCommand(redisClient 
*c
) { 
5775     genericZrangebyscoreCommand(c
,1); 
5778 static void zcardCommand(redisClient 
*c
) { 
5782     if ((o 
= lookupKeyReadOrReply(c
,c
->argv
[1],shared
.czero
)) == NULL 
|| 
5783         checkType(c
,o
,REDIS_ZSET
)) return; 
5786     addReplyUlong(c
,zs
->zsl
->length
); 
5789 static void zscoreCommand(redisClient 
*c
) { 
5794     if ((o 
= lookupKeyReadOrReply(c
,c
->argv
[1],shared
.nullbulk
)) == NULL 
|| 
5795         checkType(c
,o
,REDIS_ZSET
)) return; 
5798     de 
= dictFind(zs
->dict
,c
->argv
[2]); 
5800         addReply(c
,shared
.nullbulk
); 
5802         double *score 
= dictGetEntryVal(de
); 
5804         addReplyDouble(c
,*score
); 
5808 static void zrankGenericCommand(redisClient 
*c
, int reverse
) { 
5816     if ((o 
= lookupKeyReadOrReply(c
,c
->argv
[1],shared
.nullbulk
)) == NULL 
|| 
5817         checkType(c
,o
,REDIS_ZSET
)) return; 
5821     de 
= dictFind(zs
->dict
,c
->argv
[2]); 
5823         addReply(c
,shared
.nullbulk
); 
5827     score 
= dictGetEntryVal(de
); 
5828     rank 
= zslGetRank(zsl
, *score
, c
->argv
[2]); 
5831             addReplyLong(c
, zsl
->length 
- rank
); 
5833             addReplyLong(c
, rank
-1); 
5836         addReply(c
,shared
.nullbulk
); 
5840 static void zrankCommand(redisClient 
*c
) { 
5841     zrankGenericCommand(c
, 0); 
5844 static void zrevrankCommand(redisClient 
*c
) { 
5845     zrankGenericCommand(c
, 1); 
5848 /* =================================== Hashes =============================== */ 
5849 static void hsetCommand(redisClient 
*c
) { 
5851     robj 
*o 
= lookupKeyWrite(c
->db
,c
->argv
[1]); 
5854         o 
= createHashObject(); 
5855         dictAdd(c
->db
->dict
,c
->argv
[1],o
); 
5856         incrRefCount(c
->argv
[1]); 
5858         if (o
->type 
!= REDIS_HASH
) { 
5859             addReply(c
,shared
.wrongtypeerr
); 
5863     /* We want to convert the zipmap into an hash table right now if the 
5864      * entry to be added is too big. Note that we check if the object 
5865      * is integer encoded before trying to fetch the length in the test below. 
5866      * This is because integers are small, but currently stringObjectLen() 
5867      * performs a slow conversion: not worth it. */ 
5868     if (o
->encoding 
== REDIS_ENCODING_ZIPMAP 
&& 
5869         ((c
->argv
[2]->encoding 
== REDIS_ENCODING_RAW 
&& 
5870           sdslen(c
->argv
[2]->ptr
) > server
.hash_max_zipmap_value
) || 
5871          (c
->argv
[3]->encoding 
== REDIS_ENCODING_RAW 
&& 
5872           sdslen(c
->argv
[3]->ptr
) > server
.hash_max_zipmap_value
))) 
5874         convertToRealHash(o
); 
5877     if (o
->encoding 
== REDIS_ENCODING_ZIPMAP
) { 
5878         unsigned char *zm 
= o
->ptr
; 
5879         robj 
*valobj 
= getDecodedObject(c
->argv
[3]); 
5881         zm 
= zipmapSet(zm
,c
->argv
[2]->ptr
,sdslen(c
->argv
[2]->ptr
), 
5882             valobj
->ptr
,sdslen(valobj
->ptr
),&update
); 
5883         decrRefCount(valobj
); 
5886         /* And here there is the second check for hash conversion... 
5887          * we want to do it only if the operation was not just an update as 
5888          * zipmapLen() is O(N). */ 
5889         if (!update 
&& zipmapLen(zm
) > server
.hash_max_zipmap_entries
) 
5890             convertToRealHash(o
); 
5892         tryObjectEncoding(c
->argv
[2]); 
5893         /* note that c->argv[3] is already encoded, as the latest arg 
5894          * of a bulk command is always integer encoded if possible. */ 
5895         if (dictReplace(o
->ptr
,c
->argv
[2],c
->argv
[3])) { 
5896             incrRefCount(c
->argv
[2]); 
5900         incrRefCount(c
->argv
[3]); 
5903     addReplySds(c
,sdscatprintf(sdsempty(),":%d\r\n",update 
== 0)); 
5906 static void hgetCommand(redisClient 
*c
) { 
5909     if ((o 
= lookupKeyReadOrReply(c
,c
->argv
[1],shared
.nullbulk
)) == NULL 
|| 
5910         checkType(c
,o
,REDIS_HASH
)) return; 
5912     if (o
->encoding 
== REDIS_ENCODING_ZIPMAP
) { 
5913         unsigned char *zm 
= o
->ptr
; 
5918         field 
= getDecodedObject(c
->argv
[2]); 
5919         if (zipmapGet(zm
,field
->ptr
,sdslen(field
->ptr
), &val
,&vlen
)) { 
5920             addReplySds(c
,sdscatprintf(sdsempty(),"$%u\r\n", vlen
)); 
5921             addReplySds(c
,sdsnewlen(val
,vlen
)); 
5922             addReply(c
,shared
.crlf
); 
5923             decrRefCount(field
); 
5926             addReply(c
,shared
.nullbulk
); 
5927             decrRefCount(field
); 
5931         struct dictEntry 
*de
; 
5933         de 
= dictFind(o
->ptr
,c
->argv
[2]); 
5935             addReply(c
,shared
.nullbulk
); 
5937             robj 
*e 
= dictGetEntryVal(de
); 
5944 static void hdelCommand(redisClient 
*c
) { 
5948     if ((o 
= lookupKeyWriteOrReply(c
,c
->argv
[1],shared
.czero
)) == NULL 
|| 
5949         checkType(c
,o
,REDIS_HASH
)) return; 
5951     if (o
->encoding 
== REDIS_ENCODING_ZIPMAP
) { 
5952         robj 
*field 
= getDecodedObject(c
->argv
[2]); 
5954         o
->ptr 
= zipmapDel((unsigned char*) o
->ptr
, 
5955             (unsigned char*) field
->ptr
, 
5956             sdslen(field
->ptr
), &deleted
); 
5957         decrRefCount(field
); 
5959         deleted 
= dictDelete((dict
*)o
->ptr
,c
->argv
[2]) == DICT_OK
; 
5961     addReply(c
,deleted 
? shared
.cone 
: shared
.czero
); 
5964 static void hlenCommand(redisClient 
*c
) { 
5968     if ((o 
= lookupKeyReadOrReply(c
,c
->argv
[1],shared
.czero
)) == NULL 
|| 
5969         checkType(c
,o
,REDIS_HASH
)) return; 
5971     len 
= (o
->encoding 
== REDIS_ENCODING_ZIPMAP
) ? 
5972             zipmapLen((unsigned char*)o
->ptr
) : dictSize((dict
*)o
->ptr
); 
5973     addReplyUlong(c
,len
); 
5976 #define REDIS_GETALL_KEYS 1 
5977 #define REDIS_GETALL_VALS 2 
5978 static void genericHgetallCommand(redisClient 
*c
, int flags
) { 
5980     unsigned long count 
= 0; 
5982     if ((o 
= lookupKeyReadOrReply(c
,c
->argv
[1],shared
.nullmultibulk
)) == NULL
 
5983         || checkType(c
,o
,REDIS_HASH
)) return; 
5985     lenobj 
= createObject(REDIS_STRING
,NULL
); 
5987     decrRefCount(lenobj
); 
5989     if (o
->encoding 
== REDIS_ENCODING_ZIPMAP
) { 
5990         unsigned char *p 
= zipmapRewind(o
->ptr
); 
5991         unsigned char *field
, *val
; 
5992         unsigned int flen
, vlen
; 
5994         while((p 
= zipmapNext(p
,&field
,&flen
,&val
,&vlen
)) != NULL
) { 
5997             if (flags 
& REDIS_GETALL_KEYS
) { 
5998                 aux 
= createStringObject((char*)field
,flen
); 
5999                 addReplyBulk(c
,aux
); 
6003             if (flags 
& REDIS_GETALL_VALS
) { 
6004                 aux 
= createStringObject((char*)val
,vlen
); 
6005                 addReplyBulk(c
,aux
); 
6011         dictIterator 
*di 
= dictGetIterator(o
->ptr
); 
6014         while((de 
= dictNext(di
)) != NULL
) { 
6015             robj 
*fieldobj 
= dictGetEntryKey(de
); 
6016             robj 
*valobj 
= dictGetEntryVal(de
); 
6018             if (flags 
& REDIS_GETALL_KEYS
) { 
6019                 addReplyBulk(c
,fieldobj
); 
6022             if (flags 
& REDIS_GETALL_VALS
) { 
6023                 addReplyBulk(c
,valobj
); 
6027         dictReleaseIterator(di
); 
6029     lenobj
->ptr 
= sdscatprintf(sdsempty(),"*%lu\r\n",count
); 
6032 static void hkeysCommand(redisClient 
*c
) { 
6033     genericHgetallCommand(c
,REDIS_GETALL_KEYS
); 
6036 static void hvalsCommand(redisClient 
*c
) { 
6037     genericHgetallCommand(c
,REDIS_GETALL_VALS
); 
6040 static void hgetallCommand(redisClient 
*c
) { 
6041     genericHgetallCommand(c
,REDIS_GETALL_KEYS
|REDIS_GETALL_VALS
); 
6044 static void hexistsCommand(redisClient 
*c
) { 
6048     if ((o 
= lookupKeyReadOrReply(c
,c
->argv
[1],shared
.czero
)) == NULL 
|| 
6049         checkType(c
,o
,REDIS_HASH
)) return; 
6051     if (o
->encoding 
== REDIS_ENCODING_ZIPMAP
) { 
6053         unsigned char *zm 
= o
->ptr
; 
6055         field 
= getDecodedObject(c
->argv
[2]); 
6056         exists 
= zipmapExists(zm
,field
->ptr
,sdslen(field
->ptr
)); 
6057         decrRefCount(field
); 
6059         exists 
= dictFind(o
->ptr
,c
->argv
[2]) != NULL
; 
6061     addReply(c
,exists 
? shared
.cone 
: shared
.czero
); 
6064 static void convertToRealHash(robj 
*o
) { 
6065     unsigned char *key
, *val
, *p
, *zm 
= o
->ptr
; 
6066     unsigned int klen
, vlen
; 
6067     dict 
*dict 
= dictCreate(&hashDictType
,NULL
); 
6069     assert(o
->type 
== REDIS_HASH 
&& o
->encoding 
!= REDIS_ENCODING_HT
); 
6070     p 
= zipmapRewind(zm
); 
6071     while((p 
= zipmapNext(p
,&key
,&klen
,&val
,&vlen
)) != NULL
) { 
6072         robj 
*keyobj
, *valobj
; 
6074         keyobj 
= createStringObject((char*)key
,klen
); 
6075         valobj 
= createStringObject((char*)val
,vlen
); 
6076         tryObjectEncoding(keyobj
); 
6077         tryObjectEncoding(valobj
); 
6078         dictAdd(dict
,keyobj
,valobj
); 
6080     o
->encoding 
= REDIS_ENCODING_HT
; 
6085 /* ========================= Non type-specific commands  ==================== */ 
6087 static void flushdbCommand(redisClient 
*c
) { 
6088     server
.dirty 
+= dictSize(c
->db
->dict
); 
6089     dictEmpty(c
->db
->dict
); 
6090     dictEmpty(c
->db
->expires
); 
6091     addReply(c
,shared
.ok
); 
/* Adds the value returned by emptyDb() to server.dirty, replies with
 * shared.ok, then calls rdbSave() on server.dbfilename.
 * NOTE(review): this listing skips embedded line 6098, so a statement after
 * the rdbSave() call may be missing here -- confirm against upstream. */
6094 static void flushallCommand(redisClient 
*c
) { 
6095     server
.dirty 
+= emptyDb(); 
6096     addReply(c
,shared
.ok
); 
/* The reply is queued before the save is started. */
6097     rdbSave(server
.dbfilename
); 
6101 static redisSortOperation 
*createSortOperation(int type
, robj 
*pattern
) { 
6102     redisSortOperation 
*so 
= zmalloc(sizeof(*so
)); 
6104     so
->pattern 
= pattern
; 
6108 /* Return the value associated to the key with a name obtained 
6109  * substituting the first occurence of '*' in 'pattern' with 'subst' */ 
6110 static robj 
*lookupKeyByPattern(redisDb 
*db
, robj 
*pattern
, robj 
*subst
) { 
6114     int prefixlen
, sublen
, postfixlen
; 
6115     /* Expoit the internal sds representation to create a sds string allocated on the stack in order to make this function faster */ 
6119         char buf
[REDIS_SORTKEY_MAX
+1]; 
6122     /* If the pattern is "#" return the substitution object itself in order 
6123      * to implement the "SORT ... GET #" feature. */ 
6124     spat 
= pattern
->ptr
; 
6125     if (spat
[0] == '#' && spat
[1] == '\0') { 
6129     /* The substitution object may be specially encoded. If so we create 
6130      * a decoded object on the fly. Otherwise getDecodedObject will just 
6131      * increment the ref count, that we'll decrement later. */ 
6132     subst 
= getDecodedObject(subst
); 
6135     if (sdslen(spat
)+sdslen(ssub
)-1 > REDIS_SORTKEY_MAX
) return NULL
; 
6136     p 
= strchr(spat
,'*'); 
6138         decrRefCount(subst
); 
6143     sublen 
= sdslen(ssub
); 
6144     postfixlen 
= sdslen(spat
)-(prefixlen
+1); 
6145     memcpy(keyname
.buf
,spat
,prefixlen
); 
6146     memcpy(keyname
.buf
+prefixlen
,ssub
,sublen
); 
6147     memcpy(keyname
.buf
+prefixlen
+sublen
,p
+1,postfixlen
); 
6148     keyname
.buf
[prefixlen
+sublen
+postfixlen
] = '\0'; 
6149     keyname
.len 
= prefixlen
+sublen
+postfixlen
; 
6151     initStaticStringObject(keyobj
,((char*)&keyname
)+(sizeof(long)*2)) 
6152     decrRefCount(subst
); 
6154     /* printf("lookup '%s' => %p\n", keyname.buf,de); */ 
6155     return lookupKeyRead(db
,&keyobj
); 
6158 /* sortCompare() is used by qsort in sortCommand(). Given that qsort_r with 
6159  * the additional parameter is not standard but BSD-specific, we have to 
6160  * pass sorting parameters via the global 'server' structure */ 
6161 static int sortCompare(const void *s1
, const void *s2
) { 
6162     const redisSortObject 
*so1 
= s1
, *so2 
= s2
; 
6165     if (!server
.sort_alpha
) { 
6166         /* Numeric sorting. Here it's trivial as we precomputed scores */ 
6167         if (so1
->u
.score 
> so2
->u
.score
) { 
6169         } else if (so1
->u
.score 
< so2
->u
.score
) { 
6175         /* Alphanumeric sorting */ 
6176         if (server
.sort_bypattern
) { 
6177             if (!so1
->u
.cmpobj 
|| !so2
->u
.cmpobj
) { 
6178                 /* At least one compare object is NULL */ 
6179                 if (so1
->u
.cmpobj 
== so2
->u
.cmpobj
) 
6181                 else if (so1
->u
.cmpobj 
== NULL
) 
6186                 /* We have both the objects, use strcoll */ 
6187                 cmp 
= strcoll(so1
->u
.cmpobj
->ptr
,so2
->u
.cmpobj
->ptr
); 
6190             /* Compare elements directly */ 
6193             dec1 
= getDecodedObject(so1
->obj
); 
6194             dec2 
= getDecodedObject(so2
->obj
); 
6195             cmp 
= strcoll(dec1
->ptr
,dec2
->ptr
); 
6200     return server
.sort_desc 
? -cmp 
: cmp
; 
6203 /* The SORT command is the most complex command in Redis. Warning: this code 
6204  * is optimized for speed and a bit less for readability */ 
6205 static void sortCommand(redisClient 
*c
) { 
6208     int desc 
= 0, alpha 
= 0; 
6209     int limit_start 
= 0, limit_count 
= -1, start
, end
; 
6210     int j
, dontsort 
= 0, vectorlen
; 
6211     int getop 
= 0; /* GET operation counter */ 
6212     robj 
*sortval
, *sortby 
= NULL
, *storekey 
= NULL
; 
6213     redisSortObject 
*vector
; /* Resulting vector to sort */ 
6215     /* Lookup the key to sort. It must be of the right types */ 
6216     sortval 
= lookupKeyRead(c
->db
,c
->argv
[1]); 
6217     if (sortval 
== NULL
) { 
6218         addReply(c
,shared
.nullmultibulk
); 
6221     if (sortval
->type 
!= REDIS_SET 
&& sortval
->type 
!= REDIS_LIST 
&& 
6222         sortval
->type 
!= REDIS_ZSET
) 
6224         addReply(c
,shared
.wrongtypeerr
); 
6228     /* Create a list of operations to perform for every sorted element. 
6229      * Operations can be GET/DEL/INCR/DECR */ 
6230     operations 
= listCreate(); 
6231     listSetFreeMethod(operations
,zfree
); 
6234     /* Now we need to protect sortval incrementing its count, in the future 
6235      * SORT may have options able to overwrite/delete keys during the sorting 
6236      * and the sorted key itself may get destroyed */ 
6237     incrRefCount(sortval
); 
6239     /* The SORT command has an SQL-alike syntax, parse it */ 
6240     while(j 
< c
->argc
) { 
6241         int leftargs 
= c
->argc
-j
-1; 
6242         if (!strcasecmp(c
->argv
[j
]->ptr
,"asc")) { 
6244         } else if (!strcasecmp(c
->argv
[j
]->ptr
,"desc")) { 
6246         } else if (!strcasecmp(c
->argv
[j
]->ptr
,"alpha")) { 
6248         } else if (!strcasecmp(c
->argv
[j
]->ptr
,"limit") && leftargs 
>= 2) { 
6249             limit_start 
= atoi(c
->argv
[j
+1]->ptr
); 
6250             limit_count 
= atoi(c
->argv
[j
+2]->ptr
); 
6252         } else if (!strcasecmp(c
->argv
[j
]->ptr
,"store") && leftargs 
>= 1) { 
6253             storekey 
= c
->argv
[j
+1]; 
6255         } else if (!strcasecmp(c
->argv
[j
]->ptr
,"by") && leftargs 
>= 1) { 
6256             sortby 
= c
->argv
[j
+1]; 
6257             /* If the BY pattern does not contain '*', i.e. it is constant, 
6258              * we don't need to sort nor to lookup the weight keys. */ 
6259             if (strchr(c
->argv
[j
+1]->ptr
,'*') == NULL
) dontsort 
= 1; 
6261         } else if (!strcasecmp(c
->argv
[j
]->ptr
,"get") && leftargs 
>= 1) { 
6262             listAddNodeTail(operations
,createSortOperation( 
6263                 REDIS_SORT_GET
,c
->argv
[j
+1])); 
6267             decrRefCount(sortval
); 
6268             listRelease(operations
); 
6269             addReply(c
,shared
.syntaxerr
); 
6275     /* Load the sorting vector with all the objects to sort */ 
6276     switch(sortval
->type
) { 
6277     case REDIS_LIST
: vectorlen 
= listLength((list
*)sortval
->ptr
); break; 
6278     case REDIS_SET
: vectorlen 
=  dictSize((dict
*)sortval
->ptr
); break; 
6279     case REDIS_ZSET
: vectorlen 
= dictSize(((zset
*)sortval
->ptr
)->dict
); break; 
6280     default: vectorlen 
= 0; redisAssert(0); /* Avoid GCC warning */ 
6282     vector 
= zmalloc(sizeof(redisSortObject
)*vectorlen
); 
6285     if (sortval
->type 
== REDIS_LIST
) { 
6286         list 
*list 
= sortval
->ptr
; 
6290         listRewind(list
,&li
); 
6291         while((ln 
= listNext(&li
))) { 
6292             robj 
*ele 
= ln
->value
; 
6293             vector
[j
].obj 
= ele
; 
6294             vector
[j
].u
.score 
= 0; 
6295             vector
[j
].u
.cmpobj 
= NULL
; 
6303         if (sortval
->type 
== REDIS_SET
) { 
6306             zset 
*zs 
= sortval
->ptr
; 
6310         di 
= dictGetIterator(set
); 
6311         while((setele 
= dictNext(di
)) != NULL
) { 
6312             vector
[j
].obj 
= dictGetEntryKey(setele
); 
6313             vector
[j
].u
.score 
= 0; 
6314             vector
[j
].u
.cmpobj 
= NULL
; 
6317         dictReleaseIterator(di
); 
6319     redisAssert(j 
== vectorlen
); 
6321     /* Now it's time to load the right scores in the sorting vector */ 
6322     if (dontsort 
== 0) { 
6323         for (j 
= 0; j 
< vectorlen
; j
++) { 
6327                 byval 
= lookupKeyByPattern(c
->db
,sortby
,vector
[j
].obj
); 
6328                 if (!byval 
|| byval
->type 
!= REDIS_STRING
) continue; 
6330                     vector
[j
].u
.cmpobj 
= getDecodedObject(byval
); 
6332                     if (byval
->encoding 
== REDIS_ENCODING_RAW
) { 
6333                         vector
[j
].u
.score 
= strtod(byval
->ptr
,NULL
); 
6335                         /* Don't need to decode the object if it's 
6336                          * integer-encoded (the only encoding supported) so 
6337                          * far. We can just cast it */ 
6338                         if (byval
->encoding 
== REDIS_ENCODING_INT
) { 
6339                             vector
[j
].u
.score 
= (long)byval
->ptr
; 
6341                             redisAssert(1 != 1); 
6346                     if (vector
[j
].obj
->encoding 
== REDIS_ENCODING_RAW
) 
6347                         vector
[j
].u
.score 
= strtod(vector
[j
].obj
->ptr
,NULL
); 
6349                         if (vector
[j
].obj
->encoding 
== REDIS_ENCODING_INT
) 
6350                             vector
[j
].u
.score 
= (long) vector
[j
].obj
->ptr
; 
6352                             redisAssert(1 != 1); 
6359     /* We are ready to sort the vector... perform a bit of sanity check 
6360      * on the LIMIT option too. We'll use a partial version of quicksort. */ 
6361     start 
= (limit_start 
< 0) ? 0 : limit_start
; 
6362     end 
= (limit_count 
< 0) ? vectorlen
-1 : start
+limit_count
-1; 
6363     if (start 
>= vectorlen
) { 
6364         start 
= vectorlen
-1; 
6367     if (end 
>= vectorlen
) end 
= vectorlen
-1; 
6369     if (dontsort 
== 0) { 
6370         server
.sort_desc 
= desc
; 
6371         server
.sort_alpha 
= alpha
; 
6372         server
.sort_bypattern 
= sortby 
? 1 : 0; 
6373         if (sortby 
&& (start 
!= 0 || end 
!= vectorlen
-1)) 
6374             pqsort(vector
,vectorlen
,sizeof(redisSortObject
),sortCompare
, start
,end
); 
6376             qsort(vector
,vectorlen
,sizeof(redisSortObject
),sortCompare
); 
6379     /* Send command output to the output buffer, performing the specified 
6380      * GET/DEL/INCR/DECR operations if any. */ 
6381     outputlen 
= getop 
? getop
*(end
-start
+1) : end
-start
+1; 
6382     if (storekey 
== NULL
) { 
6383         /* STORE option not specified, send the sorting result to client */ 
6384         addReplySds(c
,sdscatprintf(sdsempty(),"*%d\r\n",outputlen
)); 
6385         for (j 
= start
; j 
<= end
; j
++) { 
6389             if (!getop
) addReplyBulk(c
,vector
[j
].obj
); 
6390             listRewind(operations
,&li
); 
6391             while((ln 
= listNext(&li
))) { 
6392                 redisSortOperation 
*sop 
= ln
->value
; 
6393                 robj 
*val 
= lookupKeyByPattern(c
->db
,sop
->pattern
, 
6396                 if (sop
->type 
== REDIS_SORT_GET
) { 
6397                     if (!val 
|| val
->type 
!= REDIS_STRING
) { 
6398                         addReply(c
,shared
.nullbulk
); 
6400                         addReplyBulk(c
,val
); 
6403                     redisAssert(sop
->type 
== REDIS_SORT_GET
); /* always fails */ 
6408         robj 
*listObject 
= createListObject(); 
6409         list 
*listPtr 
= (list
*) listObject
->ptr
; 
6411         /* STORE option specified, set the sorting result as a List object */ 
6412         for (j 
= start
; j 
<= end
; j
++) { 
6417                 listAddNodeTail(listPtr
,vector
[j
].obj
); 
6418                 incrRefCount(vector
[j
].obj
); 
6420             listRewind(operations
,&li
); 
6421             while((ln 
= listNext(&li
))) { 
6422                 redisSortOperation 
*sop 
= ln
->value
; 
6423                 robj 
*val 
= lookupKeyByPattern(c
->db
,sop
->pattern
, 
6426                 if (sop
->type 
== REDIS_SORT_GET
) { 
6427                     if (!val 
|| val
->type 
!= REDIS_STRING
) { 
6428                         listAddNodeTail(listPtr
,createStringObject("",0)); 
6430                         listAddNodeTail(listPtr
,val
); 
6434                     redisAssert(sop
->type 
== REDIS_SORT_GET
); /* always fails */ 
6438         if (dictReplace(c
->db
->dict
,storekey
,listObject
)) { 
6439             incrRefCount(storekey
); 
6441         /* Note: we add 1 because the DB is dirty anyway since even if the 
6442          * SORT result is empty a new key is set and maybe the old content 
6444         server
.dirty 
+= 1+outputlen
; 
6445         addReplySds(c
,sdscatprintf(sdsempty(),":%d\r\n",outputlen
)); 
6449     decrRefCount(sortval
); 
6450     listRelease(operations
); 
6451     for (j 
= 0; j 
< vectorlen
; j
++) { 
6452         if (sortby 
&& alpha 
&& vector
[j
].u
.cmpobj
) 
6453             decrRefCount(vector
[j
].u
.cmpobj
); 
/* Convert an amount of bytes into a human readable string in the form
 * of 100B, 2G, 100M, 4K, and so forth.
 *
 * 's' must point to a buffer large enough for the result (the longest
 * output is a 20 digit number plus the unit and the terminator). Values
 * below 1024 are printed as an exact byte count; larger values are printed
 * with two decimals using binary multiples K, M, G, T and P. Anything at or
 * above 1024 P falls back to an exact byte count, so 's' is always written. */
static void bytesToHuman(char *s, unsigned long long n) {
    const unsigned long long kib = 1024ULL;
    const unsigned long long mib = kib*1024;
    const unsigned long long gib = mib*1024;
    const unsigned long long tib = gib*1024;
    const unsigned long long pib = tib*1024;
    const unsigned long long eib = pib*1024;

    if (n < kib) {
        /* Bytes */
        sprintf(s,"%lluB",n);
    } else if (n < mib) {
        sprintf(s,"%.2fK",(double)n/kib);
    } else if (n < gib) {
        sprintf(s,"%.2fM",(double)n/mib);
    } else if (n < tib) {
        sprintf(s,"%.2fG",(double)n/gib);
    } else if (n < pib) {
        sprintf(s,"%.2fT",(double)n/tib);
    } else if (n < eib) {
        sprintf(s,"%.2fP",(double)n/pib);
    } else {
        /* Out of named units: report the exact number of bytes. */
        sprintf(s,"%lluB",n);
    }
}
6479 /* Create the string returned by the INFO command. This is decoupled 
6480  * by the INFO command itself as we need to report the same information 
6481  * on memory corruption problems. */ 
6482 static sds 
genRedisInfoString(void) { 
6484     time_t uptime 
= time(NULL
)-server
.stat_starttime
; 
6488     server
.hash_max_zipmap_entries 
= REDIS_HASH_MAX_ZIPMAP_ENTRIES
; 
6489     server
.hash_max_zipmap_value 
= REDIS_HASH_MAX_ZIPMAP_VALUE
; 
6491     bytesToHuman(hmem
,zmalloc_used_memory()); 
6492     info 
= sdscatprintf(sdsempty(), 
6493         "redis_version:%s\r\n" 
6495         "multiplexing_api:%s\r\n" 
6496         "process_id:%ld\r\n" 
6497         "uptime_in_seconds:%ld\r\n" 
6498         "uptime_in_days:%ld\r\n" 
6499         "connected_clients:%d\r\n" 
6500         "connected_slaves:%d\r\n" 
6501         "blocked_clients:%d\r\n" 
6502         "used_memory:%zu\r\n" 
6503         "used_memory_human:%s\r\n" 
6504         "changes_since_last_save:%lld\r\n" 
6505         "bgsave_in_progress:%d\r\n" 
6506         "last_save_time:%ld\r\n" 
6507         "bgrewriteaof_in_progress:%d\r\n" 
6508         "total_connections_received:%lld\r\n" 
6509         "total_commands_processed:%lld\r\n" 
6510         "hash_max_zipmap_entries:%ld\r\n" 
6511         "hash_max_zipmap_value:%ld\r\n" 
6515         (sizeof(long) == 8) ? "64" : "32", 
6520         listLength(server
.clients
)-listLength(server
.slaves
), 
6521         listLength(server
.slaves
), 
6522         server
.blpop_blocked_clients
, 
6523         zmalloc_used_memory(), 
6526         server
.bgsavechildpid 
!= -1, 
6528         server
.bgrewritechildpid 
!= -1, 
6529         server
.stat_numconnections
, 
6530         server
.stat_numcommands
, 
6531         server
.hash_max_zipmap_entries
, 
6532         server
.hash_max_zipmap_value
, 
6533         server
.vm_enabled 
!= 0, 
6534         server
.masterhost 
== NULL 
? "master" : "slave" 
6536     if (server
.masterhost
) { 
6537         info 
= sdscatprintf(info
, 
6538             "master_host:%s\r\n" 
6539             "master_port:%d\r\n" 
6540             "master_link_status:%s\r\n" 
6541             "master_last_io_seconds_ago:%d\r\n" 
6544             (server
.replstate 
== REDIS_REPL_CONNECTED
) ? 
6546             server
.master 
? ((int)(time(NULL
)-server
.master
->lastinteraction
)) : -1 
6549     if (server
.vm_enabled
) { 
6551         info 
= sdscatprintf(info
, 
6552             "vm_conf_max_memory:%llu\r\n" 
6553             "vm_conf_page_size:%llu\r\n" 
6554             "vm_conf_pages:%llu\r\n" 
6555             "vm_stats_used_pages:%llu\r\n" 
6556             "vm_stats_swapped_objects:%llu\r\n" 
6557             "vm_stats_swappin_count:%llu\r\n" 
6558             "vm_stats_swappout_count:%llu\r\n" 
6559             "vm_stats_io_newjobs_len:%lu\r\n" 
6560             "vm_stats_io_processing_len:%lu\r\n" 
6561             "vm_stats_io_processed_len:%lu\r\n" 
6562             "vm_stats_io_active_threads:%lu\r\n" 
6563             "vm_stats_blocked_clients:%lu\r\n" 
6564             ,(unsigned long long) server
.vm_max_memory
, 
6565             (unsigned long long) server
.vm_page_size
, 
6566             (unsigned long long) server
.vm_pages
, 
6567             (unsigned long long) server
.vm_stats_used_pages
, 
6568             (unsigned long long) server
.vm_stats_swapped_objects
, 
6569             (unsigned long long) server
.vm_stats_swapins
, 
6570             (unsigned long long) server
.vm_stats_swapouts
, 
6571             (unsigned long) listLength(server
.io_newjobs
), 
6572             (unsigned long) listLength(server
.io_processing
), 
6573             (unsigned long) listLength(server
.io_processed
), 
6574             (unsigned long) server
.io_active_threads
, 
6575             (unsigned long) server
.vm_blocked_clients
 
6579     for (j 
= 0; j 
< server
.dbnum
; j
++) { 
6580         long long keys
, vkeys
; 
6582         keys 
= dictSize(server
.db
[j
].dict
); 
6583         vkeys 
= dictSize(server
.db
[j
].expires
); 
6584         if (keys 
|| vkeys
) { 
6585             info 
= sdscatprintf(info
, "db%d:keys=%lld,expires=%lld\r\n", 
6592 static void infoCommand(redisClient 
*c
) { 
6593     sds info 
= genRedisInfoString(); 
6594     addReplySds(c
,sdscatprintf(sdsempty(),"$%lu\r\n", 
6595         (unsigned long)sdslen(info
))); 
6596     addReplySds(c
,info
); 
6597     addReply(c
,shared
.crlf
); 
/* MONITOR command: flags the client as both REDIS_SLAVE and REDIS_MONITOR,
 * appends it to server.monitors and replies with shared.ok. Clients that
 * already carry the REDIS_SLAVE flag are ignored without any reply.
 * NOTE(review): this listing skips embedded line 6605, so one statement
 * between the flag update and listAddNodeTail() may be missing here --
 * confirm against upstream. */
6600 static void monitorCommand(redisClient 
*c
) { 
6601     /* ignore MONITOR if already slave or in monitor mode */ 
6602     if (c
->flags 
& REDIS_SLAVE
) return; 
6604     c
->flags 
|= (REDIS_SLAVE
|REDIS_MONITOR
); 
6606     listAddNodeTail(server
.monitors
,c
); 
6607     addReply(c
,shared
.ok
); 
6610 /* ================================= Expire ================================= */ 
6611 static int removeExpire(redisDb 
*db
, robj 
*key
) { 
6612     if (dictDelete(db
->expires
,key
) == DICT_OK
) { 
/* Tries to add 'key' to db->expires with the expire time 'when' stored
 * directly in the entry's value pointer (cast to void*), and branches on
 * whether dictAdd() reported DICT_ERR.
 * NOTE(review): the bodies of both branches are missing from this listing;
 * callers in this file treat a non-zero return as "expire set" -- confirm
 * the exact return values and reference counting against upstream. */
6619 static int setExpire(redisDb 
*db
, robj 
*key
, time_t when
) { 
6620     if (dictAdd(db
->expires
,key
,(void*)when
) == DICT_ERR
) { 
6628 /* Return the expire time of the specified key, or -1 if no expire 
6629  * is associated with this key (i.e. the key is non volatile) */ 
6630 static time_t getExpire(redisDb 
*db
, robj 
*key
) { 
6633     /* No expire? return ASAP */ 
6634     if (dictSize(db
->expires
) == 0 || 
6635        (de 
= dictFind(db
->expires
,key
)) == NULL
) return -1; 
6637     return (time_t) dictGetEntryVal(de
); 
6640 static int expireIfNeeded(redisDb 
*db
, robj 
*key
) { 
6644     /* No expire? return ASAP */ 
6645     if (dictSize(db
->expires
) == 0 || 
6646        (de 
= dictFind(db
->expires
,key
)) == NULL
) return 0; 
6648     /* Lookup the expire */ 
6649     when 
= (time_t) dictGetEntryVal(de
); 
6650     if (time(NULL
) <= when
) return 0; 
6652     /* Delete the key */ 
6653     dictDelete(db
->expires
,key
); 
6654     return dictDelete(db
->dict
,key
) == DICT_OK
; 
/* Deletes 'key' from 'db' when it has an entry in db->expires, regardless
 * of whether that expire time has been reached. Returns 0 when the key has
 * no expire; otherwise returns whether deleting it from db->dict succeeded.
 * NOTE(review): this listing skips embedded line 6665, so one statement
 * before the first dictDelete() may be missing here -- confirm against
 * upstream. */
6657 static int deleteIfVolatile(redisDb 
*db
, robj 
*key
) { 
6660     /* No expire? return ASAP */ 
6661     if (dictSize(db
->expires
) == 0 || 
6662        (de 
= dictFind(db
->expires
,key
)) == NULL
) return 0; 
6664     /* Delete the key */ 
6666     dictDelete(db
->expires
,key
); 
6667     return dictDelete(db
->dict
,key
) == DICT_OK
; 
/* Shared implementation of the expire commands: 'seconds' is a delay
 * relative to the current time (see the time(NULL)+seconds computation).
 * The key is first looked up in c->db->dict. Visible reply paths:
 *  - shared.czero after the lookup (condition line not visible here);
 *  - deleteKey() followed by shared.cone, incrementing server.dirty when
 *    the deletion succeeded;
 *  - setExpire() with the absolute time, replying shared.cone when it
 *    returns non-zero and shared.czero otherwise.
 * NOTE(review): the conditions selecting between these paths are missing
 * from this listing -- confirm against upstream. */
6670 static void expireGenericCommand(redisClient 
*c
, robj 
*key
, time_t seconds
) { 
6673     de 
= dictFind(c
->db
->dict
,key
); 
6675         addReply(c
,shared
.czero
); 
6679         if (deleteKey(c
->db
,key
)) server
.dirty
++; 
6680         addReply(c
, shared
.cone
); 
/* Convert the relative delay into an absolute time. */
6683         time_t when 
= time(NULL
)+seconds
; 
6684         if (setExpire(c
->db
,key
,when
)) { 
6685             addReply(c
,shared
.cone
); 
6688             addReply(c
,shared
.czero
); 
6694 static void expireCommand(redisClient 
*c
) { 
6695     expireGenericCommand(c
,c
->argv
[1],strtol(c
->argv
[2]->ptr
,NULL
,10)); 
6698 static void expireatCommand(redisClient 
*c
) { 
6699     expireGenericCommand(c
,c
->argv
[1],strtol(c
->argv
[2]->ptr
,NULL
,10)-time(NULL
)); 
6702 static void ttlCommand(redisClient 
*c
) { 
6706     expire 
= getExpire(c
->db
,c
->argv
[1]); 
6708         ttl 
= (int) (expire
-time(NULL
)); 
6709         if (ttl 
< 0) ttl 
= -1; 
6711     addReplySds(c
,sdscatprintf(sdsempty(),":%d\r\n",ttl
)); 
6714 /* ================================ MULTI/EXEC ============================== */ 
6716 /* Client state initialization for MULTI/EXEC */ 
6717 static void initClientMultiState(redisClient 
*c
) { 
6718     c
->mstate
.commands 
= NULL
; 
6719     c
->mstate
.count 
= 0; 
6722 /* Release all the resources associated with MULTI/EXEC state */ 
6723 static void freeClientMultiState(redisClient 
*c
) { 
6726     for (j 
= 0; j 
< c
->mstate
.count
; j
++) { 
6728         multiCmd 
*mc 
= c
->mstate
.commands
+j
; 
6730         for (i 
= 0; i 
< mc
->argc
; i
++) 
6731             decrRefCount(mc
->argv
[i
]); 
6734     zfree(c
->mstate
.commands
); 
6737 /* Add a new command into the MULTI commands queue */ 
6738 static void queueMultiCommand(redisClient 
*c
, struct redisCommand 
*cmd
) { 
6742     c
->mstate
.commands 
= zrealloc(c
->mstate
.commands
, 
6743             sizeof(multiCmd
)*(c
->mstate
.count
+1)); 
6744     mc 
= c
->mstate
.commands
+c
->mstate
.count
; 
6747     mc
->argv 
= zmalloc(sizeof(robj
*)*c
->argc
); 
6748     memcpy(mc
->argv
,c
->argv
,sizeof(robj
*)*c
->argc
); 
6749     for (j 
= 0; j 
< c
->argc
; j
++) 
6750         incrRefCount(mc
->argv
[j
]); 
6754 static void multiCommand(redisClient 
*c
) { 
6755     c
->flags 
|= REDIS_MULTI
; 
6756     addReply(c
,shared
.ok
); 
6759 static void discardCommand(redisClient 
*c
) { 
6760     if (!(c
->flags 
& REDIS_MULTI
)) { 
6761         addReplySds(c
,sdsnew("-ERR DISCARD without MULTI\r\n")); 
6765     freeClientMultiState(c
); 
6766     initClientMultiState(c
); 
6767     c
->flags 
&= (~REDIS_MULTI
); 
6768     addReply(c
,shared
.ok
); 
6771 static void execCommand(redisClient 
*c
) { 
6776     if (!(c
->flags 
& REDIS_MULTI
)) { 
6777         addReplySds(c
,sdsnew("-ERR EXEC without MULTI\r\n")); 
6781     orig_argv 
= c
->argv
; 
6782     orig_argc 
= c
->argc
; 
6783     addReplySds(c
,sdscatprintf(sdsempty(),"*%d\r\n",c
->mstate
.count
)); 
6784     for (j 
= 0; j 
< c
->mstate
.count
; j
++) { 
6785         c
->argc 
= c
->mstate
.commands
[j
].argc
; 
6786         c
->argv 
= c
->mstate
.commands
[j
].argv
; 
6787         call(c
,c
->mstate
.commands
[j
].cmd
); 
6789     c
->argv 
= orig_argv
; 
6790     c
->argc 
= orig_argc
; 
6791     freeClientMultiState(c
); 
6792     initClientMultiState(c
); 
6793     c
->flags 
&= (~REDIS_MULTI
); 
6796 /* =========================== Blocking Operations  ========================= */ 
/* Currently Redis blocking operations support is limited to list POP ops,
 * so the current implementation is not fully generic, but it is also not
 * completely specific, so it will not require a rewrite to support new
 * kinds of blocking operations in the future.
 *
 * Still it's important to note that list blocking operations can already be
 * used as a notification mechanism in order to implement other blocking
 * operations at application level, so there must be very strong evidence
 * of usefulness and generality before new blocking operations are implemented.
 *
 * This is how the current blocking POP works, we use BLPOP as example:
 * - If the user calls BLPOP and the key exists and contains a non empty list
 *   then LPOP is called instead. So BLPOP is semantically the same as LPOP
 *   if there is no need to block.
 * - If instead BLPOP is called and the key does not exist or the list is
 *   empty we need to block. In order to do so we remove the notification for
 *   new data to read in the client socket (so that we'll not serve new
 *   requests if the blocking request is not served). Also we put the client
 *   in a dictionary (db->blockingkeys) mapping keys to a list of clients
 *   blocking for these keys.
 * - If a PUSH operation against a key with blocked clients waiting is
 *   performed, we serve the first in the list: basically instead of pushing
 *   the new element inside the list we return it to the (first / oldest)
 *   blocking client, unblock the client, and remove it from the list.
 *
 * The above comment and the source code should be enough in order to understand
 * the implementation and modify / fix it later.
 */
6827 /* Set a client in blocking mode for the specified key, with the specified 
6829 static void blockForKeys(redisClient 
*c
, robj 
**keys
, int numkeys
, time_t timeout
) { 
6834     c
->blockingkeys 
= zmalloc(sizeof(robj
*)*numkeys
); 
6835     c
->blockingkeysnum 
= numkeys
; 
6836     c
->blockingto 
= timeout
; 
6837     for (j 
= 0; j 
< numkeys
; j
++) { 
6838         /* Add the key in the client structure, to map clients -> keys */ 
6839         c
->blockingkeys
[j
] = keys
[j
]; 
6840         incrRefCount(keys
[j
]); 
6842         /* And in the other "side", to map keys -> clients */ 
6843         de 
= dictFind(c
->db
->blockingkeys
,keys
[j
]); 
6847             /* For every key we take a list of clients blocked for it */ 
6849             retval 
= dictAdd(c
->db
->blockingkeys
,keys
[j
],l
); 
6850             incrRefCount(keys
[j
]); 
6851             assert(retval 
== DICT_OK
); 
6853             l 
= dictGetEntryVal(de
); 
6855         listAddNodeTail(l
,c
); 
6857     /* Mark the client as a blocked client */ 
6858     c
->flags 
|= REDIS_BLOCKED
; 
6859     server
.blpop_blocked_clients
++; 
6862 /* Unblock a client that's waiting in a blocking operation such as BLPOP */ 
6863 static void unblockClientWaitingData(redisClient 
*c
) { 
6868     assert(c
->blockingkeys 
!= NULL
); 
6869     /* The client may wait for multiple keys, so unblock it for every key. */ 
6870     for (j 
= 0; j 
< c
->blockingkeysnum
; j
++) { 
6871         /* Remove this client from the list of clients waiting for this key. */ 
6872         de 
= dictFind(c
->db
->blockingkeys
,c
->blockingkeys
[j
]); 
6874         l 
= dictGetEntryVal(de
); 
6875         listDelNode(l
,listSearchKey(l
,c
)); 
6876         /* If the list is empty we need to remove it to avoid wasting memory */ 
6877         if (listLength(l
) == 0) 
6878             dictDelete(c
->db
->blockingkeys
,c
->blockingkeys
[j
]); 
6879         decrRefCount(c
->blockingkeys
[j
]); 
6881     /* Cleanup the client structure */ 
6882     zfree(c
->blockingkeys
); 
6883     c
->blockingkeys 
= NULL
; 
6884     c
->flags 
&= (~REDIS_BLOCKED
); 
6885     server
.blpop_blocked_clients
--; 
6886     /* We want to process data if there is some command waiting 
6887      * in the input buffer. Note that this is safe even if 
6888      * unblockClientWaitingData() gets called from freeClient() because 
6889      * freeClient() will be smart enough to call this function 
6890      * *after* c->querybuf was set to NULL. */ 
6891     if (c
->querybuf 
&& sdslen(c
->querybuf
) > 0) processInputBuffer(c
); 
6894 /* This should be called from any function PUSHing into lists. 
6895  * 'c' is the "pushing client", 'key' is the key it is pushing data against, 
6896  * 'ele' is the element pushed. 
6898  * If the function returns 0 there was no client waiting for a list push 
6901  * If the function returns 1 there was a client waiting for a list push 
6902  * against this key, the element was passed to this client thus it's not 
6903  * needed to actually add it to the list and the caller should return asap. */ 
6904 static int handleClientsWaitingListPush(redisClient 
*c
, robj 
*key
, robj 
*ele
) { 
6905     struct dictEntry 
*de
; 
6906     redisClient 
*receiver
; 
6910     de 
= dictFind(c
->db
->blockingkeys
,key
); 
6911     if (de 
== NULL
) return 0; 
6912     l 
= dictGetEntryVal(de
); 
6915     receiver 
= ln
->value
; 
6917     addReplySds(receiver
,sdsnew("*2\r\n")); 
6918     addReplyBulk(receiver
,key
); 
6919     addReplyBulk(receiver
,ele
); 
6920     unblockClientWaitingData(receiver
); 
6924 /* Blocking RPOP/LPOP */ 
6925 static void blockingPopGenericCommand(redisClient 
*c
, int where
) { 
6930     for (j 
= 1; j 
< c
->argc
-1; j
++) { 
6931         o 
= lookupKeyWrite(c
->db
,c
->argv
[j
]); 
6933             if (o
->type 
!= REDIS_LIST
) { 
6934                 addReply(c
,shared
.wrongtypeerr
); 
6937                 list 
*list 
= o
->ptr
; 
6938                 if (listLength(list
) != 0) { 
6939                     /* If the list contains elements fall back to the usual 
6940                      * non-blocking POP operation */ 
6941                     robj 
*argv
[2], **orig_argv
; 
6944                     /* We need to alter the command arguments before to call 
6945                      * popGenericCommand() as the command takes a single key. */ 
6946                     orig_argv 
= c
->argv
; 
6947                     orig_argc 
= c
->argc
; 
6948                     argv
[1] = c
->argv
[j
]; 
6952                     /* Also the return value is different, we need to output 
6953                      * the multi bulk reply header and the key name. The 
6954                      * "real" command will add the last element (the value) 
6955                      * for us. If this souds like an hack to you it's just 
6956                      * because it is... */ 
6957                     addReplySds(c
,sdsnew("*2\r\n")); 
6958                     addReplyBulk(c
,argv
[1]); 
6959                     popGenericCommand(c
,where
); 
6961                     /* Fix the client structure with the original stuff */ 
6962                     c
->argv 
= orig_argv
; 
6963                     c
->argc 
= orig_argc
; 
6969     /* If the list is empty or the key does not exists we must block */ 
6970     timeout 
= strtol(c
->argv
[c
->argc
-1]->ptr
,NULL
,10); 
6971     if (timeout 
> 0) timeout 
+= time(NULL
); 
6972     blockForKeys(c
,c
->argv
+1,c
->argc
-2,timeout
); 
6975 static void blpopCommand(redisClient 
*c
) { 
6976     blockingPopGenericCommand(c
,REDIS_HEAD
); 
6979 static void brpopCommand(redisClient 
*c
) { 
6980     blockingPopGenericCommand(c
,REDIS_TAIL
); 
6983 /* =============================== Replication  ============================= */ 
6985 static int syncWrite(int fd
, char *ptr
, ssize_t size
, int timeout
) { 
6986     ssize_t nwritten
, ret 
= size
; 
6987     time_t start 
= time(NULL
); 
6991         if (aeWait(fd
,AE_WRITABLE
,1000) & AE_WRITABLE
) { 
6992             nwritten 
= write(fd
,ptr
,size
); 
6993             if (nwritten 
== -1) return -1; 
6997         if ((time(NULL
)-start
) > timeout
) { 
7005 static int syncRead(int fd
, char *ptr
, ssize_t size
, int timeout
) { 
7006     ssize_t nread
, totread 
= 0; 
7007     time_t start 
= time(NULL
); 
7011         if (aeWait(fd
,AE_READABLE
,1000) & AE_READABLE
) { 
7012             nread 
= read(fd
,ptr
,size
); 
7013             if (nread 
== -1) return -1; 
7018         if ((time(NULL
)-start
) > timeout
) { 
/* Read a line from 'fd' into the 'size' bytes buffer at 'ptr', one byte at
 * a time through syncRead() (so 'timeout' applies to every single byte).
 *
 * The line terminator '\n' is consumed but not stored, and a '\r'
 * immediately before it is stripped too. The buffer is always left
 * nul-terminated and never written past 'size' bytes: reading stops early
 * once only the terminator fits.
 *
 * Returns -1 if syncRead() fails or 'size' leaves no room for the
 * terminator, otherwise the number of bytes stored before the '\n' (or
 * before running out of room).
 * NOTE(review): the callers in this file only test the result against -1;
 * confirm the exact count semantics if a new caller depends on them. */
static int syncReadLine(int fd, char *ptr, ssize_t size, int timeout) {
    ssize_t nread = 0;

    if (size <= 0) return -1;
    size--; /* reserve one byte for the nul terminator */
    *ptr = '\0';
    while (size) {
        char c;

        if (syncRead(fd,&c,1,timeout) == -1) return -1;
        if (c == '\n') {
            *ptr = '\0';
            if (nread && *(ptr-1) == '\r') *(ptr-1) = '\0';
            return nread;
        }
        *ptr++ = c;
        *ptr = '\0';
        nread++;
        size--; /* one less byte of room: bounds the loop */
    }
    return nread;
}
7047 static void syncCommand(redisClient 
*c
) { 
7048     /* ignore SYNC if already slave or in monitor mode */ 
7049     if (c
->flags 
& REDIS_SLAVE
) return; 
7051     /* SYNC can't be issued when the server has pending data to send to 
7052      * the client about already issued commands. We need a fresh reply 
7053      * buffer registering the differences between the BGSAVE and the current 
7054      * dataset, so that we can copy to other slaves if needed. */ 
7055     if (listLength(c
->reply
) != 0) { 
7056         addReplySds(c
,sdsnew("-ERR SYNC is invalid with pending input\r\n")); 
7060     redisLog(REDIS_NOTICE
,"Slave ask for synchronization"); 
7061     /* Here we need to check if there is a background saving operation 
7062      * in progress, or if it is required to start one */ 
7063     if (server
.bgsavechildpid 
!= -1) { 
7064         /* Ok a background save is in progress. Let's check if it is a good 
7065          * one for replication, i.e. if there is another slave that is 
7066          * registering differences since the server forked to save */ 
7071         listRewind(server
.slaves
,&li
); 
7072         while((ln 
= listNext(&li
))) { 
7074             if (slave
->replstate 
== REDIS_REPL_WAIT_BGSAVE_END
) break; 
7077             /* Perfect, the server is already registering differences for 
7078              * another slave. Set the right state, and copy the buffer. */ 
7079             listRelease(c
->reply
); 
7080             c
->reply 
= listDup(slave
->reply
); 
7081             c
->replstate 
= REDIS_REPL_WAIT_BGSAVE_END
; 
7082             redisLog(REDIS_NOTICE
,"Waiting for end of BGSAVE for SYNC"); 
7084             /* No way, we need to wait for the next BGSAVE in order to 
7085              * register differences */ 
7086             c
->replstate 
= REDIS_REPL_WAIT_BGSAVE_START
; 
7087             redisLog(REDIS_NOTICE
,"Waiting for next BGSAVE for SYNC"); 
7090         /* Ok we don't have a BGSAVE in progress, let's start one */ 
7091         redisLog(REDIS_NOTICE
,"Starting BGSAVE for SYNC"); 
7092         if (rdbSaveBackground(server
.dbfilename
) != REDIS_OK
) { 
7093             redisLog(REDIS_NOTICE
,"Replication failed, can't BGSAVE"); 
7094             addReplySds(c
,sdsnew("-ERR Unalbe to perform background save\r\n")); 
7097         c
->replstate 
= REDIS_REPL_WAIT_BGSAVE_END
; 
7100     c
->flags 
|= REDIS_SLAVE
; 
7102     listAddNodeTail(server
.slaves
,c
); 
7106 static void sendBulkToSlave(aeEventLoop 
*el
, int fd
, void *privdata
, int mask
) { 
7107     redisClient 
*slave 
= privdata
; 
7109     REDIS_NOTUSED(mask
); 
7110     char buf
[REDIS_IOBUF_LEN
]; 
7111     ssize_t nwritten
, buflen
; 
7113     if (slave
->repldboff 
== 0) { 
7114         /* Write the bulk write count before transferring the DB. In theory here 
7115          * we don't know how much room there is in the output buffer of the 
7116          * socket, but in practice SO_SNDLOWAT (the minimum count for output 
7117          * operations) will never be smaller than the few bytes we need. */ 
7120         bulkcount 
= sdscatprintf(sdsempty(),"$%lld\r\n",(unsigned long long) 
7122         if (write(fd
,bulkcount
,sdslen(bulkcount
)) != (signed)sdslen(bulkcount
)) 
7130     lseek(slave
->repldbfd
,slave
->repldboff
,SEEK_SET
); 
7131     buflen 
= read(slave
->repldbfd
,buf
,REDIS_IOBUF_LEN
); 
7133         redisLog(REDIS_WARNING
,"Read error sending DB to slave: %s", 
7134             (buflen 
== 0) ? "premature EOF" : strerror(errno
)); 
7138     if ((nwritten 
= write(fd
,buf
,buflen
)) == -1) { 
7139         redisLog(REDIS_VERBOSE
,"Write error sending DB to slave: %s", 
7144     slave
->repldboff 
+= nwritten
; 
7145     if (slave
->repldboff 
== slave
->repldbsize
) { 
7146         close(slave
->repldbfd
); 
7147         slave
->repldbfd 
= -1; 
7148         aeDeleteFileEvent(server
.el
,slave
->fd
,AE_WRITABLE
); 
7149         slave
->replstate 
= REDIS_REPL_ONLINE
; 
7150         if (aeCreateFileEvent(server
.el
, slave
->fd
, AE_WRITABLE
, 
7151             sendReplyToClient
, slave
) == AE_ERR
) { 
7155         addReplySds(slave
,sdsempty()); 
7156         redisLog(REDIS_NOTICE
,"Synchronization with slave succeeded"); 
7160 /* This function is called at the end of every background saving. 
7161  * The argument bgsaveerr is REDIS_OK if the background saving succeeded 
7162  * otherwise REDIS_ERR is passed to the function. 
7164  * The goal of this function is to handle slaves waiting for a successful 
7165  * background saving in order to perform non-blocking synchronization. */ 
7166 static void updateSlavesWaitingBgsave(int bgsaveerr
) { 
7168     int startbgsave 
= 0; 
7171     listRewind(server
.slaves
,&li
); 
7172     while((ln 
= listNext(&li
))) { 
7173         redisClient 
*slave 
= ln
->value
; 
7175         if (slave
->replstate 
== REDIS_REPL_WAIT_BGSAVE_START
) { 
7177             slave
->replstate 
= REDIS_REPL_WAIT_BGSAVE_END
; 
7178         } else if (slave
->replstate 
== REDIS_REPL_WAIT_BGSAVE_END
) { 
7179             struct redis_stat buf
; 
7181             if (bgsaveerr 
!= REDIS_OK
) { 
7183                 redisLog(REDIS_WARNING
,"SYNC failed. BGSAVE child returned an error"); 
7186             if ((slave
->repldbfd 
= open(server
.dbfilename
,O_RDONLY
)) == -1 || 
7187                 redis_fstat(slave
->repldbfd
,&buf
) == -1) { 
7189                 redisLog(REDIS_WARNING
,"SYNC failed. Can't open/stat DB after BGSAVE: %s", strerror(errno
)); 
7192             slave
->repldboff 
= 0; 
7193             slave
->repldbsize 
= buf
.st_size
; 
7194             slave
->replstate 
= REDIS_REPL_SEND_BULK
; 
7195             aeDeleteFileEvent(server
.el
,slave
->fd
,AE_WRITABLE
); 
7196             if (aeCreateFileEvent(server
.el
, slave
->fd
, AE_WRITABLE
, sendBulkToSlave
, slave
) == AE_ERR
) { 
7203         if (rdbSaveBackground(server
.dbfilename
) != REDIS_OK
) { 
7206             listRewind(server
.slaves
,&li
); 
7207             redisLog(REDIS_WARNING
,"SYNC failed. BGSAVE failed"); 
7208             while((ln 
= listNext(&li
))) { 
7209                 redisClient 
*slave 
= ln
->value
; 
7211                 if (slave
->replstate 
== REDIS_REPL_WAIT_BGSAVE_START
) 
7218 static int syncWithMaster(void) { 
7219     char buf
[1024], tmpfile
[256], authcmd
[1024]; 
7221     int fd 
= anetTcpConnect(NULL
,server
.masterhost
,server
.masterport
); 
7222     int dfd
, maxtries 
= 5; 
7225         redisLog(REDIS_WARNING
,"Unable to connect to MASTER: %s", 
7230     /* AUTH with the master if required. */ 
7231     if(server
.masterauth
) { 
7232         snprintf(authcmd
, 1024, "AUTH %s\r\n", server
.masterauth
); 
7233         if (syncWrite(fd
, authcmd
, strlen(server
.masterauth
)+7, 5) == -1) { 
7235             redisLog(REDIS_WARNING
,"Unable to AUTH to MASTER: %s", 
7239         /* Read the AUTH result.  */ 
7240         if (syncReadLine(fd
,buf
,1024,3600) == -1) { 
7242             redisLog(REDIS_WARNING
,"I/O error reading auth result from MASTER: %s", 
7246         if (buf
[0] != '+') { 
7248             redisLog(REDIS_WARNING
,"Cannot AUTH to MASTER, is the masterauth password correct?"); 
7253     /* Issue the SYNC command */ 
7254     if (syncWrite(fd
,"SYNC \r\n",7,5) == -1) { 
7256         redisLog(REDIS_WARNING
,"I/O error writing to MASTER: %s", 
7260     /* Read the bulk write count */ 
7261     if (syncReadLine(fd
,buf
,1024,3600) == -1) { 
7263         redisLog(REDIS_WARNING
,"I/O error reading bulk count from MASTER: %s", 
7267     if (buf
[0] != '$') { 
7269         redisLog(REDIS_WARNING
,"Bad protocol from MASTER, the first byte is not '$', are you sure the host and port are right?"); 
7272     dumpsize 
= strtol(buf
+1,NULL
,10); 
7273     redisLog(REDIS_NOTICE
,"Receiving %ld bytes data dump from MASTER",dumpsize
); 
7274     /* Read the bulk write data on a temp file */ 
7276         snprintf(tmpfile
,256, 
7277             "temp-%d.%ld.rdb",(int)time(NULL
),(long int)getpid()); 
7278         dfd 
= open(tmpfile
,O_CREAT
|O_WRONLY
|O_EXCL
,0644); 
7279         if (dfd 
!= -1) break; 
7284         redisLog(REDIS_WARNING
,"Opening the temp file needed for MASTER <-> SLAVE synchronization: %s",strerror(errno
)); 
7288         int nread
, nwritten
; 
7290         nread 
= read(fd
,buf
,(dumpsize 
< 1024)?dumpsize
:1024); 
7292             redisLog(REDIS_WARNING
,"I/O error trying to sync with MASTER: %s", 
7298         nwritten 
= write(dfd
,buf
,nread
); 
7299         if (nwritten 
== -1) { 
7300             redisLog(REDIS_WARNING
,"Write error writing to the DB dump file needed for MASTER <-> SLAVE synchrnonization: %s", strerror(errno
)); 
7308     if (rename(tmpfile
,server
.dbfilename
) == -1) { 
7309         redisLog(REDIS_WARNING
,"Failed trying to rename the temp DB into dump.rdb in MASTER <-> SLAVE synchronization: %s", strerror(errno
)); 
7315     if (rdbLoad(server
.dbfilename
) != REDIS_OK
) { 
7316         redisLog(REDIS_WARNING
,"Failed trying to load the MASTER synchronization DB from disk"); 
7320     server
.master 
= createClient(fd
); 
7321     server
.master
->flags 
|= REDIS_MASTER
; 
7322     server
.master
->authenticated 
= 1; 
7323     server
.replstate 
= REDIS_REPL_CONNECTED
; 
7327 static void slaveofCommand(redisClient 
*c
) { 
7328     if (!strcasecmp(c
->argv
[1]->ptr
,"no") && 
7329         !strcasecmp(c
->argv
[2]->ptr
,"one")) { 
7330         if (server
.masterhost
) { 
7331             sdsfree(server
.masterhost
); 
7332             server
.masterhost 
= NULL
; 
7333             if (server
.master
) freeClient(server
.master
); 
7334             server
.replstate 
= REDIS_REPL_NONE
; 
7335             redisLog(REDIS_NOTICE
,"MASTER MODE enabled (user request)"); 
7338         sdsfree(server
.masterhost
); 
7339         server
.masterhost 
= sdsdup(c
->argv
[1]->ptr
); 
7340         server
.masterport 
= atoi(c
->argv
[2]->ptr
); 
7341         if (server
.master
) freeClient(server
.master
); 
7342         server
.replstate 
= REDIS_REPL_CONNECT
; 
7343         redisLog(REDIS_NOTICE
,"SLAVE OF %s:%d enabled (user request)", 
7344             server
.masterhost
, server
.masterport
); 
7346     addReply(c
,shared
.ok
); 
7349 /* ============================ Maxmemory directive  ======================== */ 
7351 /* Try to free one object form the pre-allocated objects free list. 
7352  * This is useful under low mem conditions as by default we take 1 million 
7353  * free objects allocated. On success REDIS_OK is returned, otherwise 
7355 static int tryFreeOneObjectFromFreelist(void) { 
7358     if (server
.vm_enabled
) pthread_mutex_lock(&server
.obj_freelist_mutex
); 
7359     if (listLength(server
.objfreelist
)) { 
7360         listNode 
*head 
= listFirst(server
.objfreelist
); 
7361         o 
= listNodeValue(head
); 
7362         listDelNode(server
.objfreelist
,head
); 
7363         if (server
.vm_enabled
) pthread_mutex_unlock(&server
.obj_freelist_mutex
); 
7367         if (server
.vm_enabled
) pthread_mutex_unlock(&server
.obj_freelist_mutex
); 
7372 /* This function gets called when 'maxmemory' is set on the config file to limit 
7373  * the max memory used by the server, and we are out of memory. 
7374  * This function will try to, in order: 
7376  * - Free objects from the free list 
7377  * - Try to remove keys with an EXPIRE set 
7379  * It is not possible to free enough memory to reach used-memory < maxmemory 
7380  * the server will start refusing commands that will enlarge even more the 
7383 static void freeMemoryIfNeeded(void) { 
7384     while (server
.maxmemory 
&& zmalloc_used_memory() > server
.maxmemory
) { 
7385         int j
, k
, freed 
= 0; 
7387         if (tryFreeOneObjectFromFreelist() == REDIS_OK
) continue; 
7388         for (j 
= 0; j 
< server
.dbnum
; j
++) { 
7390             robj 
*minkey 
= NULL
; 
7391             struct dictEntry 
*de
; 
7393             if (dictSize(server
.db
[j
].expires
)) { 
7395                 /* From a sample of three keys drop the one nearest to 
7396                  * the natural expire */ 
7397                 for (k 
= 0; k 
< 3; k
++) { 
7400                     de 
= dictGetRandomKey(server
.db
[j
].expires
); 
7401                     t 
= (time_t) dictGetEntryVal(de
); 
7402                     if (minttl 
== -1 || t 
< minttl
) { 
7403                         minkey 
= dictGetEntryKey(de
); 
7407                 deleteKey(server
.db
+j
,minkey
); 
7410         if (!freed
) return; /* nothing to free... */ 
7414 /* ============================== Append Only file ========================== */ 
7416 static void feedAppendOnlyFile(struct redisCommand 
*cmd
, int dictid
, robj 
**argv
, int argc
) { 
7417     sds buf 
= sdsempty(); 
7423     /* The DB this command was targeting is not the same as the last command 
7424      * we appended. A SELECT command needs to be issued. */ 
7425     if (dictid 
!= server
.appendseldb
) { 
7428         snprintf(seldb
,sizeof(seldb
),"%d",dictid
); 
7429         buf 
= sdscatprintf(buf
,"*2\r\n$6\r\nSELECT\r\n$%lu\r\n%s\r\n", 
7430             (unsigned long)strlen(seldb
),seldb
); 
7431         server
.appendseldb 
= dictid
; 
7434     /* "Fix" the argv vector if the command is EXPIRE. We want to translate 
7435      * EXPIREs into EXPIREATs calls */ 
7436     if (cmd
->proc 
== expireCommand
) { 
7439         tmpargv
[0] = createStringObject("EXPIREAT",8); 
7440         tmpargv
[1] = argv
[1]; 
7441         incrRefCount(argv
[1]); 
7442         when 
= time(NULL
)+strtol(argv
[2]->ptr
,NULL
,10); 
7443         tmpargv
[2] = createObject(REDIS_STRING
, 
7444             sdscatprintf(sdsempty(),"%ld",when
)); 
7448     /* Append the actual command */ 
7449     buf 
= sdscatprintf(buf
,"*%d\r\n",argc
); 
7450     for (j 
= 0; j 
< argc
; j
++) { 
7453         o 
= getDecodedObject(o
); 
7454         buf 
= sdscatprintf(buf
,"$%lu\r\n",(unsigned long)sdslen(o
->ptr
)); 
7455         buf 
= sdscatlen(buf
,o
->ptr
,sdslen(o
->ptr
)); 
7456         buf 
= sdscatlen(buf
,"\r\n",2); 
7460     /* Free the objects from the modified argv for EXPIREAT */ 
7461     if (cmd
->proc 
== expireCommand
) { 
7462         for (j 
= 0; j 
< 3; j
++) 
7463             decrRefCount(argv
[j
]); 
7466     /* We want to perform a single write. This should be guaranteed atomic 
7467      * at least if the filesystem we are writing is a real physical one. 
7468      * While this will save us against the server being killed I don't think 
7469      * there is much to do about the whole server stopping for power problems 
7471      nwritten 
= write(server
.appendfd
,buf
,sdslen(buf
)); 
7472      if (nwritten 
!= (signed)sdslen(buf
)) { 
7473         /* Ooops, we are in troubles. The best thing to do for now is 
7474          * to simply exit instead to give the illusion that everything is 
7475          * working as expected. */ 
7476          if (nwritten 
== -1) { 
7477             redisLog(REDIS_WARNING
,"Exiting on error writing to the append-only file: %s",strerror(errno
)); 
7479             redisLog(REDIS_WARNING
,"Exiting on short write while writing to the append-only file: %s",strerror(errno
)); 
7483     /* If a background append only file rewriting is in progress we want to 
7484      * accumulate the differences between the child DB and the current one 
7485      * in a buffer, so that when the child process will do its work we 
7486      * can append the differences to the new append only file. */ 
7487     if (server
.bgrewritechildpid 
!= -1) 
7488         server
.bgrewritebuf 
= sdscatlen(server
.bgrewritebuf
,buf
,sdslen(buf
)); 
7492     if (server
.appendfsync 
== APPENDFSYNC_ALWAYS 
|| 
7493         (server
.appendfsync 
== APPENDFSYNC_EVERYSEC 
&& 
7494          now
-server
.lastfsync 
> 1)) 
7496         fsync(server
.appendfd
); /* Let's try to get this data on the disk */ 
7497         server
.lastfsync 
= now
; 
7501 /* In Redis commands are always executed in the context of a client, so in 
7502  * order to load the append only file we need to create a fake client. */ 
/* The client is allocated with zmalloc() and gets an empty query buffer
 * and an empty reply list whose free and dup methods are decrRefCount and
 * dupClientReplyValue.
 * NOTE(review): several numbered lines of this function (further field
 * initialisation and the return statement) are not present in this
 * listing -- confirm the full set of initialised fields against the
 * complete source. */
7503 static struct redisClient 
*createFakeClient(void) { 
7504     struct redisClient 
*c 
= zmalloc(sizeof(*c
)); 
7508     c
->querybuf 
= sdsempty(); 
7512     /* We set the fake client as a slave waiting for the synchronization 
7513      * so that Redis will not try to send replies to this client. */ 
7514     c
->replstate 
= REDIS_REPL_WAIT_BGSAVE_START
; 
7515     c
->reply 
= listCreate(); 
7516     listSetFreeMethod(c
->reply
,decrRefCount
); 
7517     listSetDupMethod(c
->reply
,dupClientReplyValue
); 
7521 static void freeFakeClient(struct redisClient 
*c
) { 
7522     sdsfree(c
->querybuf
); 
7523     listRelease(c
->reply
); 
7527 /* Replay the append log file. On success REDIS_OK is returned. On non fatal 
7528  * error (the append only file is zero-length) REDIS_ERR is returned. On 
7529  * fatal error an error message is logged and the program exits. */ 
7530 int loadAppendOnlyFile(char *filename
) { 
7531     struct redisClient 
*fakeClient
; 
7532     FILE *fp 
= fopen(filename
,"r"); 
7533     struct redis_stat sb
; 
7534     unsigned long long loadedkeys 
= 0; 
7536     if (redis_fstat(fileno(fp
),&sb
) != -1 && sb
.st_size 
== 0) 
7540         redisLog(REDIS_WARNING
,"Fatal error: can't open the append log file for reading: %s",strerror(errno
)); 
7544     fakeClient 
= createFakeClient(); 
7551         struct redisCommand 
*cmd
; 
7553         if (fgets(buf
,sizeof(buf
),fp
) == NULL
) { 
7559         if (buf
[0] != '*') goto fmterr
; 
7561         argv 
= zmalloc(sizeof(robj
*)*argc
); 
7562         for (j 
= 0; j 
< argc
; j
++) { 
7563             if (fgets(buf
,sizeof(buf
),fp
) == NULL
) goto readerr
; 
7564             if (buf
[0] != '$') goto fmterr
; 
7565             len 
= strtol(buf
+1,NULL
,10); 
7566             argsds 
= sdsnewlen(NULL
,len
); 
7567             if (len 
&& fread(argsds
,len
,1,fp
) == 0) goto fmterr
; 
7568             argv
[j
] = createObject(REDIS_STRING
,argsds
); 
7569             if (fread(buf
,2,1,fp
) == 0) goto fmterr
; /* discard CRLF */ 
7572         /* Command lookup */ 
7573         cmd 
= lookupCommand(argv
[0]->ptr
); 
7575             redisLog(REDIS_WARNING
,"Unknown command '%s' reading the append only file", argv
[0]->ptr
); 
7578         /* Try object sharing and encoding */ 
7579         if (server
.shareobjects
) { 
7581             for(j 
= 1; j 
< argc
; j
++) 
7582                 argv
[j
] = tryObjectSharing(argv
[j
]); 
7584         if (cmd
->flags 
& REDIS_CMD_BULK
) 
7585             tryObjectEncoding(argv
[argc
-1]); 
7586         /* Run the command in the context of a fake client */ 
7587         fakeClient
->argc 
= argc
; 
7588         fakeClient
->argv 
= argv
; 
7589         cmd
->proc(fakeClient
); 
7590         /* Discard the reply objects list from the fake client */ 
7591         while(listLength(fakeClient
->reply
)) 
7592             listDelNode(fakeClient
->reply
,listFirst(fakeClient
->reply
)); 
7593         /* Clean up, ready for the next command */ 
7594         for (j 
= 0; j 
< argc
; j
++) decrRefCount(argv
[j
]); 
7596         /* Handle swapping while loading big datasets when VM is on */ 
7598         if (server
.vm_enabled 
&& (loadedkeys 
% 5000) == 0) { 
7599             while (zmalloc_used_memory() > server
.vm_max_memory
) { 
7600                 if (vmSwapOneObjectBlocking() == REDIS_ERR
) break; 
7605     freeFakeClient(fakeClient
); 
7610         redisLog(REDIS_WARNING
,"Unexpected end of file reading the append only file"); 
7612         redisLog(REDIS_WARNING
,"Unrecoverable error reading the append only file: %s", strerror(errno
)); 
7616     redisLog(REDIS_WARNING
,"Bad file format reading the append only file"); 
7620 /* Write an object into a file in the bulk format $<count>\r\n<payload>\r\n */ 
7621 static int fwriteBulkObject(FILE *fp
, robj 
*obj
) { 
7625     /* Avoid the incr/decr ref count business if possible to help 
7626      * copy-on-write (we are often in a child process when this function 
7628      * Also makes sure that key objects don't get incrRefCount-ed when VM 
7630     if (obj
->encoding 
!= REDIS_ENCODING_RAW
) { 
7631         obj 
= getDecodedObject(obj
); 
7634     snprintf(buf
,sizeof(buf
),"$%ld\r\n",(long)sdslen(obj
->ptr
)); 
7635     if (fwrite(buf
,strlen(buf
),1,fp
) == 0) goto err
; 
7636     if (sdslen(obj
->ptr
) && fwrite(obj
->ptr
,sdslen(obj
->ptr
),1,fp
) == 0) 
7638     if (fwrite("\r\n",2,1,fp
) == 0) goto err
; 
7639     if (decrrc
) decrRefCount(obj
); 
7642     if (decrrc
) decrRefCount(obj
); 
/* Write binary-safe string into a file in the bulk format
 * $<count>\r\n<payload>\r\n
 *
 * 's' points to 'len' bytes that may contain any value, nul included; a
 * zero 'len' produces an empty payload. Returns 1 on success, 0 if any
 * fwrite() fails. */
static int fwriteBulkString(FILE *fp, char *s, unsigned long len) {
    char buf[128];

    /* 'len' is unsigned long, so the matching conversion is %lu. */
    snprintf(buf,sizeof(buf),"$%lu\r\n",len);
    if (fwrite(buf,strlen(buf),1,fp) == 0) return 0;
    if (len && fwrite(s,len,1,fp) == 0) return 0;
    if (fwrite("\r\n",2,1,fp) == 0) return 0;
    return 1;
}
/* Write a double value in bulk format $<count>\r\n<payload>\r\n
 *
 * The payload is the value printed with "%.17g". Returns 1 on success,
 * 0 if any fwrite() fails. */
static int fwriteBulkDouble(FILE *fp, double d) {
    char buf[128], dbuf[128];

    /* dbuf already carries the trailing CRLF, which is not part of the
     * payload: hence the -2 when computing the announced length. */
    snprintf(dbuf,sizeof(dbuf),"%.17g\r\n",d);
    snprintf(buf,sizeof(buf),"$%lu\r\n",(unsigned long)strlen(dbuf)-2);
    if (fwrite(buf,strlen(buf),1,fp) == 0) return 0;
    if (fwrite(dbuf,strlen(dbuf),1,fp) == 0) return 0;
    return 1;
}
/* Write a long value in bulk format $<count>\r\n<payload>\r\n
 *
 * The payload is the value printed in base 10. Returns 1 on success,
 * 0 if any fwrite() fails. */
static int fwriteBulkLong(FILE *fp, long l) {
    char buf[128], lbuf[128];

    /* lbuf already carries the trailing CRLF, which is not part of the
     * payload: hence the -2 when computing the announced length. */
    snprintf(lbuf,sizeof(lbuf),"%ld\r\n",l);
    snprintf(buf,sizeof(buf),"$%lu\r\n",(unsigned long)strlen(lbuf)-2);
    if (fwrite(buf,strlen(buf),1,fp) == 0) return 0;
    if (fwrite(lbuf,strlen(lbuf),1,fp) == 0) return 0;
    return 1;
}
7680 /* Write a sequence of commands able to fully rebuild the dataset into 
7681  * "filename". Used both by REWRITEAOF and BGREWRITEAOF. */ 
7682 static int rewriteAppendOnlyFile(char *filename
) { 
7683     dictIterator 
*di 
= NULL
; 
7688     time_t now 
= time(NULL
); 
7690     /* Note that we have to use a different temp name here compared to the 
7691      * one used by rewriteAppendOnlyFileBackground() function. */ 
7692     snprintf(tmpfile
,256,"temp-rewriteaof-%d.aof", (int) getpid()); 
7693     fp 
= fopen(tmpfile
,"w"); 
7695         redisLog(REDIS_WARNING
, "Failed rewriting the append only file: %s", strerror(errno
)); 
7698     for (j 
= 0; j 
< server
.dbnum
; j
++) { 
7699         char selectcmd
[] = "*2\r\n$6\r\nSELECT\r\n"; 
7700         redisDb 
*db 
= server
.db
+j
; 
7702         if (dictSize(d
) == 0) continue; 
7703         di 
= dictGetIterator(d
); 
7709         /* SELECT the new DB */ 
7710         if (fwrite(selectcmd
,sizeof(selectcmd
)-1,1,fp
) == 0) goto werr
; 
7711         if (fwriteBulkLong(fp
,j
) == 0) goto werr
; 
7713         /* Iterate this DB writing every entry */ 
7714         while((de 
= dictNext(di
)) != NULL
) { 
7719             key 
= dictGetEntryKey(de
); 
7720             /* If the value for this key is swapped, load a preview in memory. 
7721              * We use a "swapped" flag to remember if we need to free the 
7722              * value object instead to just increment the ref count anyway 
7723              * in order to avoid copy-on-write of pages if we are forked() */ 
7724             if (!server
.vm_enabled 
|| key
->storage 
== REDIS_VM_MEMORY 
|| 
7725                 key
->storage 
== REDIS_VM_SWAPPING
) { 
7726                 o 
= dictGetEntryVal(de
); 
7729                 o 
= vmPreviewObject(key
); 
7732             expiretime 
= getExpire(db
,key
); 
7734             /* Save the key and associated value */ 
7735             if (o
->type 
== REDIS_STRING
) { 
7736                 /* Emit a SET command */ 
7737                 char cmd
[]="*3\r\n$3\r\nSET\r\n"; 
7738                 if (fwrite(cmd
,sizeof(cmd
)-1,1,fp
) == 0) goto werr
; 
7740                 if (fwriteBulkObject(fp
,key
) == 0) goto werr
; 
7741                 if (fwriteBulkObject(fp
,o
) == 0) goto werr
; 
7742             } else if (o
->type 
== REDIS_LIST
) { 
7743                 /* Emit the RPUSHes needed to rebuild the list */ 
7744                 list 
*list 
= o
->ptr
; 
7748                 listRewind(list
,&li
); 
7749                 while((ln 
= listNext(&li
))) { 
7750                     char cmd
[]="*3\r\n$5\r\nRPUSH\r\n"; 
7751                     robj 
*eleobj 
= listNodeValue(ln
); 
7753                     if (fwrite(cmd
,sizeof(cmd
)-1,1,fp
) == 0) goto werr
; 
7754                     if (fwriteBulkObject(fp
,key
) == 0) goto werr
; 
7755                     if (fwriteBulkObject(fp
,eleobj
) == 0) goto werr
; 
7757             } else if (o
->type 
== REDIS_SET
) { 
7758                 /* Emit the SADDs needed to rebuild the set */ 
7760                 dictIterator 
*di 
= dictGetIterator(set
); 
7763                 while((de 
= dictNext(di
)) != NULL
) { 
7764                     char cmd
[]="*3\r\n$4\r\nSADD\r\n"; 
7765                     robj 
*eleobj 
= dictGetEntryKey(de
); 
7767                     if (fwrite(cmd
,sizeof(cmd
)-1,1,fp
) == 0) goto werr
; 
7768                     if (fwriteBulkObject(fp
,key
) == 0) goto werr
; 
7769                     if (fwriteBulkObject(fp
,eleobj
) == 0) goto werr
; 
7771                 dictReleaseIterator(di
); 
7772             } else if (o
->type 
== REDIS_ZSET
) { 
7773                 /* Emit the ZADDs needed to rebuild the sorted set */ 
7775                 dictIterator 
*di 
= dictGetIterator(zs
->dict
); 
7778                 while((de 
= dictNext(di
)) != NULL
) { 
7779                     char cmd
[]="*4\r\n$4\r\nZADD\r\n"; 
7780                     robj 
*eleobj 
= dictGetEntryKey(de
); 
7781                     double *score 
= dictGetEntryVal(de
); 
7783                     if (fwrite(cmd
,sizeof(cmd
)-1,1,fp
) == 0) goto werr
; 
7784                     if (fwriteBulkObject(fp
,key
) == 0) goto werr
; 
7785                     if (fwriteBulkDouble(fp
,*score
) == 0) goto werr
; 
7786                     if (fwriteBulkObject(fp
,eleobj
) == 0) goto werr
; 
7788                 dictReleaseIterator(di
); 
7789             } else if (o
->type 
== REDIS_HASH
) { 
7790                 char cmd
[]="*4\r\n$4\r\nHSET\r\n"; 
7792                 /* Emit the HSETs needed to rebuild the hash */ 
7793                 if (o
->encoding 
== REDIS_ENCODING_ZIPMAP
) { 
7794                     unsigned char *p 
= zipmapRewind(o
->ptr
); 
7795                     unsigned char *field
, *val
; 
7796                     unsigned int flen
, vlen
; 
7798                     while((p 
= zipmapNext(p
,&field
,&flen
,&val
,&vlen
)) != NULL
) { 
7799                         if (fwrite(cmd
,sizeof(cmd
)-1,1,fp
) == 0) goto werr
; 
7800                         if (fwriteBulkObject(fp
,key
) == 0) goto werr
; 
7801                         if (fwriteBulkString(fp
,(char*)field
,flen
) == -1) 
7803                         if (fwriteBulkString(fp
,(char*)val
,vlen
) == -1) 
7807                     dictIterator 
*di 
= dictGetIterator(o
->ptr
); 
7810                     while((de 
= dictNext(di
)) != NULL
) { 
7811                         robj 
*field 
= dictGetEntryKey(de
); 
7812                         robj 
*val 
= dictGetEntryVal(de
); 
7814                         if (fwrite(cmd
,sizeof(cmd
)-1,1,fp
) == 0) goto werr
; 
7815                         if (fwriteBulkObject(fp
,key
) == 0) goto werr
; 
7816                         if (fwriteBulkObject(fp
,field
) == -1) return -1; 
7817                         if (fwriteBulkObject(fp
,val
) == -1) return -1; 
7819                     dictReleaseIterator(di
); 
7824             /* Save the expire time */ 
7825             if (expiretime 
!= -1) { 
7826                 char cmd
[]="*3\r\n$8\r\nEXPIREAT\r\n"; 
7827                 /* If this key is already expired skip it */ 
7828                 if (expiretime 
< now
) continue; 
7829                 if (fwrite(cmd
,sizeof(cmd
)-1,1,fp
) == 0) goto werr
; 
7830                 if (fwriteBulkObject(fp
,key
) == 0) goto werr
; 
7831                 if (fwriteBulkLong(fp
,expiretime
) == 0) goto werr
; 
7833             if (swapped
) decrRefCount(o
); 
7835         dictReleaseIterator(di
); 
7838     /* Make sure data will not remain on the OS's output buffers */ 
7843     /* Use RENAME to make sure the DB file is changed atomically only 
7844      * if the generate DB file is ok. */ 
7845     if (rename(tmpfile
,filename
) == -1) { 
7846         redisLog(REDIS_WARNING
,"Error moving temp append only file on the final destination: %s", strerror(errno
)); 
7850     redisLog(REDIS_NOTICE
,"SYNC append only file rewrite performed"); 
7856     redisLog(REDIS_WARNING
,"Write error writing append only file on disk: %s", strerror(errno
)); 
7857     if (di
) dictReleaseIterator(di
); 
7861 /* This is how rewriting of the append only file in background works: 
7863  * 1) The user calls BGREWRITEAOF 
7864  * 2) Redis calls this function, that forks(): 
7865  *    2a) the child rewrites the append only file in a temp file. 
7866  *    2b) the parent accumulates differences in server.bgrewritebuf. 
7867  * 3) When the child finishes '2a' it exits. 
7868  * 4) The parent will trap the exit code, if it's OK, will append the 
7869  *    data accumulated into server.bgrewritebuf into the temp file, and 
7870  *    finally will rename(2) the temp file in the actual file name. 
7871  *    Then the new file is reopened as the new append only file. Profit! 
7873 static int rewriteAppendOnlyFileBackground(void) { 
7876     if (server
.bgrewritechildpid 
!= -1) return REDIS_ERR
; 
7877     if (server
.vm_enabled
) waitEmptyIOJobsQueue(); 
7878     if ((childpid 
= fork()) == 0) { 
7882         if (server
.vm_enabled
) vmReopenSwapFile(); 
7884         snprintf(tmpfile
,256,"temp-rewriteaof-bg-%d.aof", (int) getpid()); 
7885         if (rewriteAppendOnlyFile(tmpfile
) == REDIS_OK
) { 
7892         if (childpid 
== -1) { 
7893             redisLog(REDIS_WARNING
, 
7894                 "Can't rewrite append only file in background: fork: %s", 
7898         redisLog(REDIS_NOTICE
, 
7899             "Background append only file rewriting started by pid %d",childpid
); 
7900         server
.bgrewritechildpid 
= childpid
; 
7901         /* We set appendseldb to -1 in order to force the next call to the 
7902          * feedAppendOnlyFile() to issue a SELECT command, so the differences 
7903          * accumulated by the parent into server.bgrewritebuf will start 
7904          * with a SELECT statement and it will be safe to merge. */ 
7905         server
.appendseldb 
= -1; 
7908     return REDIS_OK
; /* unreached */ 
7911 static void bgrewriteaofCommand(redisClient 
*c
) { 
7912     if (server
.bgrewritechildpid 
!= -1) { 
7913         addReplySds(c
,sdsnew("-ERR background append only file rewriting already in progress\r\n")); 
7916     if (rewriteAppendOnlyFileBackground() == REDIS_OK
) { 
7917         char *status 
= "+Background append only file rewriting started\r\n"; 
7918         addReplySds(c
,sdsnew(status
)); 
7920         addReply(c
,shared
.err
); 
7924 static void aofRemoveTempFile(pid_t childpid
) { 
7927     snprintf(tmpfile
,256,"temp-rewriteaof-bg-%d.aof", (int) childpid
); 
7931 /* Virtual Memory is composed mainly of two subsystems: 
7932  * - Blocking Virtual Memory 
7933  * - Threaded Virtual Memory I/O 
7934  * The two parts are not fully decoupled, but functions are split among two 
7935  * different sections of the source code (delimited by comments) in order to 
7936  * make more clear what functionality is about the blocking VM and what about 
7937  * the threaded (not blocking) VM. 
7941  * Redis VM is a blocking VM (one that blocks reading swapped values from 
7942  * disk into memory when a value swapped out is needed in memory) that is made 
7943  * unblocking by trying to examine the command argument vector in order to 
7944  * load in background values that will likely be needed in order to exec 
7945  * the command. The command is executed only once all the relevant keys 
7946  * are loaded into memory. 
7948  * This is basically almost as simple as a blocking VM, but almost as parallel 
7949  * as a fully non-blocking VM. 
7952 /* =================== Virtual Memory - Blocking Side  ====================== */ 
7954 /* substitute the first occurrence of '%p' with the process pid in the 
7955  * swap file name. */ 
7956 static void expandVmSwapFilename(void) { 
7957     char *p 
= strstr(server
.vm_swap_file
,"%p"); 
7963     new = sdscat(new,server
.vm_swap_file
); 
7964     new = sdscatprintf(new,"%ld",(long) getpid()); 
7965     new = sdscat(new,p
+2); 
7966     zfree(server
.vm_swap_file
); 
7967     server
.vm_swap_file 
= new; 
7970 static void vmInit(void) { 
7975     if (server
.vm_max_threads 
!= 0) 
7976         zmalloc_enable_thread_safeness(); /* we need thread safe zmalloc() */ 
7978     expandVmSwapFilename(); 
7979     redisLog(REDIS_NOTICE
,"Using '%s' as swap file",server
.vm_swap_file
); 
7980     if ((server
.vm_fp 
= fopen(server
.vm_swap_file
,"r+b")) == NULL
) { 
7981         server
.vm_fp 
= fopen(server
.vm_swap_file
,"w+b"); 
7983     if (server
.vm_fp 
== NULL
) { 
7984         redisLog(REDIS_WARNING
, 
7985             "Impossible to open the swap file: %s. Exiting.", 
7989     server
.vm_fd 
= fileno(server
.vm_fp
); 
7990     server
.vm_next_page 
= 0; 
7991     server
.vm_near_pages 
= 0; 
7992     server
.vm_stats_used_pages 
= 0; 
7993     server
.vm_stats_swapped_objects 
= 0; 
7994     server
.vm_stats_swapouts 
= 0; 
7995     server
.vm_stats_swapins 
= 0; 
7996     totsize 
= server
.vm_pages
*server
.vm_page_size
; 
7997     redisLog(REDIS_NOTICE
,"Allocating %lld bytes of swap file",totsize
); 
7998     if (ftruncate(server
.vm_fd
,totsize
) == -1) { 
7999         redisLog(REDIS_WARNING
,"Can't ftruncate swap file: %s. Exiting.", 
8003         redisLog(REDIS_NOTICE
,"Swap file allocated with success"); 
8005     server
.vm_bitmap 
= zmalloc((server
.vm_pages
+7)/8); 
8006     redisLog(REDIS_VERBOSE
,"Allocated %lld bytes page table for %lld pages", 
8007         (long long) (server
.vm_pages
+7)/8, server
.vm_pages
); 
8008     memset(server
.vm_bitmap
,0,(server
.vm_pages
+7)/8); 
8010     /* Initialize threaded I/O (used by Virtual Memory) */ 
8011     server
.io_newjobs 
= listCreate(); 
8012     server
.io_processing 
= listCreate(); 
8013     server
.io_processed 
= listCreate(); 
8014     server
.io_ready_clients 
= listCreate(); 
8015     pthread_mutex_init(&server
.io_mutex
,NULL
); 
8016     pthread_mutex_init(&server
.obj_freelist_mutex
,NULL
); 
8017     pthread_mutex_init(&server
.io_swapfile_mutex
,NULL
); 
8018     server
.io_active_threads 
= 0; 
8019     if (pipe(pipefds
) == -1) { 
8020         redisLog(REDIS_WARNING
,"Unable to intialized VM: pipe(2): %s. Exiting." 
8024     server
.io_ready_pipe_read 
= pipefds
[0]; 
8025     server
.io_ready_pipe_write 
= pipefds
[1]; 
8026     redisAssert(anetNonBlock(NULL
,server
.io_ready_pipe_read
) != ANET_ERR
); 
8027     /* LZF requires a lot of stack */ 
8028     pthread_attr_init(&server
.io_threads_attr
); 
8029     pthread_attr_getstacksize(&server
.io_threads_attr
, &stacksize
); 
8030     while (stacksize 
< REDIS_THREAD_STACK_SIZE
) stacksize 
*= 2; 
8031     pthread_attr_setstacksize(&server
.io_threads_attr
, stacksize
); 
8032     /* Listen for events in the threaded I/O pipe */ 
8033     if (aeCreateFileEvent(server
.el
, server
.io_ready_pipe_read
, AE_READABLE
, 
8034         vmThreadedIOCompletedJob
, NULL
) == AE_ERR
) 
8035         oom("creating file event"); 
8038 /* Mark the page as used */ 
8039 static void vmMarkPageUsed(off_t page
) { 
8040     off_t byte 
= page
/8; 
8042     redisAssert(vmFreePage(page
) == 1); 
8043     server
.vm_bitmap
[byte
] |= 1<<bit
; 
8046 /* Mark N contiguous pages as used, with 'page' being the first. */ 
8047 static void vmMarkPagesUsed(off_t page
, off_t count
) { 
8050     for (j 
= 0; j 
< count
; j
++) 
8051         vmMarkPageUsed(page
+j
); 
8052     server
.vm_stats_used_pages 
+= count
; 
8053     redisLog(REDIS_DEBUG
,"Mark USED pages: %lld pages at %lld\n", 
8054         (long long)count
, (long long)page
); 
8057 /* Mark the page as free */ 
8058 static void vmMarkPageFree(off_t page
) { 
8059     off_t byte 
= page
/8; 
8061     redisAssert(vmFreePage(page
) == 0); 
8062     server
.vm_bitmap
[byte
] &= ~(1<<bit
); 
8065 /* Mark N contiguous pages as free, with 'page' being the first. */ 
8066 static void vmMarkPagesFree(off_t page
, off_t count
) { 
8069     for (j 
= 0; j 
< count
; j
++) 
8070         vmMarkPageFree(page
+j
); 
8071     server
.vm_stats_used_pages 
-= count
; 
8072     redisLog(REDIS_DEBUG
,"Mark FREE pages: %lld pages at %lld\n", 
8073         (long long)count
, (long long)page
); 
8076 /* Test if the page is free */ 
8077 static int vmFreePage(off_t page
) { 
8078     off_t byte 
= page
/8; 
8080     return (server
.vm_bitmap
[byte
] & (1<<bit
)) == 0; 
8083 /* Find N contiguous free pages storing the first page of the cluster in *first. 
8084  * Returns REDIS_OK if it was able to find N contiguous pages, otherwise  
8085  * REDIS_ERR is returned. 
8087  * This function uses a simple algorithm: we try to allocate 
8088  * REDIS_VM_MAX_NEAR_PAGES sequentially, when we reach this limit we start 
8089  * again from the start of the swap file searching for free spaces. 
8091  * If it looks pretty clear that there are no free pages near our offset 
8092  * we try to find less populated places doing a forward jump of 
8093  * REDIS_VM_MAX_RANDOM_JUMP, then we start scanning again a few pages 
8094  * without hurry, and then we jump again and so forth... 
8096  * This function can be improved using a free list to avoid to guess 
8097  * too much, since we could collect data about freed pages. 
8099  * note: I implemented this function just after watching an episode of 
8100  * Battlestar Galactica, where the hybrid was continuing to say "JUMP!" 
8102 static int vmFindContiguousPages(off_t 
*first
, off_t n
) { 
8103     off_t base
, offset 
= 0, since_jump 
= 0, numfree 
= 0; 
8105     if (server
.vm_near_pages 
== REDIS_VM_MAX_NEAR_PAGES
) { 
8106         server
.vm_near_pages 
= 0; 
8107         server
.vm_next_page 
= 0; 
8109     server
.vm_near_pages
++; /* Yet another try for pages near to the old ones */ 
8110     base 
= server
.vm_next_page
; 
8112     while(offset 
< server
.vm_pages
) { 
8113         off_t 
this = base
+offset
; 
8115         /* If we overflow, restart from page zero */ 
8116         if (this >= server
.vm_pages
) { 
8117             this -= server
.vm_pages
; 
8119                 /* Just overflowed, what we found on tail is no longer 
8120                  * interesting, as it's no longer contiguous. */ 
8124         if (vmFreePage(this)) { 
8125             /* This is a free page */ 
8127             /* Already got N free pages? Return to the caller, with success */ 
8129                 *first 
= this-(n
-1); 
8130                 server
.vm_next_page 
= this+1; 
8131                 redisLog(REDIS_DEBUG
, "FOUND CONTIGUOUS PAGES: %lld pages at %lld\n", (long long) n
, (long long) *first
); 
8135             /* The current one is not a free page */ 
8139         /* Fast-forward if the current page is not free and we already 
8140          * searched enough near this place. */ 
8142         if (!numfree 
&& since_jump 
>= REDIS_VM_MAX_RANDOM_JUMP
/4) { 
8143             offset 
+= random() % REDIS_VM_MAX_RANDOM_JUMP
; 
8145             /* Note that even if we rewind after the jump, we are don't need 
8146              * to make sure numfree is set to zero as we only jump *if* it 
8147              * is set to zero. */ 
8149             /* Otherwise just check the next page */ 
8156 /* Write the specified object at the specified page of the swap file */ 
8157 static int vmWriteObjectOnSwap(robj 
*o
, off_t page
) { 
8158     if (server
.vm_enabled
) pthread_mutex_lock(&server
.io_swapfile_mutex
); 
8159     if (fseeko(server
.vm_fp
,page
*server
.vm_page_size
,SEEK_SET
) == -1) { 
8160         if (server
.vm_enabled
) pthread_mutex_unlock(&server
.io_swapfile_mutex
); 
8161         redisLog(REDIS_WARNING
, 
8162             "Critical VM problem in vmWriteObjectOnSwap(): can't seek: %s", 
8166     rdbSaveObject(server
.vm_fp
,o
); 
8167     fflush(server
.vm_fp
); 
8168     if (server
.vm_enabled
) pthread_mutex_unlock(&server
.io_swapfile_mutex
); 
8172 /* Swap the 'val' object relative to 'key' into disk. Store all the information 
8173  * needed to later retrieve the object into the key object. 
8174  * If we can't find enough contiguous empty pages to swap the object on disk 
8175  * REDIS_ERR is returned. */ 
8176 static int vmSwapObjectBlocking(robj 
*key
, robj 
*val
) { 
8177     off_t pages 
= rdbSavedObjectPages(val
,NULL
); 
8180     assert(key
->storage 
== REDIS_VM_MEMORY
); 
8181     assert(key
->refcount 
== 1); 
8182     if (vmFindContiguousPages(&page
,pages
) == REDIS_ERR
) return REDIS_ERR
; 
8183     if (vmWriteObjectOnSwap(val
,page
) == REDIS_ERR
) return REDIS_ERR
; 
8184     key
->vm
.page 
= page
; 
8185     key
->vm
.usedpages 
= pages
; 
8186     key
->storage 
= REDIS_VM_SWAPPED
; 
8187     key
->vtype 
= val
->type
; 
8188     decrRefCount(val
); /* Deallocate the object from memory. */ 
8189     vmMarkPagesUsed(page
,pages
); 
8190     redisLog(REDIS_DEBUG
,"VM: object %s swapped out at %lld (%lld pages)", 
8191         (unsigned char*) key
->ptr
, 
8192         (unsigned long long) page
, (unsigned long long) pages
); 
8193     server
.vm_stats_swapped_objects
++; 
8194     server
.vm_stats_swapouts
++; 
8198 static robj 
*vmReadObjectFromSwap(off_t page
, int type
) { 
8201     if (server
.vm_enabled
) pthread_mutex_lock(&server
.io_swapfile_mutex
); 
8202     if (fseeko(server
.vm_fp
,page
*server
.vm_page_size
,SEEK_SET
) == -1) { 
8203         redisLog(REDIS_WARNING
, 
8204             "Unrecoverable VM problem in vmReadObjectFromSwap(): can't seek: %s", 
8208     o 
= rdbLoadObject(type
,server
.vm_fp
); 
8210         redisLog(REDIS_WARNING
, "Unrecoverable VM problem in vmReadObjectFromSwap(): can't load object from swap file: %s", strerror(errno
)); 
8213     if (server
.vm_enabled
) pthread_mutex_unlock(&server
.io_swapfile_mutex
); 
8217 /* Load the value object relative to the 'key' object from swap to memory. 
8218  * The newly allocated object is returned. 
8220  * If preview is true the unserialized object is returned to the caller but 
8221  * no changes are made to the key object, nor the pages are marked as freed */ 
8222 static robj 
*vmGenericLoadObject(robj 
*key
, int preview
) { 
8225     redisAssert(key
->storage 
== REDIS_VM_SWAPPED 
|| key
->storage 
== REDIS_VM_LOADING
); 
8226     val 
= vmReadObjectFromSwap(key
->vm
.page
,key
->vtype
); 
8228         key
->storage 
= REDIS_VM_MEMORY
; 
8229         key
->vm
.atime 
= server
.unixtime
; 
8230         vmMarkPagesFree(key
->vm
.page
,key
->vm
.usedpages
); 
8231         redisLog(REDIS_DEBUG
, "VM: object %s loaded from disk", 
8232             (unsigned char*) key
->ptr
); 
8233         server
.vm_stats_swapped_objects
--; 
8235         redisLog(REDIS_DEBUG
, "VM: object %s previewed from disk", 
8236             (unsigned char*) key
->ptr
); 
8238     server
.vm_stats_swapins
++; 
8242 /* Plain object loading, from swap to memory */ 
8243 static robj 
*vmLoadObject(robj 
*key
) { 
8244     /* If we are loading the object in background, stop it, we 
8245      * need to load this object synchronously ASAP. */ 
8246     if (key
->storage 
== REDIS_VM_LOADING
) 
8247         vmCancelThreadedIOJob(key
); 
8248     return vmGenericLoadObject(key
,0); 
8251 /* Just load the value from disk, without modifying the key. 
8252  * This is useful when we want to perform some operation on the value 
8253  * without actually bringing it from swap to memory, like while saving the 
8254  * dataset or rewriting the append only log. */ 
8255 static robj 
*vmPreviewObject(robj 
*key
) { 
8256     return vmGenericLoadObject(key
,1); 
8259 /* How good a candidate is this object for swapping? 
8260  * The better candidate it is, the greater the returned value. 
8262  * Currently we try to perform a fast estimation of the object size in 
8263  * memory, and combine it with aging information. 
8265  * Basically swappability = idle-time * log(estimated size) 
8267  * Bigger objects are preferred over smaller objects, but not 
8268  * proportionally, this is why we use the logarithm. This algorithm is 
8269  * just a first try and will probably be tuned later. */ 
8270 static double computeObjectSwappability(robj 
*o
) { 
8271     time_t age 
= server
.unixtime 
- o
->vm
.atime
; 
8275     struct dictEntry 
*de
; 
8278     if (age 
<= 0) return 0; 
8281         if (o
->encoding 
!= REDIS_ENCODING_RAW
) { 
8284             asize 
= sdslen(o
->ptr
)+sizeof(*o
)+sizeof(long)*2; 
8289         listNode 
*ln 
= listFirst(l
); 
8291         asize 
= sizeof(list
); 
8293             robj 
*ele 
= ln
->value
; 
8296             elesize 
= (ele
->encoding 
== REDIS_ENCODING_RAW
) ? 
8297                             (sizeof(*o
)+sdslen(ele
->ptr
)) : 
8299             asize 
+= (sizeof(listNode
)+elesize
)*listLength(l
); 
8304         z 
= (o
->type 
== REDIS_ZSET
); 
8305         d 
= z 
? ((zset
*)o
->ptr
)->dict 
: o
->ptr
; 
8307         asize 
= sizeof(dict
)+(sizeof(struct dictEntry
*)*dictSlots(d
)); 
8308         if (z
) asize 
+= sizeof(zset
)-sizeof(dict
); 
8313             de 
= dictGetRandomKey(d
); 
8314             ele 
= dictGetEntryKey(de
); 
8315             elesize 
= (ele
->encoding 
== REDIS_ENCODING_RAW
) ? 
8316                             (sizeof(*o
)+sdslen(ele
->ptr
)) : 
8318             asize 
+= (sizeof(struct dictEntry
)+elesize
)*dictSize(d
); 
8319             if (z
) asize 
+= sizeof(zskiplistNode
)*dictSize(d
); 
8323     return (double)age
*log(1+asize
); 
8326 /* Try to swap an object that's a good candidate for swapping. 
8327  * Returns REDIS_OK if the object was swapped, REDIS_ERR if it's not possible 
8328  * to swap any object at all. 
8330  * If 'usethreads' is true, Redis will try to swap the object in background 
8331  * using I/O threads. */ 
8332 static int vmSwapOneObject(int usethreads
) { 
8334     struct dictEntry 
*best 
= NULL
; 
8335     double best_swappability 
= 0; 
8336     redisDb 
*best_db 
= NULL
; 
8339     for (j 
= 0; j 
< server
.dbnum
; j
++) { 
8340         redisDb 
*db 
= server
.db
+j
; 
8341         /* Why maxtries is set to 100? 
8342          * Because this way (usually) we'll find 1 object even if just 1% - 2% 
8343          * are swappable objects */ 
8346         if (dictSize(db
->dict
) == 0) continue; 
8347         for (i 
= 0; i 
< 5; i
++) { 
8349             double swappability
; 
8351             if (maxtries
) maxtries
--; 
8352             de 
= dictGetRandomKey(db
->dict
); 
8353             key 
= dictGetEntryKey(de
); 
8354             val 
= dictGetEntryVal(de
); 
8355             /* Only swap objects that are currently in memory. 
8357              * Also don't swap shared objects if threaded VM is on, as we 
8358              * try to ensure that the main thread does not touch the 
8359              * object while the I/O thread is using it, but we can't 
8360              * control other keys without adding additional mutex. */ 
8361             if (key
->storage 
!= REDIS_VM_MEMORY 
|| 
8362                 (server
.vm_max_threads 
!= 0 && val
->refcount 
!= 1)) { 
8363                 if (maxtries
) i
--; /* don't count this try */ 
8366             swappability 
= computeObjectSwappability(val
); 
8367             if (!best 
|| swappability 
> best_swappability
) { 
8369                 best_swappability 
= swappability
; 
8374     if (best 
== NULL
) return REDIS_ERR
; 
8375     key 
= dictGetEntryKey(best
); 
8376     val 
= dictGetEntryVal(best
); 
8378     redisLog(REDIS_DEBUG
,"Key with best swappability: %s, %f", 
8379         key
->ptr
, best_swappability
); 
8381     /* Unshare the key if needed */ 
8382     if (key
->refcount 
> 1) { 
8383         robj 
*newkey 
= dupStringObject(key
); 
8385         key 
= dictGetEntryKey(best
) = newkey
; 
8389         vmSwapObjectThreaded(key
,val
,best_db
); 
8392         if (vmSwapObjectBlocking(key
,val
) == REDIS_OK
) { 
8393             dictGetEntryVal(best
) = NULL
; 
/* Try to swap out one object without using I/O threads: calls
 * vmSwapOneObject() with usethreads == 0 and returns its result. */
static int vmSwapOneObjectBlocking(void) {
    return vmSwapOneObject(0);
}
/* Try to swap out one object using the I/O threads: calls
 * vmSwapOneObject() with usethreads == 1 and returns its result. */
static int vmSwapOneObjectThreaded(void) {
    return vmSwapOneObject(1);
}
8409 /* Return true if it's safe to swap out objects in a given moment. 
8410  * Basically we don't want to swap objects out while there is a BGSAVE 
8411  * or a BGREWRITEAOF running in background. */ 
8412 static int vmCanSwapOut(void) { 
8413     return (server
.bgsavechildpid 
== -1 && server
.bgrewritechildpid 
== -1); 
8416 /* Delete a key if swapped. Returns 1 if the key was found, was swapped 
8417  * and was deleted. Otherwise 0 is returned. */ 
8418 static int deleteIfSwapped(redisDb 
*db
, robj 
*key
) { 
8422     if ((de 
= dictFind(db
->dict
,key
)) == NULL
) return 0; 
8423     foundkey 
= dictGetEntryKey(de
); 
8424     if (foundkey
->storage 
== REDIS_VM_MEMORY
) return 0; 
8429 /* =================== Virtual Memory - Threaded I/O  ======================= */ 
8431 static void freeIOJob(iojob 
*j
) { 
8432     if ((j
->type 
== REDIS_IOJOB_PREPARE_SWAP 
|| 
8433         j
->type 
== REDIS_IOJOB_DO_SWAP 
|| 
8434         j
->type 
== REDIS_IOJOB_LOAD
) && j
->val 
!= NULL
) 
8435         decrRefCount(j
->val
); 
8436     decrRefCount(j
->key
); 
8440 /* Every time a thread finished a Job, it writes a byte into the write side 
8441  * of an unix pipe in order to "awake" the main thread, and this function 
8443 static void vmThreadedIOCompletedJob(aeEventLoop 
*el
, int fd
, void *privdata
, 
8447     int retval
, processed 
= 0, toprocess 
= -1, trytoswap 
= 1; 
8449     REDIS_NOTUSED(mask
); 
8450     REDIS_NOTUSED(privdata
); 
8452     /* For every byte we read in the read side of the pipe, there is one 
8453      * I/O job completed to process. */ 
8454     while((retval 
= read(fd
,buf
,1)) == 1) { 
8458         struct dictEntry 
*de
; 
8460         redisLog(REDIS_DEBUG
,"Processing I/O completed job"); 
8462         /* Get the processed element (the oldest one) */ 
8464         assert(listLength(server
.io_processed
) != 0); 
8465         if (toprocess 
== -1) { 
8466             toprocess 
= (listLength(server
.io_processed
)*REDIS_MAX_COMPLETED_JOBS_PROCESSED
)/100; 
8467             if (toprocess 
<= 0) toprocess 
= 1; 
8469         ln 
= listFirst(server
.io_processed
); 
8471         listDelNode(server
.io_processed
,ln
); 
8473         /* If this job is marked as canceled, just ignore it */ 
8478         /* Post process it in the main thread, as there are things we 
8479          * can do just here to avoid race conditions and/or invasive locks */ 
8480         redisLog(REDIS_DEBUG
,"Job %p type: %d, key at %p (%s) refcount: %d\n", (void*) j
, j
->type
, (void*)j
->key
, (char*)j
->key
->ptr
, j
->key
->refcount
); 
8481         de 
= dictFind(j
->db
->dict
,j
->key
); 
8483         key 
= dictGetEntryKey(de
); 
8484         if (j
->type 
== REDIS_IOJOB_LOAD
) { 
8487             /* Key loaded, bring it at home */ 
8488             key
->storage 
= REDIS_VM_MEMORY
; 
8489             key
->vm
.atime 
= server
.unixtime
; 
8490             vmMarkPagesFree(key
->vm
.page
,key
->vm
.usedpages
); 
8491             redisLog(REDIS_DEBUG
, "VM: object %s loaded from disk (threaded)", 
8492                 (unsigned char*) key
->ptr
); 
8493             server
.vm_stats_swapped_objects
--; 
8494             server
.vm_stats_swapins
++; 
8495             dictGetEntryVal(de
) = j
->val
; 
8496             incrRefCount(j
->val
); 
8499             /* Handle clients waiting for this key to be loaded. */ 
8500             handleClientsBlockedOnSwappedKey(db
,key
); 
8501         } else if (j
->type 
== REDIS_IOJOB_PREPARE_SWAP
) { 
8502             /* Now we know the amount of pages required to swap this object. 
8503              * Let's find some space for it, and queue this task again 
8504              * rebranded as REDIS_IOJOB_DO_SWAP. */ 
8505             if (!vmCanSwapOut() || 
8506                 vmFindContiguousPages(&j
->page
,j
->pages
) == REDIS_ERR
) 
8508                 /* Ooops... no space or we can't swap as there is 
8509                  * a fork()ed Redis trying to save stuff on disk. */ 
8511                 key
->storage 
= REDIS_VM_MEMORY
; /* undo operation */ 
8513                 /* Note that we need to mark this pages as used now, 
8514                  * if the job will be canceled, we'll mark them as freed 
8516                 vmMarkPagesUsed(j
->page
,j
->pages
); 
8517                 j
->type 
= REDIS_IOJOB_DO_SWAP
; 
8522         } else if (j
->type 
== REDIS_IOJOB_DO_SWAP
) { 
8525             /* Key swapped. We can finally free some memory. */ 
8526             if (key
->storage 
!= REDIS_VM_SWAPPING
) { 
8527                 printf("key->storage: %d\n",key
->storage
); 
8528                 printf("key->name: %s\n",(char*)key
->ptr
); 
8529                 printf("key->refcount: %d\n",key
->refcount
); 
8530                 printf("val: %p\n",(void*)j
->val
); 
8531                 printf("val->type: %d\n",j
->val
->type
); 
8532                 printf("val->ptr: %s\n",(char*)j
->val
->ptr
); 
8534             redisAssert(key
->storage 
== REDIS_VM_SWAPPING
); 
8535             val 
= dictGetEntryVal(de
); 
8536             key
->vm
.page 
= j
->page
; 
8537             key
->vm
.usedpages 
= j
->pages
; 
8538             key
->storage 
= REDIS_VM_SWAPPED
; 
8539             key
->vtype 
= j
->val
->type
; 
8540             decrRefCount(val
); /* Deallocate the object from memory. */ 
8541             dictGetEntryVal(de
) = NULL
; 
8542             redisLog(REDIS_DEBUG
, 
8543                 "VM: object %s swapped out at %lld (%lld pages) (threaded)", 
8544                 (unsigned char*) key
->ptr
, 
8545                 (unsigned long long) j
->page
, (unsigned long long) j
->pages
); 
8546             server
.vm_stats_swapped_objects
++; 
8547             server
.vm_stats_swapouts
++; 
8549             /* Put a few more swap requests in queue if we are still 
8551             if (trytoswap 
&& vmCanSwapOut() && 
8552                 zmalloc_used_memory() > server
.vm_max_memory
) 
8557                     more 
= listLength(server
.io_newjobs
) < 
8558                             (unsigned) server
.vm_max_threads
; 
8560                     /* Don't waste CPU time if swappable objects are rare. */ 
8561                     if (vmSwapOneObjectThreaded() == REDIS_ERR
) { 
8569         if (processed 
== toprocess
) return; 
8571     if (retval 
< 0 && errno 
!= EAGAIN
) { 
8572         redisLog(REDIS_WARNING
, 
8573             "WARNING: read(2) error in vmThreadedIOCompletedJob() %s", 
8578 static void lockThreadedIO(void) { 
8579     pthread_mutex_lock(&server
.io_mutex
); 
8582 static void unlockThreadedIO(void) { 
8583     pthread_mutex_unlock(&server
.io_mutex
); 
8586 /* Remove the specified object from the threaded I/O queue if still not 
8587  * processed, otherwise make sure to flag it as canceled. */ 
8588 static void vmCancelThreadedIOJob(robj 
*o
) { 
8590         server
.io_newjobs
,      /* 0 */ 
8591         server
.io_processing
,   /* 1 */ 
8592         server
.io_processed     
/* 2 */ 
8596     assert(o
->storage 
== REDIS_VM_LOADING 
|| o
->storage 
== REDIS_VM_SWAPPING
); 
8599     /* Search for a matching key in one of the queues */ 
8600     for (i 
= 0; i 
< 3; i
++) { 
8604         listRewind(lists
[i
],&li
); 
8605         while ((ln 
= listNext(&li
)) != NULL
) { 
8606             iojob 
*job 
= ln
->value
; 
8608             if (job
->canceled
) continue; /* Skip this, already canceled. */ 
8609             if (compareStringObjects(job
->key
,o
) == 0) { 
8610                 redisLog(REDIS_DEBUG
,"*** CANCELED %p (%s) (type %d) (LIST ID %d)\n", 
8611                     (void*)job
, (char*)o
->ptr
, job
->type
, i
); 
8612                 /* Mark the pages as free since the swap didn't happened 
8613                  * or happened but is now discarded. */ 
8614                 if (i 
!= 1 && job
->type 
== REDIS_IOJOB_DO_SWAP
) 
8615                     vmMarkPagesFree(job
->page
,job
->pages
); 
8616                 /* Cancel the job. It depends on the list the job is 
8619                 case 0: /* io_newjobs */ 
8620                     /* If the job was yet not processed the best thing to do 
8621                      * is to remove it from the queue at all */ 
8623                     listDelNode(lists
[i
],ln
); 
8625                 case 1: /* io_processing */ 
8626                     /* Oh Shi- the thread is messing with the Job: 
8628                      * Probably it's accessing the object if this is a 
8629                      * PREPARE_SWAP or DO_SWAP job. 
8630                      * If it's a LOAD job it may be reading from disk and 
8631                      * if we don't wait for the job to terminate before to 
8632                      * cancel it, maybe in a few microseconds data can be 
8633                      * corrupted in this pages. So the short story is: 
8635                      * Better to wait for the job to move into the 
8636                      * next queue (processed)... */ 
8638                     /* We try again and again until the job is completed. */ 
8640                     /* But let's wait some time for the I/O thread 
8641                      * to finish with this job. After all this condition 
8642                      * should be very rare. */ 
8645                 case 2: /* io_processed */ 
8646                     /* The job was already processed, that's easy... 
8647                      * just mark it as canceled so that we'll ignore it 
8648                      * when processing completed jobs. */ 
8652                 /* Finally we have to adjust the storage type of the object 
8653                  * in order to "UNDO" the operaiton. */ 
8654                 if (o
->storage 
== REDIS_VM_LOADING
) 
8655                     o
->storage 
= REDIS_VM_SWAPPED
; 
8656                 else if (o
->storage 
== REDIS_VM_SWAPPING
) 
8657                     o
->storage 
= REDIS_VM_MEMORY
; 
8664     assert(1 != 1); /* We should never reach this */ 
8667 static void *IOThreadEntryPoint(void *arg
) { 
8672     pthread_detach(pthread_self()); 
8674         /* Get a new job to process */ 
8676         if (listLength(server
.io_newjobs
) == 0) { 
8677             /* No new jobs in queue, exit. */ 
8678             redisLog(REDIS_DEBUG
,"Thread %ld exiting, nothing to do", 
8679                 (long) pthread_self()); 
8680             server
.io_active_threads
--; 
8684         ln 
= listFirst(server
.io_newjobs
); 
8686         listDelNode(server
.io_newjobs
,ln
); 
8687         /* Add the job in the processing queue */ 
8688         j
->thread 
= pthread_self(); 
8689         listAddNodeTail(server
.io_processing
,j
); 
8690         ln 
= listLast(server
.io_processing
); /* We use ln later to remove it */ 
8692         redisLog(REDIS_DEBUG
,"Thread %ld got a new job (type %d): %p about key '%s'", 
8693             (long) pthread_self(), j
->type
, (void*)j
, (char*)j
->key
->ptr
); 
8695         /* Process the Job */ 
8696         if (j
->type 
== REDIS_IOJOB_LOAD
) { 
8697             j
->val 
= vmReadObjectFromSwap(j
->page
,j
->key
->vtype
); 
8698         } else if (j
->type 
== REDIS_IOJOB_PREPARE_SWAP
) { 
8699             FILE *fp 
= fopen("/dev/null","w+"); 
8700             j
->pages 
= rdbSavedObjectPages(j
->val
,fp
); 
8702         } else if (j
->type 
== REDIS_IOJOB_DO_SWAP
) { 
8703             if (vmWriteObjectOnSwap(j
->val
,j
->page
) == REDIS_ERR
) 
8707         /* Done: insert the job into the processed queue */ 
8708         redisLog(REDIS_DEBUG
,"Thread %ld completed the job: %p (key %s)", 
8709             (long) pthread_self(), (void*)j
, (char*)j
->key
->ptr
); 
8711         listDelNode(server
.io_processing
,ln
); 
8712         listAddNodeTail(server
.io_processed
,j
); 
8715         /* Signal the main thread there is new stuff to process */ 
8716         assert(write(server
.io_ready_pipe_write
,"x",1) == 1); 
8718     return NULL
; /* never reached */ 
8721 static void spawnIOThread(void) { 
8723     sigset_t mask
, omask
; 
8726     sigaddset(&mask
,SIGCHLD
); 
8727     sigaddset(&mask
,SIGHUP
); 
8728     sigaddset(&mask
,SIGPIPE
); 
8729     pthread_sigmask(SIG_SETMASK
, &mask
, &omask
); 
8730     pthread_create(&thread
,&server
.io_threads_attr
,IOThreadEntryPoint
,NULL
); 
8731     pthread_sigmask(SIG_SETMASK
, &omask
, NULL
); 
8732     server
.io_active_threads
++; 
8735 /* We need to wait for the last thread to exit before we are able to 
8736  * fork() in order to BGSAVE or BGREWRITEAOF. */ 
8737 static void waitEmptyIOJobsQueue(void) { 
8739         int io_processed_len
; 
8742         if (listLength(server
.io_newjobs
) == 0 && 
8743             listLength(server
.io_processing
) == 0 && 
8744             server
.io_active_threads 
== 0) 
8749         /* While waiting for empty jobs queue condition we post-process some 
8750          * finshed job, as I/O threads may be hanging trying to write against 
8751          * the io_ready_pipe_write FD but there are so much pending jobs that 
8753         io_processed_len 
= listLength(server
.io_processed
); 
8755         if (io_processed_len
) { 
8756             vmThreadedIOCompletedJob(NULL
,server
.io_ready_pipe_read
,NULL
,0); 
8757             usleep(1000); /* 1 millisecond */ 
8759             usleep(10000); /* 10 milliseconds */ 
8764 static void vmReopenSwapFile(void) { 
8765     /* Note: we don't close the old one as we are in the child process 
8766      * and don't want to mess at all with the original file object. */ 
8767     server
.vm_fp 
= fopen(server
.vm_swap_file
,"r+b"); 
8768     if (server
.vm_fp 
== NULL
) { 
8769         redisLog(REDIS_WARNING
,"Can't re-open the VM swap file: %s. Exiting.", 
8770             server
.vm_swap_file
); 
8773     server
.vm_fd 
= fileno(server
.vm_fp
); 
8776 /* This function must be called while with threaded IO locked */ 
8777 static void queueIOJob(iojob 
*j
) { 
8778     redisLog(REDIS_DEBUG
,"Queued IO Job %p type %d about key '%s'\n", 
8779         (void*)j
, j
->type
, (char*)j
->key
->ptr
); 
8780     listAddNodeTail(server
.io_newjobs
,j
); 
8781     if (server
.io_active_threads 
< server
.vm_max_threads
) 
8785 static int vmSwapObjectThreaded(robj 
*key
, robj 
*val
, redisDb 
*db
) { 
8788     assert(key
->storage 
== REDIS_VM_MEMORY
); 
8789     assert(key
->refcount 
== 1); 
8791     j 
= zmalloc(sizeof(*j
)); 
8792     j
->type 
= REDIS_IOJOB_PREPARE_SWAP
; 
8794     j
->key 
= dupStringObject(key
); 
8798     j
->thread 
= (pthread_t
) -1; 
8799     key
->storage 
= REDIS_VM_SWAPPING
; 
8807 /* ============ Virtual Memory - Blocking clients on missing keys =========== */ 
8809 /* This function makes the clinet 'c' waiting for the key 'key' to be loaded. 
8810  * If there is not already a job loading the key, it is craeted. 
8811  * The key is added to the io_keys list in the client structure, and also 
8812  * in the hash table mapping swapped keys to waiting clients, that is, 
8813  * server.io_waited_keys. */ 
8814 static int waitForSwappedKey(redisClient 
*c
, robj 
*key
) { 
8815     struct dictEntry 
*de
; 
8819     /* If the key does not exist or is already in RAM we don't need to 
8820      * block the client at all. */ 
8821     de 
= dictFind(c
->db
->dict
,key
); 
8822     if (de 
== NULL
) return 0; 
8823     o 
= dictGetEntryKey(de
); 
8824     if (o
->storage 
== REDIS_VM_MEMORY
) { 
8826     } else if (o
->storage 
== REDIS_VM_SWAPPING
) { 
8827         /* We were swapping the key, undo it! */ 
8828         vmCancelThreadedIOJob(o
); 
8832     /* OK: the key is either swapped, or being loaded just now. */ 
8834     /* Add the key to the list of keys this client is waiting for. 
8835      * This maps clients to keys they are waiting for. */ 
8836     listAddNodeTail(c
->io_keys
,key
); 
8839     /* Add the client to the swapped keys => clients waiting map. */ 
8840     de 
= dictFind(c
->db
->io_keys
,key
); 
8844         /* For every key we take a list of clients blocked for it */ 
8846         retval 
= dictAdd(c
->db
->io_keys
,key
,l
); 
8848         assert(retval 
== DICT_OK
); 
8850         l 
= dictGetEntryVal(de
); 
8852     listAddNodeTail(l
,c
); 
8854     /* Are we already loading the key from disk? If not create a job */ 
8855     if (o
->storage 
== REDIS_VM_SWAPPED
) { 
8858         o
->storage 
= REDIS_VM_LOADING
; 
8859         j 
= zmalloc(sizeof(*j
)); 
8860         j
->type 
= REDIS_IOJOB_LOAD
; 
8862         j
->key 
= dupStringObject(key
); 
8863         j
->key
->vtype 
= o
->vtype
; 
8864         j
->page 
= o
->vm
.page
; 
8867         j
->thread 
= (pthread_t
) -1; 
8875 /* Preload keys needed for the ZUNION and ZINTER commands. */ 
8876 static void zunionInterBlockClientOnSwappedKeys(redisClient 
*c
) { 
8878     num 
= atoi(c
->argv
[2]->ptr
); 
8879     for (i 
= 0; i 
< num
; i
++) { 
8880         waitForSwappedKey(c
,c
->argv
[3+i
]); 
8884 /* Is this client attempting to run a command against swapped keys? 
8885  * If so, block it ASAP, load the keys in background, then resume it. 
8887  * The important idea about this function is that it can fail! If keys will 
8888  * still be swapped when the client is resumed, this key lookups will 
8889  * just block loading keys from disk. In practical terms this should only 
8890  * happen with SORT BY command or if there is a bug in this function. 
8892  * Return 1 if the client is marked as blocked, 0 if the client can 
8893  * continue as the keys it is going to access appear to be in memory. */ 
8894 static int blockClientOnSwappedKeys(struct redisCommand 
*cmd
, redisClient 
*c
) { 
8897     if (cmd
->vm_preload_proc 
!= NULL
) { 
8898         cmd
->vm_preload_proc(c
); 
8900         if (cmd
->vm_firstkey 
== 0) return 0; 
8901         last 
= cmd
->vm_lastkey
; 
8902         if (last 
< 0) last 
= c
->argc
+last
; 
8903         for (j 
= cmd
->vm_firstkey
; j 
<= last
; j 
+= cmd
->vm_keystep
) 
8904             waitForSwappedKey(c
,c
->argv
[j
]); 
8907     /* If the client was blocked for at least one key, mark it as blocked. */ 
8908     if (listLength(c
->io_keys
)) { 
8909         c
->flags 
|= REDIS_IO_WAIT
; 
8910         aeDeleteFileEvent(server
.el
,c
->fd
,AE_READABLE
); 
8911         server
.vm_blocked_clients
++; 
8918 /* Remove the 'key' from the list of blocked keys for a given client. 
8920  * The function returns 1 when there are no longer blocking keys after 
8921  * the current one was removed (and the client can be unblocked). */ 
8922 static int dontWaitForSwappedKey(redisClient 
*c
, robj 
*key
) { 
8926     struct dictEntry 
*de
; 
8928     /* Remove the key from the list of keys this client is waiting for. */ 
8929     listRewind(c
->io_keys
,&li
); 
8930     while ((ln 
= listNext(&li
)) != NULL
) { 
8931         if (compareStringObjects(ln
->value
,key
) == 0) { 
8932             listDelNode(c
->io_keys
,ln
); 
8938     /* Remove the client form the key => waiting clients map. */ 
8939     de 
= dictFind(c
->db
->io_keys
,key
); 
8941     l 
= dictGetEntryVal(de
); 
8942     ln 
= listSearchKey(l
,c
); 
8945     if (listLength(l
) == 0) 
8946         dictDelete(c
->db
->io_keys
,key
); 
8948     return listLength(c
->io_keys
) == 0; 
8951 static void handleClientsBlockedOnSwappedKey(redisDb 
*db
, robj 
*key
) { 
8952     struct dictEntry 
*de
; 
8957     de 
= dictFind(db
->io_keys
,key
); 
8960     l 
= dictGetEntryVal(de
); 
8961     len 
= listLength(l
); 
8962     /* Note: we can't use something like while(listLength(l)) as the list 
8963      * can be freed by the calling function when we remove the last element. */ 
8966         redisClient 
*c 
= ln
->value
; 
8968         if (dontWaitForSwappedKey(c
,key
)) { 
8969             /* Put the client in the list of clients ready to go as we 
8970              * loaded all the keys about it. */ 
8971             listAddNodeTail(server
.io_ready_clients
,c
); 
8976 /* ================================= Debugging ============================== */ 
8978 static void debugCommand(redisClient 
*c
) { 
8979     if (!strcasecmp(c
->argv
[1]->ptr
,"segfault")) { 
8981     } else if (!strcasecmp(c
->argv
[1]->ptr
,"reload")) { 
8982         if (rdbSave(server
.dbfilename
) != REDIS_OK
) { 
8983             addReply(c
,shared
.err
); 
8987         if (rdbLoad(server
.dbfilename
) != REDIS_OK
) { 
8988             addReply(c
,shared
.err
); 
8991         redisLog(REDIS_WARNING
,"DB reloaded by DEBUG RELOAD"); 
8992         addReply(c
,shared
.ok
); 
8993     } else if (!strcasecmp(c
->argv
[1]->ptr
,"loadaof")) { 
8995         if (loadAppendOnlyFile(server
.appendfilename
) != REDIS_OK
) { 
8996             addReply(c
,shared
.err
); 
8999         redisLog(REDIS_WARNING
,"Append Only File loaded by DEBUG LOADAOF"); 
9000         addReply(c
,shared
.ok
); 
9001     } else if (!strcasecmp(c
->argv
[1]->ptr
,"object") && c
->argc 
== 3) { 
9002         dictEntry 
*de 
= dictFind(c
->db
->dict
,c
->argv
[2]); 
9006             addReply(c
,shared
.nokeyerr
); 
9009         key 
= dictGetEntryKey(de
); 
9010         val 
= dictGetEntryVal(de
); 
9011         if (!server
.vm_enabled 
|| (key
->storage 
== REDIS_VM_MEMORY 
|| 
9012                                    key
->storage 
== REDIS_VM_SWAPPING
)) { 
9016             if (val
->encoding 
< (sizeof(strencoding
)/sizeof(char*))) { 
9017                 strenc 
= strencoding
[val
->encoding
]; 
9019                 snprintf(buf
,64,"unknown encoding %d\n", val
->encoding
); 
9022             addReplySds(c
,sdscatprintf(sdsempty(), 
9023                 "+Key at:%p refcount:%d, value at:%p refcount:%d " 
9024                 "encoding:%s serializedlength:%lld\r\n", 
9025                 (void*)key
, key
->refcount
, (void*)val
, val
->refcount
, 
9026                 strenc
, (long long) rdbSavedObjectLen(val
,NULL
))); 
9028             addReplySds(c
,sdscatprintf(sdsempty(), 
9029                 "+Key at:%p refcount:%d, value swapped at: page %llu " 
9030                 "using %llu pages\r\n", 
9031                 (void*)key
, key
->refcount
, (unsigned long long) key
->vm
.page
, 
9032                 (unsigned long long) key
->vm
.usedpages
)); 
9034     } else if (!strcasecmp(c
->argv
[1]->ptr
,"swapout") && c
->argc 
== 3) { 
9035         dictEntry 
*de 
= dictFind(c
->db
->dict
,c
->argv
[2]); 
9038         if (!server
.vm_enabled
) { 
9039             addReplySds(c
,sdsnew("-ERR Virtual Memory is disabled\r\n")); 
9043             addReply(c
,shared
.nokeyerr
); 
9046         key 
= dictGetEntryKey(de
); 
9047         val 
= dictGetEntryVal(de
); 
9048         /* If the key is shared we want to create a copy */ 
9049         if (key
->refcount 
> 1) { 
9050             robj 
*newkey 
= dupStringObject(key
); 
9052             key 
= dictGetEntryKey(de
) = newkey
; 
9055         if (key
->storage 
!= REDIS_VM_MEMORY
) { 
9056             addReplySds(c
,sdsnew("-ERR This key is not in memory\r\n")); 
9057         } else if (vmSwapObjectBlocking(key
,val
) == REDIS_OK
) { 
9058             dictGetEntryVal(de
) = NULL
; 
9059             addReply(c
,shared
.ok
); 
9061             addReply(c
,shared
.err
); 
9064         addReplySds(c
,sdsnew( 
9065             "-ERR Syntax error, try DEBUG [SEGFAULT|OBJECT <key>|SWAPOUT <key>|RELOAD]\r\n")); 
9069 static void _redisAssert(char *estr
, char *file
, int line
) { 
9070     redisLog(REDIS_WARNING
,"=== ASSERTION FAILED ==="); 
9071     redisLog(REDIS_WARNING
,"==> %s:%d '%s' is not true\n",file
,line
,estr
); 
9072 #ifdef HAVE_BACKTRACE 
9073     redisLog(REDIS_WARNING
,"(forcing SIGSEGV in order to print the stack trace)"); 
9078 /* =================================== Main! ================================ */ 
/* Read /proc/sys/vm/overcommit_memory and return its content converted
 * with atoi(). Returns -1 when the file can't be opened or no line can be
 * read from it. */
int linuxOvercommitMemoryValue(void) {
    char line[64];
    int value = -1;
    FILE *procfile = fopen("/proc/sys/vm/overcommit_memory","r");

    if (procfile == NULL) return value;
    if (fgets(line,sizeof(line),procfile) != NULL) {
        value = atoi(line);
    }
    fclose(procfile);
    return value;
}
9095 void linuxOvercommitMemoryWarning(void) { 
9096     if (linuxOvercommitMemoryValue() == 0) { 
9097         redisLog(REDIS_WARNING
,"WARNING overcommit_memory is set to 0! Background save may fail under low condition memory. To fix this issue add 'vm.overcommit_memory = 1' to /etc/sysctl.conf and then reboot or run the command 'sysctl vm.overcommit_memory=1' for this to take effect."); 
9100 #endif /* __linux__ */ 
9102 static void daemonize(void) { 
9106     if (fork() != 0) exit(0); /* parent exits */ 
9107     setsid(); /* create a new session */ 
9109     /* Every output goes to /dev/null. If Redis is daemonized but 
9110      * the 'logfile' is set to 'stdout' in the configuration file 
9111      * it will not log at all. */ 
9112     if ((fd 
= open("/dev/null", O_RDWR
, 0)) != -1) { 
9113         dup2(fd
, STDIN_FILENO
); 
9114         dup2(fd
, STDOUT_FILENO
); 
9115         dup2(fd
, STDERR_FILENO
); 
9116         if (fd 
> STDERR_FILENO
) close(fd
); 
9118     /* Try to write the pid file */ 
9119     fp 
= fopen(server
.pidfile
,"w"); 
9121         fprintf(fp
,"%d\n",getpid()); 
9126 int main(int argc
, char **argv
) { 
9131         resetServerSaveParams(); 
9132         loadServerConfig(argv
[1]); 
9133     } else if (argc 
> 2) { 
9134         fprintf(stderr
,"Usage: ./redis-server [/path/to/redis.conf]\n"); 
9137         redisLog(REDIS_WARNING
,"Warning: no config file specified, using the default config. In order to specify a config file use 'redis-server /path/to/redis.conf'"); 
9139     if (server
.daemonize
) daemonize(); 
9141     redisLog(REDIS_NOTICE
,"Server started, Redis version " REDIS_VERSION
); 
9143     linuxOvercommitMemoryWarning(); 
9146     if (server
.appendonly
) { 
9147         if (loadAppendOnlyFile(server
.appendfilename
) == REDIS_OK
) 
9148             redisLog(REDIS_NOTICE
,"DB loaded from append only file: %ld seconds",time(NULL
)-start
); 
9150         if (rdbLoad(server
.dbfilename
) == REDIS_OK
) 
9151             redisLog(REDIS_NOTICE
,"DB loaded from disk: %ld seconds",time(NULL
)-start
); 
9153     redisLog(REDIS_NOTICE
,"The server is now ready to accept connections on port %d", server
.port
); 
9154     aeSetBeforeSleepProc(server
.el
,beforeSleep
); 
9156     aeDeleteEventLoop(server
.el
); 
9160 /* ============================= Backtrace support ========================= */ 
9162 #ifdef HAVE_BACKTRACE 
9163 static char *findFuncName(void *pointer
, unsigned long *offset
); 
9165 static void *getMcontextEip(ucontext_t 
*uc
) { 
9166 #if defined(__FreeBSD__) 
9167     return (void*) uc
->uc_mcontext
.mc_eip
; 
9168 #elif defined(__dietlibc__) 
9169     return (void*) uc
->uc_mcontext
.eip
; 
9170 #elif defined(__APPLE__) && !defined(MAC_OS_X_VERSION_10_6) 
9172     return (void*) uc
->uc_mcontext
->__ss
.__rip
; 
9174     return (void*) uc
->uc_mcontext
->__ss
.__eip
; 
9176 #elif defined(__APPLE__) && defined(MAC_OS_X_VERSION_10_6) 
9177   #if defined(_STRUCT_X86_THREAD_STATE64) && !defined(__i386__) 
9178     return (void*) uc
->uc_mcontext
->__ss
.__rip
; 
9180     return (void*) uc
->uc_mcontext
->__ss
.__eip
; 
9182 #elif defined(__i386__) || defined(__X86_64__) || defined(__x86_64__) 
9183     return (void*) uc
->uc_mcontext
.gregs
[REG_EIP
]; /* Linux 32/64 bit */ 
9184 #elif defined(__ia64__) /* Linux IA64 */ 
9185     return (void*) uc
->uc_mcontext
.sc_ip
; 
9191 static void segvHandler(int sig
, siginfo_t 
*info
, void *secret
) { 
9193     char **messages 
= NULL
; 
9194     int i
, trace_size 
= 0; 
9195     unsigned long offset
=0; 
9196     ucontext_t 
*uc 
= (ucontext_t
*) secret
; 
9198     REDIS_NOTUSED(info
); 
9200     redisLog(REDIS_WARNING
, 
9201         "======= Ooops! Redis %s got signal: -%d- =======", REDIS_VERSION
, sig
); 
9202     infostring 
= genRedisInfoString(); 
9203     redisLog(REDIS_WARNING
, "%s",infostring
); 
9204     /* It's not safe to sdsfree() the returned string under memory 
9205      * corruption conditions. Let it leak as we are going to abort */ 
9207     trace_size 
= backtrace(trace
, 100); 
9208     /* overwrite sigaction with caller's address */ 
9209     if (getMcontextEip(uc
) != NULL
) { 
9210         trace
[1] = getMcontextEip(uc
); 
9212     messages 
= backtrace_symbols(trace
, trace_size
); 
9214     for (i
=1; i
<trace_size
; ++i
) { 
9215         char *fn 
= findFuncName(trace
[i
], &offset
), *p
; 
9217         p 
= strchr(messages
[i
],'+'); 
9218         if (!fn 
|| (p 
&& ((unsigned long)strtol(p
+1,NULL
,10)) < offset
)) { 
9219             redisLog(REDIS_WARNING
,"%s", messages
[i
]); 
9221             redisLog(REDIS_WARNING
,"%d redis-server %p %s + %d", i
, trace
[i
], fn
, (unsigned int)offset
); 
9224     /* free(messages); Don't call free() with possibly corrupted memory. */ 
9228 static void setupSigSegvAction(void) { 
9229     struct sigaction act
; 
9231     sigemptyset (&act
.sa_mask
); 
9232     /* When the SA_SIGINFO flag is set in sa_flags then sa_sigaction 
9233      * is used. Otherwise, sa_handler is used */ 
9234     act
.sa_flags 
= SA_NODEFER 
| SA_ONSTACK 
| SA_RESETHAND 
| SA_SIGINFO
; 
9235     act
.sa_sigaction 
= segvHandler
; 
9236     sigaction (SIGSEGV
, &act
, NULL
); 
9237     sigaction (SIGBUS
, &act
, NULL
); 
9238     sigaction (SIGFPE
, &act
, NULL
); 
9239     sigaction (SIGILL
, &act
, NULL
); 
9240     sigaction (SIGBUS
, &act
, NULL
); 
9244 #include "staticsymbols.h" 
9245 /* This function try to convert a pointer into a function name. It's used in 
9246  * oreder to provide a backtrace under segmentation fault that's able to 
9247  * display functions declared as static (otherwise the backtrace is useless). */ 
9248 static char *findFuncName(void *pointer
, unsigned long *offset
){ 
9250     unsigned long off
, minoff 
= 0; 
9252     /* Try to match against the Symbol with the smallest offset */ 
9253     for (i
=0; symsTable
[i
].pointer
; i
++) { 
9254         unsigned long lp 
= (unsigned long) pointer
; 
9256         if (lp 
!= (unsigned long)-1 && lp 
>= symsTable
[i
].pointer
) { 
9257             off
=lp
-symsTable
[i
].pointer
; 
9258             if (ret 
< 0 || off 
< minoff
) { 
9264     if (ret 
== -1) return NULL
; 
9266     return symsTable
[ret
].name
; 
9268 #else /* HAVE_BACKTRACE */ 
/* Variant used when backtrace support is not compiled in: no handler is
 * installed. */
static void setupSigSegvAction(void) {
    /* Intentionally empty. */
}
9271 #endif /* HAVE_BACKTRACE */