2 * Copyright (c) 2009-2010, Salvatore Sanfilippo <antirez at gmail dot com>
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions are met:
8 * * Redistributions of source code must retain the above copyright notice,
9 * this list of conditions and the following disclaimer.
10 * * Redistributions in binary form must reproduce the above copyright
11 * notice, this list of conditions and the following disclaimer in the
12 * documentation and/or other materials provided with the distribution.
13 * * Neither the name of Redis nor the names of its contributors may be used
14 * to endorse or promote products derived from this software without
15 * specific prior written permission.
17 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
18 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
21 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
22 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
23 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
24 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
25 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
26 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
27 * POSSIBILITY OF SUCH DAMAGE.
30 #define REDIS_VERSION "2.1.1"
45 #endif /* HAVE_BACKTRACE */
53 #include <arpa/inet.h>
57 #include <sys/resource.h>
65 #include "solarisfixes.h"
69 #include "ae.h" /* Event driven programming library */
70 #include "sds.h" /* Dynamic safe strings */
71 #include "anet.h" /* Networking the easy way */
72 #include "dict.h" /* Hash tables */
73 #include "adlist.h" /* Linked lists */
74 #include "zmalloc.h" /* total memory usage aware version of malloc/free */
75 #include "lzf.h" /* LZF compression library */
76 #include "pqsort.h" /* Partial qsort for SORT+LIMIT */
77 #include "zipmap.h" /* Compact dictionary-alike data structure */
78 #include "sha1.h" /* SHA1 is used for DEBUG DIGEST */
79 #include "release.h" /* Release and/or git repository information */
85 /* Static server configuration */
86 #define REDIS_SERVERPORT 6379 /* TCP port */
87 #define REDIS_MAXIDLETIME (60*5) /* default client timeout */
88 #define REDIS_IOBUF_LEN 1024
89 #define REDIS_LOADBUF_LEN 1024
90 #define REDIS_STATIC_ARGS 8
91 #define REDIS_DEFAULT_DBNUM 16
92 #define REDIS_CONFIGLINE_MAX 1024
93 #define REDIS_OBJFREELIST_MAX 1000000 /* Max number of objects to cache */
94 #define REDIS_MAX_SYNC_TIME 60 /* Slave can't take more to sync */
95 #define REDIS_EXPIRELOOKUPS_PER_CRON 10 /* lookup 10 expires per loop */
96 #define REDIS_MAX_WRITE_PER_EVENT (1024*64)
97 #define REDIS_REQUEST_MAX_SIZE (1024*1024*256) /* max bytes in inline command */
99 /* If more than REDIS_WRITEV_THRESHOLD write packets are pending use writev */
100 #define REDIS_WRITEV_THRESHOLD 3
101 /* Max number of iovecs used for each writev call */
102 #define REDIS_WRITEV_IOVEC_COUNT 256
104 /* Hash table parameters */
105 #define REDIS_HT_MINFILL 10 /* Minimal hash table fill 10% */
108 #define REDIS_CMD_BULK 1 /* Bulk write command */
109 #define REDIS_CMD_INLINE 2 /* Inline command */
110 /* REDIS_CMD_DENYOOM deserves a longer comment: all the commands marked with
111 this flag will return an error when the 'maxmemory' option is set in the
112 config file and the server is using more than maxmemory bytes of memory.
113 In short these commands are denied on low memory conditions. */
114 #define REDIS_CMD_DENYOOM 4
115 #define REDIS_CMD_FORCE_REPLICATION 8 /* Force replication even if dirty is 0 */
118 #define REDIS_STRING 0
124 /* Objects encoding. Some kind of objects like Strings and Hashes can be
125 * internally represented in multiple ways. The 'encoding' field of the object
126 * is set to one of these values for this object. */
127 #define REDIS_ENCODING_RAW 0 /* Raw representation */
128 #define REDIS_ENCODING_INT 1 /* Encoded as integer */
129 #define REDIS_ENCODING_ZIPMAP 2 /* Encoded as zipmap */
130 #define REDIS_ENCODING_HT 3 /* Encoded as an hash table */
132 static char* strencoding
[] = {
133 "raw", "int", "zipmap", "hashtable"
136 /* Object types only used for dumping to disk */
137 #define REDIS_EXPIRETIME 253
138 #define REDIS_SELECTDB 254
139 #define REDIS_EOF 255
141 /* Defines related to the dump file format. To store 32 bits lengths for short
142 * keys requires a lot of space, so we check the most significant 2 bits of
143 * the first byte to interpret the length:
145 * 00|000000 => if the two MSB are 00 the len is the 6 bits of this byte
146 * 01|000000 00000000 => 01, the len is 14 bits, 6 bits + 8 bits of next byte
147 * 10|000000 [32 bit integer] => if it's 10, a full 32 bit len will follow
148 * 11|000000 this means: specially encoded object will follow. The six bits
149 * number specify the kind of object that follows.
150 * See the REDIS_RDB_ENC_* defines.
152 * Lengths up to 63 are stored using a single byte, most DB keys, and many
153 * values, will fit inside. */
154 #define REDIS_RDB_6BITLEN 0
155 #define REDIS_RDB_14BITLEN 1
156 #define REDIS_RDB_32BITLEN 2
157 #define REDIS_RDB_ENCVAL 3
158 #define REDIS_RDB_LENERR UINT_MAX
160 /* When a length of a string object stored on disk has the first two bits
161 * set, the remaining six bits specify a special encoding for the object
162 * according to the following defines: */
163 #define REDIS_RDB_ENC_INT8 0 /* 8 bit signed integer */
164 #define REDIS_RDB_ENC_INT16 1 /* 16 bit signed integer */
165 #define REDIS_RDB_ENC_INT32 2 /* 32 bit signed integer */
166 #define REDIS_RDB_ENC_LZF 3 /* string compressed with FASTLZ */
168 /* Virtual memory object->where field. */
169 #define REDIS_VM_MEMORY 0 /* The object is on memory */
170 #define REDIS_VM_SWAPPED 1 /* The object is on disk */
171 #define REDIS_VM_SWAPPING 2 /* Redis is swapping this object on disk */
172 #define REDIS_VM_LOADING 3 /* Redis is loading this object from disk */
174 /* Virtual memory static configuration stuff.
175 * Check vmFindContiguousPages() to know more about this magic numbers. */
176 #define REDIS_VM_MAX_NEAR_PAGES 65536
177 #define REDIS_VM_MAX_RANDOM_JUMP 4096
178 #define REDIS_VM_MAX_THREADS 32
179 #define REDIS_THREAD_STACK_SIZE (1024*1024*4)
180 /* The following is the *percentage* of completed I/O jobs to process when the
181 * handler is called. While Virtual Memory I/O operations are performed by
182 * threads, these operations must be processed by the main thread when completed
183 * in order to take effect. */
184 #define REDIS_MAX_COMPLETED_JOBS_PROCESSED 1
187 #define REDIS_SLAVE 1 /* This client is a slave server */
188 #define REDIS_MASTER 2 /* This client is a master server */
189 #define REDIS_MONITOR 4 /* This client is a slave monitor, see MONITOR */
190 #define REDIS_MULTI 8 /* This client is in a MULTI context */
191 #define REDIS_BLOCKED 16 /* The client is waiting in a blocking operation */
192 #define REDIS_IO_WAIT 32 /* The client is waiting for Virtual Memory I/O */
193 #define REDIS_DIRTY_CAS 64 /* Watched keys modified. EXEC will fail. */
195 /* Slave replication state - slave side */
196 #define REDIS_REPL_NONE 0 /* No active replication */
197 #define REDIS_REPL_CONNECT 1 /* Must connect to master */
198 #define REDIS_REPL_CONNECTED 2 /* Connected to master */
200 /* Slave replication state - from the point of view of master
201 * Note that in SEND_BULK and ONLINE state the slave receives new updates
202 * in its output queue. In the WAIT_BGSAVE state instead the server is waiting
203 * to start the next background saving in order to send updates to it. */
204 #define REDIS_REPL_WAIT_BGSAVE_START 3 /* master waits bgsave to start feeding it */
205 #define REDIS_REPL_WAIT_BGSAVE_END 4 /* master waits bgsave to start bulk DB transmission */
206 #define REDIS_REPL_SEND_BULK 5 /* master is sending the bulk DB */
207 #define REDIS_REPL_ONLINE 6 /* bulk DB already transmitted, receive updates */
209 /* List related stuff */
213 /* Sort operations */
214 #define REDIS_SORT_GET 0
215 #define REDIS_SORT_ASC 1
216 #define REDIS_SORT_DESC 2
217 #define REDIS_SORTKEY_MAX 1024
220 #define REDIS_DEBUG 0
221 #define REDIS_VERBOSE 1
222 #define REDIS_NOTICE 2
223 #define REDIS_WARNING 3
225 /* Anti-warning macro... */
226 #define REDIS_NOTUSED(V) ((void) V)
228 #define ZSKIPLIST_MAXLEVEL 32 /* Should be enough for 2^32 elements */
229 #define ZSKIPLIST_P 0.25 /* Skiplist P = 1/4 */
231 /* Append only defines */
232 #define APPENDFSYNC_NO 0
233 #define APPENDFSYNC_ALWAYS 1
234 #define APPENDFSYNC_EVERYSEC 2
236 /* Hashes related defaults */
237 #define REDIS_HASH_MAX_ZIPMAP_ENTRIES 64
238 #define REDIS_HASH_MAX_ZIPMAP_VALUE 512
/* We can print the stacktrace, so our assert is defined this way.
 *
 * redisAssert(_e) evaluates _e once; when it is false the stringified
 * expression, file and line are handed to _redisAssert() and then
 * _exit(1) is called.
 * redisPanic(_e) unconditionally hands the stringified argument, file and
 * line to _redisPanic() and then calls _exit(1).
 *
 * Both expansions are fully parenthesized so each one behaves as a single
 * expression wherever it is used. Without the outer parentheses a use such
 * as `cond ? x : redisPanic(msg)` would parse as
 * `(cond ? x : _redisPanic(...)), _exit(1)` and always terminate. */
#define redisAssert(_e) ((_e)?(void)0 : (_redisAssert(#_e,__FILE__,__LINE__),_exit(1)))
#define redisPanic(_e) (_redisPanic(#_e,__FILE__,__LINE__),_exit(1))
243 static void _redisAssert(char *estr
, char *file
, int line
);
244 static void _redisPanic(char *msg
, char *file
, int line
);
246 /*================================= Data types ============================== */
248 /* A redis object, that is a type able to hold a string / list / set */
250 /* The VM object structure */
251 struct redisObjectVM
{
252 off_t page
; /* the page at witch the object is stored on disk */
253 off_t usedpages
; /* number of pages used on disk */
254 time_t atime
; /* Last access time */
257 /* The actual Redis Object */
258 typedef struct redisObject
{
261 unsigned char encoding
;
262 unsigned char storage
; /* If this object is a key, where is the value?
263 * REDIS_VM_MEMORY, REDIS_VM_SWAPPED, ... */
264 unsigned char vtype
; /* If this object is a key, and value is swapped out,
265 * this is the type of the swapped out object. */
267 /* VM fields, these are only allocated if VM is active, otherwise the
268 * object allocation function will just allocate
269 * sizeof(redisObject) minus sizeof(redisObjectVM), so using
270 * Redis without VM active will not have any overhead. */
271 struct redisObjectVM vm
;
274 /* Macro used to initialize a Redis object allocated on the stack.
275 * Note that this macro is kept near the structure definition to make sure
276 * we'll update it when the structure is changed, to avoid bugs like
277 * bug #85 introduced exactly in this way. */
278 #define initStaticStringObject(_var,_ptr) do { \
280 _var.type = REDIS_STRING; \
281 _var.encoding = REDIS_ENCODING_RAW; \
283 if (server.vm_enabled) _var.storage = REDIS_VM_MEMORY; \
286 typedef struct redisDb
{
287 dict
*dict
; /* The keyspace for this DB */
288 dict
*expires
; /* Timeout of keys with a timeout set */
289 dict
*blocking_keys
; /* Keys with clients waiting for data (BLPOP) */
290 dict
*io_keys
; /* Keys with clients waiting for VM I/O */
291 dict
*watched_keys
; /* WATCHED keys for MULTI/EXEC CAS */
295 /* Client MULTI/EXEC state */
296 typedef struct multiCmd
{
299 struct redisCommand
*cmd
;
302 typedef struct multiState
{
303 multiCmd
*commands
; /* Array of MULTI commands */
304 int count
; /* Total number of MULTI commands */
307 /* With multiplexing we need to keep per-client state.
308 * Clients are kept in a linked list. */
309 typedef struct redisClient
{
314 robj
**argv
, **mbargv
;
316 int bulklen
; /* bulk read len. -1 if not in bulk read mode */
317 int multibulk
; /* multi bulk command format active */
320 time_t lastinteraction
; /* time of the last interaction, used for timeout */
321 int flags
; /* REDIS_SLAVE | REDIS_MONITOR | REDIS_MULTI ... */
322 int slaveseldb
; /* slave selected db, if this client is a slave */
323 int authenticated
; /* when requirepass is non-NULL */
324 int replstate
; /* replication state if this is a slave */
325 int repldbfd
; /* replication DB file descriptor */
326 long repldboff
; /* replication DB file offset */
327 off_t repldbsize
; /* replication DB file size */
328 multiState mstate
; /* MULTI/EXEC state */
329 robj
**blocking_keys
; /* The key we are waiting to terminate a blocking
330 * operation such as BLPOP. Otherwise NULL. */
331 int blocking_keys_num
; /* Number of blocking keys */
332 time_t blockingto
; /* Blocking operation timeout. If UNIX current time
333 * is >= blockingto then the operation timed out. */
334 list
*io_keys
; /* Keys this client is waiting to be loaded from the
335 * swap file in order to continue. */
336 list
*watched_keys
; /* Keys WATCHED for MULTI/EXEC CAS */
337 dict
*pubsub_channels
; /* channels a client is interested in (SUBSCRIBE) */
338 list
*pubsub_patterns
; /* patterns a client is interested in (SUBSCRIBE) */
346 /* Global server state structure */
351 long long dirty
; /* changes to DB from the last save */
353 list
*slaves
, *monitors
;
354 char neterr
[ANET_ERR_LEN
];
356 int cronloops
; /* number of times the cron function run */
357 list
*objfreelist
; /* A list of freed objects to avoid malloc() */
358 time_t lastsave
; /* Unix time of last save succeeede */
359 /* Fields used only for stats */
360 time_t stat_starttime
; /* server start time */
361 long long stat_numcommands
; /* number of processed commands */
362 long long stat_numconnections
; /* number of connections received */
363 long long stat_expiredkeys
; /* number of expired keys */
377 pid_t bgsavechildpid
;
378 pid_t bgrewritechildpid
;
379 sds bgrewritebuf
; /* buffer taken by parent during oppend only rewrite */
380 sds aofbuf
; /* AOF buffer, written before entering the event loop */
381 struct saveparam
*saveparams
;
386 char *appendfilename
;
390 /* Replication related */
395 redisClient
*master
; /* client that is master for this slave */
397 unsigned int maxclients
;
398 unsigned long long maxmemory
;
399 unsigned int blpop_blocked_clients
;
400 unsigned int vm_blocked_clients
;
401 /* Sort parameters - qsort_r() is only available under BSD so we
402 * have to take this state global, in order to pass it to sortCompare() */
406 /* Virtual memory configuration */
411 unsigned long long vm_max_memory
;
413 size_t hash_max_zipmap_entries
;
414 size_t hash_max_zipmap_value
;
415 /* Virtual memory state */
418 off_t vm_next_page
; /* Next probably empty page */
419 off_t vm_near_pages
; /* Number of pages allocated sequentially */
420 unsigned char *vm_bitmap
; /* Bitmap of free/used pages */
421 time_t unixtime
; /* Unix time sampled every second. */
422 /* Virtual memory I/O threads stuff */
423 /* An I/O thread processes an element taken from the io_newjobs queue and
424 * puts the result of the operation in the io_processed list. While the
425 * job is being processed, it's put on the io_processing queue. */
426 list
*io_newjobs
; /* List of VM I/O jobs yet to be processed */
427 list
*io_processing
; /* List of VM I/O jobs being processed */
428 list
*io_processed
; /* List of VM I/O jobs already processed */
429 list
*io_ready_clients
; /* Clients ready to be unblocked. All keys loaded */
430 pthread_mutex_t io_mutex
; /* lock to access io_jobs/io_done/io_thread_job */
431 pthread_mutex_t obj_freelist_mutex
; /* safe redis objects creation/free */
432 pthread_mutex_t io_swapfile_mutex
; /* So we can lseek + write */
433 pthread_attr_t io_threads_attr
; /* attributes for threads creation */
434 int io_active_threads
; /* Number of running I/O threads */
435 int vm_max_threads
; /* Max number of I/O threads running at the same time */
436 /* Our main thread is blocked on the event loop, looking for sockets ready
437 * to be read or written, so when a threaded I/O operation is ready to be
438 * processed by the main thread, the I/O thread will use a unix pipe to
439 * awake the main thread. The following are the two pipe FDs. */
440 int io_ready_pipe_read
;
441 int io_ready_pipe_write
;
442 /* Virtual memory stats */
443 unsigned long long vm_stats_used_pages
;
444 unsigned long long vm_stats_swapped_objects
;
445 unsigned long long vm_stats_swapouts
;
446 unsigned long long vm_stats_swapins
;
448 dict
*pubsub_channels
; /* Map channels to list of subscribed clients */
449 list
*pubsub_patterns
; /* A list of pubsub_patterns */
454 typedef struct pubsubPattern
{
459 typedef void redisCommandProc(redisClient
*c
);
460 typedef void redisVmPreloadProc(redisClient
*c
, struct redisCommand
*cmd
, int argc
, robj
**argv
);
461 struct redisCommand
{
463 redisCommandProc
*proc
;
466 /* Use a function to determine which keys need to be loaded
467 * in the background prior to executing this command. Takes precedence
468 * over vm_firstkey and others, ignored when NULL */
469 redisVmPreloadProc
*vm_preload_proc
;
470 /* What keys should be loaded in background when calling this command? */
471 int vm_firstkey
; /* The first argument that's a key (0 = no keys) */
472 int vm_lastkey
; /* THe last argument that's a key */
473 int vm_keystep
; /* The step between first and last key */
476 struct redisFunctionSym
{
478 unsigned long pointer
;
481 typedef struct _redisSortObject
{
489 typedef struct _redisSortOperation
{
492 } redisSortOperation
;
494 /* ZSETs use a specialized version of Skiplists */
496 typedef struct zskiplistNode
{
497 struct zskiplistNode
**forward
;
498 struct zskiplistNode
*backward
;
504 typedef struct zskiplist
{
505 struct zskiplistNode
*header
, *tail
;
506 unsigned long length
;
510 typedef struct zset
{
515 /* Our shared "common" objects */
517 #define REDIS_SHARED_INTEGERS 10000
518 struct sharedObjectsStruct
{
519 robj
*crlf
, *ok
, *err
, *emptybulk
, *czero
, *cone
, *pong
, *space
,
520 *colon
, *nullbulk
, *nullmultibulk
, *queued
,
521 *emptymultibulk
, *wrongtypeerr
, *nokeyerr
, *syntaxerr
, *sameobjecterr
,
522 *outofrangeerr
, *plus
,
523 *select0
, *select1
, *select2
, *select3
, *select4
,
524 *select5
, *select6
, *select7
, *select8
, *select9
,
525 *messagebulk
, *pmessagebulk
, *subscribebulk
, *unsubscribebulk
, *mbulk3
,
526 *mbulk4
, *psubscribebulk
, *punsubscribebulk
,
527 *integers
[REDIS_SHARED_INTEGERS
];
530 /* Global vars that are actually used as constants. The following double
531 * values are used for double on-disk serialization, and are initialized
532 * at runtime to avoid strange compiler optimizations. */
534 static double R_Zero
, R_PosInf
, R_NegInf
, R_Nan
;
536 /* VM threaded I/O request message */
537 #define REDIS_IOJOB_LOAD 0 /* Load from disk to memory */
538 #define REDIS_IOJOB_PREPARE_SWAP 1 /* Compute needed pages */
539 #define REDIS_IOJOB_DO_SWAP 2 /* Swap from memory to disk */
540 typedef struct iojob
{
541 int type
; /* Request type, REDIS_IOJOB_* */
542 redisDb
*db
;/* Redis database */
543 robj
*key
; /* This I/O request is about swapping this key */
544 robj
*val
; /* the value to swap for REDIS_IOREQ_*_SWAP, otherwise this
545 * field is populated by the I/O thread for REDIS_IOREQ_LOAD. */
546 off_t page
; /* Swap page where to read/write the object */
547 off_t pages
; /* Swap pages needed to save object. PREPARE_SWAP return val */
548 int canceled
; /* True if this command was canceled by blocking side of VM */
549 pthread_t thread
; /* ID of the thread processing this entry */
552 /*================================ Prototypes =============================== */
554 static void freeStringObject(robj
*o
);
555 static void freeListObject(robj
*o
);
556 static void freeSetObject(robj
*o
);
557 static void decrRefCount(void *o
);
558 static robj
*createObject(int type
, void *ptr
);
559 static void freeClient(redisClient
*c
);
560 static int rdbLoad(char *filename
);
561 static void addReply(redisClient
*c
, robj
*obj
);
562 static void addReplySds(redisClient
*c
, sds s
);
563 static void incrRefCount(robj
*o
);
564 static int rdbSaveBackground(char *filename
);
565 static robj
*createStringObject(char *ptr
, size_t len
);
566 static robj
*dupStringObject(robj
*o
);
567 static void replicationFeedSlaves(list
*slaves
, int dictid
, robj
**argv
, int argc
);
568 static void replicationFeedMonitors(list
*monitors
, int dictid
, robj
**argv
, int argc
);
569 static void flushAppendOnlyFile(void);
570 static void feedAppendOnlyFile(struct redisCommand
*cmd
, int dictid
, robj
**argv
, int argc
);
571 static int syncWithMaster(void);
572 static robj
*tryObjectEncoding(robj
*o
);
573 static robj
*getDecodedObject(robj
*o
);
574 static int removeExpire(redisDb
*db
, robj
*key
);
575 static int expireIfNeeded(redisDb
*db
, robj
*key
);
576 static int deleteIfVolatile(redisDb
*db
, robj
*key
);
577 static int deleteIfSwapped(redisDb
*db
, robj
*key
);
578 static int deleteKey(redisDb
*db
, robj
*key
);
579 static time_t getExpire(redisDb
*db
, robj
*key
);
580 static int setExpire(redisDb
*db
, robj
*key
, time_t when
);
581 static void updateSlavesWaitingBgsave(int bgsaveerr
);
582 static void freeMemoryIfNeeded(void);
583 static int processCommand(redisClient
*c
);
584 static void setupSigSegvAction(void);
585 static void rdbRemoveTempFile(pid_t childpid
);
586 static void aofRemoveTempFile(pid_t childpid
);
587 static size_t stringObjectLen(robj
*o
);
588 static void processInputBuffer(redisClient
*c
);
589 static zskiplist
*zslCreate(void);
590 static void zslFree(zskiplist
*zsl
);
591 static void zslInsert(zskiplist
*zsl
, double score
, robj
*obj
);
592 static void sendReplyToClientWritev(aeEventLoop
*el
, int fd
, void *privdata
, int mask
);
593 static void initClientMultiState(redisClient
*c
);
594 static void freeClientMultiState(redisClient
*c
);
595 static void queueMultiCommand(redisClient
*c
, struct redisCommand
*cmd
);
596 static void unblockClientWaitingData(redisClient
*c
);
597 static int handleClientsWaitingListPush(redisClient
*c
, robj
*key
, robj
*ele
);
598 static void vmInit(void);
599 static void vmMarkPagesFree(off_t page
, off_t count
);
600 static robj
*vmLoadObject(robj
*key
);
601 static robj
*vmPreviewObject(robj
*key
);
602 static int vmSwapOneObjectBlocking(void);
603 static int vmSwapOneObjectThreaded(void);
604 static int vmCanSwapOut(void);
605 static int tryFreeOneObjectFromFreelist(void);
606 static void acceptHandler(aeEventLoop
*el
, int fd
, void *privdata
, int mask
);
607 static void vmThreadedIOCompletedJob(aeEventLoop
*el
, int fd
, void *privdata
, int mask
);
608 static void vmCancelThreadedIOJob(robj
*o
);
609 static void lockThreadedIO(void);
610 static void unlockThreadedIO(void);
611 static int vmSwapObjectThreaded(robj
*key
, robj
*val
, redisDb
*db
);
612 static void freeIOJob(iojob
*j
);
613 static void queueIOJob(iojob
*j
);
614 static int vmWriteObjectOnSwap(robj
*o
, off_t page
);
615 static robj
*vmReadObjectFromSwap(off_t page
, int type
);
616 static void waitEmptyIOJobsQueue(void);
617 static void vmReopenSwapFile(void);
618 static int vmFreePage(off_t page
);
619 static void zunionInterBlockClientOnSwappedKeys(redisClient
*c
, struct redisCommand
*cmd
, int argc
, robj
**argv
);
620 static void execBlockClientOnSwappedKeys(redisClient
*c
, struct redisCommand
*cmd
, int argc
, robj
**argv
);
621 static int blockClientOnSwappedKeys(redisClient
*c
, struct redisCommand
*cmd
);
622 static int dontWaitForSwappedKey(redisClient
*c
, robj
*key
);
623 static void handleClientsBlockedOnSwappedKey(redisDb
*db
, robj
*key
);
624 static void readQueryFromClient(aeEventLoop
*el
, int fd
, void *privdata
, int mask
);
625 static struct redisCommand
*lookupCommand(char *name
);
626 static void call(redisClient
*c
, struct redisCommand
*cmd
);
627 static void resetClient(redisClient
*c
);
628 static void convertToRealHash(robj
*o
);
629 static int pubsubUnsubscribeAllChannels(redisClient
*c
, int notify
);
630 static int pubsubUnsubscribeAllPatterns(redisClient
*c
, int notify
);
631 static void freePubsubPattern(void *p
);
632 static int listMatchPubsubPattern(void *a
, void *b
);
633 static int compareStringObjects(robj
*a
, robj
*b
);
634 static int equalStringObjects(robj
*a
, robj
*b
);
636 static int rewriteAppendOnlyFileBackground(void);
637 static int vmSwapObjectBlocking(robj
*key
, robj
*val
);
/* Explicit (void): an empty "()" list is an old-style declaration that
 * leaves call arguments unchecked by the compiler. */
static int prepareForShutdown(void);
639 static void touchWatchedKey(redisDb
*db
, robj
*key
);
640 static void touchWatchedKeysOnFlush(int dbid
);
641 static void unwatchAllKeys(redisClient
*c
);
643 static void authCommand(redisClient
*c
);
644 static void pingCommand(redisClient
*c
);
645 static void echoCommand(redisClient
*c
);
646 static void setCommand(redisClient
*c
);
647 static void setnxCommand(redisClient
*c
);
648 static void setexCommand(redisClient
*c
);
649 static void getCommand(redisClient
*c
);
650 static void delCommand(redisClient
*c
);
651 static void existsCommand(redisClient
*c
);
652 static void incrCommand(redisClient
*c
);
653 static void decrCommand(redisClient
*c
);
654 static void incrbyCommand(redisClient
*c
);
655 static void decrbyCommand(redisClient
*c
);
656 static void selectCommand(redisClient
*c
);
657 static void randomkeyCommand(redisClient
*c
);
658 static void keysCommand(redisClient
*c
);
659 static void dbsizeCommand(redisClient
*c
);
660 static void lastsaveCommand(redisClient
*c
);
661 static void saveCommand(redisClient
*c
);
662 static void bgsaveCommand(redisClient
*c
);
663 static void bgrewriteaofCommand(redisClient
*c
);
664 static void shutdownCommand(redisClient
*c
);
665 static void moveCommand(redisClient
*c
);
666 static void renameCommand(redisClient
*c
);
667 static void renamenxCommand(redisClient
*c
);
668 static void lpushCommand(redisClient
*c
);
669 static void rpushCommand(redisClient
*c
);
670 static void lpopCommand(redisClient
*c
);
671 static void rpopCommand(redisClient
*c
);
672 static void llenCommand(redisClient
*c
);
673 static void lindexCommand(redisClient
*c
);
674 static void lrangeCommand(redisClient
*c
);
675 static void ltrimCommand(redisClient
*c
);
676 static void typeCommand(redisClient
*c
);
677 static void lsetCommand(redisClient
*c
);
678 static void saddCommand(redisClient
*c
);
679 static void sremCommand(redisClient
*c
);
680 static void smoveCommand(redisClient
*c
);
681 static void sismemberCommand(redisClient
*c
);
682 static void scardCommand(redisClient
*c
);
683 static void spopCommand(redisClient
*c
);
684 static void srandmemberCommand(redisClient
*c
);
685 static void sinterCommand(redisClient
*c
);
686 static void sinterstoreCommand(redisClient
*c
);
687 static void sunionCommand(redisClient
*c
);
688 static void sunionstoreCommand(redisClient
*c
);
689 static void sdiffCommand(redisClient
*c
);
690 static void sdiffstoreCommand(redisClient
*c
);
691 static void syncCommand(redisClient
*c
);
692 static void flushdbCommand(redisClient
*c
);
693 static void flushallCommand(redisClient
*c
);
694 static void sortCommand(redisClient
*c
);
695 static void lremCommand(redisClient
*c
);
696 static void rpoplpushcommand(redisClient
*c
);
697 static void infoCommand(redisClient
*c
);
698 static void mgetCommand(redisClient
*c
);
699 static void monitorCommand(redisClient
*c
);
700 static void expireCommand(redisClient
*c
);
701 static void expireatCommand(redisClient
*c
);
702 static void getsetCommand(redisClient
*c
);
703 static void ttlCommand(redisClient
*c
);
704 static void slaveofCommand(redisClient
*c
);
705 static void debugCommand(redisClient
*c
);
706 static void msetCommand(redisClient
*c
);
707 static void msetnxCommand(redisClient
*c
);
708 static void zaddCommand(redisClient
*c
);
709 static void zincrbyCommand(redisClient
*c
);
710 static void zrangeCommand(redisClient
*c
);
711 static void zrangebyscoreCommand(redisClient
*c
);
712 static void zcountCommand(redisClient
*c
);
713 static void zrevrangeCommand(redisClient
*c
);
714 static void zcardCommand(redisClient
*c
);
715 static void zremCommand(redisClient
*c
);
716 static void zscoreCommand(redisClient
*c
);
717 static void zremrangebyscoreCommand(redisClient
*c
);
718 static void multiCommand(redisClient
*c
);
719 static void execCommand(redisClient
*c
);
720 static void discardCommand(redisClient
*c
);
721 static void blpopCommand(redisClient
*c
);
722 static void brpopCommand(redisClient
*c
);
723 static void appendCommand(redisClient
*c
);
724 static void substrCommand(redisClient
*c
);
725 static void zrankCommand(redisClient
*c
);
726 static void zrevrankCommand(redisClient
*c
);
727 static void hsetCommand(redisClient
*c
);
728 static void hsetnxCommand(redisClient
*c
);
729 static void hgetCommand(redisClient
*c
);
730 static void hmsetCommand(redisClient
*c
);
731 static void hmgetCommand(redisClient
*c
);
732 static void hdelCommand(redisClient
*c
);
733 static void hlenCommand(redisClient
*c
);
734 static void zremrangebyrankCommand(redisClient
*c
);
735 static void zunionstoreCommand(redisClient
*c
);
736 static void zinterstoreCommand(redisClient
*c
);
737 static void hkeysCommand(redisClient
*c
);
738 static void hvalsCommand(redisClient
*c
);
739 static void hgetallCommand(redisClient
*c
);
740 static void hexistsCommand(redisClient
*c
);
741 static void configCommand(redisClient
*c
);
742 static void hincrbyCommand(redisClient
*c
);
743 static void subscribeCommand(redisClient
*c
);
744 static void unsubscribeCommand(redisClient
*c
);
745 static void psubscribeCommand(redisClient
*c
);
746 static void punsubscribeCommand(redisClient
*c
);
747 static void publishCommand(redisClient
*c
);
748 static void watchCommand(redisClient
*c
);
749 static void unwatchCommand(redisClient
*c
);
751 /*================================= Globals ================================= */
754 static struct redisServer server
; /* server global state */
755 static struct redisCommand cmdTable
[] = {
756 {"get",getCommand
,2,REDIS_CMD_INLINE
,NULL
,1,1,1},
757 {"set",setCommand
,3,REDIS_CMD_BULK
|REDIS_CMD_DENYOOM
,NULL
,0,0,0},
758 {"setnx",setnxCommand
,3,REDIS_CMD_BULK
|REDIS_CMD_DENYOOM
,NULL
,0,0,0},
759 {"setex",setexCommand
,4,REDIS_CMD_BULK
|REDIS_CMD_DENYOOM
,NULL
,0,0,0},
760 {"append",appendCommand
,3,REDIS_CMD_BULK
|REDIS_CMD_DENYOOM
,NULL
,1,1,1},
761 {"substr",substrCommand
,4,REDIS_CMD_INLINE
,NULL
,1,1,1},
762 {"del",delCommand
,-2,REDIS_CMD_INLINE
,NULL
,0,0,0},
763 {"exists",existsCommand
,2,REDIS_CMD_INLINE
,NULL
,1,1,1},
764 {"incr",incrCommand
,2,REDIS_CMD_INLINE
|REDIS_CMD_DENYOOM
,NULL
,1,1,1},
765 {"decr",decrCommand
,2,REDIS_CMD_INLINE
|REDIS_CMD_DENYOOM
,NULL
,1,1,1},
766 {"mget",mgetCommand
,-2,REDIS_CMD_INLINE
,NULL
,1,-1,1},
767 {"rpush",rpushCommand
,3,REDIS_CMD_BULK
|REDIS_CMD_DENYOOM
,NULL
,1,1,1},
768 {"lpush",lpushCommand
,3,REDIS_CMD_BULK
|REDIS_CMD_DENYOOM
,NULL
,1,1,1},
769 {"rpop",rpopCommand
,2,REDIS_CMD_INLINE
,NULL
,1,1,1},
770 {"lpop",lpopCommand
,2,REDIS_CMD_INLINE
,NULL
,1,1,1},
771 {"brpop",brpopCommand
,-3,REDIS_CMD_INLINE
,NULL
,1,1,1},
772 {"blpop",blpopCommand
,-3,REDIS_CMD_INLINE
,NULL
,1,1,1},
773 {"llen",llenCommand
,2,REDIS_CMD_INLINE
,NULL
,1,1,1},
774 {"lindex",lindexCommand
,3,REDIS_CMD_INLINE
,NULL
,1,1,1},
775 {"lset",lsetCommand
,4,REDIS_CMD_BULK
|REDIS_CMD_DENYOOM
,NULL
,1,1,1},
776 {"lrange",lrangeCommand
,4,REDIS_CMD_INLINE
,NULL
,1,1,1},
777 {"ltrim",ltrimCommand
,4,REDIS_CMD_INLINE
,NULL
,1,1,1},
778 {"lrem",lremCommand
,4,REDIS_CMD_BULK
,NULL
,1,1,1},
779 {"rpoplpush",rpoplpushcommand
,3,REDIS_CMD_INLINE
|REDIS_CMD_DENYOOM
,NULL
,1,2,1},
780 {"sadd",saddCommand
,3,REDIS_CMD_BULK
|REDIS_CMD_DENYOOM
,NULL
,1,1,1},
781 {"srem",sremCommand
,3,REDIS_CMD_BULK
,NULL
,1,1,1},
782 {"smove",smoveCommand
,4,REDIS_CMD_BULK
,NULL
,1,2,1},
783 {"sismember",sismemberCommand
,3,REDIS_CMD_BULK
,NULL
,1,1,1},
784 {"scard",scardCommand
,2,REDIS_CMD_INLINE
,NULL
,1,1,1},
785 {"spop",spopCommand
,2,REDIS_CMD_INLINE
,NULL
,1,1,1},
786 {"srandmember",srandmemberCommand
,2,REDIS_CMD_INLINE
,NULL
,1,1,1},
787 {"sinter",sinterCommand
,-2,REDIS_CMD_INLINE
|REDIS_CMD_DENYOOM
,NULL
,1,-1,1},
788 {"sinterstore",sinterstoreCommand
,-3,REDIS_CMD_INLINE
|REDIS_CMD_DENYOOM
,NULL
,2,-1,1},
789 {"sunion",sunionCommand
,-2,REDIS_CMD_INLINE
|REDIS_CMD_DENYOOM
,NULL
,1,-1,1},
790 {"sunionstore",sunionstoreCommand
,-3,REDIS_CMD_INLINE
|REDIS_CMD_DENYOOM
,NULL
,2,-1,1},
791 {"sdiff",sdiffCommand
,-2,REDIS_CMD_INLINE
|REDIS_CMD_DENYOOM
,NULL
,1,-1,1},
792 {"sdiffstore",sdiffstoreCommand
,-3,REDIS_CMD_INLINE
|REDIS_CMD_DENYOOM
,NULL
,2,-1,1},
793 {"smembers",sinterCommand
,2,REDIS_CMD_INLINE
,NULL
,1,1,1},
794 {"zadd",zaddCommand
,4,REDIS_CMD_BULK
|REDIS_CMD_DENYOOM
,NULL
,1,1,1},
795 {"zincrby",zincrbyCommand
,4,REDIS_CMD_BULK
|REDIS_CMD_DENYOOM
,NULL
,1,1,1},
796 {"zrem",zremCommand
,3,REDIS_CMD_BULK
,NULL
,1,1,1},
797 {"zremrangebyscore",zremrangebyscoreCommand
,4,REDIS_CMD_INLINE
,NULL
,1,1,1},
798 {"zremrangebyrank",zremrangebyrankCommand
,4,REDIS_CMD_INLINE
,NULL
,1,1,1},
799 {"zunionstore",zunionstoreCommand
,-4,REDIS_CMD_INLINE
|REDIS_CMD_DENYOOM
,zunionInterBlockClientOnSwappedKeys
,0,0,0},
800 {"zinterstore",zinterstoreCommand
,-4,REDIS_CMD_INLINE
|REDIS_CMD_DENYOOM
,zunionInterBlockClientOnSwappedKeys
,0,0,0},
801 {"zrange",zrangeCommand
,-4,REDIS_CMD_INLINE
,NULL
,1,1,1},
802 {"zrangebyscore",zrangebyscoreCommand
,-4,REDIS_CMD_INLINE
,NULL
,1,1,1},
803 {"zcount",zcountCommand
,4,REDIS_CMD_INLINE
,NULL
,1,1,1},
804 {"zrevrange",zrevrangeCommand
,-4,REDIS_CMD_INLINE
,NULL
,1,1,1},
805 {"zcard",zcardCommand
,2,REDIS_CMD_INLINE
,NULL
,1,1,1},
806 {"zscore",zscoreCommand
,3,REDIS_CMD_BULK
|REDIS_CMD_DENYOOM
,NULL
,1,1,1},
807 {"zrank",zrankCommand
,3,REDIS_CMD_BULK
,NULL
,1,1,1},
808 {"zrevrank",zrevrankCommand
,3,REDIS_CMD_BULK
,NULL
,1,1,1},
809 {"hset",hsetCommand
,4,REDIS_CMD_BULK
|REDIS_CMD_DENYOOM
,NULL
,1,1,1},
810 {"hsetnx",hsetnxCommand
,4,REDIS_CMD_BULK
|REDIS_CMD_DENYOOM
,NULL
,1,1,1},
811 {"hget",hgetCommand
,3,REDIS_CMD_BULK
,NULL
,1,1,1},
812 {"hmset",hmsetCommand
,-4,REDIS_CMD_BULK
|REDIS_CMD_DENYOOM
,NULL
,1,1,1},
813 {"hmget",hmgetCommand
,-3,REDIS_CMD_BULK
,NULL
,1,1,1},
814 {"hincrby",hincrbyCommand
,4,REDIS_CMD_INLINE
|REDIS_CMD_DENYOOM
,NULL
,1,1,1},
815 {"hdel",hdelCommand
,3,REDIS_CMD_BULK
,NULL
,1,1,1},
816 {"hlen",hlenCommand
,2,REDIS_CMD_INLINE
,NULL
,1,1,1},
817 {"hkeys",hkeysCommand
,2,REDIS_CMD_INLINE
,NULL
,1,1,1},
818 {"hvals",hvalsCommand
,2,REDIS_CMD_INLINE
,NULL
,1,1,1},
819 {"hgetall",hgetallCommand
,2,REDIS_CMD_INLINE
,NULL
,1,1,1},
820 {"hexists",hexistsCommand
,3,REDIS_CMD_BULK
,NULL
,1,1,1},
821 {"incrby",incrbyCommand
,3,REDIS_CMD_INLINE
|REDIS_CMD_DENYOOM
,NULL
,1,1,1},
822 {"decrby",decrbyCommand
,3,REDIS_CMD_INLINE
|REDIS_CMD_DENYOOM
,NULL
,1,1,1},
823 {"getset",getsetCommand
,3,REDIS_CMD_BULK
|REDIS_CMD_DENYOOM
,NULL
,1,1,1},
824 {"mset",msetCommand
,-3,REDIS_CMD_BULK
|REDIS_CMD_DENYOOM
,NULL
,1,-1,2},
825 {"msetnx",msetnxCommand
,-3,REDIS_CMD_BULK
|REDIS_CMD_DENYOOM
,NULL
,1,-1,2},
826 {"randomkey",randomkeyCommand
,1,REDIS_CMD_INLINE
,NULL
,0,0,0},
827 {"select",selectCommand
,2,REDIS_CMD_INLINE
,NULL
,0,0,0},
828 {"move",moveCommand
,3,REDIS_CMD_INLINE
,NULL
,1,1,1},
829 {"rename",renameCommand
,3,REDIS_CMD_INLINE
,NULL
,1,1,1},
830 {"renamenx",renamenxCommand
,3,REDIS_CMD_INLINE
,NULL
,1,1,1},
831 {"expire",expireCommand
,3,REDIS_CMD_INLINE
,NULL
,0,0,0},
832 {"expireat",expireatCommand
,3,REDIS_CMD_INLINE
,NULL
,0,0,0},
833 {"keys",keysCommand
,2,REDIS_CMD_INLINE
,NULL
,0,0,0},
834 {"dbsize",dbsizeCommand
,1,REDIS_CMD_INLINE
,NULL
,0,0,0},
835 {"auth",authCommand
,2,REDIS_CMD_INLINE
,NULL
,0,0,0},
836 {"ping",pingCommand
,1,REDIS_CMD_INLINE
,NULL
,0,0,0},
837 {"echo",echoCommand
,2,REDIS_CMD_BULK
,NULL
,0,0,0},
838 {"save",saveCommand
,1,REDIS_CMD_INLINE
,NULL
,0,0,0},
839 {"bgsave",bgsaveCommand
,1,REDIS_CMD_INLINE
,NULL
,0,0,0},
840 {"bgrewriteaof",bgrewriteaofCommand
,1,REDIS_CMD_INLINE
,NULL
,0,0,0},
841 {"shutdown",shutdownCommand
,1,REDIS_CMD_INLINE
,NULL
,0,0,0},
842 {"lastsave",lastsaveCommand
,1,REDIS_CMD_INLINE
,NULL
,0,0,0},
843 {"type",typeCommand
,2,REDIS_CMD_INLINE
,NULL
,1,1,1},
844 {"multi",multiCommand
,1,REDIS_CMD_INLINE
,NULL
,0,0,0},
845 {"exec",execCommand
,1,REDIS_CMD_INLINE
|REDIS_CMD_DENYOOM
,execBlockClientOnSwappedKeys
,0,0,0},
846 {"discard",discardCommand
,1,REDIS_CMD_INLINE
,NULL
,0,0,0},
847 {"sync",syncCommand
,1,REDIS_CMD_INLINE
,NULL
,0,0,0},
848 {"flushdb",flushdbCommand
,1,REDIS_CMD_INLINE
,NULL
,0,0,0},
849 {"flushall",flushallCommand
,1,REDIS_CMD_INLINE
,NULL
,0,0,0},
850 {"sort",sortCommand
,-2,REDIS_CMD_INLINE
|REDIS_CMD_DENYOOM
,NULL
,1,1,1},
851 {"info",infoCommand
,1,REDIS_CMD_INLINE
,NULL
,0,0,0},
852 {"monitor",monitorCommand
,1,REDIS_CMD_INLINE
,NULL
,0,0,0},
853 {"ttl",ttlCommand
,2,REDIS_CMD_INLINE
,NULL
,1,1,1},
854 {"slaveof",slaveofCommand
,3,REDIS_CMD_INLINE
,NULL
,0,0,0},
855 {"debug",debugCommand
,-2,REDIS_CMD_INLINE
,NULL
,0,0,0},
856 {"config",configCommand
,-2,REDIS_CMD_BULK
,NULL
,0,0,0},
857 {"subscribe",subscribeCommand
,-2,REDIS_CMD_INLINE
,NULL
,0,0,0},
858 {"unsubscribe",unsubscribeCommand
,-1,REDIS_CMD_INLINE
,NULL
,0,0,0},
859 {"psubscribe",psubscribeCommand
,-2,REDIS_CMD_INLINE
,NULL
,0,0,0},
860 {"punsubscribe",punsubscribeCommand
,-1,REDIS_CMD_INLINE
,NULL
,0,0,0},
861 {"publish",publishCommand
,3,REDIS_CMD_BULK
|REDIS_CMD_FORCE_REPLICATION
,NULL
,0,0,0},
862 {"watch",watchCommand
,-2,REDIS_CMD_INLINE
,NULL
,0,0,0},
863 {"unwatch",unwatchCommand
,1,REDIS_CMD_INLINE
,NULL
,0,0,0},
864 {NULL
,NULL
,0,0,NULL
,0,0,0}
867 /*============================ Utility functions ============================ */
869 /* Glob-style pattern matching. */
870 static int stringmatchlen(const char *pattern
, int patternLen
,
871 const char *string
, int stringLen
, int nocase
)
876 while (pattern
[1] == '*') {
881 return 1; /* match */
883 if (stringmatchlen(pattern
+1, patternLen
-1,
884 string
, stringLen
, nocase
))
885 return 1; /* match */
889 return 0; /* no match */
893 return 0; /* no match */
903 not = pattern
[0] == '^';
910 if (pattern
[0] == '\\') {
913 if (pattern
[0] == string
[0])
915 } else if (pattern
[0] == ']') {
917 } else if (patternLen
== 0) {
921 } else if (pattern
[1] == '-' && patternLen
>= 3) {
922 int start
= pattern
[0];
923 int end
= pattern
[2];
931 start
= tolower(start
);
937 if (c
>= start
&& c
<= end
)
941 if (pattern
[0] == string
[0])
944 if (tolower((int)pattern
[0]) == tolower((int)string
[0]))
954 return 0; /* no match */
960 if (patternLen
>= 2) {
967 if (pattern
[0] != string
[0])
968 return 0; /* no match */
970 if (tolower((int)pattern
[0]) != tolower((int)string
[0]))
971 return 0; /* no match */
979 if (stringLen
== 0) {
980 while(*pattern
== '*') {
987 if (patternLen
== 0 && stringLen
== 0)
/* Convenience wrapper around stringmatchlen() for NUL-terminated inputs:
 * both lengths are taken with strlen() and 'nocase' is forwarded unchanged.
 * The return value is whatever stringmatchlen() returns. */
992 static int stringmatch(const char *pattern
, const char *string
, int nocase
) {
993 return stringmatchlen(pattern
,strlen(pattern
),string
,strlen(string
),nocase
);
996 /* Convert a string representing an amount of memory into the number of
997 * bytes, so for instance memtoll("1Gi") will return 1073741824 that is
1000 * On parsing error, if *err is not NULL, it's set to 1, otherwise it's
1002 static long long memtoll(const char *p
, int *err
) {
1005 long mul
; /* unit multiplier */
1007 unsigned int digits
;
1010 /* Search the first non digit character. */
1013 while(*u
&& isdigit(*u
)) u
++;
1014 if (*u
== '\0' || !strcasecmp(u
,"b")) {
1016 } else if (!strcasecmp(u
,"k")) {
1018 } else if (!strcasecmp(u
,"kb")) {
1020 } else if (!strcasecmp(u
,"m")) {
1022 } else if (!strcasecmp(u
,"mb")) {
1024 } else if (!strcasecmp(u
,"g")) {
1025 mul
= 1000L*1000*1000;
1026 } else if (!strcasecmp(u
,"gb")) {
1027 mul
= 1024L*1024*1024;
1033 if (digits
>= sizeof(buf
)) {
1037 memcpy(buf
,p
,digits
);
1039 val
= strtoll(buf
,NULL
,10);
1043 /* Convert a long long into a string. Returns the number of
1044 * characters needed to represent the number, that can be shorter if passed
1045 * buffer length is not enough to store the whole number. */
1046 static int ll2string(char *s
, size_t len
, long long value
) {
1048 unsigned long long v
;
1051 if (len
== 0) return 0;
1052 v
= (value
< 0) ? -value
: value
;
1053 p
= buf
+31; /* point to the last character */
1058 if (value
< 0) *p
-- = '-';
1061 if (l
+1 > len
) l
= len
-1; /* Make sure it fits, including the nul term */
1067 static void redisLog(int level
, const char *fmt
, ...) {
1071 fp
= (server
.logfile
== NULL
) ? stdout
: fopen(server
.logfile
,"a");
1075 if (level
>= server
.verbosity
) {
1081 strftime(buf
,64,"%d %b %H:%M:%S",localtime(&now
));
1082 fprintf(fp
,"[%d] %s %c ",(int)getpid(),buf
,c
[level
]);
1083 vfprintf(fp
, fmt
, ap
);
1089 if (server
.logfile
) fclose(fp
);
1092 /*====================== Hash table type implementation ==================== */
1094 /* This is a hash table type that uses the SDS dynamic strings library as
1095 * keys and redis objects as values (objects can hold SDS strings,
1098 static void dictVanillaFree(void *privdata
, void *val
)
1100 DICT_NOTUSED(privdata
);
/* Dict value destructor for entries whose value is a list: the value is
 * cast to list* and handed to listRelease(). 'privdata' is not used. */
1104 static void dictListDestructor(void *privdata
, void *val
)
1106 DICT_NOTUSED(privdata
);
1107 listRelease((list
*)val
);
1110 static int sdsDictKeyCompare(void *privdata
, const void *key1
,
1114 DICT_NOTUSED(privdata
);
1116 l1
= sdslen((sds
)key1
);
1117 l2
= sdslen((sds
)key2
);
1118 if (l1
!= l2
) return 0;
1119 return memcmp(key1
, key2
, l1
) == 0;
1122 static void dictRedisObjectDestructor(void *privdata
, void *val
)
1124 DICT_NOTUSED(privdata
);
1126 if (val
== NULL
) return; /* Values of swapped out keys as set to NULL */
1130 static int dictObjKeyCompare(void *privdata
, const void *key1
,
1133 const robj
*o1
= key1
, *o2
= key2
;
1134 return sdsDictKeyCompare(privdata
,o1
->ptr
,o2
->ptr
);
/* Dict hash callback for keys that are robj pointers: the bytes of the sds
 * string stored in o->ptr (length taken with sdslen()) are hashed through
 * dictGenHashFunction(). o->ptr is read as an sds unconditionally. */
1137 static unsigned int dictObjHash(const void *key
) {
1138 const robj
*o
= key
;
1139 return dictGenHashFunction(o
->ptr
, sdslen((sds
)o
->ptr
));
1142 static int dictEncObjKeyCompare(void *privdata
, const void *key1
,
1145 robj
*o1
= (robj
*) key1
, *o2
= (robj
*) key2
;
1148 if (o1
->encoding
== REDIS_ENCODING_INT
&&
1149 o2
->encoding
== REDIS_ENCODING_INT
)
1150 return o1
->ptr
== o2
->ptr
;
1152 o1
= getDecodedObject(o1
);
1153 o2
= getDecodedObject(o2
);
1154 cmp
= sdsDictKeyCompare(privdata
,o1
->ptr
,o2
->ptr
);
1160 static unsigned int dictEncObjHash(const void *key
) {
1161 robj
*o
= (robj
*) key
;
1163 if (o
->encoding
== REDIS_ENCODING_RAW
) {
1164 return dictGenHashFunction(o
->ptr
, sdslen((sds
)o
->ptr
));
1166 if (o
->encoding
== REDIS_ENCODING_INT
) {
1170 len
= ll2string(buf
,32,(long)o
->ptr
);
1171 return dictGenHashFunction((unsigned char*)buf
, len
);
1175 o
= getDecodedObject(o
);
1176 hash
= dictGenHashFunction(o
->ptr
, sdslen((sds
)o
->ptr
));
1183 /* Sets type and expires */
1184 static dictType setDictType
= {
1185 dictEncObjHash
, /* hash function */
1188 dictEncObjKeyCompare
, /* key compare */
1189 dictRedisObjectDestructor
, /* key destructor */
1190 NULL
/* val destructor */
1193 /* Sorted sets hash (note: a skiplist is used in addition to the hash table) */
1194 static dictType zsetDictType
= {
1195 dictEncObjHash
, /* hash function */
1198 dictEncObjKeyCompare
, /* key compare */
1199 dictRedisObjectDestructor
, /* key destructor */
1200 dictVanillaFree
/* val destructor of malloc(sizeof(double)) */
1204 static dictType dbDictType
= {
1205 dictObjHash
, /* hash function */
1208 dictObjKeyCompare
, /* key compare */
1209 dictRedisObjectDestructor
, /* key destructor */
1210 dictRedisObjectDestructor
/* val destructor */
1214 static dictType keyptrDictType
= {
1215 dictObjHash
, /* hash function */
1218 dictObjKeyCompare
, /* key compare */
1219 dictRedisObjectDestructor
, /* key destructor */
1220 NULL
/* val destructor */
1223 /* Hash type hash table (note that small hashes are represented with zipmaps) */
1224 static dictType hashDictType
= {
1225 dictEncObjHash
, /* hash function */
1228 dictEncObjKeyCompare
, /* key compare */
1229 dictRedisObjectDestructor
, /* key destructor */
1230 dictRedisObjectDestructor
/* val destructor */
1233 /* Keylist hash table type has unencoded redis objects as keys and
1234 * lists as values. It's used for blocking operations (BLPOP) and to
1235 * map swapped keys to a list of clients waiting for these keys to be loaded. */
1236 static dictType keylistDictType
= {
1237 dictObjHash
, /* hash function */
1240 dictObjKeyCompare
, /* key compare */
1241 dictRedisObjectDestructor
, /* key destructor */
1242 dictListDestructor
/* val destructor */
1245 static void version();
1247 /* ========================= Random utility functions ======================= */
1249 /* Redis generally does not try to recover from out of memory conditions
1250 * when allocating objects or strings, it is not clear if it will be possible
1251 * to report this condition to the client since the networking layer itself
1252 * is based on heap allocation for send buffers, so we simply abort.
1253 * At least the code will be simpler to read... */
1254 static void oom(const char *msg
) {
1255 redisLog(REDIS_WARNING
, "%s: Out of memory\n",msg
);
1260 /* ====================== Redis server networking stuff ===================== */
1261 static void closeTimedoutClients(void) {
1264 time_t now
= time(NULL
);
1267 listRewind(server
.clients
,&li
);
1268 while ((ln
= listNext(&li
)) != NULL
) {
1269 c
= listNodeValue(ln
);
1270 if (server
.maxidletime
&&
1271 !(c
->flags
& REDIS_SLAVE
) && /* no timeout for slaves */
1272 !(c
->flags
& REDIS_MASTER
) && /* no timeout for masters */
1273 dictSize(c
->pubsub_channels
) == 0 && /* no timeout for pubsub */
1274 listLength(c
->pubsub_patterns
) == 0 &&
1275 (now
- c
->lastinteraction
> server
.maxidletime
))
1277 redisLog(REDIS_VERBOSE
,"Closing idle client");
1279 } else if (c
->flags
& REDIS_BLOCKED
) {
1280 if (c
->blockingto
!= 0 && c
->blockingto
< now
) {
1281 addReply(c
,shared
.nullmultibulk
);
1282 unblockClientWaitingData(c
);
/* Returns non-zero when 'dict' needs a resize: it has both slots and
 * elements, more slots than DICT_HT_INITIAL_SIZE, and a fill percentage
 * (used*100/size) below REDIS_HT_MINFILL. Returns 0 otherwise. */
1288 static int htNeedsResize(dict
*dict
) {
1289 long long size
, used
;
1291 size
= dictSlots(dict
);
1292 used
= dictSize(dict
);
1293 return (size
&& used
&& size
> DICT_HT_INITIAL_SIZE
&&
1294 (used
*100/size
< REDIS_HT_MINFILL
));
1297 /* If the percentage of used slots in the HT drops below REDIS_HT_MINFILL
1298 * we resize the hash table to save memory */
1299 static void tryResizeHashTables(void) {
/* For every database, check both the keyspace dict and the expires dict
 * and call dictResize() on each one that htNeedsResize() flags. */
1302 for (j
= 0; j
< server
.dbnum
; j
++) {
1303 if (htNeedsResize(server
.db
[j
].dict
))
1304 dictResize(server
.db
[j
].dict
);
1305 if (htNeedsResize(server
.db
[j
].expires
))
1306 dictResize(server
.db
[j
].expires
);
1310 /* Our hash table implementation performs rehashing incrementally while
1311 * we write/read from the hash table. Still if the server is idle, the hash
1312 * table will use two tables for a long time. So we try to use 1 millisecond
1313 * of CPU time at every serverCron() loop in order to rehash some key. */
1314 static void incrementallyRehash(void) {
/* Scan the databases in index order and stop at the first one whose
 * keyspace dict is rehashing; only that dict gets a rehash slice in this
 * call. Only .dict is examined here, not .expires. */
1317 for (j
= 0; j
< server
.dbnum
; j
++) {
1318 if (dictIsRehashing(server
.db
[j
].dict
)) {
1319 dictRehashMilliseconds(server
.db
[j
].dict
,1);
1320 break; /* already used our millisecond for this loop... */
1325 /* A background saving child (BGSAVE) terminated its work. Handle this. */
1326 void backgroundSaveDoneHandler(int statloc
) {
1327 int exitcode
= WEXITSTATUS(statloc
);
1328 int bysignal
= WIFSIGNALED(statloc
);
1330 if (!bysignal
&& exitcode
== 0) {
1331 redisLog(REDIS_NOTICE
,
1332 "Background saving terminated with success");
1334 server
.lastsave
= time(NULL
);
1335 } else if (!bysignal
&& exitcode
!= 0) {
1336 redisLog(REDIS_WARNING
, "Background saving error");
1338 redisLog(REDIS_WARNING
,
1339 "Background saving terminated by signal %d", WTERMSIG(statloc
));
1340 rdbRemoveTempFile(server
.bgsavechildpid
);
1342 server
.bgsavechildpid
= -1;
1343 /* Possibly there are slaves waiting for a BGSAVE in order to be served
1344 * (the first stage of SYNC is a bulk transfer of dump.rdb) */
1345 updateSlavesWaitingBgsave(exitcode
== 0 ? REDIS_OK
: REDIS_ERR
);
1348 /* A background append only file rewriting (BGREWRITEAOF) terminated its work.
1350 void backgroundRewriteDoneHandler(int statloc
) {
1351 int exitcode
= WEXITSTATUS(statloc
);
1352 int bysignal
= WIFSIGNALED(statloc
);
1354 if (!bysignal
&& exitcode
== 0) {
1358 redisLog(REDIS_NOTICE
,
1359 "Background append only file rewriting terminated with success");
1360 /* Now it's time to flush the differences accumulated by the parent */
1361 snprintf(tmpfile
,256,"temp-rewriteaof-bg-%d.aof", (int) server
.bgrewritechildpid
);
1362 fd
= open(tmpfile
,O_WRONLY
|O_APPEND
);
1364 redisLog(REDIS_WARNING
, "Not able to open the temp append only file produced by the child: %s", strerror(errno
));
1367 /* Flush our data... */
1368 if (write(fd
,server
.bgrewritebuf
,sdslen(server
.bgrewritebuf
)) !=
1369 (signed) sdslen(server
.bgrewritebuf
)) {
1370 redisLog(REDIS_WARNING
, "Error or short write trying to flush the parent diff of the append log file in the child temp file: %s", strerror(errno
));
1374 redisLog(REDIS_NOTICE
,"Parent diff flushed into the new append log file with success (%lu bytes)",sdslen(server
.bgrewritebuf
));
1375 /* Now our work is to rename the temp file into the stable file. And
1376 * switch the file descriptor used by the server for append only. */
1377 if (rename(tmpfile
,server
.appendfilename
) == -1) {
1378 redisLog(REDIS_WARNING
,"Can't rename the temp append only file into the stable one: %s", strerror(errno
));
1382 /* Mission completed... almost */
1383 redisLog(REDIS_NOTICE
,"Append only file successfully rewritten.");
1384 if (server
.appendfd
!= -1) {
1385 /* If append only is actually enabled... */
1386 close(server
.appendfd
);
1387 server
.appendfd
= fd
;
1389 server
.appendseldb
= -1; /* Make sure it will issue SELECT */
1390 redisLog(REDIS_NOTICE
,"The new append only file was selected for future appends.");
1392 /* If append only is disabled we just generate a dump in this
1393 * format. Why not? */
1396 } else if (!bysignal
&& exitcode
!= 0) {
1397 redisLog(REDIS_WARNING
, "Background append only file rewriting error");
1399 redisLog(REDIS_WARNING
,
1400 "Background append only file rewriting terminated by signal %d",
1404 sdsfree(server
.bgrewritebuf
);
1405 server
.bgrewritebuf
= sdsempty();
1406 aofRemoveTempFile(server
.bgrewritechildpid
);
1407 server
.bgrewritechildpid
= -1;
1410 /* This function is called once a background process of some kind terminates,
1411 * as we want to avoid resizing the hash tables when there is a child in order
1412 * to play well with copy-on-write (otherwise when a resize happens lots of
1413 * memory pages are copied). The goal of this function is to update the ability
1414 * for dict.c to resize the hash tables according to whether or not we have
1415 * running children. */
1416 static void updateDictResizePolicy(void) {
1417 if (server
.bgsavechildpid
== -1 && server
.bgrewritechildpid
== -1)
1420 dictDisableResize();
1423 static int serverCron(struct aeEventLoop
*eventLoop
, long long id
, void *clientData
) {
1424 int j
, loops
= server
.cronloops
++;
1425 REDIS_NOTUSED(eventLoop
);
1427 REDIS_NOTUSED(clientData
);
1429 /* We take a cached value of the unix time in the global state because
1430 * with virtual memory and aging there is the need to store the current time
1431 * in objects at every object access, and accuracy is not needed.
1432 * To access a global var is faster than calling time(NULL) */
1433 server
.unixtime
= time(NULL
);
1435 /* We received a SIGTERM, shutting down here in a safe way, as it is
1436 * not ok doing so inside the signal handler. */
1437 if (server
.shutdown_asap
) {
1438 if (prepareForShutdown() == REDIS_OK
) exit(0);
1439 redisLog(REDIS_WARNING
,"SIGTERM received but errors trying to shut down the server, check the logs for more information");
1442 /* Show some info about non-empty databases */
1443 for (j
= 0; j
< server
.dbnum
; j
++) {
1444 long long size
, used
, vkeys
;
1446 size
= dictSlots(server
.db
[j
].dict
);
1447 used
= dictSize(server
.db
[j
].dict
);
1448 vkeys
= dictSize(server
.db
[j
].expires
);
1449 if (!(loops
% 50) && (used
|| vkeys
)) {
1450 redisLog(REDIS_VERBOSE
,"DB %d: %lld keys (%lld volatile) in %lld slots HT.",j
,used
,vkeys
,size
);
1451 /* dictPrintStats(server.dict); */
1455 /* We don't want to resize the hash tables while a background saving
1456 * is in progress: the saving child is created using fork() that is
1457 * implemented with a copy-on-write semantic in most modern systems, so
1458 * if we resize the HT while there is the saving child at work actually
1459 * a lot of memory movements in the parent will cause a lot of pages
1461 if (server
.bgsavechildpid
== -1 && server
.bgrewritechildpid
== -1) {
1462 if (!(loops
% 10)) tryResizeHashTables();
1463 if (server
.activerehashing
) incrementallyRehash();
1466 /* Show information about connected clients */
1467 if (!(loops
% 50)) {
1468 redisLog(REDIS_VERBOSE
,"%d clients connected (%d slaves), %zu bytes in use",
1469 listLength(server
.clients
)-listLength(server
.slaves
),
1470 listLength(server
.slaves
),
1471 zmalloc_used_memory());
1474 /* Close connections of timedout clients */
1475 if ((server
.maxidletime
&& !(loops
% 100)) || server
.blpop_blocked_clients
)
1476 closeTimedoutClients();
1478 /* Check if a background saving or AOF rewrite in progress terminated */
1479 if (server
.bgsavechildpid
!= -1 || server
.bgrewritechildpid
!= -1) {
1483 if ((pid
= wait3(&statloc
,WNOHANG
,NULL
)) != 0) {
1484 if (pid
== server
.bgsavechildpid
) {
1485 backgroundSaveDoneHandler(statloc
);
1487 backgroundRewriteDoneHandler(statloc
);
1489 updateDictResizePolicy();
1492 /* If there is not a background saving in progress check if
1493 * we have to save now */
1494 time_t now
= time(NULL
);
1495 for (j
= 0; j
< server
.saveparamslen
; j
++) {
1496 struct saveparam
*sp
= server
.saveparams
+j
;
1498 if (server
.dirty
>= sp
->changes
&&
1499 now
-server
.lastsave
> sp
->seconds
) {
1500 redisLog(REDIS_NOTICE
,"%d changes in %d seconds. Saving...",
1501 sp
->changes
, sp
->seconds
);
1502 rdbSaveBackground(server
.dbfilename
);
1508 /* Try to expire a few timed out keys. The algorithm used is adaptive and
1509 * will use few CPU cycles if there are few expiring keys, otherwise
1510 * it will get more aggressive to avoid that too much memory is used by
1511 * keys that can be removed from the keyspace. */
1512 for (j
= 0; j
< server
.dbnum
; j
++) {
1514 redisDb
*db
= server
.db
+j
;
1516 /* Continue to expire if at the end of the cycle more than 25%
1517 * of the keys were expired. */
1519 long num
= dictSize(db
->expires
);
1520 time_t now
= time(NULL
);
1523 if (num
> REDIS_EXPIRELOOKUPS_PER_CRON
)
1524 num
= REDIS_EXPIRELOOKUPS_PER_CRON
;
1529 if ((de
= dictGetRandomKey(db
->expires
)) == NULL
) break;
1530 t
= (time_t) dictGetEntryVal(de
);
1532 deleteKey(db
,dictGetEntryKey(de
));
1534 server
.stat_expiredkeys
++;
1537 } while (expired
> REDIS_EXPIRELOOKUPS_PER_CRON
/4);
1540 /* Swap a few keys on disk if we are over the memory limit and VM
1541 * is enabled. Try to free objects from the free list first. */
1542 if (vmCanSwapOut()) {
1543 while (server
.vm_enabled
&& zmalloc_used_memory() >
1544 server
.vm_max_memory
)
1548 if (tryFreeOneObjectFromFreelist() == REDIS_OK
) continue;
1549 retval
= (server
.vm_max_threads
== 0) ?
1550 vmSwapOneObjectBlocking() :
1551 vmSwapOneObjectThreaded();
1552 if (retval
== REDIS_ERR
&& !(loops
% 300) &&
1553 zmalloc_used_memory() >
1554 (server
.vm_max_memory
+server
.vm_max_memory
/10))
1556 redisLog(REDIS_WARNING
,"WARNING: vm-max-memory limit exceeded by more than 10%% but unable to swap more objects out!");
1558 /* Note that when using threaded I/O we free just one object,
1559 * because anyway when the I/O thread in charge to swap this
1560 * object out will finish, the handler of completed jobs
1561 * will try to swap more objects if we are still out of memory. */
1562 if (retval
== REDIS_ERR
|| server
.vm_max_threads
> 0) break;
1566 /* Check if we should connect to a MASTER */
1567 if (server
.replstate
== REDIS_REPL_CONNECT
&& !(loops
% 10)) {
1568 redisLog(REDIS_NOTICE
,"Connecting to MASTER...");
1569 if (syncWithMaster() == REDIS_OK
) {
1570 redisLog(REDIS_NOTICE
,"MASTER <-> SLAVE sync succeeded");
1571 if (server
.appendonly
) rewriteAppendOnlyFileBackground();
1577 /* This function gets called every time Redis is entering the
1578 * main loop of the event driven library, that is, before sleeping
1579 * for ready file descriptors. */
1580 static void beforeSleep(struct aeEventLoop
*eventLoop
) {
1581 REDIS_NOTUSED(eventLoop
);
1583 /* Awake clients that got all the swapped keys they requested */
1584 if (server
.vm_enabled
&& listLength(server
.io_ready_clients
)) {
1588 listRewind(server
.io_ready_clients
,&li
);
1589 while((ln
= listNext(&li
))) {
1590 redisClient
*c
= ln
->value
;
1591 struct redisCommand
*cmd
;
1593 /* Resume the client. */
1594 listDelNode(server
.io_ready_clients
,ln
);
1595 c
->flags
&= (~REDIS_IO_WAIT
);
1596 server
.vm_blocked_clients
--;
1597 aeCreateFileEvent(server
.el
, c
->fd
, AE_READABLE
,
1598 readQueryFromClient
, c
);
1599 cmd
= lookupCommand(c
->argv
[0]->ptr
);
1600 assert(cmd
!= NULL
);
1603 /* There may be more data to process in the input buffer. */
1604 if (c
->querybuf
&& sdslen(c
->querybuf
) > 0)
1605 processInputBuffer(c
);
1608 /* Write the AOF buffer on disk */
1609 flushAppendOnlyFile();
1612 static void createSharedObjects(void) {
1615 shared
.crlf
= createObject(REDIS_STRING
,sdsnew("\r\n"));
1616 shared
.ok
= createObject(REDIS_STRING
,sdsnew("+OK\r\n"));
1617 shared
.err
= createObject(REDIS_STRING
,sdsnew("-ERR\r\n"));
1618 shared
.emptybulk
= createObject(REDIS_STRING
,sdsnew("$0\r\n\r\n"));
1619 shared
.czero
= createObject(REDIS_STRING
,sdsnew(":0\r\n"));
1620 shared
.cone
= createObject(REDIS_STRING
,sdsnew(":1\r\n"));
1621 shared
.nullbulk
= createObject(REDIS_STRING
,sdsnew("$-1\r\n"));
1622 shared
.nullmultibulk
= createObject(REDIS_STRING
,sdsnew("*-1\r\n"));
1623 shared
.emptymultibulk
= createObject(REDIS_STRING
,sdsnew("*0\r\n"));
1624 shared
.pong
= createObject(REDIS_STRING
,sdsnew("+PONG\r\n"));
1625 shared
.queued
= createObject(REDIS_STRING
,sdsnew("+QUEUED\r\n"));
1626 shared
.wrongtypeerr
= createObject(REDIS_STRING
,sdsnew(
1627 "-ERR Operation against a key holding the wrong kind of value\r\n"));
1628 shared
.nokeyerr
= createObject(REDIS_STRING
,sdsnew(
1629 "-ERR no such key\r\n"));
1630 shared
.syntaxerr
= createObject(REDIS_STRING
,sdsnew(
1631 "-ERR syntax error\r\n"));
1632 shared
.sameobjecterr
= createObject(REDIS_STRING
,sdsnew(
1633 "-ERR source and destination objects are the same\r\n"));
1634 shared
.outofrangeerr
= createObject(REDIS_STRING
,sdsnew(
1635 "-ERR index out of range\r\n"));
1636 shared
.space
= createObject(REDIS_STRING
,sdsnew(" "));
1637 shared
.colon
= createObject(REDIS_STRING
,sdsnew(":"));
1638 shared
.plus
= createObject(REDIS_STRING
,sdsnew("+"));
1639 shared
.select0
= createStringObject("select 0\r\n",10);
1640 shared
.select1
= createStringObject("select 1\r\n",10);
1641 shared
.select2
= createStringObject("select 2\r\n",10);
1642 shared
.select3
= createStringObject("select 3\r\n",10);
1643 shared
.select4
= createStringObject("select 4\r\n",10);
1644 shared
.select5
= createStringObject("select 5\r\n",10);
1645 shared
.select6
= createStringObject("select 6\r\n",10);
1646 shared
.select7
= createStringObject("select 7\r\n",10);
1647 shared
.select8
= createStringObject("select 8\r\n",10);
1648 shared
.select9
= createStringObject("select 9\r\n",10);
1649 shared
.messagebulk
= createStringObject("$7\r\nmessage\r\n",13);
1650 shared
.pmessagebulk
= createStringObject("$8\r\npmessage\r\n",14);
1651 shared
.subscribebulk
= createStringObject("$9\r\nsubscribe\r\n",15);
1652 shared
.unsubscribebulk
= createStringObject("$11\r\nunsubscribe\r\n",18);
1653 shared
.psubscribebulk
= createStringObject("$10\r\npsubscribe\r\n",17);
1654 shared
.punsubscribebulk
= createStringObject("$12\r\npunsubscribe\r\n",19);
1655 shared
.mbulk3
= createStringObject("*3\r\n",4);
1656 shared
.mbulk4
= createStringObject("*4\r\n",4);
1657 for (j
= 0; j
< REDIS_SHARED_INTEGERS
; j
++) {
1658 shared
.integers
[j
] = createObject(REDIS_STRING
,(void*)(long)j
);
1659 shared
.integers
[j
]->encoding
= REDIS_ENCODING_INT
;
/* Append one save point to server.saveparams: the array is grown by one
 * struct saveparam with zrealloc(), the new last slot receives 'seconds'
 * and 'changes', and server.saveparamslen is incremented. */
1663 static void appendServerSaveParams(time_t seconds
, int changes
) {
1664 server
.saveparams
= zrealloc(server
.saveparams
,sizeof(struct saveparam
)*(server
.saveparamslen
+1));
1665 server
.saveparams
[server
.saveparamslen
].seconds
= seconds
;
1666 server
.saveparams
[server
.saveparamslen
].changes
= changes
;
1667 server
.saveparamslen
++;
/* Drop all configured save points: release server.saveparams with zfree(),
 * then reset the pointer to NULL and the count to 0. */
1670 static void resetServerSaveParams() {
1671 zfree(server
.saveparams
);
1672 server
.saveparams
= NULL
;
1673 server
.saveparamslen
= 0;
1676 static void initServerConfig() {
1677 server
.dbnum
= REDIS_DEFAULT_DBNUM
;
1678 server
.port
= REDIS_SERVERPORT
;
1679 server
.verbosity
= REDIS_VERBOSE
;
1680 server
.maxidletime
= REDIS_MAXIDLETIME
;
1681 server
.saveparams
= NULL
;
1682 server
.logfile
= NULL
; /* NULL = log on standard output */
1683 server
.bindaddr
= NULL
;
1684 server
.glueoutputbuf
= 1;
1685 server
.daemonize
= 0;
1686 server
.appendonly
= 0;
1687 server
.appendfsync
= APPENDFSYNC_EVERYSEC
;
1688 server
.lastfsync
= time(NULL
);
1689 server
.appendfd
= -1;
1690 server
.appendseldb
= -1; /* Make sure the first time will not match */
1691 server
.pidfile
= zstrdup("/var/run/redis.pid");
1692 server
.dbfilename
= zstrdup("dump.rdb");
1693 server
.appendfilename
= zstrdup("appendonly.aof");
1694 server
.requirepass
= NULL
;
1695 server
.rdbcompression
= 1;
1696 server
.activerehashing
= 1;
1697 server
.maxclients
= 0;
1698 server
.blpop_blocked_clients
= 0;
1699 server
.maxmemory
= 0;
1700 server
.vm_enabled
= 0;
1701 server
.vm_swap_file
= zstrdup("/tmp/redis-%p.vm");
1702 server
.vm_page_size
= 256; /* 256 bytes per page */
1703 server
.vm_pages
= 1024*1024*100; /* 104 millions of pages */
1704 server
.vm_max_memory
= 1024LL*1024*1024*1; /* 1 GB of RAM */
1705 server
.vm_max_threads
= 4;
1706 server
.vm_blocked_clients
= 0;
1707 server
.hash_max_zipmap_entries
= REDIS_HASH_MAX_ZIPMAP_ENTRIES
;
1708 server
.hash_max_zipmap_value
= REDIS_HASH_MAX_ZIPMAP_VALUE
;
1709 server
.shutdown_asap
= 0;
1711 resetServerSaveParams();
1713 appendServerSaveParams(60*60,1); /* save after 1 hour and 1 change */
1714 appendServerSaveParams(300,100); /* save after 5 minutes and 100 changes */
1715 appendServerSaveParams(60,10000); /* save after 1 minute and 10000 changes */
1716 /* Replication related */
1718 server
.masterauth
= NULL
;
1719 server
.masterhost
= NULL
;
1720 server
.masterport
= 6379;
1721 server
.master
= NULL
;
1722 server
.replstate
= REDIS_REPL_NONE
;
1724 /* Double constants initialization */
1726 R_PosInf
= 1.0/R_Zero
;
1727 R_NegInf
= -1.0/R_Zero
;
1728 R_Nan
= R_Zero
/R_Zero
;
1731 static void initServer() {
1734 signal(SIGHUP
, SIG_IGN
);
1735 signal(SIGPIPE
, SIG_IGN
);
1736 setupSigSegvAction();
1738 server
.devnull
= fopen("/dev/null","w");
1739 if (server
.devnull
== NULL
) {
1740 redisLog(REDIS_WARNING
, "Can't open /dev/null: %s", server
.neterr
);
1743 server
.clients
= listCreate();
1744 server
.slaves
= listCreate();
1745 server
.monitors
= listCreate();
1746 server
.objfreelist
= listCreate();
1747 createSharedObjects();
1748 server
.el
= aeCreateEventLoop();
1749 server
.db
= zmalloc(sizeof(redisDb
)*server
.dbnum
);
1750 server
.fd
= anetTcpServer(server
.neterr
, server
.port
, server
.bindaddr
);
1751 if (server
.fd
== -1) {
1752 redisLog(REDIS_WARNING
, "Opening TCP port: %s", server
.neterr
);
1755 for (j
= 0; j
< server
.dbnum
; j
++) {
1756 server
.db
[j
].dict
= dictCreate(&dbDictType
,NULL
);
1757 server
.db
[j
].expires
= dictCreate(&keyptrDictType
,NULL
);
1758 server
.db
[j
].blocking_keys
= dictCreate(&keylistDictType
,NULL
);
1759 server
.db
[j
].watched_keys
= dictCreate(&keylistDictType
,NULL
);
1760 if (server
.vm_enabled
)
1761 server
.db
[j
].io_keys
= dictCreate(&keylistDictType
,NULL
);
1762 server
.db
[j
].id
= j
;
1764 server
.pubsub_channels
= dictCreate(&keylistDictType
,NULL
);
1765 server
.pubsub_patterns
= listCreate();
1766 listSetFreeMethod(server
.pubsub_patterns
,freePubsubPattern
);
1767 listSetMatchMethod(server
.pubsub_patterns
,listMatchPubsubPattern
);
1768 server
.cronloops
= 0;
1769 server
.bgsavechildpid
= -1;
1770 server
.bgrewritechildpid
= -1;
1771 server
.bgrewritebuf
= sdsempty();
1772 server
.aofbuf
= sdsempty();
1773 server
.lastsave
= time(NULL
);
1775 server
.stat_numcommands
= 0;
1776 server
.stat_numconnections
= 0;
1777 server
.stat_expiredkeys
= 0;
1778 server
.stat_starttime
= time(NULL
);
1779 server
.unixtime
= time(NULL
);
1780 aeCreateTimeEvent(server
.el
, 1, serverCron
, NULL
, NULL
);
1781 if (aeCreateFileEvent(server
.el
, server
.fd
, AE_READABLE
,
1782 acceptHandler
, NULL
) == AE_ERR
) oom("creating file event");
1784 if (server
.appendonly
) {
1785 server
.appendfd
= open(server
.appendfilename
,O_WRONLY
|O_APPEND
|O_CREAT
,0644);
1786 if (server
.appendfd
== -1) {
1787 redisLog(REDIS_WARNING
, "Can't open the append-only file: %s",
1793 if (server
.vm_enabled
) vmInit();
1796 /* Empty the whole database */
1797 static long long emptyDb() {
1799 long long removed
= 0;
1801 for (j
= 0; j
< server
.dbnum
; j
++) {
1802 removed
+= dictSize(server
.db
[j
].dict
);
1803 dictEmpty(server
.db
[j
].dict
);
1804 dictEmpty(server
.db
[j
].expires
);
/* Translate a "yes"/"no" configuration argument into an integer.
 * The comparison is case insensitive: "yes" gives 1, "no" gives 0 and any
 * other string gives -1 so that the caller can report a config error. */
static int yesnotoi(char *s) {
    if (strcasecmp(s,"yes") == 0) return 1;
    if (strcasecmp(s,"no") == 0) return 0;
    return -1;
}
1815 /* I agree, this is a very rudimentary way to load a configuration...
1816 it will be improved later if the config gets more complex */
1817 static void loadServerConfig(char *filename
) {
1819 char buf
[REDIS_CONFIGLINE_MAX
+1], *err
= NULL
;
1823 if (filename
[0] == '-' && filename
[1] == '\0')
1826 if ((fp
= fopen(filename
,"r")) == NULL
) {
1827 redisLog(REDIS_WARNING
, "Fatal error, can't open config file '%s'", filename
);
1832 while(fgets(buf
,REDIS_CONFIGLINE_MAX
+1,fp
) != NULL
) {
1838 line
= sdstrim(line
," \t\r\n");
1840 /* Skip comments and blank lines*/
1841 if (line
[0] == '#' || line
[0] == '\0') {
1846 /* Split into arguments */
1847 argv
= sdssplitlen(line
,sdslen(line
)," ",1,&argc
);
1848 sdstolower(argv
[0]);
1850 /* Execute config directives */
1851 if (!strcasecmp(argv
[0],"timeout") && argc
== 2) {
1852 server
.maxidletime
= atoi(argv
[1]);
1853 if (server
.maxidletime
< 0) {
1854 err
= "Invalid timeout value"; goto loaderr
;
1856 } else if (!strcasecmp(argv
[0],"port") && argc
== 2) {
1857 server
.port
= atoi(argv
[1]);
1858 if (server
.port
< 1 || server
.port
> 65535) {
1859 err
= "Invalid port"; goto loaderr
;
1861 } else if (!strcasecmp(argv
[0],"bind") && argc
== 2) {
1862 server
.bindaddr
= zstrdup(argv
[1]);
1863 } else if (!strcasecmp(argv
[0],"save") && argc
== 3) {
1864 int seconds
= atoi(argv
[1]);
1865 int changes
= atoi(argv
[2]);
1866 if (seconds
< 1 || changes
< 0) {
1867 err
= "Invalid save parameters"; goto loaderr
;
1869 appendServerSaveParams(seconds
,changes
);
1870 } else if (!strcasecmp(argv
[0],"dir") && argc
== 2) {
1871 if (chdir(argv
[1]) == -1) {
1872 redisLog(REDIS_WARNING
,"Can't chdir to '%s': %s",
1873 argv
[1], strerror(errno
));
1876 } else if (!strcasecmp(argv
[0],"loglevel") && argc
== 2) {
1877 if (!strcasecmp(argv
[1],"debug")) server
.verbosity
= REDIS_DEBUG
;
1878 else if (!strcasecmp(argv
[1],"verbose")) server
.verbosity
= REDIS_VERBOSE
;
1879 else if (!strcasecmp(argv
[1],"notice")) server
.verbosity
= REDIS_NOTICE
;
1880 else if (!strcasecmp(argv
[1],"warning")) server
.verbosity
= REDIS_WARNING
;
1882 err
= "Invalid log level. Must be one of debug, notice, warning";
1885 } else if (!strcasecmp(argv
[0],"logfile") && argc
== 2) {
1888 server
.logfile
= zstrdup(argv
[1]);
1889 if (!strcasecmp(server
.logfile
,"stdout")) {
1890 zfree(server
.logfile
);
1891 server
.logfile
= NULL
;
1893 if (server
.logfile
) {
1894 /* Test if we are able to open the file. The server will not
1895 * be able to abort just for this problem later... */
1896 logfp
= fopen(server
.logfile
,"a");
1897 if (logfp
== NULL
) {
1898 err
= sdscatprintf(sdsempty(),
1899 "Can't open the log file: %s", strerror(errno
));
1904 } else if (!strcasecmp(argv
[0],"databases") && argc
== 2) {
1905 server
.dbnum
= atoi(argv
[1]);
1906 if (server
.dbnum
< 1) {
1907 err
= "Invalid number of databases"; goto loaderr
;
1909 } else if (!strcasecmp(argv
[0],"include") && argc
== 2) {
1910 loadServerConfig(argv
[1]);
1911 } else if (!strcasecmp(argv
[0],"maxclients") && argc
== 2) {
1912 server
.maxclients
= atoi(argv
[1]);
1913 } else if (!strcasecmp(argv
[0],"maxmemory") && argc
== 2) {
1914 server
.maxmemory
= memtoll(argv
[1],NULL
);
1915 } else if (!strcasecmp(argv
[0],"slaveof") && argc
== 3) {
1916 server
.masterhost
= sdsnew(argv
[1]);
1917 server
.masterport
= atoi(argv
[2]);
1918 server
.replstate
= REDIS_REPL_CONNECT
;
1919 } else if (!strcasecmp(argv
[0],"masterauth") && argc
== 2) {
1920 server
.masterauth
= zstrdup(argv
[1]);
1921 } else if (!strcasecmp(argv
[0],"glueoutputbuf") && argc
== 2) {
1922 if ((server
.glueoutputbuf
= yesnotoi(argv
[1])) == -1) {
1923 err
= "argument must be 'yes' or 'no'"; goto loaderr
;
1925 } else if (!strcasecmp(argv
[0],"rdbcompression") && argc
== 2) {
1926 if ((server
.rdbcompression
= yesnotoi(argv
[1])) == -1) {
1927 err
= "argument must be 'yes' or 'no'"; goto loaderr
;
1929 } else if (!strcasecmp(argv
[0],"activerehashing") && argc
== 2) {
1930 if ((server
.activerehashing
= yesnotoi(argv
[1])) == -1) {
1931 err
= "argument must be 'yes' or 'no'"; goto loaderr
;
1933 } else if (!strcasecmp(argv
[0],"daemonize") && argc
== 2) {
1934 if ((server
.daemonize
= yesnotoi(argv
[1])) == -1) {
1935 err
= "argument must be 'yes' or 'no'"; goto loaderr
;
1937 } else if (!strcasecmp(argv
[0],"appendonly") && argc
== 2) {
1938 if ((server
.appendonly
= yesnotoi(argv
[1])) == -1) {
1939 err
= "argument must be 'yes' or 'no'"; goto loaderr
;
1941 } else if (!strcasecmp(argv
[0],"appendfilename") && argc
== 2) {
1942 zfree(server
.appendfilename
);
1943 server
.appendfilename
= zstrdup(argv
[1]);
1944 } else if (!strcasecmp(argv
[0],"appendfsync") && argc
== 2) {
1945 if (!strcasecmp(argv
[1],"no")) {
1946 server
.appendfsync
= APPENDFSYNC_NO
;
1947 } else if (!strcasecmp(argv
[1],"always")) {
1948 server
.appendfsync
= APPENDFSYNC_ALWAYS
;
1949 } else if (!strcasecmp(argv
[1],"everysec")) {
1950 server
.appendfsync
= APPENDFSYNC_EVERYSEC
;
1952 err
= "argument must be 'no', 'always' or 'everysec'";
1955 } else if (!strcasecmp(argv
[0],"requirepass") && argc
== 2) {
1956 server
.requirepass
= zstrdup(argv
[1]);
1957 } else if (!strcasecmp(argv
[0],"pidfile") && argc
== 2) {
1958 zfree(server
.pidfile
);
1959 server
.pidfile
= zstrdup(argv
[1]);
1960 } else if (!strcasecmp(argv
[0],"dbfilename") && argc
== 2) {
1961 zfree(server
.dbfilename
);
1962 server
.dbfilename
= zstrdup(argv
[1]);
1963 } else if (!strcasecmp(argv
[0],"vm-enabled") && argc
== 2) {
1964 if ((server
.vm_enabled
= yesnotoi(argv
[1])) == -1) {
1965 err
= "argument must be 'yes' or 'no'"; goto loaderr
;
1967 } else if (!strcasecmp(argv
[0],"vm-swap-file") && argc
== 2) {
1968 zfree(server
.vm_swap_file
);
1969 server
.vm_swap_file
= zstrdup(argv
[1]);
1970 } else if (!strcasecmp(argv
[0],"vm-max-memory") && argc
== 2) {
1971 server
.vm_max_memory
= memtoll(argv
[1],NULL
);
1972 } else if (!strcasecmp(argv
[0],"vm-page-size") && argc
== 2) {
1973 server
.vm_page_size
= memtoll(argv
[1], NULL
);
1974 } else if (!strcasecmp(argv
[0],"vm-pages") && argc
== 2) {
1975 server
.vm_pages
= memtoll(argv
[1], NULL
);
1976 } else if (!strcasecmp(argv
[0],"vm-max-threads") && argc
== 2) {
1977 server
.vm_max_threads
= strtoll(argv
[1], NULL
, 10);
1978 } else if (!strcasecmp(argv
[0],"hash-max-zipmap-entries") && argc
== 2){
1979 server
.hash_max_zipmap_entries
= memtoll(argv
[1], NULL
);
1980 } else if (!strcasecmp(argv
[0],"hash-max-zipmap-value") && argc
== 2){
1981 server
.hash_max_zipmap_value
= memtoll(argv
[1], NULL
);
1983 err
= "Bad directive or wrong number of arguments"; goto loaderr
;
1985 for (j
= 0; j
< argc
; j
++)
1990 if (fp
!= stdin
) fclose(fp
);
1994 fprintf(stderr
, "\n*** FATAL CONFIG FILE ERROR ***\n");
1995 fprintf(stderr
, "Reading the configuration file, at line %d\n", linenum
);
1996 fprintf(stderr
, ">>> '%s'\n", line
);
1997 fprintf(stderr
, "%s\n", err
);
2001 static void freeClientArgv(redisClient
*c
) {
2004 for (j
= 0; j
< c
->argc
; j
++)
2005 decrRefCount(c
->argv
[j
]);
2006 for (j
= 0; j
< c
->mbargc
; j
++)
2007 decrRefCount(c
->mbargv
[j
]);
2012 static void freeClient(redisClient
*c
) {
2015 /* Note that if the client we are freeing is blocked into a blocking
2016 * call, we have to set querybuf to NULL *before* to call
2017 * unblockClientWaitingData() to avoid processInputBuffer() will get
2018 * called. Also it is important to remove the file events after
2019 * this, because this call adds the READABLE event. */
2020 sdsfree(c
->querybuf
);
2022 if (c
->flags
& REDIS_BLOCKED
)
2023 unblockClientWaitingData(c
);
2025 /* UNWATCH all the keys */
2027 listRelease(c
->watched_keys
);
2028 /* Unsubscribe from all the pubsub channels */
2029 pubsubUnsubscribeAllChannels(c
,0);
2030 pubsubUnsubscribeAllPatterns(c
,0);
2031 dictRelease(c
->pubsub_channels
);
2032 listRelease(c
->pubsub_patterns
);
2033 /* Obvious cleanup */
2034 aeDeleteFileEvent(server
.el
,c
->fd
,AE_READABLE
);
2035 aeDeleteFileEvent(server
.el
,c
->fd
,AE_WRITABLE
);
2036 listRelease(c
->reply
);
2039 /* Remove from the list of clients */
2040 ln
= listSearchKey(server
.clients
,c
);
2041 redisAssert(ln
!= NULL
);
2042 listDelNode(server
.clients
,ln
);
2043 /* Remove from the list of clients that are now ready to be restarted
2044 * after waiting for swapped keys */
2045 if (c
->flags
& REDIS_IO_WAIT
&& listLength(c
->io_keys
) == 0) {
2046 ln
= listSearchKey(server
.io_ready_clients
,c
);
2048 listDelNode(server
.io_ready_clients
,ln
);
2049 server
.vm_blocked_clients
--;
2052 /* Remove from the list of clients waiting for swapped keys */
2053 while (server
.vm_enabled
&& listLength(c
->io_keys
)) {
2054 ln
= listFirst(c
->io_keys
);
2055 dontWaitForSwappedKey(c
,ln
->value
);
2057 listRelease(c
->io_keys
);
2058 /* Master/slave cleanup */
2059 if (c
->flags
& REDIS_SLAVE
) {
2060 if (c
->replstate
== REDIS_REPL_SEND_BULK
&& c
->repldbfd
!= -1)
2062 list
*l
= (c
->flags
& REDIS_MONITOR
) ? server
.monitors
: server
.slaves
;
2063 ln
= listSearchKey(l
,c
);
2064 redisAssert(ln
!= NULL
);
2067 if (c
->flags
& REDIS_MASTER
) {
2068 server
.master
= NULL
;
2069 server
.replstate
= REDIS_REPL_CONNECT
;
2071 /* Release memory */
2074 freeClientMultiState(c
);
2078 #define GLUEREPLY_UP_TO (1024)
2079 static void glueReplyBuffersIfNeeded(redisClient
*c
) {
2081 char buf
[GLUEREPLY_UP_TO
];
2086 listRewind(c
->reply
,&li
);
2087 while((ln
= listNext(&li
))) {
2091 objlen
= sdslen(o
->ptr
);
2092 if (copylen
+ objlen
<= GLUEREPLY_UP_TO
) {
2093 memcpy(buf
+copylen
,o
->ptr
,objlen
);
2095 listDelNode(c
->reply
,ln
);
2097 if (copylen
== 0) return;
2101 /* Now the output buffer is empty, add the new single element */
2102 o
= createObject(REDIS_STRING
,sdsnewlen(buf
,copylen
));
2103 listAddNodeHead(c
->reply
,o
);
2106 static void sendReplyToClient(aeEventLoop
*el
, int fd
, void *privdata
, int mask
) {
2107 redisClient
*c
= privdata
;
2108 int nwritten
= 0, totwritten
= 0, objlen
;
2111 REDIS_NOTUSED(mask
);
2113 /* Use writev() if we have enough buffers to send */
2114 if (!server
.glueoutputbuf
&&
2115 listLength(c
->reply
) > REDIS_WRITEV_THRESHOLD
&&
2116 !(c
->flags
& REDIS_MASTER
))
2118 sendReplyToClientWritev(el
, fd
, privdata
, mask
);
2122 while(listLength(c
->reply
)) {
2123 if (server
.glueoutputbuf
&& listLength(c
->reply
) > 1)
2124 glueReplyBuffersIfNeeded(c
);
2126 o
= listNodeValue(listFirst(c
->reply
));
2127 objlen
= sdslen(o
->ptr
);
2130 listDelNode(c
->reply
,listFirst(c
->reply
));
2134 if (c
->flags
& REDIS_MASTER
) {
2135 /* Don't reply to a master */
2136 nwritten
= objlen
- c
->sentlen
;
2138 nwritten
= write(fd
, ((char*)o
->ptr
)+c
->sentlen
, objlen
- c
->sentlen
);
2139 if (nwritten
<= 0) break;
2141 c
->sentlen
+= nwritten
;
2142 totwritten
+= nwritten
;
2143 /* If we fully sent the object on head go to the next one */
2144 if (c
->sentlen
== objlen
) {
2145 listDelNode(c
->reply
,listFirst(c
->reply
));
2148 /* Note that we avoid sending more than REDIS_MAX_WRITE_PER_EVENT
2149 * bytes, in a single threaded server it's a good idea to serve
2150 * other clients as well, even if a very large request comes from
2151 * super fast link that is always able to accept data (in real world
2152 * scenario think about 'KEYS *' against the loopback interface) */
2153 if (totwritten
> REDIS_MAX_WRITE_PER_EVENT
) break;
2155 if (nwritten
== -1) {
2156 if (errno
== EAGAIN
) {
2159 redisLog(REDIS_VERBOSE
,
2160 "Error writing to client: %s", strerror(errno
));
2165 if (totwritten
> 0) c
->lastinteraction
= time(NULL
);
2166 if (listLength(c
->reply
) == 0) {
2168 aeDeleteFileEvent(server
.el
,c
->fd
,AE_WRITABLE
);
2172 static void sendReplyToClientWritev(aeEventLoop
*el
, int fd
, void *privdata
, int mask
)
2174 redisClient
*c
= privdata
;
2175 int nwritten
= 0, totwritten
= 0, objlen
, willwrite
;
2177 struct iovec iov
[REDIS_WRITEV_IOVEC_COUNT
];
2178 int offset
, ion
= 0;
2180 REDIS_NOTUSED(mask
);
2183 while (listLength(c
->reply
)) {
2184 offset
= c
->sentlen
;
2188 /* fill-in the iov[] array */
2189 for(node
= listFirst(c
->reply
); node
; node
= listNextNode(node
)) {
2190 o
= listNodeValue(node
);
2191 objlen
= sdslen(o
->ptr
);
2193 if (totwritten
+ objlen
- offset
> REDIS_MAX_WRITE_PER_EVENT
)
2196 if(ion
== REDIS_WRITEV_IOVEC_COUNT
)
2197 break; /* no more iovecs */
2199 iov
[ion
].iov_base
= ((char*)o
->ptr
) + offset
;
2200 iov
[ion
].iov_len
= objlen
- offset
;
2201 willwrite
+= objlen
- offset
;
2202 offset
= 0; /* just for the first item */
2209 /* write all collected blocks at once */
2210 if((nwritten
= writev(fd
, iov
, ion
)) < 0) {
2211 if (errno
!= EAGAIN
) {
2212 redisLog(REDIS_VERBOSE
,
2213 "Error writing to client: %s", strerror(errno
));
2220 totwritten
+= nwritten
;
2221 offset
= c
->sentlen
;
2223 /* remove written robjs from c->reply */
2224 while (nwritten
&& listLength(c
->reply
)) {
2225 o
= listNodeValue(listFirst(c
->reply
));
2226 objlen
= sdslen(o
->ptr
);
2228 if(nwritten
>= objlen
- offset
) {
2229 listDelNode(c
->reply
, listFirst(c
->reply
));
2230 nwritten
-= objlen
- offset
;
2234 c
->sentlen
+= nwritten
;
2242 c
->lastinteraction
= time(NULL
);
2244 if (listLength(c
->reply
) == 0) {
2246 aeDeleteFileEvent(server
.el
,c
->fd
,AE_WRITABLE
);
2250 static struct redisCommand
*lookupCommand(char *name
) {
2252 while(cmdTable
[j
].name
!= NULL
) {
2253 if (!strcasecmp(name
,cmdTable
[j
].name
)) return &cmdTable
[j
];
2259 /* resetClient prepare the client to process the next command */
2260 static void resetClient(redisClient
*c
) {
2266 /* Call() is the core of Redis execution of a command */
2267 static void call(redisClient
*c
, struct redisCommand
*cmd
) {
2270 dirty
= server
.dirty
;
2272 dirty
= server
.dirty
-dirty
;
2274 if (server
.appendonly
&& dirty
)
2275 feedAppendOnlyFile(cmd
,c
->db
->id
,c
->argv
,c
->argc
);
2276 if ((dirty
|| cmd
->flags
& REDIS_CMD_FORCE_REPLICATION
) &&
2277 listLength(server
.slaves
))
2278 replicationFeedSlaves(server
.slaves
,c
->db
->id
,c
->argv
,c
->argc
);
2279 if (listLength(server
.monitors
))
2280 replicationFeedMonitors(server
.monitors
,c
->db
->id
,c
->argv
,c
->argc
);
2281 server
.stat_numcommands
++;
2284 /* If this function gets called we already read a whole
2285 * command, arguments are in the client argv/argc fields.
2286 * processCommand() executes the command or prepares the
2287 * server for a bulk read from the client.
2289 * If 1 is returned the client is still alive and valid and
2290 * other operations can be performed by the caller. Otherwise
2291 * if 0 is returned the client was destroyed (i.e. after QUIT). */
2292 static int processCommand(redisClient
*c
) {
2293 struct redisCommand
*cmd
;
2295 /* Free some memory if needed (maxmemory setting) */
2296 if (server
.maxmemory
) freeMemoryIfNeeded();
2298 /* Handle the multi bulk command type. This is an alternative protocol
2299 * supported by Redis in order to receive commands that are composed of
2300 * multiple binary-safe "bulk" arguments. The latency of processing is
2301 * a bit higher but this allows things like multi-sets, so if this
2302 * protocol is used only for MSET and similar commands this is a big win. */
2303 if (c
->multibulk
== 0 && c
->argc
== 1 && ((char*)(c
->argv
[0]->ptr
))[0] == '*') {
2304 c
->multibulk
= atoi(((char*)c
->argv
[0]->ptr
)+1);
2305 if (c
->multibulk
<= 0) {
2309 decrRefCount(c
->argv
[c
->argc
-1]);
2313 } else if (c
->multibulk
) {
2314 if (c
->bulklen
== -1) {
2315 if (((char*)c
->argv
[0]->ptr
)[0] != '$') {
2316 addReplySds(c
,sdsnew("-ERR multi bulk protocol error\r\n"));
2320 int bulklen
= atoi(((char*)c
->argv
[0]->ptr
)+1);
2321 decrRefCount(c
->argv
[0]);
2322 if (bulklen
< 0 || bulklen
> 1024*1024*1024) {
2324 addReplySds(c
,sdsnew("-ERR invalid bulk write count\r\n"));
2329 c
->bulklen
= bulklen
+2; /* add two bytes for CR+LF */
2333 c
->mbargv
= zrealloc(c
->mbargv
,(sizeof(robj
*))*(c
->mbargc
+1));
2334 c
->mbargv
[c
->mbargc
] = c
->argv
[0];
2338 if (c
->multibulk
== 0) {
2342 /* Here we need to swap the multi-bulk argc/argv with the
2343 * normal argc/argv of the client structure. */
2345 c
->argv
= c
->mbargv
;
2346 c
->mbargv
= auxargv
;
2349 c
->argc
= c
->mbargc
;
2350 c
->mbargc
= auxargc
;
2352 /* We need to set bulklen to something different than -1
2353 * in order for the code below to process the command without
2354 * to try to read the last argument of a bulk command as
2355 * a special argument. */
2357 /* continue below and process the command */
2364 /* -- end of multi bulk commands processing -- */
2366 /* The QUIT command is handled as a special case. Normal command
2367 * procs are unable to close the client connection safely */
2368 if (!strcasecmp(c
->argv
[0]->ptr
,"quit")) {
2373 /* Now lookup the command and check ASAP about trivial error conditions
2374 * such wrong arity, bad command name and so forth. */
2375 cmd
= lookupCommand(c
->argv
[0]->ptr
);
2378 sdscatprintf(sdsempty(), "-ERR unknown command '%s'\r\n",
2379 (char*)c
->argv
[0]->ptr
));
2382 } else if ((cmd
->arity
> 0 && cmd
->arity
!= c
->argc
) ||
2383 (c
->argc
< -cmd
->arity
)) {
2385 sdscatprintf(sdsempty(),
2386 "-ERR wrong number of arguments for '%s' command\r\n",
2390 } else if (cmd
->flags
& REDIS_CMD_BULK
&& c
->bulklen
== -1) {
2391 /* This is a bulk command, we have to read the last argument yet. */
2392 int bulklen
= atoi(c
->argv
[c
->argc
-1]->ptr
);
2394 decrRefCount(c
->argv
[c
->argc
-1]);
2395 if (bulklen
< 0 || bulklen
> 1024*1024*1024) {
2397 addReplySds(c
,sdsnew("-ERR invalid bulk write count\r\n"));
2402 c
->bulklen
= bulklen
+2; /* add two bytes for CR+LF */
2403 /* It is possible that the bulk read is already in the
2404 * buffer. Check this condition and handle it accordingly.
2405 * This is just a fast path, alternative to call processInputBuffer().
2406 * It's a good idea since the code is small and this condition
2407 * happens most of the times. */
2408 if ((signed)sdslen(c
->querybuf
) >= c
->bulklen
) {
2409 c
->argv
[c
->argc
] = createStringObject(c
->querybuf
,c
->bulklen
-2);
2411 c
->querybuf
= sdsrange(c
->querybuf
,c
->bulklen
,-1);
2413 /* Otherwise return... there is to read the last argument
2414 * from the socket. */
2418 /* Let's try to encode the bulk object to save space. */
2419 if (cmd
->flags
& REDIS_CMD_BULK
)
2420 c
->argv
[c
->argc
-1] = tryObjectEncoding(c
->argv
[c
->argc
-1]);
2422 /* Check if the user is authenticated */
2423 if (server
.requirepass
&& !c
->authenticated
&& cmd
->proc
!= authCommand
) {
2424 addReplySds(c
,sdsnew("-ERR operation not permitted\r\n"));
2429 /* Handle the maxmemory directive */
2430 if (server
.maxmemory
&& (cmd
->flags
& REDIS_CMD_DENYOOM
) &&
2431 zmalloc_used_memory() > server
.maxmemory
)
2433 addReplySds(c
,sdsnew("-ERR command not allowed when used memory > 'maxmemory'\r\n"));
2438 /* Only allow SUBSCRIBE and UNSUBSCRIBE in the context of Pub/Sub */
2439 if ((dictSize(c
->pubsub_channels
) > 0 || listLength(c
->pubsub_patterns
) > 0)
2441 cmd
->proc
!= subscribeCommand
&& cmd
->proc
!= unsubscribeCommand
&&
2442 cmd
->proc
!= psubscribeCommand
&& cmd
->proc
!= punsubscribeCommand
) {
2443 addReplySds(c
,sdsnew("-ERR only (P)SUBSCRIBE / (P)UNSUBSCRIBE / QUIT allowed in this context\r\n"));
2448 /* Exec the command */
2449 if (c
->flags
& REDIS_MULTI
&&
2450 cmd
->proc
!= execCommand
&& cmd
->proc
!= discardCommand
&&
2451 cmd
->proc
!= multiCommand
&& cmd
->proc
!= watchCommand
)
2453 queueMultiCommand(c
,cmd
);
2454 addReply(c
,shared
.queued
);
2456 if (server
.vm_enabled
&& server
.vm_max_threads
> 0 &&
2457 blockClientOnSwappedKeys(c
,cmd
)) return 1;
2461 /* Prepare the client for the next command */
2466 static void replicationFeedSlaves(list
*slaves
, int dictid
, robj
**argv
, int argc
) {
2471 /* We need 1+(ARGS*3) objects since commands are using the new protocol
2472 * and we one 1 object for the first "*<count>\r\n" multibulk count, then
2473 * for every additional object we have "$<count>\r\n" + object + "\r\n". */
2474 robj
*static_outv
[REDIS_STATIC_ARGS
*3+1];
2477 if (argc
<= REDIS_STATIC_ARGS
) {
2480 outv
= zmalloc(sizeof(robj
*)*(argc
*3+1));
2483 lenobj
= createObject(REDIS_STRING
,
2484 sdscatprintf(sdsempty(), "*%d\r\n", argc
));
2485 lenobj
->refcount
= 0;
2486 outv
[outc
++] = lenobj
;
2487 for (j
= 0; j
< argc
; j
++) {
2488 lenobj
= createObject(REDIS_STRING
,
2489 sdscatprintf(sdsempty(),"$%lu\r\n",
2490 (unsigned long) stringObjectLen(argv
[j
])));
2491 lenobj
->refcount
= 0;
2492 outv
[outc
++] = lenobj
;
2493 outv
[outc
++] = argv
[j
];
2494 outv
[outc
++] = shared
.crlf
;
2497 /* Increment all the refcounts at start and decrement at end in order to
2498 * be sure to free objects if there is no slave in a replication state
2499 * able to be feed with commands */
2500 for (j
= 0; j
< outc
; j
++) incrRefCount(outv
[j
]);
2501 listRewind(slaves
,&li
);
2502 while((ln
= listNext(&li
))) {
2503 redisClient
*slave
= ln
->value
;
2505 /* Don't feed slaves that are still waiting for BGSAVE to start */
2506 if (slave
->replstate
== REDIS_REPL_WAIT_BGSAVE_START
) continue;
2508 /* Feed all the other slaves, MONITORs and so on */
2509 if (slave
->slaveseldb
!= dictid
) {
2513 case 0: selectcmd
= shared
.select0
; break;
2514 case 1: selectcmd
= shared
.select1
; break;
2515 case 2: selectcmd
= shared
.select2
; break;
2516 case 3: selectcmd
= shared
.select3
; break;
2517 case 4: selectcmd
= shared
.select4
; break;
2518 case 5: selectcmd
= shared
.select5
; break;
2519 case 6: selectcmd
= shared
.select6
; break;
2520 case 7: selectcmd
= shared
.select7
; break;
2521 case 8: selectcmd
= shared
.select8
; break;
2522 case 9: selectcmd
= shared
.select9
; break;
2524 selectcmd
= createObject(REDIS_STRING
,
2525 sdscatprintf(sdsempty(),"select %d\r\n",dictid
));
2526 selectcmd
->refcount
= 0;
2529 addReply(slave
,selectcmd
);
2530 slave
->slaveseldb
= dictid
;
2532 for (j
= 0; j
< outc
; j
++) addReply(slave
,outv
[j
]);
2534 for (j
= 0; j
< outc
; j
++) decrRefCount(outv
[j
]);
2535 if (outv
!= static_outv
) zfree(outv
);
2538 static sds
sdscatrepr(sds s
, char *p
, size_t len
) {
2539 s
= sdscatlen(s
,"\"",1);
2544 s
= sdscatprintf(s
,"\\%c",*p
);
2546 case '\n': s
= sdscatlen(s
,"\\n",1); break;
2547 case '\r': s
= sdscatlen(s
,"\\r",1); break;
2548 case '\t': s
= sdscatlen(s
,"\\t",1); break;
2549 case '\a': s
= sdscatlen(s
,"\\a",1); break;
2550 case '\b': s
= sdscatlen(s
,"\\b",1); break;
2553 s
= sdscatprintf(s
,"%c",*p
);
2555 s
= sdscatprintf(s
,"\\x%02x",(unsigned char)*p
);
2560 return sdscatlen(s
,"\"",1);
2563 static void replicationFeedMonitors(list
*monitors
, int dictid
, robj
**argv
, int argc
) {
2567 sds cmdrepr
= sdsnew("+");
2571 gettimeofday(&tv
,NULL
);
2572 cmdrepr
= sdscatprintf(cmdrepr
,"%ld.%ld ",(long)tv
.tv_sec
,(long)tv
.tv_usec
);
2573 if (dictid
!= 0) cmdrepr
= sdscatprintf(cmdrepr
,"(db %d) ", dictid
);
2575 for (j
= 0; j
< argc
; j
++) {
2576 if (argv
[j
]->encoding
== REDIS_ENCODING_INT
) {
2577 cmdrepr
= sdscatprintf(cmdrepr
, "%ld", (long)argv
[j
]->ptr
);
2579 cmdrepr
= sdscatrepr(cmdrepr
,(char*)argv
[j
]->ptr
,
2580 sdslen(argv
[j
]->ptr
));
2583 cmdrepr
= sdscatlen(cmdrepr
," ",1);
2585 cmdrepr
= sdscatlen(cmdrepr
,"\r\n",2);
2586 cmdobj
= createObject(REDIS_STRING
,cmdrepr
);
2588 listRewind(monitors
,&li
);
2589 while((ln
= listNext(&li
))) {
2590 redisClient
*monitor
= ln
->value
;
2591 addReply(monitor
,cmdobj
);
2593 decrRefCount(cmdobj
);
2596 static void processInputBuffer(redisClient
*c
) {
2598 /* Before processing the input buffer, make sure the client is not
2599 * waiting for a blocking operation such as BLPOP. Note that the first
2600 * iteration the client is never blocked, otherwise the processInputBuffer
2601 * would not be called at all, but after the execution of the first commands
2602 * in the input buffer the client may be blocked, and the "goto again"
2603 * will try to reiterate. The following line will make it return asap. */
2604 if (c
->flags
& REDIS_BLOCKED
|| c
->flags
& REDIS_IO_WAIT
) return;
2605 if (c
->bulklen
== -1) {
2606 /* Read the first line of the query */
2607 char *p
= strchr(c
->querybuf
,'\n');
2614 query
= c
->querybuf
;
2615 c
->querybuf
= sdsempty();
2616 querylen
= 1+(p
-(query
));
2617 if (sdslen(query
) > querylen
) {
2618 /* leave data after the first line of the query in the buffer */
2619 c
->querybuf
= sdscatlen(c
->querybuf
,query
+querylen
,sdslen(query
)-querylen
);
2621 *p
= '\0'; /* remove "\n" */
2622 if (*(p
-1) == '\r') *(p
-1) = '\0'; /* and "\r" if any */
2623 sdsupdatelen(query
);
2625 /* Now we can split the query in arguments */
2626 argv
= sdssplitlen(query
,sdslen(query
)," ",1,&argc
);
2629 if (c
->argv
) zfree(c
->argv
);
2630 c
->argv
= zmalloc(sizeof(robj
*)*argc
);
2632 for (j
= 0; j
< argc
; j
++) {
2633 if (sdslen(argv
[j
])) {
2634 c
->argv
[c
->argc
] = createObject(REDIS_STRING
,argv
[j
]);
2642 /* Execute the command. If the client is still valid
2643 * after processCommand() return and there is something
2644 * on the query buffer try to process the next command. */
2645 if (processCommand(c
) && sdslen(c
->querybuf
)) goto again
;
2647 /* Nothing to process, argc == 0. Just process the query
2648 * buffer if it's not empty or return to the caller */
2649 if (sdslen(c
->querybuf
)) goto again
;
2652 } else if (sdslen(c
->querybuf
) >= REDIS_REQUEST_MAX_SIZE
) {
2653 redisLog(REDIS_VERBOSE
, "Client protocol error");
2658 /* Bulk read handling. Note that if we are at this point
2659 the client already sent a command terminated with a newline,
2660 we are reading the bulk data that is actually the last
2661 argument of the command. */
2662 int qbl
= sdslen(c
->querybuf
);
2664 if (c
->bulklen
<= qbl
) {
2665 /* Copy everything but the final CRLF as final argument */
2666 c
->argv
[c
->argc
] = createStringObject(c
->querybuf
,c
->bulklen
-2);
2668 c
->querybuf
= sdsrange(c
->querybuf
,c
->bulklen
,-1);
2669 /* Process the command. If the client is still valid after
2670 * the processing and there is more data in the buffer
2671 * try to parse it. */
2672 if (processCommand(c
) && sdslen(c
->querybuf
)) goto again
;
2678 static void readQueryFromClient(aeEventLoop
*el
, int fd
, void *privdata
, int mask
) {
2679 redisClient
*c
= (redisClient
*) privdata
;
2680 char buf
[REDIS_IOBUF_LEN
];
2683 REDIS_NOTUSED(mask
);
2685 nread
= read(fd
, buf
, REDIS_IOBUF_LEN
);
2687 if (errno
== EAGAIN
) {
2690 redisLog(REDIS_VERBOSE
, "Reading from client: %s",strerror(errno
));
2694 } else if (nread
== 0) {
2695 redisLog(REDIS_VERBOSE
, "Client closed connection");
2700 c
->querybuf
= sdscatlen(c
->querybuf
, buf
, nread
);
2701 c
->lastinteraction
= time(NULL
);
2705 processInputBuffer(c
);
2708 static int selectDb(redisClient
*c
, int id
) {
2709 if (id
< 0 || id
>= server
.dbnum
)
2711 c
->db
= &server
.db
[id
];
2715 static void *dupClientReplyValue(void *o
) {
2716 incrRefCount((robj
*)o
);
/* List match method comparing two list values as string objects; the
 * result is whatever equalStringObjects() reports for the pair. */
static int listMatchObjects(void *a, void *b) {
    int same = equalStringObjects(a,b);

    return same;
}
2724 static redisClient
*createClient(int fd
) {
2725 redisClient
*c
= zmalloc(sizeof(*c
));
2727 anetNonBlock(NULL
,fd
);
2728 anetTcpNoDelay(NULL
,fd
);
2729 if (!c
) return NULL
;
2732 c
->querybuf
= sdsempty();
2741 c
->lastinteraction
= time(NULL
);
2742 c
->authenticated
= 0;
2743 c
->replstate
= REDIS_REPL_NONE
;
2744 c
->reply
= listCreate();
2745 listSetFreeMethod(c
->reply
,decrRefCount
);
2746 listSetDupMethod(c
->reply
,dupClientReplyValue
);
2747 c
->blocking_keys
= NULL
;
2748 c
->blocking_keys_num
= 0;
2749 c
->io_keys
= listCreate();
2750 c
->watched_keys
= listCreate();
2751 listSetFreeMethod(c
->io_keys
,decrRefCount
);
2752 c
->pubsub_channels
= dictCreate(&setDictType
,NULL
);
2753 c
->pubsub_patterns
= listCreate();
2754 listSetFreeMethod(c
->pubsub_patterns
,decrRefCount
);
2755 listSetMatchMethod(c
->pubsub_patterns
,listMatchObjects
);
2756 if (aeCreateFileEvent(server
.el
, c
->fd
, AE_READABLE
,
2757 readQueryFromClient
, c
) == AE_ERR
) {
2761 listAddNodeTail(server
.clients
,c
);
2762 initClientMultiState(c
);
2766 static void addReply(redisClient
*c
, robj
*obj
) {
2767 if (listLength(c
->reply
) == 0 &&
2768 (c
->replstate
== REDIS_REPL_NONE
||
2769 c
->replstate
== REDIS_REPL_ONLINE
) &&
2770 aeCreateFileEvent(server
.el
, c
->fd
, AE_WRITABLE
,
2771 sendReplyToClient
, c
) == AE_ERR
) return;
2773 if (server
.vm_enabled
&& obj
->storage
!= REDIS_VM_MEMORY
) {
2774 obj
= dupStringObject(obj
);
2775 obj
->refcount
= 0; /* getDecodedObject() will increment the refcount */
2777 listAddNodeTail(c
->reply
,getDecodedObject(obj
));
2780 static void addReplySds(redisClient
*c
, sds s
) {
2781 robj
*o
= createObject(REDIS_STRING
,s
);
2786 static void addReplyDouble(redisClient
*c
, double d
) {
2789 snprintf(buf
,sizeof(buf
),"%.17g",d
);
2790 addReplySds(c
,sdscatprintf(sdsempty(),"$%lu\r\n%s\r\n",
2791 (unsigned long) strlen(buf
),buf
));
2794 static void addReplyLongLong(redisClient
*c
, long long ll
) {
2799 addReply(c
,shared
.czero
);
2801 } else if (ll
== 1) {
2802 addReply(c
,shared
.cone
);
2806 len
= ll2string(buf
+1,sizeof(buf
)-1,ll
);
2809 addReplySds(c
,sdsnewlen(buf
,len
+3));
2812 static void addReplyUlong(redisClient
*c
, unsigned long ul
) {
2817 addReply(c
,shared
.czero
);
2819 } else if (ul
== 1) {
2820 addReply(c
,shared
.cone
);
2823 len
= snprintf(buf
,sizeof(buf
),":%lu\r\n",ul
);
2824 addReplySds(c
,sdsnewlen(buf
,len
));
2827 static void addReplyBulkLen(redisClient
*c
, robj
*obj
) {
2831 if (obj
->encoding
== REDIS_ENCODING_RAW
) {
2832 len
= sdslen(obj
->ptr
);
2834 long n
= (long)obj
->ptr
;
2836 /* Compute how many bytes will take this integer as a radix 10 string */
2842 while((n
= n
/10) != 0) {
2847 intlen
= ll2string(buf
+1,sizeof(buf
)-1,(long long)len
);
2848 buf
[intlen
+1] = '\r';
2849 buf
[intlen
+2] = '\n';
2850 addReplySds(c
,sdsnewlen(buf
,intlen
+3));
2853 static void addReplyBulk(redisClient
*c
, robj
*obj
) {
2854 addReplyBulkLen(c
,obj
);
2856 addReply(c
,shared
.crlf
);
2859 /* In the CONFIG command we need to add vanilla C string as bulk replies */
2860 static void addReplyBulkCString(redisClient
*c
, char *s
) {
2862 addReply(c
,shared
.nullbulk
);
2864 robj
*o
= createStringObject(s
,strlen(s
));
2870 static void acceptHandler(aeEventLoop
*el
, int fd
, void *privdata
, int mask
) {
2875 REDIS_NOTUSED(mask
);
2876 REDIS_NOTUSED(privdata
);
2878 cfd
= anetAccept(server
.neterr
, fd
, cip
, &cport
);
2879 if (cfd
== AE_ERR
) {
2880 redisLog(REDIS_VERBOSE
,"Accepting client connection: %s", server
.neterr
);
2883 redisLog(REDIS_VERBOSE
,"Accepted %s:%d", cip
, cport
);
2884 if ((c
= createClient(cfd
)) == NULL
) {
2885 redisLog(REDIS_WARNING
,"Error allocating resoures for the client");
2886 close(cfd
); /* May be already closed, just ingore errors */
2889 /* If maxclient directive is set and this is one client more... close the
2890 * connection. Note that we create the client instead to check before
2891 * for this condition, since now the socket is already set in nonblocking
2892 * mode and we can send an error for free using the Kernel I/O */
2893 if (server
.maxclients
&& listLength(server
.clients
) > server
.maxclients
) {
2894 char *err
= "-ERR max number of clients reached\r\n";
2896 /* That's a best effort error message, don't check write errors */
2897 if (write(c
->fd
,err
,strlen(err
)) == -1) {
2898 /* Nothing to do, Just to avoid the warning... */
2903 server
.stat_numconnections
++;
2906 /* ======================= Redis objects implementation ===================== */
2908 static robj
*createObject(int type
, void *ptr
) {
2911 if (server
.vm_enabled
) pthread_mutex_lock(&server
.obj_freelist_mutex
);
2912 if (listLength(server
.objfreelist
)) {
2913 listNode
*head
= listFirst(server
.objfreelist
);
2914 o
= listNodeValue(head
);
2915 listDelNode(server
.objfreelist
,head
);
2916 if (server
.vm_enabled
) pthread_mutex_unlock(&server
.obj_freelist_mutex
);
2918 if (server
.vm_enabled
) {
2919 pthread_mutex_unlock(&server
.obj_freelist_mutex
);
2920 o
= zmalloc(sizeof(*o
));
2922 o
= zmalloc(sizeof(*o
)-sizeof(struct redisObjectVM
));
2926 o
->encoding
= REDIS_ENCODING_RAW
;
2929 if (server
.vm_enabled
) {
2930 /* Note that this code may run in the context of an I/O thread
2931 * and accessing to server.unixtime in theory is an error
2932 * (no locks). But in practice this is safe, and even if we read
2933 * garbage Redis will not fail, as it's just a statistical info */
2934 o
->vm
.atime
= server
.unixtime
;
2935 o
->storage
= REDIS_VM_MEMORY
;
2940 static robj
*createStringObject(char *ptr
, size_t len
) {
2941 return createObject(REDIS_STRING
,sdsnewlen(ptr
,len
));
2944 static robj
*createStringObjectFromLongLong(long long value
) {
2946 if (value
>= 0 && value
< REDIS_SHARED_INTEGERS
) {
2947 incrRefCount(shared
.integers
[value
]);
2948 o
= shared
.integers
[value
];
2950 if (value
>= LONG_MIN
&& value
<= LONG_MAX
) {
2951 o
= createObject(REDIS_STRING
, NULL
);
2952 o
->encoding
= REDIS_ENCODING_INT
;
2953 o
->ptr
= (void*)((long)value
);
2955 o
= createObject(REDIS_STRING
,sdsfromlonglong(value
));
2961 static robj
*dupStringObject(robj
*o
) {
2962 assert(o
->encoding
== REDIS_ENCODING_RAW
);
2963 return createStringObject(o
->ptr
,sdslen(o
->ptr
));
2966 static robj
*createListObject(void) {
2967 list
*l
= listCreate();
2969 listSetFreeMethod(l
,decrRefCount
);
2970 return createObject(REDIS_LIST
,l
);
2973 static robj
*createSetObject(void) {
2974 dict
*d
= dictCreate(&setDictType
,NULL
);
2975 return createObject(REDIS_SET
,d
);
2978 static robj
*createHashObject(void) {
2979 /* All the Hashes start as zipmaps. Will be automatically converted
2980 * into hash tables if there are enough elements or big elements
2982 unsigned char *zm
= zipmapNew();
2983 robj
*o
= createObject(REDIS_HASH
,zm
);
2984 o
->encoding
= REDIS_ENCODING_ZIPMAP
;
2988 static robj
*createZsetObject(void) {
2989 zset
*zs
= zmalloc(sizeof(*zs
));
2991 zs
->dict
= dictCreate(&zsetDictType
,NULL
);
2992 zs
->zsl
= zslCreate();
2993 return createObject(REDIS_ZSET
,zs
);
2996 static void freeStringObject(robj
*o
) {
2997 if (o
->encoding
== REDIS_ENCODING_RAW
) {
3002 static void freeListObject(robj
*o
) {
3003 listRelease((list
*) o
->ptr
);
3006 static void freeSetObject(robj
*o
) {
3007 dictRelease((dict
*) o
->ptr
);
3010 static void freeZsetObject(robj
*o
) {
3013 dictRelease(zs
->dict
);
3018 static void freeHashObject(robj
*o
) {
3019 switch (o
->encoding
) {
3020 case REDIS_ENCODING_HT
:
3021 dictRelease((dict
*) o
->ptr
);
3023 case REDIS_ENCODING_ZIPMAP
:
3027 redisPanic("Unknown hash encoding type");
3032 static void incrRefCount(robj
*o
) {
3036 static void decrRefCount(void *obj
) {
3039 if (o
->refcount
<= 0) redisPanic("decrRefCount against refcount <= 0");
3040 /* Object is a key of a swapped out value, or in the process of being
3042 if (server
.vm_enabled
&&
3043 (o
->storage
== REDIS_VM_SWAPPED
|| o
->storage
== REDIS_VM_LOADING
))
3045 if (o
->storage
== REDIS_VM_LOADING
) vmCancelThreadedIOJob(obj
);
3046 redisAssert(o
->type
== REDIS_STRING
);
3047 freeStringObject(o
);
3048 vmMarkPagesFree(o
->vm
.page
,o
->vm
.usedpages
);
3049 pthread_mutex_lock(&server
.obj_freelist_mutex
);
3050 if (listLength(server
.objfreelist
) > REDIS_OBJFREELIST_MAX
||
3051 !listAddNodeHead(server
.objfreelist
,o
))
3053 pthread_mutex_unlock(&server
.obj_freelist_mutex
);
3054 server
.vm_stats_swapped_objects
--;
3057 /* Object is in memory, or in the process of being swapped out. */
3058 if (--(o
->refcount
) == 0) {
3059 if (server
.vm_enabled
&& o
->storage
== REDIS_VM_SWAPPING
)
3060 vmCancelThreadedIOJob(obj
);
3062 case REDIS_STRING
: freeStringObject(o
); break;
3063 case REDIS_LIST
: freeListObject(o
); break;
3064 case REDIS_SET
: freeSetObject(o
); break;
3065 case REDIS_ZSET
: freeZsetObject(o
); break;
3066 case REDIS_HASH
: freeHashObject(o
); break;
3067 default: redisPanic("Unknown object type"); break;
3069 if (server
.vm_enabled
) pthread_mutex_lock(&server
.obj_freelist_mutex
);
3070 if (listLength(server
.objfreelist
) > REDIS_OBJFREELIST_MAX
||
3071 !listAddNodeHead(server
.objfreelist
,o
))
3073 if (server
.vm_enabled
) pthread_mutex_unlock(&server
.obj_freelist_mutex
);
3077 static robj
*lookupKey(redisDb
*db
, robj
*key
) {
3078 dictEntry
*de
= dictFind(db
->dict
,key
);
3080 robj
*key
= dictGetEntryKey(de
);
3081 robj
*val
= dictGetEntryVal(de
);
3083 if (server
.vm_enabled
) {
3084 if (key
->storage
== REDIS_VM_MEMORY
||
3085 key
->storage
== REDIS_VM_SWAPPING
)
3087 /* If we were swapping the object out, stop it, this key
3089 if (key
->storage
== REDIS_VM_SWAPPING
)
3090 vmCancelThreadedIOJob(key
);
3091 /* Update the access time of the key for the aging algorithm. */
3092 key
->vm
.atime
= server
.unixtime
;
3094 int notify
= (key
->storage
== REDIS_VM_LOADING
);
3096 /* Our value was swapped on disk. Bring it at home. */
3097 redisAssert(val
== NULL
);
3098 val
= vmLoadObject(key
);
3099 dictGetEntryVal(de
) = val
;
3101 /* Clients blocked by the VM subsystem may be waiting for
3103 if (notify
) handleClientsBlockedOnSwappedKey(db
,key
);
3112 static robj
*lookupKeyRead(redisDb
*db
, robj
*key
) {
3113 expireIfNeeded(db
,key
);
3114 return lookupKey(db
,key
);
3117 static robj
*lookupKeyWrite(redisDb
*db
, robj
*key
) {
3118 deleteIfVolatile(db
,key
);
3119 touchWatchedKey(db
,key
);
3120 return lookupKey(db
,key
);
3123 static robj
*lookupKeyReadOrReply(redisClient
*c
, robj
*key
, robj
*reply
) {
3124 robj
*o
= lookupKeyRead(c
->db
, key
);
3125 if (!o
) addReply(c
,reply
);
3129 static robj
*lookupKeyWriteOrReply(redisClient
*c
, robj
*key
, robj
*reply
) {
3130 robj
*o
= lookupKeyWrite(c
->db
, key
);
3131 if (!o
) addReply(c
,reply
);
3135 static int checkType(redisClient
*c
, robj
*o
, int type
) {
3136 if (o
->type
!= type
) {
3137 addReply(c
,shared
.wrongtypeerr
);
3143 static int deleteKey(redisDb
*db
, robj
*key
) {
3146 /* We need to protect key from destruction: after the first dictDelete()
3147 * it may happen that 'key' is no longer valid if we don't increment
3148 * it's count. This may happen when we get the object reference directly
3149 * from the hash table with dictRandomKey() or dict iterators */
3151 if (dictSize(db
->expires
)) dictDelete(db
->expires
,key
);
3152 retval
= dictDelete(db
->dict
,key
);
3155 return retval
== DICT_OK
;
3158 /* Check if the nul-terminated string 's' can be represented by a long
3159 * (that is, is a number that fits into long without any other space or
3160 * character before or after the digits).
3162 * If so, the function returns REDIS_OK and *longval is set to the value
3163 * of the number. Otherwise REDIS_ERR is returned */
3164 static int isStringRepresentableAsLong(sds s
, long *longval
) {
3165 char buf
[32], *endptr
;
3169 value
= strtol(s
, &endptr
, 10);
3170 if (endptr
[0] != '\0') return REDIS_ERR
;
3171 slen
= ll2string(buf
,32,value
);
3173 /* If the number converted back into a string is not identical
3174 * then it's not possible to encode the string as integer */
3175 if (sdslen(s
) != (unsigned)slen
|| memcmp(buf
,s
,slen
)) return REDIS_ERR
;
3176 if (longval
) *longval
= value
;
3180 /* Try to encode a string object in order to save space */
3181 static robj
*tryObjectEncoding(robj
*o
) {
3185 if (o
->encoding
!= REDIS_ENCODING_RAW
)
3186 return o
; /* Already encoded */
3188 /* It's not safe to encode shared objects: shared objects can be shared
3189 * everywhere in the "object space" of Redis. Encoded objects can only
3190 * appear as "values" (and not, for instance, as keys) */
3191 if (o
->refcount
> 1) return o
;
3193 /* Currently we try to encode only strings */
3194 redisAssert(o
->type
== REDIS_STRING
);
3196 /* Check if we can represent this string as a long integer */
3197 if (isStringRepresentableAsLong(s
,&value
) == REDIS_ERR
) return o
;
3199 /* Ok, this object can be encoded */
3200 if (value
>= 0 && value
< REDIS_SHARED_INTEGERS
) {
3202 incrRefCount(shared
.integers
[value
]);
3203 return shared
.integers
[value
];
3205 o
->encoding
= REDIS_ENCODING_INT
;
3207 o
->ptr
= (void*) value
;
3212 /* Get a decoded version of an encoded object (returned as a new object).
3213 * If the object is already raw-encoded just increment the ref count. */
3214 static robj
*getDecodedObject(robj
*o
) {
3217 if (o
->encoding
== REDIS_ENCODING_RAW
) {
3221 if (o
->type
== REDIS_STRING
&& o
->encoding
== REDIS_ENCODING_INT
) {
3224 ll2string(buf
,32,(long)o
->ptr
);
3225 dec
= createStringObject(buf
,strlen(buf
));
3228 redisPanic("Unknown encoding type");
3232 /* Compare two string objects via strcmp() or alike.
3233 * Note that the objects may be integer-encoded. In such a case we
3234 * use ll2string() to get a string representation of the numbers on the stack
3235 * and compare the strings, it's much faster than calling getDecodedObject().
3237 * Important note: if objects are not integer encoded, but binary-safe strings,
3238 * sdscmp() from sds.c will apply memcmp() so this function ca be considered
3240 static int compareStringObjects(robj
*a
, robj
*b
) {
3241 redisAssert(a
->type
== REDIS_STRING
&& b
->type
== REDIS_STRING
);
3242 char bufa
[128], bufb
[128], *astr
, *bstr
;
3245 if (a
== b
) return 0;
3246 if (a
->encoding
!= REDIS_ENCODING_RAW
) {
3247 ll2string(bufa
,sizeof(bufa
),(long) a
->ptr
);
3253 if (b
->encoding
!= REDIS_ENCODING_RAW
) {
3254 ll2string(bufb
,sizeof(bufb
),(long) b
->ptr
);
3260 return bothsds
? sdscmp(astr
,bstr
) : strcmp(astr
,bstr
);
3263 /* Equal string objects return 1 if the two objects are the same from the
3264 * point of view of a string comparison, otherwise 0 is returned. Note that
3265 * this function is faster then checking for (compareStringObject(a,b) == 0)
3266 * because it can perform some more optimization. */
3267 static int equalStringObjects(robj
*a
, robj
*b
) {
3268 if (a
->encoding
!= REDIS_ENCODING_RAW
&& b
->encoding
!= REDIS_ENCODING_RAW
){
3269 return a
->ptr
== b
->ptr
;
3271 return compareStringObjects(a
,b
) == 0;
3275 static size_t stringObjectLen(robj
*o
) {
3276 redisAssert(o
->type
== REDIS_STRING
);
3277 if (o
->encoding
== REDIS_ENCODING_RAW
) {
3278 return sdslen(o
->ptr
);
3282 return ll2string(buf
,32,(long)o
->ptr
);
3286 static int getDoubleFromObject(robj
*o
, double *target
) {
3293 redisAssert(o
->type
== REDIS_STRING
);
3294 if (o
->encoding
== REDIS_ENCODING_RAW
) {
3295 value
= strtod(o
->ptr
, &eptr
);
3296 if (eptr
[0] != '\0') return REDIS_ERR
;
3297 } else if (o
->encoding
== REDIS_ENCODING_INT
) {
3298 value
= (long)o
->ptr
;
3300 redisPanic("Unknown string encoding");
3308 static int getDoubleFromObjectOrReply(redisClient
*c
, robj
*o
, double *target
, const char *msg
) {
3310 if (getDoubleFromObject(o
, &value
) != REDIS_OK
) {
3312 addReplySds(c
, sdscatprintf(sdsempty(), "-ERR %s\r\n", msg
));
3314 addReplySds(c
, sdsnew("-ERR value is not a double\r\n"));
3323 static int getLongLongFromObject(robj
*o
, long long *target
) {
3330 redisAssert(o
->type
== REDIS_STRING
);
3331 if (o
->encoding
== REDIS_ENCODING_RAW
) {
3332 value
= strtoll(o
->ptr
, &eptr
, 10);
3333 if (eptr
[0] != '\0') return REDIS_ERR
;
3334 } else if (o
->encoding
== REDIS_ENCODING_INT
) {
3335 value
= (long)o
->ptr
;
3337 redisPanic("Unknown string encoding");
3345 static int getLongLongFromObjectOrReply(redisClient
*c
, robj
*o
, long long *target
, const char *msg
) {
3347 if (getLongLongFromObject(o
, &value
) != REDIS_OK
) {
3349 addReplySds(c
, sdscatprintf(sdsempty(), "-ERR %s\r\n", msg
));
3351 addReplySds(c
, sdsnew("-ERR value is not an integer\r\n"));
3360 static int getLongFromObjectOrReply(redisClient
*c
, robj
*o
, long *target
, const char *msg
) {
3363 if (getLongLongFromObjectOrReply(c
, o
, &value
, msg
) != REDIS_OK
) return REDIS_ERR
;
3364 if (value
< LONG_MIN
|| value
> LONG_MAX
) {
3366 addReplySds(c
, sdscatprintf(sdsempty(), "-ERR %s\r\n", msg
));
3368 addReplySds(c
, sdsnew("-ERR value is out of range\r\n"));
3377 /*============================ RDB saving/loading =========================== */
3379 static int rdbSaveType(FILE *fp
, unsigned char type
) {
3380 if (fwrite(&type
,1,1,fp
) == 0) return -1;
3384 static int rdbSaveTime(FILE *fp
, time_t t
) {
3385 int32_t t32
= (int32_t) t
;
3386 if (fwrite(&t32
,4,1,fp
) == 0) return -1;
3390 /* check rdbLoadLen() comments for more info */
3391 static int rdbSaveLen(FILE *fp
, uint32_t len
) {
3392 unsigned char buf
[2];
3395 /* Save a 6 bit len */
3396 buf
[0] = (len
&0xFF)|(REDIS_RDB_6BITLEN
<<6);
3397 if (fwrite(buf
,1,1,fp
) == 0) return -1;
3398 } else if (len
< (1<<14)) {
3399 /* Save a 14 bit len */
3400 buf
[0] = ((len
>>8)&0xFF)|(REDIS_RDB_14BITLEN
<<6);
3402 if (fwrite(buf
,2,1,fp
) == 0) return -1;
3404 /* Save a 32 bit len */
3405 buf
[0] = (REDIS_RDB_32BITLEN
<<6);
3406 if (fwrite(buf
,1,1,fp
) == 0) return -1;
3408 if (fwrite(&len
,4,1,fp
) == 0) return -1;
3413 /* Encode 'value' as an integer if possible (if integer will fit the
3414 * supported range). If the function sucessful encoded the integer
3415 * then the (up to 5 bytes) encoded representation is written in the
3416 * string pointed by 'enc' and the length is returned. Otherwise
3418 static int rdbEncodeInteger(long long value
, unsigned char *enc
) {
3419 /* Finally check if it fits in our ranges */
3420 if (value
>= -(1<<7) && value
<= (1<<7)-1) {
3421 enc
[0] = (REDIS_RDB_ENCVAL
<<6)|REDIS_RDB_ENC_INT8
;
3422 enc
[1] = value
&0xFF;
3424 } else if (value
>= -(1<<15) && value
<= (1<<15)-1) {
3425 enc
[0] = (REDIS_RDB_ENCVAL
<<6)|REDIS_RDB_ENC_INT16
;
3426 enc
[1] = value
&0xFF;
3427 enc
[2] = (value
>>8)&0xFF;
3429 } else if (value
>= -((long long)1<<31) && value
<= ((long long)1<<31)-1) {
3430 enc
[0] = (REDIS_RDB_ENCVAL
<<6)|REDIS_RDB_ENC_INT32
;
3431 enc
[1] = value
&0xFF;
3432 enc
[2] = (value
>>8)&0xFF;
3433 enc
[3] = (value
>>16)&0xFF;
3434 enc
[4] = (value
>>24)&0xFF;
3441 /* String objects in the form "2391" "-100" without any space and with a
3442 * range of values that can fit in an 8, 16 or 32 bit signed value can be
3443 * encoded as integers to save space */
3444 static int rdbTryIntegerEncoding(char *s
, size_t len
, unsigned char *enc
) {
3446 char *endptr
, buf
[32];
3448 /* Check if it's possible to encode this value as a number */
3449 value
= strtoll(s
, &endptr
, 10);
3450 if (endptr
[0] != '\0') return 0;
3451 ll2string(buf
,32,value
);
3453 /* If the number converted back into a string is not identical
3454 * then it's not possible to encode the string as integer */
3455 if (strlen(buf
) != len
|| memcmp(buf
,s
,len
)) return 0;
3457 return rdbEncodeInteger(value
,enc
);
3460 static int rdbSaveLzfStringObject(FILE *fp
, unsigned char *s
, size_t len
) {
3461 size_t comprlen
, outlen
;
3465 /* We require at least four bytes compression for this to be worth it */
3466 if (len
<= 4) return 0;
3468 if ((out
= zmalloc(outlen
+1)) == NULL
) return 0;
3469 comprlen
= lzf_compress(s
, len
, out
, outlen
);
3470 if (comprlen
== 0) {
3474 /* Data compressed! Let's save it on disk */
3475 byte
= (REDIS_RDB_ENCVAL
<<6)|REDIS_RDB_ENC_LZF
;
3476 if (fwrite(&byte
,1,1,fp
) == 0) goto writeerr
;
3477 if (rdbSaveLen(fp
,comprlen
) == -1) goto writeerr
;
3478 if (rdbSaveLen(fp
,len
) == -1) goto writeerr
;
3479 if (fwrite(out
,comprlen
,1,fp
) == 0) goto writeerr
;
3488 /* Save a string objet as [len][data] on disk. If the object is a string
3489 * representation of an integer value we try to safe it in a special form */
3490 static int rdbSaveRawString(FILE *fp
, unsigned char *s
, size_t len
) {
3493 /* Try integer encoding */
3495 unsigned char buf
[5];
3496 if ((enclen
= rdbTryIntegerEncoding((char*)s
,len
,buf
)) > 0) {
3497 if (fwrite(buf
,enclen
,1,fp
) == 0) return -1;
3502 /* Try LZF compression - under 20 bytes it's unable to compress even
3503 * aaaaaaaaaaaaaaaaaa so skip it */
3504 if (server
.rdbcompression
&& len
> 20) {
3507 retval
= rdbSaveLzfStringObject(fp
,s
,len
);
3508 if (retval
== -1) return -1;
3509 if (retval
> 0) return 0;
3510 /* retval == 0 means data can't be compressed, save the old way */
3513 /* Store verbatim */
3514 if (rdbSaveLen(fp
,len
) == -1) return -1;
3515 if (len
&& fwrite(s
,len
,1,fp
) == 0) return -1;
3519 /* Like rdbSaveStringObjectRaw() but handle encoded objects */
3520 static int rdbSaveStringObject(FILE *fp
, robj
*obj
) {
3523 /* Avoid to decode the object, then encode it again, if the
3524 * object is alrady integer encoded. */
3525 if (obj
->encoding
== REDIS_ENCODING_INT
) {
3526 long val
= (long) obj
->ptr
;
3527 unsigned char buf
[5];
3530 if ((enclen
= rdbEncodeInteger(val
,buf
)) > 0) {
3531 if (fwrite(buf
,enclen
,1,fp
) == 0) return -1;
3534 /* otherwise... fall throught and continue with the usual
3538 /* Avoid incr/decr ref count business when possible.
3539 * This plays well with copy-on-write given that we are probably
3540 * in a child process (BGSAVE). Also this makes sure key objects
3541 * of swapped objects are not incRefCount-ed (an assert does not allow
3542 * this in order to avoid bugs) */
3543 if (obj
->encoding
!= REDIS_ENCODING_RAW
) {
3544 obj
= getDecodedObject(obj
);
3545 retval
= rdbSaveRawString(fp
,obj
->ptr
,sdslen(obj
->ptr
));
3548 retval
= rdbSaveRawString(fp
,obj
->ptr
,sdslen(obj
->ptr
));
3553 /* Save a double value. Doubles are saved as strings prefixed by an unsigned
3554 * 8 bit integer specifing the length of the representation.
3555 * This 8 bit integer has special values in order to specify the following
3561 static int rdbSaveDoubleValue(FILE *fp
, double val
) {
3562 unsigned char buf
[128];
3568 } else if (!isfinite(val
)) {
3570 buf
[0] = (val
< 0) ? 255 : 254;
3572 #if (DBL_MANT_DIG >= 52) && (LLONG_MAX == 0x7fffffffffffffffLL)
3573 /* Check if the float is in a safe range to be casted into a
3574 * long long. We are assuming that long long is 64 bit here.
3575 * Also we are assuming that there are no implementations around where
3576 * double has precision < 52 bit.
3578 * Under this assumptions we test if a double is inside an interval
3579 * where casting to long long is safe. Then using two castings we
3580 * make sure the decimal part is zero. If all this is true we use
3581 * integer printing function that is much faster. */
3582 double min
= -4503599627370495; /* (2^52)-1 */
3583 double max
= 4503599627370496; /* -(2^52) */
3584 if (val
> min
&& val
< max
&& val
== ((double)((long long)val
)))
3585 ll2string((char*)buf
+1,sizeof(buf
),(long long)val
);
3588 snprintf((char*)buf
+1,sizeof(buf
)-1,"%.17g",val
);
3589 buf
[0] = strlen((char*)buf
+1);
3592 if (fwrite(buf
,len
,1,fp
) == 0) return -1;
3596 /* Save a Redis object. */
3597 static int rdbSaveObject(FILE *fp
, robj
*o
) {
3598 if (o
->type
== REDIS_STRING
) {
3599 /* Save a string value */
3600 if (rdbSaveStringObject(fp
,o
) == -1) return -1;
3601 } else if (o
->type
== REDIS_LIST
) {
3602 /* Save a list value */
3603 list
*list
= o
->ptr
;
3607 if (rdbSaveLen(fp
,listLength(list
)) == -1) return -1;
3608 listRewind(list
,&li
);
3609 while((ln
= listNext(&li
))) {
3610 robj
*eleobj
= listNodeValue(ln
);
3612 if (rdbSaveStringObject(fp
,eleobj
) == -1) return -1;
3614 } else if (o
->type
== REDIS_SET
) {
3615 /* Save a set value */
3617 dictIterator
*di
= dictGetIterator(set
);
3620 if (rdbSaveLen(fp
,dictSize(set
)) == -1) return -1;
3621 while((de
= dictNext(di
)) != NULL
) {
3622 robj
*eleobj
= dictGetEntryKey(de
);
3624 if (rdbSaveStringObject(fp
,eleobj
) == -1) return -1;
3626 dictReleaseIterator(di
);
3627 } else if (o
->type
== REDIS_ZSET
) {
3628 /* Save a set value */
3630 dictIterator
*di
= dictGetIterator(zs
->dict
);
3633 if (rdbSaveLen(fp
,dictSize(zs
->dict
)) == -1) return -1;
3634 while((de
= dictNext(di
)) != NULL
) {
3635 robj
*eleobj
= dictGetEntryKey(de
);
3636 double *score
= dictGetEntryVal(de
);
3638 if (rdbSaveStringObject(fp
,eleobj
) == -1) return -1;
3639 if (rdbSaveDoubleValue(fp
,*score
) == -1) return -1;
3641 dictReleaseIterator(di
);
3642 } else if (o
->type
== REDIS_HASH
) {
3643 /* Save a hash value */
3644 if (o
->encoding
== REDIS_ENCODING_ZIPMAP
) {
3645 unsigned char *p
= zipmapRewind(o
->ptr
);
3646 unsigned int count
= zipmapLen(o
->ptr
);
3647 unsigned char *key
, *val
;
3648 unsigned int klen
, vlen
;
3650 if (rdbSaveLen(fp
,count
) == -1) return -1;
3651 while((p
= zipmapNext(p
,&key
,&klen
,&val
,&vlen
)) != NULL
) {
3652 if (rdbSaveRawString(fp
,key
,klen
) == -1) return -1;
3653 if (rdbSaveRawString(fp
,val
,vlen
) == -1) return -1;
3656 dictIterator
*di
= dictGetIterator(o
->ptr
);
3659 if (rdbSaveLen(fp
,dictSize((dict
*)o
->ptr
)) == -1) return -1;
3660 while((de
= dictNext(di
)) != NULL
) {
3661 robj
*key
= dictGetEntryKey(de
);
3662 robj
*val
= dictGetEntryVal(de
);
3664 if (rdbSaveStringObject(fp
,key
) == -1) return -1;
3665 if (rdbSaveStringObject(fp
,val
) == -1) return -1;
3667 dictReleaseIterator(di
);
3670 redisPanic("Unknown object type");
3675 /* Return the length the object will have on disk if saved with
3676 * the rdbSaveObject() function. Currently we use a trick to get
3677 * this length with very little changes to the code. In the future
3678 * we could switch to a faster solution. */
3679 static off_t
rdbSavedObjectLen(robj
*o
, FILE *fp
) {
3680 if (fp
== NULL
) fp
= server
.devnull
;
3682 assert(rdbSaveObject(fp
,o
) != 1);
3686 /* Return the number of pages required to save this object in the swap file */
3687 static off_t
rdbSavedObjectPages(robj
*o
, FILE *fp
) {
3688 off_t bytes
= rdbSavedObjectLen(o
,fp
);
3690 return (bytes
+(server
.vm_page_size
-1))/server
.vm_page_size
;
3693 /* Save the DB on disk. Return REDIS_ERR on error, REDIS_OK on success */
3694 static int rdbSave(char *filename
) {
3695 dictIterator
*di
= NULL
;
3700 time_t now
= time(NULL
);
3702 /* Wait for I/O therads to terminate, just in case this is a
3703 * foreground-saving, to avoid seeking the swap file descriptor at the
3705 if (server
.vm_enabled
)
3706 waitEmptyIOJobsQueue();
3708 snprintf(tmpfile
,256,"temp-%d.rdb", (int) getpid());
3709 fp
= fopen(tmpfile
,"w");
3711 redisLog(REDIS_WARNING
, "Failed saving the DB: %s", strerror(errno
));
3714 if (fwrite("REDIS0001",9,1,fp
) == 0) goto werr
;
3715 for (j
= 0; j
< server
.dbnum
; j
++) {
3716 redisDb
*db
= server
.db
+j
;
3718 if (dictSize(d
) == 0) continue;
3719 di
= dictGetIterator(d
);
3725 /* Write the SELECT DB opcode */
3726 if (rdbSaveType(fp
,REDIS_SELECTDB
) == -1) goto werr
;
3727 if (rdbSaveLen(fp
,j
) == -1) goto werr
;
3729 /* Iterate this DB writing every entry */
3730 while((de
= dictNext(di
)) != NULL
) {
3731 robj
*key
= dictGetEntryKey(de
);
3732 robj
*o
= dictGetEntryVal(de
);
3733 time_t expiretime
= getExpire(db
,key
);
3735 /* Save the expire time */
3736 if (expiretime
!= -1) {
3737 /* If this key is already expired skip it */
3738 if (expiretime
< now
) continue;
3739 if (rdbSaveType(fp
,REDIS_EXPIRETIME
) == -1) goto werr
;
3740 if (rdbSaveTime(fp
,expiretime
) == -1) goto werr
;
3742 /* Save the key and associated value. This requires special
3743 * handling if the value is swapped out. */
3744 if (!server
.vm_enabled
|| key
->storage
== REDIS_VM_MEMORY
||
3745 key
->storage
== REDIS_VM_SWAPPING
) {
3746 /* Save type, key, value */
3747 if (rdbSaveType(fp
,o
->type
) == -1) goto werr
;
3748 if (rdbSaveStringObject(fp
,key
) == -1) goto werr
;
3749 if (rdbSaveObject(fp
,o
) == -1) goto werr
;
3751 /* REDIS_VM_SWAPPED or REDIS_VM_LOADING */
3753 /* Get a preview of the object in memory */
3754 po
= vmPreviewObject(key
);
3755 /* Save type, key, value */
3756 if (rdbSaveType(fp
,key
->vtype
) == -1) goto werr
;
3757 if (rdbSaveStringObject(fp
,key
) == -1) goto werr
;
3758 if (rdbSaveObject(fp
,po
) == -1) goto werr
;
3759 /* Remove the loaded object from memory */
3763 dictReleaseIterator(di
);
3766 if (rdbSaveType(fp
,REDIS_EOF
) == -1) goto werr
;
3768 /* Make sure data will not remain on the OS's output buffers */
3773 /* Use RENAME to make sure the DB file is changed atomically only
3774 * if the generate DB file is ok. */
3775 if (rename(tmpfile
,filename
) == -1) {
3776 redisLog(REDIS_WARNING
,"Error moving temp DB file on the final destination: %s", strerror(errno
));
3780 redisLog(REDIS_NOTICE
,"DB saved on disk");
3782 server
.lastsave
= time(NULL
);
3788 redisLog(REDIS_WARNING
,"Write error saving DB on disk: %s", strerror(errno
));
3789 if (di
) dictReleaseIterator(di
);
3793 static int rdbSaveBackground(char *filename
) {
3796 if (server
.bgsavechildpid
!= -1) return REDIS_ERR
;
3797 if (server
.vm_enabled
) waitEmptyIOJobsQueue();
3798 if ((childpid
= fork()) == 0) {
3800 if (server
.vm_enabled
) vmReopenSwapFile();
3802 if (rdbSave(filename
) == REDIS_OK
) {
3809 if (childpid
== -1) {
3810 redisLog(REDIS_WARNING
,"Can't save in background: fork: %s",
3814 redisLog(REDIS_NOTICE
,"Background saving started by pid %d",childpid
);
3815 server
.bgsavechildpid
= childpid
;
3816 updateDictResizePolicy();
3819 return REDIS_OK
; /* unreached */
3822 static void rdbRemoveTempFile(pid_t childpid
) {
3825 snprintf(tmpfile
,256,"temp-%d.rdb", (int) childpid
);
3829 static int rdbLoadType(FILE *fp
) {
3831 if (fread(&type
,1,1,fp
) == 0) return -1;
3835 static time_t rdbLoadTime(FILE *fp
) {
3837 if (fread(&t32
,4,1,fp
) == 0) return -1;
3838 return (time_t) t32
;
3841 /* Load an encoded length from the DB, see the REDIS_RDB_* defines on the top
3842 * of this file for a description of how this are stored on disk.
3844 * isencoded is set to 1 if the readed length is not actually a length but
3845 * an "encoding type", check the above comments for more info */
3846 static uint32_t rdbLoadLen(FILE *fp
, int *isencoded
) {
3847 unsigned char buf
[2];
3851 if (isencoded
) *isencoded
= 0;
3852 if (fread(buf
,1,1,fp
) == 0) return REDIS_RDB_LENERR
;
3853 type
= (buf
[0]&0xC0)>>6;
3854 if (type
== REDIS_RDB_6BITLEN
) {
3855 /* Read a 6 bit len */
3857 } else if (type
== REDIS_RDB_ENCVAL
) {
3858 /* Read a 6 bit len encoding type */
3859 if (isencoded
) *isencoded
= 1;
3861 } else if (type
== REDIS_RDB_14BITLEN
) {
3862 /* Read a 14 bit len */
3863 if (fread(buf
+1,1,1,fp
) == 0) return REDIS_RDB_LENERR
;
3864 return ((buf
[0]&0x3F)<<8)|buf
[1];
3866 /* Read a 32 bit len */
3867 if (fread(&len
,4,1,fp
) == 0) return REDIS_RDB_LENERR
;
3872 /* Load an integer-encoded object from file 'fp', with the specified
3873 * encoding type 'enctype'. If encode is true the function may return
3874 * an integer-encoded object as reply, otherwise the returned object
3875 * will always be encoded as a raw string. */
3876 static robj
*rdbLoadIntegerObject(FILE *fp
, int enctype
, int encode
) {
3877 unsigned char enc
[4];
3880 if (enctype
== REDIS_RDB_ENC_INT8
) {
3881 if (fread(enc
,1,1,fp
) == 0) return NULL
;
3882 val
= (signed char)enc
[0];
3883 } else if (enctype
== REDIS_RDB_ENC_INT16
) {
3885 if (fread(enc
,2,1,fp
) == 0) return NULL
;
3886 v
= enc
[0]|(enc
[1]<<8);
3888 } else if (enctype
== REDIS_RDB_ENC_INT32
) {
3890 if (fread(enc
,4,1,fp
) == 0) return NULL
;
3891 v
= enc
[0]|(enc
[1]<<8)|(enc
[2]<<16)|(enc
[3]<<24);
3894 val
= 0; /* anti-warning */
3895 redisPanic("Unknown RDB integer encoding type");
3898 return createStringObjectFromLongLong(val
);
3900 return createObject(REDIS_STRING
,sdsfromlonglong(val
));
3903 static robj
*rdbLoadLzfStringObject(FILE*fp
) {
3904 unsigned int len
, clen
;
3905 unsigned char *c
= NULL
;
3908 if ((clen
= rdbLoadLen(fp
,NULL
)) == REDIS_RDB_LENERR
) return NULL
;
3909 if ((len
= rdbLoadLen(fp
,NULL
)) == REDIS_RDB_LENERR
) return NULL
;
3910 if ((c
= zmalloc(clen
)) == NULL
) goto err
;
3911 if ((val
= sdsnewlen(NULL
,len
)) == NULL
) goto err
;
3912 if (fread(c
,clen
,1,fp
) == 0) goto err
;
3913 if (lzf_decompress(c
,clen
,val
,len
) == 0) goto err
;
3915 return createObject(REDIS_STRING
,val
);
3922 static robj
*rdbGenericLoadStringObject(FILE*fp
, int encode
) {
3927 len
= rdbLoadLen(fp
,&isencoded
);
3930 case REDIS_RDB_ENC_INT8
:
3931 case REDIS_RDB_ENC_INT16
:
3932 case REDIS_RDB_ENC_INT32
:
3933 return rdbLoadIntegerObject(fp
,len
,encode
);
3934 case REDIS_RDB_ENC_LZF
:
3935 return rdbLoadLzfStringObject(fp
);
3937 redisPanic("Unknown RDB encoding type");
3941 if (len
== REDIS_RDB_LENERR
) return NULL
;
3942 val
= sdsnewlen(NULL
,len
);
3943 if (len
&& fread(val
,len
,1,fp
) == 0) {
3947 return createObject(REDIS_STRING
,val
);
3950 static robj
*rdbLoadStringObject(FILE *fp
) {
3951 return rdbGenericLoadStringObject(fp
,0);
3954 static robj
*rdbLoadEncodedStringObject(FILE *fp
) {
3955 return rdbGenericLoadStringObject(fp
,1);
3958 /* For information about double serialization check rdbSaveDoubleValue() */
3959 static int rdbLoadDoubleValue(FILE *fp
, double *val
) {
3963 if (fread(&len
,1,1,fp
) == 0) return -1;
3965 case 255: *val
= R_NegInf
; return 0;
3966 case 254: *val
= R_PosInf
; return 0;
3967 case 253: *val
= R_Nan
; return 0;
3969 if (fread(buf
,len
,1,fp
) == 0) return -1;
3971 sscanf(buf
, "%lg", val
);
3976 /* Load a Redis object of the specified type from the specified file.
3977 * On success a newly allocated object is returned, otherwise NULL.
 *
 * The visible branches handle REDIS_STRING, REDIS_LIST / REDIS_SET,
 * REDIS_ZSET and REDIS_HASH; a redisPanic("Unknown object type") call
 * follows the last branch.
 * NOTE(review): the early "return NULL" paths inside the element loops do
 * not release 'o' (or 'score' in the sorted set branch) in the lines
 * visible here -- confirm that callers treat a failed load as fatal. */
3978 static robj
*rdbLoadObject(int type
, FILE *fp
) {
/* NOTE(review): ftell() returns a long but is printed with %d. */
3981 redisLog(REDIS_DEBUG
,"LOADING OBJECT %d (at %d)\n",type
,ftell(fp
));
3982 if (type
== REDIS_STRING
) {
3983 /* Read string value */
3984 if ((o
= rdbLoadEncodedStringObject(fp
)) == NULL
) return NULL
;
3985 o
= tryObjectEncoding(o
);
3986 } else if (type
== REDIS_LIST
|| type
== REDIS_SET
) {
3987 /* Read list/set value */
3990 if ((listlen
= rdbLoadLen(fp
,NULL
)) == REDIS_RDB_LENERR
) return NULL
;
3991 o
= (type
== REDIS_LIST
) ? createListObject() : createSetObject();
3992 /* It's faster to expand the dict to the right size asap in order
3993 * to avoid rehashing */
3994 if (type
== REDIS_SET
&& listlen
> DICT_HT_INITIAL_SIZE
)
3995 dictExpand(o
->ptr
,listlen
);
3996 /* Load every single element of the list/set */
4000 if ((ele
= rdbLoadEncodedStringObject(fp
)) == NULL
) return NULL
;
4001 ele
= tryObjectEncoding(ele
);
4002 if (type
== REDIS_LIST
) {
4003 listAddNodeTail((list
*)o
->ptr
,ele
);
4005 dictAdd((dict
*)o
->ptr
,ele
,NULL
);
4008 } else if (type
== REDIS_ZSET
) {
4009 /* Read sorted set value */
4013 if ((zsetlen
= rdbLoadLen(fp
,NULL
)) == REDIS_RDB_LENERR
) return NULL
;
4014 o
= createZsetObject();
4016 /* Load every single element of the sorted set */
/* Every score lives in its own heap allocated double: the pointer is
 * stored as the dict value, the value itself is passed to zslInsert(). */
4019 double *score
= zmalloc(sizeof(double));
4021 if ((ele
= rdbLoadEncodedStringObject(fp
)) == NULL
) return NULL
;
4022 ele
= tryObjectEncoding(ele
);
4023 if (rdbLoadDoubleValue(fp
,score
) == -1) return NULL
;
4024 dictAdd(zs
->dict
,ele
,score
);
4025 zslInsert(zs
->zsl
,*score
,ele
);
4026 incrRefCount(ele
); /* added to skiplist */
4028 } else if (type
== REDIS_HASH
) {
4031 if ((hashlen
= rdbLoadLen(fp
,NULL
)) == REDIS_RDB_LENERR
) return NULL
;
4032 o
= createHashObject();
4033 /* Too many entries? Use a hash table. */
4034 if (hashlen
> server
.hash_max_zipmap_entries
)
4035 convertToRealHash(o
);
4036 /* Load every key/value, then set it into the zipmap or hash
4037 * table, as needed. */
4041 if ((key
= rdbLoadStringObject(fp
)) == NULL
) return NULL
;
4042 if ((val
= rdbLoadStringObject(fp
)) == NULL
) return NULL
;
4043 /* If we are using a zipmap and there are too big values
4044 * the object is converted to real hash table encoding. */
4045 if (o
->encoding
!= REDIS_ENCODING_HT
&&
4046 (sdslen(key
->ptr
) > server
.hash_max_zipmap_value
||
4047 sdslen(val
->ptr
) > server
.hash_max_zipmap_value
))
4049 convertToRealHash(o
);
/* Zipmap encoding: key and value bytes are copied with zipmapSet(). */
4052 if (o
->encoding
== REDIS_ENCODING_ZIPMAP
) {
4053 unsigned char *zm
= o
->ptr
;
4055 zm
= zipmapSet(zm
,key
->ptr
,sdslen(key
->ptr
),
4056 val
->ptr
,sdslen(val
->ptr
),NULL
);
/* Otherwise key and value objects are added to the dict. */
4061 key
= tryObjectEncoding(key
);
4062 val
= tryObjectEncoding(val
);
4063 dictAdd((dict
*)o
->ptr
,key
,val
);
4067 redisPanic("Unknown object type");
/* Load the dataset stored in 'filename' into the server databases.
 * Returns REDIS_ERR when the file cannot be opened. The file starts with a
 * 9 byte header: the "REDIS" signature followed by the version number that
 * is parsed with atoi(). Entries are then read until a REDIS_EOF type is
 * found: an optional REDIS_EXPIRETIME prefix, a REDIS_SELECTDB opcode, or a
 * key followed by its value. Short reads jump to the 'eoferr' label.
 * When VM is enabled, values are swapped out while loading if memory usage
 * exceeds server.vm_max_memory. */
4072 static int rdbLoad(char *filename
) {
4075 int type
, retval
, rdbver
;
4076 int swap_all_values
= 0;
4077 dict
*d
= server
.db
[0].dict
;
4078 redisDb
*db
= server
.db
+0;
4080 time_t expiretime
, now
= time(NULL
);
4081 long long loadedkeys
= 0;
4083 fp
= fopen(filename
,"r");
4084 if (!fp
) return REDIS_ERR
;
/* Read the 9 byte header: signature plus version. */
4085 if (fread(buf
,9,1,fp
) == 0) goto eoferr
;
4087 if (memcmp(buf
,"REDIS",5) != 0) {
4089 redisLog(REDIS_WARNING
,"Wrong signature trying to load DB from file");
4092 rdbver
= atoi(buf
+5);
4095 redisLog(REDIS_WARNING
,"Can't handle RDB format version %d",rdbver
);
4103 if ((type
= rdbLoadType(fp
)) == -1) goto eoferr
;
4104 if (type
== REDIS_EXPIRETIME
) {
4105 if ((expiretime
= rdbLoadTime(fp
)) == -1) goto eoferr
;
4106 /* We read the time so we need to read the object type again */
4107 if ((type
= rdbLoadType(fp
)) == -1) goto eoferr
;
4109 if (type
== REDIS_EOF
) break;
4110 /* Handle SELECT DB opcode as a special case */
4111 if (type
== REDIS_SELECTDB
) {
4112 if ((dbid
= rdbLoadLen(fp
,NULL
)) == REDIS_RDB_LENERR
)
4114 if (dbid
>= (unsigned)server
.dbnum
) {
4115 redisLog(REDIS_WARNING
,"FATAL: Data file was created with a Redis server configured to handle more than %d databases. Exiting\n", server
.dbnum
);
4118 db
= server
.db
+dbid
;
4123 if ((key
= rdbLoadStringObject(fp
)) == NULL
) goto eoferr
;
4125 if ((val
= rdbLoadObject(type
,fp
)) == NULL
) goto eoferr
;
4126 /* Check if the key already expired */
4127 if (expiretime
!= -1 && expiretime
< now
) {
4132 /* Add the new object in the hash table */
4133 retval
= dictAdd(d
,key
,val
);
4134 if (retval
== DICT_ERR
) {
4135 redisLog(REDIS_WARNING
,"Loading DB, duplicated key (%s) found! Unrecoverable error, exiting now.", key
->ptr
);
4139 /* Set the expire time if needed */
4140 if (expiretime
!= -1) setExpire(db
,key
,expiretime
);
4142 /* Handle swapping while loading big datasets when VM is on */
4144 /* If we detect that we are hopeless about fitting something in memory
4145 * we just swap every new key on disk. Directly...
4146 * Note that it's important to check for this condition before resorting
4147 * to random sampling, otherwise we may try to swap already
4149 if (swap_all_values
) {
4150 dictEntry
*de
= dictFind(d
,key
);
4152 /* de may be NULL since the key already expired */
4154 key
= dictGetEntryKey(de
);
4155 val
= dictGetEntryVal(de
);
4157 if (vmSwapObjectBlocking(key
,val
) == REDIS_OK
) {
4158 dictGetEntryVal(de
) = NULL
;
4164 /* If we have still some hope of having some value fitting memory
4165 * then we try random sampling. */
4166 if (!swap_all_values
&& server
.vm_enabled
&& (loadedkeys
% 5000) == 0) {
4167 while (zmalloc_used_memory() > server
.vm_max_memory
) {
4168 if (vmSwapOneObjectBlocking() == REDIS_ERR
) break;
4170 if (zmalloc_used_memory() > server
.vm_max_memory
)
4171 swap_all_values
= 1; /* We are already using too much mem */
4177 eoferr
: /* unexpected end of file is handled here with a fatal exit */
4178 redisLog(REDIS_WARNING
,"Short read or OOM loading DB. Unrecoverable error, aborting now.");
4180 return REDIS_ERR
; /* Just to avoid warning */
4183 /*================================== Shutdown =============================== */
/* Prepare the server for shutdown.
 * A live background saving child is killed and its temp file removed.
 * With append only mode on, the AOF is fsync()ed (and the VM swap file is
 * unlinked when VM is enabled); the snapshotting path attempts a
 * synchronous rdbSave() and, on success, unlinks the pid file when the
 * server is daemonized.
 * NOTE(review): the return statements are not visible in this extract;
 * shutdownCommand() compares the result against REDIS_OK. */
4184 static int prepareForShutdown() {
4185 redisLog(REDIS_WARNING
,"User requested shutdown, saving DB...");
4186 /* Kill the saving child if there is a background saving in progress.
4187 We want to avoid race conditions, for instance our saving child may
4188 overwrite the synchronous saving done by SHUTDOWN. */
4189 if (server
.bgsavechildpid
!= -1) {
4190 redisLog(REDIS_WARNING
,"There is a live saving child. Killing it!");
4191 kill(server
.bgsavechildpid
,SIGKILL
);
4192 rdbRemoveTempFile(server
.bgsavechildpid
);
4194 if (server
.appendonly
) {
4195 /* Append only file: fsync() the AOF and exit */
4196 fsync(server
.appendfd
);
4197 if (server
.vm_enabled
) unlink(server
.vm_swap_file
);
4199 /* Snapshotting. Perform a SYNC SAVE and exit */
4200 if (rdbSave(server
.dbfilename
) == REDIS_OK
) {
4201 if (server
.daemonize
)
4202 unlink(server
.pidfile
);
4203 redisLog(REDIS_WARNING
,"%zu bytes used at exit",zmalloc_used_memory());
4205 /* Ooops.. error saving! The best we can do is to continue
4206 * operating. Note that if there was a background saving process,
4207 * in the next cron() Redis will be notified that the background
4208 * saving aborted, handling special stuff like slaves pending for
4209 * synchronization... */
4210 redisLog(REDIS_WARNING
,"Error trying to save the DB, can't exit");
4214 redisLog(REDIS_WARNING
,"Server exit now, bye bye...");
4218 /*================================== Commands =============================== */
/* AUTH: authenticate the client.
 * The client is flagged as authenticated and receives shared.ok when no
 * password is configured (server.requirepass is NULL) or when argv[1] is
 * equal to it. The other visible path clears the flag and replies with an
 * "invalid password" error.
 * NOTE(review): strcmp() is not a constant-time comparison. */
4220 static void authCommand(redisClient
*c
) {
4221 if (!server
.requirepass
|| !strcmp(c
->argv
[1]->ptr
, server
.requirepass
)) {
4222 c
->authenticated
= 1;
4223 addReply(c
,shared
.ok
);
4225 c
->authenticated
= 0;
4226 addReplySds(c
,sdscatprintf(sdsempty(),"-ERR invalid password\r\n"));
/* PING: reply with the shared pong object. */
4230 static void pingCommand(redisClient
*c
) {
4231 addReply(c
,shared
.pong
);
/* ECHO: send argv[1] back to the client as a bulk reply. */
4234 static void echoCommand(redisClient
*c
) {
4235 addReplyBulk(c
,c
->argv
[1]);
4238 /*=================================== Strings =============================== */
/* Shared implementation behind setCommand(), setnxCommand() and
 * setexCommand().
 *   nx     - non-zero selects the "set if not existing" variant:
 *            deleteIfVolatile() is called on the key first and the final
 *            reply is shared.cone instead of shared.ok.
 *   key    - key to set.
 *   val    - value to store.
 *   expire - object holding the time to live in seconds, or NULL when no
 *            expire has to be set.
 * The key is passed to touchWatchedKey() before the dict is modified. When
 * dictAdd() fails because the key exists the value is replaced with
 * dictReplace(); a shared.czero reply is also visible on that path. */
4240 static void setGenericCommand(redisClient
*c
, int nx
, robj
*key
, robj
*val
, robj
*expire
) {
4242 long seconds
= 0; /* initialized to avoid a harmless warning */
4245 if (getLongFromObjectOrReply(c
, expire
, &seconds
, NULL
) != REDIS_OK
)
4248 addReplySds(c
,sdsnew("-ERR invalid expire time in SETEX\r\n"));
4253 touchWatchedKey(c
->db
,key
);
4254 if (nx
) deleteIfVolatile(c
->db
,key
);
4255 retval
= dictAdd(c
->db
->dict
,key
,val
);
4256 if (retval
== DICT_ERR
) {
4258 /* If the key is about a swapped value, we want a new key object
4259 * to overwrite the old. So we delete the old key in the database.
4260 * This will also make sure that swap pages about the old object
4261 * will be marked as free. */
4262 if (server
.vm_enabled
&& deleteIfSwapped(c
->db
,key
))
4264 dictReplace(c
->db
->dict
,key
,val
);
4267 addReply(c
,shared
.czero
);
/* Any previous expire is dropped, then the new one (if any) is set
 * relative to the current time. */
4275 removeExpire(c
->db
,key
);
4276 if (expire
) setExpire(c
->db
,key
,time(NULL
)+seconds
);
4277 addReply(c
, nx
? shared
.cone
: shared
.ok
);
/* SET: store argv[2] at key argv[1], without the nx flag and without
 * an expire. */
4280 static void setCommand(redisClient
*c
) {
4281 setGenericCommand(c
,0,c
->argv
[1],c
->argv
[2],NULL
);
/* SETNX: like setCommand() but with the nx flag set, no expire. */
4284 static void setnxCommand(redisClient
*c
) {
4285 setGenericCommand(c
,1,c
->argv
[1],c
->argv
[2],NULL
);
/* SETEX: key is argv[1], the expire time is argv[2] and the value is
 * argv[3]; note the argument order differs from setGenericCommand(). */
4288 static void setexCommand(redisClient
*c
) {
4289 setGenericCommand(c
,0,c
->argv
[1],c
->argv
[3],c
->argv
[2]);
/* Look up the key argv[1] for reading.
 * A missing key is answered with shared.nullbulk (through
 * lookupKeyReadOrReply()) and a value that is not a string with
 * shared.wrongtypeerr. Returns an int status; getsetCommand() compares it
 * against REDIS_ERR. The reply for the success case is not visible in
 * this extract. */
4292 static int getGenericCommand(redisClient
*c
) {
4295 if ((o
= lookupKeyReadOrReply(c
,c
->argv
[1],shared
.nullbulk
)) == NULL
)
4298 if (o
->type
!= REDIS_STRING
) {
4299 addReply(c
,shared
.wrongtypeerr
);
/* GET: thin wrapper, the status returned by getGenericCommand() is
 * ignored. */
4307 static void getCommand(redisClient
*c
) {
4308 getGenericCommand(c
);
/* GETSET: the reply is produced by getGenericCommand(); if that reports
 * REDIS_ERR nothing is stored. Otherwise argv[2] is stored at argv[1]
 * (dictAdd(), falling back to dictReplace() when the key already exists),
 * reference counts of the stored objects are incremented and any expire
 * on the key is removed. */
4311 static void getsetCommand(redisClient
*c
) {
4312 if (getGenericCommand(c
) == REDIS_ERR
) return;
4313 if (dictAdd(c
->db
->dict
,c
->argv
[1],c
->argv
[2]) == DICT_ERR
) {
4314 dictReplace(c
->db
->dict
,c
->argv
[1],c
->argv
[2]);
4316 incrRefCount(c
->argv
[1]);
4318 incrRefCount(c
->argv
[2]);
4320 removeExpire(c
->db
,c
->argv
[1]);
/* MGET: multi bulk reply with one entry for every key in
 * argv[1..argc-1]. Both visible special cases answer with
 * shared.nullbulk; one of them is a value that is not a string. */
4323 static void mgetCommand(redisClient
*c
) {
/* Multi bulk header: the number of requested keys. */
4326 addReplySds(c
,sdscatprintf(sdsempty(),"*%d\r\n",c
->argc
-1));
4327 for (j
= 1; j
< c
->argc
; j
++) {
4328 robj
*o
= lookupKeyRead(c
->db
,c
->argv
[j
]);
4330 addReply(c
,shared
.nullbulk
);
4332 if (o
->type
!= REDIS_STRING
) {
4333 addReply(c
,shared
.nullbulk
);
/* Shared implementation behind msetCommand() and msetnxCommand().
 * Arguments are key/value pairs starting at argv[1]; an even argc (that
 * is, an unpaired argument) is rejected with an error reply.
 *   nx - non-zero for the "set if not existing" variant; the final reply
 *        is shared.cone instead of shared.ok.
 * Every value is passed through tryObjectEncoding() and stored with
 * dictAdd() / dictReplace(); expires on the keys are removed and
 * server.dirty is increased by the number of pairs. */
4341 static void msetGenericCommand(redisClient
*c
, int nx
) {
4342 int j
, busykeys
= 0;
4344 if ((c
->argc
% 2) == 0) {
4345 addReplySds(c
,sdsnew("-ERR wrong number of arguments for MSET\r\n"));
4348 /* Handle the NX flag. The MSETNX semantics are to return zero and
4349 * set nothing at all if at least one key already exists. */
4351 for (j
= 1; j
< c
->argc
; j
+= 2) {
4352 if (lookupKeyWrite(c
->db
,c
->argv
[j
]) != NULL
) {
4358 addReply(c
, shared
.czero
);
/* Second pass: actually store every key/value pair. */
4362 for (j
= 1; j
< c
->argc
; j
+= 2) {
4365 c
->argv
[j
+1] = tryObjectEncoding(c
->argv
[j
+1]);
4366 retval
= dictAdd(c
->db
->dict
,c
->argv
[j
],c
->argv
[j
+1]);
4367 if (retval
== DICT_ERR
) {
4368 dictReplace(c
->db
->dict
,c
->argv
[j
],c
->argv
[j
+1]);
4369 incrRefCount(c
->argv
[j
+1]);
4371 incrRefCount(c
->argv
[j
]);
4372 incrRefCount(c
->argv
[j
+1]);
4374 removeExpire(c
->db
,c
->argv
[j
]);
4376 server
.dirty
+= (c
->argc
-1)/2;
4377 addReply(c
, nx
? shared
.cone
: shared
.ok
);
/* MSET: msetGenericCommand() without the nx flag. */
4380 static void msetCommand(redisClient
*c
) {
4381 msetGenericCommand(c
,0);
/* MSETNX: msetGenericCommand() with the nx flag set. */
4384 static void msetnxCommand(redisClient
*c
) {
4385 msetGenericCommand(c
,1);
/* Shared implementation of the increment / decrement commands.
 * The value at argv[1] (if any) must be a string and is converted to a
 * long long by getLongLongFromObjectOrReply(), which is also called when
 * the key is missing ('o' is NULL). A new string object created from
 * 'value' is then stored with dictAdd(), or with dictReplace() plus
 * removeExpire() when the key already exists. The reply is framed by
 * shared.colon and shared.crlf.
 * NOTE(review): the statement applying 'incr' to 'value' is not visible
 * in this extract. */
4388 static void incrDecrCommand(redisClient
*c
, long long incr
) {
4393 o
= lookupKeyWrite(c
->db
,c
->argv
[1]);
4394 if (o
!= NULL
&& checkType(c
,o
,REDIS_STRING
)) return;
4395 if (getLongLongFromObjectOrReply(c
,o
,&value
,NULL
) != REDIS_OK
) return;
4398 o
= createStringObjectFromLongLong(value
);
4399 retval
= dictAdd(c
->db
->dict
,c
->argv
[1],o
);
4400 if (retval
== DICT_ERR
) {
4401 dictReplace(c
->db
->dict
,c
->argv
[1],o
);
4402 removeExpire(c
->db
,c
->argv
[1]);
4404 incrRefCount(c
->argv
[1]);
4407 addReply(c
,shared
.colon
);
4409 addReply(c
,shared
.crlf
);
/* INCR: incrDecrCommand() with an increment of 1. */
4412 static void incrCommand(redisClient
*c
) {
4413 incrDecrCommand(c
,1);
/* DECR: incrDecrCommand() with an increment of -1. */
4416 static void decrCommand(redisClient
*c
) {
4417 incrDecrCommand(c
,-1);
/* INCRBY: parse argv[2] as the increment (an error reply is sent by
 * getLongLongFromObjectOrReply() on failure) and hand it to
 * incrDecrCommand(). */
4420 static void incrbyCommand(redisClient
*c
) {
4423 if (getLongLongFromObjectOrReply(c
, c
->argv
[2], &incr
, NULL
) != REDIS_OK
) return;
4424 incrDecrCommand(c
,incr
);
/* DECRBY: parse argv[2] and hand its negation to incrDecrCommand().
 * NOTE(review): if 'incr' can hold the most negative long long value,
 * '-incr' overflows -- confirm whether that input is rejected earlier. */
4427 static void decrbyCommand(redisClient
*c
) {
4430 if (getLongLongFromObjectOrReply(c
, c
->argv
[2], &incr
, NULL
) != REDIS_OK
) return;
4431 incrDecrCommand(c
,-incr
);
/* APPEND: append argv[2] to the string stored at argv[1] and reply with
 * the resulting length ('totlen') as an integer reply.
 * On the "Create the key" path argv[2] itself is stored as the value.
 * For an existing key the value must be a string, otherwise
 * shared.wrongtypeerr is sent; a value that is shared (refcount != 1) or
 * not raw encoded is first replaced in the dict by a fresh raw string
 * copy, and then the new data is concatenated to it.
 * NOTE(review): the sdscatprintf() call for a non raw encoded argv[2]
 * uses "%ld" with an (unsigned long) argument. */
4434 static void appendCommand(redisClient
*c
) {
4439 o
= lookupKeyWrite(c
->db
,c
->argv
[1]);
4441 /* Create the key */
4442 retval
= dictAdd(c
->db
->dict
,c
->argv
[1],c
->argv
[2]);
4443 incrRefCount(c
->argv
[1]);
4444 incrRefCount(c
->argv
[2]);
4445 totlen
= stringObjectLen(c
->argv
[2]);
4449 de
= dictFind(c
->db
->dict
,c
->argv
[1]);
4452 o
= dictGetEntryVal(de
);
4453 if (o
->type
!= REDIS_STRING
) {
4454 addReply(c
,shared
.wrongtypeerr
);
4457 /* If the object is specially encoded or shared we have to make
4459 if (o
->refcount
!= 1 || o
->encoding
!= REDIS_ENCODING_RAW
) {
4460 robj
*decoded
= getDecodedObject(o
);
4462 o
= createStringObject(decoded
->ptr
, sdslen(decoded
->ptr
));
4463 decrRefCount(decoded
);
4464 dictReplace(c
->db
->dict
,c
->argv
[1],o
);
4467 if (c
->argv
[2]->encoding
== REDIS_ENCODING_RAW
) {
4468 o
->ptr
= sdscatlen(o
->ptr
,
4469 c
->argv
[2]->ptr
, sdslen(c
->argv
[2]->ptr
));
4471 o
->ptr
= sdscatprintf(o
->ptr
, "%ld",
4472 (unsigned long) c
->argv
[2]->ptr
);
4474 totlen
= sdslen(o
->ptr
);
4477 addReplySds(c
,sdscatprintf(sdsempty(),":%lu\r\n",(unsigned long)totlen
));
4480 static void substrCommand(redisClient
*c
) {
4482 long start
= atoi(c
->argv
[2]->ptr
);
4483 long end
= atoi(c
->argv
[3]->ptr
);
4484 size_t rangelen
, strlen
;
4487 if ((o
= lookupKeyReadOrReply(c
,c
->argv
[1],shared
.nullbulk
)) == NULL
||
4488 checkType(c
,o
,REDIS_STRING
)) return;
4490 o
= getDecodedObject(o
);
4491 strlen
= sdslen(o
->ptr
);
4493 /* convert negative indexes: a negative value counts back from the end
 * of the string, and is clamped to 0 if it is still negative. Note that
 * 'strlen' here is the local length variable, not strlen(3). */
4494 if (start
< 0) start
= strlen
+start
;
4495 if (end
< 0) end
= strlen
+end
;
4496 if (start
< 0) start
= 0;
4497 if (end
< 0) end
= 0;
4499 /* indexes sanity checks */
4500 if (start
> end
|| (size_t)start
>= strlen
) {
4501 /* Out of range start or start > end result in null reply */
4502 addReply(c
,shared
.nullbulk
);
/* Clamp 'end' to the last valid index; the range is inclusive. */
4506 if ((size_t)end
>= strlen
) end
= strlen
-1;
4507 rangelen
= (end
-start
)+1;
4509 /* Return the result: bulk length header, the range bytes, then CRLF */
4510 addReplySds(c
,sdscatprintf(sdsempty(),"$%zu\r\n",rangelen
));
4511 range
= sdsnewlen((char*)o
->ptr
+start
,rangelen
);
4512 addReplySds(c
,range
);
4513 addReply(c
,shared
.crlf
);
4517 /* ========================= Type agnostic commands ========================= */
/* DEL: try to delete every key in argv[1..argc-1]. Keys that were
 * actually deleted are passed to touchWatchedKey(). The reply is the
 * value of 'deleted' as an integer (its update is not visible in this
 * extract). */
4519 static void delCommand(redisClient
*c
) {
4522 for (j
= 1; j
< c
->argc
; j
++) {
4523 if (deleteKey(c
->db
,c
->argv
[j
])) {
4524 touchWatchedKey(c
->db
,c
->argv
[j
]);
4529 addReplyLongLong(c
,deleted
);
/* EXISTS: reply shared.cone if argv[1] is found in the current DB dict,
 * shared.czero otherwise. expireIfNeeded() is called first so that the
 * lookup is done after expire handling for that key. */
4532 static void existsCommand(redisClient
*c
) {
4533 expireIfNeeded(c
->db
,c
->argv
[1]);
4534 if (dictFind(c
->db
->dict
,c
->argv
[1])) {
4535 addReply(c
, shared
.cone
);
4537 addReply(c
, shared
.czero
);
/* SELECT: switch the client to the DB whose index is argv[1].
 * The index is parsed with atoi(), so text that is not a number is
 * parsed as 0. Replies with an error when selectDb() fails, shared.ok
 * otherwise. */
4541 static void selectCommand(redisClient
*c
) {
4542 int id
= atoi(c
->argv
[1]->ptr
);
4544 if (selectDb(c
,id
) == REDIS_ERR
) {
4545 addReplySds(c
,sdsnew("-ERR invalid DB index\r\n"));
4547 addReply(c
,shared
.ok
);
/* RANDOMKEY: reply with a random key of the current DB as a bulk reply;
 * a shared.nullbulk reply is also visible (no key available).
 * Sampling stops when the dict yields no entry or when expireIfNeeded()
 * returns 0 for the sampled key. With VM enabled the key object is
 * duplicated with dupStringObject() before being sent. */
4551 static void randomkeyCommand(redisClient
*c
) {
4556 de
= dictGetRandomKey(c
->db
->dict
);
4557 if (!de
|| expireIfNeeded(c
->db
,dictGetEntryKey(de
)) == 0) break;
4561 addReply(c
,shared
.nullbulk
);
4565 key
= dictGetEntryKey(de
);
4566 if (server
.vm_enabled
) {
4567 key
= dupStringObject(key
);
4568 addReplyBulk(c
,key
);
4571 addReplyBulk(c
,key
);
/* KEYS: send every key of the current DB that matches the glob style
 * pattern in argv[1] as a bulk reply. The pattern "*" is special cased to
 * skip the call to stringmatchlen(). Keys for which expireIfNeeded()
 * does not return 0 are not sent.
 * 'lenobj' is created with a NULL ptr and only receives the multi bulk
 * header ("*<numkeys>") after the iteration, once the number of matches
 * is known. */
4575 static void keysCommand(redisClient
*c
) {
4578 sds pattern
= c
->argv
[1]->ptr
;
4579 int plen
= sdslen(pattern
);
4580 unsigned long numkeys
= 0;
4581 robj
*lenobj
= createObject(REDIS_STRING
,NULL
);
4583 di
= dictGetIterator(c
->db
->dict
);
4585 decrRefCount(lenobj
);
4586 while((de
= dictNext(di
)) != NULL
) {
4587 robj
*keyobj
= dictGetEntryKey(de
);
4589 sds key
= keyobj
->ptr
;
4590 if ((pattern
[0] == '*' && pattern
[1] == '\0') ||
4591 stringmatchlen(pattern
,plen
,key
,sdslen(key
),0)) {
4592 if (expireIfNeeded(c
->db
,keyobj
) == 0) {
4593 addReplyBulk(c
,keyobj
);
4598 dictReleaseIterator(di
);
4599 lenobj
->ptr
= sdscatprintf(sdsempty(),"*%lu\r\n",numkeys
);
/* DBSIZE: format the number of entries of the current DB dict as an
 * integer reply. The enclosing call that sends the string is not visible
 * in this extract. */
4602 static void dbsizeCommand(redisClient
*c
) {
4604 sdscatprintf(sdsempty(),":%lu\r\n",dictSize(c
->db
->dict
)));
/* LASTSAVE: format server.lastsave as an integer reply. The enclosing
 * call that sends the string is not visible in this extract. */
4607 static void lastsaveCommand(redisClient
*c
) {
4609 sdscatprintf(sdsempty(),":%lu\r\n",server
.lastsave
));
/* TYPE: reply with a status line naming the type of the value stored at
 * argv[1]: "+string", "+list", "+set", "+zset", "+hash", or "+unknown"
 * for any other type code. The handling of a missing key is not visible
 * in this extract. */
4612 static void typeCommand(redisClient
*c
) {
4616 o
= lookupKeyRead(c
->db
,c
->argv
[1]);
4621 case REDIS_STRING
: type
= "+string"; break;
4622 case REDIS_LIST
: type
= "+list"; break;
4623 case REDIS_SET
: type
= "+set"; break;
4624 case REDIS_ZSET
: type
= "+zset"; break;
4625 case REDIS_HASH
: type
= "+hash"; break;
4626 default: type
= "+unknown"; break;
4629 addReplySds(c
,sdsnew(type
));
4630 addReply(c
,shared
.crlf
);
/* SAVE: synchronous save of the dataset to server.dbfilename.
 * An error is sent when a background save child exists
 * (server.bgsavechildpid != -1). Replies shared.ok when rdbSave()
 * succeeds; a shared.err reply is visible for the other outcome. */
4633 static void saveCommand(redisClient
*c
) {
4634 if (server
.bgsavechildpid
!= -1) {
4635 addReplySds(c
,sdsnew("-ERR background save in progress\r\n"));
4638 if (rdbSave(server
.dbfilename
) == REDIS_OK
) {
4639 addReply(c
,shared
.ok
);
4641 addReply(c
,shared
.err
);
/* BGSAVE: start a background save to server.dbfilename.
 * An error is sent when a background save child already exists. Replies
 * with a "+Background saving started" status when rdbSaveBackground()
 * succeeds; a shared.err reply is visible for the other outcome. */
4645 static void bgsaveCommand(redisClient
*c
) {
4646 if (server
.bgsavechildpid
!= -1) {
4647 addReplySds(c
,sdsnew("-ERR background save already in progress\r\n"));
4650 if (rdbSaveBackground(server
.dbfilename
) == REDIS_OK
) {
4651 char *status
= "+Background saving started\r\n";
4652 addReplySds(c
,sdsnew(status
));
4654 addReply(c
,shared
.err
);
/* SHUTDOWN: call prepareForShutdown(). The statement executed when it
 * returns REDIS_OK is not visible in this extract; the visible reply is
 * the error sent to the client when the shutdown could not be
 * completed. */
4658 static void shutdownCommand(redisClient
*c
) {
4659 if (prepareForShutdown() == REDIS_OK
)
4661 addReplySds(c
, sdsnew("-ERR Errors trying to SHUTDOWN. Check logs.\r\n"));
/* Shared implementation behind renameCommand() and renamenxCommand().
 * The source key is argv[1], the destination key argv[2].
 *   nx - non-zero for the "rename if the destination does not exist"
 *        variant; the final reply is shared.cone instead of shared.ok.
 * Identical names are rejected with shared.sameobjecterr and a missing
 * source with shared.nokeyerr. The value is added under the new name
 * (dictAdd(), with dictReplace() and a shared.czero reply both visible
 * on the "destination exists" path), the old key is deleted and the
 * destination key is passed to touchWatchedKey(). */
4664 static void renameGenericCommand(redisClient
*c
, int nx
) {
4667 /* To use the same key as src and dst is probably an error */
4668 if (sdscmp(c
->argv
[1]->ptr
,c
->argv
[2]->ptr
) == 0) {
4669 addReply(c
,shared
.sameobjecterr
);
4673 if ((o
= lookupKeyWriteOrReply(c
,c
->argv
[1],shared
.nokeyerr
)) == NULL
)
4677 deleteIfVolatile(c
->db
,c
->argv
[2]);
4678 if (dictAdd(c
->db
->dict
,c
->argv
[2],o
) == DICT_ERR
) {
4681 addReply(c
,shared
.czero
);
4684 dictReplace(c
->db
->dict
,c
->argv
[2],o
);
4686 incrRefCount(c
->argv
[2]);
4688 deleteKey(c
->db
,c
->argv
[1]);
4689 touchWatchedKey(c
->db
,c
->argv
[2]);
4691 addReply(c
,nx
? shared
.cone
: shared
.ok
);
/* RENAME: renameGenericCommand() without the nx flag. */
4694 static void renameCommand(redisClient
*c
) {
4695 renameGenericCommand(c
,0);
/* RENAMENX: renameGenericCommand() with the nx flag set. */
4698 static void renamenxCommand(redisClient
*c
) {
4699 renameGenericCommand(c
,1);
/* MOVE: move the key argv[1] from the current DB to the DB whose index
 * is argv[2] (parsed with atoi()).
 * The target DB is resolved by temporarily selecting it and then
 * selecting the source DB again. Visible replies: shared.outofrangeerr
 * for an invalid index, shared.sameobjecterr when source and target are
 * the same DB, shared.czero when the key is missing in the source or
 * already present in the target, shared.cone once the key is moved. */
4702 static void moveCommand(redisClient
*c
) {
4707 /* Obtain source and target DB pointers */
4710 if (selectDb(c
,atoi(c
->argv
[2]->ptr
)) == REDIS_ERR
) {
4711 addReply(c
,shared
.outofrangeerr
);
4715 selectDb(c
,srcid
); /* Back to the source DB */
4717 /* If the user is moving using as target the same
4718 * DB as the source DB it is probably an error. */
4720 addReply(c
,shared
.sameobjecterr
);
4724 /* Check if the element exists and get a reference */
4725 o
= lookupKeyWrite(c
->db
,c
->argv
[1]);
4727 addReply(c
,shared
.czero
);
4731 /* Try to add the element to the target DB */
4732 deleteIfVolatile(dst
,c
->argv
[1]);
4733 if (dictAdd(dst
->dict
,c
->argv
[1],o
) == DICT_ERR
) {
4734 addReply(c
,shared
.czero
);
4737 incrRefCount(c
->argv
[1]);
4740 /* OK! key moved, free the entry in the source DB */
4741 deleteKey(src
,c
->argv
[1]);
4743 addReply(c
,shared
.cone
);
4746 /* =================================== Lists ================================ */
/* Shared implementation behind lpushCommand() and rpushCommand().
 *   where - REDIS_HEAD to add argv[2] at the head of the list stored at
 *           argv[1], any other value to add it at the tail.
 * On both the "new list" and the "existing list" paths the element is
 * first offered to handleClientsWaitingListPush(); when that call
 * returns non-zero the reply is shared.cone. A new list object is created
 * and added to the DB when needed; an existing value that is not a list
 * yields shared.wrongtypeerr. The final reply is the list length. */
4747 static void pushGenericCommand(redisClient
*c
, int where
) {
4751 lobj
= lookupKeyWrite(c
->db
,c
->argv
[1]);
4753 if (handleClientsWaitingListPush(c
,c
->argv
[1],c
->argv
[2])) {
4754 addReply(c
,shared
.cone
);
4757 lobj
= createListObject();
4759 if (where
== REDIS_HEAD
) {
4760 listAddNodeHead(list
,c
->argv
[2]);
4762 listAddNodeTail(list
,c
->argv
[2]);
4764 dictAdd(c
->db
->dict
,c
->argv
[1],lobj
);
4765 incrRefCount(c
->argv
[1]);
4766 incrRefCount(c
->argv
[2]);
4768 if (lobj
->type
!= REDIS_LIST
) {
4769 addReply(c
,shared
.wrongtypeerr
);
4772 if (handleClientsWaitingListPush(c
,c
->argv
[1],c
->argv
[2])) {
4773 addReply(c
,shared
.cone
);
4777 if (where
== REDIS_HEAD
) {
4778 listAddNodeHead(list
,c
->argv
[2]);
4780 listAddNodeTail(list
,c
->argv
[2]);
4782 incrRefCount(c
->argv
[2]);
4785 addReplyLongLong(c
,listLength(list
));
/* LPUSH: pushGenericCommand() on the head side. */
4788 static void lpushCommand(redisClient
*c
) {
4789 pushGenericCommand(c
,REDIS_HEAD
);
/* RPUSH: pushGenericCommand() on the tail side. */
4792 static void rpushCommand(redisClient
*c
) {
4793 pushGenericCommand(c
,REDIS_TAIL
);
/* LLEN: reply with the length of the list stored at argv[1].
 * A missing key is answered with shared.czero; checkType() handles a
 * value that is not a list. */
4796 static void llenCommand(redisClient
*c
) {
4800 if ((o
= lookupKeyReadOrReply(c
,c
->argv
[1],shared
.czero
)) == NULL
||
4801 checkType(c
,o
,REDIS_LIST
)) return;
4804 addReplyUlong(c
,listLength(l
));
/* LINDEX: reply with the element at position argv[2] (parsed with
 * atoi()) of the list stored at argv[1], as a bulk reply.
 * A missing key is answered with shared.nullbulk; a second
 * shared.nullbulk reply is visible after the listIndex() lookup. */
4807 static void lindexCommand(redisClient
*c
) {
4809 int index
= atoi(c
->argv
[2]->ptr
);
4813 if ((o
= lookupKeyReadOrReply(c
,c
->argv
[1],shared
.nullbulk
)) == NULL
||
4814 checkType(c
,o
,REDIS_LIST
)) return;
4817 ln
= listIndex(list
, index
);
4819 addReply(c
,shared
.nullbulk
);
4821 robj
*ele
= listNodeValue(ln
);
4822 addReplyBulk(c
,ele
);
/* LSET: store argv[3] at position argv[2] (parsed with atoi()) of the
 * list stored at argv[1].
 * A missing key is answered with shared.nokeyerr; a
 * shared.outofrangeerr reply is visible after the listIndex() lookup.
 * On success the node value is replaced by argv[3], whose reference
 * count is incremented, and the reply is shared.ok. */
4826 static void lsetCommand(redisClient
*c
) {
4828 int index
= atoi(c
->argv
[2]->ptr
);
4832 if ((o
= lookupKeyWriteOrReply(c
,c
->argv
[1],shared
.nokeyerr
)) == NULL
||
4833 checkType(c
,o
,REDIS_LIST
)) return;
4836 ln
= listIndex(list
, index
);
4838 addReply(c
,shared
.outofrangeerr
);
4840 robj
*ele
= listNodeValue(ln
);
4843 listNodeValue(ln
) = c
->argv
[3];
4844 incrRefCount(c
->argv
[3]);
4845 addReply(c
,shared
.ok
);
/* Shared implementation behind lpopCommand() and rpopCommand().
 *   where - REDIS_HEAD to pop the first node of the list stored at
 *           argv[1], any other value to pop the last one.
 * A missing key is answered with shared.nullbulk. The popped element is
 * sent as a bulk reply before its node is removed, and the key is deleted
 * when the list becomes empty. */
4850 static void popGenericCommand(redisClient
*c
, int where
) {
4855 if ((o
= lookupKeyWriteOrReply(c
,c
->argv
[1],shared
.nullbulk
)) == NULL
||
4856 checkType(c
,o
,REDIS_LIST
)) return;
4859 if (where
== REDIS_HEAD
)
4860 ln
= listFirst(list
);
4862 ln
= listLast(list
);
4865 addReply(c
,shared
.nullbulk
);
4867 robj
*ele
= listNodeValue(ln
);
4868 addReplyBulk(c
,ele
);
4869 listDelNode(list
,ln
);
4870 if (listLength(list
) == 0) deleteKey(c
->db
,c
->argv
[1]);
/* LPOP: popGenericCommand() on the head side. */
4875 static void lpopCommand(redisClient
*c
) {
4876 popGenericCommand(c
,REDIS_HEAD
);
/* RPOP: popGenericCommand() on the tail side. */
4879 static void rpopCommand(redisClient
*c
) {
4880 popGenericCommand(c
,REDIS_TAIL
);
/* LRANGE: multi bulk reply with the elements of the list stored at
 * argv[1] between the indexes argv[2] and argv[3], both inclusive and
 * both parsed with atoi(). Negative indexes count from the end of the
 * list. A missing key or an empty range is answered with
 * shared.emptymultibulk. */
4883 static void lrangeCommand(redisClient
*c
) {
4885 int start
= atoi(c
->argv
[2]->ptr
);
4886 int end
= atoi(c
->argv
[3]->ptr
);
4893 if ((o
= lookupKeyReadOrReply(c
,c
->argv
[1],shared
.emptymultibulk
)) == NULL
4894 || checkType(c
,o
,REDIS_LIST
)) return;
4896 llen
= listLength(list
);
4898 /* convert negative indexes */
4899 if (start
< 0) start
= llen
+start
;
4900 if (end
< 0) end
= llen
+end
;
4901 if (start
< 0) start
= 0;
4902 if (end
< 0) end
= 0;
4904 /* indexes sanity checks */
4905 if (start
> end
|| start
>= llen
) {
4906 /* Out of range start or start > end result in empty list */
4907 addReply(c
,shared
.emptymultibulk
);
/* Clamp 'end' to the last valid index. */
4910 if (end
>= llen
) end
= llen
-1;
4911 rangelen
= (end
-start
)+1;
4913 /* Return the result in form of a multi-bulk reply */
4914 ln
= listIndex(list
, start
);
4915 addReplySds(c
,sdscatprintf(sdsempty(),"*%d\r\n",rangelen
));
4916 for (j
= 0; j
< rangelen
; j
++) {
4917 ele
= listNodeValue(ln
);
4918 addReplyBulk(c
,ele
);
/* LTRIM: trim the list stored at argv[1] so that only the elements
 * between the indexes argv[2] and argv[3] (parsed with atoi(), negative
 * values count from the end) are retained.
 * 'ltrim' nodes are removed from the head and 'rtrim' nodes from the
 * tail; the statements computing the two counters are not visible in
 * this extract. The key is deleted when the list becomes empty. A
 * missing key is answered with shared.ok, which is also the final
 * reply. */
4923 static void ltrimCommand(redisClient
*c
) {
4925 int start
= atoi(c
->argv
[2]->ptr
);
4926 int end
= atoi(c
->argv
[3]->ptr
);
4928 int j
, ltrim
, rtrim
;
4932 if ((o
= lookupKeyWriteOrReply(c
,c
->argv
[1],shared
.ok
)) == NULL
||
4933 checkType(c
,o
,REDIS_LIST
)) return;
4935 llen
= listLength(list
);
4937 /* convert negative indexes */
4938 if (start
< 0) start
= llen
+start
;
4939 if (end
< 0) end
= llen
+end
;
4940 if (start
< 0) start
= 0;
4941 if (end
< 0) end
= 0;
4943 /* indexes sanity checks */
4944 if (start
> end
|| start
>= llen
) {
4945 /* Out of range start or start > end result in empty list */
4949 if (end
>= llen
) end
= llen
-1;
4954 /* Remove list elements to perform the trim */
4955 for (j
= 0; j
< ltrim
; j
++) {
4956 ln
= listFirst(list
);
4957 listDelNode(list
,ln
);
4959 for (j
= 0; j
< rtrim
; j
++) {
4960 ln
= listLast(list
);
4961 listDelNode(list
,ln
);
4963 if (listLength(list
) == 0) deleteKey(c
->db
,c
->argv
[1]);
4965 addReply(c
,shared
.ok
);
/* LREM: remove elements equal to argv[3] from the list stored at
 * argv[1] and reply with the number of removed elements.
 * argv[2] (parsed with atoi()) is the number of occurrences to remove;
 * the scan only stops early when 'toremove' is not zero, so zero removes
 * every match. The list is walked from the tail when 'fromtail' is set,
 * from the head otherwise. The key is deleted when the list becomes
 * empty. A missing key is answered with shared.czero. */
4968 static void lremCommand(redisClient
*c
) {
4971 listNode
*ln
, *next
;
4972 int toremove
= atoi(c
->argv
[2]->ptr
);
4976 if ((o
= lookupKeyWriteOrReply(c
,c
->argv
[1],shared
.czero
)) == NULL
||
4977 checkType(c
,o
,REDIS_LIST
)) return;
4981 toremove
= -toremove
;
4984 ln
= fromtail
? list
->tail
: list
->head
;
4986 robj
*ele
= listNodeValue(ln
);
/* Fetch the next node before the current one may be deleted. */
4988 next
= fromtail
? ln
->prev
: ln
->next
;
4989 if (equalStringObjects(ele
,c
->argv
[3])) {
4990 listDelNode(list
,ln
);
4993 if (toremove
&& removed
== toremove
) break;
4997 if (listLength(list
) == 0) deleteKey(c
->db
,c
->argv
[1]);
4998 addReplySds(c
,sdscatprintf(sdsempty(),":%d\r\n",removed
));
5001 /* This is the semantic of this command:
5002 * RPOPLPUSH srclist dstlist:
5003 * IF LLEN(srclist) > 0
5004 * element = RPOP srclist
5005 * LPUSH dstlist element
5012 * The idea is to be able to get an element from a list in a reliable way
5013 * since the element is not just returned but pushed against another list
5014 * as well. This command was originally proposed by Ezra Zygmuntowicz.
5016 static void rpoplpushcommand(redisClient
*c
) {
5021 if ((sobj
= lookupKeyWriteOrReply(c
,c
->argv
[1],shared
.nullbulk
)) == NULL
||
5022 checkType(c
,sobj
,REDIS_LIST
)) return;
5023 srclist
= sobj
->ptr
;
5024 ln
= listLast(srclist
);
5027 addReply(c
,shared
.nullbulk
);
5029 robj
*dobj
= lookupKeyWrite(c
->db
,c
->argv
[2]);
5030 robj
*ele
= listNodeValue(ln
);
5033 if (dobj
&& dobj
->type
!= REDIS_LIST
) {
5034 addReply(c
,shared
.wrongtypeerr
);
5038 /* Add the element to the target list (unless it's directly
5039 * passed to some BLPOP-ing client */
5040 if (!handleClientsWaitingListPush(c
,c
->argv
[2],ele
)) {
5042 /* Create the list if the key does not exist */
5043 dobj
= createListObject();
5044 dictAdd(c
->db
->dict
,c
->argv
[2],dobj
);
5045 incrRefCount(c
->argv
[2]);
5047 dstlist
= dobj
->ptr
;
5048 listAddNodeHead(dstlist
,ele
);
5052 /* Send the element to the client as reply as well */
5053 addReplyBulk(c
,ele
);
5055 /* Finally remove the element from the source list */
5056 listDelNode(srclist
,ln
);
5057 if (listLength(srclist
) == 0) deleteKey(c
->db
,c
->argv
[1]);
5062 /* ==================================== Sets ================================ */
/* SADD: add argv[2] to the set stored at argv[1].
 * A new set object is created and added to the DB when needed; an
 * existing value that is not a set yields shared.wrongtypeerr.
 * Replies shared.cone when the member was added (its reference count is
 * incremented since the set now references it); a shared.czero reply is
 * visible for the other outcome. */
5064 static void saddCommand(redisClient
*c
) {
5067 set
= lookupKeyWrite(c
->db
,c
->argv
[1]);
5069 set
= createSetObject();
5070 dictAdd(c
->db
->dict
,c
->argv
[1],set
);
5071 incrRefCount(c
->argv
[1]);
5073 if (set
->type
!= REDIS_SET
) {
5074 addReply(c
,shared
.wrongtypeerr
);
5078 if (dictAdd(set
->ptr
,c
->argv
[2],NULL
) == DICT_OK
) {
5079 incrRefCount(c
->argv
[2]);
5081 addReply(c
,shared
.cone
);
5083 addReply(c
,shared
.czero
);
/* SREM: remove argv[2] from the set stored at argv[1].
 * A missing key is answered with shared.czero. When the member was
 * deleted the hash table is resized if htNeedsResize() says so, the key
 * is deleted if the set became empty, and the reply is shared.cone; a
 * shared.czero reply is visible for the other outcome. */
5087 static void sremCommand(redisClient
*c
) {
5090 if ((set
= lookupKeyWriteOrReply(c
,c
->argv
[1],shared
.czero
)) == NULL
||
5091 checkType(c
,set
,REDIS_SET
)) return;
5093 if (dictDelete(set
->ptr
,c
->argv
[2]) == DICT_OK
) {
5095 if (htNeedsResize(set
->ptr
)) dictResize(set
->ptr
);
5096 if (dictSize((dict
*)set
->ptr
) == 0) deleteKey(c
->db
,c
->argv
[1]);
5097 addReply(c
,shared
.cone
);
5099 addReply(c
,shared
.czero
);
/* SMOVE: move the member argv[3] from the set stored at argv[1] to the
 * set stored at argv[2].
 * Visible replies: shared.czero when the source key is missing or does
 * not contain the member, shared.wrongtypeerr when the source or the
 * destination is not a set, shared.cone once the member is moved.
 * The source key is deleted when its set becomes empty, unless source
 * and destination are the same object. A destination set is created
 * and added to the DB when needed. */
5103 static void smoveCommand(redisClient
*c
) {
5104 robj
*srcset
, *dstset
;
5106 srcset
= lookupKeyWrite(c
->db
,c
->argv
[1]);
5107 dstset
= lookupKeyWrite(c
->db
,c
->argv
[2]);
5109 /* If the source key does not exist return 0, if it's of the wrong type
5111 if (srcset
== NULL
|| srcset
->type
!= REDIS_SET
) {
5112 addReply(c
, srcset
? shared
.wrongtypeerr
: shared
.czero
);
5115 /* Error if the destination key is not a set as well */
5116 if (dstset
&& dstset
->type
!= REDIS_SET
) {
5117 addReply(c
,shared
.wrongtypeerr
);
5120 /* Remove the element from the source set */
5121 if (dictDelete(srcset
->ptr
,c
->argv
[3]) == DICT_ERR
) {
5122 /* Key not found in the src set! return zero */
5123 addReply(c
,shared
.czero
);
5126 if (dictSize((dict
*)srcset
->ptr
) == 0 && srcset
!= dstset
)
5127 deleteKey(c
->db
,c
->argv
[1]);
5129 /* Add the element to the destination set */
5131 dstset
= createSetObject();
5132 dictAdd(c
->db
->dict
,c
->argv
[2],dstset
);
5133 incrRefCount(c
->argv
[2]);
5135 if (dictAdd(dstset
->ptr
,c
->argv
[3],NULL
) == DICT_OK
)
5136 incrRefCount(c
->argv
[3]);
5137 addReply(c
,shared
.cone
);
/* SISMEMBER: reply shared.cone when argv[2] is found in the set stored
 * at argv[1]; a shared.czero reply is visible for the other outcome and
 * is also used for a missing key. */
5140 static void sismemberCommand(redisClient
*c
) {
5143 if ((set
= lookupKeyReadOrReply(c
,c
->argv
[1],shared
.czero
)) == NULL
||
5144 checkType(c
,set
,REDIS_SET
)) return;
5146 if (dictFind(set
->ptr
,c
->argv
[2]))
5147 addReply(c
,shared
.cone
);
5149 addReply(c
,shared
.czero
);
/* SCARD: reply with the number of members of the set stored at argv[1].
 * A missing key is answered with shared.czero; checkType() handles a
 * value that is not a set. */
5152 static void scardCommand(redisClient
*c
) {
5156 if ((o
= lookupKeyReadOrReply(c
,c
->argv
[1],shared
.czero
)) == NULL
||
5157 checkType(c
,o
,REDIS_SET
)) return;
5160 addReplyUlong(c
,dictSize(s
));
/* SPOP: remove a random member from the set stored at argv[1] and send
 * it as a bulk reply.
 * A missing key is answered with shared.nullbulk; a second
 * shared.nullbulk reply is visible after the random pick. The member is
 * sent before being deleted from the dict; then the hash table is
 * resized if htNeedsResize() says so and the key is deleted if the set
 * became empty. */
5163 static void spopCommand(redisClient
*c
) {
5167 if ((set
= lookupKeyWriteOrReply(c
,c
->argv
[1],shared
.nullbulk
)) == NULL
||
5168 checkType(c
,set
,REDIS_SET
)) return;
5170 de
= dictGetRandomKey(set
->ptr
);
5172 addReply(c
,shared
.nullbulk
);
5174 robj
*ele
= dictGetEntryKey(de
);
5176 addReplyBulk(c
,ele
);
5177 dictDelete(set
->ptr
,ele
);
5178 if (htNeedsResize(set
->ptr
)) dictResize(set
->ptr
);
5179 if (dictSize((dict
*)set
->ptr
) == 0) deleteKey(c
->db
,c
->argv
[1]);
/* SRANDMEMBER: send a random member of the set stored at argv[1] as a
 * bulk reply, without removing it. A missing key is answered with
 * shared.nullbulk; a second shared.nullbulk reply is visible after the
 * random pick. */
5184 static void srandmemberCommand(redisClient
*c
) {
5188 if ((set
= lookupKeyReadOrReply(c
,c
->argv
[1],shared
.nullbulk
)) == NULL
||
5189 checkType(c
,set
,REDIS_SET
)) return;
5191 de
= dictGetRandomKey(set
->ptr
);
5193 addReply(c
,shared
.nullbulk
);
5195 robj
*ele
= dictGetEntryKey(de
);
5197 addReplyBulk(c
,ele
);
/* qsort() comparator: 's1' and 's2' point to 'dict *' array elements,
 * which are ordered by ascending dictSize().
 * NOTE(review): the difference of the two sizes is returned as an int;
 * if dictSize() yields an unsigned or wider type the result can wrap or
 * be truncated for very different sizes -- confirm. */
5201 static int qsortCompareSetsByCardinality(const void *s1
, const void *s2
) {
5202 dict
**d1
= (void*) s1
, **d2
= (void*) s2
;
5204 return dictSize(*d1
)-dictSize(*d2
);
/* Shared implementation behind sinterCommand() and sinterstoreCommand().
 *   setskeys - array of 'setsnum' key names of the sets to intersect.
 *   setsnum  - number of elements in 'setskeys'.
 *   dstkey   - key where the result is stored, or NULL to send the
 *              members to the client as a multi bulk reply.
 * The dict of every input set is collected in 'dv' (allocated with
 * zmalloc()); the array is sorted by cardinality, then every member of
 * the first (smallest) set is tested against all the other sets.
 * Visible replies for the special cases: shared.czero and
 * shared.emptymultibulk (next to a deleteKey() call on 'dstkey'), and
 * shared.wrongtypeerr for an input that is not a set.
 * NOTE(review): the statements releasing 'dv' are not visible in this
 * extract. */
5207 static void sinterGenericCommand(redisClient
*c
, robj
**setskeys
, unsigned long setsnum
, robj
*dstkey
) {
5208 dict
**dv
= zmalloc(sizeof(dict
*)*setsnum
);
5211 robj
*lenobj
= NULL
, *dstset
= NULL
;
5212 unsigned long j
, cardinality
= 0;
5214 for (j
= 0; j
< setsnum
; j
++) {
5218 lookupKeyWrite(c
->db
,setskeys
[j
]) :
5219 lookupKeyRead(c
->db
,setskeys
[j
]);
5223 if (deleteKey(c
->db
,dstkey
))
5225 addReply(c
,shared
.czero
);
5227 addReply(c
,shared
.emptymultibulk
);
5231 if (setobj
->type
!= REDIS_SET
) {
5233 addReply(c
,shared
.wrongtypeerr
);
5236 dv
[j
] = setobj
->ptr
;
5238 /* Sort sets from the smallest to largest, this will improve our
5239 * algorithm's performance */
5240 qsort(dv
,setsnum
,sizeof(dict
*),qsortCompareSetsByCardinality
);
5242 /* The first thing we should output is the total number of elements...
5243 * since this is a multi-bulk write, but at this stage we don't know
5244 * the intersection set size, so we use a trick, append an empty object
5245 * to the output list and save the pointer to later modify it with the
5248 lenobj
= createObject(REDIS_STRING
,NULL
);
5250 decrRefCount(lenobj
);
5252 /* If we have a target key where to store the resulting set
5253 * create this key with an empty set inside */
5254 dstset
= createSetObject();
5257 /* Iterate all the elements of the first (smallest) set, and test
5258 * the element against all the other sets, if at least one set does
5259 * not include the element it is discarded */
5260 di
= dictGetIterator(dv
[0]);
5262 while((de
= dictNext(di
)) != NULL
) {
5265 for (j
= 1; j
< setsnum
; j
++)
5266 if (dictFind(dv
[j
],dictGetEntryKey(de
)) == NULL
) break;
5268 continue; /* at least one set does not contain the member */
5269 ele
= dictGetEntryKey(de
);
5271 addReplyBulk(c
,ele
);
5274 dictAdd(dstset
->ptr
,ele
,NULL
);
5278 dictReleaseIterator(di
);
5281 /* Store the resulting set into the target, if the intersection
5282 * is not an empty set. */
5283 deleteKey(c
->db
,dstkey
);
5284 if (dictSize((dict
*)dstset
->ptr
) > 0) {
5285 dictAdd(c
->db
->dict
,dstkey
,dstset
);
5286 incrRefCount(dstkey
);
5287 addReplyLongLong(c
,dictSize((dict
*)dstset
->ptr
));
5289 decrRefCount(dstset
);
5290 addReply(c
,shared
.czero
);
/* Fill in the multi bulk header now that the cardinality is known. */
5294 lenobj
->ptr
= sdscatprintf(sdsempty(),"*%lu\r\n",cardinality
);
/* SINTER: intersect the sets named by argv[1..argc-1] and send the
 * result to the client (no destination key). */
5299 static void sinterCommand(redisClient
*c
) {
5300 sinterGenericCommand(c
,c
->argv
+1,c
->argc
-1,NULL
);
/* SINTERSTORE: intersect the sets named by argv[2..argc-1] and store
 * the result at the key argv[1]. */
5303 static void sinterstoreCommand(redisClient
*c
) {
5304 sinterGenericCommand(c
,c
->argv
+2,c
->argc
-2,c
->argv
[1]);
/* Operation selectors passed as the 'op' argument of
 * sunionDiffGenericCommand(). */
5307 #define REDIS_OP_UNION 0
5308 #define REDIS_OP_DIFF 1
5309 #define REDIS_OP_INTER 2
/* Shared implementation of the set union and set difference commands.
 *   setskeys - array of 'setsnum' key names of the input sets.
 *   setsnum  - number of elements in 'setskeys'.
 *   dstkey   - key where the result is stored, or NULL to send the
 *              members to the client as a multi bulk reply.
 *   op       - REDIS_OP_UNION or REDIS_OP_DIFF.
 * The result is accumulated in the temporary set 'dstset': members of
 * the first set (or of every set, for a union) are added to it, and for
 * a difference the members of the following sets are deleted from it.
 * Keys that do not exist are handled as empty sets; an input that is not
 * a set yields shared.wrongtypeerr.
 * NOTE(review): the statements releasing 'dv' and updating 'cardinality'
 * are not visible in this extract. */
5311 static void sunionDiffGenericCommand(redisClient
*c
, robj
**setskeys
, int setsnum
, robj
*dstkey
, int op
) {
5312 dict
**dv
= zmalloc(sizeof(dict
*)*setsnum
);
5315 robj
*dstset
= NULL
;
5316 int j
, cardinality
= 0;
5318 for (j
= 0; j
< setsnum
; j
++) {
5322 lookupKeyWrite(c
->db
,setskeys
[j
]) :
5323 lookupKeyRead(c
->db
,setskeys
[j
]);
5328 if (setobj
->type
!= REDIS_SET
) {
5330 addReply(c
,shared
.wrongtypeerr
);
5333 dv
[j
] = setobj
->ptr
;
5336 /* We need a temp set object to store our union. If the dstkey
5337 * is not NULL (that is, we are inside an SUNIONSTORE operation) then
5338 * this set object will be the resulting object to set into the target key */
5339 dstset
= createSetObject();
5341 /* Iterate all the elements of all the sets, add every element a single
5342 * time to the result set */
5343 for (j
= 0; j
< setsnum
; j
++) {
5344 if (op
== REDIS_OP_DIFF
&& j
== 0 && !dv
[j
]) break; /* result set is empty */
5345 if (!dv
[j
]) continue; /* non existing keys are like empty sets */
5347 di
= dictGetIterator(dv
[j
]);
5349 while((de
= dictNext(di
)) != NULL
) {
5352 /* dictAdd will not add the same element multiple times */
5353 ele
= dictGetEntryKey(de
);
5354 if (op
== REDIS_OP_UNION
|| j
== 0) {
5355 if (dictAdd(dstset
->ptr
,ele
,NULL
) == DICT_OK
) {
5359 } else if (op
== REDIS_OP_DIFF
) {
5360 if (dictDelete(dstset
->ptr
,ele
) == DICT_OK
) {
5365 dictReleaseIterator(di
);
5367 /* result set is empty? Exit asap. */
5368 if (op
== REDIS_OP_DIFF
&& cardinality
== 0) break;
5371 /* Output the content of the resulting set, if not in STORE mode */
5373 addReplySds(c
,sdscatprintf(sdsempty(),"*%d\r\n",cardinality
));
5374 di
= dictGetIterator(dstset
->ptr
);
5375 while((de
= dictNext(di
)) != NULL
) {
5378 ele
= dictGetEntryKey(de
);
5379 addReplyBulk(c
,ele
);
5381 dictReleaseIterator(di
);
5382 decrRefCount(dstset
);
5384 /* If we have a target key where to store the resulting set
5385 * create this key with the result set inside */
5386 deleteKey(c
->db
,dstkey
);
5387 if (dictSize((dict
*)dstset
->ptr
) > 0) {
5388 dictAdd(c
->db
->dict
,dstkey
,dstset
);
5389 incrRefCount(dstkey
);
5390 addReplyLongLong(c
,dictSize((dict
*)dstset
->ptr
));
5392 decrRefCount(dstset
);
5393 addReply(c
,shared
.czero
);
5400 static void sunionCommand(redisClient
*c
) {
5401 sunionDiffGenericCommand(c
,c
->argv
+1,c
->argc
-1,NULL
,REDIS_OP_UNION
);
5404 static void sunionstoreCommand(redisClient
*c
) {
5405 sunionDiffGenericCommand(c
,c
->argv
+2,c
->argc
-2,c
->argv
[1],REDIS_OP_UNION
);
5408 static void sdiffCommand(redisClient
*c
) {
5409 sunionDiffGenericCommand(c
,c
->argv
+1,c
->argc
-1,NULL
,REDIS_OP_DIFF
);
5412 static void sdiffstoreCommand(redisClient
*c
) {
5413 sunionDiffGenericCommand(c
,c
->argv
+2,c
->argc
-2,c
->argv
[1],REDIS_OP_DIFF
);
5416 /* ==================================== ZSets =============================== */
5418 /* ZSETs are ordered sets using two data structures to hold the same elements
5419 * in order to get O(log(N)) INSERT and REMOVE operations into a sorted
5420 * data structure.
5421 *
5422 * The elements are added to a hash table mapping Redis objects to scores.
5423 * At the same time the elements are added to a skip list mapping scores
5424 * to Redis objects (so objects are sorted by scores in this "view"). */
5426 /* This skiplist implementation is almost a C translation of the original
5427 * algorithm described by William Pugh in "Skip Lists: A Probabilistic
5428 * Alternative to Balanced Trees", modified in three ways:
5429 * a) this implementation allows for repeated values.
5430 * b) the comparison is not just by key (our 'score') but by satellite data.
5431 * c) there is a back pointer, so it's a doubly linked list with the back
5432 * pointers being only at "level 1". This allows to traverse the list
5433 * from tail to head, useful for ZREVRANGE. */
/* Allocate a skiplist node for the given 'level'.
 *
 * Three allocations are visible here: the node itself, a 'forward' array
 * with one pointer per level, and a 'span' array with level-1 entries (the
 * rest of the file indexes it as span[i-1], i.e. there is no span slot for
 * the lowest level).
 *
 * NOTE(review): the lines between the statements shown are not visible in
 * this view. For level == 1 the span allocation size is 0 -- confirm that
 * this case is guarded, and that score/obj are stored into the node. */
5435 static zskiplistNode
*zslCreateNode(int level
, double score
, robj
*obj
) {
5436 zskiplistNode
*zn
= zmalloc(sizeof(*zn
));
/* one forward pointer per level */
5438 zn
->forward
= zmalloc(sizeof(zskiplistNode
*) * level
);
/* one span counter per level above the lowest one */
5440 zn
->span
= zmalloc(sizeof(unsigned int) * (level
- 1));
/* Allocate a skiplist and its header node.
 *
 * The header is created with ZSKIPLIST_MAXLEVEL levels, a score of 0 and a
 * NULL object. All of its forward pointers are cleared, its spans are
 * zeroed and its backward pointer is set to NULL.
 *
 * NOTE(review): initialisation of the remaining zskiplist fields and the
 * return statement are not visible in this view. */
5448 static zskiplist
*zslCreate(void) {
5452 zsl
= zmalloc(sizeof(*zsl
));
/* the header always has the maximum number of levels */
5455 zsl
->header
= zslCreateNode(ZSKIPLIST_MAXLEVEL
,0,NULL
);
5456 for (j
= 0; j
< ZSKIPLIST_MAXLEVEL
; j
++) {
5457 zsl
->header
->forward
[j
] = NULL
;
5459 /* span has space for ZSKIPLIST_MAXLEVEL-1 elements */
5460 if (j
< ZSKIPLIST_MAXLEVEL
-1)
5461 zsl
->header
->span
[j
] = 0;
5463 zsl
->header
->backward
= NULL
;
/* Release a skiplist node: drop the reference the node holds on its object
 * and free the 'forward' array.
 *
 * NOTE(review): freeing of the span array and of the node itself is not
 * visible in this view -- confirm against the full function. */
5468 static void zslFreeNode(zskiplistNode
*node
) {
5469 decrRefCount(node
->obj
);
5470 zfree(node
->forward
);
5475 static void zslFree(zskiplist
*zsl
) {
5476 zskiplistNode
*node
= zsl
->header
->forward
[0], *next
;
5478 zfree(zsl
->header
->forward
);
5479 zfree(zsl
->header
->span
);
5482 next
= node
->forward
[0];
/* Pick the level for a new skiplist node. The returned value never exceeds
 * ZSKIPLIST_MAXLEVEL.
 *
 * The loop keeps going while the low 16 bits of random() are below
 * ZSKIPLIST_P * 0xFFFF, i.e. each iteration continues with probability of
 * roughly ZSKIPLIST_P.
 *
 * NOTE(review): the declaration of 'level' and the loop body are not visible
 * in this view; presumably 'level' starts at 1 and is incremented once per
 * iteration -- confirm. */
5489 static int zslRandomLevel(void) {
5491 while ((random()&0xFFFF) < (ZSKIPLIST_P
* 0xFFFF))
/* clamp to the maximum level supported by the header node */
5493 return (level
<ZSKIPLIST_MAXLEVEL
) ? level
: ZSKIPLIST_MAXLEVEL
;
5496 static void zslInsert(zskiplist
*zsl
, double score
, robj
*obj
) {
5497 zskiplistNode
*update
[ZSKIPLIST_MAXLEVEL
], *x
;
5498 unsigned int rank
[ZSKIPLIST_MAXLEVEL
];
5502 for (i
= zsl
->level
-1; i
>= 0; i
--) {
5503 /* store rank that is crossed to reach the insert position */
5504 rank
[i
] = i
== (zsl
->level
-1) ? 0 : rank
[i
+1];
5506 while (x
->forward
[i
] &&
5507 (x
->forward
[i
]->score
< score
||
5508 (x
->forward
[i
]->score
== score
&&
5509 compareStringObjects(x
->forward
[i
]->obj
,obj
) < 0))) {
5510 rank
[i
] += i
> 0 ? x
->span
[i
-1] : 1;
5515 /* We assume the key is not already inside, since we allow duplicated
5516 * scores, and the re-insertion of score and redis object should never
5517 * happen since the caller of zslInsert() should test in the hash table
5518 * if the element is already inside or not. */
5519 level
= zslRandomLevel();
5520 if (level
> zsl
->level
) {
5521 for (i
= zsl
->level
; i
< level
; i
++) {
5523 update
[i
] = zsl
->header
;
5524 update
[i
]->span
[i
-1] = zsl
->length
;
5528 x
= zslCreateNode(level
,score
,obj
);
5529 for (i
= 0; i
< level
; i
++) {
5530 x
->forward
[i
] = update
[i
]->forward
[i
];
5531 update
[i
]->forward
[i
] = x
;
5533 /* update span covered by update[i] as x is inserted here */
5535 x
->span
[i
-1] = update
[i
]->span
[i
-1] - (rank
[0] - rank
[i
]);
5536 update
[i
]->span
[i
-1] = (rank
[0] - rank
[i
]) + 1;
5540 /* increment span for untouched levels */
5541 for (i
= level
; i
< zsl
->level
; i
++) {
5542 update
[i
]->span
[i
-1]++;
5545 x
->backward
= (update
[0] == zsl
->header
) ? NULL
: update
[0];
5547 x
->forward
[0]->backward
= x
;
5553 /* Internal function used by zslDelete, zslDeleteByScore and zslDeleteByRank */
5554 void zslDeleteNode(zskiplist
*zsl
, zskiplistNode
*x
, zskiplistNode
**update
) {
5556 for (i
= 0; i
< zsl
->level
; i
++) {
5557 if (update
[i
]->forward
[i
] == x
) {
5559 update
[i
]->span
[i
-1] += x
->span
[i
-1] - 1;
5561 update
[i
]->forward
[i
] = x
->forward
[i
];
5563 /* invariant: i > 0, because update[0]->forward[0]
5564 * is always equal to x */
5565 update
[i
]->span
[i
-1] -= 1;
5568 if (x
->forward
[0]) {
5569 x
->forward
[0]->backward
= x
->backward
;
5571 zsl
->tail
= x
->backward
;
5573 while(zsl
->level
> 1 && zsl
->header
->forward
[zsl
->level
-1] == NULL
)
5578 /* Delete an element with matching score/object from the skiplist. */
5579 static int zslDelete(zskiplist
*zsl
, double score
, robj
*obj
) {
5580 zskiplistNode
*update
[ZSKIPLIST_MAXLEVEL
], *x
;
5584 for (i
= zsl
->level
-1; i
>= 0; i
--) {
5585 while (x
->forward
[i
] &&
5586 (x
->forward
[i
]->score
< score
||
5587 (x
->forward
[i
]->score
== score
&&
5588 compareStringObjects(x
->forward
[i
]->obj
,obj
) < 0)))
5592 /* We may have multiple elements with the same score, what we need
5593 * is to find the element with both the right score and object. */
5595 if (x
&& score
== x
->score
&& equalStringObjects(x
->obj
,obj
)) {
5596 zslDeleteNode(zsl
, x
, update
);
5600 return 0; /* not found */
5602 return 0; /* not found */
5605 /* Delete all the elements with score between min and max from the skiplist.
5606 * Min and max are inclusive, so a score >= min && score <= max is deleted.
5607 * Note that this function takes the reference to the hash table view of the
5608 * sorted set, in order to remove the elements from the hash table too. */
5609 static unsigned long zslDeleteRangeByScore(zskiplist
*zsl
, double min
, double max
, dict
*dict
) {
5610 zskiplistNode
*update
[ZSKIPLIST_MAXLEVEL
], *x
;
5611 unsigned long removed
= 0;
5615 for (i
= zsl
->level
-1; i
>= 0; i
--) {
5616 while (x
->forward
[i
] && x
->forward
[i
]->score
< min
)
5620 /* The search above stops on the last node with score < min (or on the
5621 * header). Every following node with score <= max is removed below. */
5623 while (x
&& x
->score
<= max
) {
5624 zskiplistNode
*next
= x
->forward
[0];
5625 zslDeleteNode(zsl
, x
, update
);
5626 dictDelete(dict
,x
->obj
);
5631 return removed
; /* not found */
5634 /* Delete all the elements with rank between start and end from the skiplist.
5635 * Start and end are inclusive. Note that start and end need to be 1-based */
5636 static unsigned long zslDeleteRangeByRank(zskiplist
*zsl
, unsigned int start
, unsigned int end
, dict
*dict
) {
5637 zskiplistNode
*update
[ZSKIPLIST_MAXLEVEL
], *x
;
5638 unsigned long traversed
= 0, removed
= 0;
5642 for (i
= zsl
->level
-1; i
>= 0; i
--) {
5643 while (x
->forward
[i
] && (traversed
+ (i
> 0 ? x
->span
[i
-1] : 1)) < start
) {
5644 traversed
+= i
> 0 ? x
->span
[i
-1] : 1;
5652 while (x
&& traversed
<= end
) {
5653 zskiplistNode
*next
= x
->forward
[0];
5654 zslDeleteNode(zsl
, x
, update
);
5655 dictDelete(dict
,x
->obj
);
5664 /* Find the first node having a score equal or greater than the specified one.
5665 * Returns NULL if there is no match. */
/* Walks the list from the highest level in use (zsl->level-1) down to level
 * 0, at each level testing whether the next node's score is still below
 * 'score', and returns the level-0 successor of the node reached.
 *
 * NOTE(review): the initialisation of 'x' (presumably zsl->header) and the
 * inner loop body (presumably x = x->forward[i]) are not visible in this
 * view -- confirm. */
5666 static zskiplistNode
*zslFirstWithScore(zskiplist
*zsl
, double score
) {
5671 for (i
= zsl
->level
-1; i
>= 0; i
--) {
5672 while (x
->forward
[i
] && x
->forward
[i
]->score
< score
)
5675 /* No forward node at any level has a score below 'score' any more, so
5676 * the level-0 successor of x (possibly NULL) is the first match. */
5677 return x
->forward
[0];
5680 /* Find the rank for an element by both score and key.
5681 * Returns 0 when the element cannot be found, rank otherwise.
5682 * Note that the rank is 1-based due to the span of zsl->header to the
5683 * first element. */
5684 static unsigned long zslGetRank(zskiplist
*zsl
, double score
, robj
*o
) {
5686 unsigned long rank
= 0;
5690 for (i
= zsl
->level
-1; i
>= 0; i
--) {
5691 while (x
->forward
[i
] &&
5692 (x
->forward
[i
]->score
< score
||
5693 (x
->forward
[i
]->score
== score
&&
5694 compareStringObjects(x
->forward
[i
]->obj
,o
) <= 0))) {
5695 rank
+= i
> 0 ? x
->span
[i
-1] : 1;
5699 /* x might be equal to zsl->header, so test if obj is non-NULL */
5700 if (x
->obj
&& equalStringObjects(x
->obj
,o
)) {
5707 /* Finds an element by its rank. The rank argument needs to be 1-based. */
5708 zskiplistNode
* zslGetElementByRank(zskiplist
*zsl
, unsigned long rank
) {
5710 unsigned long traversed
= 0;
5714 for (i
= zsl
->level
-1; i
>= 0; i
--) {
5715 while (x
->forward
[i
] && (traversed
+ (i
>0 ? x
->span
[i
-1] : 1)) <= rank
)
5717 traversed
+= i
> 0 ? x
->span
[i
-1] : 1;
5720 if (traversed
== rank
) {
5727 /* The actual Z-commands implementations */
5729 /* This generic command implements both ZADD and ZINCRBY.
5730 * scoreval is the score if the operation is a ZADD (doincrement == 0) or
5731 * the increment if the operation is a ZINCRBY (doincrement == 1). */
5732 static void zaddGenericCommand(redisClient
*c
, robj
*key
, robj
*ele
, double scoreval
, int doincrement
) {
5737 zsetobj
= lookupKeyWrite(c
->db
,key
);
5738 if (zsetobj
== NULL
) {
5739 zsetobj
= createZsetObject();
5740 dictAdd(c
->db
->dict
,key
,zsetobj
);
5743 if (zsetobj
->type
!= REDIS_ZSET
) {
5744 addReply(c
,shared
.wrongtypeerr
);
5750 /* Ok now since we implement both ZADD and ZINCRBY here the code
5751 * needs to handle the two different conditions. It's all about setting
5752 * '*score', that is, the new score to set, to the right value. */
5753 score
= zmalloc(sizeof(double));
5757 /* Read the old score. If the element was not present starts from 0 */
5758 de
= dictFind(zs
->dict
,ele
);
5760 double *oldscore
= dictGetEntryVal(de
);
5761 *score
= *oldscore
+ scoreval
;
5769 /* What follows is a simple remove and re-insert operation that is common
5770 * to both ZADD and ZINCRBY... */
5771 if (dictAdd(zs
->dict
,ele
,score
) == DICT_OK
) {
5772 /* case 1: New element */
5773 incrRefCount(ele
); /* added to hash */
5774 zslInsert(zs
->zsl
,*score
,ele
);
5775 incrRefCount(ele
); /* added to skiplist */
5778 addReplyDouble(c
,*score
);
5780 addReply(c
,shared
.cone
);
5785 /* case 2: Score update operation */
5786 de
= dictFind(zs
->dict
,ele
);
5787 redisAssert(de
!= NULL
);
5788 oldscore
= dictGetEntryVal(de
);
5789 if (*score
!= *oldscore
) {
5792 /* Remove and insert the element in the skip list with new score */
5793 deleted
= zslDelete(zs
->zsl
,*oldscore
,ele
);
5794 redisAssert(deleted
!= 0);
5795 zslInsert(zs
->zsl
,*score
,ele
);
5797 /* Update the score in the hash table */
5798 dictReplace(zs
->dict
,ele
,score
);
5804 addReplyDouble(c
,*score
);
5806 addReply(c
,shared
.czero
);
5810 static void zaddCommand(redisClient
*c
) {
5813 if (getDoubleFromObjectOrReply(c
, c
->argv
[2], &scoreval
, NULL
) != REDIS_OK
) return;
5814 zaddGenericCommand(c
,c
->argv
[1],c
->argv
[3],scoreval
,0);
5817 static void zincrbyCommand(redisClient
*c
) {
5820 if (getDoubleFromObjectOrReply(c
, c
->argv
[2], &scoreval
, NULL
) != REDIS_OK
) return;
5821 zaddGenericCommand(c
,c
->argv
[1],c
->argv
[3],scoreval
,1);
5824 static void zremCommand(redisClient
*c
) {
5831 if ((zsetobj
= lookupKeyWriteOrReply(c
,c
->argv
[1],shared
.czero
)) == NULL
||
5832 checkType(c
,zsetobj
,REDIS_ZSET
)) return;
5835 de
= dictFind(zs
->dict
,c
->argv
[2]);
5837 addReply(c
,shared
.czero
);
5840 /* Delete from the skiplist */
5841 oldscore
= dictGetEntryVal(de
);
5842 deleted
= zslDelete(zs
->zsl
,*oldscore
,c
->argv
[2]);
5843 redisAssert(deleted
!= 0);
5845 /* Delete from the hash table */
5846 dictDelete(zs
->dict
,c
->argv
[2]);
5847 if (htNeedsResize(zs
->dict
)) dictResize(zs
->dict
);
5848 if (dictSize(zs
->dict
) == 0) deleteKey(c
->db
,c
->argv
[1]);
5850 addReply(c
,shared
.cone
);
5853 static void zremrangebyscoreCommand(redisClient
*c
) {
5860 if ((getDoubleFromObjectOrReply(c
, c
->argv
[2], &min
, NULL
) != REDIS_OK
) ||
5861 (getDoubleFromObjectOrReply(c
, c
->argv
[3], &max
, NULL
) != REDIS_OK
)) return;
5863 if ((zsetobj
= lookupKeyWriteOrReply(c
,c
->argv
[1],shared
.czero
)) == NULL
||
5864 checkType(c
,zsetobj
,REDIS_ZSET
)) return;
5867 deleted
= zslDeleteRangeByScore(zs
->zsl
,min
,max
,zs
->dict
);
5868 if (htNeedsResize(zs
->dict
)) dictResize(zs
->dict
);
5869 if (dictSize(zs
->dict
) == 0) deleteKey(c
->db
,c
->argv
[1]);
5870 server
.dirty
+= deleted
;
5871 addReplyLongLong(c
,deleted
);
5874 static void zremrangebyrankCommand(redisClient
*c
) {
5882 if ((getLongFromObjectOrReply(c
, c
->argv
[2], &start
, NULL
) != REDIS_OK
) ||
5883 (getLongFromObjectOrReply(c
, c
->argv
[3], &end
, NULL
) != REDIS_OK
)) return;
5885 if ((zsetobj
= lookupKeyWriteOrReply(c
,c
->argv
[1],shared
.czero
)) == NULL
||
5886 checkType(c
,zsetobj
,REDIS_ZSET
)) return;
5888 llen
= zs
->zsl
->length
;
5890 /* convert negative indexes */
5891 if (start
< 0) start
= llen
+start
;
5892 if (end
< 0) end
= llen
+end
;
5893 if (start
< 0) start
= 0;
5894 if (end
< 0) end
= 0;
5896 /* indexes sanity checks */
5897 if (start
> end
|| start
>= llen
) {
5898 addReply(c
,shared
.czero
);
5901 if (end
>= llen
) end
= llen
-1;
5903 /* increment start and end because zsl*Rank functions
5904 * use 1-based rank */
5905 deleted
= zslDeleteRangeByRank(zs
->zsl
,start
+1,end
+1,zs
->dict
);
5906 if (htNeedsResize(zs
->dict
)) dictResize(zs
->dict
);
5907 if (dictSize(zs
->dict
) == 0) deleteKey(c
->db
,c
->argv
[1]);
5908 server
.dirty
+= deleted
;
5909 addReplyLongLong(c
, deleted
);
5917 static int qsortCompareZsetopsrcByCardinality(const void *s1
, const void *s2
) {
5918 zsetopsrc
*d1
= (void*) s1
, *d2
= (void*) s2
;
5919 unsigned long size1
, size2
;
5920 size1
= d1
->dict
? dictSize(d1
->dict
) : 0;
5921 size2
= d2
->dict
? dictSize(d2
->dict
) : 0;
5922 return size1
- size2
;
5925 #define REDIS_AGGR_SUM 1
5926 #define REDIS_AGGR_MIN 2
5927 #define REDIS_AGGR_MAX 3
5928 #define zunionInterDictValue(_e) (dictGetEntryVal(_e) == NULL ? 1.0 : *(double*)dictGetEntryVal(_e))
5930 inline static void zunionInterAggregate(double *target
, double val
, int aggregate
) {
5931 if (aggregate
== REDIS_AGGR_SUM
) {
5932 *target
= *target
+ val
;
5933 } else if (aggregate
== REDIS_AGGR_MIN
) {
5934 *target
= val
< *target
? val
: *target
;
5935 } else if (aggregate
== REDIS_AGGR_MAX
) {
5936 *target
= val
> *target
? val
: *target
;
5939 redisPanic("Unknown ZUNION/INTER aggregate type");
5943 static void zunionInterGenericCommand(redisClient
*c
, robj
*dstkey
, int op
) {
5945 int aggregate
= REDIS_AGGR_SUM
;
5952 /* expect setnum input keys to be given */
5953 setnum
= atoi(c
->argv
[2]->ptr
);
5955 addReplySds(c
,sdsnew("-ERR at least 1 input key is needed for ZUNIONSTORE/ZINTERSTORE\r\n"));
5959 /* test if the expected number of keys would overflow */
5960 if (3+setnum
> c
->argc
) {
5961 addReply(c
,shared
.syntaxerr
);
5965 /* read keys to be used for input */
5966 src
= zmalloc(sizeof(zsetopsrc
) * setnum
);
5967 for (i
= 0, j
= 3; i
< setnum
; i
++, j
++) {
5968 robj
*obj
= lookupKeyWrite(c
->db
,c
->argv
[j
]);
5972 if (obj
->type
== REDIS_ZSET
) {
5973 src
[i
].dict
= ((zset
*)obj
->ptr
)->dict
;
5974 } else if (obj
->type
== REDIS_SET
) {
5975 src
[i
].dict
= (obj
->ptr
);
5978 addReply(c
,shared
.wrongtypeerr
);
5983 /* default all weights to 1 */
5984 src
[i
].weight
= 1.0;
5987 /* parse optional extra arguments */
5989 int remaining
= c
->argc
- j
;
5992 if (remaining
>= (setnum
+ 1) && !strcasecmp(c
->argv
[j
]->ptr
,"weights")) {
5994 for (i
= 0; i
< setnum
; i
++, j
++, remaining
--) {
5995 if (getDoubleFromObjectOrReply(c
, c
->argv
[j
], &src
[i
].weight
, NULL
) != REDIS_OK
)
5998 } else if (remaining
>= 2 && !strcasecmp(c
->argv
[j
]->ptr
,"aggregate")) {
6000 if (!strcasecmp(c
->argv
[j
]->ptr
,"sum")) {
6001 aggregate
= REDIS_AGGR_SUM
;
6002 } else if (!strcasecmp(c
->argv
[j
]->ptr
,"min")) {
6003 aggregate
= REDIS_AGGR_MIN
;
6004 } else if (!strcasecmp(c
->argv
[j
]->ptr
,"max")) {
6005 aggregate
= REDIS_AGGR_MAX
;
6008 addReply(c
,shared
.syntaxerr
);
6014 addReply(c
,shared
.syntaxerr
);
6020 /* sort sets from the smallest to largest, this will improve our
6021 * algorithm's performance */
6022 qsort(src
,setnum
,sizeof(zsetopsrc
),qsortCompareZsetopsrcByCardinality
);
6024 dstobj
= createZsetObject();
6025 dstzset
= dstobj
->ptr
;
6027 if (op
== REDIS_OP_INTER
) {
6028 /* skip going over all entries if the smallest zset is NULL or empty */
6029 if (src
[0].dict
&& dictSize(src
[0].dict
) > 0) {
6030 /* precondition: as src[0].dict is non-empty and the zsets are ordered
6031 * from small to large, all src[i > 0].dict are non-empty too */
6032 di
= dictGetIterator(src
[0].dict
);
6033 while((de
= dictNext(di
)) != NULL
) {
6034 double *score
= zmalloc(sizeof(double)), value
;
6035 *score
= src
[0].weight
* zunionInterDictValue(de
);
6037 for (j
= 1; j
< setnum
; j
++) {
6038 dictEntry
*other
= dictFind(src
[j
].dict
,dictGetEntryKey(de
));
6040 value
= src
[j
].weight
* zunionInterDictValue(other
);
6041 zunionInterAggregate(score
, value
, aggregate
);
6047 /* skip entry when not present in every source dict */
6051 robj
*o
= dictGetEntryKey(de
);
6052 dictAdd(dstzset
->dict
,o
,score
);
6053 incrRefCount(o
); /* added to dictionary */
6054 zslInsert(dstzset
->zsl
,*score
,o
);
6055 incrRefCount(o
); /* added to skiplist */
6058 dictReleaseIterator(di
);
6060 } else if (op
== REDIS_OP_UNION
) {
6061 for (i
= 0; i
< setnum
; i
++) {
6062 if (!src
[i
].dict
) continue;
6064 di
= dictGetIterator(src
[i
].dict
);
6065 while((de
= dictNext(di
)) != NULL
) {
6066 /* skip key when already processed */
6067 if (dictFind(dstzset
->dict
,dictGetEntryKey(de
)) != NULL
) continue;
6069 double *score
= zmalloc(sizeof(double)), value
;
6070 *score
= src
[i
].weight
* zunionInterDictValue(de
);
6072 /* because the zsets are sorted by size, its only possible
6073 * for sets at larger indices to hold this entry */
6074 for (j
= (i
+1); j
< setnum
; j
++) {
6075 dictEntry
*other
= dictFind(src
[j
].dict
,dictGetEntryKey(de
));
6077 value
= src
[j
].weight
* zunionInterDictValue(other
);
6078 zunionInterAggregate(score
, value
, aggregate
);
6082 robj
*o
= dictGetEntryKey(de
);
6083 dictAdd(dstzset
->dict
,o
,score
);
6084 incrRefCount(o
); /* added to dictionary */
6085 zslInsert(dstzset
->zsl
,*score
,o
);
6086 incrRefCount(o
); /* added to skiplist */
6088 dictReleaseIterator(di
);
6091 /* unknown operator */
6092 redisAssert(op
== REDIS_OP_INTER
|| op
== REDIS_OP_UNION
);
6095 deleteKey(c
->db
,dstkey
);
6096 if (dstzset
->zsl
->length
) {
6097 dictAdd(c
->db
->dict
,dstkey
,dstobj
);
6098 incrRefCount(dstkey
);
6099 addReplyLongLong(c
, dstzset
->zsl
->length
);
6102 decrRefCount(dstobj
);
6103 addReply(c
, shared
.czero
);
6108 static void zunionstoreCommand(redisClient
*c
) {
6109 zunionInterGenericCommand(c
,c
->argv
[1], REDIS_OP_UNION
);
6112 static void zinterstoreCommand(redisClient
*c
) {
6113 zunionInterGenericCommand(c
,c
->argv
[1], REDIS_OP_INTER
);
6116 static void zrangeGenericCommand(redisClient
*c
, int reverse
) {
6128 if ((getLongFromObjectOrReply(c
, c
->argv
[2], &start
, NULL
) != REDIS_OK
) ||
6129 (getLongFromObjectOrReply(c
, c
->argv
[3], &end
, NULL
) != REDIS_OK
)) return;
6131 if (c
->argc
== 5 && !strcasecmp(c
->argv
[4]->ptr
,"withscores")) {
6133 } else if (c
->argc
>= 5) {
6134 addReply(c
,shared
.syntaxerr
);
6138 if ((o
= lookupKeyReadOrReply(c
,c
->argv
[1],shared
.emptymultibulk
)) == NULL
6139 || checkType(c
,o
,REDIS_ZSET
)) return;
6144 /* convert negative indexes */
6145 if (start
< 0) start
= llen
+start
;
6146 if (end
< 0) end
= llen
+end
;
6147 if (start
< 0) start
= 0;
6148 if (end
< 0) end
= 0;
6150 /* indexes sanity checks */
6151 if (start
> end
|| start
>= llen
) {
6152 /* Out of range start or start > end result in empty list */
6153 addReply(c
,shared
.emptymultibulk
);
6156 if (end
>= llen
) end
= llen
-1;
6157 rangelen
= (end
-start
)+1;
6159 /* check if starting point is trivial, before searching
6160 * the element in log(N) time */
6162 ln
= start
== 0 ? zsl
->tail
: zslGetElementByRank(zsl
, llen
-start
);
6165 zsl
->header
->forward
[0] : zslGetElementByRank(zsl
, start
+1);
6168 /* Return the result in form of a multi-bulk reply */
6169 addReplySds(c
,sdscatprintf(sdsempty(),"*%d\r\n",
6170 withscores
? (rangelen
*2) : rangelen
));
6171 for (j
= 0; j
< rangelen
; j
++) {
6173 addReplyBulk(c
,ele
);
6175 addReplyDouble(c
,ln
->score
);
6176 ln
= reverse
? ln
->backward
: ln
->forward
[0];
6180 static void zrangeCommand(redisClient
*c
) {
6181 zrangeGenericCommand(c
,0);
6184 static void zrevrangeCommand(redisClient
*c
) {
6185 zrangeGenericCommand(c
,1);
6188 /* This command implements both ZRANGEBYSCORE and ZCOUNT.
6189 * If justcount is non-zero, just the count is returned. */
6190 static void genericZrangebyscoreCommand(redisClient
*c
, int justcount
) {
6193 int minex
= 0, maxex
= 0; /* are min or max exclusive? */
6194 int offset
= 0, limit
= -1;
6198 /* Parse the min-max interval. If one of the values is prefixed
6199 * by the "(" character, it's considered "open". For instance
6200 * ZRANGEBYSCORE zset (1.5 (2.5 will match min < x < max
6201 * ZRANGEBYSCORE zset 1.5 2.5 will instead match min <= x <= max */
6202 if (((char*)c
->argv
[2]->ptr
)[0] == '(') {
6203 min
= strtod((char*)c
->argv
[2]->ptr
+1,NULL
);
6206 min
= strtod(c
->argv
[2]->ptr
,NULL
);
6208 if (((char*)c
->argv
[3]->ptr
)[0] == '(') {
6209 max
= strtod((char*)c
->argv
[3]->ptr
+1,NULL
);
6212 max
= strtod(c
->argv
[3]->ptr
,NULL
);
6215 /* Parse "WITHSCORES": note that if the command was called with
6216 * the name ZCOUNT then we are sure that c->argc == 4, so we'll never
6217 * enter the following paths to parse WITHSCORES and LIMIT. */
6218 if (c
->argc
== 5 || c
->argc
== 8) {
6219 if (strcasecmp(c
->argv
[c
->argc
-1]->ptr
,"withscores") == 0)
6224 if (c
->argc
!= (4 + withscores
) && c
->argc
!= (7 + withscores
))
6228 sdsnew("-ERR wrong number of arguments for ZRANGEBYSCORE\r\n"));
6233 if (c
->argc
== (7 + withscores
) && strcasecmp(c
->argv
[4]->ptr
,"limit")) {
6234 addReply(c
,shared
.syntaxerr
);
6236 } else if (c
->argc
== (7 + withscores
)) {
6237 offset
= atoi(c
->argv
[5]->ptr
);
6238 limit
= atoi(c
->argv
[6]->ptr
);
6239 if (offset
< 0) offset
= 0;
6242 /* Ok, lookup the key and get the range */
6243 o
= lookupKeyRead(c
->db
,c
->argv
[1]);
6245 addReply(c
,justcount
? shared
.czero
: shared
.emptymultibulk
);
6247 if (o
->type
!= REDIS_ZSET
) {
6248 addReply(c
,shared
.wrongtypeerr
);
6250 zset
*zsetobj
= o
->ptr
;
6251 zskiplist
*zsl
= zsetobj
->zsl
;
6253 robj
*ele
, *lenobj
= NULL
;
6254 unsigned long rangelen
= 0;
6256 /* Get the first node with the score >= min, or with
6257 * score > min if 'minex' is true. */
6258 ln
= zslFirstWithScore(zsl
,min
);
6259 while (minex
&& ln
&& ln
->score
== min
) ln
= ln
->forward
[0];
6262 /* No element matching the specified interval */
6263 addReply(c
,justcount
? shared
.czero
: shared
.emptymultibulk
);
6267 /* We don't know in advance how many matching elements there
6268 * are in the list, so we push this object that will represent
6269 * the multi-bulk length in the output buffer, and will "fix"
6270 * it later */
6272 lenobj
= createObject(REDIS_STRING
,NULL
);
6274 decrRefCount(lenobj
);
6277 while(ln
&& (maxex
? (ln
->score
< max
) : (ln
->score
<= max
))) {
6280 ln
= ln
->forward
[0];
6283 if (limit
== 0) break;
6286 addReplyBulk(c
,ele
);
6288 addReplyDouble(c
,ln
->score
);
6290 ln
= ln
->forward
[0];
6292 if (limit
> 0) limit
--;
6295 addReplyLongLong(c
,(long)rangelen
);
6297 lenobj
->ptr
= sdscatprintf(sdsempty(),"*%lu\r\n",
6298 withscores
? (rangelen
*2) : rangelen
);
6304 static void zrangebyscoreCommand(redisClient
*c
) {
6305 genericZrangebyscoreCommand(c
,0);
6308 static void zcountCommand(redisClient
*c
) {
6309 genericZrangebyscoreCommand(c
,1);
6312 static void zcardCommand(redisClient
*c
) {
6316 if ((o
= lookupKeyReadOrReply(c
,c
->argv
[1],shared
.czero
)) == NULL
||
6317 checkType(c
,o
,REDIS_ZSET
)) return;
6320 addReplyUlong(c
,zs
->zsl
->length
);
6323 static void zscoreCommand(redisClient
*c
) {
6328 if ((o
= lookupKeyReadOrReply(c
,c
->argv
[1],shared
.nullbulk
)) == NULL
||
6329 checkType(c
,o
,REDIS_ZSET
)) return;
6332 de
= dictFind(zs
->dict
,c
->argv
[2]);
6334 addReply(c
,shared
.nullbulk
);
6336 double *score
= dictGetEntryVal(de
);
6338 addReplyDouble(c
,*score
);
6342 static void zrankGenericCommand(redisClient
*c
, int reverse
) {
6350 if ((o
= lookupKeyReadOrReply(c
,c
->argv
[1],shared
.nullbulk
)) == NULL
||
6351 checkType(c
,o
,REDIS_ZSET
)) return;
6355 de
= dictFind(zs
->dict
,c
->argv
[2]);
6357 addReply(c
,shared
.nullbulk
);
6361 score
= dictGetEntryVal(de
);
6362 rank
= zslGetRank(zsl
, *score
, c
->argv
[2]);
6365 addReplyLongLong(c
, zsl
->length
- rank
);
6367 addReplyLongLong(c
, rank
-1);
6370 addReply(c
,shared
.nullbulk
);
6374 static void zrankCommand(redisClient
*c
) {
6375 zrankGenericCommand(c
, 0);
6378 static void zrevrankCommand(redisClient
*c
) {
6379 zrankGenericCommand(c
, 1);
6382 /* ========================= Hashes utility functions ======================= */
6383 #define REDIS_HASH_KEY 1
6384 #define REDIS_HASH_VALUE 2
6386 /* Check the length of a number of objects to see if we need to convert a
6387 * zipmap to a real hash. Note that we only check string encoded objects
6388 * as their string length can be queried in constant time. */
6389 static void hashTryConversion(robj
*subject
, robj
**argv
, int start
, int end
) {
6391 if (subject
->encoding
!= REDIS_ENCODING_ZIPMAP
) return;
6393 for (i
= start
; i
<= end
; i
++) {
6394 if (argv
[i
]->encoding
== REDIS_ENCODING_RAW
&&
6395 sdslen(argv
[i
]->ptr
) > server
.hash_max_zipmap_value
)
6397 convertToRealHash(subject
);
6403 /* Encode given objects in-place when the hash uses a dict. */
6404 static void hashTryObjectEncoding(robj
*subject
, robj
**o1
, robj
**o2
) {
6405 if (subject
->encoding
== REDIS_ENCODING_HT
) {
6406 if (o1
) *o1
= tryObjectEncoding(*o1
);
6407 if (o2
) *o2
= tryObjectEncoding(*o2
);
6411 /* Get the value from a hash identified by key. Returns either a string
6412 * object or NULL if the value cannot be found. The refcount of the object
6413 * is always increased by 1 when the value was found. */
6414 static robj
*hashGet(robj
*o
, robj
*key
) {
6416 if (o
->encoding
== REDIS_ENCODING_ZIPMAP
) {
6419 key
= getDecodedObject(key
);
6420 if (zipmapGet(o
->ptr
,key
->ptr
,sdslen(key
->ptr
),&v
,&vlen
)) {
6421 value
= createStringObject((char*)v
,vlen
);
6425 dictEntry
*de
= dictFind(o
->ptr
,key
);
6427 value
= dictGetEntryVal(de
);
6428 incrRefCount(value
);
6434 /* Test if the key exists in the given hash. Returns 1 if the key
6435 * exists and 0 when it doesn't. */
6436 static int hashExists(robj
*o
, robj
*key
) {
6437 if (o
->encoding
== REDIS_ENCODING_ZIPMAP
) {
6438 key
= getDecodedObject(key
);
6439 if (zipmapExists(o
->ptr
,key
->ptr
,sdslen(key
->ptr
))) {
6445 if (dictFind(o
->ptr
,key
) != NULL
) {
6452 /* Add an element, discard the old if the key already exists.
6453 * Return 0 on insert and 1 on update. */
6454 static int hashSet(robj
*o
, robj
*key
, robj
*value
) {
6456 if (o
->encoding
== REDIS_ENCODING_ZIPMAP
) {
6457 key
= getDecodedObject(key
);
6458 value
= getDecodedObject(value
);
6459 o
->ptr
= zipmapSet(o
->ptr
,
6460 key
->ptr
,sdslen(key
->ptr
),
6461 value
->ptr
,sdslen(value
->ptr
), &update
);
6463 decrRefCount(value
);
6465 /* Check if the zipmap needs to be upgraded to a real hash table */
6466 if (zipmapLen(o
->ptr
) > server
.hash_max_zipmap_entries
)
6467 convertToRealHash(o
);
6469 if (dictReplace(o
->ptr
,key
,value
)) {
6476 incrRefCount(value
);
6481 /* Delete an element from a hash.
6482 * Return 1 on deleted and 0 on not found. */
6483 static int hashDelete(robj
*o
, robj
*key
) {
6485 if (o
->encoding
== REDIS_ENCODING_ZIPMAP
) {
6486 key
= getDecodedObject(key
);
6487 o
->ptr
= zipmapDel(o
->ptr
,key
->ptr
,sdslen(key
->ptr
), &deleted
);
6490 deleted
= dictDelete((dict
*)o
->ptr
,key
) == DICT_OK
;
6491 /* Always check if the dictionary needs a resize after a delete. */
6492 if (deleted
&& htNeedsResize(o
->ptr
)) dictResize(o
->ptr
);
6497 /* Return the number of elements in a hash. */
6498 static unsigned long hashLength(robj
*o
) {
6499 return (o
->encoding
== REDIS_ENCODING_ZIPMAP
) ?
6500 zipmapLen((unsigned char*)o
->ptr
) : dictSize((dict
*)o
->ptr
);
6503 /* Structure to hold the hash iteration abstraction. Note that iteration over
6504 * hashes involves both fields and values. Because it is possible that
6505 * not both are required, store pointers in the iterator to avoid
6506 * unnecessary memory allocation for fields/values. */
6510 unsigned char *zk
, *zv
;
6511 unsigned int zklen
, zvlen
;
/* Allocate a hashIterator for 'subject'.
 *
 * The iterator records the encoding of the hash at creation time and sets
 * up the matching cursor: a zipmap cursor (zi) for REDIS_ENCODING_ZIPMAP or
 * a dict iterator (di) for REDIS_ENCODING_HT.
 *
 * NOTE(review): handling of any other encoding and the return statement are
 * not visible in this view. */
6517 static hashIterator
*hashInitIterator(robj
*subject
) {
6518 hashIterator
*hi
= zmalloc(sizeof(hashIterator
));
/* remember the encoding: later calls dispatch on hi->encoding */
6519 hi
->encoding
= subject
->encoding
;
6520 if (hi
->encoding
== REDIS_ENCODING_ZIPMAP
) {
6521 hi
->zi
= zipmapRewind(subject
->ptr
);
6522 } else if (hi
->encoding
== REDIS_ENCODING_HT
) {
6523 hi
->di
= dictGetIterator(subject
->ptr
);
/* Release the resources held by a hashIterator. Only the dict-backed
 * variant owns a nested iterator that has to be released.
 *
 * NOTE(review): freeing of 'hi' itself is not visible in this view --
 * confirm against the full function. */
6530 static void hashReleaseIterator(hashIterator
*hi
) {
6531 if (hi
->encoding
== REDIS_ENCODING_HT
) {
6532 dictReleaseIterator(hi
->di
);
6537 /* Move to the next entry in the hash. Return REDIS_OK when the next entry
6538 * could be found and REDIS_ERR when the iterator reaches the end. */
6539 static int hashNext(hashIterator
*hi
) {
6540 if (hi
->encoding
== REDIS_ENCODING_ZIPMAP
) {
/* zipmapNext() is handed pointers to zk/zklen and zv/zvlen and its return
 * value becomes the new cursor; NULL means the end was reached */
6541 if ((hi
->zi
= zipmapNext(hi
->zi
, &hi
->zk
, &hi
->zklen
,
6542 &hi
->zv
, &hi
->zvlen
)) == NULL
) return REDIS_ERR
;
/* dict-backed hash: keep the current entry in hi->de */
6544 if ((hi
->de
= dictNext(hi
->di
)) == NULL
) return REDIS_ERR
;
6549 /* Get key or value object at current iteration position.
6550 * This increases the refcount of the field object by 1. */
/* 'what' is tested against REDIS_HASH_KEY to choose between the field and
 * the value of the current entry. */
6551 static robj
*hashCurrent(hashIterator
*hi
, int what
) {
6553 if (hi
->encoding
== REDIS_ENCODING_ZIPMAP
) {
/* zipmap entries are raw buffers: build a new string object from them */
6554 if (what
& REDIS_HASH_KEY
) {
6555 o
= createStringObject((char*)hi
->zk
,hi
->zklen
);
6557 o
= createStringObject((char*)hi
->zv
,hi
->zvlen
);
/* dict-backed hash: take the object stored in the current entry */
6560 if (what
& REDIS_HASH_KEY
) {
6561 o
= dictGetEntryKey(hi
->de
);
6563 o
= dictGetEntryVal(hi
->de
);
6570 static robj
*hashLookupWriteOrCreate(redisClient
*c
, robj
*key
) {
6571 robj
*o
= lookupKeyWrite(c
->db
,key
);
6573 o
= createHashObject();
6574 dictAdd(c
->db
->dict
,key
,o
);
6577 if (o
->type
!= REDIS_HASH
) {
6578 addReply(c
,shared
.wrongtypeerr
);
6585 /* ============================= Hash commands ============================== */
6586 static void hsetCommand(redisClient
*c
) {
6590 if ((o
= hashLookupWriteOrCreate(c
,c
->argv
[1])) == NULL
) return;
6591 hashTryConversion(o
,c
->argv
,2,3);
6592 hashTryObjectEncoding(o
,&c
->argv
[2], &c
->argv
[3]);
6593 update
= hashSet(o
,c
->argv
[2],c
->argv
[3]);
6594 addReply(c
, update
? shared
.czero
: shared
.cone
);
6598 static void hsetnxCommand(redisClient
*c
) {
6600 if ((o
= hashLookupWriteOrCreate(c
,c
->argv
[1])) == NULL
) return;
6601 hashTryConversion(o
,c
->argv
,2,3);
6603 if (hashExists(o
, c
->argv
[2])) {
6604 addReply(c
, shared
.czero
);
6606 hashTryObjectEncoding(o
,&c
->argv
[2], &c
->argv
[3]);
6607 hashSet(o
,c
->argv
[2],c
->argv
[3]);
6608 addReply(c
, shared
.cone
);
6613 static void hmsetCommand(redisClient
*c
) {
6617 if ((c
->argc
% 2) == 1) {
6618 addReplySds(c
,sdsnew("-ERR wrong number of arguments for HMSET\r\n"));
6622 if ((o
= hashLookupWriteOrCreate(c
,c
->argv
[1])) == NULL
) return;
6623 hashTryConversion(o
,c
->argv
,2,c
->argc
-1);
6624 for (i
= 2; i
< c
->argc
; i
+= 2) {
6625 hashTryObjectEncoding(o
,&c
->argv
[i
], &c
->argv
[i
+1]);
6626 hashSet(o
,c
->argv
[i
],c
->argv
[i
+1]);
6628 addReply(c
, shared
.ok
);
6632 static void hincrbyCommand(redisClient
*c
) {
6633 long long value
, incr
;
6634 robj
*o
, *current
, *new;
6636 if (getLongLongFromObjectOrReply(c
,c
->argv
[3],&incr
,NULL
) != REDIS_OK
) return;
6637 if ((o
= hashLookupWriteOrCreate(c
,c
->argv
[1])) == NULL
) return;
6638 if ((current
= hashGet(o
,c
->argv
[2])) != NULL
) {
6639 if (getLongLongFromObjectOrReply(c
,current
,&value
,
6640 "hash value is not an integer") != REDIS_OK
) {
6641 decrRefCount(current
);
6644 decrRefCount(current
);
6650 new = createStringObjectFromLongLong(value
);
6651 hashTryObjectEncoding(o
,&c
->argv
[2],NULL
);
6652 hashSet(o
,c
->argv
[2],new);
6654 addReplyLongLong(c
,value
);
6658 static void hgetCommand(redisClient
*c
) {
6660 if ((o
= lookupKeyReadOrReply(c
,c
->argv
[1],shared
.nullbulk
)) == NULL
||
6661 checkType(c
,o
,REDIS_HASH
)) return;
6663 if ((value
= hashGet(o
,c
->argv
[2])) != NULL
) {
6664 addReplyBulk(c
,value
);
6665 decrRefCount(value
);
6667 addReply(c
,shared
.nullbulk
);
6671 static void hmgetCommand(redisClient
*c
) {
6674 o
= lookupKeyRead(c
->db
,c
->argv
[1]);
6675 if (o
!= NULL
&& o
->type
!= REDIS_HASH
) {
6676 addReply(c
,shared
.wrongtypeerr
);
6679 /* Note the check for o != NULL happens inside the loop. This is
6680 * done because objects that cannot be found are considered to be
6681 * an empty hash. The reply should then be a series of NULLs. */
6682 addReplySds(c
,sdscatprintf(sdsempty(),"*%d\r\n",c
->argc
-2));
6683 for (i
= 2; i
< c
->argc
; i
++) {
6684 if (o
!= NULL
&& (value
= hashGet(o
,c
->argv
[i
])) != NULL
) {
6685 addReplyBulk(c
,value
);
6686 decrRefCount(value
);
6688 addReply(c
,shared
.nullbulk
);
6693 static void hdelCommand(redisClient
*c
) {
6695 if ((o
= lookupKeyWriteOrReply(c
,c
->argv
[1],shared
.czero
)) == NULL
||
6696 checkType(c
,o
,REDIS_HASH
)) return;
6698 if (hashDelete(o
,c
->argv
[2])) {
6699 if (hashLength(o
) == 0) deleteKey(c
->db
,c
->argv
[1]);
6700 addReply(c
,shared
.cone
);
6703 addReply(c
,shared
.czero
);
6707 static void hlenCommand(redisClient
*c
) {
6709 if ((o
= lookupKeyReadOrReply(c
,c
->argv
[1],shared
.czero
)) == NULL
||
6710 checkType(c
,o
,REDIS_HASH
)) return;
6712 addReplyUlong(c
,hashLength(o
));
6715 static void genericHgetallCommand(redisClient
*c
, int flags
) {
6716 robj
*o
, *lenobj
, *obj
;
6717 unsigned long count
= 0;
6720 if ((o
= lookupKeyReadOrReply(c
,c
->argv
[1],shared
.emptymultibulk
)) == NULL
6721 || checkType(c
,o
,REDIS_HASH
)) return;
6723 lenobj
= createObject(REDIS_STRING
,NULL
);
6725 decrRefCount(lenobj
);
6727 hi
= hashInitIterator(o
);
6728 while (hashNext(hi
) != REDIS_ERR
) {
6729 if (flags
& REDIS_HASH_KEY
) {
6730 obj
= hashCurrent(hi
,REDIS_HASH_KEY
);
6731 addReplyBulk(c
,obj
);
6735 if (flags
& REDIS_HASH_VALUE
) {
6736 obj
= hashCurrent(hi
,REDIS_HASH_VALUE
);
6737 addReplyBulk(c
,obj
);
6742 hashReleaseIterator(hi
);
6744 lenobj
->ptr
= sdscatprintf(sdsempty(),"*%lu\r\n",count
);
6747 static void hkeysCommand(redisClient
*c
) {
6748 genericHgetallCommand(c
,REDIS_HASH_KEY
);
6751 static void hvalsCommand(redisClient
*c
) {
6752 genericHgetallCommand(c
,REDIS_HASH_VALUE
);
6755 static void hgetallCommand(redisClient
*c
) {
6756 genericHgetallCommand(c
,REDIS_HASH_KEY
|REDIS_HASH_VALUE
);
6759 static void hexistsCommand(redisClient
*c
) {
6761 if ((o
= lookupKeyReadOrReply(c
,c
->argv
[1],shared
.czero
)) == NULL
||
6762 checkType(c
,o
,REDIS_HASH
)) return;
6764 addReply(c
, hashExists(o
,c
->argv
[2]) ? shared
.cone
: shared
.czero
);
6767 static void convertToRealHash(robj
*o
) {
6768 unsigned char *key
, *val
, *p
, *zm
= o
->ptr
;
6769 unsigned int klen
, vlen
;
6770 dict
*dict
= dictCreate(&hashDictType
,NULL
);
6772 assert(o
->type
== REDIS_HASH
&& o
->encoding
!= REDIS_ENCODING_HT
);
6773 p
= zipmapRewind(zm
);
6774 while((p
= zipmapNext(p
,&key
,&klen
,&val
,&vlen
)) != NULL
) {
6775 robj
*keyobj
, *valobj
;
6777 keyobj
= createStringObject((char*)key
,klen
);
6778 valobj
= createStringObject((char*)val
,vlen
);
6779 keyobj
= tryObjectEncoding(keyobj
);
6780 valobj
= tryObjectEncoding(valobj
);
6781 dictAdd(dict
,keyobj
,valobj
);
6783 o
->encoding
= REDIS_ENCODING_HT
;
6788 /* ========================= Non type-specific commands ==================== */
6790 static void flushdbCommand(redisClient
*c
) {
6791 server
.dirty
+= dictSize(c
->db
->dict
);
6792 touchWatchedKeysOnFlush(c
->db
->id
);
6793 dictEmpty(c
->db
->dict
);
6794 dictEmpty(c
->db
->expires
);
6795 addReply(c
,shared
.ok
);
6798 static void flushallCommand(redisClient
*c
) {
6799 touchWatchedKeysOnFlush(-1);
6800 server
.dirty
+= emptyDb();
6801 addReply(c
,shared
.ok
);
6802 if (server
.bgsavechildpid
!= -1) {
6803 kill(server
.bgsavechildpid
,SIGKILL
);
6804 rdbRemoveTempFile(server
.bgsavechildpid
);
6806 rdbSave(server
.dbfilename
);
6810 static redisSortOperation
*createSortOperation(int type
, robj
*pattern
) {
6811 redisSortOperation
*so
= zmalloc(sizeof(*so
));
6813 so
->pattern
= pattern
;
6817 /* Return the value associated to the key with a name obtained
6818 * substituting the first occurence of '*' in 'pattern' with 'subst'.
6819 * The returned object will always have its refcount increased by 1
6820 * when it is non-NULL. */
6821 static robj
*lookupKeyByPattern(redisDb
*db
, robj
*pattern
, robj
*subst
) {
6824 robj keyobj
, fieldobj
, *o
;
6825 int prefixlen
, sublen
, postfixlen
, fieldlen
;
6826 /* Expoit the internal sds representation to create a sds string allocated on the stack in order to make this function faster */
6830 char buf
[REDIS_SORTKEY_MAX
+1];
6831 } keyname
, fieldname
;
6833 /* If the pattern is "#" return the substitution object itself in order
6834 * to implement the "SORT ... GET #" feature. */
6835 spat
= pattern
->ptr
;
6836 if (spat
[0] == '#' && spat
[1] == '\0') {
6837 incrRefCount(subst
);
6841 /* The substitution object may be specially encoded. If so we create
6842 * a decoded object on the fly. Otherwise getDecodedObject will just
6843 * increment the ref count, that we'll decrement later. */
6844 subst
= getDecodedObject(subst
);
6847 if (sdslen(spat
)+sdslen(ssub
)-1 > REDIS_SORTKEY_MAX
) return NULL
;
6848 p
= strchr(spat
,'*');
6850 decrRefCount(subst
);
6854 /* Find out if we're dealing with a hash dereference. */
6855 if ((f
= strstr(p
+1, "->")) != NULL
) {
6856 fieldlen
= sdslen(spat
)-(f
-spat
);
6857 /* this also copies \0 character */
6858 memcpy(fieldname
.buf
,f
+2,fieldlen
-1);
6859 fieldname
.len
= fieldlen
-2;
6865 sublen
= sdslen(ssub
);
6866 postfixlen
= sdslen(spat
)-(prefixlen
+1)-fieldlen
;
6867 memcpy(keyname
.buf
,spat
,prefixlen
);
6868 memcpy(keyname
.buf
+prefixlen
,ssub
,sublen
);
6869 memcpy(keyname
.buf
+prefixlen
+sublen
,p
+1,postfixlen
);
6870 keyname
.buf
[prefixlen
+sublen
+postfixlen
] = '\0';
6871 keyname
.len
= prefixlen
+sublen
+postfixlen
;
6872 decrRefCount(subst
);
6874 /* Lookup substituted key */
6875 initStaticStringObject(keyobj
,((char*)&keyname
)+(sizeof(long)*2));
6876 o
= lookupKeyRead(db
,&keyobj
);
6877 if (o
== NULL
) return NULL
;
6880 if (o
->type
!= REDIS_HASH
|| fieldname
.len
< 1) return NULL
;
6882 /* Retrieve value from hash by the field name. This operation
6883 * already increases the refcount of the returned object. */
6884 initStaticStringObject(fieldobj
,((char*)&fieldname
)+(sizeof(long)*2));
6885 o
= hashGet(o
, &fieldobj
);
6887 if (o
->type
!= REDIS_STRING
) return NULL
;
6889 /* Every object that this function returns needs to have its refcount
6890 * increased. sortCommand decreases it again. */
6897 /* sortCompare() is used by qsort in sortCommand(). Given that qsort_r with
6898 * the additional parameter is not standard but a BSD-specific we have to
6899 * pass sorting parameters via the global 'server' structure */
6900 static int sortCompare(const void *s1
, const void *s2
) {
6901 const redisSortObject
*so1
= s1
, *so2
= s2
;
6904 if (!server
.sort_alpha
) {
6905 /* Numeric sorting. Here it's trivial as we precomputed scores */
6906 if (so1
->u
.score
> so2
->u
.score
) {
6908 } else if (so1
->u
.score
< so2
->u
.score
) {
6914 /* Alphanumeric sorting */
6915 if (server
.sort_bypattern
) {
6916 if (!so1
->u
.cmpobj
|| !so2
->u
.cmpobj
) {
6917 /* At least one compare object is NULL */
6918 if (so1
->u
.cmpobj
== so2
->u
.cmpobj
)
6920 else if (so1
->u
.cmpobj
== NULL
)
6925 /* We have both the objects, use strcoll */
6926 cmp
= strcoll(so1
->u
.cmpobj
->ptr
,so2
->u
.cmpobj
->ptr
);
6929 /* Compare elements directly. */
6930 cmp
= compareStringObjects(so1
->obj
,so2
->obj
);
6933 return server
.sort_desc
? -cmp
: cmp
;
6936 /* The SORT command is the most complex command in Redis. Warning: this code
6937 * is optimized for speed and a bit less for readability */
6938 static void sortCommand(redisClient
*c
) {
6941 int desc
= 0, alpha
= 0;
6942 int limit_start
= 0, limit_count
= -1, start
, end
;
6943 int j
, dontsort
= 0, vectorlen
;
6944 int getop
= 0; /* GET operation counter */
6945 robj
*sortval
, *sortby
= NULL
, *storekey
= NULL
;
6946 redisSortObject
*vector
; /* Resulting vector to sort */
6948 /* Lookup the key to sort. It must be of the right types */
6949 sortval
= lookupKeyRead(c
->db
,c
->argv
[1]);
6950 if (sortval
== NULL
) {
6951 addReply(c
,shared
.emptymultibulk
);
6954 if (sortval
->type
!= REDIS_SET
&& sortval
->type
!= REDIS_LIST
&&
6955 sortval
->type
!= REDIS_ZSET
)
6957 addReply(c
,shared
.wrongtypeerr
);
6961 /* Create a list of operations to perform for every sorted element.
6962 * Operations can be GET/DEL/INCR/DECR */
6963 operations
= listCreate();
6964 listSetFreeMethod(operations
,zfree
);
6967 /* Now we need to protect sortval incrementing its count, in the future
6968 * SORT may have options able to overwrite/delete keys during the sorting
6969 * and the sorted key itself may get destroyed */
6970 incrRefCount(sortval
);
6972 /* The SORT command has an SQL-alike syntax, parse it */
6973 while(j
< c
->argc
) {
6974 int leftargs
= c
->argc
-j
-1;
6975 if (!strcasecmp(c
->argv
[j
]->ptr
,"asc")) {
6977 } else if (!strcasecmp(c
->argv
[j
]->ptr
,"desc")) {
6979 } else if (!strcasecmp(c
->argv
[j
]->ptr
,"alpha")) {
6981 } else if (!strcasecmp(c
->argv
[j
]->ptr
,"limit") && leftargs
>= 2) {
6982 limit_start
= atoi(c
->argv
[j
+1]->ptr
);
6983 limit_count
= atoi(c
->argv
[j
+2]->ptr
);
6985 } else if (!strcasecmp(c
->argv
[j
]->ptr
,"store") && leftargs
>= 1) {
6986 storekey
= c
->argv
[j
+1];
6988 } else if (!strcasecmp(c
->argv
[j
]->ptr
,"by") && leftargs
>= 1) {
6989 sortby
= c
->argv
[j
+1];
6990 /* If the BY pattern does not contain '*', i.e. it is constant,
6991 * we don't need to sort nor to lookup the weight keys. */
6992 if (strchr(c
->argv
[j
+1]->ptr
,'*') == NULL
) dontsort
= 1;
6994 } else if (!strcasecmp(c
->argv
[j
]->ptr
,"get") && leftargs
>= 1) {
6995 listAddNodeTail(operations
,createSortOperation(
6996 REDIS_SORT_GET
,c
->argv
[j
+1]));
7000 decrRefCount(sortval
);
7001 listRelease(operations
);
7002 addReply(c
,shared
.syntaxerr
);
7008 /* Load the sorting vector with all the objects to sort */
7009 switch(sortval
->type
) {
7010 case REDIS_LIST
: vectorlen
= listLength((list
*)sortval
->ptr
); break;
7011 case REDIS_SET
: vectorlen
= dictSize((dict
*)sortval
->ptr
); break;
7012 case REDIS_ZSET
: vectorlen
= dictSize(((zset
*)sortval
->ptr
)->dict
); break;
7013 default: vectorlen
= 0; redisPanic("Bad SORT type"); /* Avoid GCC warning */
7015 vector
= zmalloc(sizeof(redisSortObject
)*vectorlen
);
7018 if (sortval
->type
== REDIS_LIST
) {
7019 list
*list
= sortval
->ptr
;
7023 listRewind(list
,&li
);
7024 while((ln
= listNext(&li
))) {
7025 robj
*ele
= ln
->value
;
7026 vector
[j
].obj
= ele
;
7027 vector
[j
].u
.score
= 0;
7028 vector
[j
].u
.cmpobj
= NULL
;
7036 if (sortval
->type
== REDIS_SET
) {
7039 zset
*zs
= sortval
->ptr
;
7043 di
= dictGetIterator(set
);
7044 while((setele
= dictNext(di
)) != NULL
) {
7045 vector
[j
].obj
= dictGetEntryKey(setele
);
7046 vector
[j
].u
.score
= 0;
7047 vector
[j
].u
.cmpobj
= NULL
;
7050 dictReleaseIterator(di
);
7052 redisAssert(j
== vectorlen
);
7054 /* Now it's time to load the right scores in the sorting vector */
7055 if (dontsort
== 0) {
7056 for (j
= 0; j
< vectorlen
; j
++) {
7059 /* lookup value to sort by */
7060 byval
= lookupKeyByPattern(c
->db
,sortby
,vector
[j
].obj
);
7061 if (!byval
) continue;
7063 /* use object itself to sort by */
7064 byval
= vector
[j
].obj
;
7068 if (sortby
) vector
[j
].u
.cmpobj
= getDecodedObject(byval
);
7070 if (byval
->encoding
== REDIS_ENCODING_RAW
) {
7071 vector
[j
].u
.score
= strtod(byval
->ptr
,NULL
);
7072 } else if (byval
->encoding
== REDIS_ENCODING_INT
) {
7073 /* Don't need to decode the object if it's
7074 * integer-encoded (the only encoding supported) so
7075 * far. We can just cast it */
7076 vector
[j
].u
.score
= (long)byval
->ptr
;
7078 redisAssert(1 != 1);
7082 /* when the object was retrieved using lookupKeyByPattern,
7083 * its refcount needs to be decreased. */
7085 decrRefCount(byval
);
7090 /* We are ready to sort the vector... perform a bit of sanity check
7091 * on the LIMIT option too. We'll use a partial version of quicksort. */
7092 start
= (limit_start
< 0) ? 0 : limit_start
;
7093 end
= (limit_count
< 0) ? vectorlen
-1 : start
+limit_count
-1;
7094 if (start
>= vectorlen
) {
7095 start
= vectorlen
-1;
7098 if (end
>= vectorlen
) end
= vectorlen
-1;
7100 if (dontsort
== 0) {
7101 server
.sort_desc
= desc
;
7102 server
.sort_alpha
= alpha
;
7103 server
.sort_bypattern
= sortby
? 1 : 0;
7104 if (sortby
&& (start
!= 0 || end
!= vectorlen
-1))
7105 pqsort(vector
,vectorlen
,sizeof(redisSortObject
),sortCompare
, start
,end
);
7107 qsort(vector
,vectorlen
,sizeof(redisSortObject
),sortCompare
);
7110 /* Send command output to the output buffer, performing the specified
7111 * GET/DEL/INCR/DECR operations if any. */
7112 outputlen
= getop
? getop
*(end
-start
+1) : end
-start
+1;
7113 if (storekey
== NULL
) {
7114 /* STORE option not specified, sent the sorting result to client */
7115 addReplySds(c
,sdscatprintf(sdsempty(),"*%d\r\n",outputlen
));
7116 for (j
= start
; j
<= end
; j
++) {
7120 if (!getop
) addReplyBulk(c
,vector
[j
].obj
);
7121 listRewind(operations
,&li
);
7122 while((ln
= listNext(&li
))) {
7123 redisSortOperation
*sop
= ln
->value
;
7124 robj
*val
= lookupKeyByPattern(c
->db
,sop
->pattern
,
7127 if (sop
->type
== REDIS_SORT_GET
) {
7129 addReply(c
,shared
.nullbulk
);
7131 addReplyBulk(c
,val
);
7135 redisAssert(sop
->type
== REDIS_SORT_GET
); /* always fails */
7140 robj
*listObject
= createListObject();
7141 list
*listPtr
= (list
*) listObject
->ptr
;
7143 /* STORE option specified, set the sorting result as a List object */
7144 for (j
= start
; j
<= end
; j
++) {
7149 listAddNodeTail(listPtr
,vector
[j
].obj
);
7150 incrRefCount(vector
[j
].obj
);
7152 listRewind(operations
,&li
);
7153 while((ln
= listNext(&li
))) {
7154 redisSortOperation
*sop
= ln
->value
;
7155 robj
*val
= lookupKeyByPattern(c
->db
,sop
->pattern
,
7158 if (sop
->type
== REDIS_SORT_GET
) {
7160 listAddNodeTail(listPtr
,createStringObject("",0));
7162 /* We should do a incrRefCount on val because it is
7163 * added to the list, but also a decrRefCount because
7164 * it is returned by lookupKeyByPattern. This results
7165 * in doing nothing at all. */
7166 listAddNodeTail(listPtr
,val
);
7169 redisAssert(sop
->type
== REDIS_SORT_GET
); /* always fails */
7173 if (dictReplace(c
->db
->dict
,storekey
,listObject
)) {
7174 incrRefCount(storekey
);
7176 /* Note: we add 1 because the DB is dirty anyway since even if the
7177 * SORT result is empty a new key is set and maybe the old content
7179 server
.dirty
+= 1+outputlen
;
7180 addReplySds(c
,sdscatprintf(sdsempty(),":%d\r\n",outputlen
));
7184 decrRefCount(sortval
);
7185 listRelease(operations
);
7186 for (j
= 0; j
< vectorlen
; j
++) {
7187 if (alpha
&& vector
[j
].u
.cmpobj
)
7188 decrRefCount(vector
[j
].u
.cmpobj
);
/* Convert an amount of bytes into a human readable string in the form
 * of 100B, 2G, 100M, 4K, and so forth.
 *
 * 's' is the destination buffer, provided by the caller and large enough
 * for the result (at most a few tens of characters); 'n' is the number
 * of bytes. Values below 1024 are printed as an exact integer, larger
 * ones with two decimals and a K, M, G, T or P suffix (powers of 1024).
 * Something is always written to 's', whatever the value of 'n'. */
static void bytesToHuman(char *s, unsigned long long n) {
    const unsigned long long kib = 1024ULL;
    const unsigned long long mib = kib*1024;
    const unsigned long long gib = mib*1024;
    const unsigned long long tib = gib*1024;
    const unsigned long long pib = tib*1024;

    if (n < kib) {
        /* Bytes */
        sprintf(s,"%lluB",n);
    } else if (n < mib) {
        sprintf(s,"%.2fK",(double)n/kib);
    } else if (n < gib) {
        sprintf(s,"%.2fM",(double)n/mib);
    } else if (n < tib) {
        sprintf(s,"%.2fG",(double)n/gib);
    } else if (n < pib) {
        sprintf(s,"%.2fT",(double)n/tib);
    } else {
        /* Catch-all so that the buffer is never left unwritten. */
        sprintf(s,"%.2fP",(double)n/pib);
    }
}
7214 /* Create the string returned by the INFO command. This is decoupled
7215 * by the INFO command itself as we need to report the same information
7216 * on memory corruption problems. */
7217 static sds
genRedisInfoString(void) {
7219 time_t uptime
= time(NULL
)-server
.stat_starttime
;
7223 bytesToHuman(hmem
,zmalloc_used_memory());
7224 info
= sdscatprintf(sdsempty(),
7225 "redis_version:%s\r\n"
7226 "redis_git_sha1:%s\r\n"
7227 "redis_git_dirty:%d\r\n"
7229 "multiplexing_api:%s\r\n"
7230 "process_id:%ld\r\n"
7231 "uptime_in_seconds:%ld\r\n"
7232 "uptime_in_days:%ld\r\n"
7233 "connected_clients:%d\r\n"
7234 "connected_slaves:%d\r\n"
7235 "blocked_clients:%d\r\n"
7236 "used_memory:%zu\r\n"
7237 "used_memory_human:%s\r\n"
7238 "changes_since_last_save:%lld\r\n"
7239 "bgsave_in_progress:%d\r\n"
7240 "last_save_time:%ld\r\n"
7241 "bgrewriteaof_in_progress:%d\r\n"
7242 "total_connections_received:%lld\r\n"
7243 "total_commands_processed:%lld\r\n"
7244 "expired_keys:%lld\r\n"
7245 "hash_max_zipmap_entries:%zu\r\n"
7246 "hash_max_zipmap_value:%zu\r\n"
7247 "pubsub_channels:%ld\r\n"
7248 "pubsub_patterns:%u\r\n"
7253 strtol(REDIS_GIT_DIRTY
,NULL
,10) > 0,
7254 (sizeof(long) == 8) ? "64" : "32",
7259 listLength(server
.clients
)-listLength(server
.slaves
),
7260 listLength(server
.slaves
),
7261 server
.blpop_blocked_clients
,
7262 zmalloc_used_memory(),
7265 server
.bgsavechildpid
!= -1,
7267 server
.bgrewritechildpid
!= -1,
7268 server
.stat_numconnections
,
7269 server
.stat_numcommands
,
7270 server
.stat_expiredkeys
,
7271 server
.hash_max_zipmap_entries
,
7272 server
.hash_max_zipmap_value
,
7273 dictSize(server
.pubsub_channels
),
7274 listLength(server
.pubsub_patterns
),
7275 server
.vm_enabled
!= 0,
7276 server
.masterhost
== NULL
? "master" : "slave"
7278 if (server
.masterhost
) {
7279 info
= sdscatprintf(info
,
7280 "master_host:%s\r\n"
7281 "master_port:%d\r\n"
7282 "master_link_status:%s\r\n"
7283 "master_last_io_seconds_ago:%d\r\n"
7286 (server
.replstate
== REDIS_REPL_CONNECTED
) ?
7288 server
.master
? ((int)(time(NULL
)-server
.master
->lastinteraction
)) : -1
7291 if (server
.vm_enabled
) {
7293 info
= sdscatprintf(info
,
7294 "vm_conf_max_memory:%llu\r\n"
7295 "vm_conf_page_size:%llu\r\n"
7296 "vm_conf_pages:%llu\r\n"
7297 "vm_stats_used_pages:%llu\r\n"
7298 "vm_stats_swapped_objects:%llu\r\n"
7299 "vm_stats_swappin_count:%llu\r\n"
7300 "vm_stats_swappout_count:%llu\r\n"
7301 "vm_stats_io_newjobs_len:%lu\r\n"
7302 "vm_stats_io_processing_len:%lu\r\n"
7303 "vm_stats_io_processed_len:%lu\r\n"
7304 "vm_stats_io_active_threads:%lu\r\n"
7305 "vm_stats_blocked_clients:%lu\r\n"
7306 ,(unsigned long long) server
.vm_max_memory
,
7307 (unsigned long long) server
.vm_page_size
,
7308 (unsigned long long) server
.vm_pages
,
7309 (unsigned long long) server
.vm_stats_used_pages
,
7310 (unsigned long long) server
.vm_stats_swapped_objects
,
7311 (unsigned long long) server
.vm_stats_swapins
,
7312 (unsigned long long) server
.vm_stats_swapouts
,
7313 (unsigned long) listLength(server
.io_newjobs
),
7314 (unsigned long) listLength(server
.io_processing
),
7315 (unsigned long) listLength(server
.io_processed
),
7316 (unsigned long) server
.io_active_threads
,
7317 (unsigned long) server
.vm_blocked_clients
7321 for (j
= 0; j
< server
.dbnum
; j
++) {
7322 long long keys
, vkeys
;
7324 keys
= dictSize(server
.db
[j
].dict
);
7325 vkeys
= dictSize(server
.db
[j
].expires
);
7326 if (keys
|| vkeys
) {
7327 info
= sdscatprintf(info
, "db%d:keys=%lld,expires=%lld\r\n",
7334 static void infoCommand(redisClient
*c
) {
7335 sds info
= genRedisInfoString();
7336 addReplySds(c
,sdscatprintf(sdsempty(),"$%lu\r\n",
7337 (unsigned long)sdslen(info
)));
7338 addReplySds(c
,info
);
7339 addReply(c
,shared
.crlf
);
7342 static void monitorCommand(redisClient
*c
) {
7343 /* ignore MONITOR if aleady slave or in monitor mode */
7344 if (c
->flags
& REDIS_SLAVE
) return;
7346 c
->flags
|= (REDIS_SLAVE
|REDIS_MONITOR
);
7348 listAddNodeTail(server
.monitors
,c
);
7349 addReply(c
,shared
.ok
);
7352 /* ================================= Expire ================================= */
7353 static int removeExpire(redisDb
*db
, robj
*key
) {
7354 if (dictDelete(db
->expires
,key
) == DICT_OK
) {
7361 static int setExpire(redisDb
*db
, robj
*key
, time_t when
) {
7362 if (dictAdd(db
->expires
,key
,(void*)when
) == DICT_ERR
) {
7370 /* Return the expire time of the specified key, or -1 if no expire
7371 * is associated with this key (i.e. the key is non volatile) */
7372 static time_t getExpire(redisDb
*db
, robj
*key
) {
7375 /* No expire? return ASAP */
7376 if (dictSize(db
->expires
) == 0 ||
7377 (de
= dictFind(db
->expires
,key
)) == NULL
) return -1;
7379 return (time_t) dictGetEntryVal(de
);
7382 static int expireIfNeeded(redisDb
*db
, robj
*key
) {
7386 /* No expire? return ASAP */
7387 if (dictSize(db
->expires
) == 0 ||
7388 (de
= dictFind(db
->expires
,key
)) == NULL
) return 0;
7390 /* Lookup the expire */
7391 when
= (time_t) dictGetEntryVal(de
);
7392 if (time(NULL
) <= when
) return 0;
7394 /* Delete the key */
7395 dictDelete(db
->expires
,key
);
7396 server
.stat_expiredkeys
++;
7397 return dictDelete(db
->dict
,key
) == DICT_OK
;
7400 static int deleteIfVolatile(redisDb
*db
, robj
*key
) {
7403 /* No expire? return ASAP */
7404 if (dictSize(db
->expires
) == 0 ||
7405 (de
= dictFind(db
->expires
,key
)) == NULL
) return 0;
7407 /* Delete the key */
7409 server
.stat_expiredkeys
++;
7410 dictDelete(db
->expires
,key
);
7411 return dictDelete(db
->dict
,key
) == DICT_OK
;
7414 static void expireGenericCommand(redisClient
*c
, robj
*key
, robj
*param
, long offset
) {
7418 if (getLongFromObjectOrReply(c
, param
, &seconds
, NULL
) != REDIS_OK
) return;
7422 de
= dictFind(c
->db
->dict
,key
);
7424 addReply(c
,shared
.czero
);
7428 if (deleteKey(c
->db
,key
)) server
.dirty
++;
7429 addReply(c
, shared
.cone
);
7432 time_t when
= time(NULL
)+seconds
;
7433 if (setExpire(c
->db
,key
,when
)) {
7434 addReply(c
,shared
.cone
);
7437 addReply(c
,shared
.czero
);
7443 static void expireCommand(redisClient
*c
) {
7444 expireGenericCommand(c
,c
->argv
[1],c
->argv
[2],0);
7447 static void expireatCommand(redisClient
*c
) {
7448 expireGenericCommand(c
,c
->argv
[1],c
->argv
[2],time(NULL
));
7451 static void ttlCommand(redisClient
*c
) {
7455 expire
= getExpire(c
->db
,c
->argv
[1]);
7457 ttl
= (int) (expire
-time(NULL
));
7458 if (ttl
< 0) ttl
= -1;
7460 addReplySds(c
,sdscatprintf(sdsempty(),":%d\r\n",ttl
));
7463 /* ================================ MULTI/EXEC ============================== */
7465 /* Client state initialization for MULTI/EXEC */
7466 static void initClientMultiState(redisClient
*c
) {
7467 c
->mstate
.commands
= NULL
;
7468 c
->mstate
.count
= 0;
7471 /* Release all the resources associated with MULTI/EXEC state */
7472 static void freeClientMultiState(redisClient
*c
) {
7475 for (j
= 0; j
< c
->mstate
.count
; j
++) {
7477 multiCmd
*mc
= c
->mstate
.commands
+j
;
7479 for (i
= 0; i
< mc
->argc
; i
++)
7480 decrRefCount(mc
->argv
[i
]);
7483 zfree(c
->mstate
.commands
);
7486 /* Add a new command into the MULTI commands queue */
7487 static void queueMultiCommand(redisClient
*c
, struct redisCommand
*cmd
) {
7491 c
->mstate
.commands
= zrealloc(c
->mstate
.commands
,
7492 sizeof(multiCmd
)*(c
->mstate
.count
+1));
7493 mc
= c
->mstate
.commands
+c
->mstate
.count
;
7496 mc
->argv
= zmalloc(sizeof(robj
*)*c
->argc
);
7497 memcpy(mc
->argv
,c
->argv
,sizeof(robj
*)*c
->argc
);
7498 for (j
= 0; j
< c
->argc
; j
++)
7499 incrRefCount(mc
->argv
[j
]);
7503 static void multiCommand(redisClient
*c
) {
7504 if (c
->flags
& REDIS_MULTI
) {
7505 addReplySds(c
,sdsnew("-ERR MULTI calls can not be nested\r\n"));
7508 c
->flags
|= REDIS_MULTI
;
7509 addReply(c
,shared
.ok
);
7512 static void discardCommand(redisClient
*c
) {
7513 if (!(c
->flags
& REDIS_MULTI
)) {
7514 addReplySds(c
,sdsnew("-ERR DISCARD without MULTI\r\n"));
7518 freeClientMultiState(c
);
7519 initClientMultiState(c
);
7520 c
->flags
&= (~REDIS_MULTI
);
7521 addReply(c
,shared
.ok
);
7524 /* Send a MULTI command to all the slaves and AOF file. Check the execCommand
7525 * implememntation for more information. */
7526 static void execCommandReplicateMulti(redisClient
*c
) {
7527 struct redisCommand
*cmd
;
7528 robj
*multistring
= createStringObject("MULTI",5);
7530 cmd
= lookupCommand("multi");
7531 if (server
.appendonly
)
7532 feedAppendOnlyFile(cmd
,c
->db
->id
,&multistring
,1);
7533 if (listLength(server
.slaves
))
7534 replicationFeedSlaves(server
.slaves
,c
->db
->id
,&multistring
,1);
7535 decrRefCount(multistring
);
7538 static void execCommand(redisClient
*c
) {
7543 if (!(c
->flags
& REDIS_MULTI
)) {
7544 addReplySds(c
,sdsnew("-ERR EXEC without MULTI\r\n"));
7548 /* Check if we need to abort the EXEC if some WATCHed key was touched.
7549 * A failed EXEC will return a multi bulk nil object. */
7550 if (c
->flags
& REDIS_DIRTY_CAS
) {
7551 freeClientMultiState(c
);
7552 initClientMultiState(c
);
7553 c
->flags
&= ~(REDIS_MULTI
|REDIS_DIRTY_CAS
);
7555 addReply(c
,shared
.nullmultibulk
);
7559 /* Replicate a MULTI request now that we are sure the block is executed.
7560 * This way we'll deliver the MULTI/..../EXEC block as a whole and
7561 * both the AOF and the replication link will have the same consistency
7562 * and atomicity guarantees. */
7563 execCommandReplicateMulti(c
);
7565 /* Exec all the queued commands */
7566 unwatchAllKeys(c
); /* Unwatch ASAP otherwise we'll waste CPU cycles */
7567 orig_argv
= c
->argv
;
7568 orig_argc
= c
->argc
;
7569 addReplySds(c
,sdscatprintf(sdsempty(),"*%d\r\n",c
->mstate
.count
));
7570 for (j
= 0; j
< c
->mstate
.count
; j
++) {
7571 c
->argc
= c
->mstate
.commands
[j
].argc
;
7572 c
->argv
= c
->mstate
.commands
[j
].argv
;
7573 call(c
,c
->mstate
.commands
[j
].cmd
);
7575 c
->argv
= orig_argv
;
7576 c
->argc
= orig_argc
;
7577 freeClientMultiState(c
);
7578 initClientMultiState(c
);
7579 c
->flags
&= ~(REDIS_MULTI
|REDIS_DIRTY_CAS
);
7580 /* Make sure the EXEC command is always replicated / AOF, since we
7581 * always send the MULTI command (we can't know beforehand if the
7582 * next operations will contain at least a modification to the DB). */
7586 /* =========================== Blocking Operations ========================= */
7588 /* Currently Redis blocking operations support is limited to list POP ops,
7589 * so the current implementation is not fully generic, but it is also not
7590 * completely specific so it will not require a rewrite to support new
7591 * kind of blocking operations in the future.
7593 * Still it's important to note that list blocking operations can be already
7594 * used as a notification mechanism in order to implement other blocking
7595 * operations at application level, so there must be a very strong evidence
7596 * of usefulness and generality before new blocking operations are implemented.
7598 * This is how the current blocking POP works, we use BLPOP as example:
7599 * - If the user calls BLPOP and the key exists and contains a non empty list
7600 * then LPOP is called instead. So BLPOP is semantically the same as LPOP
7601 * if there is no need to block.
7602 * - If instead BLPOP is called and the key does not exists or the list is
7603 * empty we need to block. In order to do so we remove the notification for
7604 * new data to read in the client socket (so that we'll not serve new
7605 * requests if the blocking request is not served). Also we put the client
7606 * in a dictionary (db->blocking_keys) mapping keys to a list of clients
7607 * blocking for this keys.
7608 * - If a PUSH operation against a key with blocked clients waiting is
7609 * performed, we serve the first in the list: basically instead to push
7610 * the new element inside the list we return it to the (first / oldest)
7611 * blocking client, unblock the client, and remove it from the list.
7613 * The above comment and the source code should be enough in order to understand
7614 * the implementation and modify / fix it later.
7617 /* Set a client in blocking mode for the specified key, with the specified
7619 static void blockForKeys(redisClient
*c
, robj
**keys
, int numkeys
, time_t timeout
) {
7624 c
->blocking_keys
= zmalloc(sizeof(robj
*)*numkeys
);
7625 c
->blocking_keys_num
= numkeys
;
7626 c
->blockingto
= timeout
;
7627 for (j
= 0; j
< numkeys
; j
++) {
7628 /* Add the key in the client structure, to map clients -> keys */
7629 c
->blocking_keys
[j
] = keys
[j
];
7630 incrRefCount(keys
[j
]);
7632 /* And in the other "side", to map keys -> clients */
7633 de
= dictFind(c
->db
->blocking_keys
,keys
[j
]);
7637 /* For every key we take a list of clients blocked for it */
7639 retval
= dictAdd(c
->db
->blocking_keys
,keys
[j
],l
);
7640 incrRefCount(keys
[j
]);
7641 assert(retval
== DICT_OK
);
7643 l
= dictGetEntryVal(de
);
7645 listAddNodeTail(l
,c
);
7647 /* Mark the client as a blocked client */
7648 c
->flags
|= REDIS_BLOCKED
;
7649 server
.blpop_blocked_clients
++;
7652 /* Unblock a client that's waiting in a blocking operation such as BLPOP */
7653 static void unblockClientWaitingData(redisClient
*c
) {
7658 assert(c
->blocking_keys
!= NULL
);
7659 /* The client may wait for multiple keys, so unblock it for every key. */
7660 for (j
= 0; j
< c
->blocking_keys_num
; j
++) {
7661 /* Remove this client from the list of clients waiting for this key. */
7662 de
= dictFind(c
->db
->blocking_keys
,c
->blocking_keys
[j
]);
7664 l
= dictGetEntryVal(de
);
7665 listDelNode(l
,listSearchKey(l
,c
));
7666 /* If the list is empty we need to remove it to avoid wasting memory */
7667 if (listLength(l
) == 0)
7668 dictDelete(c
->db
->blocking_keys
,c
->blocking_keys
[j
]);
7669 decrRefCount(c
->blocking_keys
[j
]);
7671 /* Cleanup the client structure */
7672 zfree(c
->blocking_keys
);
7673 c
->blocking_keys
= NULL
;
7674 c
->flags
&= (~REDIS_BLOCKED
);
7675 server
.blpop_blocked_clients
--;
7676 /* We want to process data if there is some command waiting
7677 * in the input buffer. Note that this is safe even if
7678 * unblockClientWaitingData() gets called from freeClient() because
7679 * freeClient() will be smart enough to call this function
7680 * *after* c->querybuf was set to NULL. */
7681 if (c
->querybuf
&& sdslen(c
->querybuf
) > 0) processInputBuffer(c
);
7684 /* This should be called from any function PUSHing into lists.
7685 * 'c' is the "pushing client", 'key' is the key it is pushing data against,
7686 * 'ele' is the element pushed.
7688 * If the function returns 0 there was no client waiting for a list push
7691 * If the function returns 1 there was a client waiting for a list push
7692 * against this key, the element was passed to this client thus it's not
7693 * needed to actually add it to the list and the caller should return asap. */
7694 static int handleClientsWaitingListPush(redisClient
*c
, robj
*key
, robj
*ele
) {
7695 struct dictEntry
*de
;
7696 redisClient
*receiver
;
7700 de
= dictFind(c
->db
->blocking_keys
,key
);
7701 if (de
== NULL
) return 0;
7702 l
= dictGetEntryVal(de
);
7705 receiver
= ln
->value
;
7707 addReplySds(receiver
,sdsnew("*2\r\n"));
7708 addReplyBulk(receiver
,key
);
7709 addReplyBulk(receiver
,ele
);
7710 unblockClientWaitingData(receiver
);
7714 /* Blocking RPOP/LPOP */
7715 static void blockingPopGenericCommand(redisClient
*c
, int where
) {
7720 for (j
= 1; j
< c
->argc
-1; j
++) {
7721 o
= lookupKeyWrite(c
->db
,c
->argv
[j
]);
7723 if (o
->type
!= REDIS_LIST
) {
7724 addReply(c
,shared
.wrongtypeerr
);
7727 list
*list
= o
->ptr
;
7728 if (listLength(list
) != 0) {
7729 /* If the list contains elements fall back to the usual
7730 * non-blocking POP operation */
7731 robj
*argv
[2], **orig_argv
;
7734 /* We need to alter the command arguments before to call
7735 * popGenericCommand() as the command takes a single key. */
7736 orig_argv
= c
->argv
;
7737 orig_argc
= c
->argc
;
7738 argv
[1] = c
->argv
[j
];
7742 /* Also the return value is different, we need to output
7743 * the multi bulk reply header and the key name. The
7744 * "real" command will add the last element (the value)
7745 * for us. If this souds like an hack to you it's just
7746 * because it is... */
7747 addReplySds(c
,sdsnew("*2\r\n"));
7748 addReplyBulk(c
,argv
[1]);
7749 popGenericCommand(c
,where
);
7751 /* Fix the client structure with the original stuff */
7752 c
->argv
= orig_argv
;
7753 c
->argc
= orig_argc
;
7759 /* If the list is empty or the key does not exists we must block */
7760 timeout
= strtol(c
->argv
[c
->argc
-1]->ptr
,NULL
,10);
7761 if (timeout
> 0) timeout
+= time(NULL
);
7762 blockForKeys(c
,c
->argv
+1,c
->argc
-2,timeout
);
7765 static void blpopCommand(redisClient
*c
) {
7766 blockingPopGenericCommand(c
,REDIS_HEAD
);
7769 static void brpopCommand(redisClient
*c
) {
7770 blockingPopGenericCommand(c
,REDIS_TAIL
);
7773 /* =============================== Replication ============================= */
7775 static int syncWrite(int fd
, char *ptr
, ssize_t size
, int timeout
) {
7776 ssize_t nwritten
, ret
= size
;
7777 time_t start
= time(NULL
);
7781 if (aeWait(fd
,AE_WRITABLE
,1000) & AE_WRITABLE
) {
7782 nwritten
= write(fd
,ptr
,size
);
7783 if (nwritten
== -1) return -1;
7787 if ((time(NULL
)-start
) > timeout
) {
7795 static int syncRead(int fd
, char *ptr
, ssize_t size
, int timeout
) {
7796 ssize_t nread
, totread
= 0;
7797 time_t start
= time(NULL
);
7801 if (aeWait(fd
,AE_READABLE
,1000) & AE_READABLE
) {
7802 nread
= read(fd
,ptr
,size
);
7803 if (nread
== -1) return -1;
7808 if ((time(NULL
)-start
) > timeout
) {
/* Read a line from 'fd' one byte at a time, storing at most 'size'-1
 * characters plus a terminating NUL into 'ptr'. The trailing "\n" (and a
 * preceding "\r", if any) is not stored.
 *
 * Returns the number of characters read before the newline (a stripped
 * "\r" is still counted), or -1 if syncRead() fails. If the buffer fills up
 * before a newline is seen, the truncated string is returned. */
static int syncReadLine(int fd, char *ptr, ssize_t size, int timeout) {
    ssize_t nread = 0;

    size--; /* reserve room for the NUL terminator */
    while(size > 0) {
        char c;

        if (syncRead(fd,&c,1,timeout) == -1) return -1;
        if (c == '\n') {
            *ptr = '\0';
            if (nread && *(ptr-1) == '\r') *(ptr-1) = '\0';
            return nread;
        } else {
            *ptr++ = c;
            *ptr = '\0';
            nread++;
        }
        /* One slot consumed: without this the loop never stops on long
         * lines and writes past the end of the caller's buffer. */
        size--;
    }
    return nread;
}
7837 static void syncCommand(redisClient
*c
) {
7838 /* ignore SYNC if aleady slave or in monitor mode */
7839 if (c
->flags
& REDIS_SLAVE
) return;
7841 /* SYNC can't be issued when the server has pending data to send to
7842 * the client about already issued commands. We need a fresh reply
7843 * buffer registering the differences between the BGSAVE and the current
7844 * dataset, so that we can copy to other slaves if needed. */
7845 if (listLength(c
->reply
) != 0) {
7846 addReplySds(c
,sdsnew("-ERR SYNC is invalid with pending input\r\n"));
7850 redisLog(REDIS_NOTICE
,"Slave ask for synchronization");
7851 /* Here we need to check if there is a background saving operation
7852 * in progress, or if it is required to start one */
7853 if (server
.bgsavechildpid
!= -1) {
7854 /* Ok a background save is in progress. Let's check if it is a good
7855 * one for replication, i.e. if there is another slave that is
7856 * registering differences since the server forked to save */
7861 listRewind(server
.slaves
,&li
);
7862 while((ln
= listNext(&li
))) {
7864 if (slave
->replstate
== REDIS_REPL_WAIT_BGSAVE_END
) break;
7867 /* Perfect, the server is already registering differences for
7868 * another slave. Set the right state, and copy the buffer. */
7869 listRelease(c
->reply
);
7870 c
->reply
= listDup(slave
->reply
);
7871 c
->replstate
= REDIS_REPL_WAIT_BGSAVE_END
;
7872 redisLog(REDIS_NOTICE
,"Waiting for end of BGSAVE for SYNC");
7874 /* No way, we need to wait for the next BGSAVE in order to
7875 * register differences */
7876 c
->replstate
= REDIS_REPL_WAIT_BGSAVE_START
;
7877 redisLog(REDIS_NOTICE
,"Waiting for next BGSAVE for SYNC");
7880 /* Ok we don't have a BGSAVE in progress, let's start one */
7881 redisLog(REDIS_NOTICE
,"Starting BGSAVE for SYNC");
7882 if (rdbSaveBackground(server
.dbfilename
) != REDIS_OK
) {
7883 redisLog(REDIS_NOTICE
,"Replication failed, can't BGSAVE");
7884 addReplySds(c
,sdsnew("-ERR Unalbe to perform background save\r\n"));
7887 c
->replstate
= REDIS_REPL_WAIT_BGSAVE_END
;
7890 c
->flags
|= REDIS_SLAVE
;
7892 listAddNodeTail(server
.slaves
,c
);
7896 static void sendBulkToSlave(aeEventLoop
*el
, int fd
, void *privdata
, int mask
) {
7897 redisClient
*slave
= privdata
;
7899 REDIS_NOTUSED(mask
);
7900 char buf
[REDIS_IOBUF_LEN
];
7901 ssize_t nwritten
, buflen
;
7903 if (slave
->repldboff
== 0) {
7904 /* Write the bulk write count before to transfer the DB. In theory here
7905 * we don't know how much room there is in the output buffer of the
7906 * socket, but in pratice SO_SNDLOWAT (the minimum count for output
7907 * operations) will never be smaller than the few bytes we need. */
7910 bulkcount
= sdscatprintf(sdsempty(),"$%lld\r\n",(unsigned long long)
7912 if (write(fd
,bulkcount
,sdslen(bulkcount
)) != (signed)sdslen(bulkcount
))
7920 lseek(slave
->repldbfd
,slave
->repldboff
,SEEK_SET
);
7921 buflen
= read(slave
->repldbfd
,buf
,REDIS_IOBUF_LEN
);
7923 redisLog(REDIS_WARNING
,"Read error sending DB to slave: %s",
7924 (buflen
== 0) ? "premature EOF" : strerror(errno
));
7928 if ((nwritten
= write(fd
,buf
,buflen
)) == -1) {
7929 redisLog(REDIS_VERBOSE
,"Write error sending DB to slave: %s",
7934 slave
->repldboff
+= nwritten
;
7935 if (slave
->repldboff
== slave
->repldbsize
) {
7936 close(slave
->repldbfd
);
7937 slave
->repldbfd
= -1;
7938 aeDeleteFileEvent(server
.el
,slave
->fd
,AE_WRITABLE
);
7939 slave
->replstate
= REDIS_REPL_ONLINE
;
7940 if (aeCreateFileEvent(server
.el
, slave
->fd
, AE_WRITABLE
,
7941 sendReplyToClient
, slave
) == AE_ERR
) {
7945 addReplySds(slave
,sdsempty());
7946 redisLog(REDIS_NOTICE
,"Synchronization with slave succeeded");
7950 /* This function is called at the end of every backgrond saving.
7951 * The argument bgsaveerr is REDIS_OK if the background saving succeeded
7952 * otherwise REDIS_ERR is passed to the function.
7954 * The goal of this function is to handle slaves waiting for a successful
7955 * background saving in order to perform non-blocking synchronization. */
7956 static void updateSlavesWaitingBgsave(int bgsaveerr
) {
7958 int startbgsave
= 0;
7961 listRewind(server
.slaves
,&li
);
7962 while((ln
= listNext(&li
))) {
7963 redisClient
*slave
= ln
->value
;
7965 if (slave
->replstate
== REDIS_REPL_WAIT_BGSAVE_START
) {
7967 slave
->replstate
= REDIS_REPL_WAIT_BGSAVE_END
;
7968 } else if (slave
->replstate
== REDIS_REPL_WAIT_BGSAVE_END
) {
7969 struct redis_stat buf
;
7971 if (bgsaveerr
!= REDIS_OK
) {
7973 redisLog(REDIS_WARNING
,"SYNC failed. BGSAVE child returned an error");
7976 if ((slave
->repldbfd
= open(server
.dbfilename
,O_RDONLY
)) == -1 ||
7977 redis_fstat(slave
->repldbfd
,&buf
) == -1) {
7979 redisLog(REDIS_WARNING
,"SYNC failed. Can't open/stat DB after BGSAVE: %s", strerror(errno
));
7982 slave
->repldboff
= 0;
7983 slave
->repldbsize
= buf
.st_size
;
7984 slave
->replstate
= REDIS_REPL_SEND_BULK
;
7985 aeDeleteFileEvent(server
.el
,slave
->fd
,AE_WRITABLE
);
7986 if (aeCreateFileEvent(server
.el
, slave
->fd
, AE_WRITABLE
, sendBulkToSlave
, slave
) == AE_ERR
) {
7993 if (rdbSaveBackground(server
.dbfilename
) != REDIS_OK
) {
7996 listRewind(server
.slaves
,&li
);
7997 redisLog(REDIS_WARNING
,"SYNC failed. BGSAVE failed");
7998 while((ln
= listNext(&li
))) {
7999 redisClient
*slave
= ln
->value
;
8001 if (slave
->replstate
== REDIS_REPL_WAIT_BGSAVE_START
)
8008 static int syncWithMaster(void) {
8009 char buf
[1024], tmpfile
[256], authcmd
[1024];
8011 int fd
= anetTcpConnect(NULL
,server
.masterhost
,server
.masterport
);
8012 int dfd
, maxtries
= 5;
8015 redisLog(REDIS_WARNING
,"Unable to connect to MASTER: %s",
8020 /* AUTH with the master if required. */
8021 if(server
.masterauth
) {
8022 snprintf(authcmd
, 1024, "AUTH %s\r\n", server
.masterauth
);
8023 if (syncWrite(fd
, authcmd
, strlen(server
.masterauth
)+7, 5) == -1) {
8025 redisLog(REDIS_WARNING
,"Unable to AUTH to MASTER: %s",
8029 /* Read the AUTH result. */
8030 if (syncReadLine(fd
,buf
,1024,3600) == -1) {
8032 redisLog(REDIS_WARNING
,"I/O error reading auth result from MASTER: %s",
8036 if (buf
[0] != '+') {
8038 redisLog(REDIS_WARNING
,"Cannot AUTH to MASTER, is the masterauth password correct?");
8043 /* Issue the SYNC command */
8044 if (syncWrite(fd
,"SYNC \r\n",7,5) == -1) {
8046 redisLog(REDIS_WARNING
,"I/O error writing to MASTER: %s",
8050 /* Read the bulk write count */
8051 if (syncReadLine(fd
,buf
,1024,3600) == -1) {
8053 redisLog(REDIS_WARNING
,"I/O error reading bulk count from MASTER: %s",
8057 if (buf
[0] != '$') {
8059 redisLog(REDIS_WARNING
,"Bad protocol from MASTER, the first byte is not '$', are you sure the host and port are right?");
8062 dumpsize
= strtol(buf
+1,NULL
,10);
8063 redisLog(REDIS_NOTICE
,"Receiving %ld bytes data dump from MASTER",dumpsize
);
8064 /* Read the bulk write data on a temp file */
8066 snprintf(tmpfile
,256,
8067 "temp-%d.%ld.rdb",(int)time(NULL
),(long int)getpid());
8068 dfd
= open(tmpfile
,O_CREAT
|O_WRONLY
|O_EXCL
,0644);
8069 if (dfd
!= -1) break;
8074 redisLog(REDIS_WARNING
,"Opening the temp file needed for MASTER <-> SLAVE synchronization: %s",strerror(errno
));
8078 int nread
, nwritten
;
8080 nread
= read(fd
,buf
,(dumpsize
< 1024)?dumpsize
:1024);
8082 redisLog(REDIS_WARNING
,"I/O error trying to sync with MASTER: %s",
8088 nwritten
= write(dfd
,buf
,nread
);
8089 if (nwritten
== -1) {
8090 redisLog(REDIS_WARNING
,"Write error writing to the DB dump file needed for MASTER <-> SLAVE synchrnonization: %s", strerror(errno
));
8098 if (rename(tmpfile
,server
.dbfilename
) == -1) {
8099 redisLog(REDIS_WARNING
,"Failed trying to rename the temp DB into dump.rdb in MASTER <-> SLAVE synchronization: %s", strerror(errno
));
8105 if (rdbLoad(server
.dbfilename
) != REDIS_OK
) {
8106 redisLog(REDIS_WARNING
,"Failed trying to load the MASTER synchronization DB from disk");
8110 server
.master
= createClient(fd
);
8111 server
.master
->flags
|= REDIS_MASTER
;
8112 server
.master
->authenticated
= 1;
8113 server
.replstate
= REDIS_REPL_CONNECTED
;
8117 static void slaveofCommand(redisClient
*c
) {
8118 if (!strcasecmp(c
->argv
[1]->ptr
,"no") &&
8119 !strcasecmp(c
->argv
[2]->ptr
,"one")) {
8120 if (server
.masterhost
) {
8121 sdsfree(server
.masterhost
);
8122 server
.masterhost
= NULL
;
8123 if (server
.master
) freeClient(server
.master
);
8124 server
.replstate
= REDIS_REPL_NONE
;
8125 redisLog(REDIS_NOTICE
,"MASTER MODE enabled (user request)");
8128 sdsfree(server
.masterhost
);
8129 server
.masterhost
= sdsdup(c
->argv
[1]->ptr
);
8130 server
.masterport
= atoi(c
->argv
[2]->ptr
);
8131 if (server
.master
) freeClient(server
.master
);
8132 server
.replstate
= REDIS_REPL_CONNECT
;
8133 redisLog(REDIS_NOTICE
,"SLAVE OF %s:%d enabled (user request)",
8134 server
.masterhost
, server
.masterport
);
8136 addReply(c
,shared
.ok
);
8139 /* ============================ Maxmemory directive ======================== */
8141 /* Try to free one object form the pre-allocated objects free list.
8142 * This is useful under low mem conditions as by default we take 1 million
8143 * free objects allocated. On success REDIS_OK is returned, otherwise
8145 static int tryFreeOneObjectFromFreelist(void) {
8148 if (server
.vm_enabled
) pthread_mutex_lock(&server
.obj_freelist_mutex
);
8149 if (listLength(server
.objfreelist
)) {
8150 listNode
*head
= listFirst(server
.objfreelist
);
8151 o
= listNodeValue(head
);
8152 listDelNode(server
.objfreelist
,head
);
8153 if (server
.vm_enabled
) pthread_mutex_unlock(&server
.obj_freelist_mutex
);
8157 if (server
.vm_enabled
) pthread_mutex_unlock(&server
.obj_freelist_mutex
);
8162 /* This function gets called when 'maxmemory' is set on the config file to limit
8163 * the max memory used by the server, and we are out of memory.
8164 * This function will try to, in order:
8166 * - Free objects from the free list
8167 * - Try to remove keys with an EXPIRE set
8169 * It is not possible to free enough memory to reach used-memory < maxmemory
8170 * the server will start refusing commands that will enlarge even more the
8173 static void freeMemoryIfNeeded(void) {
8174 while (server
.maxmemory
&& zmalloc_used_memory() > server
.maxmemory
) {
8175 int j
, k
, freed
= 0;
8177 if (tryFreeOneObjectFromFreelist() == REDIS_OK
) continue;
8178 for (j
= 0; j
< server
.dbnum
; j
++) {
8180 robj
*minkey
= NULL
;
8181 struct dictEntry
*de
;
8183 if (dictSize(server
.db
[j
].expires
)) {
8185 /* From a sample of three keys drop the one nearest to
8186 * the natural expire */
8187 for (k
= 0; k
< 3; k
++) {
8190 de
= dictGetRandomKey(server
.db
[j
].expires
);
8191 t
= (time_t) dictGetEntryVal(de
);
8192 if (minttl
== -1 || t
< minttl
) {
8193 minkey
= dictGetEntryKey(de
);
8197 deleteKey(server
.db
+j
,minkey
);
8200 if (!freed
) return; /* nothing to free... */
8204 /* ============================== Append Only file ========================== */
8206 /* Write the append only file buffer on disk.
8208 * Since we are required to write the AOF before replying to the client,
8209 * and the only way the client socket can get a write is entering when the
8210 * the event loop, we accumulate all the AOF writes in a memory
8211 * buffer and write it on disk using this function just before entering
8212 * the event loop again. */
8213 static void flushAppendOnlyFile(void) {
8217 if (sdslen(server
.aofbuf
) == 0) return;
8219 /* We want to perform a single write. This should be guaranteed atomic
8220 * at least if the filesystem we are writing is a real physical one.
8221 * While this will save us against the server being killed I don't think
8222 * there is much to do about the whole server stopping for power problems
8224 nwritten
= write(server
.appendfd
,server
.aofbuf
,sdslen(server
.aofbuf
));
8225 if (nwritten
!= (signed)sdslen(server
.aofbuf
)) {
8226 /* Ooops, we are in troubles. The best thing to do for now is
8227 * aborting instead of giving the illusion that everything is
8228 * working as expected. */
8229 if (nwritten
== -1) {
8230 redisLog(REDIS_WARNING
,"Exiting on error writing to the append-only file: %s",strerror(errno
));
8232 redisLog(REDIS_WARNING
,"Exiting on short write while writing to the append-only file: %s",strerror(errno
));
8236 sdsfree(server
.aofbuf
);
8237 server
.aofbuf
= sdsempty();
8239 /* Fsync if needed */
8241 if (server
.appendfsync
== APPENDFSYNC_ALWAYS
||
8242 (server
.appendfsync
== APPENDFSYNC_EVERYSEC
&&
8243 now
-server
.lastfsync
> 1))
8245 /* aof_fsync is defined as fdatasync() for Linux in order to avoid
8246 * flushing metadata. */
8247 aof_fsync(server
.appendfd
); /* Let's try to get this data on the disk */
8248 server
.lastfsync
= now
;
8252 static sds
catAppendOnlyGenericCommand(sds buf
, int argc
, robj
**argv
) {
8254 buf
= sdscatprintf(buf
,"*%d\r\n",argc
);
8255 for (j
= 0; j
< argc
; j
++) {
8256 robj
*o
= getDecodedObject(argv
[j
]);
8257 buf
= sdscatprintf(buf
,"$%lu\r\n",(unsigned long)sdslen(o
->ptr
));
8258 buf
= sdscatlen(buf
,o
->ptr
,sdslen(o
->ptr
));
8259 buf
= sdscatlen(buf
,"\r\n",2);
8265 static sds
catAppendOnlyExpireAtCommand(sds buf
, robj
*key
, robj
*seconds
) {
8270 /* Make sure we can use strtol */
8271 seconds
= getDecodedObject(seconds
);
8272 when
= time(NULL
)+strtol(seconds
->ptr
,NULL
,10);
8273 decrRefCount(seconds
);
8275 argv
[0] = createStringObject("EXPIREAT",8);
8277 argv
[2] = createObject(REDIS_STRING
,
8278 sdscatprintf(sdsempty(),"%ld",when
));
8279 buf
= catAppendOnlyGenericCommand(buf
, argc
, argv
);
8280 decrRefCount(argv
[0]);
8281 decrRefCount(argv
[2]);
8285 static void feedAppendOnlyFile(struct redisCommand
*cmd
, int dictid
, robj
**argv
, int argc
) {
8286 sds buf
= sdsempty();
8289 /* The DB this command was targetting is not the same as the last command
8290 * we appendend. To issue a SELECT command is needed. */
8291 if (dictid
!= server
.appendseldb
) {
8294 snprintf(seldb
,sizeof(seldb
),"%d",dictid
);
8295 buf
= sdscatprintf(buf
,"*2\r\n$6\r\nSELECT\r\n$%lu\r\n%s\r\n",
8296 (unsigned long)strlen(seldb
),seldb
);
8297 server
.appendseldb
= dictid
;
8300 if (cmd
->proc
== expireCommand
) {
8301 /* Translate EXPIRE into EXPIREAT */
8302 buf
= catAppendOnlyExpireAtCommand(buf
,argv
[1],argv
[2]);
8303 } else if (cmd
->proc
== setexCommand
) {
8304 /* Translate SETEX to SET and EXPIREAT */
8305 tmpargv
[0] = createStringObject("SET",3);
8306 tmpargv
[1] = argv
[1];
8307 tmpargv
[2] = argv
[3];
8308 buf
= catAppendOnlyGenericCommand(buf
,3,tmpargv
);
8309 decrRefCount(tmpargv
[0]);
8310 buf
= catAppendOnlyExpireAtCommand(buf
,argv
[1],argv
[2]);
8312 buf
= catAppendOnlyGenericCommand(buf
,argc
,argv
);
8315 /* Append to the AOF buffer. This will be flushed on disk just before
8316 * of re-entering the event loop, so before the client will get a
8317 * positive reply about the operation performed. */
8318 server
.aofbuf
= sdscatlen(server
.aofbuf
,buf
,sdslen(buf
));
8320 /* If a background append only file rewriting is in progress we want to
8321 * accumulate the differences between the child DB and the current one
8322 * in a buffer, so that when the child process will do its work we
8323 * can append the differences to the new append only file. */
8324 if (server
.bgrewritechildpid
!= -1)
8325 server
.bgrewritebuf
= sdscatlen(server
.bgrewritebuf
,buf
,sdslen(buf
));
8330 /* In Redis commands are always executed in the context of a client, so in
8331 * order to load the append only file we need to create a fake client. */
8332 static struct redisClient
*createFakeClient(void) {
8333 struct redisClient
*c
= zmalloc(sizeof(*c
));
8337 c
->querybuf
= sdsempty();
8341 /* We set the fake client as a slave waiting for the synchronization
8342 * so that Redis will not try to send replies to this client. */
8343 c
->replstate
= REDIS_REPL_WAIT_BGSAVE_START
;
8344 c
->reply
= listCreate();
8345 listSetFreeMethod(c
->reply
,decrRefCount
);
8346 listSetDupMethod(c
->reply
,dupClientReplyValue
);
8347 initClientMultiState(c
);
8351 static void freeFakeClient(struct redisClient
*c
) {
8352 sdsfree(c
->querybuf
);
8353 listRelease(c
->reply
);
8354 freeClientMultiState(c
);
8358 /* Replay the append log file. On error REDIS_OK is returned. On non fatal
8359 * error (the append only file is zero-length) REDIS_ERR is returned. On
8360 * fatal error an error message is logged and the program exists. */
8361 int loadAppendOnlyFile(char *filename
) {
8362 struct redisClient
*fakeClient
;
8363 FILE *fp
= fopen(filename
,"r");
8364 struct redis_stat sb
;
8365 unsigned long long loadedkeys
= 0;
8366 int appendonly
= server
.appendonly
;
8368 if (redis_fstat(fileno(fp
),&sb
) != -1 && sb
.st_size
== 0)
8372 redisLog(REDIS_WARNING
,"Fatal error: can't open the append log file for reading: %s",strerror(errno
));
8376 /* Temporarily disable AOF, to prevent EXEC from feeding a MULTI
8377 * to the same file we're about to read. */
8378 server
.appendonly
= 0;
8380 fakeClient
= createFakeClient();
8387 struct redisCommand
*cmd
;
8389 if (fgets(buf
,sizeof(buf
),fp
) == NULL
) {
8395 if (buf
[0] != '*') goto fmterr
;
8397 argv
= zmalloc(sizeof(robj
*)*argc
);
8398 for (j
= 0; j
< argc
; j
++) {
8399 if (fgets(buf
,sizeof(buf
),fp
) == NULL
) goto readerr
;
8400 if (buf
[0] != '$') goto fmterr
;
8401 len
= strtol(buf
+1,NULL
,10);
8402 argsds
= sdsnewlen(NULL
,len
);
8403 if (len
&& fread(argsds
,len
,1,fp
) == 0) goto fmterr
;
8404 argv
[j
] = createObject(REDIS_STRING
,argsds
);
8405 if (fread(buf
,2,1,fp
) == 0) goto fmterr
; /* discard CRLF */
8408 /* Command lookup */
8409 cmd
= lookupCommand(argv
[0]->ptr
);
8411 redisLog(REDIS_WARNING
,"Unknown command '%s' reading the append only file", argv
[0]->ptr
);
8414 /* Try object encoding */
8415 if (cmd
->flags
& REDIS_CMD_BULK
)
8416 argv
[argc
-1] = tryObjectEncoding(argv
[argc
-1]);
8417 /* Run the command in the context of a fake client */
8418 fakeClient
->argc
= argc
;
8419 fakeClient
->argv
= argv
;
8420 cmd
->proc(fakeClient
);
8421 /* Discard the reply objects list from the fake client */
8422 while(listLength(fakeClient
->reply
))
8423 listDelNode(fakeClient
->reply
,listFirst(fakeClient
->reply
));
8424 /* Clean up, ready for the next command */
8425 for (j
= 0; j
< argc
; j
++) decrRefCount(argv
[j
]);
8427 /* Handle swapping while loading big datasets when VM is on */
8429 if (server
.vm_enabled
&& (loadedkeys
% 5000) == 0) {
8430 while (zmalloc_used_memory() > server
.vm_max_memory
) {
8431 if (vmSwapOneObjectBlocking() == REDIS_ERR
) break;
8436 /* This point can only be reached when EOF is reached without errors.
8437 * If the client is in the middle of a MULTI/EXEC, log error and quit. */
8438 if (fakeClient
->flags
& REDIS_MULTI
) goto readerr
;
8441 freeFakeClient(fakeClient
);
8442 server
.appendonly
= appendonly
;
8447 redisLog(REDIS_WARNING
,"Unexpected end of file reading the append only file");
8449 redisLog(REDIS_WARNING
,"Unrecoverable error reading the append only file: %s", strerror(errno
));
8453 redisLog(REDIS_WARNING
,"Bad file format reading the append only file");
8457 /* Write an object into a file in the bulk format $<count>\r\n<payload>\r\n */
8458 static int fwriteBulkObject(FILE *fp
, robj
*obj
) {
8462 /* Avoid the incr/decr ref count business if possible to help
8463 * copy-on-write (we are often in a child process when this function
8465 * Also makes sure that key objects don't get incrRefCount-ed when VM
8467 if (obj
->encoding
!= REDIS_ENCODING_RAW
) {
8468 obj
= getDecodedObject(obj
);
8471 snprintf(buf
,sizeof(buf
),"$%ld\r\n",(long)sdslen(obj
->ptr
));
8472 if (fwrite(buf
,strlen(buf
),1,fp
) == 0) goto err
;
8473 if (sdslen(obj
->ptr
) && fwrite(obj
->ptr
,sdslen(obj
->ptr
),1,fp
) == 0)
8475 if (fwrite("\r\n",2,1,fp
) == 0) goto err
;
8476 if (decrrc
) decrRefCount(obj
);
8479 if (decrrc
) decrRefCount(obj
);
/* Write a binary-safe string into a file in the bulk format
 * $<count>\r\n<payload>\r\n
 *
 * 's' points to 'len' bytes of payload (not necessarily NUL terminated).
 * Returns 1 on success, 0 on write error. */
static int fwriteBulkString(FILE *fp, char *s, unsigned long len) {
    char buf[128];

    /* %lu matches the unsigned long argument. */
    snprintf(buf,sizeof(buf),"$%lu\r\n",len);
    if (fwrite(buf,strlen(buf),1,fp) == 0) return 0;
    if (len && fwrite(s,len,1,fp) == 0) return 0;
    if (fwrite("\r\n",2,1,fp) == 0) return 0;
    return 1;
}
/* Write a double value in bulk format $<count>\r\n<payload>\r\n, where the
 * payload is the value printed with "%.17g".
 * Returns 1 on success, 0 on write error. */
static int fwriteBulkDouble(FILE *fp, double d) {
    char header[128], payload[128];
    size_t paylen;

    /* The payload already carries its trailing CRLF, which is excluded
     * from the advertised count. */
    snprintf(payload,sizeof(payload),"%.17g\r\n",d);
    paylen = strlen(payload);
    snprintf(header,sizeof(header),"$%lu\r\n",(unsigned long)paylen-2);

    if (fwrite(header,strlen(header),1,fp) == 0 ||
        fwrite(payload,paylen,1,fp) == 0)
    {
        return 0;
    }
    return 1;
}
/* Write a long value in bulk format $<count>\r\n<payload>\r\n, where the
 * payload is the decimal representation of 'l'.
 * Returns 1 on success, 0 on write error. */
static int fwriteBulkLong(FILE *fp, long l) {
    char header[128], payload[128];
    size_t paylen;

    /* The payload already carries its trailing CRLF, which is excluded
     * from the advertised count. */
    snprintf(payload,sizeof(payload),"%ld\r\n",l);
    paylen = strlen(payload);
    snprintf(header,sizeof(header),"$%lu\r\n",(unsigned long)paylen-2);

    if (fwrite(header,strlen(header),1,fp) == 0 ||
        fwrite(payload,paylen,1,fp) == 0)
    {
        return 0;
    }
    return 1;
}
8517 /* Write a sequence of commands able to fully rebuild the dataset into
8518 * "filename". Used both by REWRITEAOF and BGREWRITEAOF. */
8519 static int rewriteAppendOnlyFile(char *filename
) {
8520 dictIterator
*di
= NULL
;
8525 time_t now
= time(NULL
);
8527 /* Note that we have to use a different temp name here compared to the
8528 * one used by rewriteAppendOnlyFileBackground() function. */
8529 snprintf(tmpfile
,256,"temp-rewriteaof-%d.aof", (int) getpid());
8530 fp
= fopen(tmpfile
,"w");
8532 redisLog(REDIS_WARNING
, "Failed rewriting the append only file: %s", strerror(errno
));
8535 for (j
= 0; j
< server
.dbnum
; j
++) {
8536 char selectcmd
[] = "*2\r\n$6\r\nSELECT\r\n";
8537 redisDb
*db
= server
.db
+j
;
8539 if (dictSize(d
) == 0) continue;
8540 di
= dictGetIterator(d
);
8546 /* SELECT the new DB */
8547 if (fwrite(selectcmd
,sizeof(selectcmd
)-1,1,fp
) == 0) goto werr
;
8548 if (fwriteBulkLong(fp
,j
) == 0) goto werr
;
8550 /* Iterate this DB writing every entry */
8551 while((de
= dictNext(di
)) != NULL
) {
8556 key
= dictGetEntryKey(de
);
8557 /* If the value for this key is swapped, load a preview in memory.
8558 * We use a "swapped" flag to remember if we need to free the
8559 * value object instead to just increment the ref count anyway
8560 * in order to avoid copy-on-write of pages if we are forked() */
8561 if (!server
.vm_enabled
|| key
->storage
== REDIS_VM_MEMORY
||
8562 key
->storage
== REDIS_VM_SWAPPING
) {
8563 o
= dictGetEntryVal(de
);
8566 o
= vmPreviewObject(key
);
8569 expiretime
= getExpire(db
,key
);
8571 /* Save the key and associated value */
8572 if (o
->type
== REDIS_STRING
) {
8573 /* Emit a SET command */
8574 char cmd
[]="*3\r\n$3\r\nSET\r\n";
8575 if (fwrite(cmd
,sizeof(cmd
)-1,1,fp
) == 0) goto werr
;
8577 if (fwriteBulkObject(fp
,key
) == 0) goto werr
;
8578 if (fwriteBulkObject(fp
,o
) == 0) goto werr
;
8579 } else if (o
->type
== REDIS_LIST
) {
8580 /* Emit the RPUSHes needed to rebuild the list */
8581 list
*list
= o
->ptr
;
8585 listRewind(list
,&li
);
8586 while((ln
= listNext(&li
))) {
8587 char cmd
[]="*3\r\n$5\r\nRPUSH\r\n";
8588 robj
*eleobj
= listNodeValue(ln
);
8590 if (fwrite(cmd
,sizeof(cmd
)-1,1,fp
) == 0) goto werr
;
8591 if (fwriteBulkObject(fp
,key
) == 0) goto werr
;
8592 if (fwriteBulkObject(fp
,eleobj
) == 0) goto werr
;
8594 } else if (o
->type
== REDIS_SET
) {
8595 /* Emit the SADDs needed to rebuild the set */
8597 dictIterator
*di
= dictGetIterator(set
);
8600 while((de
= dictNext(di
)) != NULL
) {
8601 char cmd
[]="*3\r\n$4\r\nSADD\r\n";
8602 robj
*eleobj
= dictGetEntryKey(de
);
8604 if (fwrite(cmd
,sizeof(cmd
)-1,1,fp
) == 0) goto werr
;
8605 if (fwriteBulkObject(fp
,key
) == 0) goto werr
;
8606 if (fwriteBulkObject(fp
,eleobj
) == 0) goto werr
;
8608 dictReleaseIterator(di
);
8609 } else if (o
->type
== REDIS_ZSET
) {
8610 /* Emit the ZADDs needed to rebuild the sorted set */
8612 dictIterator
*di
= dictGetIterator(zs
->dict
);
8615 while((de
= dictNext(di
)) != NULL
) {
8616 char cmd
[]="*4\r\n$4\r\nZADD\r\n";
8617 robj
*eleobj
= dictGetEntryKey(de
);
8618 double *score
= dictGetEntryVal(de
);
8620 if (fwrite(cmd
,sizeof(cmd
)-1,1,fp
) == 0) goto werr
;
8621 if (fwriteBulkObject(fp
,key
) == 0) goto werr
;
8622 if (fwriteBulkDouble(fp
,*score
) == 0) goto werr
;
8623 if (fwriteBulkObject(fp
,eleobj
) == 0) goto werr
;
8625 dictReleaseIterator(di
);
8626 } else if (o
->type
== REDIS_HASH
) {
8627 char cmd
[]="*4\r\n$4\r\nHSET\r\n";
8629 /* Emit the HSETs needed to rebuild the hash */
8630 if (o
->encoding
== REDIS_ENCODING_ZIPMAP
) {
8631 unsigned char *p
= zipmapRewind(o
->ptr
);
8632 unsigned char *field
, *val
;
8633 unsigned int flen
, vlen
;
8635 while((p
= zipmapNext(p
,&field
,&flen
,&val
,&vlen
)) != NULL
) {
8636 if (fwrite(cmd
,sizeof(cmd
)-1,1,fp
) == 0) goto werr
;
8637 if (fwriteBulkObject(fp
,key
) == 0) goto werr
;
8638 if (fwriteBulkString(fp
,(char*)field
,flen
) == -1)
8640 if (fwriteBulkString(fp
,(char*)val
,vlen
) == -1)
8644 dictIterator
*di
= dictGetIterator(o
->ptr
);
8647 while((de
= dictNext(di
)) != NULL
) {
8648 robj
*field
= dictGetEntryKey(de
);
8649 robj
*val
= dictGetEntryVal(de
);
8651 if (fwrite(cmd
,sizeof(cmd
)-1,1,fp
) == 0) goto werr
;
8652 if (fwriteBulkObject(fp
,key
) == 0) goto werr
;
8653 if (fwriteBulkObject(fp
,field
) == -1) return -1;
8654 if (fwriteBulkObject(fp
,val
) == -1) return -1;
8656 dictReleaseIterator(di
);
8659 redisPanic("Unknown object type");
8661 /* Save the expire time */
8662 if (expiretime
!= -1) {
8663 char cmd
[]="*3\r\n$8\r\nEXPIREAT\r\n";
8664 /* If this key is already expired skip it */
8665 if (expiretime
< now
) continue;
8666 if (fwrite(cmd
,sizeof(cmd
)-1,1,fp
) == 0) goto werr
;
8667 if (fwriteBulkObject(fp
,key
) == 0) goto werr
;
8668 if (fwriteBulkLong(fp
,expiretime
) == 0) goto werr
;
8670 if (swapped
) decrRefCount(o
);
8672 dictReleaseIterator(di
);
8675 /* Make sure data will not remain on the OS's output buffers */
8680 /* Use RENAME to make sure the DB file is changed atomically only
8681 * if the generate DB file is ok. */
8682 if (rename(tmpfile
,filename
) == -1) {
8683 redisLog(REDIS_WARNING
,"Error moving temp append only file on the final destination: %s", strerror(errno
));
8687 redisLog(REDIS_NOTICE
,"SYNC append only file rewrite performed");
8693 redisLog(REDIS_WARNING
,"Write error writing append only file on disk: %s", strerror(errno
));
8694 if (di
) dictReleaseIterator(di
);
8698 /* This is how rewriting of the append only file in background works:
8700 * 1) The user calls BGREWRITEAOF
8701 * 2) Redis calls this function, that forks():
8702 * 2a) the child rewrites the append only file in a temp file.
8703 * 2b) the parent accumulates differences in server.bgrewritebuf.
8704 * 3) When the child has finished '2a' it exits.
8705 * 4) The parent will trap the exit code, if it's OK, will append the
8706 * data accumulated into server.bgrewritebuf into the temp file, and
8707 * finally will rename(2) the temp file in the actual file name.
8708 * Then the new file is reopened as the new append only file. Profit!
 *
 * Returns REDIS_ERR right away when server.bgrewritechildpid shows that a
 * rewrite child already exists. In the parent, after a successful fork(),
 * the child pid is stored in server.bgrewritechildpid and REDIS_OK is
 * returned. */
8710 static int rewriteAppendOnlyFileBackground(void) {
/* Only one background rewrite at a time. */
8713 if (server
.bgrewritechildpid
!= -1) return REDIS_ERR
;
/* With VM enabled, let the threaded I/O job queue drain before forking. */
8714 if (server
.vm_enabled
) waitEmptyIOJobsQueue();
/* fork() == 0: the code below runs in the child. */
8715 if ((childpid
= fork()) == 0) {
/* The child re-opens the swap file so it gets its own stream. */
8719 if (server
.vm_enabled
) vmReopenSwapFile();
/* The temp file name embeds the child's pid; aofRemoveTempFile() builds
 * the same name from the pid. */
8721 snprintf(tmpfile
,256,"temp-rewriteaof-bg-%d.aof", (int) getpid());
8722 if (rewriteAppendOnlyFile(tmpfile
) == REDIS_OK
) {
/* Parent side: childpid == -1 means fork() itself failed. */
8729 if (childpid
== -1) {
8730 redisLog(REDIS_WARNING
,
8731 "Can't rewrite append only file in background: fork: %s",
8735 redisLog(REDIS_NOTICE
,
8736 "Background append only file rewriting started by pid %d",childpid
);
/* Remember the child so that a second rewrite is refused meanwhile. */
8737 server
.bgrewritechildpid
= childpid
;
8738 updateDictResizePolicy();
8739 /* We set appendseldb to -1 in order to force the next call to the
8740 * feedAppendOnlyFile() to issue a SELECT command, so the differences
8741 * accumulated by the parent into server.bgrewritebuf will start
8742 * with a SELECT statement and it will be safe to merge. */
8743 server
.appendseldb
= -1;
8746 return REDIS_OK
; /* unreached */
8749 static void bgrewriteaofCommand(redisClient
*c
) {
8750 if (server
.bgrewritechildpid
!= -1) {
8751 addReplySds(c
,sdsnew("-ERR background append only file rewriting already in progress\r\n"));
8754 if (rewriteAppendOnlyFileBackground() == REDIS_OK
) {
8755 char *status
= "+Background append only file rewriting started\r\n";
8756 addReplySds(c
,sdsnew(status
));
8758 addReply(c
,shared
.err
);
/* Remove the temporary file produced by the background AOF rewrite child
 * whose pid is 'childpid'. The name is rebuilt with the same pattern the
 * child uses ("temp-rewriteaof-bg-<pid>.aof"). Errors from unlink(2) are
 * ignored. */
static void aofRemoveTempFile(pid_t childpid) {
    char path[256];

    snprintf(path,sizeof(path),"temp-rewriteaof-bg-%d.aof",(int) childpid);
    unlink(path);
}
8769 /* Virtual Memory is composed mainly of two subsystems:
8770 * - Blocking Virtual Memory
8771 * - Threaded Virtual Memory I/O
8772 * The two parts are not fully decoupled, but functions are split among two
8773 * different sections of the source code (delimited by comments) in order to
8774 * make more clear what functionality is about the blocking VM and what about
8775 * the threaded (not blocking) VM.
8779 * Redis VM is a blocking VM (one that blocks reading swapped values from
8780 * disk into memory when a value swapped out is needed in memory) that is made
8781 * unblocking by trying to examine the command argument vector in order to
8782 * load in background values that will likely be needed in order to exec
8783 * the command. The command is executed only once all the relevant keys
8784 * are loaded into memory.
8786 * This is basically almost as simple as a blocking VM, but almost as parallel
8787 * as a fully non-blocking VM.
8790 /* Called when the user switches from "appendonly yes" to "appendonly no"
8791 * at runtime using the CONFIG command. */
8792 static void stopAppendOnly(void) {
8793 flushAppendOnlyFile();
8794 fsync(server
.appendfd
);
8795 close(server
.appendfd
);
8797 server
.appendfd
= -1;
8798 server
.appendseldb
= -1;
8799 server
.appendonly
= 0;
8800 /* rewrite operation in progress? kill it, wait child exit */
8801 if (server
.bgsavechildpid
!= -1) {
8804 if (kill(server
.bgsavechildpid
,SIGKILL
) != -1)
8805 wait3(&statloc
,0,NULL
);
8806 /* reset the buffer accumulating changes while the child saves */
8807 sdsfree(server
.bgrewritebuf
);
8808 server
.bgrewritebuf
= sdsempty();
8809 server
.bgsavechildpid
= -1;
8813 /* Called when the user switches from "appendonly no" to "appendonly yes"
8814 * at runtime using the CONFIG command. */
8815 static int startAppendOnly(void) {
8816 server
.appendonly
= 1;
8817 server
.lastfsync
= time(NULL
);
8818 server
.appendfd
= open(server
.appendfilename
,O_WRONLY
|O_APPEND
|O_CREAT
,0644);
8819 if (server
.appendfd
== -1) {
8820 redisLog(REDIS_WARNING
,"Used tried to switch on AOF via CONFIG, but I can't open the AOF file: %s",strerror(errno
));
8823 if (rewriteAppendOnlyFileBackground() == REDIS_ERR
) {
8824 server
.appendonly
= 0;
8825 close(server
.appendfd
);
8826 redisLog(REDIS_WARNING
,"Used tried to switch on AOF via CONFIG, I can't trigger a background AOF rewrite operation. Check the above logs for more info about the error.",strerror(errno
));
8832 /* =================== Virtual Memory - Blocking Side ====================== */
/* Initialize the Virtual Memory subsystem.
 *
 * In order: enables thread safe zmalloc when VM threads are configured,
 * opens the swap file server.vm_swap_file (an existing one with "r+b",
 * otherwise a new one with "w+b"), takes an fcntl(2) write lock on it,
 * resets the VM counters, sizes the file to vm_pages*vm_page_size bytes
 * with ftruncate(2), allocates and zeroes the page bitmap (one bit per
 * page), and finally sets up threaded I/O: job lists, mutexes, the
 * notification pipe, the I/O thread stack size and the event loop handler
 * for the read side of the pipe.
 *
 * NOTE(review): the failure branches log a message ending in "Exiting.";
 * the statements that follow those log calls are not visible in this
 * excerpt. */
8834 static void vmInit(void) {
8840 if (server
.vm_max_threads
!= 0)
8841 zmalloc_enable_thread_safeness(); /* we need thread safe zmalloc() */
8843 redisLog(REDIS_NOTICE
,"Using '%s' as swap file",server
.vm_swap_file
);
8844 /* Try to open the old swap file, otherwise create it */
8845 if ((server
.vm_fp
= fopen(server
.vm_swap_file
,"r+b")) == NULL
) {
8846 server
.vm_fp
= fopen(server
.vm_swap_file
,"w+b");
8848 if (server
.vm_fp
== NULL
) {
8849 redisLog(REDIS_WARNING
,
8850 "Can't open the swap file: %s. Exiting.",
/* Keep the raw descriptor too: fcntl() and ftruncate() below need it. */
8854 server
.vm_fd
= fileno(server
.vm_fp
);
8855 /* Lock the swap file for writing, this is useful in order to avoid
8856 * another instance to use the same swap file for a config error. */
8857 fl
.l_type
= F_WRLCK
;
8858 fl
.l_whence
= SEEK_SET
;
/* l_start = l_len = 0 with SEEK_SET: the lock covers the whole file. */
8859 fl
.l_start
= fl
.l_len
= 0;
8860 if (fcntl(server
.vm_fd
,F_SETLK
,&fl
) == -1) {
8861 redisLog(REDIS_WARNING
,
8862 "Can't lock the swap file at '%s': %s. Make sure it is not used by another Redis instance.", server
.vm_swap_file
, strerror(errno
));
/* Reset the page allocator position and the VM statistics. */
8866 server
.vm_next_page
= 0;
8867 server
.vm_near_pages
= 0;
8868 server
.vm_stats_used_pages
= 0;
8869 server
.vm_stats_swapped_objects
= 0;
8870 server
.vm_stats_swapouts
= 0;
8871 server
.vm_stats_swapins
= 0;
8872 totsize
= server
.vm_pages
*server
.vm_page_size
;
/* NOTE(review): totsize is passed to "%lld" without a cast; its declared
 * type is not visible here - confirm it is long long compatible. */
8873 redisLog(REDIS_NOTICE
,"Allocating %lld bytes of swap file",totsize
);
8874 if (ftruncate(server
.vm_fd
,totsize
) == -1) {
8875 redisLog(REDIS_WARNING
,"Can't ftruncate swap file: %s. Exiting.",
8879 redisLog(REDIS_NOTICE
,"Swap file allocated with success");
/* One bit per page, rounded up to a whole number of bytes. */
8881 server
.vm_bitmap
= zmalloc((server
.vm_pages
+7)/8);
/* NOTE(review): the second "%lld" argument (server.vm_pages) is not cast,
 * unlike the first one - confirm the type matches the format. */
8882 redisLog(REDIS_VERBOSE
,"Allocated %lld bytes page table for %lld pages",
8883 (long long) (server
.vm_pages
+7)/8, server
.vm_pages
);
/* All bits cleared: vmFreePage() treats a zero bit as a free page. */
8884 memset(server
.vm_bitmap
,0,(server
.vm_pages
+7)/8);
8886 /* Initialize threaded I/O (used by Virtual Memory) */
8887 server
.io_newjobs
= listCreate();
8888 server
.io_processing
= listCreate();
8889 server
.io_processed
= listCreate();
8890 server
.io_ready_clients
= listCreate();
8891 pthread_mutex_init(&server
.io_mutex
,NULL
);
8892 pthread_mutex_init(&server
.obj_freelist_mutex
,NULL
);
8893 pthread_mutex_init(&server
.io_swapfile_mutex
,NULL
);
8894 server
.io_active_threads
= 0;
/* Pipe used by I/O threads to notify the main thread of completed jobs. */
8895 if (pipe(pipefds
) == -1) {
8896 redisLog(REDIS_WARNING
,"Unable to intialized VM: pipe(2): %s. Exiting."
8900 server
.io_ready_pipe_read
= pipefds
[0];
8901 server
.io_ready_pipe_write
= pipefds
[1];
/* Only the read side is switched to non blocking mode here. */
8902 redisAssert(anetNonBlock(NULL
,server
.io_ready_pipe_read
) != ANET_ERR
);
8903 /* LZF requires a lot of stack */
8904 pthread_attr_init(&server
.io_threads_attr
);
8905 pthread_attr_getstacksize(&server
.io_threads_attr
, &stacksize
);
/* Double the default stack size until it reaches the required minimum. */
8906 while (stacksize
< REDIS_THREAD_STACK_SIZE
) stacksize
*= 2;
8907 pthread_attr_setstacksize(&server
.io_threads_attr
, stacksize
);
8908 /* Listen for events in the threaded I/O pipe */
8909 if (aeCreateFileEvent(server
.el
, server
.io_ready_pipe_read
, AE_READABLE
,
8910 vmThreadedIOCompletedJob
, NULL
) == AE_ERR
)
8911 oom("creating file event");
8914 /* Mark the page as used */
8915 static void vmMarkPageUsed(off_t page
) {
8916 off_t byte
= page
/8;
8918 redisAssert(vmFreePage(page
) == 1);
8919 server
.vm_bitmap
[byte
] |= 1<<bit
;
8922 /* Mark N contiguous pages as used, with 'page' being the first. */
8923 static void vmMarkPagesUsed(off_t page
, off_t count
) {
8926 for (j
= 0; j
< count
; j
++)
8927 vmMarkPageUsed(page
+j
);
8928 server
.vm_stats_used_pages
+= count
;
8929 redisLog(REDIS_DEBUG
,"Mark USED pages: %lld pages at %lld\n",
8930 (long long)count
, (long long)page
);
8933 /* Mark the page as free */
8934 static void vmMarkPageFree(off_t page
) {
8935 off_t byte
= page
/8;
8937 redisAssert(vmFreePage(page
) == 0);
8938 server
.vm_bitmap
[byte
] &= ~(1<<bit
);
8941 /* Mark N contiguous pages as free, with 'page' being the first. */
8942 static void vmMarkPagesFree(off_t page
, off_t count
) {
8945 for (j
= 0; j
< count
; j
++)
8946 vmMarkPageFree(page
+j
);
8947 server
.vm_stats_used_pages
-= count
;
8948 redisLog(REDIS_DEBUG
,"Mark FREE pages: %lld pages at %lld\n",
8949 (long long)count
, (long long)page
);
8952 /* Test if the page is free */
8953 static int vmFreePage(off_t page
) {
8954 off_t byte
= page
/8;
8956 return (server
.vm_bitmap
[byte
] & (1<<bit
)) == 0;
8959 /* Find N contiguous free pages storing the first page of the cluster in *first.
8960 * Returns REDIS_OK if it was able to find N contiguous pages, otherwise
8961 * REDIS_ERR is returned.
8963 * This function uses a simple algorithm: we try to allocate
8964 * REDIS_VM_MAX_NEAR_PAGES sequentially, when we reach this limit we start
8965 * again from the start of the swap file searching for free spaces.
8967 * If it looks pretty clear that there are no free pages near our offset
8968 * we try to find less populated places doing a forward jump of
8969 * REDIS_VM_MAX_RANDOM_JUMP, then we start scanning again a few pages
8970 * without hurry, and then we jump again and so forth...
8972 * This function can be improved using a free list to avoid guessing
8973 * too much, since we could collect data about freed pages.
8975 * note: I implemented this function just after watching an episode of
8976 * Battlestar Galactica, where the hybrid was continuing to say "JUMP!"
 *
 * The function only locates the pages and advances server.vm_next_page;
 * within the visible code it does not mark them as used. */
8978 static int vmFindContiguousPages(off_t
*first
, off_t n
) {
8979 off_t base
, offset
= 0, since_jump
= 0, numfree
= 0;
/* After REDIS_VM_MAX_NEAR_PAGES consecutive searches, restart from page 0. */
8981 if (server
.vm_near_pages
== REDIS_VM_MAX_NEAR_PAGES
) {
8982 server
.vm_near_pages
= 0;
8983 server
.vm_next_page
= 0;
8985 server
.vm_near_pages
++; /* Yet another try for pages near to the old ones */
8986 base
= server
.vm_next_page
;
/* 'offset' is relative to 'base'; the scan gives up once it has covered
 * as many pages as the swap file holds. */
8988 while(offset
< server
.vm_pages
) {
8989 off_t
this = base
+offset
;
8991 /* If we overflow, restart from page zero */
8992 if (this >= server
.vm_pages
) {
8993 this -= server
.vm_pages
;
8995 /* Just overflowed, what we found on tail is no longer
8996 * interesting, as it's no longer contiguous. */
9000 if (vmFreePage(this)) {
9001 /* This is a free page */
9003 /* Already got N free pages? Return to the caller, with success */
/* 'this' is the last page of the run, so the run starts n-1 pages before. */
9005 *first
= this-(n
-1);
/* The next search will start right after this cluster. */
9006 server
.vm_next_page
= this+1;
9007 redisLog(REDIS_DEBUG
, "FOUND CONTIGUOUS PAGES: %lld pages at %lld\n", (long long) n
, (long long) *first
);
9011 /* The current one is not a free page */
9015 /* Fast-forward if the current page is not free and we already
9016 * searched enough near this place. */
9018 if (!numfree
&& since_jump
>= REDIS_VM_MAX_RANDOM_JUMP
/4) {
9019 offset
+= random() % REDIS_VM_MAX_RANDOM_JUMP
;
9021 /* Note that even if we rewind after the jump, we don't need
9022 * to make sure numfree is set to zero as we only jump *if* it
9023 * is set to zero. */
9025 /* Otherwise just check the next page */
9032 /* Write the specified object at the specified page of the swap file */
9033 static int vmWriteObjectOnSwap(robj
*o
, off_t page
) {
9034 if (server
.vm_enabled
) pthread_mutex_lock(&server
.io_swapfile_mutex
);
9035 if (fseeko(server
.vm_fp
,page
*server
.vm_page_size
,SEEK_SET
) == -1) {
9036 if (server
.vm_enabled
) pthread_mutex_unlock(&server
.io_swapfile_mutex
);
9037 redisLog(REDIS_WARNING
,
9038 "Critical VM problem in vmWriteObjectOnSwap(): can't seek: %s",
9042 rdbSaveObject(server
.vm_fp
,o
);
9043 fflush(server
.vm_fp
);
9044 if (server
.vm_enabled
) pthread_mutex_unlock(&server
.io_swapfile_mutex
);
9048 /* Swap the 'val' object relative to 'key' into disk. Store all the information
9049 * needed to later retrieve the object into the key object.
9050 * If we can't find enough contiguous empty pages to swap the object on disk
9051 * REDIS_ERR is returned. */
9052 static int vmSwapObjectBlocking(robj
*key
, robj
*val
) {
9053 off_t pages
= rdbSavedObjectPages(val
,NULL
);
9056 assert(key
->storage
== REDIS_VM_MEMORY
);
9057 assert(key
->refcount
== 1);
9058 if (vmFindContiguousPages(&page
,pages
) == REDIS_ERR
) return REDIS_ERR
;
9059 if (vmWriteObjectOnSwap(val
,page
) == REDIS_ERR
) return REDIS_ERR
;
9060 key
->vm
.page
= page
;
9061 key
->vm
.usedpages
= pages
;
9062 key
->storage
= REDIS_VM_SWAPPED
;
9063 key
->vtype
= val
->type
;
9064 decrRefCount(val
); /* Deallocate the object from memory. */
9065 vmMarkPagesUsed(page
,pages
);
9066 redisLog(REDIS_DEBUG
,"VM: object %s swapped out at %lld (%lld pages)",
9067 (unsigned char*) key
->ptr
,
9068 (unsigned long long) page
, (unsigned long long) pages
);
9069 server
.vm_stats_swapped_objects
++;
9070 server
.vm_stats_swapouts
++;
/* Read from the swap file the object of the given 'type' stored starting
 * at 'page' (byte offset page*server.vm_page_size) and return it.
 *
 * When VM is enabled the seek and the load run with
 * server.io_swapfile_mutex held.
 *
 * NOTE(review): both failure paths log an "Unrecoverable VM problem"
 * warning; the statements following those log calls are not visible in
 * this excerpt. */
9074 static robj
*vmReadObjectFromSwap(off_t page
, int type
) {
9077 if (server
.vm_enabled
) pthread_mutex_lock(&server
.io_swapfile_mutex
);
9078 if (fseeko(server
.vm_fp
,page
*server
.vm_page_size
,SEEK_SET
) == -1) {
9079 redisLog(REDIS_WARNING
,
9080 "Unrecoverable VM problem in vmReadObjectFromSwap(): can't seek: %s",
/* Deserialize the object from the current position of the swap stream. */
9084 o
= rdbLoadObject(type
,server
.vm_fp
);
9086 redisLog(REDIS_WARNING
, "Unrecoverable VM problem in vmReadObjectFromSwap(): can't load object from swap file: %s", strerror(errno
));
9089 if (server
.vm_enabled
) pthread_mutex_unlock(&server
.io_swapfile_mutex
);
9093 /* Load the value object relative to the 'key' object from swap to memory.
9094 * The newly allocated object is returned.
9096 * If preview is true the unserialized object is returned to the caller but
9097 * no changes are made to the key object, nor the pages are marked as freed */
9098 static robj
*vmGenericLoadObject(robj
*key
, int preview
) {
9101 redisAssert(key
->storage
== REDIS_VM_SWAPPED
|| key
->storage
== REDIS_VM_LOADING
);
9102 val
= vmReadObjectFromSwap(key
->vm
.page
,key
->vtype
);
9104 key
->storage
= REDIS_VM_MEMORY
;
9105 key
->vm
.atime
= server
.unixtime
;
9106 vmMarkPagesFree(key
->vm
.page
,key
->vm
.usedpages
);
9107 redisLog(REDIS_DEBUG
, "VM: object %s loaded from disk",
9108 (unsigned char*) key
->ptr
);
9109 server
.vm_stats_swapped_objects
--;
9111 redisLog(REDIS_DEBUG
, "VM: object %s previewed from disk",
9112 (unsigned char*) key
->ptr
);
9114 server
.vm_stats_swapins
++;
9118 /* Plain object loading, from swap to memory */
9119 static robj
*vmLoadObject(robj
*key
) {
9120 /* If we are loading the object in background, stop it, we
9121 * need to load this object synchronously ASAP. */
9122 if (key
->storage
== REDIS_VM_LOADING
)
9123 vmCancelThreadedIOJob(key
);
9124 return vmGenericLoadObject(key
,0);
9127 /* Just load the value on disk, without to modify the key.
9128 * This is useful when we want to perform some operation on the value
9129 * without to really bring it from swap to memory, like while saving the
9130 * dataset or rewriting the append only log. */
9131 static robj
*vmPreviewObject(robj
*key
) {
9132 return vmGenericLoadObject(key
,1);
9135 /* How good a candidate is this object for swapping?
9136 * The better candidate it is, the greater the returned value.
9138 * Currently we try to perform a fast estimation of the object size in
9139 * memory, and combine it with aging information.
9141 * Basically swappability = idle-time * log(estimated size)
9143 * Bigger objects are preferred over smaller objects, but not
9144 * proportionally, this is why we use the logarithm. This algorithm is
9145 * just a first try and will probably be tuned later.
 *
 * Returns 0 when the object was accessed during the current second (or the
 * computed age is not positive), otherwise age*log(1+asize), where 'asize'
 * is the estimated size. For aggregates the estimate is built from a
 * single sampled element multiplied by the number of elements.
 *
 * NOTE(review): the statement dispatching on the object type is not
 * visible in this excerpt; the comments below describe each visible
 * estimate on its own. */
9146 static double computeObjectSwappability(robj
*o
) {
/* Idle time: seconds since the last recorded access. */
9147 time_t age
= server
.unixtime
- o
->vm
.atime
;
9151 struct dictEntry
*de
;
9154 if (age
<= 0) return 0;
9157 if (o
->encoding
!= REDIS_ENCODING_RAW
) {
/* sds payload length plus the object header plus two longs of overhead. */
9160 asize
= sdslen(o
->ptr
)+sizeof(*o
)+sizeof(long)*2;
/* Linked list: the first node's element stands in for all the others. */
9165 listNode
*ln
= listFirst(l
);
9167 asize
= sizeof(list
);
9169 robj
*ele
= ln
->value
;
9172 elesize
= (ele
->encoding
== REDIS_ENCODING_RAW
) ?
9173 (sizeof(*o
)+sdslen(ele
->ptr
)) :
9175 asize
+= (sizeof(listNode
)+elesize
)*listLength(l
);
/* Hash table backed value: for a sorted set the dict is a member of the
 * zset structure, otherwise o->ptr is the dict itself. */
9180 z
= (o
->type
== REDIS_ZSET
);
9181 d
= z
? ((zset
*)o
->ptr
)->dict
: o
->ptr
;
/* Fixed cost: the dict header plus one bucket pointer per slot. */
9183 asize
= sizeof(dict
)+(sizeof(struct dictEntry
*)*dictSlots(d
));
9184 if (z
) asize
+= sizeof(zset
)-sizeof(dict
);
/* Per element cost estimated from one random entry. */
9189 de
= dictGetRandomKey(d
);
9190 ele
= dictGetEntryKey(de
);
9191 elesize
= (ele
->encoding
== REDIS_ENCODING_RAW
) ?
9192 (sizeof(*o
)+sdslen(ele
->ptr
)) :
9194 asize
+= (sizeof(struct dictEntry
)+elesize
)*dictSize(d
);
/* Sorted sets also pay one skiplist node per element. */
9195 if (z
) asize
+= sizeof(zskiplistNode
)*dictSize(d
);
/* Zipmap encoded value: the first key/value pair is the sample. */
9199 if (o
->encoding
== REDIS_ENCODING_ZIPMAP
) {
9200 unsigned char *p
= zipmapRewind((unsigned char*)o
->ptr
);
9201 unsigned int len
= zipmapLen((unsigned char*)o
->ptr
);
9202 unsigned int klen
, vlen
;
9203 unsigned char *key
, *val
;
9205 if ((p
= zipmapNext(p
,&key
,&klen
,&val
,&vlen
)) == NULL
) {
/* Key length + value length + 3 bytes per pair, times the pair count. */
9209 asize
= len
*(klen
+vlen
+3);
9210 } else if (o
->encoding
== REDIS_ENCODING_HT
) {
9212 asize
= sizeof(dict
)+(sizeof(struct dictEntry
*)*dictSlots(d
));
/* Sample one random entry: first its key, then its value. */
9217 de
= dictGetRandomKey(d
);
9218 ele
= dictGetEntryKey(de
);
9219 elesize
= (ele
->encoding
== REDIS_ENCODING_RAW
) ?
9220 (sizeof(*o
)+sdslen(ele
->ptr
)) :
9222 ele
= dictGetEntryVal(de
);
9223 elesize
= (ele
->encoding
== REDIS_ENCODING_RAW
) ?
9224 (sizeof(*o
)+sdslen(ele
->ptr
)) :
9226 asize
+= (sizeof(struct dictEntry
)+elesize
)*dictSize(d
);
/* log(1+asize) keeps the result finite even for a zero size estimate. */
9231 return (double)age
*log(1+asize
);
9234 /* Try to swap an object that's a good candidate for swapping.
9235 * Returns REDIS_OK if the object was swapped, REDIS_ERR if it's not possible
9236 * to swap any object at all.
9238 * If 'usethreads' is true, Redis will try to swap the object in background
9239 * using I/O threads.
 *
 * Candidates are random keys sampled from every non empty database; the
 * one with the highest computeObjectSwappability() score is selected. */
9240 static int vmSwapOneObject(int usethreads
) {
9242 struct dictEntry
*best
= NULL
;
9243 double best_swappability
= 0;
9244 redisDb
*best_db
= NULL
;
9247 for (j
= 0; j
< server
.dbnum
; j
++) {
9248 redisDb
*db
= server
.db
+j
;
9249 /* Why maxtries is set to 100?
9250 * Because this way (usually) we'll find 1 object even if just 1% - 2%
9251 * are swappable objects */
9254 if (dictSize(db
->dict
) == 0) continue;
/* Five counted samples per database. */
9255 for (i
= 0; i
< 5; i
++) {
9257 double swappability
;
9259 if (maxtries
) maxtries
--;
9260 de
= dictGetRandomKey(db
->dict
);
9261 key
= dictGetEntryKey(de
);
9262 val
= dictGetEntryVal(de
);
9263 /* Only swap objects that are currently in memory.
9265 * Also don't swap shared objects if threaded VM is on, as we
9266 * try to ensure that the main thread does not touch the
9267 * object while the I/O thread is using it, but we can't
9268 * control other keys without adding additional mutex. */
9269 if (key
->storage
!= REDIS_VM_MEMORY
||
9270 (server
.vm_max_threads
!= 0 && val
->refcount
!= 1)) {
/* While tries are left, an unusable sample does not consume one of the
 * five slots of the inner loop. */
9271 if (maxtries
) i
--; /* don't count this try */
9274 swappability
= computeObjectSwappability(val
);
9275 if (!best
|| swappability
> best_swappability
) {
9277 best_swappability
= swappability
;
/* Nothing swappable was found in any database. */
9282 if (best
== NULL
) return REDIS_ERR
;
9283 key
= dictGetEntryKey(best
);
9284 val
= dictGetEntryVal(best
);
9286 redisLog(REDIS_DEBUG
,"Key with best swappability: %s, %f",
9287 key
->ptr
, best_swappability
);
9289 /* Unshare the key if needed */
/* Both swap paths assert key->refcount == 1, so a shared key object is
 * replaced by a private copy inside the dictionary entry. */
9290 if (key
->refcount
> 1) {
9291 robj
*newkey
= dupStringObject(key
);
9293 key
= dictGetEntryKey(best
) = newkey
;
9297 vmSwapObjectThreaded(key
,val
,best_db
);
9300 if (vmSwapObjectBlocking(key
,val
) == REDIS_OK
) {
/* The value was released by the blocking swap: drop the dangling pointer. */
9301 dictGetEntryVal(best
) = NULL
;
/* Swap out one object synchronously, in the calling thread.
 * Returns what vmSwapOneObject() returns: REDIS_OK if an object was
 * swapped, REDIS_ERR if no object could be swapped.
 *
 * The parameter list is spelled (void) like the other zero-argument
 * functions of this file, so that calls passing arguments are rejected
 * by the compiler. */
static int vmSwapOneObjectBlocking(void) {
    return vmSwapOneObject(0);
}
/* Swap out one object using the I/O threads.
 * Returns what vmSwapOneObject() returns: REDIS_OK if an object was
 * selected for swapping, REDIS_ERR if no object could be swapped.
 *
 * The parameter list is spelled (void) like the other zero-argument
 * functions of this file, so that calls passing arguments are rejected
 * by the compiler. */
static int vmSwapOneObjectThreaded(void) {
    return vmSwapOneObject(1);
}
9317 /* Return true if it's safe to swap out objects in a given moment.
9318 * Basically we don't want to swap objects out while there is a BGSAVE
9319 * or a BGAEOREWRITE running in backgroud. */
9320 static int vmCanSwapOut(void) {
9321 return (server
.bgsavechildpid
== -1 && server
.bgrewritechildpid
== -1);
9324 /* Delete a key if swapped. Returns 1 if the key was found, was swapped
9325 * and was deleted. Otherwise 0 is returned.
 *
 * "Swapped" here means any storage state other than REDIS_VM_MEMORY.
 * NOTE(review): the statements performing the deletion and returning 1
 * are not visible in this excerpt. */
9326 static int deleteIfSwapped(redisDb
*db
, robj
*key
) {
/* Key not present in this database: nothing to delete. */
9330 if ((de
= dictFind(db
->dict
,key
)) == NULL
) return 0;
/* The storage state lives in the key object stored in the dictionary,
 * which may not be the object the caller passed in. */
9331 foundkey
= dictGetEntryKey(de
);
9332 if (foundkey
->storage
== REDIS_VM_MEMORY
) return 0;
9337 /* =================== Virtual Memory - Threaded I/O ======================= */
9339 static void freeIOJob(iojob
*j
) {
9340 if ((j
->type
== REDIS_IOJOB_PREPARE_SWAP
||
9341 j
->type
== REDIS_IOJOB_DO_SWAP
||
9342 j
->type
== REDIS_IOJOB_LOAD
) && j
->val
!= NULL
)
9343 decrRefCount(j
->val
);
9344 /* We don't decrRefCount the j->key field as we did't incremented
9345 * the count creating IO Jobs. This is because the key field here is
9346 * just used as an indentifier and if a key is removed the Job should
9347 * never be touched again. */
9351 /* Every time a thread finished a Job, it writes a byte into the write side
9352 * of an unix pipe in order to "awake" the main thread, and this function
 * is the handler that consumes those bytes in the main thread.
 *
 * For every byte read from 'fd' one job is taken from the head of
 * server.io_processed and post-processed according to its type:
 * - REDIS_IOJOB_LOAD: the loaded value is attached to the key, the swap
 *   pages are freed, and clients blocked on the key are handled.
 * - REDIS_IOJOB_PREPARE_SWAP: pages are reserved and the job is rebranded
 *   as REDIS_IOJOB_DO_SWAP, or the operation is undone if swapping is not
 *   possible right now.
 * - REDIS_IOJOB_DO_SWAP: the key is flagged as swapped and the in-memory
 *   value is released.
 *
 * At most a fraction of the queue is processed per call ('toprocess',
 * computed with REDIS_MAX_COMPLETED_JOBS_PROCESSED as a percentage). */
9354 static void vmThreadedIOCompletedJob(aeEventLoop
*el
, int fd
, void *privdata
,
9358 int retval
, processed
= 0, toprocess
= -1, trytoswap
= 1;
9360 REDIS_NOTUSED(mask
);
9361 REDIS_NOTUSED(privdata
);
9363 /* For every byte we read in the read side of the pipe, there is one
9364 * I/O job completed to process. */
9365 while((retval
= read(fd
,buf
,1)) == 1) {
9369 struct dictEntry
*de
;
9371 redisLog(REDIS_DEBUG
,"Processing I/O completed job");
9373 /* Get the processed element (the oldest one) */
9375 assert(listLength(server
.io_processed
) != 0);
/* Computed once, on the first iteration: the number of jobs this call is
 * allowed to process, with a minimum of one. */
9376 if (toprocess
== -1) {
9377 toprocess
= (listLength(server
.io_processed
)*REDIS_MAX_COMPLETED_JOBS_PROCESSED
)/100;
9378 if (toprocess
<= 0) toprocess
= 1;
9380 ln
= listFirst(server
.io_processed
);
9382 listDelNode(server
.io_processed
,ln
);
9384 /* If this job is marked as canceled, just ignore it */
9389 /* Post process it in the main thread, as there are things we
9390 * can do just here to avoid race conditions and/or invasive locks */
9391 redisLog(REDIS_DEBUG
,"Job %p type: %d, key at %p (%s) refcount: %d\n", (void*) j
, j
->type
, (void*)j
->key
, (char*)j
->key
->ptr
, j
->key
->refcount
);
/* Look the key up again: the job only carries the key as an identifier. */
9392 de
= dictFind(j
->db
->dict
,j
->key
);
9394 key
= dictGetEntryKey(de
);
9395 if (j
->type
== REDIS_IOJOB_LOAD
) {
9398 /* Key loaded, bring it at home */
9399 key
->storage
= REDIS_VM_MEMORY
;
9400 key
->vm
.atime
= server
.unixtime
;
9401 vmMarkPagesFree(key
->vm
.page
,key
->vm
.usedpages
);
9402 redisLog(REDIS_DEBUG
, "VM: object %s loaded from disk (threaded)",
9403 (unsigned char*) key
->ptr
);
9404 server
.vm_stats_swapped_objects
--;
9405 server
.vm_stats_swapins
++;
/* The dictionary now references the value too; freeIOJob() drops the
 * job's own reference to j->val. */
9406 dictGetEntryVal(de
) = j
->val
;
9407 incrRefCount(j
->val
);
9410 /* Handle clients waiting for this key to be loaded. */
9411 handleClientsBlockedOnSwappedKey(db
,key
);
9412 } else if (j
->type
== REDIS_IOJOB_PREPARE_SWAP
) {
9413 /* Now we know the amount of pages required to swap this object.
9414 * Let's find some space for it, and queue this task again
9415 * rebranded as REDIS_IOJOB_DO_SWAP. */
9416 if (!vmCanSwapOut() ||
9417 vmFindContiguousPages(&j
->page
,j
->pages
) == REDIS_ERR
)
9419 /* Ooops... no space or we can't swap as there is
9420 * a fork()ed Redis trying to save stuff on disk. */
9422 key
->storage
= REDIS_VM_MEMORY
; /* undo operation */
9424 /* Note that we need to mark these pages as used now,
9425 * if the job will be canceled, we'll mark them as freed
 * again (see vmCancelThreadedIOJob). */
9427 vmMarkPagesUsed(j
->page
,j
->pages
);
9428 j
->type
= REDIS_IOJOB_DO_SWAP
;
9433 } else if (j
->type
== REDIS_IOJOB_DO_SWAP
) {
9436 /* Key swapped. We can finally free some memory. */
/* Debugging aid: dump the state to stdout before the assertion below
 * fails on an unexpected storage value. */
9437 if (key
->storage
!= REDIS_VM_SWAPPING
) {
9438 printf("key->storage: %d\n",key
->storage
);
9439 printf("key->name: %s\n",(char*)key
->ptr
);
9440 printf("key->refcount: %d\n",key
->refcount
);
9441 printf("val: %p\n",(void*)j
->val
);
9442 printf("val->type: %d\n",j
->val
->type
);
9443 printf("val->ptr: %s\n",(char*)j
->val
->ptr
);
9445 redisAssert(key
->storage
== REDIS_VM_SWAPPING
);
9446 val
= dictGetEntryVal(de
);
9447 key
->vm
.page
= j
->page
;
9448 key
->vm
.usedpages
= j
->pages
;
9449 key
->storage
= REDIS_VM_SWAPPED
;
9450 key
->vtype
= j
->val
->type
;
9451 decrRefCount(val
); /* Deallocate the object from memory. */
9452 dictGetEntryVal(de
) = NULL
;
9453 redisLog(REDIS_DEBUG
,
9454 "VM: object %s swapped out at %lld (%lld pages) (threaded)",
9455 (unsigned char*) key
->ptr
,
9456 (unsigned long long) j
->page
, (unsigned long long) j
->pages
);
9457 server
.vm_stats_swapped_objects
++;
9458 server
.vm_stats_swapouts
++;
9460 /* Put a few more swap requests in queue if we are still
 * using more memory than server.vm_max_memory. */
9462 if (trytoswap
&& vmCanSwapOut() &&
9463 zmalloc_used_memory() > server
.vm_max_memory
)
/* 'more' is true while fewer jobs are queued than configured VM threads. */
9468 more
= listLength(server
.io_newjobs
) <
9469 (unsigned) server
.vm_max_threads
;
9471 /* Don't waste CPU time if swappable objects are rare. */
9472 if (vmSwapOneObjectThreaded() == REDIS_ERR
) {
/* Processing budget for this call exhausted. */
9480 if (processed
== toprocess
) return;
/* EAGAIN just means the non blocking pipe is empty. */
9482 if (retval
< 0 && errno
!= EAGAIN
) {
9483 redisLog(REDIS_WARNING
,
9484 "WARNING: read(2) error in vmThreadedIOCompletedJob() %s",
9489 static void lockThreadedIO(void) {
9490 pthread_mutex_lock(&server
.io_mutex
);
9493 static void unlockThreadedIO(void) {
9494 pthread_mutex_unlock(&server
.io_mutex
);
9497 /* Remove the specified object from the threaded I/O queue if still not
9498 * processed, otherwise make sure to flag it as canceled.
 *
 * 'o' is the key object of the job: it must be in REDIS_VM_LOADING or
 * REDIS_VM_SWAPPING state. The three job queues are scanned in order
 * (new jobs, processing, processed) looking for a non canceled job whose
 * key pointer is 'o'. Once the job is dealt with, the storage state of
 * 'o' is reverted (LOADING -> SWAPPED, SWAPPING -> MEMORY). */
9499 static void vmCancelThreadedIOJob(robj
*o
) {
/* The queues, indexed by the "LIST ID" used in the switch and log below. */
9501 server
.io_newjobs
, /* 0 */
9502 server
.io_processing
, /* 1 */
9503 server
.io_processed
/* 2 */
9507 assert(o
->storage
== REDIS_VM_LOADING
|| o
->storage
== REDIS_VM_SWAPPING
);
9510 /* Search for a matching key in one of the queues */
9511 for (i
= 0; i
< 3; i
++) {
9515 listRewind(lists
[i
],&li
);
9516 while ((ln
= listNext(&li
)) != NULL
) {
9517 iojob
*job
= ln
->value
;
9519 if (job
->canceled
) continue; /* Skip this, already canceled. */
/* Jobs are matched by key pointer identity, not by key content. */
9520 if (job
->key
== o
) {
9521 redisLog(REDIS_DEBUG
,"*** CANCELED %p (%s) (type %d) (LIST ID %d)\n",
9522 (void*)job
, (char*)o
->ptr
, job
->type
, i
);
9523 /* Mark the pages as free since the swap didn't happen
9524 * or happened but is now discarded. */
/* Skipped for the processing queue (i == 1): that case waits for the job
 * to move to the next queue instead. */
9525 if (i
!= 1 && job
->type
== REDIS_IOJOB_DO_SWAP
)
9526 vmMarkPagesFree(job
->page
,job
->pages
);
9527 /* Cancel the job. It depends on the list the job is
 * currently in. */
9530 case 0: /* io_newjobs */
9531 /* If the job was yet not processed the best thing to do
9532 * is to remove it from the queue at all */
9534 listDelNode(lists
[i
],ln
);
9536 case 1: /* io_processing */
9537 /* Oh Shi- the thread is messing with the Job:
9539 * Probably it's accessing the object if this is a
9540 * PREPARE_SWAP or DO_SWAP job.
9541 * If it's a LOAD job it may be reading from disk and
9542 * if we don't wait for the job to terminate before to
9543 * cancel it, maybe in a few microseconds data can be
9544 * corrupted in this pages. So the short story is:
9546 * Better to wait for the job to move into the
9547 * next queue (processed)... */
9549 /* We try again and again until the job is completed. */
9551 /* But let's wait some time for the I/O thread
9552 * to finish with this job. After all this condition
9553 * should be very rare. */
9556 case 2: /* io_processed */
9557 /* The job was already processed, that's easy...
9558 * just mark it as canceled so that we'll ignore it
9559 * when processing completed jobs. */
9563 /* Finally we have to adjust the storage type of the object
9564 * in order to "UNDO" the operation. */
9565 if (o
->storage
== REDIS_VM_LOADING
)
9566 o
->storage
= REDIS_VM_SWAPPED
;
9567 else if (o
->storage
== REDIS_VM_SWAPPING
)
9568 o
->storage
= REDIS_VM_MEMORY
;
/* A key in LOADING/SWAPPING state always has a matching job, so the
 * search above is expected to find it. */
9575 assert(1 != 1); /* We should never reach this */
9578 static void *IOThreadEntryPoint(void *arg
) {
9583 pthread_detach(pthread_self());
9585 /* Get a new job to process */
9587 if (listLength(server
.io_newjobs
) == 0) {
9588 /* No new jobs in queue, exit. */
9589 redisLog(REDIS_DEBUG
,"Thread %ld exiting, nothing to do",
9590 (long) pthread_self());
9591 server
.io_active_threads
--;
9595 ln
= listFirst(server
.io_newjobs
);
9597 listDelNode(server
.io_newjobs
,ln
);
9598 /* Add the job in the processing queue */
9599 j
->thread
= pthread_self();
9600 listAddNodeTail(server
.io_processing
,j
);
9601 ln
= listLast(server
.io_processing
); /* We use ln later to remove it */
9603 redisLog(REDIS_DEBUG
,"Thread %ld got a new job (type %d): %p about key '%s'",
9604 (long) pthread_self(), j
->type
, (void*)j
, (char*)j
->key
->ptr
);
9606 /* Process the Job */
9607 if (j
->type
== REDIS_IOJOB_LOAD
) {
9608 j
->val
= vmReadObjectFromSwap(j
->page
,j
->key
->vtype
);
9609 } else if (j
->type
== REDIS_IOJOB_PREPARE_SWAP
) {
9610 FILE *fp
= fopen("/dev/null","w+");
9611 j
->pages
= rdbSavedObjectPages(j
->val
,fp
);
9613 } else if (j
->type
== REDIS_IOJOB_DO_SWAP
) {
9614 if (vmWriteObjectOnSwap(j
->val
,j
->page
) == REDIS_ERR
)
9618 /* Done: insert the job into the processed queue */
9619 redisLog(REDIS_DEBUG
,"Thread %ld completed the job: %p (key %s)",
9620 (long) pthread_self(), (void*)j
, (char*)j
->key
->ptr
);
9622 listDelNode(server
.io_processing
,ln
);
9623 listAddNodeTail(server
.io_processed
,j
);
9626 /* Signal the main thread there is new stuff to process */
9627 assert(write(server
.io_ready_pipe_write
,"x",1) == 1);
9629 return NULL
; /* never reached */
/* Create one more I/O thread running IOThreadEntryPoint() with the
 * attributes in server.io_threads_attr, and account for it in
 * server.io_active_threads.
 *
 * The signal mask is changed around pthread_create() and restored
 * afterwards; a new thread inherits the mask of its creator, so the
 * thread starts with SIGCHLD, SIGHUP and SIGPIPE blocked.
 * pthread_create() is retried until it succeeds, logging every failure. */
9632 static void spawnIOThread(void) {
9634 sigset_t mask
, omask
;
9638 sigaddset(&mask
,SIGCHLD
);
9639 sigaddset(&mask
,SIGHUP
);
9640 sigaddset(&mask
,SIGPIPE
);
/* NOTE(review): SIG_SETMASK replaces the whole mask with 'mask' rather
 * than adding to it (that would be SIG_BLOCK) - confirm this is intended. */
9641 pthread_sigmask(SIG_SETMASK
, &mask
, &omask
);
9642 while ((err
= pthread_create(&thread
,&server
.io_threads_attr
,IOThreadEntryPoint
,NULL
)) != 0) {
9643 redisLog(REDIS_WARNING
,"Unable to spawn an I/O thread: %s",
/* Back to the signal mask the calling thread had before. */
9647 pthread_sigmask(SIG_SETMASK
, &omask
, NULL
);
9648 server
.io_active_threads
++;
9651 /* We need to wait for the last thread to exit before we are able to
9652 * fork() in order to BGSAVE or BGREWRITEAOF.
 *
 * The wait is over when there are no new jobs, no jobs being processed
 * and no active I/O threads. While waiting, already completed jobs are
 * post-processed so that the I/O threads can make progress. */
9653 static void waitEmptyIOJobsQueue(void) {
9655 int io_processed_len
;
/* Exit condition: both queues empty and every I/O thread gone. */
9658 if (listLength(server
.io_newjobs
) == 0 &&
9659 listLength(server
.io_processing
) == 0 &&
9660 server
.io_active_threads
== 0)
9665 /* While waiting for empty jobs queue condition we post-process some
9666 * finished job, as I/O threads may be hanging trying to write against
9667 * the io_ready_pipe_write FD but there are so many pending jobs that
 * the write would block. */
9669 io_processed_len
= listLength(server
.io_processed
);
9671 if (io_processed_len
) {
/* Call the completion handler by hand, outside of the event loop. */
9672 vmThreadedIOCompletedJob(NULL
,server
.io_ready_pipe_read
,NULL
,0);
9673 usleep(1000); /* 1 millisecond */
9675 usleep(10000); /* 10 milliseconds */
/* Open the swap file again, replacing server.vm_fp and server.vm_fd with
 * a fresh stream and descriptor. Called in the fork()ed child (see the
 * note below and rewriteAppendOnlyFileBackground()).
 *
 * NOTE(review): on failure a warning ending in "Exiting." is logged; the
 * statement following the log call is not visible in this excerpt. */
9680 static void vmReopenSwapFile(void) {
9681 /* Note: we don't close the old one as we are in the child process
9682 * and don't want to mess at all with the original file object. */
9683 server
.vm_fp
= fopen(server
.vm_swap_file
,"r+b");
9684 if (server
.vm_fp
== NULL
) {
9685 redisLog(REDIS_WARNING
,"Can't re-open the VM swap file: %s. Exiting.",
9686 server
.vm_swap_file
);
/* Keep the raw descriptor in sync with the new stream. */
9689 server
.vm_fd
= fileno(server
.vm_fp
);
9692 /* This function must be called with threaded IO locked.
 *
 * Appends the job 'j' to the tail of server.io_newjobs, then checks
 * whether fewer I/O threads are active than server.vm_max_threads allows.
 * NOTE(review): the statement guarded by that check is not visible in
 * this excerpt. */
9693 static void queueIOJob(iojob
*j
) {
9694 redisLog(REDIS_DEBUG
,"Queued IO Job %p type %d about key '%s'\n",
9695 (void*)j
, j
->type
, (char*)j
->key
->ptr
);
9696 listAddNodeTail(server
.io_newjobs
,j
);
9697 if (server
.io_active_threads
< server
.vm_max_threads
)
/* Start swapping out the value 'val' of 'key' (living in database 'db')
 * using the I/O threads.
 *
 * A new job of type REDIS_IOJOB_PREPARE_SWAP is allocated and the key is
 * flagged REDIS_VM_SWAPPING. The key must be in memory and not shared.
 *
 * NOTE(review): the statements filling the remaining job fields, queueing
 * the job and returning are not visible in this excerpt. */
9701 static int vmSwapObjectThreaded(robj
*key
, robj
*val
, redisDb
*db
) {
9704 assert(key
->storage
== REDIS_VM_MEMORY
);
9705 assert(key
->refcount
== 1);
9707 j
= zmalloc(sizeof(*j
));
/* First stage of a threaded swap: compute the number of pages needed. */
9708 j
->type
= REDIS_IOJOB_PREPARE_SWAP
;
/* No thread owns the job yet; IOThreadEntryPoint() sets this field. */
9714 j
->thread
= (pthread_t
) -1;
9715 key
->storage
= REDIS_VM_SWAPPING
;
9723 /* ============ Virtual Memory - Blocking clients on missing keys =========== */
9725 /* This function makes the client 'c' wait for the key 'key' to be loaded.
9726 * If there is not already a job loading the key, it is created.
9727 * The key is added to the io_keys list in the client structure, and also
9728 * in the hash table mapping swapped keys to waiting clients, that is,
9729 * the io_keys dictionary of the client's database (c->db->io_keys).
 *
 * Returns 0 when the key does not exist in the client's database.
 * NOTE(review): the other return statements are not visible in this
 * excerpt. */
9730 static int waitForSwappedKey(redisClient
*c
, robj
*key
) {
9731 struct dictEntry
*de
;
9735 /* If the key does not exist or is already in RAM we don't need to
9736 * block the client at all. */
9737 de
= dictFind(c
->db
->dict
,key
);
9738 if (de
== NULL
) return 0;
/* The storage state is kept in the key object stored in the dictionary. */
9739 o
= dictGetEntryKey(de
);
9740 if (o
->storage
== REDIS_VM_MEMORY
) {
9742 } else if (o
->storage
== REDIS_VM_SWAPPING
) {
9743 /* We were swapping the key, undo it! */
9744 vmCancelThreadedIOJob(o
);
9748 /* OK: the key is either swapped, or being loaded just now. */
9750 /* Add the key to the list of keys this client is waiting for.
9751 * This maps clients to keys they are waiting for. */
9752 listAddNodeTail(c
->io_keys
,key
);
9755 /* Add the client to the swapped keys => clients waiting map. */
9756 de
= dictFind(c
->db
->io_keys
,key
);
9760 /* For every key we take a list of clients blocked for it */
9762 retval
= dictAdd(c
->db
->io_keys
,key
,l
);
9764 assert(retval
== DICT_OK
);
/* The key already has a list of waiting clients: reuse it. */
9766 l
= dictGetEntryVal(de
);
9768 listAddNodeTail(l
,c
);
9770 /* Are we already loading the key from disk? If not create a job */
9771 if (o
->storage
== REDIS_VM_SWAPPED
) {
9774 o
->storage
= REDIS_VM_LOADING
;
9775 j
= zmalloc(sizeof(*j
));
9776 j
->type
= REDIS_IOJOB_LOAD
;
/* The I/O thread reads the value type from the job's key object. */
9779 j
->key
->vtype
= o
->vtype
;
9780 j
->page
= o
->vm
.page
;
/* No thread owns the job yet. */
9783 j
->thread
= (pthread_t
) -1;
9791 /* Preload keys for any command with first, last and step values for
9792 * the command keys prototype, as defined in the command table. */
9793 static void waitForMultipleSwappedKeys(redisClient
*c
, struct redisCommand
*cmd
, int argc
, robj
**argv
) {
9795 if (cmd
->vm_firstkey
== 0) return;
9796 last
= cmd
->vm_lastkey
;
9797 if (last
< 0) last
= argc
+last
;
9798 for (j
= cmd
->vm_firstkey
; j
<= last
; j
+= cmd
->vm_keystep
) {
9799 redisAssert(j
< argc
);
9800 waitForSwappedKey(c
,argv
[j
]);
9804 /* Preload keys needed for the ZUNIONSTORE and ZINTERSTORE commands.
9805 * Note that the number of keys to preload is user-defined, so we need to
9806 * apply a sanity check against argc. */
9807 static void zunionInterBlockClientOnSwappedKeys(redisClient
*c
, struct redisCommand
*cmd
, int argc
, robj
**argv
) {
9811 num
= atoi(argv
[2]->ptr
);
9812 if (num
> (argc
-3)) return;
9813 for (i
= 0; i
< num
; i
++) {
9814 waitForSwappedKey(c
,argv
[3+i
]);
9818 /* Preload keys needed to execute the entire MULTI/EXEC block.
9820 * This function is called by blockClientOnSwappedKeys when EXEC is issued,
9821 * and will block the client when any command requires a swapped out value. */
9822 static void execBlockClientOnSwappedKeys(redisClient
*c
, struct redisCommand
*cmd
, int argc
, robj
**argv
) {
9824 struct redisCommand
*mcmd
;
9827 REDIS_NOTUSED(argc
);
9828 REDIS_NOTUSED(argv
);
9830 if (!(c
->flags
& REDIS_MULTI
)) return;
9831 for (i
= 0; i
< c
->mstate
.count
; i
++) {
9832 mcmd
= c
->mstate
.commands
[i
].cmd
;
9833 margc
= c
->mstate
.commands
[i
].argc
;
9834 margv
= c
->mstate
.commands
[i
].argv
;
9836 if (mcmd
->vm_preload_proc
!= NULL
) {
9837 mcmd
->vm_preload_proc(c
,mcmd
,margc
,margv
);
9839 waitForMultipleSwappedKeys(c
,mcmd
,margc
,margv
);
9844 /* Is this client attempting to run a command against swapped keys?
9845 * If so, block it ASAP, load the keys in background, then resume it.
9847 * The important idea about this function is that it can fail! If keys will
9848 * still be swapped when the client is resumed, the key lookups will
9849 * just block loading keys from disk. In practical terms this should only
9850 * happen with SORT BY command or if there is a bug in this function.
9852 * Return 1 if the client is marked as blocked, 0 if the client can
9853 * continue as the keys it is going to access appear to be in memory. */
9854 static int blockClientOnSwappedKeys(redisClient
*c
, struct redisCommand
*cmd
) {
9855 if (cmd
->vm_preload_proc
!= NULL
) {
9856 cmd
->vm_preload_proc(c
,cmd
,c
->argc
,c
->argv
);
9858 waitForMultipleSwappedKeys(c
,cmd
,c
->argc
,c
->argv
);
9861 /* If the client was blocked for at least one key, mark it as blocked. */
9862 if (listLength(c
->io_keys
)) {
9863 c
->flags
|= REDIS_IO_WAIT
;
9864 aeDeleteFileEvent(server
.el
,c
->fd
,AE_READABLE
);
9865 server
.vm_blocked_clients
++;
9872 /* Remove the 'key' from the list of blocked keys for a given client.
9874 * The function returns 1 when there are no longer blocking keys after
9875 * the current one was removed (and the client can be unblocked). */
9876 static int dontWaitForSwappedKey(redisClient
*c
, robj
*key
) {
9880 struct dictEntry
*de
;
9882 /* Remove the key from the list of keys this client is waiting for. */
9883 listRewind(c
->io_keys
,&li
);
9884 while ((ln
= listNext(&li
)) != NULL
) {
9885 if (equalStringObjects(ln
->value
,key
)) {
9886 listDelNode(c
->io_keys
,ln
);
9892 /* Remove the client from the key => waiting clients map. */
9893 de
= dictFind(c
->db
->io_keys
,key
);
9895 l
= dictGetEntryVal(de
);
9896 ln
= listSearchKey(l
,c
);
9899 if (listLength(l
) == 0)
9900 dictDelete(c
->db
->io_keys
,key
);
9902 return listLength(c
->io_keys
) == 0;
/* Process the clients blocked on 'key' in database 'db': the list of waiting
 * clients is fetched from db->io_keys and, for each client taken from it,
 * dontWaitForSwappedKey() is called. A client for which that call returns
 * true (no more keys are blocking it) is appended to
 * server.io_ready_clients.
 * NOTE(review): the loop header and the handling of a failed dictFind() are
 * not visible in this excerpt -- confirm against the full source. */
9905 static void handleClientsBlockedOnSwappedKey(redisDb
*db
, robj
*key
) {
9906 struct dictEntry
*de
;
/* Find the key => waiting clients entry for this key. */
9911 de
= dictFind(db
->io_keys
,key
);
9914 l
= dictGetEntryVal(de
);
/* Read the list length once, up front; see the note just below. */
9915 len
= listLength(l
);
9916 /* Note: we can't use something like while(listLength(l)) as the list
9917 * can be freed by the calling function when we remove the last element. */
9920 redisClient
*c
= ln
->value
;
9922 if (dontWaitForSwappedKey(c
,key
)) {
9923 /* Put the client in the list of clients ready to go as we
9924 * loaded all the keys about it. */
9925 listAddNodeTail(server
.io_ready_clients
,c
);
9930 /* =========================== Remote Configuration ========================= */
9932 static void configSetCommand(redisClient
*c
) {
9933 robj
*o
= getDecodedObject(c
->argv
[3]);
9936 if (!strcasecmp(c
->argv
[2]->ptr
,"dbfilename")) {
9937 zfree(server
.dbfilename
);
9938 server
.dbfilename
= zstrdup(o
->ptr
);
9939 } else if (!strcasecmp(c
->argv
[2]->ptr
,"requirepass")) {
9940 zfree(server
.requirepass
);
9941 server
.requirepass
= zstrdup(o
->ptr
);
9942 } else if (!strcasecmp(c
->argv
[2]->ptr
,"masterauth")) {
9943 zfree(server
.masterauth
);
9944 server
.masterauth
= zstrdup(o
->ptr
);
9945 } else if (!strcasecmp(c
->argv
[2]->ptr
,"maxmemory")) {
9946 if (getLongLongFromObject(o
,&ll
) == REDIS_ERR
||
9947 ll
< 0) goto badfmt
;
9948 server
.maxmemory
= ll
;
9949 } else if (!strcasecmp(c
->argv
[2]->ptr
,"timeout")) {
9950 if (getLongLongFromObject(o
,&ll
) == REDIS_ERR
||
9951 ll
< 0 || ll
> LONG_MAX
) goto badfmt
;
9952 server
.maxidletime
= ll
;
9953 } else if (!strcasecmp(c
->argv
[2]->ptr
,"appendfsync")) {
9954 if (!strcasecmp(o
->ptr
,"no")) {
9955 server
.appendfsync
= APPENDFSYNC_NO
;
9956 } else if (!strcasecmp(o
->ptr
,"everysec")) {
9957 server
.appendfsync
= APPENDFSYNC_EVERYSEC
;
9958 } else if (!strcasecmp(o
->ptr
,"always")) {
9959 server
.appendfsync
= APPENDFSYNC_ALWAYS
;
9963 } else if (!strcasecmp(c
->argv
[2]->ptr
,"appendonly")) {
9964 int old
= server
.appendonly
;
9965 int new = yesnotoi(o
->ptr
);
9967 if (new == -1) goto badfmt
;
9972 if (startAppendOnly() == REDIS_ERR
) {
9973 addReplySds(c
,sdscatprintf(sdsempty(),
9974 "-ERR Unable to turn on AOF. Check server logs.\r\n"));
9980 } else if (!strcasecmp(c
->argv
[2]->ptr
,"save")) {
9982 sds
*v
= sdssplitlen(o
->ptr
,sdslen(o
->ptr
)," ",1,&vlen
);
9984 /* Perform sanity check before setting the new config:
9985 * - Even number of args
9986 * - Seconds >= 1, changes >= 0 */
9988 sdsfreesplitres(v
,vlen
);
9991 for (j
= 0; j
< vlen
; j
++) {
9995 val
= strtoll(v
[j
], &eptr
, 10);
9996 if (eptr
[0] != '\0' ||
9997 ((j
& 1) == 0 && val
< 1) ||
9998 ((j
& 1) == 1 && val
< 0)) {
9999 sdsfreesplitres(v
,vlen
);
10003 /* Finally set the new config */
10004 resetServerSaveParams();
10005 for (j
= 0; j
< vlen
; j
+= 2) {
10009 seconds
= strtoll(v
[j
],NULL
,10);
10010 changes
= strtoll(v
[j
+1],NULL
,10);
10011 appendServerSaveParams(seconds
, changes
);
10013 sdsfreesplitres(v
,vlen
);
10015 addReplySds(c
,sdscatprintf(sdsempty(),
10016 "-ERR not supported CONFIG parameter %s\r\n",
10017 (char*)c
->argv
[2]->ptr
));
10022 addReply(c
,shared
.ok
);
10025 badfmt
: /* Bad format errors */
10026 addReplySds(c
,sdscatprintf(sdsempty(),
10027 "-ERR invalid argument '%s' for CONFIG SET '%s'\r\n",
10029 (char*)c
->argv
[2]->ptr
));
10033 static void configGetCommand(redisClient
*c
) {
10034 robj
*o
= getDecodedObject(c
->argv
[2]);
10035 robj
*lenobj
= createObject(REDIS_STRING
,NULL
);
10036 char *pattern
= o
->ptr
;
10039 addReply(c
,lenobj
);
10040 decrRefCount(lenobj
);
10042 if (stringmatch(pattern
,"dbfilename",0)) {
10043 addReplyBulkCString(c
,"dbfilename");
10044 addReplyBulkCString(c
,server
.dbfilename
);
10047 if (stringmatch(pattern
,"requirepass",0)) {
10048 addReplyBulkCString(c
,"requirepass");
10049 addReplyBulkCString(c
,server
.requirepass
);
10052 if (stringmatch(pattern
,"masterauth",0)) {
10053 addReplyBulkCString(c
,"masterauth");
10054 addReplyBulkCString(c
,server
.masterauth
);
10057 if (stringmatch(pattern
,"maxmemory",0)) {
10060 ll2string(buf
,128,server
.maxmemory
);
10061 addReplyBulkCString(c
,"maxmemory");
10062 addReplyBulkCString(c
,buf
);
10065 if (stringmatch(pattern
,"timeout",0)) {
10068 ll2string(buf
,128,server
.maxidletime
);
10069 addReplyBulkCString(c
,"timeout");
10070 addReplyBulkCString(c
,buf
);
10073 if (stringmatch(pattern
,"appendonly",0)) {
10074 addReplyBulkCString(c
,"appendonly");
10075 addReplyBulkCString(c
,server
.appendonly
? "yes" : "no");
10078 if (stringmatch(pattern
,"appendfsync",0)) {
10081 switch(server
.appendfsync
) {
10082 case APPENDFSYNC_NO
: policy
= "no"; break;
10083 case APPENDFSYNC_EVERYSEC
: policy
= "everysec"; break;
10084 case APPENDFSYNC_ALWAYS
: policy
= "always"; break;
10085 default: policy
= "unknown"; break; /* too harmless to panic */
10087 addReplyBulkCString(c
,"appendfsync");
10088 addReplyBulkCString(c
,policy
);
10091 if (stringmatch(pattern
,"save",0)) {
10092 sds buf
= sdsempty();
10095 for (j
= 0; j
< server
.saveparamslen
; j
++) {
10096 buf
= sdscatprintf(buf
,"%ld %d",
10097 server
.saveparams
[j
].seconds
,
10098 server
.saveparams
[j
].changes
);
10099 if (j
!= server
.saveparamslen
-1)
10100 buf
= sdscatlen(buf
," ",1);
10102 addReplyBulkCString(c
,"save");
10103 addReplyBulkCString(c
,buf
);
10108 lenobj
->ptr
= sdscatprintf(sdsempty(),"*%d\r\n",matches
*2);
/* CONFIG command dispatcher. The subcommand in argv[1] is matched
 * case-insensitively:
 *   SET       requires argc == 4 and is handed to configSetCommand()
 *   GET       requires argc == 3 and is handed to configGetCommand()
 *   RESETSTAT requires argc == 2; zeroes the command/connection/expired-key
 *             counters, sets stat_starttime to the current time, replies OK
 * A wrong argument count jumps to the 'badarity' label.
 * NOTE(review): the final 'else', the 'return' and the 'badarity:' label
 * lines are not visible in this excerpt; the two error replies below
 * presumably belong to the unknown-subcommand branch and to 'badarity'
 * respectively -- confirm against the full source. */
10111 static void configCommand(redisClient
*c
) {
10112 if (!strcasecmp(c
->argv
[1]->ptr
,"set")) {
10113 if (c
->argc
!= 4) goto badarity
;
10114 configSetCommand(c
);
10115 } else if (!strcasecmp(c
->argv
[1]->ptr
,"get")) {
10116 if (c
->argc
!= 3) goto badarity
;
10117 configGetCommand(c
);
10118 } else if (!strcasecmp(c
->argv
[1]->ptr
,"resetstat")) {
10119 if (c
->argc
!= 2) goto badarity
;
/* Reset the statistics counters and restart the stats clock. */
10120 server
.stat_numcommands
= 0;
10121 server
.stat_numconnections
= 0;
10122 server
.stat_expiredkeys
= 0;
10123 server
.stat_starttime
= time(NULL
);
10124 addReply(c
,shared
.ok
);
/* Error reply listing the supported subcommands. */
10126 addReplySds(c
,sdscatprintf(sdsempty(),
10127 "-ERR CONFIG subcommand must be one of GET, SET, RESETSTAT\r\n"));
/* Error reply for a wrong number of arguments; echoes the subcommand name. */
10132 addReplySds(c
,sdscatprintf(sdsempty(),
10133 "-ERR Wrong number of arguments for CONFIG %s\r\n",
10134 (char*) c
->argv
[1]->ptr
));
10137 /* =========================== Pubsub implementation ======================== */
10139 static void freePubsubPattern(void *p
) {
10140 pubsubPattern
*pat
= p
;
10142 decrRefCount(pat
->pattern
);
/* Compare two pubsubPattern entries passed as opaque pointers. Returns
 * non-zero only when both entries refer to the same client AND their
 * pattern objects compare equal via equalStringObjects().
 * NOTE(review): presumably installed as the match callback of a pattern
 * list -- confirm where it is registered. */
10146 static int listMatchPubsubPattern(void *a
, void *b
) {
10147 pubsubPattern
*pa
= a
, *pb
= b
;
10149 return (pa
->client
== pb
->client
) &&
10150 (equalStringObjects(pa
->pattern
,pb
->pattern
));
10153 /* Subscribe a client to a channel. Returns 1 if the operation succeeded, or
10154 * 0 if the client was already subscribed to that channel. */
10155 static int pubsubSubscribeChannel(redisClient
*c
, robj
*channel
) {
10156 struct dictEntry
*de
;
10157 list
*clients
= NULL
;
10160 /* Add the channel to the client -> channels hash table */
10161 if (dictAdd(c
->pubsub_channels
,channel
,NULL
) == DICT_OK
) {
10163 incrRefCount(channel
);
10164 /* Add the client to the channel -> list of clients hash table */
10165 de
= dictFind(server
.pubsub_channels
,channel
);
10167 clients
= listCreate();
10168 dictAdd(server
.pubsub_channels
,channel
,clients
);
10169 incrRefCount(channel
);
10171 clients
= dictGetEntryVal(de
);
10173 listAddNodeTail(clients
,c
);
10175 /* Notify the client */
10176 addReply(c
,shared
.mbulk3
);
10177 addReply(c
,shared
.subscribebulk
);
10178 addReplyBulk(c
,channel
);
10179 addReplyLongLong(c
,dictSize(c
->pubsub_channels
)+listLength(c
->pubsub_patterns
));
10183 /* Unsubscribe a client from a channel. Returns 1 if the operation succeeded, or
10184 * 0 if the client was not subscribed to the specified channel. */
10185 static int pubsubUnsubscribeChannel(redisClient
*c
, robj
*channel
, int notify
) {
10186 struct dictEntry
*de
;
10191 /* Remove the channel from the client -> channels hash table */
10192 incrRefCount(channel
); /* channel may be just a pointer to the same object
10193 we have in the hash tables. Protect it... */
10194 if (dictDelete(c
->pubsub_channels
,channel
) == DICT_OK
) {
10196 /* Remove the client from the channel -> clients list hash table */
10197 de
= dictFind(server
.pubsub_channels
,channel
);
10198 assert(de
!= NULL
);
10199 clients
= dictGetEntryVal(de
);
10200 ln
= listSearchKey(clients
,c
);
10201 assert(ln
!= NULL
);
10202 listDelNode(clients
,ln
);
10203 if (listLength(clients
) == 0) {
10204 /* Free the list and the associated hash entry altogether if this was
10205 * the last client, so that it will not be possible to abuse
10206 * Redis PUBSUB by creating millions of channels. */
10207 dictDelete(server
.pubsub_channels
,channel
);
10210 /* Notify the client */
10212 addReply(c
,shared
.mbulk3
);
10213 addReply(c
,shared
.unsubscribebulk
);
10214 addReplyBulk(c
,channel
);
10215 addReplyLongLong(c
,dictSize(c
->pubsub_channels
)+
10216 listLength(c
->pubsub_patterns
));
10219 decrRefCount(channel
); /* it is finally safe to release it */
10223 /* Subscribe a client to a pattern. Returns 1 if the operation succeeded, or 0 if the client was already subscribed to that pattern. */
10224 static int pubsubSubscribePattern(redisClient
*c
, robj
*pattern
) {
10227 if (listSearchKey(c
->pubsub_patterns
,pattern
) == NULL
) {
10229 pubsubPattern
*pat
;
10230 listAddNodeTail(c
->pubsub_patterns
,pattern
);
10231 incrRefCount(pattern
);
10232 pat
= zmalloc(sizeof(*pat
));
10233 pat
->pattern
= getDecodedObject(pattern
);
10235 listAddNodeTail(server
.pubsub_patterns
,pat
);
10237 /* Notify the client */
10238 addReply(c
,shared
.mbulk3
);
10239 addReply(c
,shared
.psubscribebulk
);
10240 addReplyBulk(c
,pattern
);
10241 addReplyLongLong(c
,dictSize(c
->pubsub_channels
)+listLength(c
->pubsub_patterns
));
10245 /* Unsubscribe a client from a pattern. Returns 1 if the operation succeeded, or
10246 * 0 if the client was not subscribed to the specified pattern. */
10247 static int pubsubUnsubscribePattern(redisClient
*c
, robj
*pattern
, int notify
) {
10252 incrRefCount(pattern
); /* Protect the object. May be the same we remove */
10253 if ((ln
= listSearchKey(c
->pubsub_patterns
,pattern
)) != NULL
) {
10255 listDelNode(c
->pubsub_patterns
,ln
);
10257 pat
.pattern
= pattern
;
10258 ln
= listSearchKey(server
.pubsub_patterns
,&pat
);
10259 listDelNode(server
.pubsub_patterns
,ln
);
10261 /* Notify the client */
10263 addReply(c
,shared
.mbulk3
);
10264 addReply(c
,shared
.punsubscribebulk
);
10265 addReplyBulk(c
,pattern
);
10266 addReplyLongLong(c
,dictSize(c
->pubsub_channels
)+
10267 listLength(c
->pubsub_patterns
));
10269 decrRefCount(pattern
);
10273 /* Unsubscribe from all the channels. Return the number of channels the
10274 * client was subscribed to. */
10275 static int pubsubUnsubscribeAllChannels(redisClient
*c
, int notify
) {
10276 dictIterator
*di
= dictGetIterator(c
->pubsub_channels
);
10280 while((de
= dictNext(di
)) != NULL
) {
10281 robj
*channel
= dictGetEntryKey(de
);
10283 count
+= pubsubUnsubscribeChannel(c
,channel
,notify
);
10285 dictReleaseIterator(di
);
10289 /* Unsubscribe from all the patterns. Return the number of patterns the
10290 * client was subscribed to. */
10291 static int pubsubUnsubscribeAllPatterns(redisClient
*c
, int notify
) {
10296 listRewind(c
->pubsub_patterns
,&li
);
10297 while ((ln
= listNext(&li
)) != NULL
) {
10298 robj
*pattern
= ln
->value
;
10300 count
+= pubsubUnsubscribePattern(c
,pattern
,notify
);
10305 /* Publish a message */
10306 static int pubsubPublishMessage(robj
*channel
, robj
*message
) {
10308 struct dictEntry
*de
;
10312 /* Send to clients listening for that channel */
10313 de
= dictFind(server
.pubsub_channels
,channel
);
10315 list
*list
= dictGetEntryVal(de
);
10319 listRewind(list
,&li
);
10320 while ((ln
= listNext(&li
)) != NULL
) {
10321 redisClient
*c
= ln
->value
;
10323 addReply(c
,shared
.mbulk3
);
10324 addReply(c
,shared
.messagebulk
);
10325 addReplyBulk(c
,channel
);
10326 addReplyBulk(c
,message
);
10330 /* Send to clients listening to matching channels */
10331 if (listLength(server
.pubsub_patterns
)) {
10332 listRewind(server
.pubsub_patterns
,&li
);
10333 channel
= getDecodedObject(channel
);
10334 while ((ln
= listNext(&li
)) != NULL
) {
10335 pubsubPattern
*pat
= ln
->value
;
10337 if (stringmatchlen((char*)pat
->pattern
->ptr
,
10338 sdslen(pat
->pattern
->ptr
),
10339 (char*)channel
->ptr
,
10340 sdslen(channel
->ptr
),0)) {
10341 addReply(pat
->client
,shared
.mbulk4
);
10342 addReply(pat
->client
,shared
.pmessagebulk
);
10343 addReplyBulk(pat
->client
,pat
->pattern
);
10344 addReplyBulk(pat
->client
,channel
);
10345 addReplyBulk(pat
->client
,message
);
10349 decrRefCount(channel
);
/* SUBSCRIBE command: calls pubsubSubscribeChannel() once for every argument
 * in argv[1..argc-1], subscribing the client to each named channel. */
10354 static void subscribeCommand(redisClient
*c
) {
10357 for (j
= 1; j
< c
->argc
; j
++)
10358 pubsubSubscribeChannel(c
,c
->argv
[j
]);
/* UNSUBSCRIBE command: with no arguments (argc == 1) the client is
 * unsubscribed from all its channels; the loop unsubscribes it from each
 * channel named in argv[1..argc-1]. Both calls pass notify = 1.
 * NOTE(review): the lines joining the two branches are not visible in this
 * excerpt -- confirm the loop only runs when arguments are given. */
10361 static void unsubscribeCommand(redisClient
*c
) {
10362 if (c
->argc
== 1) {
10363 pubsubUnsubscribeAllChannels(c
,1);
10368 for (j
= 1; j
< c
->argc
; j
++)
10369 pubsubUnsubscribeChannel(c
,c
->argv
[j
],1);
/* PSUBSCRIBE command: calls pubsubSubscribePattern() once for every argument
 * in argv[1..argc-1], subscribing the client to each given pattern. */
10373 static void psubscribeCommand(redisClient
*c
) {
10376 for (j
= 1; j
< c
->argc
; j
++)
10377 pubsubSubscribePattern(c
,c
->argv
[j
]);
/* PUNSUBSCRIBE command: with no arguments (argc == 1) the client is
 * unsubscribed from all its patterns; the loop unsubscribes it from each
 * pattern given in argv[1..argc-1]. Both calls pass notify = 1.
 * NOTE(review): the lines joining the two branches are not visible in this
 * excerpt -- confirm the loop only runs when arguments are given. */
10380 static void punsubscribeCommand(redisClient
*c
) {
10381 if (c
->argc
== 1) {
10382 pubsubUnsubscribeAllPatterns(c
,1);
10387 for (j
= 1; j
< c
->argc
; j
++)
10388 pubsubUnsubscribePattern(c
,c
->argv
[j
],1);
/* PUBLISH command: passes argv[1] as the channel and argv[2] as the message
 * to pubsubPublishMessage() and replies to the caller with the integer that
 * function returns. */
10392 static void publishCommand(redisClient
*c
) {
10393 int receivers
= pubsubPublishMessage(c
->argv
[1],c
->argv
[2]);
10394 addReplyLongLong(c
,receivers
);
10397 /* ===================== WATCH (CAS alike for MULTI/EXEC) ===================
10399 * The implementation uses a per-DB hash table mapping keys to list of clients
10400 * WATCHing those keys, so that given a key that is going to be modified
10401 * we can mark all the associated clients as dirty.
10403 * Also every client contains a list of WATCHed keys so that it is possible to
10404 * un-watch such keys when the client is freed or when UNWATCH is called. */
10406 /* In the client->watched_keys list we need to use watchedKey structures
10407 * because in order to identify a key in Redis we need both the key name and
10408 * the DB it belongs to. */
10409 typedef struct watchedKey
{
10414 /* Watch for the specified key */
10415 static void watchForKey(redisClient
*c
, robj
*key
) {
10416 list
*clients
= NULL
;
10421 /* Check if we are already watching for this key */
10422 listRewind(c
->watched_keys
,&li
);
10423 while((ln
= listNext(&li
))) {
10424 wk
= listNodeValue(ln
);
10425 if (wk
->db
== c
->db
&& equalStringObjects(key
,wk
->key
))
10426 return; /* Key already watched */
10428 /* This key is not already watched in this DB. Let's add it */
10429 clients
= dictFetchValue(c
->db
->watched_keys
,key
);
10431 clients
= listCreate();
10432 dictAdd(c
->db
->watched_keys
,key
,clients
);
10435 listAddNodeTail(clients
,c
);
10436 /* Add the new key to the list of keys watched by this client */
10437 wk
= zmalloc(sizeof(*wk
));
10441 listAddNodeTail(c
->watched_keys
,wk
);
10444 /* Unwatch all the keys watched by this client. Clearing the EXEC dirty
10445 * flag is up to the caller. */
10446 static void unwatchAllKeys(redisClient
*c
) {
10450 if (listLength(c
->watched_keys
) == 0) return;
10451 listRewind(c
->watched_keys
,&li
);
10452 while((ln
= listNext(&li
))) {
10456 /* Look up the watched key -> clients list and remove the client
10457 * from that list */
10458 wk
= listNodeValue(ln
);
10459 clients
= dictFetchValue(wk
->db
->watched_keys
, wk
->key
);
10460 assert(clients
!= NULL
);
10461 listDelNode(clients
,listSearchKey(clients
,c
));
10462 /* Remove the entry altogether if this was the only client */
10463 if (listLength(clients
) == 0)
10464 dictDelete(wk
->db
->watched_keys
, wk
->key
);
10465 /* Remove this watched key from the client->watched list */
10466 listDelNode(c
->watched_keys
,ln
);
10467 decrRefCount(wk
->key
);
10472 /* "Touch" a key, so that if this key is being WATCHed by some client the
10473 * next EXEC will fail. */
10474 static void touchWatchedKey(redisDb
*db
, robj
*key
) {
10479 if (dictSize(db
->watched_keys
) == 0) return;
10480 clients
= dictFetchValue(db
->watched_keys
, key
);
10481 if (!clients
) return;
10483 /* Mark all the clients watching this key as REDIS_DIRTY_CAS */
10484 /* Walk the list of clients watching this key */
10485 listRewind(clients
,&li
);
10486 while((ln
= listNext(&li
))) {
10487 redisClient
*c
= listNodeValue(ln
);
10489 c
->flags
|= REDIS_DIRTY_CAS
;
10493 /* On FLUSHDB or FLUSHALL all the watched keys that are present before the
10494 * flush but will be deleted as effect of the flushing operation should
10495 * be touched. "dbid" is the DB that's getting the flush. -1 if it is
10496 * a FLUSHALL operation (all the DBs flushed). */
10497 static void touchWatchedKeysOnFlush(int dbid
) {
10501 /* For every client, check all the watched keys */
10502 listRewind(server
.clients
,&li1
);
10503 while((ln
= listNext(&li1
))) {
10504 redisClient
*c
= listNodeValue(ln
);
10505 listRewind(c
->watched_keys
,&li2
);
10506 while((ln
= listNext(&li2
))) {
10507 watchedKey
*wk
= listNodeValue(ln
);
10509 /* For every watched key matching the specified DB, if the
10510 * key exists, mark the client as dirty, as the key will be
10511 * removed by the flush */
10512 if (dbid
== -1 || wk
->db
->id
== dbid
) {
10513 if (dictFind(wk
->db
->dict
, wk
->key
) != NULL
)
10514 c
->flags
|= REDIS_DIRTY_CAS
;
/* WATCH command: a client that has REDIS_MULTI set gets an error reply
 * because WATCH is not allowed inside MULTI. The loop registers every key in
 * argv[1..argc-1] through watchForKey(), then an OK reply is sent.
 * NOTE(review): the early 'return' after the error reply is not visible in
 * this excerpt -- confirm against the full source. */
10520 static void watchCommand(redisClient
*c
) {
10523 if (c
->flags
& REDIS_MULTI
) {
10524 addReplySds(c
,sdsnew("-ERR WATCH inside MULTI is not allowed\r\n"));
10527 for (j
= 1; j
< c
->argc
; j
++)
10528 watchForKey(c
,c
->argv
[j
]);
10529 addReply(c
,shared
.ok
);
10532 static void unwatchCommand(redisClient
*c
) {
10534 c
->flags
&= (~REDIS_DIRTY_CAS
);
10535 addReply(c
,shared
.ok
);
10538 /* ================================= Debugging ============================== */
10540 /* Compute the SHA1 of the string at 's', which is 'len' bytes long.
10541 * The SHA1 is then xored against the string pointed to by digest.
10542 * Since xor is commutative, this operation is used in order to
10543 * "add" digests relative to unordered elements.
10545 * So digest(a,b,c,d) will be the same as digest(b,a,c,d) */
10546 static void xorDigest(unsigned char *digest
, void *ptr
, size_t len
) {
10548 unsigned char hash
[20], *s
= ptr
;
10552 SHA1Update(&ctx
,s
,len
);
10553 SHA1Final(hash
,&ctx
);
10555 for (j
= 0; j
< 20; j
++)
10556 digest
[j
] ^= hash
[j
];
10559 static void xorObjectDigest(unsigned char *digest
, robj
*o
) {
10560 o
= getDecodedObject(o
);
10561 xorDigest(digest
,o
->ptr
,sdslen(o
->ptr
));
10565 /* This function, instead of just computing the SHA1 and xoring it
10566 * against digest, also performs the digest of "digest" itself and
10567 * replaces the old value with the new one.
10569 * So the final digest will be:
10571 * digest = SHA1(digest xor SHA1(data))
10573 * This function is used every time we want to preserve the order so
10574 * that digest(a,b,c,d) will be different than digest(b,c,d,a)
10576 * Also note that mixdigest("foo") followed by mixdigest("bar")
10577 * will lead to a different digest compared to "fo", "obar".
10578 */
10579 static void mixDigest(unsigned char *digest
, void *ptr
, size_t len
) {
10583 xorDigest(digest
,s
,len
);
10585 SHA1Update(&ctx
,digest
,20);
10586 SHA1Final(digest
,&ctx
);
10589 static void mixObjectDigest(unsigned char *digest
, robj
*o
) {
10590 o
= getDecodedObject(o
);
10591 mixDigest(digest
,o
->ptr
,sdslen(o
->ptr
));
10595 /* Compute the dataset digest. Since keys, set elements and hash elements
10596 * are not ordered, we use a trick: every aggregate digest is the xor
10597 * of the digests of their elements. This way the order will not change
10598 * the result. For list instead we use a feedback entering the output digest
10599 * as input in order to ensure that a different ordered list will result in
10600 * a different digest. */
10601 static void computeDatasetDigest(unsigned char *final
) {
10602 unsigned char digest
[20];
10604 dictIterator
*di
= NULL
;
10609 memset(final
,0,20); /* Start with a clean result */
10611 for (j
= 0; j
< server
.dbnum
; j
++) {
10612 redisDb
*db
= server
.db
+j
;
10614 if (dictSize(db
->dict
) == 0) continue;
10615 di
= dictGetIterator(db
->dict
);
10617 /* hash the DB id, so the same dataset moved in a different
10618 * DB will lead to a different digest */
10620 mixDigest(final
,&aux
,sizeof(aux
));
10622 /* Iterate this DB writing every entry */
10623 while((de
= dictNext(di
)) != NULL
) {
10624 robj
*key
, *o
, *kcopy
;
10627 memset(digest
,0,20); /* This key-val digest */
10628 key
= dictGetEntryKey(de
);
10630 if (!server
.vm_enabled
) {
10631 mixObjectDigest(digest
,key
);
10632 o
= dictGetEntryVal(de
);
10634 /* Don't work with the key directly as when VM is active
10635 * this is unsafe: TODO: fix decrRefCount to check if the
10636 * count really reached 0 to avoid this mess */
10637 kcopy
= dupStringObject(key
);
10638 mixObjectDigest(digest
,kcopy
);
10639 o
= lookupKeyRead(db
,kcopy
);
10640 decrRefCount(kcopy
);
10642 aux
= htonl(o
->type
);
10643 mixDigest(digest
,&aux
,sizeof(aux
));
10644 expiretime
= getExpire(db
,key
);
10646 /* Save the key and associated value */
10647 if (o
->type
== REDIS_STRING
) {
10648 mixObjectDigest(digest
,o
);
10649 } else if (o
->type
== REDIS_LIST
) {
10650 list
*list
= o
->ptr
;
10654 listRewind(list
,&li
);
10655 while((ln
= listNext(&li
))) {
10656 robj
*eleobj
= listNodeValue(ln
);
10658 mixObjectDigest(digest
,eleobj
);
10660 } else if (o
->type
== REDIS_SET
) {
10661 dict
*set
= o
->ptr
;
10662 dictIterator
*di
= dictGetIterator(set
);
10665 while((de
= dictNext(di
)) != NULL
) {
10666 robj
*eleobj
= dictGetEntryKey(de
);
10668 xorObjectDigest(digest
,eleobj
);
10670 dictReleaseIterator(di
);
10671 } else if (o
->type
== REDIS_ZSET
) {
10673 dictIterator
*di
= dictGetIterator(zs
->dict
);
10676 while((de
= dictNext(di
)) != NULL
) {
10677 robj
*eleobj
= dictGetEntryKey(de
);
10678 double *score
= dictGetEntryVal(de
);
10679 unsigned char eledigest
[20];
10681 snprintf(buf
,sizeof(buf
),"%.17g",*score
);
10682 memset(eledigest
,0,20);
10683 mixObjectDigest(eledigest
,eleobj
);
10684 mixDigest(eledigest
,buf
,strlen(buf
));
10685 xorDigest(digest
,eledigest
,20);
10687 dictReleaseIterator(di
);
10688 } else if (o
->type
== REDIS_HASH
) {
10692 hi
= hashInitIterator(o
);
10693 while (hashNext(hi
) != REDIS_ERR
) {
10694 unsigned char eledigest
[20];
10696 memset(eledigest
,0,20);
10697 obj
= hashCurrent(hi
,REDIS_HASH_KEY
);
10698 mixObjectDigest(eledigest
,obj
);
10700 obj
= hashCurrent(hi
,REDIS_HASH_VALUE
);
10701 mixObjectDigest(eledigest
,obj
);
10703 xorDigest(digest
,eledigest
,20);
10705 hashReleaseIterator(hi
);
10707 redisPanic("Unknown object type");
10709 /* If the key has an expire, add it to the mix */
10710 if (expiretime
!= -1) xorDigest(digest
,"!!expire!!",10);
10711 /* We can finally xor the key-val digest to the final digest */
10712 xorDigest(final
,digest
,20);
10714 dictReleaseIterator(di
);
10718 static void debugCommand(redisClient
*c
) {
10719 if (!strcasecmp(c
->argv
[1]->ptr
,"segfault")) {
10720 *((char*)-1) = 'x';
10721 } else if (!strcasecmp(c
->argv
[1]->ptr
,"reload")) {
10722 if (rdbSave(server
.dbfilename
) != REDIS_OK
) {
10723 addReply(c
,shared
.err
);
10727 if (rdbLoad(server
.dbfilename
) != REDIS_OK
) {
10728 addReply(c
,shared
.err
);
10731 redisLog(REDIS_WARNING
,"DB reloaded by DEBUG RELOAD");
10732 addReply(c
,shared
.ok
);
10733 } else if (!strcasecmp(c
->argv
[1]->ptr
,"loadaof")) {
10735 if (loadAppendOnlyFile(server
.appendfilename
) != REDIS_OK
) {
10736 addReply(c
,shared
.err
);
10739 redisLog(REDIS_WARNING
,"Append Only File loaded by DEBUG LOADAOF");
10740 addReply(c
,shared
.ok
);
10741 } else if (!strcasecmp(c
->argv
[1]->ptr
,"object") && c
->argc
== 3) {
10742 dictEntry
*de
= dictFind(c
->db
->dict
,c
->argv
[2]);
10746 addReply(c
,shared
.nokeyerr
);
10749 key
= dictGetEntryKey(de
);
10750 val
= dictGetEntryVal(de
);
10751 if (!server
.vm_enabled
|| (key
->storage
== REDIS_VM_MEMORY
||
10752 key
->storage
== REDIS_VM_SWAPPING
)) {
10756 if (val
->encoding
< (sizeof(strencoding
)/sizeof(char*))) {
10757 strenc
= strencoding
[val
->encoding
];
10759 snprintf(buf
,64,"unknown encoding %d\n", val
->encoding
);
10762 addReplySds(c
,sdscatprintf(sdsempty(),
10763 "+Key at:%p refcount:%d, value at:%p refcount:%d "
10764 "encoding:%s serializedlength:%lld\r\n",
10765 (void*)key
, key
->refcount
, (void*)val
, val
->refcount
,
10766 strenc
, (long long) rdbSavedObjectLen(val
,NULL
)));
10768 addReplySds(c
,sdscatprintf(sdsempty(),
10769 "+Key at:%p refcount:%d, value swapped at: page %llu "
10770 "using %llu pages\r\n",
10771 (void*)key
, key
->refcount
, (unsigned long long) key
->vm
.page
,
10772 (unsigned long long) key
->vm
.usedpages
));
10774 } else if (!strcasecmp(c
->argv
[1]->ptr
,"swapin") && c
->argc
== 3) {
10775 lookupKeyRead(c
->db
,c
->argv
[2]);
10776 addReply(c
,shared
.ok
);
10777 } else if (!strcasecmp(c
->argv
[1]->ptr
,"swapout") && c
->argc
== 3) {
10778 dictEntry
*de
= dictFind(c
->db
->dict
,c
->argv
[2]);
10781 if (!server
.vm_enabled
) {
10782 addReplySds(c
,sdsnew("-ERR Virtual Memory is disabled\r\n"));
10786 addReply(c
,shared
.nokeyerr
);
10789 key
= dictGetEntryKey(de
);
10790 val
= dictGetEntryVal(de
);
10791 /* If the key is shared we want to create a copy */
10792 if (key
->refcount
> 1) {
10793 robj
*newkey
= dupStringObject(key
);
10795 key
= dictGetEntryKey(de
) = newkey
;
10798 if (key
->storage
!= REDIS_VM_MEMORY
) {
10799 addReplySds(c
,sdsnew("-ERR This key is not in memory\r\n"));
10800 } else if (vmSwapObjectBlocking(key
,val
) == REDIS_OK
) {
10801 dictGetEntryVal(de
) = NULL
;
10802 addReply(c
,shared
.ok
);
10804 addReply(c
,shared
.err
);
10806 } else if (!strcasecmp(c
->argv
[1]->ptr
,"populate") && c
->argc
== 3) {
10811 if (getLongFromObjectOrReply(c
, c
->argv
[2], &keys
, NULL
) != REDIS_OK
)
10813 for (j
= 0; j
< keys
; j
++) {
10814 snprintf(buf
,sizeof(buf
),"key:%lu",j
);
10815 key
= createStringObject(buf
,strlen(buf
));
10816 if (lookupKeyRead(c
->db
,key
) != NULL
) {
10820 snprintf(buf
,sizeof(buf
),"value:%lu",j
);
10821 val
= createStringObject(buf
,strlen(buf
));
10822 dictAdd(c
->db
->dict
,key
,val
);
10824 addReply(c
,shared
.ok
);
10825 } else if (!strcasecmp(c
->argv
[1]->ptr
,"digest") && c
->argc
== 2) {
10826 unsigned char digest
[20];
10827 sds d
= sdsnew("+");
10830 computeDatasetDigest(digest
);
10831 for (j
= 0; j
< 20; j
++)
10832 d
= sdscatprintf(d
, "%02x",digest
[j
]);
10834 d
= sdscatlen(d
,"\r\n",2);
10837 addReplySds(c
,sdsnew(
10838 "-ERR Syntax error, try DEBUG [SEGFAULT|OBJECT <key>|SWAPIN <key>|SWAPOUT <key>|RELOAD]\r\n"));
/* Report a failed assertion: logs a banner followed by the source location
 * ('file':'line') and the text of the failed expression ('estr') at
 * REDIS_WARNING level. When HAVE_BACKTRACE is defined it then writes through
 * an invalid pointer; per the log message this is done on purpose to force a
 * SIGSEGV so that the stack trace gets printed. */
10842 static void _redisAssert(char *estr
, char *file
, int line
) {
10843 redisLog(REDIS_WARNING
,"=== ASSERTION FAILED ===");
10844 redisLog(REDIS_WARNING
,"==> %s:%d '%s' is not true",file
,line
,estr
);
10845 #ifdef HAVE_BACKTRACE
10846 redisLog(REDIS_WARNING
,"(forcing SIGSEGV in order to print the stack trace)");
/* Deliberate invalid write: crash here to trigger the stack trace. */
10847 *((char*)-1) = 'x';
/* Report an unrecoverable condition: logs a banner followed by the panic
 * message 'msg' and the source location ('file':'line') at REDIS_WARNING
 * level. When HAVE_BACKTRACE is defined it then writes through an invalid
 * pointer; per the log message this is done on purpose to force a SIGSEGV so
 * that the stack trace gets printed. */
10851 static void _redisPanic(char *msg
, char *file
, int line
) {
10852 redisLog(REDIS_WARNING
,"!!! Software Failure. Press left mouse button to continue");
10853 redisLog(REDIS_WARNING
,"Guru Meditation: %s #%s:%d",msg
,file
,line
);
10854 #ifdef HAVE_BACKTRACE
10855 redisLog(REDIS_WARNING
,"(forcing SIGSEGV in order to print the stack trace)");
/* Deliberate invalid write: crash here to trigger the stack trace. */
10856 *((char*)-1) = 'x';
10860 /* =================================== Main! ================================ */
/* Open /proc/sys/vm/overcommit_memory and read its first line (at most 63
 * characters plus the terminator) with fgets().
 *
 * Returns -1 when the file cannot be opened. The handling of a failed read
 * and the conversion of the text into the returned integer are not part of
 * the lines shown here. */
10863 int linuxOvercommitMemoryValue(void) {
10864 FILE *fp
= fopen("/proc/sys/vm/overcommit_memory","r");
/* No readable procfs entry: report "unknown" to the caller. */
10867 if (!fp
) return -1;
10868 if (fgets(buf
,64,fp
) == NULL
) {
10877 void linuxOvercommitMemoryWarning(void) {
10878 if (linuxOvercommitMemoryValue() == 0) {
10879 redisLog(REDIS_WARNING
,"WARNING overcommit_memory is set to 0! Background save may fail under low memory condition. To fix this issue add 'vm.overcommit_memory = 1' to /etc/sysctl.conf and then reboot or run the command 'sysctl vm.overcommit_memory=1' for this to take effect.");
10882 #endif /* __linux__ */
/* Detach the process from the launching terminal: fork and let the parent
 * exit, start a new session, point stdin/stdout/stderr at /dev/null and
 * write the process id to the file named by server.pidfile. */
10884 static void daemonize(void) {
/* NOTE(review): a failed fork() returns -1, which also satisfies this
 * condition, so the process exits with status 0 in that case too. */
10888 if (fork() != 0) exit(0); /* parent exits */
10889 setsid(); /* create a new session */
10891 /* Every output goes to /dev/null. If Redis is daemonized but
10892 * the 'logfile' is set to 'stdout' in the configuration file
10893 * it will not log at all. */
10894 if ((fd
= open("/dev/null", O_RDWR
, 0)) != -1) {
10895 dup2(fd
, STDIN_FILENO
);
10896 dup2(fd
, STDOUT_FILENO
);
10897 dup2(fd
, STDERR_FILENO
);
/* Close the extra descriptor unless it is itself one of the three
 * standard ones. */
10898 if (fd
> STDERR_FILENO
) close(fd
);
10900 /* Try to write the pid file */
10901 fp
= fopen(server
.pidfile
,"w");
10903 fprintf(fp
,"%d\n",getpid());
/* Print the server version line on stdout: REDIS_VERSION, REDIS_GIT_SHA1
 * and a 1/0 flag telling whether atoi(REDIS_GIT_DIRTY) is greater than
 * zero. */
10908 static void version() {
10909 printf("Redis server version %s (%s:%d)\n", REDIS_VERSION
,
10910 REDIS_GIT_SHA1
, atoi(REDIS_GIT_DIRTY
) > 0);
/* Print the two command line forms accepted by the server on stderr:
 * a configuration file path, or '-' to read the configuration from
 * standard input. */
10914 static void usage() {
10915 fprintf(stderr
,"Usage: ./redis-server [/path/to/redis.conf]\n");
10916 fprintf(stderr
," ./redis-server - (read config from stdin)\n");
/* Program entry point.
 *
 * Visible sequence: initialize the default configuration, handle the
 * command line (version/help switches or a configuration file path in
 * argv[1]), optionally daemonize, load the dataset from the append only
 * file or from the RDB file, register beforeSleep with the event loop and
 * finally delete the event loop. Several lines of the function are not
 * part of this view. */
10920 int main(int argc
, char **argv
) {
10923 initServerConfig();
/* argv[1] is first matched against the informational switches and is
 * otherwise passed to loadServerConfig() as the configuration source. */
10925 if (strcmp(argv
[1], "-v") == 0 ||
10926 strcmp(argv
[1], "--version") == 0) version();
10927 if (strcmp(argv
[1], "--help") == 0) usage();
10928 resetServerSaveParams();
10929 loadServerConfig(argv
[1]);
10930 } else if ((argc
> 2)) {
10933 redisLog(REDIS_WARNING
,"Warning: no config file specified, using the default config. In order to specify a config file use 'redis-server /path/to/redis.conf'");
10935 if (server
.daemonize
) daemonize();
10937 redisLog(REDIS_NOTICE
,"Server started, Redis version " REDIS_VERSION
);
10939 linuxOvercommitMemoryWarning();
/* 'start' is the reference time used to report how long loading took.
 * NOTE(review): the difference of two time() values is printed with
 * "%ld" below; confirm the type of 'start' matches that specifier. */
10941 start
= time(NULL
);
10942 if (server
.appendonly
) {
10943 if (loadAppendOnlyFile(server
.appendfilename
) == REDIS_OK
)
10944 redisLog(REDIS_NOTICE
,"DB loaded from append only file: %ld seconds",time(NULL
)-start
);
10946 if (rdbLoad(server
.dbfilename
) == REDIS_OK
)
10947 redisLog(REDIS_NOTICE
,"DB loaded from disk: %ld seconds",time(NULL
)-start
);
10949 redisLog(REDIS_NOTICE
,"The server is now ready to accept connections on port %d", server
.port
);
10950 aeSetBeforeSleepProc(server
.el
,beforeSleep
);
10952 aeDeleteEventLoop(server
.el
);
10956 /* ============================= Backtrace support ========================= */
10958 #ifdef HAVE_BACKTRACE
/* Forward declaration: findFuncName() is called by segvHandler() below but
 * is defined only after staticsymbols.h has been included. */
10959 static char *findFuncName(void *pointer
, unsigned long *offset
);
/* Return, as a void pointer, the instruction pointer saved in the signal
 * context 'uc'. The field holding it differs per platform, so the access
 * is selected with preprocessor conditionals: FreeBSD, dietlibc, Mac OS X
 * (both __rip and __eip variants), Linux x86 and Linux IA64 are visible
 * here. */
10961 static void *getMcontextEip(ucontext_t
*uc
) {
10962 #if defined(__FreeBSD__)
10963 return (void*) uc
->uc_mcontext
.mc_eip
;
10964 #elif defined(__dietlibc__)
10965 return (void*) uc
->uc_mcontext
.eip
;
10966 #elif defined(__APPLE__) && !defined(MAC_OS_X_VERSION_10_6)
10968 return (void*) uc
->uc_mcontext
->__ss
.__rip
;
10970 return (void*) uc
->uc_mcontext
->__ss
.__eip
;
10972 #elif defined(__APPLE__) && defined(MAC_OS_X_VERSION_10_6)
10973 #if defined(_STRUCT_X86_THREAD_STATE64) && !defined(__i386__)
10974 return (void*) uc
->uc_mcontext
->__ss
.__rip
;
10976 return (void*) uc
->uc_mcontext
->__ss
.__eip
;
/* NOTE(review): this branch is also selected when __x86_64__ is defined,
 * yet it indexes gregs[] with REG_EIP; confirm that constant is available
 * and correct on 64 bit targets. */
10978 #elif defined(__i386__) || defined(__X86_64__) || defined(__x86_64__)
10979 return (void*) uc
->uc_mcontext
.gregs
[REG_EIP
]; /* Linux 32/64 bit */
10980 #elif defined(__ia64__) /* Linux IA64 */
10981 return (void*) uc
->uc_mcontext
.sc_ip
;
/* Handler installed by setupSigSegvAction() for SIGSEGV, SIGBUS, SIGFPE and
 * SIGILL using the three-argument (SA_SIGINFO) form.
 *
 * sig:    number of the signal received, reported in the log.
 * info:   unused.
 * secret: opaque context pointer, interpreted here as a ucontext_t.
 *
 * The handler logs a banner and the string returned by
 * genRedisInfoString(), then captures up to 100 stack frames and logs one
 * line per frame. */
10987 static void segvHandler(int sig
, siginfo_t
*info
, void *secret
) {
10989 char **messages
= NULL
;
10990 int i
, trace_size
= 0;
10991 unsigned long offset
=0;
10992 ucontext_t
*uc
= (ucontext_t
*) secret
;
10994 REDIS_NOTUSED(info
);
10996 redisLog(REDIS_WARNING
,
10997 "======= Ooops! Redis %s got signal: -%d- =======", REDIS_VERSION
, sig
);
10998 infostring
= genRedisInfoString();
10999 redisLog(REDIS_WARNING
, "%s",infostring
);
11000 /* It's not safe to sdsfree() the returned string under memory
11001 * corruption conditions. Let it leak as we are going to abort */
11003 trace_size
= backtrace(trace
, 100);
11004 /* overwrite sigaction with caller's address */
11005 if (getMcontextEip(uc
) != NULL
) {
11006 trace
[1] = getMcontextEip(uc
);
/* NOTE(review): no NULL check on the backtrace_symbols() result is visible
 * before messages[i] is dereferenced in the loop below. */
11008 messages
= backtrace_symbols(trace
, trace_size
);
/* Frame 0 is skipped: the loop starts at index 1. */
11010 for (i
=1; i
<trace_size
; ++i
) {
11011 char *fn
= findFuncName(trace
[i
], &offset
), *p
;
/* 'p' points at the '+' of the system supplied description, if any; the
 * decimal number after it is compared with the offset computed by
 * findFuncName(). */
11013 p
= strchr(messages
[i
],'+');
/* The system supplied line is logged as is when no symbol was found or
 * when its own offset is the smaller one. */
11014 if (!fn
|| (p
&& ((unsigned long)strtol(p
+1,NULL
,10)) < offset
)) {
11015 redisLog(REDIS_WARNING
,"%s", messages
[i
]);
11017 redisLog(REDIS_WARNING
,"%d redis-server %p %s + %d", i
, trace
[i
], fn
, (unsigned int)offset
);
11020 /* free(messages); Don't call free() with possibly corrupted memory. */
11024 static void sigtermHandler(int sig
) {
11025 REDIS_NOTUSED(sig
);
11027 redisLog(REDIS_WARNING
,"SIGTERM received, scheduling shutting down...");
11028 server
.shutdown_asap
= 1;
11031 static void setupSigSegvAction(void) {
11032 struct sigaction act
;
11034 sigemptyset (&act
.sa_mask
);
11035 /* When the SA_SIGINFO flag is set in sa_flags then sa_sigaction
11036 * is used. Otherwise, sa_handler is used */
11037 act
.sa_flags
= SA_NODEFER
| SA_ONSTACK
| SA_RESETHAND
| SA_SIGINFO
;
11038 act
.sa_sigaction
= segvHandler
;
11039 sigaction (SIGSEGV
, &act
, NULL
);
11040 sigaction (SIGBUS
, &act
, NULL
);
11041 sigaction (SIGFPE
, &act
, NULL
);
11042 sigaction (SIGILL
, &act
, NULL
);
11043 sigaction (SIGBUS
, &act
, NULL
);
11045 act
.sa_flags
= SA_NODEFER
| SA_ONSTACK
| SA_RESETHAND
;
11046 act
.sa_handler
= sigtermHandler
;
11047 sigaction (SIGTERM
, &act
, NULL
);
11051 #include "staticsymbols.h"
11052 /* This function tries to convert a pointer into a function name. It's used in
11053 * order to provide a backtrace under segmentation fault that's able to
11054 * display functions declared as static (otherwise the backtrace is useless). */
/* Look up 'pointer' in symsTable (from staticsymbols.h) and return the name
 * of the best matching entry, or NULL when 'ret' is still -1 after the
 * scan.
 *
 * pointer: code address to resolve.
 * offset:  output parameter; the line storing into it is not part of this
 *          view. */
11055 static char *findFuncName(void *pointer
, unsigned long *offset
){
11057 unsigned long off
, minoff
= 0;
11059 /* Try to match against the Symbol with the smallest offset */
/* The table is scanned until an entry whose 'pointer' field is zero. */
11060 for (i
=0; symsTable
[i
].pointer
; i
++) {
11061 unsigned long lp
= (unsigned long) pointer
;
/* Only entries starting at or before the address are candidates; the
 * all-ones address is never matched. */
11063 if (lp
!= (unsigned long)-1 && lp
>= symsTable
[i
].pointer
) {
11064 off
=lp
-symsTable
[i
].pointer
;
/* First candidate, or a candidate closer to the address than the best one
 * seen so far. */
11065 if (ret
< 0 || off
< minoff
) {
11071 if (ret
== -1) return NULL
;
11073 return symsTable
[ret
].name
;
11075 #else /* HAVE_BACKTRACE */
/* Variant of setupSigSegvAction() compiled when HAVE_BACKTRACE is not
 * defined. Its body is not part of the lines shown here. */
11076 static void setupSigSegvAction(void) {
11078 #endif /* HAVE_BACKTRACE */