2 * Copyright (c) 2009-2010, Salvatore Sanfilippo <antirez at gmail dot com>
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions are met:
8 * * Redistributions of source code must retain the above copyright notice,
9 * this list of conditions and the following disclaimer.
10 * * Redistributions in binary form must reproduce the above copyright
11 * notice, this list of conditions and the following disclaimer in the
12 * documentation and/or other materials provided with the distribution.
13 * * Neither the name of Redis nor the names of its contributors may be used
14 * to endorse or promote products derived from this software without
15 * specific prior written permission.
17 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
18 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
21 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
22 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
23 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
24 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
25 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
26 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
27 * POSSIBILITY OF SUCH DAMAGE.
30 #define REDIS_VERSION "2.1.1"
45 #endif /* HAVE_BACKTRACE */
53 #include <arpa/inet.h>
57 #include <sys/resource.h>
65 #include "solarisfixes.h"
69 #include "ae.h" /* Event driven programming library */
70 #include "sds.h" /* Dynamic safe strings */
71 #include "anet.h" /* Networking the easy way */
72 #include "dict.h" /* Hash tables */
73 #include "adlist.h" /* Linked lists */
74 #include "zmalloc.h" /* total memory usage aware version of malloc/free */
75 #include "lzf.h" /* LZF compression library */
76 #include "pqsort.h" /* Partial qsort for SORT+LIMIT */
77 #include "zipmap.h" /* Compact dictionary-alike data structure */
78 #include "sha1.h" /* SHA1 is used for DEBUG DIGEST */
79 #include "release.h" /* Release and/or git repository information */
85 /* Static server configuration */
86 #define REDIS_SERVERPORT 6379 /* TCP port */
87 #define REDIS_MAXIDLETIME (60*5) /* default client timeout */
88 #define REDIS_IOBUF_LEN 1024
89 #define REDIS_LOADBUF_LEN 1024
90 #define REDIS_STATIC_ARGS 8
91 #define REDIS_DEFAULT_DBNUM 16
92 #define REDIS_CONFIGLINE_MAX 1024
93 #define REDIS_OBJFREELIST_MAX 1000000 /* Max number of objects to cache */
94 #define REDIS_MAX_SYNC_TIME 60 /* Slave can't take more to sync */
95 #define REDIS_EXPIRELOOKUPS_PER_CRON 10 /* lookup 10 expires per loop */
96 #define REDIS_MAX_WRITE_PER_EVENT (1024*64)
97 #define REDIS_REQUEST_MAX_SIZE (1024*1024*256) /* max bytes in inline command */
99 /* If more than REDIS_WRITEV_THRESHOLD write packets are pending use writev */
100 #define REDIS_WRITEV_THRESHOLD 3
101 /* Max number of iovecs used for each writev call */
102 #define REDIS_WRITEV_IOVEC_COUNT 256
104 /* Hash table parameters */
105 #define REDIS_HT_MINFILL 10 /* Minimal hash table fill 10% */
108 #define REDIS_CMD_BULK 1 /* Bulk write command */
109 #define REDIS_CMD_INLINE 2 /* Inline command */
110 /* REDIS_CMD_DENYOOM deserves a longer comment: all the commands marked with
111 this flag will return an error when the 'maxmemory' option is set in the
112 config file and the server is using more than maxmemory bytes of memory.
113 In short these commands are denied on low memory conditions. */
114 #define REDIS_CMD_DENYOOM 4
115 #define REDIS_CMD_FORCE_REPLICATION 8 /* Force replication even if dirty is 0 */
118 #define REDIS_STRING 0
124 /* Objects encoding. Some kinds of objects like Strings and Hashes can be
125 * internally represented in multiple ways. The 'encoding' field of the object
126 * is set to one of these values for this object. */
127 #define REDIS_ENCODING_RAW 0 /* Raw representation */
128 #define REDIS_ENCODING_INT 1 /* Encoded as integer */
129 #define REDIS_ENCODING_ZIPMAP 2 /* Encoded as zipmap */
130 #define REDIS_ENCODING_HT 3 /* Encoded as an hash table */
132 static char* strencoding
[] = {
133 "raw", "int", "zipmap", "hashtable"
136 /* Object types only used for dumping to disk */
137 #define REDIS_EXPIRETIME 253
138 #define REDIS_SELECTDB 254
139 #define REDIS_EOF 255
141 /* Defines related to the dump file format. To store 32 bits lengths for short
142 * keys requires a lot of space, so we check the most significant 2 bits of
143 * the first byte to interpret the length:
145 * 00|000000 => if the two MSB are 00 the len is the 6 bits of this byte
146 * 01|000000 00000000 => 01, the len is 14 bits, 6 bits + 8 bits of next byte
147 * 10|000000 [32 bit integer] => if it's 10, a full 32 bit len will follow
148 * 11|000000 this means: specially encoded object will follow. The six-bit
149 * number specifies the kind of object that follows.
150 * See the REDIS_RDB_ENC_* defines.
152 * Lengths up to 63 are stored using a single byte, most DB keys, and many
153 * values, will fit inside. */
154 #define REDIS_RDB_6BITLEN 0
155 #define REDIS_RDB_14BITLEN 1
156 #define REDIS_RDB_32BITLEN 2
157 #define REDIS_RDB_ENCVAL 3
158 #define REDIS_RDB_LENERR UINT_MAX
160 /* When a length of a string object stored on disk has the first two bits
161 * set, the remaining six bits specify a special encoding for the object
162 * according to the following defines: */
163 #define REDIS_RDB_ENC_INT8 0 /* 8 bit signed integer */
164 #define REDIS_RDB_ENC_INT16 1 /* 16 bit signed integer */
165 #define REDIS_RDB_ENC_INT32 2 /* 32 bit signed integer */
166 #define REDIS_RDB_ENC_LZF 3 /* string compressed with FASTLZ */
168 /* Virtual memory object->where field. */
169 #define REDIS_VM_MEMORY 0 /* The object is on memory */
170 #define REDIS_VM_SWAPPED 1 /* The object is on disk */
171 #define REDIS_VM_SWAPPING 2 /* Redis is swapping this object on disk */
172 #define REDIS_VM_LOADING 3 /* Redis is loading this object from disk */
174 /* Virtual memory static configuration stuff.
175 * Check vmFindContiguousPages() to know more about these magic numbers. */
176 #define REDIS_VM_MAX_NEAR_PAGES 65536
177 #define REDIS_VM_MAX_RANDOM_JUMP 4096
178 #define REDIS_VM_MAX_THREADS 32
179 #define REDIS_THREAD_STACK_SIZE (1024*1024*4)
180 /* The following is the *percentage* of completed I/O jobs to process when the
181 * handler is called. While Virtual Memory I/O operations are performed by
182 * threads, these operations must be processed by the main thread when completed
183 * in order to take effect. */
184 #define REDIS_MAX_COMPLETED_JOBS_PROCESSED 1
187 #define REDIS_SLAVE 1 /* This client is a slave server */
188 #define REDIS_MASTER 2 /* This client is a master server */
189 #define REDIS_MONITOR 4 /* This client is a slave monitor, see MONITOR */
190 #define REDIS_MULTI 8 /* This client is in a MULTI context */
191 #define REDIS_BLOCKED 16 /* The client is waiting in a blocking operation */
192 #define REDIS_IO_WAIT 32 /* The client is waiting for Virtual Memory I/O */
193 #define REDIS_DIRTY_CAS 64 /* Watched keys modified. EXEC will fail. */
195 /* Slave replication state - slave side */
196 #define REDIS_REPL_NONE 0 /* No active replication */
197 #define REDIS_REPL_CONNECT 1 /* Must connect to master */
198 #define REDIS_REPL_CONNECTED 2 /* Connected to master */
200 /* Slave replication state - from the point of view of master
201 * Note that in SEND_BULK and ONLINE state the slave receives new updates
202 * in its output queue. In the WAIT_BGSAVE state instead the server is waiting
203 * to start the next background saving in order to send updates to it. */
204 #define REDIS_REPL_WAIT_BGSAVE_START 3 /* master waits bgsave to start feeding it */
205 #define REDIS_REPL_WAIT_BGSAVE_END 4 /* master waits bgsave to start bulk DB transmission */
206 #define REDIS_REPL_SEND_BULK 5 /* master is sending the bulk DB */
207 #define REDIS_REPL_ONLINE 6 /* bulk DB already transmitted, receive updates */
209 /* List related stuff */
213 /* Sort operations */
214 #define REDIS_SORT_GET 0
215 #define REDIS_SORT_ASC 1
216 #define REDIS_SORT_DESC 2
217 #define REDIS_SORTKEY_MAX 1024
220 #define REDIS_DEBUG 0
221 #define REDIS_VERBOSE 1
222 #define REDIS_NOTICE 2
223 #define REDIS_WARNING 3
/* Anti-warning macro: evaluates V and discards the result, so the compiler
 * does not complain about an otherwise unused variable or parameter.
 * The argument is parenthesized so that a compound expression such as
 * REDIS_NOTUSED(a + b) is cast to void as a whole. */
#define REDIS_NOTUSED(V) ((void) (V))
228 #define ZSKIPLIST_MAXLEVEL 32 /* Should be enough for 2^32 elements */
229 #define ZSKIPLIST_P 0.25 /* Skiplist P = 1/4 */
231 /* Append only defines */
232 #define APPENDFSYNC_NO 0
233 #define APPENDFSYNC_ALWAYS 1
234 #define APPENDFSYNC_EVERYSEC 2
236 /* Hashes related defaults */
237 #define REDIS_HASH_MAX_ZIPMAP_ENTRIES 64
238 #define REDIS_HASH_MAX_ZIPMAP_VALUE 512
/* We can print the stacktrace, so our assert is defined this way.
 *
 * redisAssert(e): when 'e' evaluates to false, the stringified expression,
 * the file name and the line number of the call site are passed to
 * _redisAssert(), then _exit(1) is called. When 'e' is true nothing happens.
 *
 * redisPanic(m): unconditionally passes the stringified argument, the file
 * name and the line number to _redisPanic(), then calls _exit(1).
 *
 * Both expansions are fully parenthesized so that each macro behaves as one
 * single expression wherever it appears (for instance as the last operand
 * of ?:); otherwise the trailing ",_exit(1)" would bind to the enclosing
 * expression instead of to the macro. */
#define redisAssert(_e) ((_e)?(void)0 : (_redisAssert(#_e,__FILE__,__LINE__),_exit(1)))
#define redisPanic(_e) (_redisPanic(#_e,__FILE__,__LINE__),_exit(1))
static void _redisAssert(char *estr, char *file, int line);
static void _redisPanic(char *msg, char *file, int line);
246 /*================================= Data types ============================== */
248 /* A redis object, that is a type able to hold a string / list / set */
250 /* The VM object structure */
251 struct redisObjectVM
{
252 off_t page
; /* the page at witch the object is stored on disk */
253 off_t usedpages
; /* number of pages used on disk */
254 time_t atime
; /* Last access time */
257 /* The actual Redis Object */
258 typedef struct redisObject
{
261 unsigned char encoding
;
262 unsigned char storage
; /* If this object is a key, where is the value?
263 * REDIS_VM_MEMORY, REDIS_VM_SWAPPED, ... */
264 unsigned char vtype
; /* If this object is a key, and value is swapped out,
265 * this is the type of the swapped out object. */
267 /* VM fields, these are only allocated if VM is active, otherwise the
268 * object allocation function will just allocate
269 * sizeof(redisObject) minus sizeof(redisObjectVM), so using
270 * Redis without VM active will not have any overhead. */
271 struct redisObjectVM vm
;
274 /* Macro used to initialize a Redis object allocated on the stack.
275 * Note that this macro is kept near the structure definition to make sure
276 * we'll update it when the structure is changed, to avoid bugs like
277 * bug #85 introduced exactly in this way. */
278 #define initStaticStringObject(_var,_ptr) do { \
280 _var.type = REDIS_STRING; \
281 _var.encoding = REDIS_ENCODING_RAW; \
283 if (server.vm_enabled) _var.storage = REDIS_VM_MEMORY; \
286 typedef struct redisDb
{
287 dict
*dict
; /* The keyspace for this DB */
288 dict
*expires
; /* Timeout of keys with a timeout set */
289 dict
*blocking_keys
; /* Keys with clients waiting for data (BLPOP) */
290 dict
*io_keys
; /* Keys with clients waiting for VM I/O */
291 dict
*watched_keys
; /* WATCHED keys for MULTI/EXEC CAS */
295 /* Client MULTI/EXEC state */
296 typedef struct multiCmd
{
299 struct redisCommand
*cmd
;
302 typedef struct multiState
{
303 multiCmd
*commands
; /* Array of MULTI commands */
304 int count
; /* Total number of MULTI commands */
307 /* With multiplexing we need to keep per-client state.
308 * Clients are kept in a linked list. */
309 typedef struct redisClient
{
314 robj
**argv
, **mbargv
;
316 int bulklen
; /* bulk read len. -1 if not in bulk read mode */
317 int multibulk
; /* multi bulk command format active */
320 time_t lastinteraction
; /* time of the last interaction, used for timeout */
321 int flags
; /* REDIS_SLAVE | REDIS_MONITOR | REDIS_MULTI ... */
322 int slaveseldb
; /* slave selected db, if this client is a slave */
323 int authenticated
; /* when requirepass is non-NULL */
324 int replstate
; /* replication state if this is a slave */
325 int repldbfd
; /* replication DB file descriptor */
326 long repldboff
; /* replication DB file offset */
327 off_t repldbsize
; /* replication DB file size */
328 multiState mstate
; /* MULTI/EXEC state */
329 robj
**blocking_keys
; /* The key we are waiting to terminate a blocking
330 * operation such as BLPOP. Otherwise NULL. */
331 int blocking_keys_num
; /* Number of blocking keys */
332 time_t blockingto
; /* Blocking operation timeout. If UNIX current time
333 * is >= blockingto then the operation timed out. */
334 list
*io_keys
; /* Keys this client is waiting to be loaded from the
335 * swap file in order to continue. */
336 list
*watched_keys
; /* Keys WATCHED for MULTI/EXEC CAS */
337 dict
*pubsub_channels
; /* channels a client is interested in (SUBSCRIBE) */
338 list
*pubsub_patterns
; /* patterns a client is interested in (SUBSCRIBE) */
346 /* Global server state structure */
351 long long dirty
; /* changes to DB from the last save */
353 list
*slaves
, *monitors
;
354 char neterr
[ANET_ERR_LEN
];
356 int cronloops
; /* number of times the cron function run */
357 list
*objfreelist
; /* A list of freed objects to avoid malloc() */
358 time_t lastsave
; /* Unix time of last save succeeede */
359 /* Fields used only for stats */
360 time_t stat_starttime
; /* server start time */
361 long long stat_numcommands
; /* number of processed commands */
362 long long stat_numconnections
; /* number of connections received */
363 long long stat_expiredkeys
; /* number of expired keys */
377 pid_t bgsavechildpid
;
378 pid_t bgrewritechildpid
;
379 sds bgrewritebuf
; /* buffer taken by parent during oppend only rewrite */
380 sds aofbuf
; /* AOF buffer, written before entering the event loop */
381 struct saveparam
*saveparams
;
386 char *appendfilename
;
390 /* Replication related */
395 redisClient
*master
; /* client that is master for this slave */
397 unsigned int maxclients
;
398 unsigned long long maxmemory
;
399 unsigned int blpop_blocked_clients
;
400 unsigned int vm_blocked_clients
;
401 /* Sort parameters - qsort_r() is only available under BSD so we
402 * have to take this state global, in order to pass it to sortCompare() */
406 /* Virtual memory configuration */
411 unsigned long long vm_max_memory
;
413 size_t hash_max_zipmap_entries
;
414 size_t hash_max_zipmap_value
;
415 /* Virtual memory state */
418 off_t vm_next_page
; /* Next probably empty page */
419 off_t vm_near_pages
; /* Number of pages allocated sequentially */
420 unsigned char *vm_bitmap
; /* Bitmap of free/used pages */
421 time_t unixtime
; /* Unix time sampled every second. */
422 /* Virtual memory I/O threads stuff */
423 /* An I/O thread processes an element taken from the io_newjobs queue and
424 * puts the result of the operation in the io_processed list. While the
425 * job is being processed, it's put on the io_processing queue. */
426 list
*io_newjobs
; /* List of VM I/O jobs yet to be processed */
427 list
*io_processing
; /* List of VM I/O jobs being processed */
428 list
*io_processed
; /* List of VM I/O jobs already processed */
429 list
*io_ready_clients
; /* Clients ready to be unblocked. All keys loaded */
430 pthread_mutex_t io_mutex
; /* lock to access io_jobs/io_done/io_thread_job */
431 pthread_mutex_t obj_freelist_mutex
; /* safe redis objects creation/free */
432 pthread_mutex_t io_swapfile_mutex
; /* So we can lseek + write */
433 pthread_attr_t io_threads_attr
; /* attributes for threads creation */
434 int io_active_threads
; /* Number of running I/O threads */
435 int vm_max_threads
; /* Max number of I/O threads running at the same time */
436 /* Our main thread is blocked on the event loop, looking for sockets ready
437 * to be read or written, so when a threaded I/O operation is ready to be
438 * processed by the main thread, the I/O thread will use a unix pipe to
439 * wake up the main thread. The following are the two pipe FDs. */
440 int io_ready_pipe_read
;
441 int io_ready_pipe_write
;
442 /* Virtual memory stats */
443 unsigned long long vm_stats_used_pages
;
444 unsigned long long vm_stats_swapped_objects
;
445 unsigned long long vm_stats_swapouts
;
446 unsigned long long vm_stats_swapins
;
448 dict
*pubsub_channels
; /* Map channels to list of subscribed clients */
449 list
*pubsub_patterns
; /* A list of pubsub_patterns */
454 typedef struct pubsubPattern
{
459 typedef void redisCommandProc(redisClient
*c
);
460 typedef void redisVmPreloadProc(redisClient
*c
, struct redisCommand
*cmd
, int argc
, robj
**argv
);
461 struct redisCommand
{
463 redisCommandProc
*proc
;
466 /* Use a function to determine which keys need to be loaded
467 * in the background prior to executing this command. Takes precedence
468 * over vm_firstkey and others, ignored when NULL */
469 redisVmPreloadProc
*vm_preload_proc
;
470 /* What keys should be loaded in background when calling this command? */
471 int vm_firstkey
; /* The first argument that's a key (0 = no keys) */
472 int vm_lastkey
; /* THe last argument that's a key */
473 int vm_keystep
; /* The step between first and last key */
476 struct redisFunctionSym
{
478 unsigned long pointer
;
481 typedef struct _redisSortObject
{
489 typedef struct _redisSortOperation
{
492 } redisSortOperation
;
494 /* ZSETs use a specialized version of Skiplists */
496 typedef struct zskiplistNode
{
497 struct zskiplistNode
**forward
;
498 struct zskiplistNode
*backward
;
504 typedef struct zskiplist
{
505 struct zskiplistNode
*header
, *tail
;
506 unsigned long length
;
510 typedef struct zset
{
515 /* Our shared "common" objects */
517 #define REDIS_SHARED_INTEGERS 10000
518 struct sharedObjectsStruct
{
519 robj
*crlf
, *ok
, *err
, *emptybulk
, *czero
, *cone
, *pong
, *space
,
520 *colon
, *nullbulk
, *nullmultibulk
, *queued
,
521 *emptymultibulk
, *wrongtypeerr
, *nokeyerr
, *syntaxerr
, *sameobjecterr
,
522 *outofrangeerr
, *plus
,
523 *select0
, *select1
, *select2
, *select3
, *select4
,
524 *select5
, *select6
, *select7
, *select8
, *select9
,
525 *messagebulk
, *pmessagebulk
, *subscribebulk
, *unsubscribebulk
, *mbulk3
,
526 *mbulk4
, *psubscribebulk
, *punsubscribebulk
,
527 *integers
[REDIS_SHARED_INTEGERS
];
530 /* Global vars that are actually used as constants. The following double
531 * values are used for double on-disk serialization, and are initialized
532 * at runtime to avoid strange compiler optimizations. */
/* File-scope doubles, one declaration per name. No initializer is given
 * here, so each one starts out as 0.0 like any static object. */
static double R_Zero;
static double R_PosInf;
static double R_NegInf;
static double R_Nan;
536 /* VM threaded I/O request message */
537 #define REDIS_IOJOB_LOAD 0 /* Load from disk to memory */
538 #define REDIS_IOJOB_PREPARE_SWAP 1 /* Compute needed pages */
539 #define REDIS_IOJOB_DO_SWAP 2 /* Swap from memory to disk */
540 typedef struct iojob
{
541 int type
; /* Request type, REDIS_IOJOB_* */
542 redisDb
*db
;/* Redis database */
543 robj
*key
; /* This I/O request is about swapping this key */
544 robj
*val
; /* the value to swap for REDIS_IOREQ_*_SWAP, otherwise this
545 * field is populated by the I/O thread for REDIS_IOREQ_LOAD. */
546 off_t page
; /* Swap page where to read/write the object */
547 off_t pages
; /* Swap pages needed to save object. PREPARE_SWAP return val */
548 int canceled
; /* True if this command was canceled by blocking side of VM */
549 pthread_t thread
; /* ID of the thread processing this entry */
552 /*================================ Prototypes =============================== */
554 static void freeStringObject(robj
*o
);
555 static void freeListObject(robj
*o
);
556 static void freeSetObject(robj
*o
);
557 static void decrRefCount(void *o
);
558 static robj
*createObject(int type
, void *ptr
);
559 static void freeClient(redisClient
*c
);
560 static int rdbLoad(char *filename
);
561 static void addReply(redisClient
*c
, robj
*obj
);
562 static void addReplySds(redisClient
*c
, sds s
);
563 static void incrRefCount(robj
*o
);
564 static int rdbSaveBackground(char *filename
);
565 static robj
*createStringObject(char *ptr
, size_t len
);
566 static robj
*dupStringObject(robj
*o
);
567 static void replicationFeedSlaves(list
*slaves
, int dictid
, robj
**argv
, int argc
);
568 static void replicationFeedMonitors(list
*monitors
, int dictid
, robj
**argv
, int argc
);
569 static void flushAppendOnlyFile(void);
570 static void feedAppendOnlyFile(struct redisCommand
*cmd
, int dictid
, robj
**argv
, int argc
);
571 static int syncWithMaster(void);
572 static robj
*tryObjectEncoding(robj
*o
);
573 static robj
*getDecodedObject(robj
*o
);
574 static int removeExpire(redisDb
*db
, robj
*key
);
575 static int expireIfNeeded(redisDb
*db
, robj
*key
);
576 static int deleteIfVolatile(redisDb
*db
, robj
*key
);
577 static int deleteIfSwapped(redisDb
*db
, robj
*key
);
578 static int deleteKey(redisDb
*db
, robj
*key
);
579 static time_t getExpire(redisDb
*db
, robj
*key
);
580 static int setExpire(redisDb
*db
, robj
*key
, time_t when
);
581 static void updateSlavesWaitingBgsave(int bgsaveerr
);
582 static void freeMemoryIfNeeded(void);
583 static int processCommand(redisClient
*c
);
584 static void setupSigSegvAction(void);
585 static void rdbRemoveTempFile(pid_t childpid
);
586 static void aofRemoveTempFile(pid_t childpid
);
587 static size_t stringObjectLen(robj
*o
);
588 static void processInputBuffer(redisClient
*c
);
589 static zskiplist
*zslCreate(void);
590 static void zslFree(zskiplist
*zsl
);
591 static void zslInsert(zskiplist
*zsl
, double score
, robj
*obj
);
592 static void sendReplyToClientWritev(aeEventLoop
*el
, int fd
, void *privdata
, int mask
);
593 static void initClientMultiState(redisClient
*c
);
594 static void freeClientMultiState(redisClient
*c
);
595 static void queueMultiCommand(redisClient
*c
, struct redisCommand
*cmd
);
596 static void unblockClientWaitingData(redisClient
*c
);
597 static int handleClientsWaitingListPush(redisClient
*c
, robj
*key
, robj
*ele
);
598 static void vmInit(void);
599 static void vmMarkPagesFree(off_t page
, off_t count
);
600 static robj
*vmLoadObject(robj
*key
);
601 static robj
*vmPreviewObject(robj
*key
);
602 static int vmSwapOneObjectBlocking(void);
603 static int vmSwapOneObjectThreaded(void);
604 static int vmCanSwapOut(void);
605 static int tryFreeOneObjectFromFreelist(void);
606 static void acceptHandler(aeEventLoop
*el
, int fd
, void *privdata
, int mask
);
607 static void vmThreadedIOCompletedJob(aeEventLoop
*el
, int fd
, void *privdata
, int mask
);
608 static void vmCancelThreadedIOJob(robj
*o
);
609 static void lockThreadedIO(void);
610 static void unlockThreadedIO(void);
611 static int vmSwapObjectThreaded(robj
*key
, robj
*val
, redisDb
*db
);
612 static void freeIOJob(iojob
*j
);
613 static void queueIOJob(iojob
*j
);
614 static int vmWriteObjectOnSwap(robj
*o
, off_t page
);
615 static robj
*vmReadObjectFromSwap(off_t page
, int type
);
616 static void waitEmptyIOJobsQueue(void);
617 static void vmReopenSwapFile(void);
618 static int vmFreePage(off_t page
);
619 static void zunionInterBlockClientOnSwappedKeys(redisClient
*c
, struct redisCommand
*cmd
, int argc
, robj
**argv
);
620 static void execBlockClientOnSwappedKeys(redisClient
*c
, struct redisCommand
*cmd
, int argc
, robj
**argv
);
621 static int blockClientOnSwappedKeys(redisClient
*c
, struct redisCommand
*cmd
);
622 static int dontWaitForSwappedKey(redisClient
*c
, robj
*key
);
623 static void handleClientsBlockedOnSwappedKey(redisDb
*db
, robj
*key
);
624 static void readQueryFromClient(aeEventLoop
*el
, int fd
, void *privdata
, int mask
);
625 static struct redisCommand
*lookupCommand(char *name
);
626 static void call(redisClient
*c
, struct redisCommand
*cmd
);
627 static void resetClient(redisClient
*c
);
628 static void convertToRealHash(robj
*o
);
629 static int pubsubUnsubscribeAllChannels(redisClient
*c
, int notify
);
630 static int pubsubUnsubscribeAllPatterns(redisClient
*c
, int notify
);
631 static void freePubsubPattern(void *p
);
632 static int listMatchPubsubPattern(void *a
, void *b
);
633 static int compareStringObjects(robj
*a
, robj
*b
);
634 static int equalStringObjects(robj
*a
, robj
*b
);
636 static int rewriteAppendOnlyFileBackground(void);
637 static int vmSwapObjectBlocking(robj
*key
, robj
*val
);
638 static int prepareForShutdown();
639 static void touchWatchedKey(redisDb
*db
, robj
*key
);
640 static void touchWatchedKeysOnFlush(int dbid
);
641 static void unwatchAllKeys(redisClient
*c
);
643 static void authCommand(redisClient
*c
);
644 static void pingCommand(redisClient
*c
);
645 static void echoCommand(redisClient
*c
);
646 static void setCommand(redisClient
*c
);
647 static void setnxCommand(redisClient
*c
);
648 static void setexCommand(redisClient
*c
);
649 static void getCommand(redisClient
*c
);
650 static void delCommand(redisClient
*c
);
651 static void existsCommand(redisClient
*c
);
652 static void incrCommand(redisClient
*c
);
653 static void decrCommand(redisClient
*c
);
654 static void incrbyCommand(redisClient
*c
);
655 static void decrbyCommand(redisClient
*c
);
656 static void selectCommand(redisClient
*c
);
657 static void randomkeyCommand(redisClient
*c
);
658 static void keysCommand(redisClient
*c
);
659 static void dbsizeCommand(redisClient
*c
);
660 static void lastsaveCommand(redisClient
*c
);
661 static void saveCommand(redisClient
*c
);
662 static void bgsaveCommand(redisClient
*c
);
663 static void bgrewriteaofCommand(redisClient
*c
);
664 static void shutdownCommand(redisClient
*c
);
665 static void moveCommand(redisClient
*c
);
666 static void renameCommand(redisClient
*c
);
667 static void renamenxCommand(redisClient
*c
);
668 static void lpushCommand(redisClient
*c
);
669 static void rpushCommand(redisClient
*c
);
670 static void lpopCommand(redisClient
*c
);
671 static void rpopCommand(redisClient
*c
);
672 static void llenCommand(redisClient
*c
);
673 static void lindexCommand(redisClient
*c
);
674 static void lrangeCommand(redisClient
*c
);
675 static void ltrimCommand(redisClient
*c
);
676 static void typeCommand(redisClient
*c
);
677 static void lsetCommand(redisClient
*c
);
678 static void saddCommand(redisClient
*c
);
679 static void sremCommand(redisClient
*c
);
680 static void smoveCommand(redisClient
*c
);
681 static void sismemberCommand(redisClient
*c
);
682 static void scardCommand(redisClient
*c
);
683 static void spopCommand(redisClient
*c
);
684 static void srandmemberCommand(redisClient
*c
);
685 static void sinterCommand(redisClient
*c
);
686 static void sinterstoreCommand(redisClient
*c
);
687 static void sunionCommand(redisClient
*c
);
688 static void sunionstoreCommand(redisClient
*c
);
689 static void sdiffCommand(redisClient
*c
);
690 static void sdiffstoreCommand(redisClient
*c
);
691 static void syncCommand(redisClient
*c
);
692 static void flushdbCommand(redisClient
*c
);
693 static void flushallCommand(redisClient
*c
);
694 static void sortCommand(redisClient
*c
);
695 static void lremCommand(redisClient
*c
);
696 static void rpoplpushcommand(redisClient
*c
);
697 static void infoCommand(redisClient
*c
);
698 static void mgetCommand(redisClient
*c
);
699 static void monitorCommand(redisClient
*c
);
700 static void expireCommand(redisClient
*c
);
701 static void expireatCommand(redisClient
*c
);
702 static void getsetCommand(redisClient
*c
);
703 static void ttlCommand(redisClient
*c
);
704 static void slaveofCommand(redisClient
*c
);
705 static void debugCommand(redisClient
*c
);
706 static void msetCommand(redisClient
*c
);
707 static void msetnxCommand(redisClient
*c
);
708 static void zaddCommand(redisClient
*c
);
709 static void zincrbyCommand(redisClient
*c
);
710 static void zrangeCommand(redisClient
*c
);
711 static void zrangebyscoreCommand(redisClient
*c
);
712 static void zcountCommand(redisClient
*c
);
713 static void zrevrangeCommand(redisClient
*c
);
714 static void zcardCommand(redisClient
*c
);
715 static void zremCommand(redisClient
*c
);
716 static void zscoreCommand(redisClient
*c
);
717 static void zremrangebyscoreCommand(redisClient
*c
);
718 static void multiCommand(redisClient
*c
);
719 static void execCommand(redisClient
*c
);
720 static void discardCommand(redisClient
*c
);
721 static void blpopCommand(redisClient
*c
);
722 static void brpopCommand(redisClient
*c
);
723 static void appendCommand(redisClient
*c
);
724 static void substrCommand(redisClient
*c
);
725 static void zrankCommand(redisClient
*c
);
726 static void zrevrankCommand(redisClient
*c
);
727 static void hsetCommand(redisClient
*c
);
728 static void hsetnxCommand(redisClient
*c
);
729 static void hgetCommand(redisClient
*c
);
730 static void hmsetCommand(redisClient
*c
);
731 static void hmgetCommand(redisClient
*c
);
732 static void hdelCommand(redisClient
*c
);
733 static void hlenCommand(redisClient
*c
);
734 static void zremrangebyrankCommand(redisClient
*c
);
735 static void zunionstoreCommand(redisClient
*c
);
736 static void zinterstoreCommand(redisClient
*c
);
737 static void hkeysCommand(redisClient
*c
);
738 static void hvalsCommand(redisClient
*c
);
739 static void hgetallCommand(redisClient
*c
);
740 static void hexistsCommand(redisClient
*c
);
741 static void configCommand(redisClient
*c
);
742 static void hincrbyCommand(redisClient
*c
);
743 static void subscribeCommand(redisClient
*c
);
744 static void unsubscribeCommand(redisClient
*c
);
745 static void psubscribeCommand(redisClient
*c
);
746 static void punsubscribeCommand(redisClient
*c
);
747 static void publishCommand(redisClient
*c
);
748 static void watchCommand(redisClient
*c
);
749 static void unwatchCommand(redisClient
*c
);
751 /*================================= Globals ================================= */
754 static struct redisServer server
; /* server global state */
755 static struct redisCommand
*commandTable
;
756 static struct redisCommand readonlyCommandTable
[] = {
757 {"get",getCommand
,2,REDIS_CMD_INLINE
,NULL
,1,1,1},
758 {"set",setCommand
,3,REDIS_CMD_BULK
|REDIS_CMD_DENYOOM
,NULL
,0,0,0},
759 {"setnx",setnxCommand
,3,REDIS_CMD_BULK
|REDIS_CMD_DENYOOM
,NULL
,0,0,0},
760 {"setex",setexCommand
,4,REDIS_CMD_BULK
|REDIS_CMD_DENYOOM
,NULL
,0,0,0},
761 {"append",appendCommand
,3,REDIS_CMD_BULK
|REDIS_CMD_DENYOOM
,NULL
,1,1,1},
762 {"substr",substrCommand
,4,REDIS_CMD_INLINE
,NULL
,1,1,1},
763 {"del",delCommand
,-2,REDIS_CMD_INLINE
,NULL
,0,0,0},
764 {"exists",existsCommand
,2,REDIS_CMD_INLINE
,NULL
,1,1,1},
765 {"incr",incrCommand
,2,REDIS_CMD_INLINE
|REDIS_CMD_DENYOOM
,NULL
,1,1,1},
766 {"decr",decrCommand
,2,REDIS_CMD_INLINE
|REDIS_CMD_DENYOOM
,NULL
,1,1,1},
767 {"mget",mgetCommand
,-2,REDIS_CMD_INLINE
,NULL
,1,-1,1},
768 {"rpush",rpushCommand
,3,REDIS_CMD_BULK
|REDIS_CMD_DENYOOM
,NULL
,1,1,1},
769 {"lpush",lpushCommand
,3,REDIS_CMD_BULK
|REDIS_CMD_DENYOOM
,NULL
,1,1,1},
770 {"rpop",rpopCommand
,2,REDIS_CMD_INLINE
,NULL
,1,1,1},
771 {"lpop",lpopCommand
,2,REDIS_CMD_INLINE
,NULL
,1,1,1},
772 {"brpop",brpopCommand
,-3,REDIS_CMD_INLINE
,NULL
,1,1,1},
773 {"blpop",blpopCommand
,-3,REDIS_CMD_INLINE
,NULL
,1,1,1},
774 {"llen",llenCommand
,2,REDIS_CMD_INLINE
,NULL
,1,1,1},
775 {"lindex",lindexCommand
,3,REDIS_CMD_INLINE
,NULL
,1,1,1},
776 {"lset",lsetCommand
,4,REDIS_CMD_BULK
|REDIS_CMD_DENYOOM
,NULL
,1,1,1},
777 {"lrange",lrangeCommand
,4,REDIS_CMD_INLINE
,NULL
,1,1,1},
778 {"ltrim",ltrimCommand
,4,REDIS_CMD_INLINE
,NULL
,1,1,1},
779 {"lrem",lremCommand
,4,REDIS_CMD_BULK
,NULL
,1,1,1},
780 {"rpoplpush",rpoplpushcommand
,3,REDIS_CMD_INLINE
|REDIS_CMD_DENYOOM
,NULL
,1,2,1},
781 {"sadd",saddCommand
,3,REDIS_CMD_BULK
|REDIS_CMD_DENYOOM
,NULL
,1,1,1},
782 {"srem",sremCommand
,3,REDIS_CMD_BULK
,NULL
,1,1,1},
783 {"smove",smoveCommand
,4,REDIS_CMD_BULK
,NULL
,1,2,1},
784 {"sismember",sismemberCommand
,3,REDIS_CMD_BULK
,NULL
,1,1,1},
785 {"scard",scardCommand
,2,REDIS_CMD_INLINE
,NULL
,1,1,1},
786 {"spop",spopCommand
,2,REDIS_CMD_INLINE
,NULL
,1,1,1},
787 {"srandmember",srandmemberCommand
,2,REDIS_CMD_INLINE
,NULL
,1,1,1},
788 {"sinter",sinterCommand
,-2,REDIS_CMD_INLINE
|REDIS_CMD_DENYOOM
,NULL
,1,-1,1},
789 {"sinterstore",sinterstoreCommand
,-3,REDIS_CMD_INLINE
|REDIS_CMD_DENYOOM
,NULL
,2,-1,1},
790 {"sunion",sunionCommand
,-2,REDIS_CMD_INLINE
|REDIS_CMD_DENYOOM
,NULL
,1,-1,1},
791 {"sunionstore",sunionstoreCommand
,-3,REDIS_CMD_INLINE
|REDIS_CMD_DENYOOM
,NULL
,2,-1,1},
792 {"sdiff",sdiffCommand
,-2,REDIS_CMD_INLINE
|REDIS_CMD_DENYOOM
,NULL
,1,-1,1},
793 {"sdiffstore",sdiffstoreCommand
,-3,REDIS_CMD_INLINE
|REDIS_CMD_DENYOOM
,NULL
,2,-1,1},
794 {"smembers",sinterCommand
,2,REDIS_CMD_INLINE
,NULL
,1,1,1},
795 {"zadd",zaddCommand
,4,REDIS_CMD_BULK
|REDIS_CMD_DENYOOM
,NULL
,1,1,1},
796 {"zincrby",zincrbyCommand
,4,REDIS_CMD_BULK
|REDIS_CMD_DENYOOM
,NULL
,1,1,1},
797 {"zrem",zremCommand
,3,REDIS_CMD_BULK
,NULL
,1,1,1},
798 {"zremrangebyscore",zremrangebyscoreCommand
,4,REDIS_CMD_INLINE
,NULL
,1,1,1},
799 {"zremrangebyrank",zremrangebyrankCommand
,4,REDIS_CMD_INLINE
,NULL
,1,1,1},
800 {"zunionstore",zunionstoreCommand
,-4,REDIS_CMD_INLINE
|REDIS_CMD_DENYOOM
,zunionInterBlockClientOnSwappedKeys
,0,0,0},
801 {"zinterstore",zinterstoreCommand
,-4,REDIS_CMD_INLINE
|REDIS_CMD_DENYOOM
,zunionInterBlockClientOnSwappedKeys
,0,0,0},
802 {"zrange",zrangeCommand
,-4,REDIS_CMD_INLINE
,NULL
,1,1,1},
803 {"zrangebyscore",zrangebyscoreCommand
,-4,REDIS_CMD_INLINE
,NULL
,1,1,1},
804 {"zcount",zcountCommand
,4,REDIS_CMD_INLINE
,NULL
,1,1,1},
805 {"zrevrange",zrevrangeCommand
,-4,REDIS_CMD_INLINE
,NULL
,1,1,1},
806 {"zcard",zcardCommand
,2,REDIS_CMD_INLINE
,NULL
,1,1,1},
807 {"zscore",zscoreCommand
,3,REDIS_CMD_BULK
|REDIS_CMD_DENYOOM
,NULL
,1,1,1},
808 {"zrank",zrankCommand
,3,REDIS_CMD_BULK
,NULL
,1,1,1},
809 {"zrevrank",zrevrankCommand
,3,REDIS_CMD_BULK
,NULL
,1,1,1},
810 {"hset",hsetCommand
,4,REDIS_CMD_BULK
|REDIS_CMD_DENYOOM
,NULL
,1,1,1},
811 {"hsetnx",hsetnxCommand
,4,REDIS_CMD_BULK
|REDIS_CMD_DENYOOM
,NULL
,1,1,1},
812 {"hget",hgetCommand
,3,REDIS_CMD_BULK
,NULL
,1,1,1},
813 {"hmset",hmsetCommand
,-4,REDIS_CMD_BULK
|REDIS_CMD_DENYOOM
,NULL
,1,1,1},
814 {"hmget",hmgetCommand
,-3,REDIS_CMD_BULK
,NULL
,1,1,1},
815 {"hincrby",hincrbyCommand
,4,REDIS_CMD_INLINE
|REDIS_CMD_DENYOOM
,NULL
,1,1,1},
816 {"hdel",hdelCommand
,3,REDIS_CMD_BULK
,NULL
,1,1,1},
817 {"hlen",hlenCommand
,2,REDIS_CMD_INLINE
,NULL
,1,1,1},
818 {"hkeys",hkeysCommand
,2,REDIS_CMD_INLINE
,NULL
,1,1,1},
819 {"hvals",hvalsCommand
,2,REDIS_CMD_INLINE
,NULL
,1,1,1},
820 {"hgetall",hgetallCommand
,2,REDIS_CMD_INLINE
,NULL
,1,1,1},
821 {"hexists",hexistsCommand
,3,REDIS_CMD_BULK
,NULL
,1,1,1},
822 {"incrby",incrbyCommand
,3,REDIS_CMD_INLINE
|REDIS_CMD_DENYOOM
,NULL
,1,1,1},
823 {"decrby",decrbyCommand
,3,REDIS_CMD_INLINE
|REDIS_CMD_DENYOOM
,NULL
,1,1,1},
824 {"getset",getsetCommand
,3,REDIS_CMD_BULK
|REDIS_CMD_DENYOOM
,NULL
,1,1,1},
825 {"mset",msetCommand
,-3,REDIS_CMD_BULK
|REDIS_CMD_DENYOOM
,NULL
,1,-1,2},
826 {"msetnx",msetnxCommand
,-3,REDIS_CMD_BULK
|REDIS_CMD_DENYOOM
,NULL
,1,-1,2},
827 {"randomkey",randomkeyCommand
,1,REDIS_CMD_INLINE
,NULL
,0,0,0},
828 {"select",selectCommand
,2,REDIS_CMD_INLINE
,NULL
,0,0,0},
829 {"move",moveCommand
,3,REDIS_CMD_INLINE
,NULL
,1,1,1},
830 {"rename",renameCommand
,3,REDIS_CMD_INLINE
,NULL
,1,1,1},
831 {"renamenx",renamenxCommand
,3,REDIS_CMD_INLINE
,NULL
,1,1,1},
832 {"expire",expireCommand
,3,REDIS_CMD_INLINE
,NULL
,0,0,0},
833 {"expireat",expireatCommand
,3,REDIS_CMD_INLINE
,NULL
,0,0,0},
834 {"keys",keysCommand
,2,REDIS_CMD_INLINE
,NULL
,0,0,0},
835 {"dbsize",dbsizeCommand
,1,REDIS_CMD_INLINE
,NULL
,0,0,0},
836 {"auth",authCommand
,2,REDIS_CMD_INLINE
,NULL
,0,0,0},
837 {"ping",pingCommand
,1,REDIS_CMD_INLINE
,NULL
,0,0,0},
838 {"echo",echoCommand
,2,REDIS_CMD_BULK
,NULL
,0,0,0},
839 {"save",saveCommand
,1,REDIS_CMD_INLINE
,NULL
,0,0,0},
840 {"bgsave",bgsaveCommand
,1,REDIS_CMD_INLINE
,NULL
,0,0,0},
841 {"bgrewriteaof",bgrewriteaofCommand
,1,REDIS_CMD_INLINE
,NULL
,0,0,0},
842 {"shutdown",shutdownCommand
,1,REDIS_CMD_INLINE
,NULL
,0,0,0},
843 {"lastsave",lastsaveCommand
,1,REDIS_CMD_INLINE
,NULL
,0,0,0},
844 {"type",typeCommand
,2,REDIS_CMD_INLINE
,NULL
,1,1,1},
845 {"multi",multiCommand
,1,REDIS_CMD_INLINE
,NULL
,0,0,0},
846 {"exec",execCommand
,1,REDIS_CMD_INLINE
|REDIS_CMD_DENYOOM
,execBlockClientOnSwappedKeys
,0,0,0},
847 {"discard",discardCommand
,1,REDIS_CMD_INLINE
,NULL
,0,0,0},
848 {"sync",syncCommand
,1,REDIS_CMD_INLINE
,NULL
,0,0,0},
849 {"flushdb",flushdbCommand
,1,REDIS_CMD_INLINE
,NULL
,0,0,0},
850 {"flushall",flushallCommand
,1,REDIS_CMD_INLINE
,NULL
,0,0,0},
851 {"sort",sortCommand
,-2,REDIS_CMD_INLINE
|REDIS_CMD_DENYOOM
,NULL
,1,1,1},
852 {"info",infoCommand
,1,REDIS_CMD_INLINE
,NULL
,0,0,0},
853 {"monitor",monitorCommand
,1,REDIS_CMD_INLINE
,NULL
,0,0,0},
854 {"ttl",ttlCommand
,2,REDIS_CMD_INLINE
,NULL
,1,1,1},
855 {"slaveof",slaveofCommand
,3,REDIS_CMD_INLINE
,NULL
,0,0,0},
856 {"debug",debugCommand
,-2,REDIS_CMD_INLINE
,NULL
,0,0,0},
857 {"config",configCommand
,-2,REDIS_CMD_BULK
,NULL
,0,0,0},
858 {"subscribe",subscribeCommand
,-2,REDIS_CMD_INLINE
,NULL
,0,0,0},
859 {"unsubscribe",unsubscribeCommand
,-1,REDIS_CMD_INLINE
,NULL
,0,0,0},
860 {"psubscribe",psubscribeCommand
,-2,REDIS_CMD_INLINE
,NULL
,0,0,0},
861 {"punsubscribe",punsubscribeCommand
,-1,REDIS_CMD_INLINE
,NULL
,0,0,0},
862 {"publish",publishCommand
,3,REDIS_CMD_BULK
|REDIS_CMD_FORCE_REPLICATION
,NULL
,0,0,0},
863 {"watch",watchCommand
,-2,REDIS_CMD_INLINE
,NULL
,0,0,0},
864 {"unwatch",unwatchCommand
,1,REDIS_CMD_INLINE
,NULL
,0,0,0}
867 /*============================ Utility functions ============================ */
869 /* Glob-style pattern matching. */
870 static int stringmatchlen(const char *pattern
, int patternLen
,
871 const char *string
, int stringLen
, int nocase
)
876 while (pattern
[1] == '*') {
881 return 1; /* match */
883 if (stringmatchlen(pattern
+1, patternLen
-1,
884 string
, stringLen
, nocase
))
885 return 1; /* match */
889 return 0; /* no match */
893 return 0; /* no match */
903 not = pattern
[0] == '^';
910 if (pattern
[0] == '\\') {
913 if (pattern
[0] == string
[0])
915 } else if (pattern
[0] == ']') {
917 } else if (patternLen
== 0) {
921 } else if (pattern
[1] == '-' && patternLen
>= 3) {
922 int start
= pattern
[0];
923 int end
= pattern
[2];
931 start
= tolower(start
);
937 if (c
>= start
&& c
<= end
)
941 if (pattern
[0] == string
[0])
944 if (tolower((int)pattern
[0]) == tolower((int)string
[0]))
954 return 0; /* no match */
960 if (patternLen
>= 2) {
967 if (pattern
[0] != string
[0])
968 return 0; /* no match */
970 if (tolower((int)pattern
[0]) != tolower((int)string
[0]))
971 return 0; /* no match */
979 if (stringLen
== 0) {
980 while(*pattern
== '*') {
987 if (patternLen
== 0 && stringLen
== 0)
/* Glob-style match of two NUL-terminated strings.
 *
 * Thin wrapper around stringmatchlen(): the lengths of 'pattern' and
 * 'string' are computed with strlen() and forwarded together with the
 * 'nocase' flag. The return value is whatever stringmatchlen() returns
 * (1 on match, 0 otherwise). */
static int stringmatch(const char *pattern, const char *string, int nocase) {
    int patternLen = strlen(pattern);
    int stringLen = strlen(string);

    return stringmatchlen(pattern,patternLen,string,stringLen,nocase);
}
996 /* Convert a string representing an amount of memory into the number of
997 * bytes, so for instance memtoll("1Gi") will return 1073741824 that is
1000 * On parsing error, if *err is not NULL, it's set to 1, otherwise it's
1002 static long long memtoll(const char *p
, int *err
) {
1005 long mul
; /* unit multiplier */
1007 unsigned int digits
;
1010 /* Search the first non digit character. */
1013 while(*u
&& isdigit(*u
)) u
++;
1014 if (*u
== '\0' || !strcasecmp(u
,"b")) {
1016 } else if (!strcasecmp(u
,"k")) {
1018 } else if (!strcasecmp(u
,"kb")) {
1020 } else if (!strcasecmp(u
,"m")) {
1022 } else if (!strcasecmp(u
,"mb")) {
1024 } else if (!strcasecmp(u
,"g")) {
1025 mul
= 1000L*1000*1000;
1026 } else if (!strcasecmp(u
,"gb")) {
1027 mul
= 1024L*1024*1024;
1033 if (digits
>= sizeof(buf
)) {
1037 memcpy(buf
,p
,digits
);
1039 val
= strtoll(buf
,NULL
,10);
1043 /* Convert a long long into a string. Returns the number of
1044 * characters needed to represent the number, that can be shorter if passed
1045 * buffer length is not enough to store the whole number. */
1046 static int ll2string(char *s
, size_t len
, long long value
) {
1048 unsigned long long v
;
1051 if (len
== 0) return 0;
1052 v
= (value
< 0) ? -value
: value
;
1053 p
= buf
+31; /* point to the last character */
1058 if (value
< 0) *p
-- = '-';
1061 if (l
+1 > len
) l
= len
-1; /* Make sure it fits, including the nul term */
1067 static void redisLog(int level
, const char *fmt
, ...) {
1071 fp
= (server
.logfile
== NULL
) ? stdout
: fopen(server
.logfile
,"a");
1075 if (level
>= server
.verbosity
) {
1081 strftime(buf
,64,"%d %b %H:%M:%S",localtime(&now
));
1082 fprintf(fp
,"[%d] %s %c ",(int)getpid(),buf
,c
[level
]);
1083 vfprintf(fp
, fmt
, ap
);
1089 if (server
.logfile
) fclose(fp
);
1092 /*====================== Hash table type implementation ==================== */
1094 /* This is a hash table type that uses the SDS dynamic strings library as
1095 * keys and redis objects as values (objects can hold SDS strings,
1098 static void dictVanillaFree(void *privdata
, void *val
)
1100 DICT_NOTUSED(privdata
);
1104 static void dictListDestructor(void *privdata
, void *val
)
1106 DICT_NOTUSED(privdata
);
1107 listRelease((list
*)val
);
1110 static int sdsDictKeyCompare(void *privdata
, const void *key1
,
1114 DICT_NOTUSED(privdata
);
1116 l1
= sdslen((sds
)key1
);
1117 l2
= sdslen((sds
)key2
);
1118 if (l1
!= l2
) return 0;
1119 return memcmp(key1
, key2
, l1
) == 0;
1122 static void dictRedisObjectDestructor(void *privdata
, void *val
)
1124 DICT_NOTUSED(privdata
);
1126 if (val
== NULL
) return; /* Values of swapped out keys as set to NULL */
1130 static int dictObjKeyCompare(void *privdata
, const void *key1
,
1133 const robj
*o1
= key1
, *o2
= key2
;
1134 return sdsDictKeyCompare(privdata
,o1
->ptr
,o2
->ptr
);
1137 static unsigned int dictObjHash(const void *key
) {
1138 const robj
*o
= key
;
1139 return dictGenHashFunction(o
->ptr
, sdslen((sds
)o
->ptr
));
1142 static int dictEncObjKeyCompare(void *privdata
, const void *key1
,
1145 robj
*o1
= (robj
*) key1
, *o2
= (robj
*) key2
;
1148 if (o1
->encoding
== REDIS_ENCODING_INT
&&
1149 o2
->encoding
== REDIS_ENCODING_INT
)
1150 return o1
->ptr
== o2
->ptr
;
1152 o1
= getDecodedObject(o1
);
1153 o2
= getDecodedObject(o2
);
1154 cmp
= sdsDictKeyCompare(privdata
,o1
->ptr
,o2
->ptr
);
1160 static unsigned int dictEncObjHash(const void *key
) {
1161 robj
*o
= (robj
*) key
;
1163 if (o
->encoding
== REDIS_ENCODING_RAW
) {
1164 return dictGenHashFunction(o
->ptr
, sdslen((sds
)o
->ptr
));
1166 if (o
->encoding
== REDIS_ENCODING_INT
) {
1170 len
= ll2string(buf
,32,(long)o
->ptr
);
1171 return dictGenHashFunction((unsigned char*)buf
, len
);
1175 o
= getDecodedObject(o
);
1176 hash
= dictGenHashFunction(o
->ptr
, sdslen((sds
)o
->ptr
));
1183 /* Sets type and expires */
1184 static dictType setDictType
= {
1185 dictEncObjHash
, /* hash function */
1188 dictEncObjKeyCompare
, /* key compare */
1189 dictRedisObjectDestructor
, /* key destructor */
1190 NULL
/* val destructor */
1193 /* Sorted sets hash (note: a skiplist is used in addition to the hash table) */
1194 static dictType zsetDictType
= {
1195 dictEncObjHash
, /* hash function */
1198 dictEncObjKeyCompare
, /* key compare */
1199 dictRedisObjectDestructor
, /* key destructor */
1200 dictVanillaFree
/* val destructor of malloc(sizeof(double)) */
1204 static dictType dbDictType
= {
1205 dictObjHash
, /* hash function */
1208 dictObjKeyCompare
, /* key compare */
1209 dictRedisObjectDestructor
, /* key destructor */
1210 dictRedisObjectDestructor
/* val destructor */
1214 static dictType keyptrDictType
= {
1215 dictObjHash
, /* hash function */
1218 dictObjKeyCompare
, /* key compare */
1219 dictRedisObjectDestructor
, /* key destructor */
1220 NULL
/* val destructor */
1223 /* Hash type hash table (note that small hashes are represented with zipmaps) */
1224 static dictType hashDictType
= {
1225 dictEncObjHash
, /* hash function */
1228 dictEncObjKeyCompare
, /* key compare */
1229 dictRedisObjectDestructor
, /* key destructor */
1230 dictRedisObjectDestructor
/* val destructor */
1233 /* Keylist hash table type has unencoded redis objects as keys and
1234 * lists as values. It's used for blocking operations (BLPOP) and to
1235 * map swapped keys to a list of clients waiting for these keys to be loaded. */
1236 static dictType keylistDictType
= {
1237 dictObjHash
, /* hash function */
1240 dictObjKeyCompare
, /* key compare */
1241 dictRedisObjectDestructor
, /* key destructor */
1242 dictListDestructor
/* val destructor */
1245 static void version();
1247 /* ========================= Random utility functions ======================= */
1249 /* Redis generally does not try to recover from out of memory conditions
1250 * when allocating objects or strings, it is not clear if it will be possible
1251 * to report this condition to the client since the networking layer itself
1252 * is based on heap allocation for send buffers, so we simply abort.
1253 * At least the code will be simpler to read... */
1254 static void oom(const char *msg
) {
1255 redisLog(REDIS_WARNING
, "%s: Out of memory\n",msg
);
1260 /* ====================== Redis server networking stuff ===================== */
1261 static void closeTimedoutClients(void) {
1264 time_t now
= time(NULL
);
1267 listRewind(server
.clients
,&li
);
1268 while ((ln
= listNext(&li
)) != NULL
) {
1269 c
= listNodeValue(ln
);
1270 if (server
.maxidletime
&&
1271 !(c
->flags
& REDIS_SLAVE
) && /* no timeout for slaves */
1272 !(c
->flags
& REDIS_MASTER
) && /* no timeout for masters */
1273 dictSize(c
->pubsub_channels
) == 0 && /* no timeout for pubsub */
1274 listLength(c
->pubsub_patterns
) == 0 &&
1275 (now
- c
->lastinteraction
> server
.maxidletime
))
1277 redisLog(REDIS_VERBOSE
,"Closing idle client");
1279 } else if (c
->flags
& REDIS_BLOCKED
) {
1280 if (c
->blockingto
!= 0 && c
->blockingto
< now
) {
1281 addReply(c
,shared
.nullmultibulk
);
1282 unblockClientWaitingData(c
);
1288 static int htNeedsResize(dict
*dict
) {
1289 long long size
, used
;
1291 size
= dictSlots(dict
);
1292 used
= dictSize(dict
);
1293 return (size
&& used
&& size
> DICT_HT_INITIAL_SIZE
&&
1294 (used
*100/size
< REDIS_HT_MINFILL
));
1297 /* If the percentage of used slots in the HT reaches REDIS_HT_MINFILL
1298 * we resize the hash table to save memory */
1299 static void tryResizeHashTables(void) {
1302 for (j
= 0; j
< server
.dbnum
; j
++) {
1303 if (htNeedsResize(server
.db
[j
].dict
))
1304 dictResize(server
.db
[j
].dict
);
1305 if (htNeedsResize(server
.db
[j
].expires
))
1306 dictResize(server
.db
[j
].expires
);
1310 /* Our hash table implementation performs rehashing incrementally while
1311 * we write/read from the hash table. Still if the server is idle, the hash
1312 * table will use two tables for a long time. So we try to use 1 millisecond
1313 * of CPU time at every serverCron() loop in order to rehash some key. */
1314 static void incrementallyRehash(void) {
1317 for (j
= 0; j
< server
.dbnum
; j
++) {
1318 if (dictIsRehashing(server
.db
[j
].dict
)) {
1319 dictRehashMilliseconds(server
.db
[j
].dict
,1);
1320 break; /* already used our millisecond for this loop... */
1325 /* A background saving child (BGSAVE) terminated its work. Handle this. */
1326 void backgroundSaveDoneHandler(int statloc
) {
1327 int exitcode
= WEXITSTATUS(statloc
);
1328 int bysignal
= WIFSIGNALED(statloc
);
1330 if (!bysignal
&& exitcode
== 0) {
1331 redisLog(REDIS_NOTICE
,
1332 "Background saving terminated with success");
1334 server
.lastsave
= time(NULL
);
1335 } else if (!bysignal
&& exitcode
!= 0) {
1336 redisLog(REDIS_WARNING
, "Background saving error");
1338 redisLog(REDIS_WARNING
,
1339 "Background saving terminated by signal %d", WTERMSIG(statloc
));
1340 rdbRemoveTempFile(server
.bgsavechildpid
);
1342 server
.bgsavechildpid
= -1;
1343 /* Possibly there are slaves waiting for a BGSAVE in order to be served
1344 * (the first stage of SYNC is a bulk transfer of dump.rdb) */
1345 updateSlavesWaitingBgsave(exitcode
== 0 ? REDIS_OK
: REDIS_ERR
);
1348 /* A background append only file rewriting (BGREWRITEAOF) terminated its work.
1350 void backgroundRewriteDoneHandler(int statloc
) {
1351 int exitcode
= WEXITSTATUS(statloc
);
1352 int bysignal
= WIFSIGNALED(statloc
);
1354 if (!bysignal
&& exitcode
== 0) {
1358 redisLog(REDIS_NOTICE
,
1359 "Background append only file rewriting terminated with success");
1360 /* Now it's time to flush the differences accumulated by the parent */
1361 snprintf(tmpfile
,256,"temp-rewriteaof-bg-%d.aof", (int) server
.bgrewritechildpid
);
1362 fd
= open(tmpfile
,O_WRONLY
|O_APPEND
);
1364 redisLog(REDIS_WARNING
, "Not able to open the temp append only file produced by the child: %s", strerror(errno
));
1367 /* Flush our data... */
1368 if (write(fd
,server
.bgrewritebuf
,sdslen(server
.bgrewritebuf
)) !=
1369 (signed) sdslen(server
.bgrewritebuf
)) {
1370 redisLog(REDIS_WARNING
, "Error or short write trying to flush the parent diff of the append log file in the child temp file: %s", strerror(errno
));
1374 redisLog(REDIS_NOTICE
,"Parent diff flushed into the new append log file with success (%lu bytes)",sdslen(server
.bgrewritebuf
));
1375 /* Now our work is to rename the temp file into the stable file. And
1376 * switch the file descriptor used by the server for append only. */
1377 if (rename(tmpfile
,server
.appendfilename
) == -1) {
1378 redisLog(REDIS_WARNING
,"Can't rename the temp append only file into the stable one: %s", strerror(errno
));
1382 /* Mission completed... almost */
1383 redisLog(REDIS_NOTICE
,"Append only file successfully rewritten.");
1384 if (server
.appendfd
!= -1) {
1385 /* If append only is actually enabled... */
1386 close(server
.appendfd
);
1387 server
.appendfd
= fd
;
1389 server
.appendseldb
= -1; /* Make sure it will issue SELECT */
1390 redisLog(REDIS_NOTICE
,"The new append only file was selected for future appends.");
1392 /* If append only is disabled we just generate a dump in this
1393 * format. Why not? */
1396 } else if (!bysignal
&& exitcode
!= 0) {
1397 redisLog(REDIS_WARNING
, "Background append only file rewriting error");
1399 redisLog(REDIS_WARNING
,
1400 "Background append only file rewriting terminated by signal %d",
1404 sdsfree(server
.bgrewritebuf
);
1405 server
.bgrewritebuf
= sdsempty();
1406 aofRemoveTempFile(server
.bgrewritechildpid
);
1407 server
.bgrewritechildpid
= -1;
1410 /* This function is called once a background process of some kind terminates,
1411 * as we want to avoid resizing the hash tables when there is a child in order
1412 * to play well with copy-on-write (otherwise when a resize happens lots of
1413 * memory pages are copied). The goal of this function is to update the ability
1414 * for dict.c to resize the hash tables according to whether or not we have
1415 * running children. */
1416 static void updateDictResizePolicy(void) {
1417 if (server
.bgsavechildpid
== -1 && server
.bgrewritechildpid
== -1)
1420 dictDisableResize();
1423 static int serverCron(struct aeEventLoop
*eventLoop
, long long id
, void *clientData
) {
1424 int j
, loops
= server
.cronloops
++;
1425 REDIS_NOTUSED(eventLoop
);
1427 REDIS_NOTUSED(clientData
);
1429 /* We take a cached value of the unix time in the global state because
1430 * with virtual memory and aging we need to store the current time
1431 * in objects at every object access, and accuracy is not needed.
1432 * Accessing a global var is faster than calling time(NULL) */
1433 server
.unixtime
= time(NULL
);
1435 /* We received a SIGTERM, shutting down here in a safe way, as it is
1436 * not ok doing so inside the signal handler. */
1437 if (server
.shutdown_asap
) {
1438 if (prepareForShutdown() == REDIS_OK
) exit(0);
1439 redisLog(REDIS_WARNING
,"SIGTERM received but errors trying to shut down the server, check the logs for more information");
1442 /* Show some info about non-empty databases */
1443 for (j
= 0; j
< server
.dbnum
; j
++) {
1444 long long size
, used
, vkeys
;
1446 size
= dictSlots(server
.db
[j
].dict
);
1447 used
= dictSize(server
.db
[j
].dict
);
1448 vkeys
= dictSize(server
.db
[j
].expires
);
1449 if (!(loops
% 50) && (used
|| vkeys
)) {
1450 redisLog(REDIS_VERBOSE
,"DB %d: %lld keys (%lld volatile) in %lld slots HT.",j
,used
,vkeys
,size
);
1451 /* dictPrintStats(server.dict); */
1455 /* We don't want to resize the hash tables while a background saving
1456 * is in progress: the saving child is created using fork() that is
1457 * implemented with a copy-on-write semantic in most modern systems, so
1458 * if we resize the HT while there is the saving child at work actually
1459 * a lot of memory movements in the parent will cause a lot of pages
1461 if (server
.bgsavechildpid
== -1 && server
.bgrewritechildpid
== -1) {
1462 if (!(loops
% 10)) tryResizeHashTables();
1463 if (server
.activerehashing
) incrementallyRehash();
1466 /* Show information about connected clients */
1467 if (!(loops
% 50)) {
1468 redisLog(REDIS_VERBOSE
,"%d clients connected (%d slaves), %zu bytes in use",
1469 listLength(server
.clients
)-listLength(server
.slaves
),
1470 listLength(server
.slaves
),
1471 zmalloc_used_memory());
1474 /* Close connections of timedout clients */
1475 if ((server
.maxidletime
&& !(loops
% 100)) || server
.blpop_blocked_clients
)
1476 closeTimedoutClients();
1478 /* Check if a background saving or AOF rewrite in progress terminated */
1479 if (server
.bgsavechildpid
!= -1 || server
.bgrewritechildpid
!= -1) {
1483 if ((pid
= wait3(&statloc
,WNOHANG
,NULL
)) != 0) {
1484 if (pid
== server
.bgsavechildpid
) {
1485 backgroundSaveDoneHandler(statloc
);
1487 backgroundRewriteDoneHandler(statloc
);
1489 updateDictResizePolicy();
1492 /* If there is not a background saving in progress check if
1493 * we have to save now */
1494 time_t now
= time(NULL
);
1495 for (j
= 0; j
< server
.saveparamslen
; j
++) {
1496 struct saveparam
*sp
= server
.saveparams
+j
;
1498 if (server
.dirty
>= sp
->changes
&&
1499 now
-server
.lastsave
> sp
->seconds
) {
1500 redisLog(REDIS_NOTICE
,"%d changes in %d seconds. Saving...",
1501 sp
->changes
, sp
->seconds
);
1502 rdbSaveBackground(server
.dbfilename
);
1508 /* Try to expire a few timed out keys. The algorithm used is adaptive and
1509 * will use few CPU cycles if there are few expiring keys, otherwise
1510 * it will get more aggressive to avoid that too much memory is used by
1511 * keys that can be removed from the keyspace. */
1512 for (j
= 0; j
< server
.dbnum
; j
++) {
1514 redisDb
*db
= server
.db
+j
;
1516 /* Continue to expire if at the end of the cycle more than 25%
1517 * of the keys were expired. */
1519 long num
= dictSize(db
->expires
);
1520 time_t now
= time(NULL
);
1523 if (num
> REDIS_EXPIRELOOKUPS_PER_CRON
)
1524 num
= REDIS_EXPIRELOOKUPS_PER_CRON
;
1529 if ((de
= dictGetRandomKey(db
->expires
)) == NULL
) break;
1530 t
= (time_t) dictGetEntryVal(de
);
1532 deleteKey(db
,dictGetEntryKey(de
));
1534 server
.stat_expiredkeys
++;
1537 } while (expired
> REDIS_EXPIRELOOKUPS_PER_CRON
/4);
1540 /* Swap a few keys on disk if we are over the memory limit and VM
1541 * is enabled. Try to free objects from the free list first. */
1542 if (vmCanSwapOut()) {
1543 while (server
.vm_enabled
&& zmalloc_used_memory() >
1544 server
.vm_max_memory
)
1548 if (tryFreeOneObjectFromFreelist() == REDIS_OK
) continue;
1549 retval
= (server
.vm_max_threads
== 0) ?
1550 vmSwapOneObjectBlocking() :
1551 vmSwapOneObjectThreaded();
1552 if (retval
== REDIS_ERR
&& !(loops
% 300) &&
1553 zmalloc_used_memory() >
1554 (server
.vm_max_memory
+server
.vm_max_memory
/10))
1556 redisLog(REDIS_WARNING
,"WARNING: vm-max-memory limit exceeded by more than 10%% but unable to swap more objects out!");
1558 /* Note that when using threaded I/O we free just one object,
1559 * because anyway when the I/O thread in charge of swapping this
1560 * object out finishes, the handler of completed jobs
1561 * will try to swap more objects if we are still out of memory. */
1562 if (retval
== REDIS_ERR
|| server
.vm_max_threads
> 0) break;
1566 /* Check if we should connect to a MASTER */
1567 if (server
.replstate
== REDIS_REPL_CONNECT
&& !(loops
% 10)) {
1568 redisLog(REDIS_NOTICE
,"Connecting to MASTER...");
1569 if (syncWithMaster() == REDIS_OK
) {
1570 redisLog(REDIS_NOTICE
,"MASTER <-> SLAVE sync succeeded");
1571 if (server
.appendonly
) rewriteAppendOnlyFileBackground();
1577 /* This function gets called every time Redis is entering the
1578 * main loop of the event driven library, that is, before sleeping
1579 * while waiting for ready file descriptors. */
1580 static void beforeSleep(struct aeEventLoop
*eventLoop
) {
1581 REDIS_NOTUSED(eventLoop
);
1583 /* Awake clients that got all the swapped keys they requested */
1584 if (server
.vm_enabled
&& listLength(server
.io_ready_clients
)) {
1588 listRewind(server
.io_ready_clients
,&li
);
1589 while((ln
= listNext(&li
))) {
1590 redisClient
*c
= ln
->value
;
1591 struct redisCommand
*cmd
;
1593 /* Resume the client. */
1594 listDelNode(server
.io_ready_clients
,ln
);
1595 c
->flags
&= (~REDIS_IO_WAIT
);
1596 server
.vm_blocked_clients
--;
1597 aeCreateFileEvent(server
.el
, c
->fd
, AE_READABLE
,
1598 readQueryFromClient
, c
);
1599 cmd
= lookupCommand(c
->argv
[0]->ptr
);
1600 assert(cmd
!= NULL
);
1603 /* There may be more data to process in the input buffer. */
1604 if (c
->querybuf
&& sdslen(c
->querybuf
) > 0)
1605 processInputBuffer(c
);
1608 /* Write the AOF buffer on disk */
1609 flushAppendOnlyFile();
1612 static void createSharedObjects(void) {
1615 shared
.crlf
= createObject(REDIS_STRING
,sdsnew("\r\n"));
1616 shared
.ok
= createObject(REDIS_STRING
,sdsnew("+OK\r\n"));
1617 shared
.err
= createObject(REDIS_STRING
,sdsnew("-ERR\r\n"));
1618 shared
.emptybulk
= createObject(REDIS_STRING
,sdsnew("$0\r\n\r\n"));
1619 shared
.czero
= createObject(REDIS_STRING
,sdsnew(":0\r\n"));
1620 shared
.cone
= createObject(REDIS_STRING
,sdsnew(":1\r\n"));
1621 shared
.nullbulk
= createObject(REDIS_STRING
,sdsnew("$-1\r\n"));
1622 shared
.nullmultibulk
= createObject(REDIS_STRING
,sdsnew("*-1\r\n"));
1623 shared
.emptymultibulk
= createObject(REDIS_STRING
,sdsnew("*0\r\n"));
1624 shared
.pong
= createObject(REDIS_STRING
,sdsnew("+PONG\r\n"));
1625 shared
.queued
= createObject(REDIS_STRING
,sdsnew("+QUEUED\r\n"));
1626 shared
.wrongtypeerr
= createObject(REDIS_STRING
,sdsnew(
1627 "-ERR Operation against a key holding the wrong kind of value\r\n"));
1628 shared
.nokeyerr
= createObject(REDIS_STRING
,sdsnew(
1629 "-ERR no such key\r\n"));
1630 shared
.syntaxerr
= createObject(REDIS_STRING
,sdsnew(
1631 "-ERR syntax error\r\n"));
1632 shared
.sameobjecterr
= createObject(REDIS_STRING
,sdsnew(
1633 "-ERR source and destination objects are the same\r\n"));
1634 shared
.outofrangeerr
= createObject(REDIS_STRING
,sdsnew(
1635 "-ERR index out of range\r\n"));
1636 shared
.space
= createObject(REDIS_STRING
,sdsnew(" "));
1637 shared
.colon
= createObject(REDIS_STRING
,sdsnew(":"));
1638 shared
.plus
= createObject(REDIS_STRING
,sdsnew("+"));
1639 shared
.select0
= createStringObject("select 0\r\n",10);
1640 shared
.select1
= createStringObject("select 1\r\n",10);
1641 shared
.select2
= createStringObject("select 2\r\n",10);
1642 shared
.select3
= createStringObject("select 3\r\n",10);
1643 shared
.select4
= createStringObject("select 4\r\n",10);
1644 shared
.select5
= createStringObject("select 5\r\n",10);
1645 shared
.select6
= createStringObject("select 6\r\n",10);
1646 shared
.select7
= createStringObject("select 7\r\n",10);
1647 shared
.select8
= createStringObject("select 8\r\n",10);
1648 shared
.select9
= createStringObject("select 9\r\n",10);
1649 shared
.messagebulk
= createStringObject("$7\r\nmessage\r\n",13);
1650 shared
.pmessagebulk
= createStringObject("$8\r\npmessage\r\n",14);
1651 shared
.subscribebulk
= createStringObject("$9\r\nsubscribe\r\n",15);
1652 shared
.unsubscribebulk
= createStringObject("$11\r\nunsubscribe\r\n",18);
1653 shared
.psubscribebulk
= createStringObject("$10\r\npsubscribe\r\n",17);
1654 shared
.punsubscribebulk
= createStringObject("$12\r\npunsubscribe\r\n",19);
1655 shared
.mbulk3
= createStringObject("*3\r\n",4);
1656 shared
.mbulk4
= createStringObject("*4\r\n",4);
1657 for (j
= 0; j
< REDIS_SHARED_INTEGERS
; j
++) {
1658 shared
.integers
[j
] = createObject(REDIS_STRING
,(void*)(long)j
);
1659 shared
.integers
[j
]->encoding
= REDIS_ENCODING_INT
;
1663 static void appendServerSaveParams(time_t seconds
, int changes
) {
1664 server
.saveparams
= zrealloc(server
.saveparams
,sizeof(struct saveparam
)*(server
.saveparamslen
+1));
1665 server
.saveparams
[server
.saveparamslen
].seconds
= seconds
;
1666 server
.saveparams
[server
.saveparamslen
].changes
= changes
;
1667 server
.saveparamslen
++;
1670 static void resetServerSaveParams() {
1671 zfree(server
.saveparams
);
1672 server
.saveparams
= NULL
;
1673 server
.saveparamslen
= 0;
1676 static void initServerConfig() {
1677 server
.dbnum
= REDIS_DEFAULT_DBNUM
;
1678 server
.port
= REDIS_SERVERPORT
;
1679 server
.verbosity
= REDIS_VERBOSE
;
1680 server
.maxidletime
= REDIS_MAXIDLETIME
;
1681 server
.saveparams
= NULL
;
1682 server
.logfile
= NULL
; /* NULL = log on standard output */
1683 server
.bindaddr
= NULL
;
1684 server
.glueoutputbuf
= 1;
1685 server
.daemonize
= 0;
1686 server
.appendonly
= 0;
1687 server
.appendfsync
= APPENDFSYNC_EVERYSEC
;
1688 server
.lastfsync
= time(NULL
);
1689 server
.appendfd
= -1;
1690 server
.appendseldb
= -1; /* Make sure the first time will not match */
1691 server
.pidfile
= zstrdup("/var/run/redis.pid");
1692 server
.dbfilename
= zstrdup("dump.rdb");
1693 server
.appendfilename
= zstrdup("appendonly.aof");
1694 server
.requirepass
= NULL
;
1695 server
.rdbcompression
= 1;
1696 server
.activerehashing
= 1;
1697 server
.maxclients
= 0;
1698 server
.blpop_blocked_clients
= 0;
1699 server
.maxmemory
= 0;
1700 server
.vm_enabled
= 0;
1701 server
.vm_swap_file
= zstrdup("/tmp/redis-%p.vm");
1702 server
.vm_page_size
= 256; /* 256 bytes per page */
1703 server
.vm_pages
= 1024*1024*100; /* 104 millions of pages */
1704 server
.vm_max_memory
= 1024LL*1024*1024*1; /* 1 GB of RAM */
1705 server
.vm_max_threads
= 4;
1706 server
.vm_blocked_clients
= 0;
1707 server
.hash_max_zipmap_entries
= REDIS_HASH_MAX_ZIPMAP_ENTRIES
;
1708 server
.hash_max_zipmap_value
= REDIS_HASH_MAX_ZIPMAP_VALUE
;
1709 server
.shutdown_asap
= 0;
1711 resetServerSaveParams();
1713 appendServerSaveParams(60*60,1); /* save after 1 hour and 1 change */
1714 appendServerSaveParams(300,100); /* save after 5 minutes and 100 changes */
1715 appendServerSaveParams(60,10000); /* save after 1 minute and 10000 changes */
1716 /* Replication related */
1718 server
.masterauth
= NULL
;
1719 server
.masterhost
= NULL
;
1720 server
.masterport
= 6379;
1721 server
.master
= NULL
;
1722 server
.replstate
= REDIS_REPL_NONE
;
1724 /* Double constants initialization */
1726 R_PosInf
= 1.0/R_Zero
;
1727 R_NegInf
= -1.0/R_Zero
;
1728 R_Nan
= R_Zero
/R_Zero
;
1731 static void initServer() {
1734 signal(SIGHUP
, SIG_IGN
);
1735 signal(SIGPIPE
, SIG_IGN
);
1736 setupSigSegvAction();
1738 server
.devnull
= fopen("/dev/null","w");
1739 if (server
.devnull
== NULL
) {
1740 redisLog(REDIS_WARNING
, "Can't open /dev/null: %s", server
.neterr
);
1743 server
.clients
= listCreate();
1744 server
.slaves
= listCreate();
1745 server
.monitors
= listCreate();
1746 server
.objfreelist
= listCreate();
1747 createSharedObjects();
1748 server
.el
= aeCreateEventLoop();
1749 server
.db
= zmalloc(sizeof(redisDb
)*server
.dbnum
);
1750 server
.fd
= anetTcpServer(server
.neterr
, server
.port
, server
.bindaddr
);
1751 if (server
.fd
== -1) {
1752 redisLog(REDIS_WARNING
, "Opening TCP port: %s", server
.neterr
);
1755 for (j
= 0; j
< server
.dbnum
; j
++) {
1756 server
.db
[j
].dict
= dictCreate(&dbDictType
,NULL
);
1757 server
.db
[j
].expires
= dictCreate(&keyptrDictType
,NULL
);
1758 server
.db
[j
].blocking_keys
= dictCreate(&keylistDictType
,NULL
);
1759 server
.db
[j
].watched_keys
= dictCreate(&keylistDictType
,NULL
);
1760 if (server
.vm_enabled
)
1761 server
.db
[j
].io_keys
= dictCreate(&keylistDictType
,NULL
);
1762 server
.db
[j
].id
= j
;
1764 server
.pubsub_channels
= dictCreate(&keylistDictType
,NULL
);
1765 server
.pubsub_patterns
= listCreate();
1766 listSetFreeMethod(server
.pubsub_patterns
,freePubsubPattern
);
1767 listSetMatchMethod(server
.pubsub_patterns
,listMatchPubsubPattern
);
1768 server
.cronloops
= 0;
1769 server
.bgsavechildpid
= -1;
1770 server
.bgrewritechildpid
= -1;
1771 server
.bgrewritebuf
= sdsempty();
1772 server
.aofbuf
= sdsempty();
1773 server
.lastsave
= time(NULL
);
1775 server
.stat_numcommands
= 0;
1776 server
.stat_numconnections
= 0;
1777 server
.stat_expiredkeys
= 0;
1778 server
.stat_starttime
= time(NULL
);
1779 server
.unixtime
= time(NULL
);
1780 aeCreateTimeEvent(server
.el
, 1, serverCron
, NULL
, NULL
);
1781 if (aeCreateFileEvent(server
.el
, server
.fd
, AE_READABLE
,
1782 acceptHandler
, NULL
) == AE_ERR
) oom("creating file event");
1784 if (server
.appendonly
) {
1785 server
.appendfd
= open(server
.appendfilename
,O_WRONLY
|O_APPEND
|O_CREAT
,0644);
1786 if (server
.appendfd
== -1) {
1787 redisLog(REDIS_WARNING
, "Can't open the append-only file: %s",
1793 if (server
.vm_enabled
) vmInit();
1796 /* Empty the whole database */
1797 static long long emptyDb() {
1799 long long removed
= 0;
1801 for (j
= 0; j
< server
.dbnum
; j
++) {
1802 removed
+= dictSize(server
.db
[j
].dict
);
1803 dictEmpty(server
.db
[j
].dict
);
1804 dictEmpty(server
.db
[j
].expires
);
1809 static int yesnotoi(char *s
) {
1810 if (!strcasecmp(s
,"yes")) return 1;
1811 else if (!strcasecmp(s
,"no")) return 0;
1815 /* I agree, this is a very rudimentary way to load a configuration...
1816 will improve later if the config gets more complex */
1817 static void loadServerConfig(char *filename
) {
1819 char buf
[REDIS_CONFIGLINE_MAX
+1], *err
= NULL
;
1823 if (filename
[0] == '-' && filename
[1] == '\0')
1826 if ((fp
= fopen(filename
,"r")) == NULL
) {
1827 redisLog(REDIS_WARNING
, "Fatal error, can't open config file '%s'", filename
);
1832 while(fgets(buf
,REDIS_CONFIGLINE_MAX
+1,fp
) != NULL
) {
1838 line
= sdstrim(line
," \t\r\n");
1840 /* Skip comments and blank lines*/
1841 if (line
[0] == '#' || line
[0] == '\0') {
1846 /* Split into arguments */
1847 argv
= sdssplitlen(line
,sdslen(line
)," ",1,&argc
);
1848 sdstolower(argv
[0]);
1850 /* Execute config directives */
1851 if (!strcasecmp(argv
[0],"timeout") && argc
== 2) {
1852 server
.maxidletime
= atoi(argv
[1]);
1853 if (server
.maxidletime
< 0) {
1854 err
= "Invalid timeout value"; goto loaderr
;
1856 } else if (!strcasecmp(argv
[0],"port") && argc
== 2) {
1857 server
.port
= atoi(argv
[1]);
1858 if (server
.port
< 1 || server
.port
> 65535) {
1859 err
= "Invalid port"; goto loaderr
;
1861 } else if (!strcasecmp(argv
[0],"bind") && argc
== 2) {
1862 server
.bindaddr
= zstrdup(argv
[1]);
1863 } else if (!strcasecmp(argv
[0],"save") && argc
== 3) {
1864 int seconds
= atoi(argv
[1]);
1865 int changes
= atoi(argv
[2]);
1866 if (seconds
< 1 || changes
< 0) {
1867 err
= "Invalid save parameters"; goto loaderr
;
1869 appendServerSaveParams(seconds
,changes
);
1870 } else if (!strcasecmp(argv
[0],"dir") && argc
== 2) {
1871 if (chdir(argv
[1]) == -1) {
1872 redisLog(REDIS_WARNING
,"Can't chdir to '%s': %s",
1873 argv
[1], strerror(errno
));
1876 } else if (!strcasecmp(argv
[0],"loglevel") && argc
== 2) {
1877 if (!strcasecmp(argv
[1],"debug")) server
.verbosity
= REDIS_DEBUG
;
1878 else if (!strcasecmp(argv
[1],"verbose")) server
.verbosity
= REDIS_VERBOSE
;
1879 else if (!strcasecmp(argv
[1],"notice")) server
.verbosity
= REDIS_NOTICE
;
1880 else if (!strcasecmp(argv
[1],"warning")) server
.verbosity
= REDIS_WARNING
;
1882 err
= "Invalid log level. Must be one of debug, notice, warning";
1885 } else if (!strcasecmp(argv
[0],"logfile") && argc
== 2) {
1888 server
.logfile
= zstrdup(argv
[1]);
1889 if (!strcasecmp(server
.logfile
,"stdout")) {
1890 zfree(server
.logfile
);
1891 server
.logfile
= NULL
;
1893 if (server
.logfile
) {
1894 /* Test if we are able to open the file. The server will not
1895 * be able to abort just for this problem later... */
1896 logfp
= fopen(server
.logfile
,"a");
1897 if (logfp
== NULL
) {
1898 err
= sdscatprintf(sdsempty(),
1899 "Can't open the log file: %s", strerror(errno
));
1904 } else if (!strcasecmp(argv
[0],"databases") && argc
== 2) {
1905 server
.dbnum
= atoi(argv
[1]);
1906 if (server
.dbnum
< 1) {
1907 err
= "Invalid number of databases"; goto loaderr
;
1909 } else if (!strcasecmp(argv
[0],"include") && argc
== 2) {
1910 loadServerConfig(argv
[1]);
1911 } else if (!strcasecmp(argv
[0],"maxclients") && argc
== 2) {
1912 server
.maxclients
= atoi(argv
[1]);
1913 } else if (!strcasecmp(argv
[0],"maxmemory") && argc
== 2) {
1914 server
.maxmemory
= memtoll(argv
[1],NULL
);
1915 } else if (!strcasecmp(argv
[0],"slaveof") && argc
== 3) {
1916 server
.masterhost
= sdsnew(argv
[1]);
1917 server
.masterport
= atoi(argv
[2]);
1918 server
.replstate
= REDIS_REPL_CONNECT
;
1919 } else if (!strcasecmp(argv
[0],"masterauth") && argc
== 2) {
1920 server
.masterauth
= zstrdup(argv
[1]);
1921 } else if (!strcasecmp(argv
[0],"glueoutputbuf") && argc
== 2) {
1922 if ((server
.glueoutputbuf
= yesnotoi(argv
[1])) == -1) {
1923 err
= "argument must be 'yes' or 'no'"; goto loaderr
;
1925 } else if (!strcasecmp(argv
[0],"rdbcompression") && argc
== 2) {
1926 if ((server
.rdbcompression
= yesnotoi(argv
[1])) == -1) {
1927 err
= "argument must be 'yes' or 'no'"; goto loaderr
;
1929 } else if (!strcasecmp(argv
[0],"activerehashing") && argc
== 2) {
1930 if ((server
.activerehashing
= yesnotoi(argv
[1])) == -1) {
1931 err
= "argument must be 'yes' or 'no'"; goto loaderr
;
1933 } else if (!strcasecmp(argv
[0],"daemonize") && argc
== 2) {
1934 if ((server
.daemonize
= yesnotoi(argv
[1])) == -1) {
1935 err
= "argument must be 'yes' or 'no'"; goto loaderr
;
1937 } else if (!strcasecmp(argv
[0],"appendonly") && argc
== 2) {
1938 if ((server
.appendonly
= yesnotoi(argv
[1])) == -1) {
1939 err
= "argument must be 'yes' or 'no'"; goto loaderr
;
1941 } else if (!strcasecmp(argv
[0],"appendfilename") && argc
== 2) {
1942 zfree(server
.appendfilename
);
1943 server
.appendfilename
= zstrdup(argv
[1]);
1944 } else if (!strcasecmp(argv
[0],"appendfsync") && argc
== 2) {
1945 if (!strcasecmp(argv
[1],"no")) {
1946 server
.appendfsync
= APPENDFSYNC_NO
;
1947 } else if (!strcasecmp(argv
[1],"always")) {
1948 server
.appendfsync
= APPENDFSYNC_ALWAYS
;
1949 } else if (!strcasecmp(argv
[1],"everysec")) {
1950 server
.appendfsync
= APPENDFSYNC_EVERYSEC
;
1952 err
= "argument must be 'no', 'always' or 'everysec'";
1955 } else if (!strcasecmp(argv
[0],"requirepass") && argc
== 2) {
1956 server
.requirepass
= zstrdup(argv
[1]);
1957 } else if (!strcasecmp(argv
[0],"pidfile") && argc
== 2) {
1958 zfree(server
.pidfile
);
1959 server
.pidfile
= zstrdup(argv
[1]);
1960 } else if (!strcasecmp(argv
[0],"dbfilename") && argc
== 2) {
1961 zfree(server
.dbfilename
);
1962 server
.dbfilename
= zstrdup(argv
[1]);
1963 } else if (!strcasecmp(argv
[0],"vm-enabled") && argc
== 2) {
1964 if ((server
.vm_enabled
= yesnotoi(argv
[1])) == -1) {
1965 err
= "argument must be 'yes' or 'no'"; goto loaderr
;
1967 } else if (!strcasecmp(argv
[0],"vm-swap-file") && argc
== 2) {
1968 zfree(server
.vm_swap_file
);
1969 server
.vm_swap_file
= zstrdup(argv
[1]);
1970 } else if (!strcasecmp(argv
[0],"vm-max-memory") && argc
== 2) {
1971 server
.vm_max_memory
= memtoll(argv
[1],NULL
);
1972 } else if (!strcasecmp(argv
[0],"vm-page-size") && argc
== 2) {
1973 server
.vm_page_size
= memtoll(argv
[1], NULL
);
1974 } else if (!strcasecmp(argv
[0],"vm-pages") && argc
== 2) {
1975 server
.vm_pages
= memtoll(argv
[1], NULL
);
1976 } else if (!strcasecmp(argv
[0],"vm-max-threads") && argc
== 2) {
1977 server
.vm_max_threads
= strtoll(argv
[1], NULL
, 10);
1978 } else if (!strcasecmp(argv
[0],"hash-max-zipmap-entries") && argc
== 2){
1979 server
.hash_max_zipmap_entries
= memtoll(argv
[1], NULL
);
1980 } else if (!strcasecmp(argv
[0],"hash-max-zipmap-value") && argc
== 2){
1981 server
.hash_max_zipmap_value
= memtoll(argv
[1], NULL
);
1983 err
= "Bad directive or wrong number of arguments"; goto loaderr
;
1985 for (j
= 0; j
< argc
; j
++)
1990 if (fp
!= stdin
) fclose(fp
);
1994 fprintf(stderr
, "\n*** FATAL CONFIG FILE ERROR ***\n");
1995 fprintf(stderr
, "Reading the configuration file, at line %d\n", linenum
);
1996 fprintf(stderr
, ">>> '%s'\n", line
);
1997 fprintf(stderr
, "%s\n", err
);
2001 static void freeClientArgv(redisClient
*c
) {
2004 for (j
= 0; j
< c
->argc
; j
++)
2005 decrRefCount(c
->argv
[j
]);
2006 for (j
= 0; j
< c
->mbargc
; j
++)
2007 decrRefCount(c
->mbargv
[j
]);
2012 static void freeClient(redisClient
*c
) {
2015 /* Note that if the client we are freeing is blocked in a blocking
2016 * call, we have to set querybuf to NULL *before* calling
2017 * unblockClientWaitingData(), so that processInputBuffer() does not get
2018 * called. Also it is important to remove the file events after
2019 * this, because this call adds the READABLE event. */
2020 sdsfree(c
->querybuf
);
2022 if (c
->flags
& REDIS_BLOCKED
)
2023 unblockClientWaitingData(c
);
2025 /* UNWATCH all the keys */
2027 listRelease(c
->watched_keys
);
2028 /* Unsubscribe from all the pubsub channels */
2029 pubsubUnsubscribeAllChannels(c
,0);
2030 pubsubUnsubscribeAllPatterns(c
,0);
2031 dictRelease(c
->pubsub_channels
);
2032 listRelease(c
->pubsub_patterns
);
2033 /* Obvious cleanup */
2034 aeDeleteFileEvent(server
.el
,c
->fd
,AE_READABLE
);
2035 aeDeleteFileEvent(server
.el
,c
->fd
,AE_WRITABLE
);
2036 listRelease(c
->reply
);
2039 /* Remove from the list of clients */
2040 ln
= listSearchKey(server
.clients
,c
);
2041 redisAssert(ln
!= NULL
);
2042 listDelNode(server
.clients
,ln
);
2043 /* Remove from the list of clients that are now ready to be restarted
2044 * after waiting for swapped keys */
2045 if (c
->flags
& REDIS_IO_WAIT
&& listLength(c
->io_keys
) == 0) {
2046 ln
= listSearchKey(server
.io_ready_clients
,c
);
2048 listDelNode(server
.io_ready_clients
,ln
);
2049 server
.vm_blocked_clients
--;
2052 /* Remove from the list of clients waiting for swapped keys */
2053 while (server
.vm_enabled
&& listLength(c
->io_keys
)) {
2054 ln
= listFirst(c
->io_keys
);
2055 dontWaitForSwappedKey(c
,ln
->value
);
2057 listRelease(c
->io_keys
);
2058 /* Master/slave cleanup */
2059 if (c
->flags
& REDIS_SLAVE
) {
2060 if (c
->replstate
== REDIS_REPL_SEND_BULK
&& c
->repldbfd
!= -1)
2062 list
*l
= (c
->flags
& REDIS_MONITOR
) ? server
.monitors
: server
.slaves
;
2063 ln
= listSearchKey(l
,c
);
2064 redisAssert(ln
!= NULL
);
2067 if (c
->flags
& REDIS_MASTER
) {
2068 server
.master
= NULL
;
2069 server
.replstate
= REDIS_REPL_CONNECT
;
2071 /* Release memory */
2074 freeClientMultiState(c
);
2078 #define GLUEREPLY_UP_TO (1024)
2079 static void glueReplyBuffersIfNeeded(redisClient
*c
) {
2081 char buf
[GLUEREPLY_UP_TO
];
2086 listRewind(c
->reply
,&li
);
2087 while((ln
= listNext(&li
))) {
2091 objlen
= sdslen(o
->ptr
);
2092 if (copylen
+ objlen
<= GLUEREPLY_UP_TO
) {
2093 memcpy(buf
+copylen
,o
->ptr
,objlen
);
2095 listDelNode(c
->reply
,ln
);
2097 if (copylen
== 0) return;
2101 /* Now the output buffer is empty, add the new single element */
2102 o
= createObject(REDIS_STRING
,sdsnewlen(buf
,copylen
));
2103 listAddNodeHead(c
->reply
,o
);
2106 static void sendReplyToClient(aeEventLoop
*el
, int fd
, void *privdata
, int mask
) {
2107 redisClient
*c
= privdata
;
2108 int nwritten
= 0, totwritten
= 0, objlen
;
2111 REDIS_NOTUSED(mask
);
2113 /* Use writev() if we have enough buffers to send */
2114 if (!server
.glueoutputbuf
&&
2115 listLength(c
->reply
) > REDIS_WRITEV_THRESHOLD
&&
2116 !(c
->flags
& REDIS_MASTER
))
2118 sendReplyToClientWritev(el
, fd
, privdata
, mask
);
2122 while(listLength(c
->reply
)) {
2123 if (server
.glueoutputbuf
&& listLength(c
->reply
) > 1)
2124 glueReplyBuffersIfNeeded(c
);
2126 o
= listNodeValue(listFirst(c
->reply
));
2127 objlen
= sdslen(o
->ptr
);
2130 listDelNode(c
->reply
,listFirst(c
->reply
));
2134 if (c
->flags
& REDIS_MASTER
) {
2135 /* Don't reply to a master */
2136 nwritten
= objlen
- c
->sentlen
;
2138 nwritten
= write(fd
, ((char*)o
->ptr
)+c
->sentlen
, objlen
- c
->sentlen
);
2139 if (nwritten
<= 0) break;
2141 c
->sentlen
+= nwritten
;
2142 totwritten
+= nwritten
;
2143 /* If we fully sent the object on head go to the next one */
2144 if (c
->sentlen
== objlen
) {
2145 listDelNode(c
->reply
,listFirst(c
->reply
));
2148 /* Note that we avoid sending more than REDIS_MAX_WRITE_PER_EVENT
2149 * bytes: in a single threaded server it's a good idea to serve
2150 * other clients as well, even if a very large request comes from a
2151 * super fast link that is always able to accept data (as a real world
2152 * scenario think about 'KEYS *' against the loopback interface). */
2153 if (totwritten
> REDIS_MAX_WRITE_PER_EVENT
) break;
2155 if (nwritten
== -1) {
2156 if (errno
== EAGAIN
) {
2159 redisLog(REDIS_VERBOSE
,
2160 "Error writing to client: %s", strerror(errno
));
2165 if (totwritten
> 0) c
->lastinteraction
= time(NULL
);
2166 if (listLength(c
->reply
) == 0) {
2168 aeDeleteFileEvent(server
.el
,c
->fd
,AE_WRITABLE
);
2172 static void sendReplyToClientWritev(aeEventLoop
*el
, int fd
, void *privdata
, int mask
)
2174 redisClient
*c
= privdata
;
2175 int nwritten
= 0, totwritten
= 0, objlen
, willwrite
;
2177 struct iovec iov
[REDIS_WRITEV_IOVEC_COUNT
];
2178 int offset
, ion
= 0;
2180 REDIS_NOTUSED(mask
);
2183 while (listLength(c
->reply
)) {
2184 offset
= c
->sentlen
;
2188 /* fill-in the iov[] array */
2189 for(node
= listFirst(c
->reply
); node
; node
= listNextNode(node
)) {
2190 o
= listNodeValue(node
);
2191 objlen
= sdslen(o
->ptr
);
2193 if (totwritten
+ objlen
- offset
> REDIS_MAX_WRITE_PER_EVENT
)
2196 if(ion
== REDIS_WRITEV_IOVEC_COUNT
)
2197 break; /* no more iovecs */
2199 iov
[ion
].iov_base
= ((char*)o
->ptr
) + offset
;
2200 iov
[ion
].iov_len
= objlen
- offset
;
2201 willwrite
+= objlen
- offset
;
2202 offset
= 0; /* just for the first item */
2209 /* write all collected blocks at once */
2210 if((nwritten
= writev(fd
, iov
, ion
)) < 0) {
2211 if (errno
!= EAGAIN
) {
2212 redisLog(REDIS_VERBOSE
,
2213 "Error writing to client: %s", strerror(errno
));
2220 totwritten
+= nwritten
;
2221 offset
= c
->sentlen
;
2223 /* remove written robjs from c->reply */
2224 while (nwritten
&& listLength(c
->reply
)) {
2225 o
= listNodeValue(listFirst(c
->reply
));
2226 objlen
= sdslen(o
->ptr
);
2228 if(nwritten
>= objlen
- offset
) {
2229 listDelNode(c
->reply
, listFirst(c
->reply
));
2230 nwritten
-= objlen
- offset
;
2234 c
->sentlen
+= nwritten
;
2242 c
->lastinteraction
= time(NULL
);
2244 if (listLength(c
->reply
) == 0) {
2246 aeDeleteFileEvent(server
.el
,c
->fd
,AE_WRITABLE
);
2250 static int qsortRedisCommands(const void *r1
, const void *r2
) {
2252 ((struct redisCommand
*)r1
)->name
,
2253 ((struct redisCommand
*)r2
)->name
);
2256 static void sortCommandTable() {
2257 /* Copy and sort the read-only version of the command table */
2258 commandTable
= (struct redisCommand
*)malloc(sizeof(readonlyCommandTable
));
2259 memcpy(commandTable
,readonlyCommandTable
,sizeof(readonlyCommandTable
));
2261 sizeof(readonlyCommandTable
)/sizeof(struct redisCommand
),
2262 sizeof(struct redisCommand
),qsortRedisCommands
);
2265 static struct redisCommand
*lookupCommand(char *name
) {
2266 struct redisCommand tmp
= {name
,NULL
,0,0,NULL
,0,0,0};
2270 sizeof(readonlyCommandTable
)/sizeof(struct redisCommand
),
2271 sizeof(struct redisCommand
),
2272 qsortRedisCommands
);
2275 /* resetClient prepares the client to process the next command */
2276 static void resetClient(redisClient
*c
) {
2282 /* Call() is the core of Redis execution of a command */
2283 static void call(redisClient
*c
, struct redisCommand
*cmd
) {
2286 dirty
= server
.dirty
;
2288 dirty
= server
.dirty
-dirty
;
2290 if (server
.appendonly
&& dirty
)
2291 feedAppendOnlyFile(cmd
,c
->db
->id
,c
->argv
,c
->argc
);
2292 if ((dirty
|| cmd
->flags
& REDIS_CMD_FORCE_REPLICATION
) &&
2293 listLength(server
.slaves
))
2294 replicationFeedSlaves(server
.slaves
,c
->db
->id
,c
->argv
,c
->argc
);
2295 if (listLength(server
.monitors
))
2296 replicationFeedMonitors(server
.monitors
,c
->db
->id
,c
->argv
,c
->argc
);
2297 server
.stat_numcommands
++;
2300 /* If this function gets called we already read a whole
2301 * command, arguments are in the client argv/argc fields.
2302 * processCommand() executes the command or prepares the
2303 * server for a bulk read from the client.
2305 * If 1 is returned the client is still alive and valid
2306 * and other operations can be performed by the caller. Otherwise
2307 * if 0 is returned the client was destroyed (i.e. after QUIT). */
2308 static int processCommand(redisClient
*c
) {
2309 struct redisCommand
*cmd
;
2311 /* Free some memory if needed (maxmemory setting) */
2312 if (server
.maxmemory
) freeMemoryIfNeeded();
2314 /* Handle the multi bulk command type. This is an alternative protocol
2315 * supported by Redis in order to receive commands that are composed of
2316 * multiple binary-safe "bulk" arguments. The latency of processing is
2317 * a bit higher but this allows things like multi-sets, so if this
2318 * protocol is used only for MSET and similar commands this is a big win. */
2319 if (c
->multibulk
== 0 && c
->argc
== 1 && ((char*)(c
->argv
[0]->ptr
))[0] == '*') {
2320 c
->multibulk
= atoi(((char*)c
->argv
[0]->ptr
)+1);
2321 if (c
->multibulk
<= 0) {
2325 decrRefCount(c
->argv
[c
->argc
-1]);
2329 } else if (c
->multibulk
) {
2330 if (c
->bulklen
== -1) {
2331 if (((char*)c
->argv
[0]->ptr
)[0] != '$') {
2332 addReplySds(c
,sdsnew("-ERR multi bulk protocol error\r\n"));
2336 int bulklen
= atoi(((char*)c
->argv
[0]->ptr
)+1);
2337 decrRefCount(c
->argv
[0]);
2338 if (bulklen
< 0 || bulklen
> 1024*1024*1024) {
2340 addReplySds(c
,sdsnew("-ERR invalid bulk write count\r\n"));
2345 c
->bulklen
= bulklen
+2; /* add two bytes for CR+LF */
2349 c
->mbargv
= zrealloc(c
->mbargv
,(sizeof(robj
*))*(c
->mbargc
+1));
2350 c
->mbargv
[c
->mbargc
] = c
->argv
[0];
2354 if (c
->multibulk
== 0) {
2358 /* Here we need to swap the multi-bulk argc/argv with the
2359 * normal argc/argv of the client structure. */
2361 c
->argv
= c
->mbargv
;
2362 c
->mbargv
= auxargv
;
2365 c
->argc
= c
->mbargc
;
2366 c
->mbargc
= auxargc
;
2368 /* We need to set bulklen to something different than -1
2369 * in order for the code below to process the command without
2370 * trying to read the last argument of a bulk command as
2371 * a special argument. */
2373 /* continue below and process the command */
2380 /* -- end of multi bulk commands processing -- */
2382 /* The QUIT command is handled as a special case. Normal command
2383 * procs are unable to close the client connection safely */
2384 if (!strcasecmp(c
->argv
[0]->ptr
,"quit")) {
2389 /* Now lookup the command and check ASAP about trivial error conditions
2390 * such as wrong arity, bad command name and so forth. */
2391 cmd
= lookupCommand(c
->argv
[0]->ptr
);
2394 sdscatprintf(sdsempty(), "-ERR unknown command '%s'\r\n",
2395 (char*)c
->argv
[0]->ptr
));
2398 } else if ((cmd
->arity
> 0 && cmd
->arity
!= c
->argc
) ||
2399 (c
->argc
< -cmd
->arity
)) {
2401 sdscatprintf(sdsempty(),
2402 "-ERR wrong number of arguments for '%s' command\r\n",
2406 } else if (cmd
->flags
& REDIS_CMD_BULK
&& c
->bulklen
== -1) {
2407 /* This is a bulk command, we still have to read the last argument. */
2408 int bulklen
= atoi(c
->argv
[c
->argc
-1]->ptr
);
2410 decrRefCount(c
->argv
[c
->argc
-1]);
2411 if (bulklen
< 0 || bulklen
> 1024*1024*1024) {
2413 addReplySds(c
,sdsnew("-ERR invalid bulk write count\r\n"));
2418 c
->bulklen
= bulklen
+2; /* add two bytes for CR+LF */
2419 /* It is possible that the bulk read is already in the
2420 * buffer. Check this condition and handle it accordingly.
2421 * This is just a fast path, an alternative to calling processInputBuffer().
2422 * It's a good idea since the code is small and this condition
2423 * happens most of the time. */
2424 if ((signed)sdslen(c
->querybuf
) >= c
->bulklen
) {
2425 c
->argv
[c
->argc
] = createStringObject(c
->querybuf
,c
->bulklen
-2);
2427 c
->querybuf
= sdsrange(c
->querybuf
,c
->bulklen
,-1);
2429 /* Otherwise return... the last argument still has to be read
2430 * from the socket. */
2434 /* Let's try to encode the bulk object to save space. */
2435 if (cmd
->flags
& REDIS_CMD_BULK
)
2436 c
->argv
[c
->argc
-1] = tryObjectEncoding(c
->argv
[c
->argc
-1]);
2438 /* Check if the user is authenticated */
2439 if (server
.requirepass
&& !c
->authenticated
&& cmd
->proc
!= authCommand
) {
2440 addReplySds(c
,sdsnew("-ERR operation not permitted\r\n"));
2445 /* Handle the maxmemory directive */
2446 if (server
.maxmemory
&& (cmd
->flags
& REDIS_CMD_DENYOOM
) &&
2447 zmalloc_used_memory() > server
.maxmemory
)
2449 addReplySds(c
,sdsnew("-ERR command not allowed when used memory > 'maxmemory'\r\n"));
2454 /* Only allow SUBSCRIBE and UNSUBSCRIBE in the context of Pub/Sub */
2455 if ((dictSize(c
->pubsub_channels
) > 0 || listLength(c
->pubsub_patterns
) > 0)
2457 cmd
->proc
!= subscribeCommand
&& cmd
->proc
!= unsubscribeCommand
&&
2458 cmd
->proc
!= psubscribeCommand
&& cmd
->proc
!= punsubscribeCommand
) {
2459 addReplySds(c
,sdsnew("-ERR only (P)SUBSCRIBE / (P)UNSUBSCRIBE / QUIT allowed in this context\r\n"));
2464 /* Exec the command */
2465 if (c
->flags
& REDIS_MULTI
&&
2466 cmd
->proc
!= execCommand
&& cmd
->proc
!= discardCommand
&&
2467 cmd
->proc
!= multiCommand
&& cmd
->proc
!= watchCommand
)
2469 queueMultiCommand(c
,cmd
);
2470 addReply(c
,shared
.queued
);
2472 if (server
.vm_enabled
&& server
.vm_max_threads
> 0 &&
2473 blockClientOnSwappedKeys(c
,cmd
)) return 1;
2477 /* Prepare the client for the next command */
2482 static void replicationFeedSlaves(list
*slaves
, int dictid
, robj
**argv
, int argc
) {
2487 /* We need 1+(ARGS*3) objects since commands are using the new protocol
2488 * and we need 1 object for the first "*<count>\r\n" multibulk count, then
2489 * for every additional object we have "$<count>\r\n" + object + "\r\n". */
2490 robj
*static_outv
[REDIS_STATIC_ARGS
*3+1];
2493 if (argc
<= REDIS_STATIC_ARGS
) {
2496 outv
= zmalloc(sizeof(robj
*)*(argc
*3+1));
2499 lenobj
= createObject(REDIS_STRING
,
2500 sdscatprintf(sdsempty(), "*%d\r\n", argc
));
2501 lenobj
->refcount
= 0;
2502 outv
[outc
++] = lenobj
;
2503 for (j
= 0; j
< argc
; j
++) {
2504 lenobj
= createObject(REDIS_STRING
,
2505 sdscatprintf(sdsempty(),"$%lu\r\n",
2506 (unsigned long) stringObjectLen(argv
[j
])));
2507 lenobj
->refcount
= 0;
2508 outv
[outc
++] = lenobj
;
2509 outv
[outc
++] = argv
[j
];
2510 outv
[outc
++] = shared
.crlf
;
2513 /* Increment all the refcounts at start and decrement at end in order to
2514 * be sure to free objects if there is no slave in a replication state
2515 * able to be fed with commands */
2516 for (j
= 0; j
< outc
; j
++) incrRefCount(outv
[j
]);
2517 listRewind(slaves
,&li
);
2518 while((ln
= listNext(&li
))) {
2519 redisClient
*slave
= ln
->value
;
2521 /* Don't feed slaves that are still waiting for BGSAVE to start */
2522 if (slave
->replstate
== REDIS_REPL_WAIT_BGSAVE_START
) continue;
2524 /* Feed all the other slaves, MONITORs and so on */
2525 if (slave
->slaveseldb
!= dictid
) {
2529 case 0: selectcmd
= shared
.select0
; break;
2530 case 1: selectcmd
= shared
.select1
; break;
2531 case 2: selectcmd
= shared
.select2
; break;
2532 case 3: selectcmd
= shared
.select3
; break;
2533 case 4: selectcmd
= shared
.select4
; break;
2534 case 5: selectcmd
= shared
.select5
; break;
2535 case 6: selectcmd
= shared
.select6
; break;
2536 case 7: selectcmd
= shared
.select7
; break;
2537 case 8: selectcmd
= shared
.select8
; break;
2538 case 9: selectcmd
= shared
.select9
; break;
2540 selectcmd
= createObject(REDIS_STRING
,
2541 sdscatprintf(sdsempty(),"select %d\r\n",dictid
));
2542 selectcmd
->refcount
= 0;
2545 addReply(slave
,selectcmd
);
2546 slave
->slaveseldb
= dictid
;
2548 for (j
= 0; j
< outc
; j
++) addReply(slave
,outv
[j
]);
2550 for (j
= 0; j
< outc
; j
++) decrRefCount(outv
[j
]);
2551 if (outv
!= static_outv
) zfree(outv
);
2554 static sds
sdscatrepr(sds s
, char *p
, size_t len
) {
2555 s
= sdscatlen(s
,"\"",1);
2560 s
= sdscatprintf(s
,"\\%c",*p
);
2562 case '\n': s
= sdscatlen(s
,"\\n",1); break;
2563 case '\r': s
= sdscatlen(s
,"\\r",1); break;
2564 case '\t': s
= sdscatlen(s
,"\\t",1); break;
2565 case '\a': s
= sdscatlen(s
,"\\a",1); break;
2566 case '\b': s
= sdscatlen(s
,"\\b",1); break;
2569 s
= sdscatprintf(s
,"%c",*p
);
2571 s
= sdscatprintf(s
,"\\x%02x",(unsigned char)*p
);
2576 return sdscatlen(s
,"\"",1);
2579 static void replicationFeedMonitors(list
*monitors
, int dictid
, robj
**argv
, int argc
) {
2583 sds cmdrepr
= sdsnew("+");
2587 gettimeofday(&tv
,NULL
);
2588 cmdrepr
= sdscatprintf(cmdrepr
,"%ld.%ld ",(long)tv
.tv_sec
,(long)tv
.tv_usec
);
2589 if (dictid
!= 0) cmdrepr
= sdscatprintf(cmdrepr
,"(db %d) ", dictid
);
2591 for (j
= 0; j
< argc
; j
++) {
2592 if (argv
[j
]->encoding
== REDIS_ENCODING_INT
) {
2593 cmdrepr
= sdscatprintf(cmdrepr
, "%ld", (long)argv
[j
]->ptr
);
2595 cmdrepr
= sdscatrepr(cmdrepr
,(char*)argv
[j
]->ptr
,
2596 sdslen(argv
[j
]->ptr
));
2599 cmdrepr
= sdscatlen(cmdrepr
," ",1);
2601 cmdrepr
= sdscatlen(cmdrepr
,"\r\n",2);
2602 cmdobj
= createObject(REDIS_STRING
,cmdrepr
);
2604 listRewind(monitors
,&li
);
2605 while((ln
= listNext(&li
))) {
2606 redisClient
*monitor
= ln
->value
;
2607 addReply(monitor
,cmdobj
);
2609 decrRefCount(cmdobj
);
2612 static void processInputBuffer(redisClient
*c
) {
2614 /* Before processing the input buffer, make sure the client is not
2615 * waiting for a blocking operation such as BLPOP. Note that on the first
2616 * iteration the client is never blocked, otherwise the processInputBuffer
2617 * would not be called at all, but after the execution of the first commands
2618 * in the input buffer the client may be blocked, and the "goto again"
2619 * will try to reiterate. The following line will make it return asap. */
2620 if (c
->flags
& REDIS_BLOCKED
|| c
->flags
& REDIS_IO_WAIT
) return;
2621 if (c
->bulklen
== -1) {
2622 /* Read the first line of the query */
2623 char *p
= strchr(c
->querybuf
,'\n');
2630 query
= c
->querybuf
;
2631 c
->querybuf
= sdsempty();
2632 querylen
= 1+(p
-(query
));
2633 if (sdslen(query
) > querylen
) {
2634 /* leave data after the first line of the query in the buffer */
2635 c
->querybuf
= sdscatlen(c
->querybuf
,query
+querylen
,sdslen(query
)-querylen
);
2637 *p
= '\0'; /* remove "\n" */
2638 if (*(p
-1) == '\r') *(p
-1) = '\0'; /* and "\r" if any */
2639 sdsupdatelen(query
);
2641 /* Now we can split the query in arguments */
2642 argv
= sdssplitlen(query
,sdslen(query
)," ",1,&argc
);
2645 if (c
->argv
) zfree(c
->argv
);
2646 c
->argv
= zmalloc(sizeof(robj
*)*argc
);
2648 for (j
= 0; j
< argc
; j
++) {
2649 if (sdslen(argv
[j
])) {
2650 c
->argv
[c
->argc
] = createObject(REDIS_STRING
,argv
[j
]);
2658 /* Execute the command. If the client is still valid
2659 * after processCommand() returns and there is something
2660 * on the query buffer try to process the next command. */
2661 if (processCommand(c
) && sdslen(c
->querybuf
)) goto again
;
2663 /* Nothing to process, argc == 0. Just process the query
2664 * buffer if it's not empty or return to the caller */
2665 if (sdslen(c
->querybuf
)) goto again
;
2668 } else if (sdslen(c
->querybuf
) >= REDIS_REQUEST_MAX_SIZE
) {
2669 redisLog(REDIS_VERBOSE
, "Client protocol error");
2674 /* Bulk read handling. Note that if we are at this point
2675 the client already sent a command terminated with a newline,
2676 we are reading the bulk data that is actually the last
2677 argument of the command. */
2678 int qbl
= sdslen(c
->querybuf
);
2680 if (c
->bulklen
<= qbl
) {
2681 /* Copy everything but the final CRLF as final argument */
2682 c
->argv
[c
->argc
] = createStringObject(c
->querybuf
,c
->bulklen
-2);
2684 c
->querybuf
= sdsrange(c
->querybuf
,c
->bulklen
,-1);
2685 /* Process the command. If the client is still valid after
2686 * the processing and there is more data in the buffer
2687 * try to parse it. */
2688 if (processCommand(c
) && sdslen(c
->querybuf
)) goto again
;
2694 static void readQueryFromClient(aeEventLoop
*el
, int fd
, void *privdata
, int mask
) {
2695 redisClient
*c
= (redisClient
*) privdata
;
2696 char buf
[REDIS_IOBUF_LEN
];
2699 REDIS_NOTUSED(mask
);
2701 nread
= read(fd
, buf
, REDIS_IOBUF_LEN
);
2703 if (errno
== EAGAIN
) {
2706 redisLog(REDIS_VERBOSE
, "Reading from client: %s",strerror(errno
));
2710 } else if (nread
== 0) {
2711 redisLog(REDIS_VERBOSE
, "Client closed connection");
2716 c
->querybuf
= sdscatlen(c
->querybuf
, buf
, nread
);
2717 c
->lastinteraction
= time(NULL
);
2721 processInputBuffer(c
);
2724 static int selectDb(redisClient
*c
, int id
) {
2725 if (id
< 0 || id
>= server
.dbnum
)
2727 c
->db
= &server
.db
[id
];
2731 static void *dupClientReplyValue(void *o
) {
2732 incrRefCount((robj
*)o
);
2736 static int listMatchObjects(void *a
, void *b
) {
2737 return equalStringObjects(a
,b
);
2740 static redisClient
*createClient(int fd
) {
2741 redisClient
*c
= zmalloc(sizeof(*c
));
2743 anetNonBlock(NULL
,fd
);
2744 anetTcpNoDelay(NULL
,fd
);
2745 if (!c
) return NULL
;
2748 c
->querybuf
= sdsempty();
2757 c
->lastinteraction
= time(NULL
);
2758 c
->authenticated
= 0;
2759 c
->replstate
= REDIS_REPL_NONE
;
2760 c
->reply
= listCreate();
2761 listSetFreeMethod(c
->reply
,decrRefCount
);
2762 listSetDupMethod(c
->reply
,dupClientReplyValue
);
2763 c
->blocking_keys
= NULL
;
2764 c
->blocking_keys_num
= 0;
2765 c
->io_keys
= listCreate();
2766 c
->watched_keys
= listCreate();
2767 listSetFreeMethod(c
->io_keys
,decrRefCount
);
2768 c
->pubsub_channels
= dictCreate(&setDictType
,NULL
);
2769 c
->pubsub_patterns
= listCreate();
2770 listSetFreeMethod(c
->pubsub_patterns
,decrRefCount
);
2771 listSetMatchMethod(c
->pubsub_patterns
,listMatchObjects
);
2772 if (aeCreateFileEvent(server
.el
, c
->fd
, AE_READABLE
,
2773 readQueryFromClient
, c
) == AE_ERR
) {
2777 listAddNodeTail(server
.clients
,c
);
2778 initClientMultiState(c
);
2782 static void addReply(redisClient
*c
, robj
*obj
) {
2783 if (listLength(c
->reply
) == 0 &&
2784 (c
->replstate
== REDIS_REPL_NONE
||
2785 c
->replstate
== REDIS_REPL_ONLINE
) &&
2786 aeCreateFileEvent(server
.el
, c
->fd
, AE_WRITABLE
,
2787 sendReplyToClient
, c
) == AE_ERR
) return;
2789 if (server
.vm_enabled
&& obj
->storage
!= REDIS_VM_MEMORY
) {
2790 obj
= dupStringObject(obj
);
2791 obj
->refcount
= 0; /* getDecodedObject() will increment the refcount */
2793 listAddNodeTail(c
->reply
,getDecodedObject(obj
));
2796 static void addReplySds(redisClient
*c
, sds s
) {
2797 robj
*o
= createObject(REDIS_STRING
,s
);
2802 static void addReplyDouble(redisClient
*c
, double d
) {
2805 snprintf(buf
,sizeof(buf
),"%.17g",d
);
2806 addReplySds(c
,sdscatprintf(sdsempty(),"$%lu\r\n%s\r\n",
2807 (unsigned long) strlen(buf
),buf
));
2810 static void addReplyLongLong(redisClient
*c
, long long ll
) {
2815 addReply(c
,shared
.czero
);
2817 } else if (ll
== 1) {
2818 addReply(c
,shared
.cone
);
2822 len
= ll2string(buf
+1,sizeof(buf
)-1,ll
);
2825 addReplySds(c
,sdsnewlen(buf
,len
+3));
2828 static void addReplyUlong(redisClient
*c
, unsigned long ul
) {
2833 addReply(c
,shared
.czero
);
2835 } else if (ul
== 1) {
2836 addReply(c
,shared
.cone
);
2839 len
= snprintf(buf
,sizeof(buf
),":%lu\r\n",ul
);
2840 addReplySds(c
,sdsnewlen(buf
,len
));
2843 static void addReplyBulkLen(redisClient
*c
, robj
*obj
) {
2847 if (obj
->encoding
== REDIS_ENCODING_RAW
) {
2848 len
= sdslen(obj
->ptr
);
2850 long n
= (long)obj
->ptr
;
2852 /* Compute how many bytes this integer will take as a radix 10 string */
2858 while((n
= n
/10) != 0) {
2863 intlen
= ll2string(buf
+1,sizeof(buf
)-1,(long long)len
);
2864 buf
[intlen
+1] = '\r';
2865 buf
[intlen
+2] = '\n';
2866 addReplySds(c
,sdsnewlen(buf
,intlen
+3));
2869 static void addReplyBulk(redisClient
*c
, robj
*obj
) {
2870 addReplyBulkLen(c
,obj
);
2872 addReply(c
,shared
.crlf
);
2875 /* In the CONFIG command we need to add vanilla C string as bulk replies */
2876 static void addReplyBulkCString(redisClient
*c
, char *s
) {
2878 addReply(c
,shared
.nullbulk
);
2880 robj
*o
= createStringObject(s
,strlen(s
));
2886 static void acceptHandler(aeEventLoop
*el
, int fd
, void *privdata
, int mask
) {
2891 REDIS_NOTUSED(mask
);
2892 REDIS_NOTUSED(privdata
);
2894 cfd
= anetAccept(server
.neterr
, fd
, cip
, &cport
);
2895 if (cfd
== AE_ERR
) {
2896 redisLog(REDIS_VERBOSE
,"Accepting client connection: %s", server
.neterr
);
2899 redisLog(REDIS_VERBOSE
,"Accepted %s:%d", cip
, cport
);
2900 if ((c
= createClient(cfd
)) == NULL
) {
2901 redisLog(REDIS_WARNING
,"Error allocating resoures for the client");
2902 close(cfd
); /* May be already closed, just ingore errors */
2905 /* If maxclient directive is set and this is one client more... close the
2906 * connection. Note that we create the client instead to check before
2907 * for this condition, since now the socket is already set in nonblocking
2908 * mode and we can send an error for free using the Kernel I/O */
2909 if (server
.maxclients
&& listLength(server
.clients
) > server
.maxclients
) {
2910 char *err
= "-ERR max number of clients reached\r\n";
2912 /* That's a best effort error message, don't check write errors */
2913 if (write(c
->fd
,err
,strlen(err
)) == -1) {
2914 /* Nothing to do, Just to avoid the warning... */
2919 server
.stat_numconnections
++;
2922 /* ======================= Redis objects implementation ===================== */
2924 static robj
*createObject(int type
, void *ptr
) {
2927 if (server
.vm_enabled
) pthread_mutex_lock(&server
.obj_freelist_mutex
);
2928 if (listLength(server
.objfreelist
)) {
2929 listNode
*head
= listFirst(server
.objfreelist
);
2930 o
= listNodeValue(head
);
2931 listDelNode(server
.objfreelist
,head
);
2932 if (server
.vm_enabled
) pthread_mutex_unlock(&server
.obj_freelist_mutex
);
2934 if (server
.vm_enabled
) {
2935 pthread_mutex_unlock(&server
.obj_freelist_mutex
);
2936 o
= zmalloc(sizeof(*o
));
2938 o
= zmalloc(sizeof(*o
)-sizeof(struct redisObjectVM
));
2942 o
->encoding
= REDIS_ENCODING_RAW
;
2945 if (server
.vm_enabled
) {
2946 /* Note that this code may run in the context of an I/O thread
2947 * and accessing to server.unixtime in theory is an error
2948 * (no locks). But in practice this is safe, and even if we read
2949 * garbage Redis will not fail, as it's just a statistical info */
2950 o
->vm
.atime
= server
.unixtime
;
2951 o
->storage
= REDIS_VM_MEMORY
;
2956 static robj
*createStringObject(char *ptr
, size_t len
) {
2957 return createObject(REDIS_STRING
,sdsnewlen(ptr
,len
));
2960 static robj
*createStringObjectFromLongLong(long long value
) {
2962 if (value
>= 0 && value
< REDIS_SHARED_INTEGERS
) {
2963 incrRefCount(shared
.integers
[value
]);
2964 o
= shared
.integers
[value
];
2966 if (value
>= LONG_MIN
&& value
<= LONG_MAX
) {
2967 o
= createObject(REDIS_STRING
, NULL
);
2968 o
->encoding
= REDIS_ENCODING_INT
;
2969 o
->ptr
= (void*)((long)value
);
2971 o
= createObject(REDIS_STRING
,sdsfromlonglong(value
));
2977 static robj
*dupStringObject(robj
*o
) {
2978 assert(o
->encoding
== REDIS_ENCODING_RAW
);
2979 return createStringObject(o
->ptr
,sdslen(o
->ptr
));
2982 static robj
*createListObject(void) {
2983 list
*l
= listCreate();
2985 listSetFreeMethod(l
,decrRefCount
);
2986 return createObject(REDIS_LIST
,l
);
2989 static robj
*createSetObject(void) {
2990 dict
*d
= dictCreate(&setDictType
,NULL
);
2991 return createObject(REDIS_SET
,d
);
2994 static robj
*createHashObject(void) {
2995 /* All the Hashes start as zipmaps. Will be automatically converted
2996 * into hash tables if there are enough elements or big elements
2998 unsigned char *zm
= zipmapNew();
2999 robj
*o
= createObject(REDIS_HASH
,zm
);
3000 o
->encoding
= REDIS_ENCODING_ZIPMAP
;
3004 static robj
*createZsetObject(void) {
3005 zset
*zs
= zmalloc(sizeof(*zs
));
3007 zs
->dict
= dictCreate(&zsetDictType
,NULL
);
3008 zs
->zsl
= zslCreate();
3009 return createObject(REDIS_ZSET
,zs
);
3012 static void freeStringObject(robj
*o
) {
3013 if (o
->encoding
== REDIS_ENCODING_RAW
) {
3018 static void freeListObject(robj
*o
) {
3019 listRelease((list
*) o
->ptr
);
3022 static void freeSetObject(robj
*o
) {
3023 dictRelease((dict
*) o
->ptr
);
3026 static void freeZsetObject(robj
*o
) {
3029 dictRelease(zs
->dict
);
3034 static void freeHashObject(robj
*o
) {
3035 switch (o
->encoding
) {
3036 case REDIS_ENCODING_HT
:
3037 dictRelease((dict
*) o
->ptr
);
3039 case REDIS_ENCODING_ZIPMAP
:
3043 redisPanic("Unknown hash encoding type");
3048 static void incrRefCount(robj
*o
) {
3052 static void decrRefCount(void *obj
) {
3055 if (o
->refcount
<= 0) redisPanic("decrRefCount against refcount <= 0");
3056 /* Object is a key of a swapped out value, or in the process of being
3058 if (server
.vm_enabled
&&
3059 (o
->storage
== REDIS_VM_SWAPPED
|| o
->storage
== REDIS_VM_LOADING
))
3061 if (o
->storage
== REDIS_VM_LOADING
) vmCancelThreadedIOJob(obj
);
3062 redisAssert(o
->type
== REDIS_STRING
);
3063 freeStringObject(o
);
3064 vmMarkPagesFree(o
->vm
.page
,o
->vm
.usedpages
);
3065 pthread_mutex_lock(&server
.obj_freelist_mutex
);
3066 if (listLength(server
.objfreelist
) > REDIS_OBJFREELIST_MAX
||
3067 !listAddNodeHead(server
.objfreelist
,o
))
3069 pthread_mutex_unlock(&server
.obj_freelist_mutex
);
3070 server
.vm_stats_swapped_objects
--;
3073 /* Object is in memory, or in the process of being swapped out. */
3074 if (--(o
->refcount
) == 0) {
3075 if (server
.vm_enabled
&& o
->storage
== REDIS_VM_SWAPPING
)
3076 vmCancelThreadedIOJob(obj
);
3078 case REDIS_STRING
: freeStringObject(o
); break;
3079 case REDIS_LIST
: freeListObject(o
); break;
3080 case REDIS_SET
: freeSetObject(o
); break;
3081 case REDIS_ZSET
: freeZsetObject(o
); break;
3082 case REDIS_HASH
: freeHashObject(o
); break;
3083 default: redisPanic("Unknown object type"); break;
3085 if (server
.vm_enabled
) pthread_mutex_lock(&server
.obj_freelist_mutex
);
3086 if (listLength(server
.objfreelist
) > REDIS_OBJFREELIST_MAX
||
3087 !listAddNodeHead(server
.objfreelist
,o
))
3089 if (server
.vm_enabled
) pthread_mutex_unlock(&server
.obj_freelist_mutex
);
3093 static robj
*lookupKey(redisDb
*db
, robj
*key
) {
3094 dictEntry
*de
= dictFind(db
->dict
,key
);
3096 robj
*key
= dictGetEntryKey(de
);
3097 robj
*val
= dictGetEntryVal(de
);
3099 if (server
.vm_enabled
) {
3100 if (key
->storage
== REDIS_VM_MEMORY
||
3101 key
->storage
== REDIS_VM_SWAPPING
)
3103 /* If we were swapping the object out, stop it, this key
3105 if (key
->storage
== REDIS_VM_SWAPPING
)
3106 vmCancelThreadedIOJob(key
);
3107 /* Update the access time of the key for the aging algorithm. */
3108 key
->vm
.atime
= server
.unixtime
;
3110 int notify
= (key
->storage
== REDIS_VM_LOADING
);
3112 /* Our value was swapped on disk. Bring it at home. */
3113 redisAssert(val
== NULL
);
3114 val
= vmLoadObject(key
);
3115 dictGetEntryVal(de
) = val
;
3117 /* Clients blocked by the VM subsystem may be waiting for
3119 if (notify
) handleClientsBlockedOnSwappedKey(db
,key
);
3128 static robj
*lookupKeyRead(redisDb
*db
, robj
*key
) {
3129 expireIfNeeded(db
,key
);
3130 return lookupKey(db
,key
);
3133 static robj
*lookupKeyWrite(redisDb
*db
, robj
*key
) {
3134 deleteIfVolatile(db
,key
);
3135 touchWatchedKey(db
,key
);
3136 return lookupKey(db
,key
);
3139 static robj
*lookupKeyReadOrReply(redisClient
*c
, robj
*key
, robj
*reply
) {
3140 robj
*o
= lookupKeyRead(c
->db
, key
);
3141 if (!o
) addReply(c
,reply
);
3145 static robj
*lookupKeyWriteOrReply(redisClient
*c
, robj
*key
, robj
*reply
) {
3146 robj
*o
= lookupKeyWrite(c
->db
, key
);
3147 if (!o
) addReply(c
,reply
);
3151 static int checkType(redisClient
*c
, robj
*o
, int type
) {
3152 if (o
->type
!= type
) {
3153 addReply(c
,shared
.wrongtypeerr
);
3159 static int deleteKey(redisDb
*db
, robj
*key
) {
3162 /* We need to protect key from destruction: after the first dictDelete()
3163 * it may happen that 'key' is no longer valid if we don't increment
3164 * it's count. This may happen when we get the object reference directly
3165 * from the hash table with dictRandomKey() or dict iterators */
3167 if (dictSize(db
->expires
)) dictDelete(db
->expires
,key
);
3168 retval
= dictDelete(db
->dict
,key
);
3171 return retval
== DICT_OK
;
3174 /* Check if the nul-terminated string 's' can be represented by a long
3175 * (that is, is a number that fits into long without any other space or
3176 * character before or after the digits).
3178 * If so, the function returns REDIS_OK and *longval is set to the value
3179 * of the number. Otherwise REDIS_ERR is returned */
3180 static int isStringRepresentableAsLong(sds s
, long *longval
) {
3181 char buf
[32], *endptr
;
3185 value
= strtol(s
, &endptr
, 10);
3186 if (endptr
[0] != '\0') return REDIS_ERR
;
3187 slen
= ll2string(buf
,32,value
);
3189 /* If the number converted back into a string is not identical
3190 * then it's not possible to encode the string as integer */
3191 if (sdslen(s
) != (unsigned)slen
|| memcmp(buf
,s
,slen
)) return REDIS_ERR
;
3192 if (longval
) *longval
= value
;
3196 /* Try to encode a string object in order to save space */
3197 static robj
*tryObjectEncoding(robj
*o
) {
3201 if (o
->encoding
!= REDIS_ENCODING_RAW
)
3202 return o
; /* Already encoded */
3204 /* It's not safe to encode shared objects: shared objects can be shared
3205 * everywhere in the "object space" of Redis. Encoded objects can only
3206 * appear as "values" (and not, for instance, as keys) */
3207 if (o
->refcount
> 1) return o
;
3209 /* Currently we try to encode only strings */
3210 redisAssert(o
->type
== REDIS_STRING
);
3212 /* Check if we can represent this string as a long integer */
3213 if (isStringRepresentableAsLong(s
,&value
) == REDIS_ERR
) return o
;
3215 /* Ok, this object can be encoded */
3216 if (value
>= 0 && value
< REDIS_SHARED_INTEGERS
) {
3218 incrRefCount(shared
.integers
[value
]);
3219 return shared
.integers
[value
];
3221 o
->encoding
= REDIS_ENCODING_INT
;
3223 o
->ptr
= (void*) value
;
3228 /* Get a decoded version of an encoded object (returned as a new object).
3229 * If the object is already raw-encoded just increment the ref count. */
3230 static robj
*getDecodedObject(robj
*o
) {
3233 if (o
->encoding
== REDIS_ENCODING_RAW
) {
3237 if (o
->type
== REDIS_STRING
&& o
->encoding
== REDIS_ENCODING_INT
) {
3240 ll2string(buf
,32,(long)o
->ptr
);
3241 dec
= createStringObject(buf
,strlen(buf
));
3244 redisPanic("Unknown encoding type");
3248 /* Compare two string objects via strcmp() or alike.
3249 * Note that the objects may be integer-encoded. In such a case we
3250 * use ll2string() to get a string representation of the numbers on the stack
3251 * and compare the strings, it's much faster than calling getDecodedObject().
3253 * Important note: if objects are not integer encoded, but binary-safe strings,
3254 * sdscmp() from sds.c will apply memcmp() so this function ca be considered
3256 static int compareStringObjects(robj
*a
, robj
*b
) {
3257 redisAssert(a
->type
== REDIS_STRING
&& b
->type
== REDIS_STRING
);
3258 char bufa
[128], bufb
[128], *astr
, *bstr
;
3261 if (a
== b
) return 0;
3262 if (a
->encoding
!= REDIS_ENCODING_RAW
) {
3263 ll2string(bufa
,sizeof(bufa
),(long) a
->ptr
);
3269 if (b
->encoding
!= REDIS_ENCODING_RAW
) {
3270 ll2string(bufb
,sizeof(bufb
),(long) b
->ptr
);
3276 return bothsds
? sdscmp(astr
,bstr
) : strcmp(astr
,bstr
);
3279 /* Equal string objects return 1 if the two objects are the same from the
3280 * point of view of a string comparison, otherwise 0 is returned. Note that
3281 * this function is faster then checking for (compareStringObject(a,b) == 0)
3282 * because it can perform some more optimization. */
3283 static int equalStringObjects(robj
*a
, robj
*b
) {
3284 if (a
->encoding
!= REDIS_ENCODING_RAW
&& b
->encoding
!= REDIS_ENCODING_RAW
){
3285 return a
->ptr
== b
->ptr
;
3287 return compareStringObjects(a
,b
) == 0;
3291 static size_t stringObjectLen(robj
*o
) {
3292 redisAssert(o
->type
== REDIS_STRING
);
3293 if (o
->encoding
== REDIS_ENCODING_RAW
) {
3294 return sdslen(o
->ptr
);
3298 return ll2string(buf
,32,(long)o
->ptr
);
3302 static int getDoubleFromObject(robj
*o
, double *target
) {
3309 redisAssert(o
->type
== REDIS_STRING
);
3310 if (o
->encoding
== REDIS_ENCODING_RAW
) {
3311 value
= strtod(o
->ptr
, &eptr
);
3312 if (eptr
[0] != '\0') return REDIS_ERR
;
3313 } else if (o
->encoding
== REDIS_ENCODING_INT
) {
3314 value
= (long)o
->ptr
;
3316 redisPanic("Unknown string encoding");
3324 static int getDoubleFromObjectOrReply(redisClient
*c
, robj
*o
, double *target
, const char *msg
) {
3326 if (getDoubleFromObject(o
, &value
) != REDIS_OK
) {
3328 addReplySds(c
, sdscatprintf(sdsempty(), "-ERR %s\r\n", msg
));
3330 addReplySds(c
, sdsnew("-ERR value is not a double\r\n"));
3339 static int getLongLongFromObject(robj
*o
, long long *target
) {
3346 redisAssert(o
->type
== REDIS_STRING
);
3347 if (o
->encoding
== REDIS_ENCODING_RAW
) {
3348 value
= strtoll(o
->ptr
, &eptr
, 10);
3349 if (eptr
[0] != '\0') return REDIS_ERR
;
3350 } else if (o
->encoding
== REDIS_ENCODING_INT
) {
3351 value
= (long)o
->ptr
;
3353 redisPanic("Unknown string encoding");
3361 static int getLongLongFromObjectOrReply(redisClient
*c
, robj
*o
, long long *target
, const char *msg
) {
3363 if (getLongLongFromObject(o
, &value
) != REDIS_OK
) {
3365 addReplySds(c
, sdscatprintf(sdsempty(), "-ERR %s\r\n", msg
));
3367 addReplySds(c
, sdsnew("-ERR value is not an integer\r\n"));
3376 static int getLongFromObjectOrReply(redisClient
*c
, robj
*o
, long *target
, const char *msg
) {
3379 if (getLongLongFromObjectOrReply(c
, o
, &value
, msg
) != REDIS_OK
) return REDIS_ERR
;
3380 if (value
< LONG_MIN
|| value
> LONG_MAX
) {
3382 addReplySds(c
, sdscatprintf(sdsempty(), "-ERR %s\r\n", msg
));
3384 addReplySds(c
, sdsnew("-ERR value is out of range\r\n"));
3393 /*============================ RDB saving/loading =========================== */
/* Write the single byte 'type' to 'fp'.
 * Returns 0 on success, -1 if nothing could be written. */
static int rdbSaveType(FILE *fp, unsigned char type) {
    size_t written = fwrite(&type, 1, 1, fp);

    return (written == 0) ? -1 : 0;
}
/* Write the time 't' to 'fp' as a 4 byte signed integer in host byte
 * order (the value is truncated to 32 bits).
 * Returns 0 on success, -1 if nothing could be written. */
static int rdbSaveTime(FILE *fp, time_t t) {
    int32_t stamp = (int32_t) t;
    size_t written = fwrite(&stamp, 4, 1, fp);

    return (written == 0) ? -1 : 0;
}
3406 /* check rdbLoadLen() comments for more info */
3407 static int rdbSaveLen(FILE *fp
, uint32_t len
) {
3408 unsigned char buf
[2];
3411 /* Save a 6 bit len */
3412 buf
[0] = (len
&0xFF)|(REDIS_RDB_6BITLEN
<<6);
3413 if (fwrite(buf
,1,1,fp
) == 0) return -1;
3414 } else if (len
< (1<<14)) {
3415 /* Save a 14 bit len */
3416 buf
[0] = ((len
>>8)&0xFF)|(REDIS_RDB_14BITLEN
<<6);
3418 if (fwrite(buf
,2,1,fp
) == 0) return -1;
3420 /* Save a 32 bit len */
3421 buf
[0] = (REDIS_RDB_32BITLEN
<<6);
3422 if (fwrite(buf
,1,1,fp
) == 0) return -1;
3424 if (fwrite(&len
,4,1,fp
) == 0) return -1;
3429 /* Encode 'value' as an integer if possible (if integer will fit the
3430 * supported range). If the function sucessful encoded the integer
3431 * then the (up to 5 bytes) encoded representation is written in the
3432 * string pointed by 'enc' and the length is returned. Otherwise
3434 static int rdbEncodeInteger(long long value
, unsigned char *enc
) {
3435 /* Finally check if it fits in our ranges */
3436 if (value
>= -(1<<7) && value
<= (1<<7)-1) {
3437 enc
[0] = (REDIS_RDB_ENCVAL
<<6)|REDIS_RDB_ENC_INT8
;
3438 enc
[1] = value
&0xFF;
3440 } else if (value
>= -(1<<15) && value
<= (1<<15)-1) {
3441 enc
[0] = (REDIS_RDB_ENCVAL
<<6)|REDIS_RDB_ENC_INT16
;
3442 enc
[1] = value
&0xFF;
3443 enc
[2] = (value
>>8)&0xFF;
3445 } else if (value
>= -((long long)1<<31) && value
<= ((long long)1<<31)-1) {
3446 enc
[0] = (REDIS_RDB_ENCVAL
<<6)|REDIS_RDB_ENC_INT32
;
3447 enc
[1] = value
&0xFF;
3448 enc
[2] = (value
>>8)&0xFF;
3449 enc
[3] = (value
>>16)&0xFF;
3450 enc
[4] = (value
>>24)&0xFF;
/* String objects in the form "2391" or "-100", without any space and with
 * a value that fits in an 8, 16 or 32 bit signed integer, can be encoded
 * as integers to save space.
 *
 * Returns the length of the encoding written into 'enc', or 0 if the
 * string cannot be encoded this way. */
static int rdbTryIntegerEncoding(char *s, size_t len, unsigned char *enc) {
    char canonical[32], *endptr;
    long long parsed;

    /* Check if it's possible to parse the whole string as a number */
    parsed = strtoll(s, &endptr, 10);
    if (endptr[0] != '\0') return 0;

    /* The number converted back into a string must be identical to the
     * input, otherwise the round trip would not preserve the string. */
    ll2string(canonical, 32, parsed);
    if (strlen(canonical) != len) return 0;
    if (memcmp(canonical, s, len) != 0) return 0;

    return rdbEncodeInteger(parsed, enc);
}
3476 static int rdbSaveLzfStringObject(FILE *fp
, unsigned char *s
, size_t len
) {
3477 size_t comprlen
, outlen
;
3481 /* We require at least four bytes compression for this to be worth it */
3482 if (len
<= 4) return 0;
3484 if ((out
= zmalloc(outlen
+1)) == NULL
) return 0;
3485 comprlen
= lzf_compress(s
, len
, out
, outlen
);
3486 if (comprlen
== 0) {
3490 /* Data compressed! Let's save it on disk */
3491 byte
= (REDIS_RDB_ENCVAL
<<6)|REDIS_RDB_ENC_LZF
;
3492 if (fwrite(&byte
,1,1,fp
) == 0) goto writeerr
;
3493 if (rdbSaveLen(fp
,comprlen
) == -1) goto writeerr
;
3494 if (rdbSaveLen(fp
,len
) == -1) goto writeerr
;
3495 if (fwrite(out
,comprlen
,1,fp
) == 0) goto writeerr
;
3504 /* Save a string objet as [len][data] on disk. If the object is a string
3505 * representation of an integer value we try to safe it in a special form */
3506 static int rdbSaveRawString(FILE *fp
, unsigned char *s
, size_t len
) {
3509 /* Try integer encoding */
3511 unsigned char buf
[5];
3512 if ((enclen
= rdbTryIntegerEncoding((char*)s
,len
,buf
)) > 0) {
3513 if (fwrite(buf
,enclen
,1,fp
) == 0) return -1;
3518 /* Try LZF compression - under 20 bytes it's unable to compress even
3519 * aaaaaaaaaaaaaaaaaa so skip it */
3520 if (server
.rdbcompression
&& len
> 20) {
3523 retval
= rdbSaveLzfStringObject(fp
,s
,len
);
3524 if (retval
== -1) return -1;
3525 if (retval
> 0) return 0;
3526 /* retval == 0 means data can't be compressed, save the old way */
3529 /* Store verbatim */
3530 if (rdbSaveLen(fp
,len
) == -1) return -1;
3531 if (len
&& fwrite(s
,len
,1,fp
) == 0) return -1;
3535 /* Like rdbSaveStringObjectRaw() but handle encoded objects */
3536 static int rdbSaveStringObject(FILE *fp
, robj
*obj
) {
3539 /* Avoid to decode the object, then encode it again, if the
3540 * object is alrady integer encoded. */
3541 if (obj
->encoding
== REDIS_ENCODING_INT
) {
3542 long val
= (long) obj
->ptr
;
3543 unsigned char buf
[5];
3546 if ((enclen
= rdbEncodeInteger(val
,buf
)) > 0) {
3547 if (fwrite(buf
,enclen
,1,fp
) == 0) return -1;
3550 /* otherwise... fall throught and continue with the usual
3554 /* Avoid incr/decr ref count business when possible.
3555 * This plays well with copy-on-write given that we are probably
3556 * in a child process (BGSAVE). Also this makes sure key objects
3557 * of swapped objects are not incRefCount-ed (an assert does not allow
3558 * this in order to avoid bugs) */
3559 if (obj
->encoding
!= REDIS_ENCODING_RAW
) {
3560 obj
= getDecodedObject(obj
);
3561 retval
= rdbSaveRawString(fp
,obj
->ptr
,sdslen(obj
->ptr
));
3564 retval
= rdbSaveRawString(fp
,obj
->ptr
,sdslen(obj
->ptr
));
/* Save a double value. Doubles are saved as strings prefixed by an unsigned
 * 8 bit integer specifying the length of the representation.
 * This 8 bit integer has special values used for the non-finite cases:
 * 253: not a number
 * 254: + inf
 * 255: - inf
 *
 * Returns 0 on success, -1 on write error. */
static int rdbSaveDoubleValue(FILE *fp, double val) {
    unsigned char buf[128];
    int len;

    if (isnan(val)) {
        buf[0] = 253;
        len = 1;
    } else if (!isfinite(val)) {
        len = 1;
        buf[0] = (val < 0) ? 255 : 254;
    } else {
#if (DBL_MANT_DIG >= 52) && (LLONG_MAX == 0x7fffffffffffffffLL)
        /* Check if the float is in a safe range to be casted into a
         * long long. We are assuming that long long is 64 bit here.
         * Also we are assuming that there are no implementations around where
         * double has precision < 52 bit.
         *
         * Under these assumptions we test if a double is inside an interval
         * where casting to long long is safe. Then using two castings we
         * make sure the decimal part is zero. If all this is true we use
         * the integer printing function that is much faster. */
        double min = -4503599627370495; /* -((2^52)-1) */
        double max = 4503599627370496; /* 2^52 */
        if (val > min && val < max && val == ((double)((long long)val)))
            /* The text starts at buf+1, so only sizeof(buf)-1 bytes are
             * available: passing sizeof(buf) would overstate the room. */
            ll2string((char*)buf+1,sizeof(buf)-1,(long long)val);
        else
#endif
            snprintf((char*)buf+1,sizeof(buf)-1,"%.17g",val);
        buf[0] = strlen((char*)buf+1);
        len = buf[0]+1;
    }
    if (fwrite(buf,len,1,fp) == 0) return -1;
    return 0;
}
3612 /* Save a Redis object. */
3613 static int rdbSaveObject(FILE *fp
, robj
*o
) {
3614 if (o
->type
== REDIS_STRING
) {
3615 /* Save a string value */
3616 if (rdbSaveStringObject(fp
,o
) == -1) return -1;
3617 } else if (o
->type
== REDIS_LIST
) {
3618 /* Save a list value */
3619 list
*list
= o
->ptr
;
3623 if (rdbSaveLen(fp
,listLength(list
)) == -1) return -1;
3624 listRewind(list
,&li
);
3625 while((ln
= listNext(&li
))) {
3626 robj
*eleobj
= listNodeValue(ln
);
3628 if (rdbSaveStringObject(fp
,eleobj
) == -1) return -1;
3630 } else if (o
->type
== REDIS_SET
) {
3631 /* Save a set value */
3633 dictIterator
*di
= dictGetIterator(set
);
3636 if (rdbSaveLen(fp
,dictSize(set
)) == -1) return -1;
3637 while((de
= dictNext(di
)) != NULL
) {
3638 robj
*eleobj
= dictGetEntryKey(de
);
3640 if (rdbSaveStringObject(fp
,eleobj
) == -1) return -1;
3642 dictReleaseIterator(di
);
3643 } else if (o
->type
== REDIS_ZSET
) {
3644 /* Save a set value */
3646 dictIterator
*di
= dictGetIterator(zs
->dict
);
3649 if (rdbSaveLen(fp
,dictSize(zs
->dict
)) == -1) return -1;
3650 while((de
= dictNext(di
)) != NULL
) {
3651 robj
*eleobj
= dictGetEntryKey(de
);
3652 double *score
= dictGetEntryVal(de
);
3654 if (rdbSaveStringObject(fp
,eleobj
) == -1) return -1;
3655 if (rdbSaveDoubleValue(fp
,*score
) == -1) return -1;
3657 dictReleaseIterator(di
);
3658 } else if (o
->type
== REDIS_HASH
) {
3659 /* Save a hash value */
3660 if (o
->encoding
== REDIS_ENCODING_ZIPMAP
) {
3661 unsigned char *p
= zipmapRewind(o
->ptr
);
3662 unsigned int count
= zipmapLen(o
->ptr
);
3663 unsigned char *key
, *val
;
3664 unsigned int klen
, vlen
;
3666 if (rdbSaveLen(fp
,count
) == -1) return -1;
3667 while((p
= zipmapNext(p
,&key
,&klen
,&val
,&vlen
)) != NULL
) {
3668 if (rdbSaveRawString(fp
,key
,klen
) == -1) return -1;
3669 if (rdbSaveRawString(fp
,val
,vlen
) == -1) return -1;
3672 dictIterator
*di
= dictGetIterator(o
->ptr
);
3675 if (rdbSaveLen(fp
,dictSize((dict
*)o
->ptr
)) == -1) return -1;
3676 while((de
= dictNext(di
)) != NULL
) {
3677 robj
*key
= dictGetEntryKey(de
);
3678 robj
*val
= dictGetEntryVal(de
);
3680 if (rdbSaveStringObject(fp
,key
) == -1) return -1;
3681 if (rdbSaveStringObject(fp
,val
) == -1) return -1;
3683 dictReleaseIterator(di
);
3686 redisPanic("Unknown object type");
3691 /* Return the length the object will have on disk if saved with
3692 * the rdbSaveObject() function. Currently we use a trick to get
3693 * this length with very little changes to the code. In the future
3694 * we could switch to a faster solution. */
3695 static off_t
rdbSavedObjectLen(robj
*o
, FILE *fp
) {
3696 if (fp
== NULL
) fp
= server
.devnull
;
3698 assert(rdbSaveObject(fp
,o
) != 1);
3702 /* Return the number of pages required to save this object in the swap file */
3703 static off_t
rdbSavedObjectPages(robj
*o
, FILE *fp
) {
3704 off_t bytes
= rdbSavedObjectLen(o
,fp
);
3706 return (bytes
+(server
.vm_page_size
-1))/server
.vm_page_size
;
3709 /* Save the DB on disk. Return REDIS_ERR on error, REDIS_OK on success */
3710 static int rdbSave(char *filename
) {
3711 dictIterator
*di
= NULL
;
3716 time_t now
= time(NULL
);
3718 /* Wait for I/O therads to terminate, just in case this is a
3719 * foreground-saving, to avoid seeking the swap file descriptor at the
3721 if (server
.vm_enabled
)
3722 waitEmptyIOJobsQueue();
3724 snprintf(tmpfile
,256,"temp-%d.rdb", (int) getpid());
3725 fp
= fopen(tmpfile
,"w");
3727 redisLog(REDIS_WARNING
, "Failed saving the DB: %s", strerror(errno
));
3730 if (fwrite("REDIS0001",9,1,fp
) == 0) goto werr
;
3731 for (j
= 0; j
< server
.dbnum
; j
++) {
3732 redisDb
*db
= server
.db
+j
;
3734 if (dictSize(d
) == 0) continue;
3735 di
= dictGetIterator(d
);
3741 /* Write the SELECT DB opcode */
3742 if (rdbSaveType(fp
,REDIS_SELECTDB
) == -1) goto werr
;
3743 if (rdbSaveLen(fp
,j
) == -1) goto werr
;
3745 /* Iterate this DB writing every entry */
3746 while((de
= dictNext(di
)) != NULL
) {
3747 robj
*key
= dictGetEntryKey(de
);
3748 robj
*o
= dictGetEntryVal(de
);
3749 time_t expiretime
= getExpire(db
,key
);
3751 /* Save the expire time */
3752 if (expiretime
!= -1) {
3753 /* If this key is already expired skip it */
3754 if (expiretime
< now
) continue;
3755 if (rdbSaveType(fp
,REDIS_EXPIRETIME
) == -1) goto werr
;
3756 if (rdbSaveTime(fp
,expiretime
) == -1) goto werr
;
3758 /* Save the key and associated value. This requires special
3759 * handling if the value is swapped out. */
3760 if (!server
.vm_enabled
|| key
->storage
== REDIS_VM_MEMORY
||
3761 key
->storage
== REDIS_VM_SWAPPING
) {
3762 /* Save type, key, value */
3763 if (rdbSaveType(fp
,o
->type
) == -1) goto werr
;
3764 if (rdbSaveStringObject(fp
,key
) == -1) goto werr
;
3765 if (rdbSaveObject(fp
,o
) == -1) goto werr
;
3767 /* REDIS_VM_SWAPPED or REDIS_VM_LOADING */
3769 /* Get a preview of the object in memory */
3770 po
= vmPreviewObject(key
);
3771 /* Save type, key, value */
3772 if (rdbSaveType(fp
,key
->vtype
) == -1) goto werr
;
3773 if (rdbSaveStringObject(fp
,key
) == -1) goto werr
;
3774 if (rdbSaveObject(fp
,po
) == -1) goto werr
;
3775 /* Remove the loaded object from memory */
3779 dictReleaseIterator(di
);
3782 if (rdbSaveType(fp
,REDIS_EOF
) == -1) goto werr
;
3784 /* Make sure data will not remain on the OS's output buffers */
3789 /* Use RENAME to make sure the DB file is changed atomically only
3790 * if the generate DB file is ok. */
3791 if (rename(tmpfile
,filename
) == -1) {
3792 redisLog(REDIS_WARNING
,"Error moving temp DB file on the final destination: %s", strerror(errno
));
3796 redisLog(REDIS_NOTICE
,"DB saved on disk");
3798 server
.lastsave
= time(NULL
);
3804 redisLog(REDIS_WARNING
,"Write error saving DB on disk: %s", strerror(errno
));
3805 if (di
) dictReleaseIterator(di
);
3809 static int rdbSaveBackground(char *filename
) {
3812 if (server
.bgsavechildpid
!= -1) return REDIS_ERR
;
3813 if (server
.vm_enabled
) waitEmptyIOJobsQueue();
3814 if ((childpid
= fork()) == 0) {
3816 if (server
.vm_enabled
) vmReopenSwapFile();
3818 if (rdbSave(filename
) == REDIS_OK
) {
3825 if (childpid
== -1) {
3826 redisLog(REDIS_WARNING
,"Can't save in background: fork: %s",
3830 redisLog(REDIS_NOTICE
,"Background saving started by pid %d",childpid
);
3831 server
.bgsavechildpid
= childpid
;
3832 updateDictResizePolicy();
3835 return REDIS_OK
; /* unreached */
/* Remove the temporary dump file "temp-<childpid>.rdb" from the current
 * directory. Errors from unlink() are ignored. */
static void rdbRemoveTempFile(pid_t childpid) {
    char path[256];

    snprintf(path, 256, "temp-%d.rdb", (int) childpid);
    unlink(path);
}
/* Read one byte from 'fp' and return it as the object type (0-255),
 * or -1 if nothing could be read. */
static int rdbLoadType(FILE *fp) {
    unsigned char byte;

    return (fread(&byte, 1, 1, fp) == 0) ? -1 : byte;
}
/* Read a 4 byte signed integer (host byte order) from 'fp' and return it
 * as a time_t, or -1 if nothing could be read. */
static time_t rdbLoadTime(FILE *fp) {
    int32_t stamp;

    if (fread(&stamp, 4, 1, fp) == 0) return -1;
    return (time_t) stamp;
}
3857 /* Load an encoded length from the DB, see the REDIS_RDB_* defines on the top
3858 * of this file for a description of how this are stored on disk.
3860 * isencoded is set to 1 if the readed length is not actually a length but
3861 * an "encoding type", check the above comments for more info */
3862 static uint32_t rdbLoadLen(FILE *fp
, int *isencoded
) {
3863 unsigned char buf
[2];
3867 if (isencoded
) *isencoded
= 0;
3868 if (fread(buf
,1,1,fp
) == 0) return REDIS_RDB_LENERR
;
3869 type
= (buf
[0]&0xC0)>>6;
3870 if (type
== REDIS_RDB_6BITLEN
) {
3871 /* Read a 6 bit len */
3873 } else if (type
== REDIS_RDB_ENCVAL
) {
3874 /* Read a 6 bit len encoding type */
3875 if (isencoded
) *isencoded
= 1;
3877 } else if (type
== REDIS_RDB_14BITLEN
) {
3878 /* Read a 14 bit len */
3879 if (fread(buf
+1,1,1,fp
) == 0) return REDIS_RDB_LENERR
;
3880 return ((buf
[0]&0x3F)<<8)|buf
[1];
3882 /* Read a 32 bit len */
3883 if (fread(&len
,4,1,fp
) == 0) return REDIS_RDB_LENERR
;
3888 /* Load an integer-encoded object from file 'fp', with the specified
3889 * encoding type 'enctype'. If encode is true the function may return
3890 * an integer-encoded object as reply, otherwise the returned object
3891 * will always be encoded as a raw string. */
3892 static robj
*rdbLoadIntegerObject(FILE *fp
, int enctype
, int encode
) {
3893 unsigned char enc
[4];
3896 if (enctype
== REDIS_RDB_ENC_INT8
) {
3897 if (fread(enc
,1,1,fp
) == 0) return NULL
;
3898 val
= (signed char)enc
[0];
3899 } else if (enctype
== REDIS_RDB_ENC_INT16
) {
3901 if (fread(enc
,2,1,fp
) == 0) return NULL
;
3902 v
= enc
[0]|(enc
[1]<<8);
3904 } else if (enctype
== REDIS_RDB_ENC_INT32
) {
3906 if (fread(enc
,4,1,fp
) == 0) return NULL
;
3907 v
= enc
[0]|(enc
[1]<<8)|(enc
[2]<<16)|(enc
[3]<<24);
3910 val
= 0; /* anti-warning */
3911 redisPanic("Unknown RDB integer encoding type");
3914 return createStringObjectFromLongLong(val
);
3916 return createObject(REDIS_STRING
,sdsfromlonglong(val
));
3919 static robj
*rdbLoadLzfStringObject(FILE*fp
) {
3920 unsigned int len
, clen
;
3921 unsigned char *c
= NULL
;
3924 if ((clen
= rdbLoadLen(fp
,NULL
)) == REDIS_RDB_LENERR
) return NULL
;
3925 if ((len
= rdbLoadLen(fp
,NULL
)) == REDIS_RDB_LENERR
) return NULL
;
3926 if ((c
= zmalloc(clen
)) == NULL
) goto err
;
3927 if ((val
= sdsnewlen(NULL
,len
)) == NULL
) goto err
;
3928 if (fread(c
,clen
,1,fp
) == 0) goto err
;
3929 if (lzf_decompress(c
,clen
,val
,len
) == 0) goto err
;
3931 return createObject(REDIS_STRING
,val
);
3938 static robj
*rdbGenericLoadStringObject(FILE*fp
, int encode
) {
3943 len
= rdbLoadLen(fp
,&isencoded
);
3946 case REDIS_RDB_ENC_INT8
:
3947 case REDIS_RDB_ENC_INT16
:
3948 case REDIS_RDB_ENC_INT32
:
3949 return rdbLoadIntegerObject(fp
,len
,encode
);
3950 case REDIS_RDB_ENC_LZF
:
3951 return rdbLoadLzfStringObject(fp
);
3953 redisPanic("Unknown RDB encoding type");
3957 if (len
== REDIS_RDB_LENERR
) return NULL
;
3958 val
= sdsnewlen(NULL
,len
);
3959 if (len
&& fread(val
,len
,1,fp
) == 0) {
3963 return createObject(REDIS_STRING
,val
);
3966 static robj
*rdbLoadStringObject(FILE *fp
) {
3967 return rdbGenericLoadStringObject(fp
,0);
3970 static robj
*rdbLoadEncodedStringObject(FILE *fp
) {
3971 return rdbGenericLoadStringObject(fp
,1);
3974 /* For information about double serialization check rdbSaveDoubleValue() */
3975 static int rdbLoadDoubleValue(FILE *fp
, double *val
) {
3979 if (fread(&len
,1,1,fp
) == 0) return -1;
3981 case 255: *val
= R_NegInf
; return 0;
3982 case 254: *val
= R_PosInf
; return 0;
3983 case 253: *val
= R_Nan
; return 0;
3985 if (fread(buf
,len
,1,fp
) == 0) return -1;
3987 sscanf(buf
, "%lg", val
);
3992 /* Load a Redis object of the specified type from the specified file.
3993 * On success a newly allocated object is returned, otherwise NULL. */
3994 static robj
*rdbLoadObject(int type
, FILE *fp
) {
3997 redisLog(REDIS_DEBUG
,"LOADING OBJECT %d (at %d)\n",type
,ftell(fp
));
3998 if (type
== REDIS_STRING
) {
3999 /* Read string value */
4000 if ((o
= rdbLoadEncodedStringObject(fp
)) == NULL
) return NULL
;
4001 o
= tryObjectEncoding(o
);
4002 } else if (type
== REDIS_LIST
|| type
== REDIS_SET
) {
4003 /* Read list/set value */
4006 if ((listlen
= rdbLoadLen(fp
,NULL
)) == REDIS_RDB_LENERR
) return NULL
;
4007 o
= (type
== REDIS_LIST
) ? createListObject() : createSetObject();
4008 /* It's faster to expand the dict to the right size asap in order
4009 * to avoid rehashing */
4010 if (type
== REDIS_SET
&& listlen
> DICT_HT_INITIAL_SIZE
)
4011 dictExpand(o
->ptr
,listlen
);
4012 /* Load every single element of the list/set */
4016 if ((ele
= rdbLoadEncodedStringObject(fp
)) == NULL
) return NULL
;
4017 ele
= tryObjectEncoding(ele
);
4018 if (type
== REDIS_LIST
) {
4019 listAddNodeTail((list
*)o
->ptr
,ele
);
4021 dictAdd((dict
*)o
->ptr
,ele
,NULL
);
4024 } else if (type
== REDIS_ZSET
) {
4025 /* Read list/set value */
4029 if ((zsetlen
= rdbLoadLen(fp
,NULL
)) == REDIS_RDB_LENERR
) return NULL
;
4030 o
= createZsetObject();
4032 /* Load every single element of the list/set */
4035 double *score
= zmalloc(sizeof(double));
4037 if ((ele
= rdbLoadEncodedStringObject(fp
)) == NULL
) return NULL
;
4038 ele
= tryObjectEncoding(ele
);
4039 if (rdbLoadDoubleValue(fp
,score
) == -1) return NULL
;
4040 dictAdd(zs
->dict
,ele
,score
);
4041 zslInsert(zs
->zsl
,*score
,ele
);
4042 incrRefCount(ele
); /* added to skiplist */
4044 } else if (type
== REDIS_HASH
) {
4047 if ((hashlen
= rdbLoadLen(fp
,NULL
)) == REDIS_RDB_LENERR
) return NULL
;
4048 o
= createHashObject();
4049 /* Too many entries? Use an hash table. */
4050 if (hashlen
> server
.hash_max_zipmap_entries
)
4051 convertToRealHash(o
);
4052 /* Load every key/value, then set it into the zipmap or hash
4053 * table, as needed. */
4057 if ((key
= rdbLoadStringObject(fp
)) == NULL
) return NULL
;
4058 if ((val
= rdbLoadStringObject(fp
)) == NULL
) return NULL
;
4059 /* If we are using a zipmap and there are too big values
4060 * the object is converted to real hash table encoding. */
4061 if (o
->encoding
!= REDIS_ENCODING_HT
&&
4062 (sdslen(key
->ptr
) > server
.hash_max_zipmap_value
||
4063 sdslen(val
->ptr
) > server
.hash_max_zipmap_value
))
4065 convertToRealHash(o
);
4068 if (o
->encoding
== REDIS_ENCODING_ZIPMAP
) {
4069 unsigned char *zm
= o
->ptr
;
4071 zm
= zipmapSet(zm
,key
->ptr
,sdslen(key
->ptr
),
4072 val
->ptr
,sdslen(val
->ptr
),NULL
);
4077 key
= tryObjectEncoding(key
);
4078 val
= tryObjectEncoding(val
);
4079 dictAdd((dict
*)o
->ptr
,key
,val
);
4083 redisPanic("Unknown object type");
4088 static int rdbLoad(char *filename
) {
4091 int type
, retval
, rdbver
;
4092 int swap_all_values
= 0;
4093 dict
*d
= server
.db
[0].dict
;
4094 redisDb
*db
= server
.db
+0;
4096 time_t expiretime
, now
= time(NULL
);
4097 long long loadedkeys
= 0;
4099 fp
= fopen(filename
,"r");
4100 if (!fp
) return REDIS_ERR
;
4101 if (fread(buf
,9,1,fp
) == 0) goto eoferr
;
4103 if (memcmp(buf
,"REDIS",5) != 0) {
4105 redisLog(REDIS_WARNING
,"Wrong signature trying to load DB from file");
4108 rdbver
= atoi(buf
+5);
4111 redisLog(REDIS_WARNING
,"Can't handle RDB format version %d",rdbver
);
4119 if ((type
= rdbLoadType(fp
)) == -1) goto eoferr
;
4120 if (type
== REDIS_EXPIRETIME
) {
4121 if ((expiretime
= rdbLoadTime(fp
)) == -1) goto eoferr
;
4122 /* We read the time so we need to read the object type again */
4123 if ((type
= rdbLoadType(fp
)) == -1) goto eoferr
;
4125 if (type
== REDIS_EOF
) break;
4126 /* Handle SELECT DB opcode as a special case */
4127 if (type
== REDIS_SELECTDB
) {
4128 if ((dbid
= rdbLoadLen(fp
,NULL
)) == REDIS_RDB_LENERR
)
4130 if (dbid
>= (unsigned)server
.dbnum
) {
4131 redisLog(REDIS_WARNING
,"FATAL: Data file was created with a Redis server configured to handle more than %d databases. Exiting\n", server
.dbnum
);
4134 db
= server
.db
+dbid
;
4139 if ((key
= rdbLoadStringObject(fp
)) == NULL
) goto eoferr
;
4141 if ((val
= rdbLoadObject(type
,fp
)) == NULL
) goto eoferr
;
4142 /* Check if the key already expired */
4143 if (expiretime
!= -1 && expiretime
< now
) {
4148 /* Add the new object in the hash table */
4149 retval
= dictAdd(d
,key
,val
);
4150 if (retval
== DICT_ERR
) {
4151 redisLog(REDIS_WARNING
,"Loading DB, duplicated key (%s) found! Unrecoverable error, exiting now.", key
->ptr
);
4155 /* Set the expire time if needed */
4156 if (expiretime
!= -1) setExpire(db
,key
,expiretime
);
4158 /* Handle swapping while loading big datasets when VM is on */
4160 /* If we detecter we are hopeless about fitting something in memory
4161 * we just swap every new key on disk. Directly...
4162 * Note that's important to check for this condition before resorting
4163 * to random sampling, otherwise we may try to swap already
4165 if (swap_all_values
) {
4166 dictEntry
*de
= dictFind(d
,key
);
4168 /* de may be NULL since the key already expired */
4170 key
= dictGetEntryKey(de
);
4171 val
= dictGetEntryVal(de
);
4173 if (vmSwapObjectBlocking(key
,val
) == REDIS_OK
) {
4174 dictGetEntryVal(de
) = NULL
;
4180 /* If we have still some hope of having some value fitting memory
4181 * then we try random sampling. */
4182 if (!swap_all_values
&& server
.vm_enabled
&& (loadedkeys
% 5000) == 0) {
4183 while (zmalloc_used_memory() > server
.vm_max_memory
) {
4184 if (vmSwapOneObjectBlocking() == REDIS_ERR
) break;
4186 if (zmalloc_used_memory() > server
.vm_max_memory
)
4187 swap_all_values
= 1; /* We are already using too much mem */
4193 eoferr
: /* unexpected end of file is handled here with a fatal exit */
4194 redisLog(REDIS_WARNING
,"Short read or OOM loading DB. Unrecoverable error, aborting now.");
4196 return REDIS_ERR
; /* Just to avoid warning */
/*================================== Shutdown =============================== */
4200 static int prepareForShutdown() {
4201 redisLog(REDIS_WARNING
,"User requested shutdown, saving DB...");
4202 /* Kill the saving child if there is a background saving in progress.
4203 We want to avoid race conditions, for instance our saving child may
4204 overwrite the synchronous saving did by SHUTDOWN. */
4205 if (server
.bgsavechildpid
!= -1) {
4206 redisLog(REDIS_WARNING
,"There is a live saving child. Killing it!");
4207 kill(server
.bgsavechildpid
,SIGKILL
);
4208 rdbRemoveTempFile(server
.bgsavechildpid
);
4210 if (server
.appendonly
) {
4211 /* Append only file: fsync() the AOF and exit */
4212 fsync(server
.appendfd
);
4213 if (server
.vm_enabled
) unlink(server
.vm_swap_file
);
4215 /* Snapshotting. Perform a SYNC SAVE and exit */
4216 if (rdbSave(server
.dbfilename
) == REDIS_OK
) {
4217 if (server
.daemonize
)
4218 unlink(server
.pidfile
);
4219 redisLog(REDIS_WARNING
,"%zu bytes used at exit",zmalloc_used_memory());
4221 /* Ooops.. error saving! The best we can do is to continue
4222 * operating. Note that if there was a background saving process,
4223 * in the next cron() Redis will be notified that the background
4224 * saving aborted, handling special stuff like slaves pending for
4225 * synchronization... */
4226 redisLog(REDIS_WARNING
,"Error trying to save the DB, can't exit");
4230 redisLog(REDIS_WARNING
,"Server exit now, bye bye...");
/*================================== Commands =============================== */
4236 static void authCommand(redisClient
*c
) {
4237 if (!server
.requirepass
|| !strcmp(c
->argv
[1]->ptr
, server
.requirepass
)) {
4238 c
->authenticated
= 1;
4239 addReply(c
,shared
.ok
);
4241 c
->authenticated
= 0;
4242 addReplySds(c
,sdscatprintf(sdsempty(),"-ERR invalid password\r\n"));
4246 static void pingCommand(redisClient
*c
) {
4247 addReply(c
,shared
.pong
);
4250 static void echoCommand(redisClient
*c
) {
4251 addReplyBulk(c
,c
->argv
[1]);
/*=================================== Strings =============================== */
4256 static void setGenericCommand(redisClient
*c
, int nx
, robj
*key
, robj
*val
, robj
*expire
) {
4258 long seconds
= 0; /* initialized to avoid an harmness warning */
4261 if (getLongFromObjectOrReply(c
, expire
, &seconds
, NULL
) != REDIS_OK
)
4264 addReplySds(c
,sdsnew("-ERR invalid expire time in SETEX\r\n"));
4269 touchWatchedKey(c
->db
,key
);
4270 if (nx
) deleteIfVolatile(c
->db
,key
);
4271 retval
= dictAdd(c
->db
->dict
,key
,val
);
4272 if (retval
== DICT_ERR
) {
4274 /* If the key is about a swapped value, we want a new key object
4275 * to overwrite the old. So we delete the old key in the database.
4276 * This will also make sure that swap pages about the old object
4277 * will be marked as free. */
4278 if (server
.vm_enabled
&& deleteIfSwapped(c
->db
,key
))
4280 dictReplace(c
->db
->dict
,key
,val
);
4283 addReply(c
,shared
.czero
);
4291 removeExpire(c
->db
,key
);
4292 if (expire
) setExpire(c
->db
,key
,time(NULL
)+seconds
);
4293 addReply(c
, nx
? shared
.cone
: shared
.ok
);
4296 static void setCommand(redisClient
*c
) {
4297 setGenericCommand(c
,0,c
->argv
[1],c
->argv
[2],NULL
);
4300 static void setnxCommand(redisClient
*c
) {
4301 setGenericCommand(c
,1,c
->argv
[1],c
->argv
[2],NULL
);
4304 static void setexCommand(redisClient
*c
) {
4305 setGenericCommand(c
,0,c
->argv
[1],c
->argv
[3],c
->argv
[2]);
4308 static int getGenericCommand(redisClient
*c
) {
4311 if ((o
= lookupKeyReadOrReply(c
,c
->argv
[1],shared
.nullbulk
)) == NULL
)
4314 if (o
->type
!= REDIS_STRING
) {
4315 addReply(c
,shared
.wrongtypeerr
);
4323 static void getCommand(redisClient
*c
) {
4324 getGenericCommand(c
);
4327 static void getsetCommand(redisClient
*c
) {
4328 if (getGenericCommand(c
) == REDIS_ERR
) return;
4329 if (dictAdd(c
->db
->dict
,c
->argv
[1],c
->argv
[2]) == DICT_ERR
) {
4330 dictReplace(c
->db
->dict
,c
->argv
[1],c
->argv
[2]);
4332 incrRefCount(c
->argv
[1]);
4334 incrRefCount(c
->argv
[2]);
4336 removeExpire(c
->db
,c
->argv
[1]);
4339 static void mgetCommand(redisClient
*c
) {
4342 addReplySds(c
,sdscatprintf(sdsempty(),"*%d\r\n",c
->argc
-1));
4343 for (j
= 1; j
< c
->argc
; j
++) {
4344 robj
*o
= lookupKeyRead(c
->db
,c
->argv
[j
]);
4346 addReply(c
,shared
.nullbulk
);
4348 if (o
->type
!= REDIS_STRING
) {
4349 addReply(c
,shared
.nullbulk
);
4357 static void msetGenericCommand(redisClient
*c
, int nx
) {
4358 int j
, busykeys
= 0;
4360 if ((c
->argc
% 2) == 0) {
4361 addReplySds(c
,sdsnew("-ERR wrong number of arguments for MSET\r\n"));
4364 /* Handle the NX flag. The MSETNX semantic is to return zero and don't
4365 * set nothing at all if at least one already key exists. */
4367 for (j
= 1; j
< c
->argc
; j
+= 2) {
4368 if (lookupKeyWrite(c
->db
,c
->argv
[j
]) != NULL
) {
4374 addReply(c
, shared
.czero
);
4378 for (j
= 1; j
< c
->argc
; j
+= 2) {
4381 c
->argv
[j
+1] = tryObjectEncoding(c
->argv
[j
+1]);
4382 retval
= dictAdd(c
->db
->dict
,c
->argv
[j
],c
->argv
[j
+1]);
4383 if (retval
== DICT_ERR
) {
4384 dictReplace(c
->db
->dict
,c
->argv
[j
],c
->argv
[j
+1]);
4385 incrRefCount(c
->argv
[j
+1]);
4387 incrRefCount(c
->argv
[j
]);
4388 incrRefCount(c
->argv
[j
+1]);
4390 removeExpire(c
->db
,c
->argv
[j
]);
4392 server
.dirty
+= (c
->argc
-1)/2;
4393 addReply(c
, nx
? shared
.cone
: shared
.ok
);
4396 static void msetCommand(redisClient
*c
) {
4397 msetGenericCommand(c
,0);
4400 static void msetnxCommand(redisClient
*c
) {
4401 msetGenericCommand(c
,1);
4404 static void incrDecrCommand(redisClient
*c
, long long incr
) {
4409 o
= lookupKeyWrite(c
->db
,c
->argv
[1]);
4410 if (o
!= NULL
&& checkType(c
,o
,REDIS_STRING
)) return;
4411 if (getLongLongFromObjectOrReply(c
,o
,&value
,NULL
) != REDIS_OK
) return;
4414 o
= createStringObjectFromLongLong(value
);
4415 retval
= dictAdd(c
->db
->dict
,c
->argv
[1],o
);
4416 if (retval
== DICT_ERR
) {
4417 dictReplace(c
->db
->dict
,c
->argv
[1],o
);
4418 removeExpire(c
->db
,c
->argv
[1]);
4420 incrRefCount(c
->argv
[1]);
4423 addReply(c
,shared
.colon
);
4425 addReply(c
,shared
.crlf
);
4428 static void incrCommand(redisClient
*c
) {
4429 incrDecrCommand(c
,1);
4432 static void decrCommand(redisClient
*c
) {
4433 incrDecrCommand(c
,-1);
4436 static void incrbyCommand(redisClient
*c
) {
4439 if (getLongLongFromObjectOrReply(c
, c
->argv
[2], &incr
, NULL
) != REDIS_OK
) return;
4440 incrDecrCommand(c
,incr
);
4443 static void decrbyCommand(redisClient
*c
) {
4446 if (getLongLongFromObjectOrReply(c
, c
->argv
[2], &incr
, NULL
) != REDIS_OK
) return;
4447 incrDecrCommand(c
,-incr
);
4450 static void appendCommand(redisClient
*c
) {
4455 o
= lookupKeyWrite(c
->db
,c
->argv
[1]);
4457 /* Create the key */
4458 retval
= dictAdd(c
->db
->dict
,c
->argv
[1],c
->argv
[2]);
4459 incrRefCount(c
->argv
[1]);
4460 incrRefCount(c
->argv
[2]);
4461 totlen
= stringObjectLen(c
->argv
[2]);
4465 de
= dictFind(c
->db
->dict
,c
->argv
[1]);
4468 o
= dictGetEntryVal(de
);
4469 if (o
->type
!= REDIS_STRING
) {
4470 addReply(c
,shared
.wrongtypeerr
);
4473 /* If the object is specially encoded or shared we have to make
4475 if (o
->refcount
!= 1 || o
->encoding
!= REDIS_ENCODING_RAW
) {
4476 robj
*decoded
= getDecodedObject(o
);
4478 o
= createStringObject(decoded
->ptr
, sdslen(decoded
->ptr
));
4479 decrRefCount(decoded
);
4480 dictReplace(c
->db
->dict
,c
->argv
[1],o
);
4483 if (c
->argv
[2]->encoding
== REDIS_ENCODING_RAW
) {
4484 o
->ptr
= sdscatlen(o
->ptr
,
4485 c
->argv
[2]->ptr
, sdslen(c
->argv
[2]->ptr
));
4487 o
->ptr
= sdscatprintf(o
->ptr
, "%ld",
4488 (unsigned long) c
->argv
[2]->ptr
);
4490 totlen
= sdslen(o
->ptr
);
4493 addReplySds(c
,sdscatprintf(sdsempty(),":%lu\r\n",(unsigned long)totlen
));
4496 static void substrCommand(redisClient
*c
) {
4498 long start
= atoi(c
->argv
[2]->ptr
);
4499 long end
= atoi(c
->argv
[3]->ptr
);
4500 size_t rangelen
, strlen
;
4503 if ((o
= lookupKeyReadOrReply(c
,c
->argv
[1],shared
.nullbulk
)) == NULL
||
4504 checkType(c
,o
,REDIS_STRING
)) return;
4506 o
= getDecodedObject(o
);
4507 strlen
= sdslen(o
->ptr
);
4509 /* convert negative indexes */
4510 if (start
< 0) start
= strlen
+start
;
4511 if (end
< 0) end
= strlen
+end
;
4512 if (start
< 0) start
= 0;
4513 if (end
< 0) end
= 0;
4515 /* indexes sanity checks */
4516 if (start
> end
|| (size_t)start
>= strlen
) {
4517 /* Out of range start or start > end result in null reply */
4518 addReply(c
,shared
.nullbulk
);
4522 if ((size_t)end
>= strlen
) end
= strlen
-1;
4523 rangelen
= (end
-start
)+1;
4525 /* Return the result */
4526 addReplySds(c
,sdscatprintf(sdsempty(),"$%zu\r\n",rangelen
));
4527 range
= sdsnewlen((char*)o
->ptr
+start
,rangelen
);
4528 addReplySds(c
,range
);
4529 addReply(c
,shared
.crlf
);
/* ========================= Type agnostic commands ========================= */
4535 static void delCommand(redisClient
*c
) {
4538 for (j
= 1; j
< c
->argc
; j
++) {
4539 if (deleteKey(c
->db
,c
->argv
[j
])) {
4540 touchWatchedKey(c
->db
,c
->argv
[j
]);
4545 addReplyLongLong(c
,deleted
);
4548 static void existsCommand(redisClient
*c
) {
4549 expireIfNeeded(c
->db
,c
->argv
[1]);
4550 if (dictFind(c
->db
->dict
,c
->argv
[1])) {
4551 addReply(c
, shared
.cone
);
4553 addReply(c
, shared
.czero
);
4557 static void selectCommand(redisClient
*c
) {
4558 int id
= atoi(c
->argv
[1]->ptr
);
4560 if (selectDb(c
,id
) == REDIS_ERR
) {
4561 addReplySds(c
,sdsnew("-ERR invalid DB index\r\n"));
4563 addReply(c
,shared
.ok
);
4567 static void randomkeyCommand(redisClient
*c
) {
4572 de
= dictGetRandomKey(c
->db
->dict
);
4573 if (!de
|| expireIfNeeded(c
->db
,dictGetEntryKey(de
)) == 0) break;
4577 addReply(c
,shared
.nullbulk
);
4581 key
= dictGetEntryKey(de
);
4582 if (server
.vm_enabled
) {
4583 key
= dupStringObject(key
);
4584 addReplyBulk(c
,key
);
4587 addReplyBulk(c
,key
);
4591 static void keysCommand(redisClient
*c
) {
4594 sds pattern
= c
->argv
[1]->ptr
;
4595 int plen
= sdslen(pattern
);
4596 unsigned long numkeys
= 0;
4597 robj
*lenobj
= createObject(REDIS_STRING
,NULL
);
4599 di
= dictGetIterator(c
->db
->dict
);
4601 decrRefCount(lenobj
);
4602 while((de
= dictNext(di
)) != NULL
) {
4603 robj
*keyobj
= dictGetEntryKey(de
);
4605 sds key
= keyobj
->ptr
;
4606 if ((pattern
[0] == '*' && pattern
[1] == '\0') ||
4607 stringmatchlen(pattern
,plen
,key
,sdslen(key
),0)) {
4608 if (expireIfNeeded(c
->db
,keyobj
) == 0) {
4609 addReplyBulk(c
,keyobj
);
4614 dictReleaseIterator(di
);
4615 lenobj
->ptr
= sdscatprintf(sdsempty(),"*%lu\r\n",numkeys
);
4618 static void dbsizeCommand(redisClient
*c
) {
4620 sdscatprintf(sdsempty(),":%lu\r\n",dictSize(c
->db
->dict
)));
4623 static void lastsaveCommand(redisClient
*c
) {
4625 sdscatprintf(sdsempty(),":%lu\r\n",server
.lastsave
));
4628 static void typeCommand(redisClient
*c
) {
4632 o
= lookupKeyRead(c
->db
,c
->argv
[1]);
4637 case REDIS_STRING
: type
= "+string"; break;
4638 case REDIS_LIST
: type
= "+list"; break;
4639 case REDIS_SET
: type
= "+set"; break;
4640 case REDIS_ZSET
: type
= "+zset"; break;
4641 case REDIS_HASH
: type
= "+hash"; break;
4642 default: type
= "+unknown"; break;
4645 addReplySds(c
,sdsnew(type
));
4646 addReply(c
,shared
.crlf
);
4649 static void saveCommand(redisClient
*c
) {
4650 if (server
.bgsavechildpid
!= -1) {
4651 addReplySds(c
,sdsnew("-ERR background save in progress\r\n"));
4654 if (rdbSave(server
.dbfilename
) == REDIS_OK
) {
4655 addReply(c
,shared
.ok
);
4657 addReply(c
,shared
.err
);
4661 static void bgsaveCommand(redisClient
*c
) {
4662 if (server
.bgsavechildpid
!= -1) {
4663 addReplySds(c
,sdsnew("-ERR background save already in progress\r\n"));
4666 if (rdbSaveBackground(server
.dbfilename
) == REDIS_OK
) {
4667 char *status
= "+Background saving started\r\n";
4668 addReplySds(c
,sdsnew(status
));
4670 addReply(c
,shared
.err
);
4674 static void shutdownCommand(redisClient
*c
) {
4675 if (prepareForShutdown() == REDIS_OK
)
4677 addReplySds(c
, sdsnew("-ERR Errors trying to SHUTDOWN. Check logs.\r\n"));
4680 static void renameGenericCommand(redisClient
*c
, int nx
) {
4683 /* To use the same key as src and dst is probably an error */
4684 if (sdscmp(c
->argv
[1]->ptr
,c
->argv
[2]->ptr
) == 0) {
4685 addReply(c
,shared
.sameobjecterr
);
4689 if ((o
= lookupKeyWriteOrReply(c
,c
->argv
[1],shared
.nokeyerr
)) == NULL
)
4693 deleteIfVolatile(c
->db
,c
->argv
[2]);
4694 if (dictAdd(c
->db
->dict
,c
->argv
[2],o
) == DICT_ERR
) {
4697 addReply(c
,shared
.czero
);
4700 dictReplace(c
->db
->dict
,c
->argv
[2],o
);
4702 incrRefCount(c
->argv
[2]);
4704 deleteKey(c
->db
,c
->argv
[1]);
4705 touchWatchedKey(c
->db
,c
->argv
[2]);
4707 addReply(c
,nx
? shared
.cone
: shared
.ok
);
4710 static void renameCommand(redisClient
*c
) {
4711 renameGenericCommand(c
,0);
4714 static void renamenxCommand(redisClient
*c
) {
4715 renameGenericCommand(c
,1);
4718 static void moveCommand(redisClient
*c
) {
4723 /* Obtain source and target DB pointers */
4726 if (selectDb(c
,atoi(c
->argv
[2]->ptr
)) == REDIS_ERR
) {
4727 addReply(c
,shared
.outofrangeerr
);
4731 selectDb(c
,srcid
); /* Back to the source DB */
4733 /* If the user is moving using as target the same
4734 * DB as the source DB it is probably an error. */
4736 addReply(c
,shared
.sameobjecterr
);
4740 /* Check if the element exists and get a reference */
4741 o
= lookupKeyWrite(c
->db
,c
->argv
[1]);
4743 addReply(c
,shared
.czero
);
4747 /* Try to add the element to the target DB */
4748 deleteIfVolatile(dst
,c
->argv
[1]);
4749 if (dictAdd(dst
->dict
,c
->argv
[1],o
) == DICT_ERR
) {
4750 addReply(c
,shared
.czero
);
4753 incrRefCount(c
->argv
[1]);
4756 /* OK! key moved, free the entry in the source DB */
4757 deleteKey(src
,c
->argv
[1]);
4759 addReply(c
,shared
.cone
);
/* =================================== Lists ================================ */
4763 static void pushGenericCommand(redisClient
*c
, int where
) {
4767 lobj
= lookupKeyWrite(c
->db
,c
->argv
[1]);
4769 if (handleClientsWaitingListPush(c
,c
->argv
[1],c
->argv
[2])) {
4770 addReply(c
,shared
.cone
);
4773 lobj
= createListObject();
4775 if (where
== REDIS_HEAD
) {
4776 listAddNodeHead(list
,c
->argv
[2]);
4778 listAddNodeTail(list
,c
->argv
[2]);
4780 dictAdd(c
->db
->dict
,c
->argv
[1],lobj
);
4781 incrRefCount(c
->argv
[1]);
4782 incrRefCount(c
->argv
[2]);
4784 if (lobj
->type
!= REDIS_LIST
) {
4785 addReply(c
,shared
.wrongtypeerr
);
4788 if (handleClientsWaitingListPush(c
,c
->argv
[1],c
->argv
[2])) {
4789 addReply(c
,shared
.cone
);
4793 if (where
== REDIS_HEAD
) {
4794 listAddNodeHead(list
,c
->argv
[2]);
4796 listAddNodeTail(list
,c
->argv
[2]);
4798 incrRefCount(c
->argv
[2]);
4801 addReplyLongLong(c
,listLength(list
));
4804 static void lpushCommand(redisClient
*c
) {
4805 pushGenericCommand(c
,REDIS_HEAD
);
4808 static void rpushCommand(redisClient
*c
) {
4809 pushGenericCommand(c
,REDIS_TAIL
);
4812 static void llenCommand(redisClient
*c
) {
4816 if ((o
= lookupKeyReadOrReply(c
,c
->argv
[1],shared
.czero
)) == NULL
||
4817 checkType(c
,o
,REDIS_LIST
)) return;
4820 addReplyUlong(c
,listLength(l
));
4823 static void lindexCommand(redisClient
*c
) {
4825 int index
= atoi(c
->argv
[2]->ptr
);
4829 if ((o
= lookupKeyReadOrReply(c
,c
->argv
[1],shared
.nullbulk
)) == NULL
||
4830 checkType(c
,o
,REDIS_LIST
)) return;
4833 ln
= listIndex(list
, index
);
4835 addReply(c
,shared
.nullbulk
);
4837 robj
*ele
= listNodeValue(ln
);
4838 addReplyBulk(c
,ele
);
4842 static void lsetCommand(redisClient
*c
) {
4844 int index
= atoi(c
->argv
[2]->ptr
);
4848 if ((o
= lookupKeyWriteOrReply(c
,c
->argv
[1],shared
.nokeyerr
)) == NULL
||
4849 checkType(c
,o
,REDIS_LIST
)) return;
4852 ln
= listIndex(list
, index
);
4854 addReply(c
,shared
.outofrangeerr
);
4856 robj
*ele
= listNodeValue(ln
);
4859 listNodeValue(ln
) = c
->argv
[3];
4860 incrRefCount(c
->argv
[3]);
4861 addReply(c
,shared
.ok
);
4866 static void popGenericCommand(redisClient
*c
, int where
) {
4871 if ((o
= lookupKeyWriteOrReply(c
,c
->argv
[1],shared
.nullbulk
)) == NULL
||
4872 checkType(c
,o
,REDIS_LIST
)) return;
4875 if (where
== REDIS_HEAD
)
4876 ln
= listFirst(list
);
4878 ln
= listLast(list
);
4881 addReply(c
,shared
.nullbulk
);
4883 robj
*ele
= listNodeValue(ln
);
4884 addReplyBulk(c
,ele
);
4885 listDelNode(list
,ln
);
4886 if (listLength(list
) == 0) deleteKey(c
->db
,c
->argv
[1]);
4891 static void lpopCommand(redisClient
*c
) {
4892 popGenericCommand(c
,REDIS_HEAD
);
4895 static void rpopCommand(redisClient
*c
) {
4896 popGenericCommand(c
,REDIS_TAIL
);
4899 static void lrangeCommand(redisClient
*c
) {
4901 int start
= atoi(c
->argv
[2]->ptr
);
4902 int end
= atoi(c
->argv
[3]->ptr
);
4909 if ((o
= lookupKeyReadOrReply(c
,c
->argv
[1],shared
.emptymultibulk
)) == NULL
4910 || checkType(c
,o
,REDIS_LIST
)) return;
4912 llen
= listLength(list
);
4914 /* convert negative indexes */
4915 if (start
< 0) start
= llen
+start
;
4916 if (end
< 0) end
= llen
+end
;
4917 if (start
< 0) start
= 0;
4918 if (end
< 0) end
= 0;
4920 /* indexes sanity checks */
4921 if (start
> end
|| start
>= llen
) {
4922 /* Out of range start or start > end result in empty list */
4923 addReply(c
,shared
.emptymultibulk
);
4926 if (end
>= llen
) end
= llen
-1;
4927 rangelen
= (end
-start
)+1;
4929 /* Return the result in form of a multi-bulk reply */
4930 ln
= listIndex(list
, start
);
4931 addReplySds(c
,sdscatprintf(sdsempty(),"*%d\r\n",rangelen
));
4932 for (j
= 0; j
< rangelen
; j
++) {
4933 ele
= listNodeValue(ln
);
4934 addReplyBulk(c
,ele
);
4939 static void ltrimCommand(redisClient
*c
) {
4941 int start
= atoi(c
->argv
[2]->ptr
);
4942 int end
= atoi(c
->argv
[3]->ptr
);
4944 int j
, ltrim
, rtrim
;
4948 if ((o
= lookupKeyWriteOrReply(c
,c
->argv
[1],shared
.ok
)) == NULL
||
4949 checkType(c
,o
,REDIS_LIST
)) return;
4951 llen
= listLength(list
);
4953 /* convert negative indexes */
4954 if (start
< 0) start
= llen
+start
;
4955 if (end
< 0) end
= llen
+end
;
4956 if (start
< 0) start
= 0;
4957 if (end
< 0) end
= 0;
4959 /* indexes sanity checks */
4960 if (start
> end
|| start
>= llen
) {
4961 /* Out of range start or start > end result in empty list */
4965 if (end
>= llen
) end
= llen
-1;
4970 /* Remove list elements to perform the trim */
4971 for (j
= 0; j
< ltrim
; j
++) {
4972 ln
= listFirst(list
);
4973 listDelNode(list
,ln
);
4975 for (j
= 0; j
< rtrim
; j
++) {
4976 ln
= listLast(list
);
4977 listDelNode(list
,ln
);
4979 if (listLength(list
) == 0) deleteKey(c
->db
,c
->argv
[1]);
4981 addReply(c
,shared
.ok
);
4984 static void lremCommand(redisClient
*c
) {
4987 listNode
*ln
, *next
;
4988 int toremove
= atoi(c
->argv
[2]->ptr
);
4992 if ((o
= lookupKeyWriteOrReply(c
,c
->argv
[1],shared
.czero
)) == NULL
||
4993 checkType(c
,o
,REDIS_LIST
)) return;
4997 toremove
= -toremove
;
5000 ln
= fromtail
? list
->tail
: list
->head
;
5002 robj
*ele
= listNodeValue(ln
);
5004 next
= fromtail
? ln
->prev
: ln
->next
;
5005 if (equalStringObjects(ele
,c
->argv
[3])) {
5006 listDelNode(list
,ln
);
5009 if (toremove
&& removed
== toremove
) break;
5013 if (listLength(list
) == 0) deleteKey(c
->db
,c
->argv
[1]);
5014 addReplySds(c
,sdscatprintf(sdsempty(),":%d\r\n",removed
));
5017 /* This is the semantic of this command:
5018 * RPOPLPUSH srclist dstlist:
5019 * IF LLEN(srclist) > 0
5020 * element = RPOP srclist
5021 * LPUSH dstlist element
5028 * The idea is to be able to get an element from a list in a reliable way
5029 * since the element is not just returned but pushed against another list
5030 * as well. This command was originally proposed by Ezra Zygmuntowicz.
5032 static void rpoplpushcommand(redisClient
*c
) {
5037 if ((sobj
= lookupKeyWriteOrReply(c
,c
->argv
[1],shared
.nullbulk
)) == NULL
||
5038 checkType(c
,sobj
,REDIS_LIST
)) return;
5039 srclist
= sobj
->ptr
;
5040 ln
= listLast(srclist
);
5043 addReply(c
,shared
.nullbulk
);
5045 robj
*dobj
= lookupKeyWrite(c
->db
,c
->argv
[2]);
5046 robj
*ele
= listNodeValue(ln
);
5049 if (dobj
&& dobj
->type
!= REDIS_LIST
) {
5050 addReply(c
,shared
.wrongtypeerr
);
5054 /* Add the element to the target list (unless it's directly
5055 * passed to some BLPOP-ing client */
5056 if (!handleClientsWaitingListPush(c
,c
->argv
[2],ele
)) {
5058 /* Create the list if the key does not exist */
5059 dobj
= createListObject();
5060 dictAdd(c
->db
->dict
,c
->argv
[2],dobj
);
5061 incrRefCount(c
->argv
[2]);
5063 dstlist
= dobj
->ptr
;
5064 listAddNodeHead(dstlist
,ele
);
5068 /* Send the element to the client as reply as well */
5069 addReplyBulk(c
,ele
);
5071 /* Finally remove the element from the source list */
5072 listDelNode(srclist
,ln
);
5073 if (listLength(srclist
) == 0) deleteKey(c
->db
,c
->argv
[1]);
/* ==================================== Sets ================================ */
5080 static void saddCommand(redisClient
*c
) {
5083 set
= lookupKeyWrite(c
->db
,c
->argv
[1]);
5085 set
= createSetObject();
5086 dictAdd(c
->db
->dict
,c
->argv
[1],set
);
5087 incrRefCount(c
->argv
[1]);
5089 if (set
->type
!= REDIS_SET
) {
5090 addReply(c
,shared
.wrongtypeerr
);
5094 if (dictAdd(set
->ptr
,c
->argv
[2],NULL
) == DICT_OK
) {
5095 incrRefCount(c
->argv
[2]);
5097 addReply(c
,shared
.cone
);
5099 addReply(c
,shared
.czero
);
5103 static void sremCommand(redisClient
*c
) {
5106 if ((set
= lookupKeyWriteOrReply(c
,c
->argv
[1],shared
.czero
)) == NULL
||
5107 checkType(c
,set
,REDIS_SET
)) return;
5109 if (dictDelete(set
->ptr
,c
->argv
[2]) == DICT_OK
) {
5111 if (htNeedsResize(set
->ptr
)) dictResize(set
->ptr
);
5112 if (dictSize((dict
*)set
->ptr
) == 0) deleteKey(c
->db
,c
->argv
[1]);
5113 addReply(c
,shared
.cone
);
5115 addReply(c
,shared
.czero
);
5119 static void smoveCommand(redisClient
*c
) {
5120 robj
*srcset
, *dstset
;
5122 srcset
= lookupKeyWrite(c
->db
,c
->argv
[1]);
5123 dstset
= lookupKeyWrite(c
->db
,c
->argv
[2]);
5125 /* If the source key does not exist return 0, if it's of the wrong type
5127 if (srcset
== NULL
|| srcset
->type
!= REDIS_SET
) {
5128 addReply(c
, srcset
? shared
.wrongtypeerr
: shared
.czero
);
5131 /* Error if the destination key is not a set as well */
5132 if (dstset
&& dstset
->type
!= REDIS_SET
) {
5133 addReply(c
,shared
.wrongtypeerr
);
5136 /* Remove the element from the source set */
5137 if (dictDelete(srcset
->ptr
,c
->argv
[3]) == DICT_ERR
) {
5138 /* Key not found in the src set! return zero */
5139 addReply(c
,shared
.czero
);
5142 if (dictSize((dict
*)srcset
->ptr
) == 0 && srcset
!= dstset
)
5143 deleteKey(c
->db
,c
->argv
[1]);
5145 /* Add the element to the destination set */
5147 dstset
= createSetObject();
5148 dictAdd(c
->db
->dict
,c
->argv
[2],dstset
);
5149 incrRefCount(c
->argv
[2]);
5151 if (dictAdd(dstset
->ptr
,c
->argv
[3],NULL
) == DICT_OK
)
5152 incrRefCount(c
->argv
[3]);
5153 addReply(c
,shared
.cone
);
5156 static void sismemberCommand(redisClient
*c
) {
5159 if ((set
= lookupKeyReadOrReply(c
,c
->argv
[1],shared
.czero
)) == NULL
||
5160 checkType(c
,set
,REDIS_SET
)) return;
5162 if (dictFind(set
->ptr
,c
->argv
[2]))
5163 addReply(c
,shared
.cone
);
5165 addReply(c
,shared
.czero
);
5168 static void scardCommand(redisClient
*c
) {
5172 if ((o
= lookupKeyReadOrReply(c
,c
->argv
[1],shared
.czero
)) == NULL
||
5173 checkType(c
,o
,REDIS_SET
)) return;
5176 addReplyUlong(c
,dictSize(s
));
5179 static void spopCommand(redisClient
*c
) {
5183 if ((set
= lookupKeyWriteOrReply(c
,c
->argv
[1],shared
.nullbulk
)) == NULL
||
5184 checkType(c
,set
,REDIS_SET
)) return;
5186 de
= dictGetRandomKey(set
->ptr
);
5188 addReply(c
,shared
.nullbulk
);
5190 robj
*ele
= dictGetEntryKey(de
);
5192 addReplyBulk(c
,ele
);
5193 dictDelete(set
->ptr
,ele
);
5194 if (htNeedsResize(set
->ptr
)) dictResize(set
->ptr
);
5195 if (dictSize((dict
*)set
->ptr
) == 0) deleteKey(c
->db
,c
->argv
[1]);
5200 static void srandmemberCommand(redisClient
*c
) {
5204 if ((set
= lookupKeyReadOrReply(c
,c
->argv
[1],shared
.nullbulk
)) == NULL
||
5205 checkType(c
,set
,REDIS_SET
)) return;
5207 de
= dictGetRandomKey(set
->ptr
);
5209 addReply(c
,shared
.nullbulk
);
5211 robj
*ele
= dictGetEntryKey(de
);
5213 addReplyBulk(c
,ele
);
5217 static int qsortCompareSetsByCardinality(const void *s1
, const void *s2
) {
5218 dict
**d1
= (void*) s1
, **d2
= (void*) s2
;
5220 return dictSize(*d1
)-dictSize(*d2
);
5223 static void sinterGenericCommand(redisClient
*c
, robj
**setskeys
, unsigned long setsnum
, robj
*dstkey
) {
5224 dict
**dv
= zmalloc(sizeof(dict
*)*setsnum
);
5227 robj
*lenobj
= NULL
, *dstset
= NULL
;
5228 unsigned long j
, cardinality
= 0;
5230 for (j
= 0; j
< setsnum
; j
++) {
5234 lookupKeyWrite(c
->db
,setskeys
[j
]) :
5235 lookupKeyRead(c
->db
,setskeys
[j
]);
5239 if (deleteKey(c
->db
,dstkey
))
5241 addReply(c
,shared
.czero
);
5243 addReply(c
,shared
.emptymultibulk
);
5247 if (setobj
->type
!= REDIS_SET
) {
5249 addReply(c
,shared
.wrongtypeerr
);
5252 dv
[j
] = setobj
->ptr
;
5254 /* Sort sets from the smallest to largest, this will improve our
5255 * algorithm's performace */
5256 qsort(dv
,setsnum
,sizeof(dict
*),qsortCompareSetsByCardinality
);
5258 /* The first thing we should output is the total number of elements...
5259 * since this is a multi-bulk write, but at this stage we don't know
5260 * the intersection set size, so we use a trick, append an empty object
5261 * to the output list and save the pointer to later modify it with the
5264 lenobj
= createObject(REDIS_STRING
,NULL
);
5266 decrRefCount(lenobj
);
5268 /* If we have a target key where to store the resulting set
5269 * create this key with an empty set inside */
5270 dstset
= createSetObject();
5273 /* Iterate all the elements of the first (smallest) set, and test
5274 * the element against all the other sets, if at least one set does
5275 * not include the element it is discarded */
5276 di
= dictGetIterator(dv
[0]);
5278 while((de
= dictNext(di
)) != NULL
) {
5281 for (j
= 1; j
< setsnum
; j
++)
5282 if (dictFind(dv
[j
],dictGetEntryKey(de
)) == NULL
) break;
5284 continue; /* at least one set does not contain the member */
5285 ele
= dictGetEntryKey(de
);
5287 addReplyBulk(c
,ele
);
5290 dictAdd(dstset
->ptr
,ele
,NULL
);
5294 dictReleaseIterator(di
);
5297 /* Store the resulting set into the target, if the intersection
5298 * is not an empty set. */
5299 deleteKey(c
->db
,dstkey
);
5300 if (dictSize((dict
*)dstset
->ptr
) > 0) {
5301 dictAdd(c
->db
->dict
,dstkey
,dstset
);
5302 incrRefCount(dstkey
);
5303 addReplyLongLong(c
,dictSize((dict
*)dstset
->ptr
));
5305 decrRefCount(dstset
);
5306 addReply(c
,shared
.czero
);
5310 lenobj
->ptr
= sdscatprintf(sdsempty(),"*%lu\r\n",cardinality
);
5315 static void sinterCommand(redisClient
*c
) {
5316 sinterGenericCommand(c
,c
->argv
+1,c
->argc
-1,NULL
);
5319 static void sinterstoreCommand(redisClient
*c
) {
5320 sinterGenericCommand(c
,c
->argv
+2,c
->argc
-2,c
->argv
[1]);
5323 #define REDIS_OP_UNION 0
5324 #define REDIS_OP_DIFF 1
5325 #define REDIS_OP_INTER 2
5327 static void sunionDiffGenericCommand(redisClient
*c
, robj
**setskeys
, int setsnum
, robj
*dstkey
, int op
) {
5328 dict
**dv
= zmalloc(sizeof(dict
*)*setsnum
);
5331 robj
*dstset
= NULL
;
5332 int j
, cardinality
= 0;
5334 for (j
= 0; j
< setsnum
; j
++) {
5338 lookupKeyWrite(c
->db
,setskeys
[j
]) :
5339 lookupKeyRead(c
->db
,setskeys
[j
]);
5344 if (setobj
->type
!= REDIS_SET
) {
5346 addReply(c
,shared
.wrongtypeerr
);
5349 dv
[j
] = setobj
->ptr
;
5352 /* We need a temp set object to store our union. If the dstkey
5353 * is not NULL (that is, we are inside an SUNIONSTORE operation) then
5354 * this set object will be the resulting object to set into the target key*/
5355 dstset
= createSetObject();
5357 /* Iterate all the elements of all the sets, add every element a single
5358 * time to the result set */
5359 for (j
= 0; j
< setsnum
; j
++) {
5360 if (op
== REDIS_OP_DIFF
&& j
== 0 && !dv
[j
]) break; /* result set is empty */
5361 if (!dv
[j
]) continue; /* non existing keys are like empty sets */
5363 di
= dictGetIterator(dv
[j
]);
5365 while((de
= dictNext(di
)) != NULL
) {
5368 /* dictAdd will not add the same element multiple times */
5369 ele
= dictGetEntryKey(de
);
5370 if (op
== REDIS_OP_UNION
|| j
== 0) {
5371 if (dictAdd(dstset
->ptr
,ele
,NULL
) == DICT_OK
) {
5375 } else if (op
== REDIS_OP_DIFF
) {
5376 if (dictDelete(dstset
->ptr
,ele
) == DICT_OK
) {
5381 dictReleaseIterator(di
);
5383 /* result set is empty? Exit asap. */
5384 if (op
== REDIS_OP_DIFF
&& cardinality
== 0) break;
5387 /* Output the content of the resulting set, if not in STORE mode */
5389 addReplySds(c
,sdscatprintf(sdsempty(),"*%d\r\n",cardinality
));
5390 di
= dictGetIterator(dstset
->ptr
);
5391 while((de
= dictNext(di
)) != NULL
) {
5394 ele
= dictGetEntryKey(de
);
5395 addReplyBulk(c
,ele
);
5397 dictReleaseIterator(di
);
5398 decrRefCount(dstset
);
5400 /* If we have a target key where to store the resulting set
5401 * create this key with the result set inside */
5402 deleteKey(c
->db
,dstkey
);
5403 if (dictSize((dict
*)dstset
->ptr
) > 0) {
5404 dictAdd(c
->db
->dict
,dstkey
,dstset
);
5405 incrRefCount(dstkey
);
5406 addReplyLongLong(c
,dictSize((dict
*)dstset
->ptr
));
5408 decrRefCount(dstset
);
5409 addReply(c
,shared
.czero
);
5416 static void sunionCommand(redisClient
*c
) {
5417 sunionDiffGenericCommand(c
,c
->argv
+1,c
->argc
-1,NULL
,REDIS_OP_UNION
);
5420 static void sunionstoreCommand(redisClient
*c
) {
5421 sunionDiffGenericCommand(c
,c
->argv
+2,c
->argc
-2,c
->argv
[1],REDIS_OP_UNION
);
5424 static void sdiffCommand(redisClient
*c
) {
5425 sunionDiffGenericCommand(c
,c
->argv
+1,c
->argc
-1,NULL
,REDIS_OP_DIFF
);
5428 static void sdiffstoreCommand(redisClient
*c
) {
5429 sunionDiffGenericCommand(c
,c
->argv
+2,c
->argc
-2,c
->argv
[1],REDIS_OP_DIFF
);
5432 /* ==================================== ZSets =============================== */
5434 /* ZSETs are ordered sets using two data structures to hold the same elements
5435 * in order to get O(log(N)) INSERT and REMOVE operations into a sorted
5438 * The elements are added to a hash table mapping Redis objects to scores.
5439 * At the same time the elements are added to a skip list mapping scores
5440 * to Redis objects (so objects are sorted by scores in this "view"). */
5442 /* This skiplist implementation is almost a C translation of the original
5443 * algorithm described by William Pugh in "Skip Lists: A Probabilistic
5444 * Alternative to Balanced Trees", modified in three ways:
5445 * a) this implementation allows for repeated values.
5446 * b) the comparison is not just by key (our 'score') but by satellite data.
5447 * c) there is a back pointer, so it's a doubly linked list with the back
5448 * pointers being only at "level 1". This allows to traverse the list
5449 * from tail to head, useful for ZREVRANGE. */
5451 static zskiplistNode
*zslCreateNode(int level
, double score
, robj
*obj
) {
5452 zskiplistNode
*zn
= zmalloc(sizeof(*zn
));
5454 zn
->forward
= zmalloc(sizeof(zskiplistNode
*) * level
);
5456 zn
->span
= zmalloc(sizeof(unsigned int) * (level
- 1));
5464 static zskiplist
*zslCreate(void) {
5468 zsl
= zmalloc(sizeof(*zsl
));
5471 zsl
->header
= zslCreateNode(ZSKIPLIST_MAXLEVEL
,0,NULL
);
5472 for (j
= 0; j
< ZSKIPLIST_MAXLEVEL
; j
++) {
5473 zsl
->header
->forward
[j
] = NULL
;
5475 /* span has space for ZSKIPLIST_MAXLEVEL-1 elements */
5476 if (j
< ZSKIPLIST_MAXLEVEL
-1)
5477 zsl
->header
->span
[j
] = 0;
5479 zsl
->header
->backward
= NULL
;
5484 static void zslFreeNode(zskiplistNode
*node
) {
5485 decrRefCount(node
->obj
);
5486 zfree(node
->forward
);
5491 static void zslFree(zskiplist
*zsl
) {
5492 zskiplistNode
*node
= zsl
->header
->forward
[0], *next
;
5494 zfree(zsl
->header
->forward
);
5495 zfree(zsl
->header
->span
);
5498 next
= node
->forward
[0];
5505 static int zslRandomLevel(void) {
5507 while ((random()&0xFFFF) < (ZSKIPLIST_P
* 0xFFFF))
5509 return (level
<ZSKIPLIST_MAXLEVEL
) ? level
: ZSKIPLIST_MAXLEVEL
;
5512 static void zslInsert(zskiplist
*zsl
, double score
, robj
*obj
) {
5513 zskiplistNode
*update
[ZSKIPLIST_MAXLEVEL
], *x
;
5514 unsigned int rank
[ZSKIPLIST_MAXLEVEL
];
5518 for (i
= zsl
->level
-1; i
>= 0; i
--) {
5519 /* store rank that is crossed to reach the insert position */
5520 rank
[i
] = i
== (zsl
->level
-1) ? 0 : rank
[i
+1];
5522 while (x
->forward
[i
] &&
5523 (x
->forward
[i
]->score
< score
||
5524 (x
->forward
[i
]->score
== score
&&
5525 compareStringObjects(x
->forward
[i
]->obj
,obj
) < 0))) {
5526 rank
[i
] += i
> 0 ? x
->span
[i
-1] : 1;
5531 /* we assume the key is not already inside, since we allow duplicated
5532 * scores, and the re-insertion of score and redis object should never
5533 * happen since the caller of zslInsert() should test in the hash table
5534 * if the element is already inside or not. */
5535 level
= zslRandomLevel();
5536 if (level
> zsl
->level
) {
5537 for (i
= zsl
->level
; i
< level
; i
++) {
5539 update
[i
] = zsl
->header
;
5540 update
[i
]->span
[i
-1] = zsl
->length
;
5544 x
= zslCreateNode(level
,score
,obj
);
5545 for (i
= 0; i
< level
; i
++) {
5546 x
->forward
[i
] = update
[i
]->forward
[i
];
5547 update
[i
]->forward
[i
] = x
;
5549 /* update span covered by update[i] as x is inserted here */
5551 x
->span
[i
-1] = update
[i
]->span
[i
-1] - (rank
[0] - rank
[i
]);
5552 update
[i
]->span
[i
-1] = (rank
[0] - rank
[i
]) + 1;
5556 /* increment span for untouched levels */
5557 for (i
= level
; i
< zsl
->level
; i
++) {
5558 update
[i
]->span
[i
-1]++;
5561 x
->backward
= (update
[0] == zsl
->header
) ? NULL
: update
[0];
5563 x
->forward
[0]->backward
= x
;
5569 /* Internal function used by zslDelete, zslDeleteByScore and zslDeleteByRank */
5570 void zslDeleteNode(zskiplist
*zsl
, zskiplistNode
*x
, zskiplistNode
**update
) {
5572 for (i
= 0; i
< zsl
->level
; i
++) {
5573 if (update
[i
]->forward
[i
] == x
) {
5575 update
[i
]->span
[i
-1] += x
->span
[i
-1] - 1;
5577 update
[i
]->forward
[i
] = x
->forward
[i
];
5579 /* invariant: i > 0, because update[0]->forward[0]
5580 * is always equal to x */
5581 update
[i
]->span
[i
-1] -= 1;
5584 if (x
->forward
[0]) {
5585 x
->forward
[0]->backward
= x
->backward
;
5587 zsl
->tail
= x
->backward
;
5589 while(zsl
->level
> 1 && zsl
->header
->forward
[zsl
->level
-1] == NULL
)
5594 /* Delete an element with matching score/object from the skiplist. */
5595 static int zslDelete(zskiplist
*zsl
, double score
, robj
*obj
) {
5596 zskiplistNode
*update
[ZSKIPLIST_MAXLEVEL
], *x
;
5600 for (i
= zsl
->level
-1; i
>= 0; i
--) {
5601 while (x
->forward
[i
] &&
5602 (x
->forward
[i
]->score
< score
||
5603 (x
->forward
[i
]->score
== score
&&
5604 compareStringObjects(x
->forward
[i
]->obj
,obj
) < 0)))
5608 /* We may have multiple elements with the same score, what we need
5609 * is to find the element with both the right score and object. */
5611 if (x
&& score
== x
->score
&& equalStringObjects(x
->obj
,obj
)) {
5612 zslDeleteNode(zsl
, x
, update
);
5616 return 0; /* not found */
5618 return 0; /* not found */
5621 /* Delete all the elements with score between min and max from the skiplist.
5622 * Min and max are inclusive, so a score >= min && score <= max is deleted.
5623 * Note that this function takes the reference to the hash table view of the
5624 * sorted set, in order to remove the elements from the hash table too. */
5625 static unsigned long zslDeleteRangeByScore(zskiplist
*zsl
, double min
, double max
, dict
*dict
) {
5626 zskiplistNode
*update
[ZSKIPLIST_MAXLEVEL
], *x
;
5627 unsigned long removed
= 0;
5631 for (i
= zsl
->level
-1; i
>= 0; i
--) {
5632 while (x
->forward
[i
] && x
->forward
[i
]->score
< min
)
5636 /* We may have multiple elements with the same score, what we need
5637 * is to find the element with both the right score and object. */
5639 while (x
&& x
->score
<= max
) {
5640 zskiplistNode
*next
= x
->forward
[0];
5641 zslDeleteNode(zsl
, x
, update
);
5642 dictDelete(dict
,x
->obj
);
5647 return removed
; /* not found */
5650 /* Delete all the elements with rank between start and end from the skiplist.
5651 * Start and end are inclusive. Note that start and end need to be 1-based */
5652 static unsigned long zslDeleteRangeByRank(zskiplist
*zsl
, unsigned int start
, unsigned int end
, dict
*dict
) {
5653 zskiplistNode
*update
[ZSKIPLIST_MAXLEVEL
], *x
;
5654 unsigned long traversed
= 0, removed
= 0;
5658 for (i
= zsl
->level
-1; i
>= 0; i
--) {
5659 while (x
->forward
[i
] && (traversed
+ (i
> 0 ? x
->span
[i
-1] : 1)) < start
) {
5660 traversed
+= i
> 0 ? x
->span
[i
-1] : 1;
5668 while (x
&& traversed
<= end
) {
5669 zskiplistNode
*next
= x
->forward
[0];
5670 zslDeleteNode(zsl
, x
, update
);
5671 dictDelete(dict
,x
->obj
);
5680 /* Find the first node having a score equal or greater than the specified one.
5681 * Returns NULL if there is no match. */
5682 static zskiplistNode
*zslFirstWithScore(zskiplist
*zsl
, double score
) {
5687 for (i
= zsl
->level
-1; i
>= 0; i
--) {
5688 while (x
->forward
[i
] && x
->forward
[i
]->score
< score
)
5691 /* We may have multiple elements with the same score, what we need
5692 * is to find the element with both the right score and object. */
5693 return x
->forward
[0];
5696 /* Find the rank for an element by both score and key.
5697 * Returns 0 when the element cannot be found, rank otherwise.
5698 * Note that the rank is 1-based due to the span of zsl->header to the
5700 static unsigned long zslGetRank(zskiplist
*zsl
, double score
, robj
*o
) {
5702 unsigned long rank
= 0;
5706 for (i
= zsl
->level
-1; i
>= 0; i
--) {
5707 while (x
->forward
[i
] &&
5708 (x
->forward
[i
]->score
< score
||
5709 (x
->forward
[i
]->score
== score
&&
5710 compareStringObjects(x
->forward
[i
]->obj
,o
) <= 0))) {
5711 rank
+= i
> 0 ? x
->span
[i
-1] : 1;
5715 /* x might be equal to zsl->header, so test if obj is non-NULL */
5716 if (x
->obj
&& equalStringObjects(x
->obj
,o
)) {
5723 /* Finds an element by its rank. The rank argument needs to be 1-based. */
5724 zskiplistNode
* zslGetElementByRank(zskiplist
*zsl
, unsigned long rank
) {
5726 unsigned long traversed
= 0;
5730 for (i
= zsl
->level
-1; i
>= 0; i
--) {
5731 while (x
->forward
[i
] && (traversed
+ (i
>0 ? x
->span
[i
-1] : 1)) <= rank
)
5733 traversed
+= i
> 0 ? x
->span
[i
-1] : 1;
5736 if (traversed
== rank
) {
5743 /* The actual Z-commands implementations */
5745 /* This generic command implements both ZADD and ZINCRBY.
5746 * scoreval is the score if the operation is a ZADD (doincrement == 0) or
5747 * the increment if the operation is a ZINCRBY (doincrement == 1). */
5748 static void zaddGenericCommand(redisClient
*c
, robj
*key
, robj
*ele
, double scoreval
, int doincrement
) {
5753 if (isnan(scoreval
)) {
5754 addReplySds(c
,sdsnew("-ERR provide score is Not A Number (nan)\r\n"));
5758 zsetobj
= lookupKeyWrite(c
->db
,key
);
5759 if (zsetobj
== NULL
) {
5760 zsetobj
= createZsetObject();
5761 dictAdd(c
->db
->dict
,key
,zsetobj
);
5764 if (zsetobj
->type
!= REDIS_ZSET
) {
5765 addReply(c
,shared
.wrongtypeerr
);
5771 /* Ok now since we implement both ZADD and ZINCRBY here the code
5772 * needs to handle the two different conditions. It's all about setting
5773 * '*score', that is, the new score to set, to the right value. */
5774 score
= zmalloc(sizeof(double));
5778 /* Read the old score. If the element was not present starts from 0 */
5779 de
= dictFind(zs
->dict
,ele
);
5781 double *oldscore
= dictGetEntryVal(de
);
5782 *score
= *oldscore
+ scoreval
;
5786 if (isnan(*score
)) {
5788 sdsnew("-ERR resulting score is Not A Number (nan)\r\n"));
5790 /* Note that we don't need to check if the zset may be empty and
5791 * should be removed here, as we can only obtain Nan as score if
5792 * there was already an element in the sorted set. */
5799 /* What follows is a simple remove and re-insert operation that is common
5800 * to both ZADD and ZINCRBY... */
5801 if (dictAdd(zs
->dict
,ele
,score
) == DICT_OK
) {
5802 /* case 1: New element */
5803 incrRefCount(ele
); /* added to hash */
5804 zslInsert(zs
->zsl
,*score
,ele
);
5805 incrRefCount(ele
); /* added to skiplist */
5808 addReplyDouble(c
,*score
);
5810 addReply(c
,shared
.cone
);
5815 /* case 2: Score update operation */
5816 de
= dictFind(zs
->dict
,ele
);
5817 redisAssert(de
!= NULL
);
5818 oldscore
= dictGetEntryVal(de
);
5819 if (*score
!= *oldscore
) {
5822 /* Remove and insert the element in the skip list with new score */
5823 deleted
= zslDelete(zs
->zsl
,*oldscore
,ele
);
5824 redisAssert(deleted
!= 0);
5825 zslInsert(zs
->zsl
,*score
,ele
);
5827 /* Update the score in the hash table */
5828 dictReplace(zs
->dict
,ele
,score
);
5834 addReplyDouble(c
,*score
);
5836 addReply(c
,shared
.czero
);
5840 static void zaddCommand(redisClient
*c
) {
5843 if (getDoubleFromObjectOrReply(c
, c
->argv
[2], &scoreval
, NULL
) != REDIS_OK
) return;
5844 zaddGenericCommand(c
,c
->argv
[1],c
->argv
[3],scoreval
,0);
5847 static void zincrbyCommand(redisClient
*c
) {
5850 if (getDoubleFromObjectOrReply(c
, c
->argv
[2], &scoreval
, NULL
) != REDIS_OK
) return;
5851 zaddGenericCommand(c
,c
->argv
[1],c
->argv
[3],scoreval
,1);
5854 static void zremCommand(redisClient
*c
) {
5861 if ((zsetobj
= lookupKeyWriteOrReply(c
,c
->argv
[1],shared
.czero
)) == NULL
||
5862 checkType(c
,zsetobj
,REDIS_ZSET
)) return;
5865 de
= dictFind(zs
->dict
,c
->argv
[2]);
5867 addReply(c
,shared
.czero
);
5870 /* Delete from the skiplist */
5871 oldscore
= dictGetEntryVal(de
);
5872 deleted
= zslDelete(zs
->zsl
,*oldscore
,c
->argv
[2]);
5873 redisAssert(deleted
!= 0);
5875 /* Delete from the hash table */
5876 dictDelete(zs
->dict
,c
->argv
[2]);
5877 if (htNeedsResize(zs
->dict
)) dictResize(zs
->dict
);
5878 if (dictSize(zs
->dict
) == 0) deleteKey(c
->db
,c
->argv
[1]);
5880 addReply(c
,shared
.cone
);
5883 static void zremrangebyscoreCommand(redisClient
*c
) {
5890 if ((getDoubleFromObjectOrReply(c
, c
->argv
[2], &min
, NULL
) != REDIS_OK
) ||
5891 (getDoubleFromObjectOrReply(c
, c
->argv
[3], &max
, NULL
) != REDIS_OK
)) return;
5893 if ((zsetobj
= lookupKeyWriteOrReply(c
,c
->argv
[1],shared
.czero
)) == NULL
||
5894 checkType(c
,zsetobj
,REDIS_ZSET
)) return;
5897 deleted
= zslDeleteRangeByScore(zs
->zsl
,min
,max
,zs
->dict
);
5898 if (htNeedsResize(zs
->dict
)) dictResize(zs
->dict
);
5899 if (dictSize(zs
->dict
) == 0) deleteKey(c
->db
,c
->argv
[1]);
5900 server
.dirty
+= deleted
;
5901 addReplyLongLong(c
,deleted
);
5904 static void zremrangebyrankCommand(redisClient
*c
) {
5912 if ((getLongFromObjectOrReply(c
, c
->argv
[2], &start
, NULL
) != REDIS_OK
) ||
5913 (getLongFromObjectOrReply(c
, c
->argv
[3], &end
, NULL
) != REDIS_OK
)) return;
5915 if ((zsetobj
= lookupKeyWriteOrReply(c
,c
->argv
[1],shared
.czero
)) == NULL
||
5916 checkType(c
,zsetobj
,REDIS_ZSET
)) return;
5918 llen
= zs
->zsl
->length
;
5920 /* convert negative indexes */
5921 if (start
< 0) start
= llen
+start
;
5922 if (end
< 0) end
= llen
+end
;
5923 if (start
< 0) start
= 0;
5924 if (end
< 0) end
= 0;
5926 /* indexes sanity checks */
5927 if (start
> end
|| start
>= llen
) {
5928 addReply(c
,shared
.czero
);
5931 if (end
>= llen
) end
= llen
-1;
5933 /* increment start and end because zsl*Rank functions
5934 * use 1-based rank */
5935 deleted
= zslDeleteRangeByRank(zs
->zsl
,start
+1,end
+1,zs
->dict
);
5936 if (htNeedsResize(zs
->dict
)) dictResize(zs
->dict
);
5937 if (dictSize(zs
->dict
) == 0) deleteKey(c
->db
,c
->argv
[1]);
5938 server
.dirty
+= deleted
;
5939 addReplyLongLong(c
, deleted
);
5947 static int qsortCompareZsetopsrcByCardinality(const void *s1
, const void *s2
) {
5948 zsetopsrc
*d1
= (void*) s1
, *d2
= (void*) s2
;
5949 unsigned long size1
, size2
;
5950 size1
= d1
->dict
? dictSize(d1
->dict
) : 0;
5951 size2
= d2
->dict
? dictSize(d2
->dict
) : 0;
5952 return size1
- size2
;
5955 #define REDIS_AGGR_SUM 1
5956 #define REDIS_AGGR_MIN 2
5957 #define REDIS_AGGR_MAX 3
5958 #define zunionInterDictValue(_e) (dictGetEntryVal(_e) == NULL ? 1.0 : *(double*)dictGetEntryVal(_e))
5960 inline static void zunionInterAggregate(double *target
, double val
, int aggregate
) {
5961 if (aggregate
== REDIS_AGGR_SUM
) {
5962 *target
= *target
+ val
;
5963 } else if (aggregate
== REDIS_AGGR_MIN
) {
5964 *target
= val
< *target
? val
: *target
;
5965 } else if (aggregate
== REDIS_AGGR_MAX
) {
5966 *target
= val
> *target
? val
: *target
;
5969 redisPanic("Unknown ZUNION/INTER aggregate type");
5973 static void zunionInterGenericCommand(redisClient
*c
, robj
*dstkey
, int op
) {
5975 int aggregate
= REDIS_AGGR_SUM
;
5982 /* expect setnum input keys to be given */
5983 setnum
= atoi(c
->argv
[2]->ptr
);
5985 addReplySds(c
,sdsnew("-ERR at least 1 input key is needed for ZUNIONSTORE/ZINTERSTORE\r\n"));
5989 /* test if the expected number of keys would overflow */
5990 if (3+setnum
> c
->argc
) {
5991 addReply(c
,shared
.syntaxerr
);
5995 /* read keys to be used for input */
5996 src
= zmalloc(sizeof(zsetopsrc
) * setnum
);
5997 for (i
= 0, j
= 3; i
< setnum
; i
++, j
++) {
5998 robj
*obj
= lookupKeyWrite(c
->db
,c
->argv
[j
]);
6002 if (obj
->type
== REDIS_ZSET
) {
6003 src
[i
].dict
= ((zset
*)obj
->ptr
)->dict
;
6004 } else if (obj
->type
== REDIS_SET
) {
6005 src
[i
].dict
= (obj
->ptr
);
6008 addReply(c
,shared
.wrongtypeerr
);
6013 /* default all weights to 1 */
6014 src
[i
].weight
= 1.0;
6017 /* parse optional extra arguments */
6019 int remaining
= c
->argc
- j
;
6022 if (remaining
>= (setnum
+ 1) && !strcasecmp(c
->argv
[j
]->ptr
,"weights")) {
6024 for (i
= 0; i
< setnum
; i
++, j
++, remaining
--) {
6025 if (getDoubleFromObjectOrReply(c
, c
->argv
[j
], &src
[i
].weight
, NULL
) != REDIS_OK
)
6028 } else if (remaining
>= 2 && !strcasecmp(c
->argv
[j
]->ptr
,"aggregate")) {
6030 if (!strcasecmp(c
->argv
[j
]->ptr
,"sum")) {
6031 aggregate
= REDIS_AGGR_SUM
;
6032 } else if (!strcasecmp(c
->argv
[j
]->ptr
,"min")) {
6033 aggregate
= REDIS_AGGR_MIN
;
6034 } else if (!strcasecmp(c
->argv
[j
]->ptr
,"max")) {
6035 aggregate
= REDIS_AGGR_MAX
;
6038 addReply(c
,shared
.syntaxerr
);
6044 addReply(c
,shared
.syntaxerr
);
6050 /* sort sets from the smallest to largest, this will improve our
6051 * algorithm's performance */
6052 qsort(src
,setnum
,sizeof(zsetopsrc
),qsortCompareZsetopsrcByCardinality
);
6054 dstobj
= createZsetObject();
6055 dstzset
= dstobj
->ptr
;
6057 if (op
== REDIS_OP_INTER
) {
6058 /* skip going over all entries if the smallest zset is NULL or empty */
6059 if (src
[0].dict
&& dictSize(src
[0].dict
) > 0) {
6060 /* precondition: as src[0].dict is non-empty and the zsets are ordered
6061 * from small to large, all src[i > 0].dict are non-empty too */
6062 di
= dictGetIterator(src
[0].dict
);
6063 while((de
= dictNext(di
)) != NULL
) {
6064 double *score
= zmalloc(sizeof(double)), value
;
6065 *score
= src
[0].weight
* zunionInterDictValue(de
);
6067 for (j
= 1; j
< setnum
; j
++) {
6068 dictEntry
*other
= dictFind(src
[j
].dict
,dictGetEntryKey(de
));
6070 value
= src
[j
].weight
* zunionInterDictValue(other
);
6071 zunionInterAggregate(score
, value
, aggregate
);
6077 /* skip entry when not present in every source dict */
6081 robj
*o
= dictGetEntryKey(de
);
6082 dictAdd(dstzset
->dict
,o
,score
);
6083 incrRefCount(o
); /* added to dictionary */
6084 zslInsert(dstzset
->zsl
,*score
,o
);
6085 incrRefCount(o
); /* added to skiplist */
6088 dictReleaseIterator(di
);
6090 } else if (op
== REDIS_OP_UNION
) {
6091 for (i
= 0; i
< setnum
; i
++) {
6092 if (!src
[i
].dict
) continue;
6094 di
= dictGetIterator(src
[i
].dict
);
6095 while((de
= dictNext(di
)) != NULL
) {
6096 /* skip key when already processed */
6097 if (dictFind(dstzset
->dict
,dictGetEntryKey(de
)) != NULL
) continue;
6099 double *score
= zmalloc(sizeof(double)), value
;
6100 *score
= src
[i
].weight
* zunionInterDictValue(de
);
6102 /* because the zsets are sorted by size, it's only possible
6103 * for sets at larger indices to hold this entry */
6104 for (j
= (i
+1); j
< setnum
; j
++) {
6105 dictEntry
*other
= dictFind(src
[j
].dict
,dictGetEntryKey(de
));
6107 value
= src
[j
].weight
* zunionInterDictValue(other
);
6108 zunionInterAggregate(score
, value
, aggregate
);
6112 robj
*o
= dictGetEntryKey(de
);
6113 dictAdd(dstzset
->dict
,o
,score
);
6114 incrRefCount(o
); /* added to dictionary */
6115 zslInsert(dstzset
->zsl
,*score
,o
);
6116 incrRefCount(o
); /* added to skiplist */
6118 dictReleaseIterator(di
);
6121 /* unknown operator */
6122 redisAssert(op
== REDIS_OP_INTER
|| op
== REDIS_OP_UNION
);
6125 deleteKey(c
->db
,dstkey
);
6126 if (dstzset
->zsl
->length
) {
6127 dictAdd(c
->db
->dict
,dstkey
,dstobj
);
6128 incrRefCount(dstkey
);
6129 addReplyLongLong(c
, dstzset
->zsl
->length
);
6132 decrRefCount(dstobj
);
6133 addReply(c
, shared
.czero
);
6138 static void zunionstoreCommand(redisClient
*c
) {
6139 zunionInterGenericCommand(c
,c
->argv
[1], REDIS_OP_UNION
);
6142 static void zinterstoreCommand(redisClient
*c
) {
6143 zunionInterGenericCommand(c
,c
->argv
[1], REDIS_OP_INTER
);
6146 static void zrangeGenericCommand(redisClient
*c
, int reverse
) {
6158 if ((getLongFromObjectOrReply(c
, c
->argv
[2], &start
, NULL
) != REDIS_OK
) ||
6159 (getLongFromObjectOrReply(c
, c
->argv
[3], &end
, NULL
) != REDIS_OK
)) return;
6161 if (c
->argc
== 5 && !strcasecmp(c
->argv
[4]->ptr
,"withscores")) {
6163 } else if (c
->argc
>= 5) {
6164 addReply(c
,shared
.syntaxerr
);
6168 if ((o
= lookupKeyReadOrReply(c
,c
->argv
[1],shared
.emptymultibulk
)) == NULL
6169 || checkType(c
,o
,REDIS_ZSET
)) return;
6174 /* convert negative indexes */
6175 if (start
< 0) start
= llen
+start
;
6176 if (end
< 0) end
= llen
+end
;
6177 if (start
< 0) start
= 0;
6178 if (end
< 0) end
= 0;
6180 /* indexes sanity checks */
6181 if (start
> end
|| start
>= llen
) {
6182 /* Out of range start or start > end result in empty list */
6183 addReply(c
,shared
.emptymultibulk
);
6186 if (end
>= llen
) end
= llen
-1;
6187 rangelen
= (end
-start
)+1;
6189 /* check if starting point is trivial, before searching
6190 * the element in log(N) time */
6192 ln
= start
== 0 ? zsl
->tail
: zslGetElementByRank(zsl
, llen
-start
);
6195 zsl
->header
->forward
[0] : zslGetElementByRank(zsl
, start
+1);
6198 /* Return the result in form of a multi-bulk reply */
6199 addReplySds(c
,sdscatprintf(sdsempty(),"*%d\r\n",
6200 withscores
? (rangelen
*2) : rangelen
));
6201 for (j
= 0; j
< rangelen
; j
++) {
6203 addReplyBulk(c
,ele
);
6205 addReplyDouble(c
,ln
->score
);
6206 ln
= reverse
? ln
->backward
: ln
->forward
[0];
6210 static void zrangeCommand(redisClient
*c
) {
6211 zrangeGenericCommand(c
,0);
6214 static void zrevrangeCommand(redisClient
*c
) {
6215 zrangeGenericCommand(c
,1);
6218 /* This command implements both ZRANGEBYSCORE and ZCOUNT.
6219 * If justcount is non-zero, just the count is returned. */
6220 static void genericZrangebyscoreCommand(redisClient
*c
, int justcount
) {
6223 int minex
= 0, maxex
= 0; /* are min or max exclusive? */
6224 int offset
= 0, limit
= -1;
6228 /* Parse the min-max interval. If one of the values is prefixed
6229 * by the "(" character, it's considered "open". For instance
6230 * ZRANGEBYSCORE zset (1.5 (2.5 will match min < x < max
6231 * ZRANGEBYSCORE zset 1.5 2.5 will instead match min <= x <= max */
6232 if (((char*)c
->argv
[2]->ptr
)[0] == '(') {
6233 min
= strtod((char*)c
->argv
[2]->ptr
+1,NULL
);
6236 min
= strtod(c
->argv
[2]->ptr
,NULL
);
6238 if (((char*)c
->argv
[3]->ptr
)[0] == '(') {
6239 max
= strtod((char*)c
->argv
[3]->ptr
+1,NULL
);
6242 max
= strtod(c
->argv
[3]->ptr
,NULL
);
6245 /* Parse "WITHSCORES": note that if the command was called with
6246 * the name ZCOUNT then we are sure that c->argc == 4, so we'll never
6247 * enter the following paths to parse WITHSCORES and LIMIT. */
6248 if (c
->argc
== 5 || c
->argc
== 8) {
6249 if (strcasecmp(c
->argv
[c
->argc
-1]->ptr
,"withscores") == 0)
6254 if (c
->argc
!= (4 + withscores
) && c
->argc
!= (7 + withscores
))
6258 sdsnew("-ERR wrong number of arguments for ZRANGEBYSCORE\r\n"));
6263 if (c
->argc
== (7 + withscores
) && strcasecmp(c
->argv
[4]->ptr
,"limit")) {
6264 addReply(c
,shared
.syntaxerr
);
6266 } else if (c
->argc
== (7 + withscores
)) {
6267 offset
= atoi(c
->argv
[5]->ptr
);
6268 limit
= atoi(c
->argv
[6]->ptr
);
6269 if (offset
< 0) offset
= 0;
6272 /* Ok, lookup the key and get the range */
6273 o
= lookupKeyRead(c
->db
,c
->argv
[1]);
6275 addReply(c
,justcount
? shared
.czero
: shared
.emptymultibulk
);
6277 if (o
->type
!= REDIS_ZSET
) {
6278 addReply(c
,shared
.wrongtypeerr
);
6280 zset
*zsetobj
= o
->ptr
;
6281 zskiplist
*zsl
= zsetobj
->zsl
;
6283 robj
*ele
, *lenobj
= NULL
;
6284 unsigned long rangelen
= 0;
6286 /* Get the first node with the score >= min, or with
6287 * score > min if 'minex' is true. */
6288 ln
= zslFirstWithScore(zsl
,min
);
6289 while (minex
&& ln
&& ln
->score
== min
) ln
= ln
->forward
[0];
6292 /* No element matching the specified interval */
6293 addReply(c
,justcount
? shared
.czero
: shared
.emptymultibulk
);
6297 /* We don't know in advance how many matching elements there
6298 * are in the list, so we push this object that will represent
6299 * the multi-bulk length in the output buffer, and will "fix"
6302 lenobj
= createObject(REDIS_STRING
,NULL
);
6304 decrRefCount(lenobj
);
6307 while(ln
&& (maxex
? (ln
->score
< max
) : (ln
->score
<= max
))) {
6310 ln
= ln
->forward
[0];
6313 if (limit
== 0) break;
6316 addReplyBulk(c
,ele
);
6318 addReplyDouble(c
,ln
->score
);
6320 ln
= ln
->forward
[0];
6322 if (limit
> 0) limit
--;
6325 addReplyLongLong(c
,(long)rangelen
);
6327 lenobj
->ptr
= sdscatprintf(sdsempty(),"*%lu\r\n",
6328 withscores
? (rangelen
*2) : rangelen
);
6334 static void zrangebyscoreCommand(redisClient
*c
) {
6335 genericZrangebyscoreCommand(c
,0);
6338 static void zcountCommand(redisClient
*c
) {
6339 genericZrangebyscoreCommand(c
,1);
6342 static void zcardCommand(redisClient
*c
) {
6346 if ((o
= lookupKeyReadOrReply(c
,c
->argv
[1],shared
.czero
)) == NULL
||
6347 checkType(c
,o
,REDIS_ZSET
)) return;
6350 addReplyUlong(c
,zs
->zsl
->length
);
6353 static void zscoreCommand(redisClient
*c
) {
6358 if ((o
= lookupKeyReadOrReply(c
,c
->argv
[1],shared
.nullbulk
)) == NULL
||
6359 checkType(c
,o
,REDIS_ZSET
)) return;
6362 de
= dictFind(zs
->dict
,c
->argv
[2]);
6364 addReply(c
,shared
.nullbulk
);
6366 double *score
= dictGetEntryVal(de
);
6368 addReplyDouble(c
,*score
);
6372 static void zrankGenericCommand(redisClient
*c
, int reverse
) {
6380 if ((o
= lookupKeyReadOrReply(c
,c
->argv
[1],shared
.nullbulk
)) == NULL
||
6381 checkType(c
,o
,REDIS_ZSET
)) return;
6385 de
= dictFind(zs
->dict
,c
->argv
[2]);
6387 addReply(c
,shared
.nullbulk
);
6391 score
= dictGetEntryVal(de
);
6392 rank
= zslGetRank(zsl
, *score
, c
->argv
[2]);
6395 addReplyLongLong(c
, zsl
->length
- rank
);
6397 addReplyLongLong(c
, rank
-1);
6400 addReply(c
,shared
.nullbulk
);
6404 static void zrankCommand(redisClient
*c
) {
6405 zrankGenericCommand(c
, 0);
6408 static void zrevrankCommand(redisClient
*c
) {
6409 zrankGenericCommand(c
, 1);
6412 /* ========================= Hashes utility functions ======================= */
6413 #define REDIS_HASH_KEY 1
6414 #define REDIS_HASH_VALUE 2
6416 /* Check the length of a number of objects to see if we need to convert a
6417 * zipmap to a real hash. Note that we only check string encoded objects
6418 * as their string length can be queried in constant time. */
6419 static void hashTryConversion(robj
*subject
, robj
**argv
, int start
, int end
) {
6421 if (subject
->encoding
!= REDIS_ENCODING_ZIPMAP
) return;
6423 for (i
= start
; i
<= end
; i
++) {
6424 if (argv
[i
]->encoding
== REDIS_ENCODING_RAW
&&
6425 sdslen(argv
[i
]->ptr
) > server
.hash_max_zipmap_value
)
6427 convertToRealHash(subject
);
6433 /* Encode given objects in-place when the hash uses a dict. */
6434 static void hashTryObjectEncoding(robj
*subject
, robj
**o1
, robj
**o2
) {
6435 if (subject
->encoding
== REDIS_ENCODING_HT
) {
6436 if (o1
) *o1
= tryObjectEncoding(*o1
);
6437 if (o2
) *o2
= tryObjectEncoding(*o2
);
6441 /* Get the value from a hash identified by key. Returns either a string
6442 * object or NULL if the value cannot be found. The refcount of the object
6443 * is always increased by 1 when the value was found. */
6444 static robj
*hashGet(robj
*o
, robj
*key
) {
6446 if (o
->encoding
== REDIS_ENCODING_ZIPMAP
) {
6449 key
= getDecodedObject(key
);
6450 if (zipmapGet(o
->ptr
,key
->ptr
,sdslen(key
->ptr
),&v
,&vlen
)) {
6451 value
= createStringObject((char*)v
,vlen
);
6455 dictEntry
*de
= dictFind(o
->ptr
,key
);
6457 value
= dictGetEntryVal(de
);
6458 incrRefCount(value
);
6464 /* Test if the key exists in the given hash. Returns 1 if the key
6465 * exists and 0 when it doesn't. */
6466 static int hashExists(robj
*o
, robj
*key
) {
6467 if (o
->encoding
== REDIS_ENCODING_ZIPMAP
) {
6468 key
= getDecodedObject(key
);
6469 if (zipmapExists(o
->ptr
,key
->ptr
,sdslen(key
->ptr
))) {
6475 if (dictFind(o
->ptr
,key
) != NULL
) {
6482 /* Add an element, discard the old if the key already exists.
6483 * Return 0 on insert and 1 on update. */
6484 static int hashSet(robj
*o
, robj
*key
, robj
*value
) {
6486 if (o
->encoding
== REDIS_ENCODING_ZIPMAP
) {
6487 key
= getDecodedObject(key
);
6488 value
= getDecodedObject(value
);
6489 o
->ptr
= zipmapSet(o
->ptr
,
6490 key
->ptr
,sdslen(key
->ptr
),
6491 value
->ptr
,sdslen(value
->ptr
), &update
);
6493 decrRefCount(value
);
6495 /* Check if the zipmap needs to be upgraded to a real hash table */
6496 if (zipmapLen(o
->ptr
) > server
.hash_max_zipmap_entries
)
6497 convertToRealHash(o
);
6499 if (dictReplace(o
->ptr
,key
,value
)) {
6506 incrRefCount(value
);
6511 /* Delete an element from a hash.
6512 * Return 1 on deleted and 0 on not found. */
6513 static int hashDelete(robj
*o
, robj
*key
) {
6515 if (o
->encoding
== REDIS_ENCODING_ZIPMAP
) {
6516 key
= getDecodedObject(key
);
6517 o
->ptr
= zipmapDel(o
->ptr
,key
->ptr
,sdslen(key
->ptr
), &deleted
);
6520 deleted
= dictDelete((dict
*)o
->ptr
,key
) == DICT_OK
;
6521 /* Always check if the dictionary needs a resize after a delete. */
6522 if (deleted
&& htNeedsResize(o
->ptr
)) dictResize(o
->ptr
);
6527 /* Return the number of elements in a hash. */
6528 static unsigned long hashLength(robj
*o
) {
6529 return (o
->encoding
== REDIS_ENCODING_ZIPMAP
) ?
6530 zipmapLen((unsigned char*)o
->ptr
) : dictSize((dict
*)o
->ptr
);
6533 /* Structure to hold hash iteration abstraction. Note that iteration over
6534 * hashes involves both fields and values. Because it is possible that
6535 * not both are required, store pointers in the iterator to avoid
6536 * unnecessary memory allocation for fields/values. */
6540 unsigned char *zk
, *zv
;
6541 unsigned int zklen
, zvlen
;
6547 static hashIterator
*hashInitIterator(robj
*subject
) {
6548 hashIterator
*hi
= zmalloc(sizeof(hashIterator
));
6549 hi
->encoding
= subject
->encoding
;
6550 if (hi
->encoding
== REDIS_ENCODING_ZIPMAP
) {
6551 hi
->zi
= zipmapRewind(subject
->ptr
);
6552 } else if (hi
->encoding
== REDIS_ENCODING_HT
) {
6553 hi
->di
= dictGetIterator(subject
->ptr
);
6560 static void hashReleaseIterator(hashIterator
*hi
) {
6561 if (hi
->encoding
== REDIS_ENCODING_HT
) {
6562 dictReleaseIterator(hi
->di
);
6567 /* Move to the next entry in the hash. Return REDIS_OK when the next entry
6568 * could be found and REDIS_ERR when the iterator reaches the end. */
6569 static int hashNext(hashIterator
*hi
) {
6570 if (hi
->encoding
== REDIS_ENCODING_ZIPMAP
) {
6571 if ((hi
->zi
= zipmapNext(hi
->zi
, &hi
->zk
, &hi
->zklen
,
6572 &hi
->zv
, &hi
->zvlen
)) == NULL
) return REDIS_ERR
;
6574 if ((hi
->de
= dictNext(hi
->di
)) == NULL
) return REDIS_ERR
;
6579 /* Get key or value object at current iteration position.
6580 * This increases the refcount of the field object by 1. */
6581 static robj
*hashCurrent(hashIterator
*hi
, int what
) {
6583 if (hi
->encoding
== REDIS_ENCODING_ZIPMAP
) {
6584 if (what
& REDIS_HASH_KEY
) {
6585 o
= createStringObject((char*)hi
->zk
,hi
->zklen
);
6587 o
= createStringObject((char*)hi
->zv
,hi
->zvlen
);
6590 if (what
& REDIS_HASH_KEY
) {
6591 o
= dictGetEntryKey(hi
->de
);
6593 o
= dictGetEntryVal(hi
->de
);
6600 static robj
*hashLookupWriteOrCreate(redisClient
*c
, robj
*key
) {
6601 robj
*o
= lookupKeyWrite(c
->db
,key
);
6603 o
= createHashObject();
6604 dictAdd(c
->db
->dict
,key
,o
);
6607 if (o
->type
!= REDIS_HASH
) {
6608 addReply(c
,shared
.wrongtypeerr
);
6615 /* ============================= Hash commands ============================== */
6616 static void hsetCommand(redisClient
*c
) {
6620 if ((o
= hashLookupWriteOrCreate(c
,c
->argv
[1])) == NULL
) return;
6621 hashTryConversion(o
,c
->argv
,2,3);
6622 hashTryObjectEncoding(o
,&c
->argv
[2], &c
->argv
[3]);
6623 update
= hashSet(o
,c
->argv
[2],c
->argv
[3]);
6624 addReply(c
, update
? shared
.czero
: shared
.cone
);
6628 static void hsetnxCommand(redisClient
*c
) {
6630 if ((o
= hashLookupWriteOrCreate(c
,c
->argv
[1])) == NULL
) return;
6631 hashTryConversion(o
,c
->argv
,2,3);
6633 if (hashExists(o
, c
->argv
[2])) {
6634 addReply(c
, shared
.czero
);
6636 hashTryObjectEncoding(o
,&c
->argv
[2], &c
->argv
[3]);
6637 hashSet(o
,c
->argv
[2],c
->argv
[3]);
6638 addReply(c
, shared
.cone
);
6643 static void hmsetCommand(redisClient
*c
) {
6647 if ((c
->argc
% 2) == 1) {
6648 addReplySds(c
,sdsnew("-ERR wrong number of arguments for HMSET\r\n"));
6652 if ((o
= hashLookupWriteOrCreate(c
,c
->argv
[1])) == NULL
) return;
6653 hashTryConversion(o
,c
->argv
,2,c
->argc
-1);
6654 for (i
= 2; i
< c
->argc
; i
+= 2) {
6655 hashTryObjectEncoding(o
,&c
->argv
[i
], &c
->argv
[i
+1]);
6656 hashSet(o
,c
->argv
[i
],c
->argv
[i
+1]);
6658 addReply(c
, shared
.ok
);
6662 static void hincrbyCommand(redisClient
*c
) {
6663 long long value
, incr
;
6664 robj
*o
, *current
, *new;
6666 if (getLongLongFromObjectOrReply(c
,c
->argv
[3],&incr
,NULL
) != REDIS_OK
) return;
6667 if ((o
= hashLookupWriteOrCreate(c
,c
->argv
[1])) == NULL
) return;
6668 if ((current
= hashGet(o
,c
->argv
[2])) != NULL
) {
6669 if (getLongLongFromObjectOrReply(c
,current
,&value
,
6670 "hash value is not an integer") != REDIS_OK
) {
6671 decrRefCount(current
);
6674 decrRefCount(current
);
6680 new = createStringObjectFromLongLong(value
);
6681 hashTryObjectEncoding(o
,&c
->argv
[2],NULL
);
6682 hashSet(o
,c
->argv
[2],new);
6684 addReplyLongLong(c
,value
);
6688 static void hgetCommand(redisClient
*c
) {
6690 if ((o
= lookupKeyReadOrReply(c
,c
->argv
[1],shared
.nullbulk
)) == NULL
||
6691 checkType(c
,o
,REDIS_HASH
)) return;
6693 if ((value
= hashGet(o
,c
->argv
[2])) != NULL
) {
6694 addReplyBulk(c
,value
);
6695 decrRefCount(value
);
6697 addReply(c
,shared
.nullbulk
);
6701 static void hmgetCommand(redisClient
*c
) {
6704 o
= lookupKeyRead(c
->db
,c
->argv
[1]);
6705 if (o
!= NULL
&& o
->type
!= REDIS_HASH
) {
6706 addReply(c
,shared
.wrongtypeerr
);
6709 /* Note the check for o != NULL happens inside the loop. This is
6710 * done because objects that cannot be found are considered to be
6711 * an empty hash. The reply should then be a series of NULLs. */
6712 addReplySds(c
,sdscatprintf(sdsempty(),"*%d\r\n",c
->argc
-2));
6713 for (i
= 2; i
< c
->argc
; i
++) {
6714 if (o
!= NULL
&& (value
= hashGet(o
,c
->argv
[i
])) != NULL
) {
6715 addReplyBulk(c
,value
);
6716 decrRefCount(value
);
6718 addReply(c
,shared
.nullbulk
);
6723 static void hdelCommand(redisClient
*c
) {
6725 if ((o
= lookupKeyWriteOrReply(c
,c
->argv
[1],shared
.czero
)) == NULL
||
6726 checkType(c
,o
,REDIS_HASH
)) return;
6728 if (hashDelete(o
,c
->argv
[2])) {
6729 if (hashLength(o
) == 0) deleteKey(c
->db
,c
->argv
[1]);
6730 addReply(c
,shared
.cone
);
6733 addReply(c
,shared
.czero
);
6737 static void hlenCommand(redisClient
*c
) {
6739 if ((o
= lookupKeyReadOrReply(c
,c
->argv
[1],shared
.czero
)) == NULL
||
6740 checkType(c
,o
,REDIS_HASH
)) return;
6742 addReplyUlong(c
,hashLength(o
));
6745 static void genericHgetallCommand(redisClient
*c
, int flags
) {
6746 robj
*o
, *lenobj
, *obj
;
6747 unsigned long count
= 0;
6750 if ((o
= lookupKeyReadOrReply(c
,c
->argv
[1],shared
.emptymultibulk
)) == NULL
6751 || checkType(c
,o
,REDIS_HASH
)) return;
6753 lenobj
= createObject(REDIS_STRING
,NULL
);
6755 decrRefCount(lenobj
);
6757 hi
= hashInitIterator(o
);
6758 while (hashNext(hi
) != REDIS_ERR
) {
6759 if (flags
& REDIS_HASH_KEY
) {
6760 obj
= hashCurrent(hi
,REDIS_HASH_KEY
);
6761 addReplyBulk(c
,obj
);
6765 if (flags
& REDIS_HASH_VALUE
) {
6766 obj
= hashCurrent(hi
,REDIS_HASH_VALUE
);
6767 addReplyBulk(c
,obj
);
6772 hashReleaseIterator(hi
);
6774 lenobj
->ptr
= sdscatprintf(sdsempty(),"*%lu\r\n",count
);
6777 static void hkeysCommand(redisClient
*c
) {
6778 genericHgetallCommand(c
,REDIS_HASH_KEY
);
6781 static void hvalsCommand(redisClient
*c
) {
6782 genericHgetallCommand(c
,REDIS_HASH_VALUE
);
6785 static void hgetallCommand(redisClient
*c
) {
6786 genericHgetallCommand(c
,REDIS_HASH_KEY
|REDIS_HASH_VALUE
);
6789 static void hexistsCommand(redisClient
*c
) {
6791 if ((o
= lookupKeyReadOrReply(c
,c
->argv
[1],shared
.czero
)) == NULL
||
6792 checkType(c
,o
,REDIS_HASH
)) return;
6794 addReply(c
, hashExists(o
,c
->argv
[2]) ? shared
.cone
: shared
.czero
);
6797 static void convertToRealHash(robj
*o
) {
6798 unsigned char *key
, *val
, *p
, *zm
= o
->ptr
;
6799 unsigned int klen
, vlen
;
6800 dict
*dict
= dictCreate(&hashDictType
,NULL
);
6802 assert(o
->type
== REDIS_HASH
&& o
->encoding
!= REDIS_ENCODING_HT
);
6803 p
= zipmapRewind(zm
);
6804 while((p
= zipmapNext(p
,&key
,&klen
,&val
,&vlen
)) != NULL
) {
6805 robj
*keyobj
, *valobj
;
6807 keyobj
= createStringObject((char*)key
,klen
);
6808 valobj
= createStringObject((char*)val
,vlen
);
6809 keyobj
= tryObjectEncoding(keyobj
);
6810 valobj
= tryObjectEncoding(valobj
);
6811 dictAdd(dict
,keyobj
,valobj
);
6813 o
->encoding
= REDIS_ENCODING_HT
;
6818 /* ========================= Non type-specific commands ==================== */
6820 static void flushdbCommand(redisClient
*c
) {
6821 server
.dirty
+= dictSize(c
->db
->dict
);
6822 touchWatchedKeysOnFlush(c
->db
->id
);
6823 dictEmpty(c
->db
->dict
);
6824 dictEmpty(c
->db
->expires
);
6825 addReply(c
,shared
.ok
);
6828 static void flushallCommand(redisClient
*c
) {
6829 touchWatchedKeysOnFlush(-1);
6830 server
.dirty
+= emptyDb();
6831 addReply(c
,shared
.ok
);
6832 if (server
.bgsavechildpid
!= -1) {
6833 kill(server
.bgsavechildpid
,SIGKILL
);
6834 rdbRemoveTempFile(server
.bgsavechildpid
);
6836 rdbSave(server
.dbfilename
);
6840 static redisSortOperation
*createSortOperation(int type
, robj
*pattern
) {
6841 redisSortOperation
*so
= zmalloc(sizeof(*so
));
6843 so
->pattern
= pattern
;
6847 /* Return the value associated to the key with a name obtained
6848 * substituting the first occurence of '*' in 'pattern' with 'subst'.
6849 * The returned object will always have its refcount increased by 1
6850 * when it is non-NULL. */
6851 static robj
*lookupKeyByPattern(redisDb
*db
, robj
*pattern
, robj
*subst
) {
6854 robj keyobj
, fieldobj
, *o
;
6855 int prefixlen
, sublen
, postfixlen
, fieldlen
;
6856 /* Expoit the internal sds representation to create a sds string allocated on the stack in order to make this function faster */
6860 char buf
[REDIS_SORTKEY_MAX
+1];
6861 } keyname
, fieldname
;
6863 /* If the pattern is "#" return the substitution object itself in order
6864 * to implement the "SORT ... GET #" feature. */
6865 spat
= pattern
->ptr
;
6866 if (spat
[0] == '#' && spat
[1] == '\0') {
6867 incrRefCount(subst
);
6871 /* The substitution object may be specially encoded. If so we create
6872 * a decoded object on the fly. Otherwise getDecodedObject will just
6873 * increment the ref count, that we'll decrement later. */
6874 subst
= getDecodedObject(subst
);
6877 if (sdslen(spat
)+sdslen(ssub
)-1 > REDIS_SORTKEY_MAX
) return NULL
;
6878 p
= strchr(spat
,'*');
6880 decrRefCount(subst
);
6884 /* Find out if we're dealing with a hash dereference. */
6885 if ((f
= strstr(p
+1, "->")) != NULL
) {
6886 fieldlen
= sdslen(spat
)-(f
-spat
);
6887 /* this also copies \0 character */
6888 memcpy(fieldname
.buf
,f
+2,fieldlen
-1);
6889 fieldname
.len
= fieldlen
-2;
6895 sublen
= sdslen(ssub
);
6896 postfixlen
= sdslen(spat
)-(prefixlen
+1)-fieldlen
;
6897 memcpy(keyname
.buf
,spat
,prefixlen
);
6898 memcpy(keyname
.buf
+prefixlen
,ssub
,sublen
);
6899 memcpy(keyname
.buf
+prefixlen
+sublen
,p
+1,postfixlen
);
6900 keyname
.buf
[prefixlen
+sublen
+postfixlen
] = '\0';
6901 keyname
.len
= prefixlen
+sublen
+postfixlen
;
6902 decrRefCount(subst
);
6904 /* Lookup substituted key */
6905 initStaticStringObject(keyobj
,((char*)&keyname
)+(sizeof(long)*2));
6906 o
= lookupKeyRead(db
,&keyobj
);
6907 if (o
== NULL
) return NULL
;
6910 if (o
->type
!= REDIS_HASH
|| fieldname
.len
< 1) return NULL
;
6912 /* Retrieve value from hash by the field name. This operation
6913 * already increases the refcount of the returned object. */
6914 initStaticStringObject(fieldobj
,((char*)&fieldname
)+(sizeof(long)*2));
6915 o
= hashGet(o
, &fieldobj
);
6917 if (o
->type
!= REDIS_STRING
) return NULL
;
6919 /* Every object that this function returns needs to have its refcount
6920 * increased. sortCommand decreases it again. */
6927 /* sortCompare() is used by qsort in sortCommand(). Given that qsort_r with
6928 * the additional parameter is not standard but a BSD-specific we have to
6929 * pass sorting parameters via the global 'server' structure */
6930 static int sortCompare(const void *s1
, const void *s2
) {
6931 const redisSortObject
*so1
= s1
, *so2
= s2
;
6934 if (!server
.sort_alpha
) {
6935 /* Numeric sorting. Here it's trivial as we precomputed scores */
6936 if (so1
->u
.score
> so2
->u
.score
) {
6938 } else if (so1
->u
.score
< so2
->u
.score
) {
6944 /* Alphanumeric sorting */
6945 if (server
.sort_bypattern
) {
6946 if (!so1
->u
.cmpobj
|| !so2
->u
.cmpobj
) {
6947 /* At least one compare object is NULL */
6948 if (so1
->u
.cmpobj
== so2
->u
.cmpobj
)
6950 else if (so1
->u
.cmpobj
== NULL
)
6955 /* We have both the objects, use strcoll */
6956 cmp
= strcoll(so1
->u
.cmpobj
->ptr
,so2
->u
.cmpobj
->ptr
);
6959 /* Compare elements directly. */
6960 cmp
= compareStringObjects(so1
->obj
,so2
->obj
);
6963 return server
.sort_desc
? -cmp
: cmp
;
6966 /* The SORT command is the most complex command in Redis. Warning: this code
6967 * is optimized for speed and a bit less for readability */
6968 static void sortCommand(redisClient
*c
) {
6971 int desc
= 0, alpha
= 0;
6972 int limit_start
= 0, limit_count
= -1, start
, end
;
6973 int j
, dontsort
= 0, vectorlen
;
6974 int getop
= 0; /* GET operation counter */
6975 robj
*sortval
, *sortby
= NULL
, *storekey
= NULL
;
6976 redisSortObject
*vector
; /* Resulting vector to sort */
6978 /* Lookup the key to sort. It must be of the right types */
6979 sortval
= lookupKeyRead(c
->db
,c
->argv
[1]);
6980 if (sortval
== NULL
) {
6981 addReply(c
,shared
.emptymultibulk
);
6984 if (sortval
->type
!= REDIS_SET
&& sortval
->type
!= REDIS_LIST
&&
6985 sortval
->type
!= REDIS_ZSET
)
6987 addReply(c
,shared
.wrongtypeerr
);
6991 /* Create a list of operations to perform for every sorted element.
6992 * Operations can be GET/DEL/INCR/DECR */
6993 operations
= listCreate();
6994 listSetFreeMethod(operations
,zfree
);
6997 /* Now we need to protect sortval incrementing its count, in the future
6998 * SORT may have options able to overwrite/delete keys during the sorting
6999 * and the sorted key itself may get destroied */
7000 incrRefCount(sortval
);
7002 /* The SORT command has an SQL-alike syntax, parse it */
7003 while(j
< c
->argc
) {
7004 int leftargs
= c
->argc
-j
-1;
7005 if (!strcasecmp(c
->argv
[j
]->ptr
,"asc")) {
7007 } else if (!strcasecmp(c
->argv
[j
]->ptr
,"desc")) {
7009 } else if (!strcasecmp(c
->argv
[j
]->ptr
,"alpha")) {
7011 } else if (!strcasecmp(c
->argv
[j
]->ptr
,"limit") && leftargs
>= 2) {
7012 limit_start
= atoi(c
->argv
[j
+1]->ptr
);
7013 limit_count
= atoi(c
->argv
[j
+2]->ptr
);
7015 } else if (!strcasecmp(c
->argv
[j
]->ptr
,"store") && leftargs
>= 1) {
7016 storekey
= c
->argv
[j
+1];
7018 } else if (!strcasecmp(c
->argv
[j
]->ptr
,"by") && leftargs
>= 1) {
7019 sortby
= c
->argv
[j
+1];
7020 /* If the BY pattern does not contain '*', i.e. it is constant,
7021 * we don't need to sort nor to lookup the weight keys. */
7022 if (strchr(c
->argv
[j
+1]->ptr
,'*') == NULL
) dontsort
= 1;
7024 } else if (!strcasecmp(c
->argv
[j
]->ptr
,"get") && leftargs
>= 1) {
7025 listAddNodeTail(operations
,createSortOperation(
7026 REDIS_SORT_GET
,c
->argv
[j
+1]));
7030 decrRefCount(sortval
);
7031 listRelease(operations
);
7032 addReply(c
,shared
.syntaxerr
);
7038 /* Load the sorting vector with all the objects to sort */
7039 switch(sortval
->type
) {
7040 case REDIS_LIST
: vectorlen
= listLength((list
*)sortval
->ptr
); break;
7041 case REDIS_SET
: vectorlen
= dictSize((dict
*)sortval
->ptr
); break;
7042 case REDIS_ZSET
: vectorlen
= dictSize(((zset
*)sortval
->ptr
)->dict
); break;
7043 default: vectorlen
= 0; redisPanic("Bad SORT type"); /* Avoid GCC warning */
7045 vector
= zmalloc(sizeof(redisSortObject
)*vectorlen
);
7048 if (sortval
->type
== REDIS_LIST
) {
7049 list
*list
= sortval
->ptr
;
7053 listRewind(list
,&li
);
7054 while((ln
= listNext(&li
))) {
7055 robj
*ele
= ln
->value
;
7056 vector
[j
].obj
= ele
;
7057 vector
[j
].u
.score
= 0;
7058 vector
[j
].u
.cmpobj
= NULL
;
7066 if (sortval
->type
== REDIS_SET
) {
7069 zset
*zs
= sortval
->ptr
;
7073 di
= dictGetIterator(set
);
7074 while((setele
= dictNext(di
)) != NULL
) {
7075 vector
[j
].obj
= dictGetEntryKey(setele
);
7076 vector
[j
].u
.score
= 0;
7077 vector
[j
].u
.cmpobj
= NULL
;
7080 dictReleaseIterator(di
);
7082 redisAssert(j
== vectorlen
);
7084 /* Now it's time to load the right scores in the sorting vector */
7085 if (dontsort
== 0) {
7086 for (j
= 0; j
< vectorlen
; j
++) {
7089 /* lookup value to sort by */
7090 byval
= lookupKeyByPattern(c
->db
,sortby
,vector
[j
].obj
);
7091 if (!byval
) continue;
7093 /* use object itself to sort by */
7094 byval
= vector
[j
].obj
;
7098 if (sortby
) vector
[j
].u
.cmpobj
= getDecodedObject(byval
);
7100 if (byval
->encoding
== REDIS_ENCODING_RAW
) {
7101 vector
[j
].u
.score
= strtod(byval
->ptr
,NULL
);
7102 } else if (byval
->encoding
== REDIS_ENCODING_INT
) {
7103 /* Don't need to decode the object if it's
7104 * integer-encoded (the only encoding supported) so
7105 * far. We can just cast it */
7106 vector
[j
].u
.score
= (long)byval
->ptr
;
7108 redisAssert(1 != 1);
7112 /* when the object was retrieved using lookupKeyByPattern,
7113 * its refcount needs to be decreased. */
7115 decrRefCount(byval
);
7120 /* We are ready to sort the vector... perform a bit of sanity check
7121 * on the LIMIT option too. We'll use a partial version of quicksort. */
7122 start
= (limit_start
< 0) ? 0 : limit_start
;
7123 end
= (limit_count
< 0) ? vectorlen
-1 : start
+limit_count
-1;
7124 if (start
>= vectorlen
) {
7125 start
= vectorlen
-1;
7128 if (end
>= vectorlen
) end
= vectorlen
-1;
7130 if (dontsort
== 0) {
7131 server
.sort_desc
= desc
;
7132 server
.sort_alpha
= alpha
;
7133 server
.sort_bypattern
= sortby
? 1 : 0;
7134 if (sortby
&& (start
!= 0 || end
!= vectorlen
-1))
7135 pqsort(vector
,vectorlen
,sizeof(redisSortObject
),sortCompare
, start
,end
);
7137 qsort(vector
,vectorlen
,sizeof(redisSortObject
),sortCompare
);
7140 /* Send command output to the output buffer, performing the specified
7141 * GET/DEL/INCR/DECR operations if any. */
7142 outputlen
= getop
? getop
*(end
-start
+1) : end
-start
+1;
7143 if (storekey
== NULL
) {
7144 /* STORE option not specified, sent the sorting result to client */
7145 addReplySds(c
,sdscatprintf(sdsempty(),"*%d\r\n",outputlen
));
7146 for (j
= start
; j
<= end
; j
++) {
7150 if (!getop
) addReplyBulk(c
,vector
[j
].obj
);
7151 listRewind(operations
,&li
);
7152 while((ln
= listNext(&li
))) {
7153 redisSortOperation
*sop
= ln
->value
;
7154 robj
*val
= lookupKeyByPattern(c
->db
,sop
->pattern
,
7157 if (sop
->type
== REDIS_SORT_GET
) {
7159 addReply(c
,shared
.nullbulk
);
7161 addReplyBulk(c
,val
);
7165 redisAssert(sop
->type
== REDIS_SORT_GET
); /* always fails */
7170 robj
*listObject
= createListObject();
7171 list
*listPtr
= (list
*) listObject
->ptr
;
7173 /* STORE option specified, set the sorting result as a List object */
7174 for (j
= start
; j
<= end
; j
++) {
7179 listAddNodeTail(listPtr
,vector
[j
].obj
);
7180 incrRefCount(vector
[j
].obj
);
7182 listRewind(operations
,&li
);
7183 while((ln
= listNext(&li
))) {
7184 redisSortOperation
*sop
= ln
->value
;
7185 robj
*val
= lookupKeyByPattern(c
->db
,sop
->pattern
,
7188 if (sop
->type
== REDIS_SORT_GET
) {
7190 listAddNodeTail(listPtr
,createStringObject("",0));
7192 /* We should do a incrRefCount on val because it is
7193 * added to the list, but also a decrRefCount because
7194 * it is returned by lookupKeyByPattern. This results
7195 * in doing nothing at all. */
7196 listAddNodeTail(listPtr
,val
);
7199 redisAssert(sop
->type
== REDIS_SORT_GET
); /* always fails */
7203 if (dictReplace(c
->db
->dict
,storekey
,listObject
)) {
7204 incrRefCount(storekey
);
7206 /* Note: we add 1 because the DB is dirty anyway since even if the
7207 * SORT result is empty a new key is set and maybe the old content
7209 server
.dirty
+= 1+outputlen
;
7210 addReplySds(c
,sdscatprintf(sdsempty(),":%d\r\n",outputlen
));
7214 decrRefCount(sortval
);
7215 listRelease(operations
);
7216 for (j
= 0; j
< vectorlen
; j
++) {
7217 if (alpha
&& vector
[j
].u
.cmpobj
)
7218 decrRefCount(vector
[j
].u
.cmpobj
);
/* Convert an amount of bytes into a human readable string in the form
 * of 100B, 2G, 100M, 4K, and so forth.
 *
 * 's' must point to a buffer large enough for the result (the longest
 * output is a 20 digit number followed by 'B' and the terminator). Values
 * below 1024 are printed as an integer number of bytes, larger values with
 * two decimals and a K/M/G/T suffix (powers of 1024). Values too large for
 * the T range fall back to a plain byte count, so 's' is always written. */
static void bytesToHuman(char *s, unsigned long long n) {
    double d;

    if (n < 1024) {
        /* Bytes */
        sprintf(s,"%lluB",n);
    } else if (n < (1024*1024)) {
        d = (double)n/(1024);
        sprintf(s,"%.2fK",d);
    } else if (n < (1024LL*1024*1024)) {
        d = (double)n/(1024*1024);
        sprintf(s,"%.2fM",d);
    } else if (n < (1024LL*1024*1024*1024)) {
        d = (double)n/(1024LL*1024*1024);
        sprintf(s,"%.2fG",d);
    } else if (n < (1024LL*1024*1024*1024*1024)) {
        d = (double)n/(1024LL*1024*1024*1024);
        sprintf(s,"%.2fT",d);
    } else {
        /* Never leave the caller's buffer uninitialized. */
        sprintf(s,"%lluB",n);
    }
}
7244 /* Create the string returned by the INFO command. This is decoupled
7245 * by the INFO command itself as we need to report the same information
7246 * on memory corruption problems. */
7247 static sds
genRedisInfoString(void) {
7249 time_t uptime
= time(NULL
)-server
.stat_starttime
;
7253 bytesToHuman(hmem
,zmalloc_used_memory());
7254 info
= sdscatprintf(sdsempty(),
7255 "redis_version:%s\r\n"
7256 "redis_git_sha1:%s\r\n"
7257 "redis_git_dirty:%d\r\n"
7259 "multiplexing_api:%s\r\n"
7260 "process_id:%ld\r\n"
7261 "uptime_in_seconds:%ld\r\n"
7262 "uptime_in_days:%ld\r\n"
7263 "connected_clients:%d\r\n"
7264 "connected_slaves:%d\r\n"
7265 "blocked_clients:%d\r\n"
7266 "used_memory:%zu\r\n"
7267 "used_memory_human:%s\r\n"
7268 "changes_since_last_save:%lld\r\n"
7269 "bgsave_in_progress:%d\r\n"
7270 "last_save_time:%ld\r\n"
7271 "bgrewriteaof_in_progress:%d\r\n"
7272 "total_connections_received:%lld\r\n"
7273 "total_commands_processed:%lld\r\n"
7274 "expired_keys:%lld\r\n"
7275 "hash_max_zipmap_entries:%zu\r\n"
7276 "hash_max_zipmap_value:%zu\r\n"
7277 "pubsub_channels:%ld\r\n"
7278 "pubsub_patterns:%u\r\n"
7283 strtol(REDIS_GIT_DIRTY
,NULL
,10) > 0,
7284 (sizeof(long) == 8) ? "64" : "32",
7289 listLength(server
.clients
)-listLength(server
.slaves
),
7290 listLength(server
.slaves
),
7291 server
.blpop_blocked_clients
,
7292 zmalloc_used_memory(),
7295 server
.bgsavechildpid
!= -1,
7297 server
.bgrewritechildpid
!= -1,
7298 server
.stat_numconnections
,
7299 server
.stat_numcommands
,
7300 server
.stat_expiredkeys
,
7301 server
.hash_max_zipmap_entries
,
7302 server
.hash_max_zipmap_value
,
7303 dictSize(server
.pubsub_channels
),
7304 listLength(server
.pubsub_patterns
),
7305 server
.vm_enabled
!= 0,
7306 server
.masterhost
== NULL
? "master" : "slave"
7308 if (server
.masterhost
) {
7309 info
= sdscatprintf(info
,
7310 "master_host:%s\r\n"
7311 "master_port:%d\r\n"
7312 "master_link_status:%s\r\n"
7313 "master_last_io_seconds_ago:%d\r\n"
7316 (server
.replstate
== REDIS_REPL_CONNECTED
) ?
7318 server
.master
? ((int)(time(NULL
)-server
.master
->lastinteraction
)) : -1
7321 if (server
.vm_enabled
) {
7323 info
= sdscatprintf(info
,
7324 "vm_conf_max_memory:%llu\r\n"
7325 "vm_conf_page_size:%llu\r\n"
7326 "vm_conf_pages:%llu\r\n"
7327 "vm_stats_used_pages:%llu\r\n"
7328 "vm_stats_swapped_objects:%llu\r\n"
7329 "vm_stats_swappin_count:%llu\r\n"
7330 "vm_stats_swappout_count:%llu\r\n"
7331 "vm_stats_io_newjobs_len:%lu\r\n"
7332 "vm_stats_io_processing_len:%lu\r\n"
7333 "vm_stats_io_processed_len:%lu\r\n"
7334 "vm_stats_io_active_threads:%lu\r\n"
7335 "vm_stats_blocked_clients:%lu\r\n"
7336 ,(unsigned long long) server
.vm_max_memory
,
7337 (unsigned long long) server
.vm_page_size
,
7338 (unsigned long long) server
.vm_pages
,
7339 (unsigned long long) server
.vm_stats_used_pages
,
7340 (unsigned long long) server
.vm_stats_swapped_objects
,
7341 (unsigned long long) server
.vm_stats_swapins
,
7342 (unsigned long long) server
.vm_stats_swapouts
,
7343 (unsigned long) listLength(server
.io_newjobs
),
7344 (unsigned long) listLength(server
.io_processing
),
7345 (unsigned long) listLength(server
.io_processed
),
7346 (unsigned long) server
.io_active_threads
,
7347 (unsigned long) server
.vm_blocked_clients
7351 for (j
= 0; j
< server
.dbnum
; j
++) {
7352 long long keys
, vkeys
;
7354 keys
= dictSize(server
.db
[j
].dict
);
7355 vkeys
= dictSize(server
.db
[j
].expires
);
7356 if (keys
|| vkeys
) {
7357 info
= sdscatprintf(info
, "db%d:keys=%lld,expires=%lld\r\n",
7364 static void infoCommand(redisClient
*c
) {
7365 sds info
= genRedisInfoString();
7366 addReplySds(c
,sdscatprintf(sdsempty(),"$%lu\r\n",
7367 (unsigned long)sdslen(info
)));
7368 addReplySds(c
,info
);
7369 addReply(c
,shared
.crlf
);
7372 static void monitorCommand(redisClient
*c
) {
7373 /* ignore MONITOR if aleady slave or in monitor mode */
7374 if (c
->flags
& REDIS_SLAVE
) return;
7376 c
->flags
|= (REDIS_SLAVE
|REDIS_MONITOR
);
7378 listAddNodeTail(server
.monitors
,c
);
7379 addReply(c
,shared
.ok
);
7382 /* ================================= Expire ================================= */
7383 static int removeExpire(redisDb
*db
, robj
*key
) {
7384 if (dictDelete(db
->expires
,key
) == DICT_OK
) {
7391 static int setExpire(redisDb
*db
, robj
*key
, time_t when
) {
7392 if (dictAdd(db
->expires
,key
,(void*)when
) == DICT_ERR
) {
7400 /* Return the expire time of the specified key, or -1 if no expire
7401 * is associated with this key (i.e. the key is non volatile) */
7402 static time_t getExpire(redisDb
*db
, robj
*key
) {
7405 /* No expire? return ASAP */
7406 if (dictSize(db
->expires
) == 0 ||
7407 (de
= dictFind(db
->expires
,key
)) == NULL
) return -1;
7409 return (time_t) dictGetEntryVal(de
);
7412 static int expireIfNeeded(redisDb
*db
, robj
*key
) {
7416 /* No expire? return ASAP */
7417 if (dictSize(db
->expires
) == 0 ||
7418 (de
= dictFind(db
->expires
,key
)) == NULL
) return 0;
7420 /* Lookup the expire */
7421 when
= (time_t) dictGetEntryVal(de
);
7422 if (time(NULL
) <= when
) return 0;
7424 /* Delete the key */
7425 dictDelete(db
->expires
,key
);
7426 server
.stat_expiredkeys
++;
7427 return dictDelete(db
->dict
,key
) == DICT_OK
;
7430 static int deleteIfVolatile(redisDb
*db
, robj
*key
) {
7433 /* No expire? return ASAP */
7434 if (dictSize(db
->expires
) == 0 ||
7435 (de
= dictFind(db
->expires
,key
)) == NULL
) return 0;
7437 /* Delete the key */
7439 server
.stat_expiredkeys
++;
7440 dictDelete(db
->expires
,key
);
7441 return dictDelete(db
->dict
,key
) == DICT_OK
;
7444 static void expireGenericCommand(redisClient
*c
, robj
*key
, robj
*param
, long offset
) {
7448 if (getLongFromObjectOrReply(c
, param
, &seconds
, NULL
) != REDIS_OK
) return;
7452 de
= dictFind(c
->db
->dict
,key
);
7454 addReply(c
,shared
.czero
);
7458 if (deleteKey(c
->db
,key
)) server
.dirty
++;
7459 addReply(c
, shared
.cone
);
7462 time_t when
= time(NULL
)+seconds
;
7463 if (setExpire(c
->db
,key
,when
)) {
7464 addReply(c
,shared
.cone
);
7467 addReply(c
,shared
.czero
);
7473 static void expireCommand(redisClient
*c
) {
7474 expireGenericCommand(c
,c
->argv
[1],c
->argv
[2],0);
7477 static void expireatCommand(redisClient
*c
) {
7478 expireGenericCommand(c
,c
->argv
[1],c
->argv
[2],time(NULL
));
7481 static void ttlCommand(redisClient
*c
) {
7485 expire
= getExpire(c
->db
,c
->argv
[1]);
7487 ttl
= (int) (expire
-time(NULL
));
7488 if (ttl
< 0) ttl
= -1;
7490 addReplySds(c
,sdscatprintf(sdsempty(),":%d\r\n",ttl
));
7493 /* ================================ MULTI/EXEC ============================== */
7495 /* Client state initialization for MULTI/EXEC */
7496 static void initClientMultiState(redisClient
*c
) {
7497 c
->mstate
.commands
= NULL
;
7498 c
->mstate
.count
= 0;
7501 /* Release all the resources associated with MULTI/EXEC state */
7502 static void freeClientMultiState(redisClient
*c
) {
7505 for (j
= 0; j
< c
->mstate
.count
; j
++) {
7507 multiCmd
*mc
= c
->mstate
.commands
+j
;
7509 for (i
= 0; i
< mc
->argc
; i
++)
7510 decrRefCount(mc
->argv
[i
]);
7513 zfree(c
->mstate
.commands
);
7516 /* Add a new command into the MULTI commands queue */
7517 static void queueMultiCommand(redisClient
*c
, struct redisCommand
*cmd
) {
7521 c
->mstate
.commands
= zrealloc(c
->mstate
.commands
,
7522 sizeof(multiCmd
)*(c
->mstate
.count
+1));
7523 mc
= c
->mstate
.commands
+c
->mstate
.count
;
7526 mc
->argv
= zmalloc(sizeof(robj
*)*c
->argc
);
7527 memcpy(mc
->argv
,c
->argv
,sizeof(robj
*)*c
->argc
);
7528 for (j
= 0; j
< c
->argc
; j
++)
7529 incrRefCount(mc
->argv
[j
]);
7533 static void multiCommand(redisClient
*c
) {
7534 if (c
->flags
& REDIS_MULTI
) {
7535 addReplySds(c
,sdsnew("-ERR MULTI calls can not be nested\r\n"));
7538 c
->flags
|= REDIS_MULTI
;
7539 addReply(c
,shared
.ok
);
7542 static void discardCommand(redisClient
*c
) {
7543 if (!(c
->flags
& REDIS_MULTI
)) {
7544 addReplySds(c
,sdsnew("-ERR DISCARD without MULTI\r\n"));
7548 freeClientMultiState(c
);
7549 initClientMultiState(c
);
7550 c
->flags
&= (~REDIS_MULTI
);
7551 addReply(c
,shared
.ok
);
7554 /* Send a MULTI command to all the slaves and AOF file. Check the execCommand
7555 * implememntation for more information. */
7556 static void execCommandReplicateMulti(redisClient
*c
) {
7557 struct redisCommand
*cmd
;
7558 robj
*multistring
= createStringObject("MULTI",5);
7560 cmd
= lookupCommand("multi");
7561 if (server
.appendonly
)
7562 feedAppendOnlyFile(cmd
,c
->db
->id
,&multistring
,1);
7563 if (listLength(server
.slaves
))
7564 replicationFeedSlaves(server
.slaves
,c
->db
->id
,&multistring
,1);
7565 decrRefCount(multistring
);
7568 static void execCommand(redisClient
*c
) {
7573 if (!(c
->flags
& REDIS_MULTI
)) {
7574 addReplySds(c
,sdsnew("-ERR EXEC without MULTI\r\n"));
7578 /* Check if we need to abort the EXEC if some WATCHed key was touched.
7579 * A failed EXEC will return a multi bulk nil object. */
7580 if (c
->flags
& REDIS_DIRTY_CAS
) {
7581 freeClientMultiState(c
);
7582 initClientMultiState(c
);
7583 c
->flags
&= ~(REDIS_MULTI
|REDIS_DIRTY_CAS
);
7585 addReply(c
,shared
.nullmultibulk
);
7589 /* Replicate a MULTI request now that we are sure the block is executed.
7590 * This way we'll deliver the MULTI/..../EXEC block as a whole and
7591 * both the AOF and the replication link will have the same consistency
7592 * and atomicity guarantees. */
7593 execCommandReplicateMulti(c
);
7595 /* Exec all the queued commands */
7596 unwatchAllKeys(c
); /* Unwatch ASAP otherwise we'll waste CPU cycles */
7597 orig_argv
= c
->argv
;
7598 orig_argc
= c
->argc
;
7599 addReplySds(c
,sdscatprintf(sdsempty(),"*%d\r\n",c
->mstate
.count
));
7600 for (j
= 0; j
< c
->mstate
.count
; j
++) {
7601 c
->argc
= c
->mstate
.commands
[j
].argc
;
7602 c
->argv
= c
->mstate
.commands
[j
].argv
;
7603 call(c
,c
->mstate
.commands
[j
].cmd
);
7605 c
->argv
= orig_argv
;
7606 c
->argc
= orig_argc
;
7607 freeClientMultiState(c
);
7608 initClientMultiState(c
);
7609 c
->flags
&= ~(REDIS_MULTI
|REDIS_DIRTY_CAS
);
7610 /* Make sure the EXEC command is always replicated / AOF, since we
7611 * always send the MULTI command (we can't know beforehand if the
7612 * next operations will contain at least a modification to the DB). */
/* =========================== Blocking Operations  ========================= */

/* Currently Redis blocking operations support is limited to list POP ops,
 * so the current implementation is not fully generic, but it is also not
 * completely specific, so it will not require a rewrite to support new
 * kinds of blocking operations in the future.
 *
 * Still it's important to note that list blocking operations can already be
 * used as a notification mechanism in order to implement other blocking
 * operations at application level, so there must be very strong evidence
 * of usefulness and generality before new blocking operations are implemented.
 *
 * This is how the current blocking POP works, we use BLPOP as example:
 * - If the user calls BLPOP and the key exists and contains a non empty list
 *   then LPOP is called instead. So BLPOP is semantically the same as LPOP
 *   if there is no need to block.
 * - If instead BLPOP is called and the key does not exist or the list is
 *   empty we need to block. In order to do so we remove the notification for
 *   new data to read in the client socket (so that we'll not serve new
 *   requests if the blocking request is not served). Also we put the client
 *   in a dictionary (db->blocking_keys) mapping keys to a list of clients
 *   blocking for these keys.
 * - If a PUSH operation against a key with blocked clients waiting is
 *   performed, we serve the first in the list: basically instead of pushing
 *   the new element inside the list we return it to the (first / oldest)
 *   blocking client, unblock the client, and remove it from the list.
 *
 * The above comment and the source code should be enough in order to understand
 * the implementation and modify / fix it later.
 */
7647 /* Set a client in blocking mode for the specified key, with the specified
7649 static void blockForKeys(redisClient
*c
, robj
**keys
, int numkeys
, time_t timeout
) {
7654 c
->blocking_keys
= zmalloc(sizeof(robj
*)*numkeys
);
7655 c
->blocking_keys_num
= numkeys
;
7656 c
->blockingto
= timeout
;
7657 for (j
= 0; j
< numkeys
; j
++) {
7658 /* Add the key in the client structure, to map clients -> keys */
7659 c
->blocking_keys
[j
] = keys
[j
];
7660 incrRefCount(keys
[j
]);
7662 /* And in the other "side", to map keys -> clients */
7663 de
= dictFind(c
->db
->blocking_keys
,keys
[j
]);
7667 /* For every key we take a list of clients blocked for it */
7669 retval
= dictAdd(c
->db
->blocking_keys
,keys
[j
],l
);
7670 incrRefCount(keys
[j
]);
7671 assert(retval
== DICT_OK
);
7673 l
= dictGetEntryVal(de
);
7675 listAddNodeTail(l
,c
);
7677 /* Mark the client as a blocked client */
7678 c
->flags
|= REDIS_BLOCKED
;
7679 server
.blpop_blocked_clients
++;
7682 /* Unblock a client that's waiting in a blocking operation such as BLPOP */
7683 static void unblockClientWaitingData(redisClient
*c
) {
7688 assert(c
->blocking_keys
!= NULL
);
7689 /* The client may wait for multiple keys, so unblock it for every key. */
7690 for (j
= 0; j
< c
->blocking_keys_num
; j
++) {
7691 /* Remove this client from the list of clients waiting for this key. */
7692 de
= dictFind(c
->db
->blocking_keys
,c
->blocking_keys
[j
]);
7694 l
= dictGetEntryVal(de
);
7695 listDelNode(l
,listSearchKey(l
,c
));
7696 /* If the list is empty we need to remove it to avoid wasting memory */
7697 if (listLength(l
) == 0)
7698 dictDelete(c
->db
->blocking_keys
,c
->blocking_keys
[j
]);
7699 decrRefCount(c
->blocking_keys
[j
]);
7701 /* Cleanup the client structure */
7702 zfree(c
->blocking_keys
);
7703 c
->blocking_keys
= NULL
;
7704 c
->flags
&= (~REDIS_BLOCKED
);
7705 server
.blpop_blocked_clients
--;
7706 /* We want to process data if there is some command waiting
7707 * in the input buffer. Note that this is safe even if
7708 * unblockClientWaitingData() gets called from freeClient() because
7709 * freeClient() will be smart enough to call this function
7710 * *after* c->querybuf was set to NULL. */
7711 if (c
->querybuf
&& sdslen(c
->querybuf
) > 0) processInputBuffer(c
);
7714 /* This should be called from any function PUSHing into lists.
7715 * 'c' is the "pushing client", 'key' is the key it is pushing data against,
7716 * 'ele' is the element pushed.
7718 * If the function returns 0 there was no client waiting for a list push
7721 * If the function returns 1 there was a client waiting for a list push
7722 * against this key, the element was passed to this client thus it's not
7723 * needed to actually add it to the list and the caller should return asap. */
7724 static int handleClientsWaitingListPush(redisClient
*c
, robj
*key
, robj
*ele
) {
7725 struct dictEntry
*de
;
7726 redisClient
*receiver
;
7730 de
= dictFind(c
->db
->blocking_keys
,key
);
7731 if (de
== NULL
) return 0;
7732 l
= dictGetEntryVal(de
);
7735 receiver
= ln
->value
;
7737 addReplySds(receiver
,sdsnew("*2\r\n"));
7738 addReplyBulk(receiver
,key
);
7739 addReplyBulk(receiver
,ele
);
7740 unblockClientWaitingData(receiver
);
7744 /* Blocking RPOP/LPOP */
7745 static void blockingPopGenericCommand(redisClient
*c
, int where
) {
7750 for (j
= 1; j
< c
->argc
-1; j
++) {
7751 o
= lookupKeyWrite(c
->db
,c
->argv
[j
]);
7753 if (o
->type
!= REDIS_LIST
) {
7754 addReply(c
,shared
.wrongtypeerr
);
7757 list
*list
= o
->ptr
;
7758 if (listLength(list
) != 0) {
7759 /* If the list contains elements fall back to the usual
7760 * non-blocking POP operation */
7761 robj
*argv
[2], **orig_argv
;
7764 /* We need to alter the command arguments before to call
7765 * popGenericCommand() as the command takes a single key. */
7766 orig_argv
= c
->argv
;
7767 orig_argc
= c
->argc
;
7768 argv
[1] = c
->argv
[j
];
7772 /* Also the return value is different, we need to output
7773 * the multi bulk reply header and the key name. The
7774 * "real" command will add the last element (the value)
7775 * for us. If this souds like an hack to you it's just
7776 * because it is... */
7777 addReplySds(c
,sdsnew("*2\r\n"));
7778 addReplyBulk(c
,argv
[1]);
7779 popGenericCommand(c
,where
);
7781 /* Fix the client structure with the original stuff */
7782 c
->argv
= orig_argv
;
7783 c
->argc
= orig_argc
;
7789 /* If the list is empty or the key does not exists we must block */
7790 timeout
= strtol(c
->argv
[c
->argc
-1]->ptr
,NULL
,10);
7791 if (timeout
> 0) timeout
+= time(NULL
);
7792 blockForKeys(c
,c
->argv
+1,c
->argc
-2,timeout
);
7795 static void blpopCommand(redisClient
*c
) {
7796 blockingPopGenericCommand(c
,REDIS_HEAD
);
7799 static void brpopCommand(redisClient
*c
) {
7800 blockingPopGenericCommand(c
,REDIS_TAIL
);
7803 /* =============================== Replication ============================= */
7805 static int syncWrite(int fd
, char *ptr
, ssize_t size
, int timeout
) {
7806 ssize_t nwritten
, ret
= size
;
7807 time_t start
= time(NULL
);
7811 if (aeWait(fd
,AE_WRITABLE
,1000) & AE_WRITABLE
) {
7812 nwritten
= write(fd
,ptr
,size
);
7813 if (nwritten
== -1) return -1;
7817 if ((time(NULL
)-start
) > timeout
) {
7825 static int syncRead(int fd
, char *ptr
, ssize_t size
, int timeout
) {
7826 ssize_t nread
, totread
= 0;
7827 time_t start
= time(NULL
);
7831 if (aeWait(fd
,AE_READABLE
,1000) & AE_READABLE
) {
7832 nread
= read(fd
,ptr
,size
);
7833 if (nread
== -1) return -1;
7838 if ((time(NULL
)-start
) > timeout
) {
/* Read a line from 'fd' one byte at a time into 'ptr', a buffer of 'size'
 * bytes. Reading stops at the first '\n'; the newline and an optional
 * preceding '\r' are stripped and the result is always nul terminated.
 *
 * At most size-1 characters are stored: a longer line is returned
 * truncated. Returns the number of characters stored, or -1 if 'size' is
 * smaller than 1 (errno set to EINVAL) or if syncRead() fails. 'timeout'
 * is passed to syncRead() for every single byte. */
static int syncReadLine(int fd, char *ptr, ssize_t size, int timeout) {
    ssize_t nread = 0;

    if (size < 1) {
        errno = EINVAL;
        return -1;
    }
    *ptr = '\0';
    size--; /* Reserve room for the terminator. */
    while(size) {
        char c;

        if (syncRead(fd,&c,1,timeout) == -1) return -1;
        if (c == '\n') {
            *ptr = '\0';
            if (nread && *(ptr-1) == '\r') *(ptr-1) = '\0';
            return nread;
        } else {
            *ptr++ = c;
            *ptr = '\0';
            nread++;
            /* Account for the byte just stored, so that the loop stops
             * before writing past the end of the caller's buffer. */
            size--;
        }
    }
    return nread;
}
7867 static void syncCommand(redisClient
*c
) {
7868 /* ignore SYNC if aleady slave or in monitor mode */
7869 if (c
->flags
& REDIS_SLAVE
) return;
7871 /* SYNC can't be issued when the server has pending data to send to
7872 * the client about already issued commands. We need a fresh reply
7873 * buffer registering the differences between the BGSAVE and the current
7874 * dataset, so that we can copy to other slaves if needed. */
7875 if (listLength(c
->reply
) != 0) {
7876 addReplySds(c
,sdsnew("-ERR SYNC is invalid with pending input\r\n"));
7880 redisLog(REDIS_NOTICE
,"Slave ask for synchronization");
7881 /* Here we need to check if there is a background saving operation
7882 * in progress, or if it is required to start one */
7883 if (server
.bgsavechildpid
!= -1) {
7884 /* Ok a background save is in progress. Let's check if it is a good
7885 * one for replication, i.e. if there is another slave that is
7886 * registering differences since the server forked to save */
7891 listRewind(server
.slaves
,&li
);
7892 while((ln
= listNext(&li
))) {
7894 if (slave
->replstate
== REDIS_REPL_WAIT_BGSAVE_END
) break;
7897 /* Perfect, the server is already registering differences for
7898 * another slave. Set the right state, and copy the buffer. */
7899 listRelease(c
->reply
);
7900 c
->reply
= listDup(slave
->reply
);
7901 c
->replstate
= REDIS_REPL_WAIT_BGSAVE_END
;
7902 redisLog(REDIS_NOTICE
,"Waiting for end of BGSAVE for SYNC");
7904 /* No way, we need to wait for the next BGSAVE in order to
7905 * register differences */
7906 c
->replstate
= REDIS_REPL_WAIT_BGSAVE_START
;
7907 redisLog(REDIS_NOTICE
,"Waiting for next BGSAVE for SYNC");
7910 /* Ok we don't have a BGSAVE in progress, let's start one */
7911 redisLog(REDIS_NOTICE
,"Starting BGSAVE for SYNC");
7912 if (rdbSaveBackground(server
.dbfilename
) != REDIS_OK
) {
7913 redisLog(REDIS_NOTICE
,"Replication failed, can't BGSAVE");
7914 addReplySds(c
,sdsnew("-ERR Unalbe to perform background save\r\n"));
7917 c
->replstate
= REDIS_REPL_WAIT_BGSAVE_END
;
7920 c
->flags
|= REDIS_SLAVE
;
7922 listAddNodeTail(server
.slaves
,c
);
7926 static void sendBulkToSlave(aeEventLoop
*el
, int fd
, void *privdata
, int mask
) {
7927 redisClient
*slave
= privdata
;
7929 REDIS_NOTUSED(mask
);
7930 char buf
[REDIS_IOBUF_LEN
];
7931 ssize_t nwritten
, buflen
;
7933 if (slave
->repldboff
== 0) {
7934 /* Write the bulk write count before to transfer the DB. In theory here
7935 * we don't know how much room there is in the output buffer of the
7936 * socket, but in pratice SO_SNDLOWAT (the minimum count for output
7937 * operations) will never be smaller than the few bytes we need. */
7940 bulkcount
= sdscatprintf(sdsempty(),"$%lld\r\n",(unsigned long long)
7942 if (write(fd
,bulkcount
,sdslen(bulkcount
)) != (signed)sdslen(bulkcount
))
7950 lseek(slave
->repldbfd
,slave
->repldboff
,SEEK_SET
);
7951 buflen
= read(slave
->repldbfd
,buf
,REDIS_IOBUF_LEN
);
7953 redisLog(REDIS_WARNING
,"Read error sending DB to slave: %s",
7954 (buflen
== 0) ? "premature EOF" : strerror(errno
));
7958 if ((nwritten
= write(fd
,buf
,buflen
)) == -1) {
7959 redisLog(REDIS_VERBOSE
,"Write error sending DB to slave: %s",
7964 slave
->repldboff
+= nwritten
;
7965 if (slave
->repldboff
== slave
->repldbsize
) {
7966 close(slave
->repldbfd
);
7967 slave
->repldbfd
= -1;
7968 aeDeleteFileEvent(server
.el
,slave
->fd
,AE_WRITABLE
);
7969 slave
->replstate
= REDIS_REPL_ONLINE
;
7970 if (aeCreateFileEvent(server
.el
, slave
->fd
, AE_WRITABLE
,
7971 sendReplyToClient
, slave
) == AE_ERR
) {
7975 addReplySds(slave
,sdsempty());
7976 redisLog(REDIS_NOTICE
,"Synchronization with slave succeeded");
7980 /* This function is called at the end of every backgrond saving.
7981 * The argument bgsaveerr is REDIS_OK if the background saving succeeded
7982 * otherwise REDIS_ERR is passed to the function.
7984 * The goal of this function is to handle slaves waiting for a successful
7985 * background saving in order to perform non-blocking synchronization. */
7986 static void updateSlavesWaitingBgsave(int bgsaveerr
) {
7988 int startbgsave
= 0;
7991 listRewind(server
.slaves
,&li
);
7992 while((ln
= listNext(&li
))) {
7993 redisClient
*slave
= ln
->value
;
7995 if (slave
->replstate
== REDIS_REPL_WAIT_BGSAVE_START
) {
7997 slave
->replstate
= REDIS_REPL_WAIT_BGSAVE_END
;
7998 } else if (slave
->replstate
== REDIS_REPL_WAIT_BGSAVE_END
) {
7999 struct redis_stat buf
;
8001 if (bgsaveerr
!= REDIS_OK
) {
8003 redisLog(REDIS_WARNING
,"SYNC failed. BGSAVE child returned an error");
8006 if ((slave
->repldbfd
= open(server
.dbfilename
,O_RDONLY
)) == -1 ||
8007 redis_fstat(slave
->repldbfd
,&buf
) == -1) {
8009 redisLog(REDIS_WARNING
,"SYNC failed. Can't open/stat DB after BGSAVE: %s", strerror(errno
));
8012 slave
->repldboff
= 0;
8013 slave
->repldbsize
= buf
.st_size
;
8014 slave
->replstate
= REDIS_REPL_SEND_BULK
;
8015 aeDeleteFileEvent(server
.el
,slave
->fd
,AE_WRITABLE
);
8016 if (aeCreateFileEvent(server
.el
, slave
->fd
, AE_WRITABLE
, sendBulkToSlave
, slave
) == AE_ERR
) {
8023 if (rdbSaveBackground(server
.dbfilename
) != REDIS_OK
) {
8026 listRewind(server
.slaves
,&li
);
8027 redisLog(REDIS_WARNING
,"SYNC failed. BGSAVE failed");
8028 while((ln
= listNext(&li
))) {
8029 redisClient
*slave
= ln
->value
;
8031 if (slave
->replstate
== REDIS_REPL_WAIT_BGSAVE_START
)
8038 static int syncWithMaster(void) {
8039 char buf
[1024], tmpfile
[256], authcmd
[1024];
8041 int fd
= anetTcpConnect(NULL
,server
.masterhost
,server
.masterport
);
8042 int dfd
, maxtries
= 5;
8045 redisLog(REDIS_WARNING
,"Unable to connect to MASTER: %s",
8050 /* AUTH with the master if required. */
8051 if(server
.masterauth
) {
8052 snprintf(authcmd
, 1024, "AUTH %s\r\n", server
.masterauth
);
8053 if (syncWrite(fd
, authcmd
, strlen(server
.masterauth
)+7, 5) == -1) {
8055 redisLog(REDIS_WARNING
,"Unable to AUTH to MASTER: %s",
8059 /* Read the AUTH result. */
8060 if (syncReadLine(fd
,buf
,1024,3600) == -1) {
8062 redisLog(REDIS_WARNING
,"I/O error reading auth result from MASTER: %s",
8066 if (buf
[0] != '+') {
8068 redisLog(REDIS_WARNING
,"Cannot AUTH to MASTER, is the masterauth password correct?");
8073 /* Issue the SYNC command */
8074 if (syncWrite(fd
,"SYNC \r\n",7,5) == -1) {
8076 redisLog(REDIS_WARNING
,"I/O error writing to MASTER: %s",
8080 /* Read the bulk write count */
8081 if (syncReadLine(fd
,buf
,1024,3600) == -1) {
8083 redisLog(REDIS_WARNING
,"I/O error reading bulk count from MASTER: %s",
8087 if (buf
[0] != '$') {
8089 redisLog(REDIS_WARNING
,"Bad protocol from MASTER, the first byte is not '$', are you sure the host and port are right?");
8092 dumpsize
= strtol(buf
+1,NULL
,10);
8093 redisLog(REDIS_NOTICE
,"Receiving %ld bytes data dump from MASTER",dumpsize
);
8094 /* Read the bulk write data on a temp file */
8096 snprintf(tmpfile
,256,
8097 "temp-%d.%ld.rdb",(int)time(NULL
),(long int)getpid());
8098 dfd
= open(tmpfile
,O_CREAT
|O_WRONLY
|O_EXCL
,0644);
8099 if (dfd
!= -1) break;
8104 redisLog(REDIS_WARNING
,"Opening the temp file needed for MASTER <-> SLAVE synchronization: %s",strerror(errno
));
8108 int nread
, nwritten
;
8110 nread
= read(fd
,buf
,(dumpsize
< 1024)?dumpsize
:1024);
8112 redisLog(REDIS_WARNING
,"I/O error trying to sync with MASTER: %s",
8118 nwritten
= write(dfd
,buf
,nread
);
8119 if (nwritten
== -1) {
8120 redisLog(REDIS_WARNING
,"Write error writing to the DB dump file needed for MASTER <-> SLAVE synchrnonization: %s", strerror(errno
));
8128 if (rename(tmpfile
,server
.dbfilename
) == -1) {
8129 redisLog(REDIS_WARNING
,"Failed trying to rename the temp DB into dump.rdb in MASTER <-> SLAVE synchronization: %s", strerror(errno
));
8135 if (rdbLoad(server
.dbfilename
) != REDIS_OK
) {
8136 redisLog(REDIS_WARNING
,"Failed trying to load the MASTER synchronization DB from disk");
8140 server
.master
= createClient(fd
);
8141 server
.master
->flags
|= REDIS_MASTER
;
8142 server
.master
->authenticated
= 1;
8143 server
.replstate
= REDIS_REPL_CONNECTED
;
8147 static void slaveofCommand(redisClient
*c
) {
8148 if (!strcasecmp(c
->argv
[1]->ptr
,"no") &&
8149 !strcasecmp(c
->argv
[2]->ptr
,"one")) {
8150 if (server
.masterhost
) {
8151 sdsfree(server
.masterhost
);
8152 server
.masterhost
= NULL
;
8153 if (server
.master
) freeClient(server
.master
);
8154 server
.replstate
= REDIS_REPL_NONE
;
8155 redisLog(REDIS_NOTICE
,"MASTER MODE enabled (user request)");
8158 sdsfree(server
.masterhost
);
8159 server
.masterhost
= sdsdup(c
->argv
[1]->ptr
);
8160 server
.masterport
= atoi(c
->argv
[2]->ptr
);
8161 if (server
.master
) freeClient(server
.master
);
8162 server
.replstate
= REDIS_REPL_CONNECT
;
8163 redisLog(REDIS_NOTICE
,"SLAVE OF %s:%d enabled (user request)",
8164 server
.masterhost
, server
.masterport
);
8166 addReply(c
,shared
.ok
);
8169 /* ============================ Maxmemory directive ======================== */
8171 /* Try to free one object form the pre-allocated objects free list.
8172 * This is useful under low mem conditions as by default we take 1 million
8173 * free objects allocated. On success REDIS_OK is returned, otherwise
8175 static int tryFreeOneObjectFromFreelist(void) {
8178 if (server
.vm_enabled
) pthread_mutex_lock(&server
.obj_freelist_mutex
);
8179 if (listLength(server
.objfreelist
)) {
8180 listNode
*head
= listFirst(server
.objfreelist
);
8181 o
= listNodeValue(head
);
8182 listDelNode(server
.objfreelist
,head
);
8183 if (server
.vm_enabled
) pthread_mutex_unlock(&server
.obj_freelist_mutex
);
8187 if (server
.vm_enabled
) pthread_mutex_unlock(&server
.obj_freelist_mutex
);
8192 /* This function gets called when 'maxmemory' is set on the config file to limit
8193 * the max memory used by the server, and we are out of memory.
8194 * This function will try to, in order:
8196 * - Free objects from the free list
8197 * - Try to remove keys with an EXPIRE set
8199 * It is not possible to free enough memory to reach used-memory < maxmemory
8200 * the server will start refusing commands that will enlarge even more the
8203 static void freeMemoryIfNeeded(void) {
8204 while (server
.maxmemory
&& zmalloc_used_memory() > server
.maxmemory
) {
8205 int j
, k
, freed
= 0;
8207 if (tryFreeOneObjectFromFreelist() == REDIS_OK
) continue;
8208 for (j
= 0; j
< server
.dbnum
; j
++) {
8210 robj
*minkey
= NULL
;
8211 struct dictEntry
*de
;
8213 if (dictSize(server
.db
[j
].expires
)) {
8215 /* From a sample of three keys drop the one nearest to
8216 * the natural expire */
8217 for (k
= 0; k
< 3; k
++) {
8220 de
= dictGetRandomKey(server
.db
[j
].expires
);
8221 t
= (time_t) dictGetEntryVal(de
);
8222 if (minttl
== -1 || t
< minttl
) {
8223 minkey
= dictGetEntryKey(de
);
8227 deleteKey(server
.db
+j
,minkey
);
8230 if (!freed
) return; /* nothing to free... */
8234 /* ============================== Append Only file ========================== */
8236 /* Write the append only file buffer on disk.
8238 * Since we are required to write the AOF before replying to the client,
8239 * and the only way the client socket can get a write is entering when the
8240 * the event loop, we accumulate all the AOF writes in a memory
8241 * buffer and write it on disk using this function just before entering
8242 * the event loop again. */
8243 static void flushAppendOnlyFile(void) {
8247 if (sdslen(server
.aofbuf
) == 0) return;
8249 /* We want to perform a single write. This should be guaranteed atomic
8250 * at least if the filesystem we are writing is a real physical one.
8251 * While this will save us against the server being killed I don't think
8252 * there is much to do about the whole server stopping for power problems
8254 nwritten
= write(server
.appendfd
,server
.aofbuf
,sdslen(server
.aofbuf
));
8255 if (nwritten
!= (signed)sdslen(server
.aofbuf
)) {
8256 /* Ooops, we are in troubles. The best thing to do for now is
8257 * aborting instead of giving the illusion that everything is
8258 * working as expected. */
8259 if (nwritten
== -1) {
8260 redisLog(REDIS_WARNING
,"Exiting on error writing to the append-only file: %s",strerror(errno
));
8262 redisLog(REDIS_WARNING
,"Exiting on short write while writing to the append-only file: %s",strerror(errno
));
8266 sdsfree(server
.aofbuf
);
8267 server
.aofbuf
= sdsempty();
8269 /* Fsync if needed */
8271 if (server
.appendfsync
== APPENDFSYNC_ALWAYS
||
8272 (server
.appendfsync
== APPENDFSYNC_EVERYSEC
&&
8273 now
-server
.lastfsync
> 1))
8275 /* aof_fsync is defined as fdatasync() for Linux in order to avoid
8276 * flushing metadata. */
8277 aof_fsync(server
.appendfd
); /* Let's try to get this data on the disk */
8278 server
.lastfsync
= now
;
8282 static sds
catAppendOnlyGenericCommand(sds buf
, int argc
, robj
**argv
) {
8284 buf
= sdscatprintf(buf
,"*%d\r\n",argc
);
8285 for (j
= 0; j
< argc
; j
++) {
8286 robj
*o
= getDecodedObject(argv
[j
]);
8287 buf
= sdscatprintf(buf
,"$%lu\r\n",(unsigned long)sdslen(o
->ptr
));
8288 buf
= sdscatlen(buf
,o
->ptr
,sdslen(o
->ptr
));
8289 buf
= sdscatlen(buf
,"\r\n",2);
8295 static sds
catAppendOnlyExpireAtCommand(sds buf
, robj
*key
, robj
*seconds
) {
8300 /* Make sure we can use strtol */
8301 seconds
= getDecodedObject(seconds
);
8302 when
= time(NULL
)+strtol(seconds
->ptr
,NULL
,10);
8303 decrRefCount(seconds
);
8305 argv
[0] = createStringObject("EXPIREAT",8);
8307 argv
[2] = createObject(REDIS_STRING
,
8308 sdscatprintf(sdsempty(),"%ld",when
));
8309 buf
= catAppendOnlyGenericCommand(buf
, argc
, argv
);
8310 decrRefCount(argv
[0]);
8311 decrRefCount(argv
[2]);
8315 static void feedAppendOnlyFile(struct redisCommand
*cmd
, int dictid
, robj
**argv
, int argc
) {
8316 sds buf
= sdsempty();
8319 /* The DB this command was targetting is not the same as the last command
8320 * we appendend. To issue a SELECT command is needed. */
8321 if (dictid
!= server
.appendseldb
) {
8324 snprintf(seldb
,sizeof(seldb
),"%d",dictid
);
8325 buf
= sdscatprintf(buf
,"*2\r\n$6\r\nSELECT\r\n$%lu\r\n%s\r\n",
8326 (unsigned long)strlen(seldb
),seldb
);
8327 server
.appendseldb
= dictid
;
8330 if (cmd
->proc
== expireCommand
) {
8331 /* Translate EXPIRE into EXPIREAT */
8332 buf
= catAppendOnlyExpireAtCommand(buf
,argv
[1],argv
[2]);
8333 } else if (cmd
->proc
== setexCommand
) {
8334 /* Translate SETEX to SET and EXPIREAT */
8335 tmpargv
[0] = createStringObject("SET",3);
8336 tmpargv
[1] = argv
[1];
8337 tmpargv
[2] = argv
[3];
8338 buf
= catAppendOnlyGenericCommand(buf
,3,tmpargv
);
8339 decrRefCount(tmpargv
[0]);
8340 buf
= catAppendOnlyExpireAtCommand(buf
,argv
[1],argv
[2]);
8342 buf
= catAppendOnlyGenericCommand(buf
,argc
,argv
);
8345 /* Append to the AOF buffer. This will be flushed on disk just before
8346 * of re-entering the event loop, so before the client will get a
8347 * positive reply about the operation performed. */
8348 server
.aofbuf
= sdscatlen(server
.aofbuf
,buf
,sdslen(buf
));
8350 /* If a background append only file rewriting is in progress we want to
8351 * accumulate the differences between the child DB and the current one
8352 * in a buffer, so that when the child process will do its work we
8353 * can append the differences to the new append only file. */
8354 if (server
.bgrewritechildpid
!= -1)
8355 server
.bgrewritebuf
= sdscatlen(server
.bgrewritebuf
,buf
,sdslen(buf
));
8360 /* In Redis commands are always executed in the context of a client, so in
8361 * order to load the append only file we need to create a fake client. */
8362 static struct redisClient
*createFakeClient(void) {
8363 struct redisClient
*c
= zmalloc(sizeof(*c
));
8367 c
->querybuf
= sdsempty();
8371 /* We set the fake client as a slave waiting for the synchronization
8372 * so that Redis will not try to send replies to this client. */
8373 c
->replstate
= REDIS_REPL_WAIT_BGSAVE_START
;
8374 c
->reply
= listCreate();
8375 listSetFreeMethod(c
->reply
,decrRefCount
);
8376 listSetDupMethod(c
->reply
,dupClientReplyValue
);
8377 initClientMultiState(c
);
8381 static void freeFakeClient(struct redisClient
*c
) {
8382 sdsfree(c
->querybuf
);
8383 listRelease(c
->reply
);
8384 freeClientMultiState(c
);
8388 /* Replay the append log file. On error REDIS_OK is returned. On non fatal
8389 * error (the append only file is zero-length) REDIS_ERR is returned. On
8390 * fatal error an error message is logged and the program exists. */
8391 int loadAppendOnlyFile(char *filename
) {
8392 struct redisClient
*fakeClient
;
8393 FILE *fp
= fopen(filename
,"r");
8394 struct redis_stat sb
;
8395 unsigned long long loadedkeys
= 0;
8396 int appendonly
= server
.appendonly
;
8398 if (redis_fstat(fileno(fp
),&sb
) != -1 && sb
.st_size
== 0)
8402 redisLog(REDIS_WARNING
,"Fatal error: can't open the append log file for reading: %s",strerror(errno
));
8406 /* Temporarily disable AOF, to prevent EXEC from feeding a MULTI
8407 * to the same file we're about to read. */
8408 server
.appendonly
= 0;
8410 fakeClient
= createFakeClient();
8417 struct redisCommand
*cmd
;
8419 if (fgets(buf
,sizeof(buf
),fp
) == NULL
) {
8425 if (buf
[0] != '*') goto fmterr
;
8427 argv
= zmalloc(sizeof(robj
*)*argc
);
8428 for (j
= 0; j
< argc
; j
++) {
8429 if (fgets(buf
,sizeof(buf
),fp
) == NULL
) goto readerr
;
8430 if (buf
[0] != '$') goto fmterr
;
8431 len
= strtol(buf
+1,NULL
,10);
8432 argsds
= sdsnewlen(NULL
,len
);
8433 if (len
&& fread(argsds
,len
,1,fp
) == 0) goto fmterr
;
8434 argv
[j
] = createObject(REDIS_STRING
,argsds
);
8435 if (fread(buf
,2,1,fp
) == 0) goto fmterr
; /* discard CRLF */
8438 /* Command lookup */
8439 cmd
= lookupCommand(argv
[0]->ptr
);
8441 redisLog(REDIS_WARNING
,"Unknown command '%s' reading the append only file", argv
[0]->ptr
);
8444 /* Try object encoding */
8445 if (cmd
->flags
& REDIS_CMD_BULK
)
8446 argv
[argc
-1] = tryObjectEncoding(argv
[argc
-1]);
8447 /* Run the command in the context of a fake client */
8448 fakeClient
->argc
= argc
;
8449 fakeClient
->argv
= argv
;
8450 cmd
->proc(fakeClient
);
8451 /* Discard the reply objects list from the fake client */
8452 while(listLength(fakeClient
->reply
))
8453 listDelNode(fakeClient
->reply
,listFirst(fakeClient
->reply
));
8454 /* Clean up, ready for the next command */
8455 for (j
= 0; j
< argc
; j
++) decrRefCount(argv
[j
]);
8457 /* Handle swapping while loading big datasets when VM is on */
8459 if (server
.vm_enabled
&& (loadedkeys
% 5000) == 0) {
8460 while (zmalloc_used_memory() > server
.vm_max_memory
) {
8461 if (vmSwapOneObjectBlocking() == REDIS_ERR
) break;
8466 /* This point can only be reached when EOF is reached without errors.
8467 * If the client is in the middle of a MULTI/EXEC, log error and quit. */
8468 if (fakeClient
->flags
& REDIS_MULTI
) goto readerr
;
8471 freeFakeClient(fakeClient
);
8472 server
.appendonly
= appendonly
;
8477 redisLog(REDIS_WARNING
,"Unexpected end of file reading the append only file");
8479 redisLog(REDIS_WARNING
,"Unrecoverable error reading the append only file: %s", strerror(errno
));
8483 redisLog(REDIS_WARNING
,"Bad file format reading the append only file");
8487 /* Write an object into a file in the bulk format $<count>\r\n<payload>\r\n */
8488 static int fwriteBulkObject(FILE *fp
, robj
*obj
) {
8492 /* Avoid the incr/decr ref count business if possible to help
8493 * copy-on-write (we are often in a child process when this function
8495 * Also makes sure that key objects don't get incrRefCount-ed when VM
8497 if (obj
->encoding
!= REDIS_ENCODING_RAW
) {
8498 obj
= getDecodedObject(obj
);
8501 snprintf(buf
,sizeof(buf
),"$%ld\r\n",(long)sdslen(obj
->ptr
));
8502 if (fwrite(buf
,strlen(buf
),1,fp
) == 0) goto err
;
8503 if (sdslen(obj
->ptr
) && fwrite(obj
->ptr
,sdslen(obj
->ptr
),1,fp
) == 0)
8505 if (fwrite("\r\n",2,1,fp
) == 0) goto err
;
8506 if (decrrc
) decrRefCount(obj
);
8509 if (decrrc
) decrRefCount(obj
);
/* Write the 'len' bytes at 's' into 'fp' in the bulk format
 * $<count>\r\n<payload>\r\n. The payload is binary safe: it is written
 * with its explicit length and may contain any byte, nul included.
 *
 * Returns 1 on success, 0 on write error. */
static int fwriteBulkString(FILE *fp, char *s, unsigned long len) {
    char buf[128];

    /* 'len' is unsigned: print it with the matching unsigned conversion. */
    snprintf(buf,sizeof(buf),"$%lu\r\n",len);
    if (fwrite(buf,strlen(buf),1,fp) == 0) return 0;
    /* fwrite() of zero bytes returns 0, so skip empty payloads. */
    if (len && fwrite(s,len,1,fp) == 0) return 0;
    if (fwrite("\r\n",2,1,fp) == 0) return 0;
    return 1;
}
/* Write the double 'd' into 'fp' in the bulk format
 * $<count>\r\n<payload>\r\n, where the payload is the value printed with
 * the "%.17g" conversion. Returns 1 on success, 0 on write error. */
static int fwriteBulkDouble(FILE *fp, double d) {
    char payload[128], header[128];
    size_t plen;

    /* The payload buffer already carries its trailing CRLF, which is not
     * part of the announced count: hence the -2. */
    snprintf(payload,sizeof(payload),"%.17g\r\n",d);
    plen = strlen(payload);
    snprintf(header,sizeof(header),"$%lu\r\n",(unsigned long)plen-2);

    if (fwrite(header,strlen(header),1,fp) == 0) return 0;
    return (fwrite(payload,plen,1,fp) == 0) ? 0 : 1;
}
/* Write the long 'l' into 'fp' in the bulk format
 * $<count>\r\n<payload>\r\n, where the payload is the value in decimal.
 * Returns 1 on success, 0 on write error. */
static int fwriteBulkLong(FILE *fp, long l) {
    char payload[128], header[128];
    size_t plen;

    /* The payload buffer already carries its trailing CRLF, which is not
     * part of the announced count: hence the -2. */
    snprintf(payload,sizeof(payload),"%ld\r\n",l);
    plen = strlen(payload);
    snprintf(header,sizeof(header),"$%lu\r\n",(unsigned long)plen-2);

    if (fwrite(header,strlen(header),1,fp) == 0) return 0;
    return (fwrite(payload,plen,1,fp) == 0) ? 0 : 1;
}
8547 /* Write a sequence of commands able to fully rebuild the dataset into
8548 * "filename". Used both by REWRITEAOF and BGREWRITEAOF. */
8549 static int rewriteAppendOnlyFile(char *filename
) {
8550 dictIterator
*di
= NULL
;
8555 time_t now
= time(NULL
);
8557 /* Note that we have to use a different temp name here compared to the
8558 * one used by rewriteAppendOnlyFileBackground() function. */
8559 snprintf(tmpfile
,256,"temp-rewriteaof-%d.aof", (int) getpid());
8560 fp
= fopen(tmpfile
,"w");
8562 redisLog(REDIS_WARNING
, "Failed rewriting the append only file: %s", strerror(errno
));
8565 for (j
= 0; j
< server
.dbnum
; j
++) {
8566 char selectcmd
[] = "*2\r\n$6\r\nSELECT\r\n";
8567 redisDb
*db
= server
.db
+j
;
8569 if (dictSize(d
) == 0) continue;
8570 di
= dictGetIterator(d
);
8576 /* SELECT the new DB */
8577 if (fwrite(selectcmd
,sizeof(selectcmd
)-1,1,fp
) == 0) goto werr
;
8578 if (fwriteBulkLong(fp
,j
) == 0) goto werr
;
8580 /* Iterate this DB writing every entry */
8581 while((de
= dictNext(di
)) != NULL
) {
8586 key
= dictGetEntryKey(de
);
8587 /* If the value for this key is swapped, load a preview in memory.
8588 * We use a "swapped" flag to remember if we need to free the
8589 * value object instead to just increment the ref count anyway
8590 * in order to avoid copy-on-write of pages if we are forked() */
8591 if (!server
.vm_enabled
|| key
->storage
== REDIS_VM_MEMORY
||
8592 key
->storage
== REDIS_VM_SWAPPING
) {
8593 o
= dictGetEntryVal(de
);
8596 o
= vmPreviewObject(key
);
8599 expiretime
= getExpire(db
,key
);
8601 /* Save the key and associated value */
8602 if (o
->type
== REDIS_STRING
) {
8603 /* Emit a SET command */
8604 char cmd
[]="*3\r\n$3\r\nSET\r\n";
8605 if (fwrite(cmd
,sizeof(cmd
)-1,1,fp
) == 0) goto werr
;
8607 if (fwriteBulkObject(fp
,key
) == 0) goto werr
;
8608 if (fwriteBulkObject(fp
,o
) == 0) goto werr
;
8609 } else if (o
->type
== REDIS_LIST
) {
8610 /* Emit the RPUSHes needed to rebuild the list */
8611 list
*list
= o
->ptr
;
8615 listRewind(list
,&li
);
8616 while((ln
= listNext(&li
))) {
8617 char cmd
[]="*3\r\n$5\r\nRPUSH\r\n";
8618 robj
*eleobj
= listNodeValue(ln
);
8620 if (fwrite(cmd
,sizeof(cmd
)-1,1,fp
) == 0) goto werr
;
8621 if (fwriteBulkObject(fp
,key
) == 0) goto werr
;
8622 if (fwriteBulkObject(fp
,eleobj
) == 0) goto werr
;
8624 } else if (o
->type
== REDIS_SET
) {
8625 /* Emit the SADDs needed to rebuild the set */
8627 dictIterator
*di
= dictGetIterator(set
);
8630 while((de
= dictNext(di
)) != NULL
) {
8631 char cmd
[]="*3\r\n$4\r\nSADD\r\n";
8632 robj
*eleobj
= dictGetEntryKey(de
);
8634 if (fwrite(cmd
,sizeof(cmd
)-1,1,fp
) == 0) goto werr
;
8635 if (fwriteBulkObject(fp
,key
) == 0) goto werr
;
8636 if (fwriteBulkObject(fp
,eleobj
) == 0) goto werr
;
8638 dictReleaseIterator(di
);
8639 } else if (o
->type
== REDIS_ZSET
) {
8640 /* Emit the ZADDs needed to rebuild the sorted set */
8642 dictIterator
*di
= dictGetIterator(zs
->dict
);
8645 while((de
= dictNext(di
)) != NULL
) {
8646 char cmd
[]="*4\r\n$4\r\nZADD\r\n";
8647 robj
*eleobj
= dictGetEntryKey(de
);
8648 double *score
= dictGetEntryVal(de
);
8650 if (fwrite(cmd
,sizeof(cmd
)-1,1,fp
) == 0) goto werr
;
8651 if (fwriteBulkObject(fp
,key
) == 0) goto werr
;
8652 if (fwriteBulkDouble(fp
,*score
) == 0) goto werr
;
8653 if (fwriteBulkObject(fp
,eleobj
) == 0) goto werr
;
8655 dictReleaseIterator(di
);
8656 } else if (o
->type
== REDIS_HASH
) {
8657 char cmd
[]="*4\r\n$4\r\nHSET\r\n";
8659 /* Emit the HSETs needed to rebuild the hash */
8660 if (o
->encoding
== REDIS_ENCODING_ZIPMAP
) {
8661 unsigned char *p
= zipmapRewind(o
->ptr
);
8662 unsigned char *field
, *val
;
8663 unsigned int flen
, vlen
;
8665 while((p
= zipmapNext(p
,&field
,&flen
,&val
,&vlen
)) != NULL
) {
8666 if (fwrite(cmd
,sizeof(cmd
)-1,1,fp
) == 0) goto werr
;
8667 if (fwriteBulkObject(fp
,key
) == 0) goto werr
;
8668 if (fwriteBulkString(fp
,(char*)field
,flen
) == -1)
8670 if (fwriteBulkString(fp
,(char*)val
,vlen
) == -1)
8674 dictIterator
*di
= dictGetIterator(o
->ptr
);
8677 while((de
= dictNext(di
)) != NULL
) {
8678 robj
*field
= dictGetEntryKey(de
);
8679 robj
*val
= dictGetEntryVal(de
);
8681 if (fwrite(cmd
,sizeof(cmd
)-1,1,fp
) == 0) goto werr
;
8682 if (fwriteBulkObject(fp
,key
) == 0) goto werr
;
8683 if (fwriteBulkObject(fp
,field
) == -1) return -1;
8684 if (fwriteBulkObject(fp
,val
) == -1) return -1;
8686 dictReleaseIterator(di
);
8689 redisPanic("Unknown object type");
8691 /* Save the expire time */
8692 if (expiretime
!= -1) {
8693 char cmd
[]="*3\r\n$8\r\nEXPIREAT\r\n";
8694 /* If this key is already expired skip it */
8695 if (expiretime
< now
) continue;
8696 if (fwrite(cmd
,sizeof(cmd
)-1,1,fp
) == 0) goto werr
;
8697 if (fwriteBulkObject(fp
,key
) == 0) goto werr
;
8698 if (fwriteBulkLong(fp
,expiretime
) == 0) goto werr
;
8700 if (swapped
) decrRefCount(o
);
8702 dictReleaseIterator(di
);
8705 /* Make sure data will not remain on the OS's output buffers */
8710 /* Use RENAME to make sure the DB file is changed atomically only
8711 * if the generate DB file is ok. */
8712 if (rename(tmpfile
,filename
) == -1) {
8713 redisLog(REDIS_WARNING
,"Error moving temp append only file on the final destination: %s", strerror(errno
));
8717 redisLog(REDIS_NOTICE
,"SYNC append only file rewrite performed");
8723 redisLog(REDIS_WARNING
,"Write error writing append only file on disk: %s", strerror(errno
));
8724 if (di
) dictReleaseIterator(di
);
8728 /* This is how rewriting of the append only file in background works:
8730 * 1) The user calls BGREWRITEAOF
8731 * 2) Redis calls this function, that forks():
8732 * 2a) the child rewrite the append only file in a temp file.
8733 * 2b) the parent accumulates differences in server.bgrewritebuf.
8734 * 3) When the child finished '2a' exists.
8735 * 4) The parent will trap the exit code, if it's OK, will append the
8736 * data accumulated into server.bgrewritebuf into the temp file, and
8737 * finally will rename(2) the temp file in the actual file name.
8738 * Then the new file is reopened as the new append only file. Profit!
/* Start a background rewrite of the append only file in a fork()ed child.
 * Returns REDIS_ERR when a rewrite child is already recorded in
 * server.bgrewritechildpid; a failed fork() is logged as a warning. On
 * success the parent records the child pid in server.bgrewritechildpid. */
8740 static int rewriteAppendOnlyFileBackground(void) {
/* Only one rewrite child at a time. */
8743 if (server
.bgrewritechildpid
!= -1) return REDIS_ERR
;
/* With VM enabled, drain the threaded I/O job queue before forking. */
8744 if (server
.vm_enabled
) waitEmptyIOJobsQueue();
8745 if ((childpid
= fork()) == 0) {
/* Child side: use a private handle on the swap file. */
8749 if (server
.vm_enabled
) vmReopenSwapFile();
/* The temp file name embeds the child's own pid; aofRemoveTempFile()
 * builds the same name from that pid. */
8751 snprintf(tmpfile
,256,"temp-rewriteaof-bg-%d.aof", (int) getpid());
8752 if (rewriteAppendOnlyFile(tmpfile
) == REDIS_OK
) {
/* Parent side: childpid is -1 when fork() failed. */
8759 if (childpid
== -1) {
8760 redisLog(REDIS_WARNING
,
8761 "Can't rewrite append only file in background: fork: %s",
8765 redisLog(REDIS_NOTICE
,
8766 "Background append only file rewriting started by pid %d",childpid
);
/* Remember the child so that a second rewrite is refused. */
8767 server
.bgrewritechildpid
= childpid
;
8768 updateDictResizePolicy();
8769 /* We set appendseldb to -1 in order to force the next call to the
8770 * feedAppendOnlyFile() to issue a SELECT command, so the differences
8771 * accumulated by the parent into server.bgrewritebuf will start
8772 * with a SELECT statement and it will be safe to merge. */
8773 server
.appendseldb
= -1;
8776 return REDIS_OK
; /* unreached */
8779 static void bgrewriteaofCommand(redisClient
*c
) {
8780 if (server
.bgrewritechildpid
!= -1) {
8781 addReplySds(c
,sdsnew("-ERR background append only file rewriting already in progress\r\n"));
8784 if (rewriteAppendOnlyFileBackground() == REDIS_OK
) {
8785 char *status
= "+Background append only file rewriting started\r\n";
8786 addReplySds(c
,sdsnew(status
));
8788 addReply(c
,shared
.err
);
/* Remove the temporary file of the background AOF rewrite child whose pid
 * is 'childpid'. The name is built with the same
 * "temp-rewriteaof-bg-<pid>.aof" pattern the child uses when it creates
 * the file. */
static void aofRemoveTempFile(pid_t childpid) {
    char path[256];

    snprintf(path,sizeof(path),"temp-rewriteaof-bg-%d.aof", (int) childpid);
    unlink(path);
}
8799 /* Virtual Memory is composed mainly of two subsystems:
8800 * - Blocking Virtual Memory
8801 * - Threaded Virtual Memory I/O
8802 * The two parts are not fully decoupled, but functions are split among two
8803 * different sections of the source code (delimited by comments) in order to
8804 * make more clear what functionality is about the blocking VM and what about
8805 * the threaded (not blocking) VM.
8809 * Redis VM is a blocking VM (one that blocks reading swapped values from
8810 * disk into memory when a value swapped out is needed in memory) that is made
8811 * unblocking by trying to examine the command argument vector in order to
8812 * load in background values that will likely be needed in order to exec
8813 * the command. The command is executed only once all the relevant keys
8814 * are loaded into memory.
8816 * This is basically almost as simple as a blocking VM, but almost as parallel
8817 * as a fully non-blocking VM.
8820 /* Called when the user switches from "appendonly yes" to "appendonly no"
8821 * at runtime using the CONFIG command. */
8822 static void stopAppendOnly(void) {
8823 flushAppendOnlyFile();
8824 fsync(server
.appendfd
);
8825 close(server
.appendfd
);
8827 server
.appendfd
= -1;
8828 server
.appendseldb
= -1;
8829 server
.appendonly
= 0;
8830 /* rewrite operation in progress? kill it, wait child exit */
8831 if (server
.bgsavechildpid
!= -1) {
8834 if (kill(server
.bgsavechildpid
,SIGKILL
) != -1)
8835 wait3(&statloc
,0,NULL
);
8836 /* reset the buffer accumulating changes while the child saves */
8837 sdsfree(server
.bgrewritebuf
);
8838 server
.bgrewritebuf
= sdsempty();
8839 server
.bgsavechildpid
= -1;
8843 /* Called when the user switches from "appendonly no" to "appendonly yes"
8844 * at runtime using the CONFIG command. */
8845 static int startAppendOnly(void) {
8846 server
.appendonly
= 1;
8847 server
.lastfsync
= time(NULL
);
8848 server
.appendfd
= open(server
.appendfilename
,O_WRONLY
|O_APPEND
|O_CREAT
,0644);
8849 if (server
.appendfd
== -1) {
8850 redisLog(REDIS_WARNING
,"Used tried to switch on AOF via CONFIG, but I can't open the AOF file: %s",strerror(errno
));
8853 if (rewriteAppendOnlyFileBackground() == REDIS_ERR
) {
8854 server
.appendonly
= 0;
8855 close(server
.appendfd
);
8856 redisLog(REDIS_WARNING
,"Used tried to switch on AOF via CONFIG, I can't trigger a background AOF rewrite operation. Check the above logs for more info about the error.",strerror(errno
));
8862 /* =================== Virtual Memory - Blocking Side ====================== */
8864 static void vmInit(void) {
8870 if (server
.vm_max_threads
!= 0)
8871 zmalloc_enable_thread_safeness(); /* we need thread safe zmalloc() */
8873 redisLog(REDIS_NOTICE
,"Using '%s' as swap file",server
.vm_swap_file
);
8874 /* Try to open the old swap file, otherwise create it */
8875 if ((server
.vm_fp
= fopen(server
.vm_swap_file
,"r+b")) == NULL
) {
8876 server
.vm_fp
= fopen(server
.vm_swap_file
,"w+b");
8878 if (server
.vm_fp
== NULL
) {
8879 redisLog(REDIS_WARNING
,
8880 "Can't open the swap file: %s. Exiting.",
8884 server
.vm_fd
= fileno(server
.vm_fp
);
8885 /* Lock the swap file for writing, this is useful in order to avoid
8886 * another instance to use the same swap file for a config error. */
8887 fl
.l_type
= F_WRLCK
;
8888 fl
.l_whence
= SEEK_SET
;
8889 fl
.l_start
= fl
.l_len
= 0;
8890 if (fcntl(server
.vm_fd
,F_SETLK
,&fl
) == -1) {
8891 redisLog(REDIS_WARNING
,
8892 "Can't lock the swap file at '%s': %s. Make sure it is not used by another Redis instance.", server
.vm_swap_file
, strerror(errno
));
8896 server
.vm_next_page
= 0;
8897 server
.vm_near_pages
= 0;
8898 server
.vm_stats_used_pages
= 0;
8899 server
.vm_stats_swapped_objects
= 0;
8900 server
.vm_stats_swapouts
= 0;
8901 server
.vm_stats_swapins
= 0;
8902 totsize
= server
.vm_pages
*server
.vm_page_size
;
8903 redisLog(REDIS_NOTICE
,"Allocating %lld bytes of swap file",totsize
);
8904 if (ftruncate(server
.vm_fd
,totsize
) == -1) {
8905 redisLog(REDIS_WARNING
,"Can't ftruncate swap file: %s. Exiting.",
8909 redisLog(REDIS_NOTICE
,"Swap file allocated with success");
8911 server
.vm_bitmap
= zmalloc((server
.vm_pages
+7)/8);
8912 redisLog(REDIS_VERBOSE
,"Allocated %lld bytes page table for %lld pages",
8913 (long long) (server
.vm_pages
+7)/8, server
.vm_pages
);
8914 memset(server
.vm_bitmap
,0,(server
.vm_pages
+7)/8);
8916 /* Initialize threaded I/O (used by Virtual Memory) */
8917 server
.io_newjobs
= listCreate();
8918 server
.io_processing
= listCreate();
8919 server
.io_processed
= listCreate();
8920 server
.io_ready_clients
= listCreate();
8921 pthread_mutex_init(&server
.io_mutex
,NULL
);
8922 pthread_mutex_init(&server
.obj_freelist_mutex
,NULL
);
8923 pthread_mutex_init(&server
.io_swapfile_mutex
,NULL
);
8924 server
.io_active_threads
= 0;
8925 if (pipe(pipefds
) == -1) {
8926 redisLog(REDIS_WARNING
,"Unable to intialized VM: pipe(2): %s. Exiting."
8930 server
.io_ready_pipe_read
= pipefds
[0];
8931 server
.io_ready_pipe_write
= pipefds
[1];
8932 redisAssert(anetNonBlock(NULL
,server
.io_ready_pipe_read
) != ANET_ERR
);
8933 /* LZF requires a lot of stack */
8934 pthread_attr_init(&server
.io_threads_attr
);
8935 pthread_attr_getstacksize(&server
.io_threads_attr
, &stacksize
);
8936 while (stacksize
< REDIS_THREAD_STACK_SIZE
) stacksize
*= 2;
8937 pthread_attr_setstacksize(&server
.io_threads_attr
, stacksize
);
8938 /* Listen for events in the threaded I/O pipe */
8939 if (aeCreateFileEvent(server
.el
, server
.io_ready_pipe_read
, AE_READABLE
,
8940 vmThreadedIOCompletedJob
, NULL
) == AE_ERR
)
8941 oom("creating file event");
8944 /* Mark the page as used */
8945 static void vmMarkPageUsed(off_t page
) {
8946 off_t byte
= page
/8;
8948 redisAssert(vmFreePage(page
) == 1);
8949 server
.vm_bitmap
[byte
] |= 1<<bit
;
8952 /* Mark N contiguous pages as used, with 'page' being the first. */
8953 static void vmMarkPagesUsed(off_t page
, off_t count
) {
8956 for (j
= 0; j
< count
; j
++)
8957 vmMarkPageUsed(page
+j
);
8958 server
.vm_stats_used_pages
+= count
;
8959 redisLog(REDIS_DEBUG
,"Mark USED pages: %lld pages at %lld\n",
8960 (long long)count
, (long long)page
);
8963 /* Mark the page as free */
8964 static void vmMarkPageFree(off_t page
) {
8965 off_t byte
= page
/8;
8967 redisAssert(vmFreePage(page
) == 0);
8968 server
.vm_bitmap
[byte
] &= ~(1<<bit
);
8971 /* Mark N contiguous pages as free, with 'page' being the first. */
8972 static void vmMarkPagesFree(off_t page
, off_t count
) {
8975 for (j
= 0; j
< count
; j
++)
8976 vmMarkPageFree(page
+j
);
8977 server
.vm_stats_used_pages
-= count
;
8978 redisLog(REDIS_DEBUG
,"Mark FREE pages: %lld pages at %lld\n",
8979 (long long)count
, (long long)page
);
8982 /* Test if the page is free */
8983 static int vmFreePage(off_t page
) {
8984 off_t byte
= page
/8;
8986 return (server
.vm_bitmap
[byte
] & (1<<bit
)) == 0;
8989 /* Find N contiguous free pages storing the first page of the cluster in *first.
8990 * Returns REDIS_OK if it was able to find N contiguous pages, otherwise
8991 * REDIS_ERR is returned.
8993 * This function uses a simple algorithm: we try to allocate
8994 * REDIS_VM_MAX_NEAR_PAGES sequentially, when we reach this limit we start
8995 * again from the start of the swap file searching for free spaces.
8997 * If it looks pretty clear that there are no free pages near our offset
8998 * we try to find less populated places doing a forward jump of
8999 * REDIS_VM_MAX_RANDOM_JUMP, then we start scanning again a few pages
9000 * without hurry, and then we jump again and so forth...
9002 * This function can be improved using a free list to avoid to guess
9003 * too much, since we could collect data about freed pages.
9005 * note: I implemented this function just after watching an episode of
9006 * Battlestar Galactica, where the hybrid was continuing to say "JUMP!"
9008 static int vmFindContiguousPages(off_t
*first
, off_t n
) {
9009 off_t base
, offset
= 0, since_jump
= 0, numfree
= 0;
9011 if (server
.vm_near_pages
== REDIS_VM_MAX_NEAR_PAGES
) {
9012 server
.vm_near_pages
= 0;
9013 server
.vm_next_page
= 0;
9015 server
.vm_near_pages
++; /* Yet another try for pages near to the old ones */
9016 base
= server
.vm_next_page
;
9018 while(offset
< server
.vm_pages
) {
9019 off_t
this = base
+offset
;
9021 /* If we overflow, restart from page zero */
9022 if (this >= server
.vm_pages
) {
9023 this -= server
.vm_pages
;
9025 /* Just overflowed, what we found on tail is no longer
9026 * interesting, as it's no longer contiguous. */
9030 if (vmFreePage(this)) {
9031 /* This is a free page */
9033 /* Already got N free pages? Return to the caller, with success */
9035 *first
= this-(n
-1);
9036 server
.vm_next_page
= this+1;
9037 redisLog(REDIS_DEBUG
, "FOUND CONTIGUOUS PAGES: %lld pages at %lld\n", (long long) n
, (long long) *first
);
9041 /* The current one is not a free page */
9045 /* Fast-forward if the current page is not free and we already
9046 * searched enough near this place. */
9048 if (!numfree
&& since_jump
>= REDIS_VM_MAX_RANDOM_JUMP
/4) {
9049 offset
+= random() % REDIS_VM_MAX_RANDOM_JUMP
;
9051 /* Note that even if we rewind after the jump, we don't need
9052 * to make sure numfree is set to zero as we only jump *if* it
9053 * is set to zero. */
9055 /* Otherwise just check the next page */
9062 /* Write the specified object at the specified page of the swap file */
9063 static int vmWriteObjectOnSwap(robj
*o
, off_t page
) {
9064 if (server
.vm_enabled
) pthread_mutex_lock(&server
.io_swapfile_mutex
);
9065 if (fseeko(server
.vm_fp
,page
*server
.vm_page_size
,SEEK_SET
) == -1) {
9066 if (server
.vm_enabled
) pthread_mutex_unlock(&server
.io_swapfile_mutex
);
9067 redisLog(REDIS_WARNING
,
9068 "Critical VM problem in vmWriteObjectOnSwap(): can't seek: %s",
9072 rdbSaveObject(server
.vm_fp
,o
);
9073 fflush(server
.vm_fp
);
9074 if (server
.vm_enabled
) pthread_mutex_unlock(&server
.io_swapfile_mutex
);
9078 /* Swap the 'val' object relative to 'key' into disk. Store all the information
9079 * needed to later retrieve the object into the key object.
9080 * If we can't find enough contiguous empty pages to swap the object on disk
9081 * REDIS_ERR is returned. */
9082 static int vmSwapObjectBlocking(robj
*key
, robj
*val
) {
9083 off_t pages
= rdbSavedObjectPages(val
,NULL
);
9086 assert(key
->storage
== REDIS_VM_MEMORY
);
9087 assert(key
->refcount
== 1);
9088 if (vmFindContiguousPages(&page
,pages
) == REDIS_ERR
) return REDIS_ERR
;
9089 if (vmWriteObjectOnSwap(val
,page
) == REDIS_ERR
) return REDIS_ERR
;
9090 key
->vm
.page
= page
;
9091 key
->vm
.usedpages
= pages
;
9092 key
->storage
= REDIS_VM_SWAPPED
;
9093 key
->vtype
= val
->type
;
9094 decrRefCount(val
); /* Deallocate the object from memory. */
9095 vmMarkPagesUsed(page
,pages
);
9096 redisLog(REDIS_DEBUG
,"VM: object %s swapped out at %lld (%lld pages)",
9097 (unsigned char*) key
->ptr
,
9098 (unsigned long long) page
, (unsigned long long) pages
);
9099 server
.vm_stats_swapped_objects
++;
9100 server
.vm_stats_swapouts
++;
/* Read from the swap file the object stored starting at 'page'.
 * 'type' is handed to rdbLoadObject() to select how the data is decoded.
 * When server.vm_enabled is set the seek and the load run while holding
 * server.io_swapfile_mutex. A failed seek and a failed load are both
 * logged as unrecoverable VM problems. */
9104 static robj
*vmReadObjectFromSwap(off_t page
, int type
) {
9107 if (server
.vm_enabled
) pthread_mutex_lock(&server
.io_swapfile_mutex
);
/* Pages are fixed-size: byte offset = page number * page size. */
9108 if (fseeko(server
.vm_fp
,page
*server
.vm_page_size
,SEEK_SET
) == -1) {
9109 redisLog(REDIS_WARNING
,
9110 "Unrecoverable VM problem in vmReadObjectFromSwap(): can't seek: %s",
/* Decode the object from the current position of the swap stream. */
9114 o
= rdbLoadObject(type
,server
.vm_fp
);
9116 redisLog(REDIS_WARNING
, "Unrecoverable VM problem in vmReadObjectFromSwap(): can't load object from swap file: %s", strerror(errno
));
9119 if (server
.vm_enabled
) pthread_mutex_unlock(&server
.io_swapfile_mutex
);
9123 /* Load the value object relative to the 'key' object from swap to memory.
9124 * The newly allocated object is returned.
9126 * If preview is true the unserialized object is returned to the caller but
9127 * no changes are made to the key object, nor the pages are marked as freed */
9128 static robj
*vmGenericLoadObject(robj
*key
, int preview
) {
9131 redisAssert(key
->storage
== REDIS_VM_SWAPPED
|| key
->storage
== REDIS_VM_LOADING
);
9132 val
= vmReadObjectFromSwap(key
->vm
.page
,key
->vtype
);
9134 key
->storage
= REDIS_VM_MEMORY
;
9135 key
->vm
.atime
= server
.unixtime
;
9136 vmMarkPagesFree(key
->vm
.page
,key
->vm
.usedpages
);
9137 redisLog(REDIS_DEBUG
, "VM: object %s loaded from disk",
9138 (unsigned char*) key
->ptr
);
9139 server
.vm_stats_swapped_objects
--;
9141 redisLog(REDIS_DEBUG
, "VM: object %s previewed from disk",
9142 (unsigned char*) key
->ptr
);
9144 server
.vm_stats_swapins
++;
9148 /* Plain object loading, from swap to memory */
9149 static robj
*vmLoadObject(robj
*key
) {
9150 /* If we are loading the object in background, stop it, we
9151 * need to load this object synchronously ASAP. */
9152 if (key
->storage
== REDIS_VM_LOADING
)
9153 vmCancelThreadedIOJob(key
);
9154 return vmGenericLoadObject(key
,0);
9157 /* Just load the value on disk, without to modify the key.
9158 * This is useful when we want to perform some operation on the value
9159 * without to really bring it from swap to memory, like while saving the
9160 * dataset or rewriting the append only log. */
9161 static robj
*vmPreviewObject(robj
*key
) {
9162 return vmGenericLoadObject(key
,1);
9165 /* How a good candidate is this object for swapping?
9166 * The better candidate it is, the greater the returned value.
9168 * Currently we try to perform a fast estimation of the object size in
9169 * memory, and combine it with aging informations.
9171 * Basically swappability = idle-time * log(estimated size)
9173 * Bigger objects are preferred over smaller objects, but not
9174 * proportionally, this is why we use the logarithm. This algorithm is
9175 * just a first try and will probably be tuned later. */
9176 static double computeObjectSwappability(robj
*o
) {
9177 time_t age
= server
.unixtime
- o
->vm
.atime
;
9181 struct dictEntry
*de
;
9184 if (age
<= 0) return 0;
9187 if (o
->encoding
!= REDIS_ENCODING_RAW
) {
9190 asize
= sdslen(o
->ptr
)+sizeof(*o
)+sizeof(long)*2;
9195 listNode
*ln
= listFirst(l
);
9197 asize
= sizeof(list
);
9199 robj
*ele
= ln
->value
;
9202 elesize
= (ele
->encoding
== REDIS_ENCODING_RAW
) ?
9203 (sizeof(*o
)+sdslen(ele
->ptr
)) :
9205 asize
+= (sizeof(listNode
)+elesize
)*listLength(l
);
9210 z
= (o
->type
== REDIS_ZSET
);
9211 d
= z
? ((zset
*)o
->ptr
)->dict
: o
->ptr
;
9213 asize
= sizeof(dict
)+(sizeof(struct dictEntry
*)*dictSlots(d
));
9214 if (z
) asize
+= sizeof(zset
)-sizeof(dict
);
9219 de
= dictGetRandomKey(d
);
9220 ele
= dictGetEntryKey(de
);
9221 elesize
= (ele
->encoding
== REDIS_ENCODING_RAW
) ?
9222 (sizeof(*o
)+sdslen(ele
->ptr
)) :
9224 asize
+= (sizeof(struct dictEntry
)+elesize
)*dictSize(d
);
9225 if (z
) asize
+= sizeof(zskiplistNode
)*dictSize(d
);
9229 if (o
->encoding
== REDIS_ENCODING_ZIPMAP
) {
9230 unsigned char *p
= zipmapRewind((unsigned char*)o
->ptr
);
9231 unsigned int len
= zipmapLen((unsigned char*)o
->ptr
);
9232 unsigned int klen
, vlen
;
9233 unsigned char *key
, *val
;
9235 if ((p
= zipmapNext(p
,&key
,&klen
,&val
,&vlen
)) == NULL
) {
9239 asize
= len
*(klen
+vlen
+3);
9240 } else if (o
->encoding
== REDIS_ENCODING_HT
) {
9242 asize
= sizeof(dict
)+(sizeof(struct dictEntry
*)*dictSlots(d
));
9247 de
= dictGetRandomKey(d
);
9248 ele
= dictGetEntryKey(de
);
9249 elesize
= (ele
->encoding
== REDIS_ENCODING_RAW
) ?
9250 (sizeof(*o
)+sdslen(ele
->ptr
)) :
9252 ele
= dictGetEntryVal(de
);
9253 elesize
= (ele
->encoding
== REDIS_ENCODING_RAW
) ?
9254 (sizeof(*o
)+sdslen(ele
->ptr
)) :
9256 asize
+= (sizeof(struct dictEntry
)+elesize
)*dictSize(d
);
9261 return (double)age
*log(1+asize
);
9264 /* Try to swap an object that's a good candidate for swapping.
9265 * Returns REDIS_OK if the object was swapped, REDIS_ERR if it's not possible
9266 * to swap any object at all.
9268 * If 'usethreads' is true, Redis will try to swap the object in background
9269 * using I/O threads. */
9270 static int vmSwapOneObject(int usethreads
) {
9272 struct dictEntry
*best
= NULL
;
9273 double best_swappability
= 0;
9274 redisDb
*best_db
= NULL
;
9277 for (j
= 0; j
< server
.dbnum
; j
++) {
9278 redisDb
*db
= server
.db
+j
;
9279 /* Why maxtries is set to 100?
9280 * Because this way (usually) we'll find 1 object even if just 1% - 2%
9281 * are swappable objects */
9284 if (dictSize(db
->dict
) == 0) continue;
9285 for (i
= 0; i
< 5; i
++) {
9287 double swappability
;
9289 if (maxtries
) maxtries
--;
9290 de
= dictGetRandomKey(db
->dict
);
9291 key
= dictGetEntryKey(de
);
9292 val
= dictGetEntryVal(de
);
9293 /* Only swap objects that are currently in memory.
9295 * Also don't swap shared objects if threaded VM is on, as we
9296 * try to ensure that the main thread does not touch the
9297 * object while the I/O thread is using it, but we can't
9298 * control other keys without adding additional mutex. */
9299 if (key
->storage
!= REDIS_VM_MEMORY
||
9300 (server
.vm_max_threads
!= 0 && val
->refcount
!= 1)) {
9301 if (maxtries
) i
--; /* don't count this try */
9304 swappability
= computeObjectSwappability(val
);
9305 if (!best
|| swappability
> best_swappability
) {
9307 best_swappability
= swappability
;
9312 if (best
== NULL
) return REDIS_ERR
;
9313 key
= dictGetEntryKey(best
);
9314 val
= dictGetEntryVal(best
);
9316 redisLog(REDIS_DEBUG
,"Key with best swappability: %s, %f",
9317 key
->ptr
, best_swappability
);
9319 /* Unshare the key if needed */
9320 if (key
->refcount
> 1) {
9321 robj
*newkey
= dupStringObject(key
);
9323 key
= dictGetEntryKey(best
) = newkey
;
9327 vmSwapObjectThreaded(key
,val
,best_db
);
9330 if (vmSwapObjectBlocking(key
,val
) == REDIS_OK
) {
9331 dictGetEntryVal(best
) = NULL
;
/* Try to swap out one object without using the I/O threads.
 * Returns whatever vmSwapOneObject() returns when called with
 * usethreads = 0. */
static int vmSwapOneObjectBlocking(void) {
    return vmSwapOneObject(0);
}
/* Try to swap out one object in background, using the I/O threads.
 * Returns whatever vmSwapOneObject() returns when called with
 * usethreads = 1. */
static int vmSwapOneObjectThreaded(void) {
    return vmSwapOneObject(1);
}
9347 /* Return true if it's safe to swap out objects in a given moment.
9348 * Basically we don't want to swap objects out while there is a BGSAVE
9349 * or a BGAEOREWRITE running in backgroud. */
9350 static int vmCanSwapOut(void) {
9351 return (server
.bgsavechildpid
== -1 && server
.bgrewritechildpid
== -1);
9354 /* Delete a key if swapped. Returns 1 if the key was found, was swapped
9355 * and was deleted. Otherwise 0 is returned. */
9356 static int deleteIfSwapped(redisDb
*db
, robj
*key
) {
9360 if ((de
= dictFind(db
->dict
,key
)) == NULL
) return 0;
9361 foundkey
= dictGetEntryKey(de
);
9362 if (foundkey
->storage
== REDIS_VM_MEMORY
) return 0;
9367 /* =================== Virtual Memory - Threaded I/O ======================= */
9369 static void freeIOJob(iojob
*j
) {
9370 if ((j
->type
== REDIS_IOJOB_PREPARE_SWAP
||
9371 j
->type
== REDIS_IOJOB_DO_SWAP
||
9372 j
->type
== REDIS_IOJOB_LOAD
) && j
->val
!= NULL
)
9373 decrRefCount(j
->val
);
9374 /* We don't decrRefCount the j->key field as we did't incremented
9375 * the count creating IO Jobs. This is because the key field here is
9376 * just used as an indentifier and if a key is removed the Job should
9377 * never be touched again. */
9381 /* Every time a thread finished a Job, it writes a byte into the write side
9382 * of an unix pipe in order to "awake" the main thread, and this function
9384 static void vmThreadedIOCompletedJob(aeEventLoop
*el
, int fd
, void *privdata
,
9388 int retval
, processed
= 0, toprocess
= -1, trytoswap
= 1;
9390 REDIS_NOTUSED(mask
);
9391 REDIS_NOTUSED(privdata
);
9393 /* For every byte we read in the read side of the pipe, there is one
9394 * I/O job completed to process. */
9395 while((retval
= read(fd
,buf
,1)) == 1) {
9399 struct dictEntry
*de
;
9401 redisLog(REDIS_DEBUG
,"Processing I/O completed job");
9403 /* Get the processed element (the oldest one) */
9405 assert(listLength(server
.io_processed
) != 0);
9406 if (toprocess
== -1) {
9407 toprocess
= (listLength(server
.io_processed
)*REDIS_MAX_COMPLETED_JOBS_PROCESSED
)/100;
9408 if (toprocess
<= 0) toprocess
= 1;
9410 ln
= listFirst(server
.io_processed
);
9412 listDelNode(server
.io_processed
,ln
);
9414 /* If this job is marked as canceled, just ignore it */
9419 /* Post process it in the main thread, as there are things we
9420 * can do just here to avoid race conditions and/or invasive locks */
9421 redisLog(REDIS_DEBUG
,"Job %p type: %d, key at %p (%s) refcount: %d\n", (void*) j
, j
->type
, (void*)j
->key
, (char*)j
->key
->ptr
, j
->key
->refcount
);
9422 de
= dictFind(j
->db
->dict
,j
->key
);
9424 key
= dictGetEntryKey(de
);
9425 if (j
->type
== REDIS_IOJOB_LOAD
) {
9428 /* Key loaded, bring it at home */
9429 key
->storage
= REDIS_VM_MEMORY
;
9430 key
->vm
.atime
= server
.unixtime
;
9431 vmMarkPagesFree(key
->vm
.page
,key
->vm
.usedpages
);
9432 redisLog(REDIS_DEBUG
, "VM: object %s loaded from disk (threaded)",
9433 (unsigned char*) key
->ptr
);
9434 server
.vm_stats_swapped_objects
--;
9435 server
.vm_stats_swapins
++;
9436 dictGetEntryVal(de
) = j
->val
;
9437 incrRefCount(j
->val
);
9440 /* Handle clients waiting for this key to be loaded. */
9441 handleClientsBlockedOnSwappedKey(db
,key
);
9442 } else if (j
->type
== REDIS_IOJOB_PREPARE_SWAP
) {
9443 /* Now we know the amount of pages required to swap this object.
9444 * Let's find some space for it, and queue this task again
9445 * rebranded as REDIS_IOJOB_DO_SWAP. */
9446 if (!vmCanSwapOut() ||
9447 vmFindContiguousPages(&j
->page
,j
->pages
) == REDIS_ERR
)
9449 /* Ooops... no space or we can't swap as there is
9450 * a fork()ed Redis trying to save stuff on disk. */
9452 key
->storage
= REDIS_VM_MEMORY
; /* undo operation */
9454 /* Note that we need to mark this pages as used now,
9455 * if the job will be canceled, we'll mark them as freed
9457 vmMarkPagesUsed(j
->page
,j
->pages
);
9458 j
->type
= REDIS_IOJOB_DO_SWAP
;
9463 } else if (j
->type
== REDIS_IOJOB_DO_SWAP
) {
9466 /* Key swapped. We can finally free some memory. */
9467 if (key
->storage
!= REDIS_VM_SWAPPING
) {
9468 printf("key->storage: %d\n",key
->storage
);
9469 printf("key->name: %s\n",(char*)key
->ptr
);
9470 printf("key->refcount: %d\n",key
->refcount
);
9471 printf("val: %p\n",(void*)j
->val
);
9472 printf("val->type: %d\n",j
->val
->type
);
9473 printf("val->ptr: %s\n",(char*)j
->val
->ptr
);
9475 redisAssert(key
->storage
== REDIS_VM_SWAPPING
);
9476 val
= dictGetEntryVal(de
);
9477 key
->vm
.page
= j
->page
;
9478 key
->vm
.usedpages
= j
->pages
;
9479 key
->storage
= REDIS_VM_SWAPPED
;
9480 key
->vtype
= j
->val
->type
;
9481 decrRefCount(val
); /* Deallocate the object from memory. */
9482 dictGetEntryVal(de
) = NULL
;
9483 redisLog(REDIS_DEBUG
,
9484 "VM: object %s swapped out at %lld (%lld pages) (threaded)",
9485 (unsigned char*) key
->ptr
,
9486 (unsigned long long) j
->page
, (unsigned long long) j
->pages
);
9487 server
.vm_stats_swapped_objects
++;
9488 server
.vm_stats_swapouts
++;
9490 /* Put a few more swap requests in queue if we are still
9492 if (trytoswap
&& vmCanSwapOut() &&
9493 zmalloc_used_memory() > server
.vm_max_memory
)
9498 more
= listLength(server
.io_newjobs
) <
9499 (unsigned) server
.vm_max_threads
;
9501 /* Don't waste CPU time if swappable objects are rare. */
9502 if (vmSwapOneObjectThreaded() == REDIS_ERR
) {
9510 if (processed
== toprocess
) return;
9512 if (retval
< 0 && errno
!= EAGAIN
) {
9513 redisLog(REDIS_WARNING
,
9514 "WARNING: read(2) error in vmThreadedIOCompletedJob() %s",
9519 static void lockThreadedIO(void) {
9520 pthread_mutex_lock(&server
.io_mutex
);
9523 static void unlockThreadedIO(void) {
9524 pthread_mutex_unlock(&server
.io_mutex
);
9527 /* Remove the specified object from the threaded I/O queue if still not
9528 * processed, otherwise make sure to flag it as canceled. */
9529 static void vmCancelThreadedIOJob(robj
*o
) {
9531 server
.io_newjobs
, /* 0 */
9532 server
.io_processing
, /* 1 */
9533 server
.io_processed
/* 2 */
9537 assert(o
->storage
== REDIS_VM_LOADING
|| o
->storage
== REDIS_VM_SWAPPING
);
9540 /* Search for a matching key in one of the queues */
9541 for (i
= 0; i
< 3; i
++) {
9545 listRewind(lists
[i
],&li
);
9546 while ((ln
= listNext(&li
)) != NULL
) {
9547 iojob
*job
= ln
->value
;
9549 if (job
->canceled
) continue; /* Skip this, already canceled. */
9550 if (job
->key
== o
) {
9551 redisLog(REDIS_DEBUG
,"*** CANCELED %p (%s) (type %d) (LIST ID %d)\n",
9552 (void*)job
, (char*)o
->ptr
, job
->type
, i
);
9553 /* Mark the pages as free since the swap didn't happened
9554 * or happened but is now discarded. */
9555 if (i
!= 1 && job
->type
== REDIS_IOJOB_DO_SWAP
)
9556 vmMarkPagesFree(job
->page
,job
->pages
);
9557 /* Cancel the job. It depends on the list the job is
9560 case 0: /* io_newjobs */
9561 /* If the job was yet not processed the best thing to do
9562 * is to remove it from the queue at all */
9564 listDelNode(lists
[i
],ln
);
9566 case 1: /* io_processing */
9567 /* Oh Shi- the thread is messing with the Job:
9569 * Probably it's accessing the object if this is a
9570 * PREPARE_SWAP or DO_SWAP job.
9571 * If it's a LOAD job it may be reading from disk and
9572 * if we don't wait for the job to terminate before to
9573 * cancel it, maybe in a few microseconds data can be
9574 * corrupted in this pages. So the short story is:
9576 * Better to wait for the job to move into the
9577 * next queue (processed)... */
9579 /* We try again and again until the job is completed. */
9581 /* But let's wait some time for the I/O thread
9582 * to finish with this job. After all this condition
9583 * should be very rare. */
9586 case 2: /* io_processed */
9587 /* The job was already processed, that's easy...
9588 * just mark it as canceled so that we'll ignore it
9589 * when processing completed jobs. */
9593 /* Finally we have to adjust the storage type of the object
9594 * in order to "UNDO" the operation. */
9595 if (o
->storage
== REDIS_VM_LOADING
)
9596 o
->storage
= REDIS_VM_SWAPPED
;
9597 else if (o
->storage
== REDIS_VM_SWAPPING
)
9598 o
->storage
= REDIS_VM_MEMORY
;
9605 assert(1 != 1); /* We should never reach this */
9608 static void *IOThreadEntryPoint(void *arg
) {
9613 pthread_detach(pthread_self());
9615 /* Get a new job to process */
9617 if (listLength(server
.io_newjobs
) == 0) {
9618 /* No new jobs in queue, exit. */
9619 redisLog(REDIS_DEBUG
,"Thread %ld exiting, nothing to do",
9620 (long) pthread_self());
9621 server
.io_active_threads
--;
9625 ln
= listFirst(server
.io_newjobs
);
9627 listDelNode(server
.io_newjobs
,ln
);
9628 /* Add the job in the processing queue */
9629 j
->thread
= pthread_self();
9630 listAddNodeTail(server
.io_processing
,j
);
9631 ln
= listLast(server
.io_processing
); /* We use ln later to remove it */
9633 redisLog(REDIS_DEBUG
,"Thread %ld got a new job (type %d): %p about key '%s'",
9634 (long) pthread_self(), j
->type
, (void*)j
, (char*)j
->key
->ptr
);
9636 /* Process the Job */
9637 if (j
->type
== REDIS_IOJOB_LOAD
) {
9638 j
->val
= vmReadObjectFromSwap(j
->page
,j
->key
->vtype
);
9639 } else if (j
->type
== REDIS_IOJOB_PREPARE_SWAP
) {
9640 FILE *fp
= fopen("/dev/null","w+");
9641 j
->pages
= rdbSavedObjectPages(j
->val
,fp
);
9643 } else if (j
->type
== REDIS_IOJOB_DO_SWAP
) {
9644 if (vmWriteObjectOnSwap(j
->val
,j
->page
) == REDIS_ERR
)
9648 /* Done: insert the job into the processed queue */
9649 redisLog(REDIS_DEBUG
,"Thread %ld completed the job: %p (key %s)",
9650 (long) pthread_self(), (void*)j
, (char*)j
->key
->ptr
);
9652 listDelNode(server
.io_processing
,ln
);
9653 listAddNodeTail(server
.io_processed
,j
);
9656 /* Signal the main thread there is new stuff to process */
9657 assert(write(server
.io_ready_pipe_write
,"x",1) == 1);
9659 return NULL
; /* never reached */
/* Create one more I/O thread running IOThreadEntryPoint() with the
 * attributes in server.io_threads_attr, and count it in
 * server.io_active_threads. */
9662 static void spawnIOThread(void) {
9664 sigset_t mask
, omask
;
/* SIGCHLD, SIGHUP and SIGPIPE make up the signal mask installed while the
 * thread is being created; the previous mask is saved in 'omask'. */
9668 sigaddset(&mask
,SIGCHLD
);
9669 sigaddset(&mask
,SIGHUP
);
9670 sigaddset(&mask
,SIGPIPE
);
9671 pthread_sigmask(SIG_SETMASK
, &mask
, &omask
);
/* Keep trying until pthread_create() succeeds (returns 0), logging a
 * warning for every failed attempt. */
9672 while ((err
= pthread_create(&thread
,&server
.io_threads_attr
,IOThreadEntryPoint
,NULL
)) != 0) {
9673 redisLog(REDIS_WARNING
,"Unable to spawn an I/O thread: %s",
/* Put back the signal mask that was in place before the call. */
9677 pthread_sigmask(SIG_SETMASK
, &omask
, NULL
);
9678 server
.io_active_threads
++;
9681 /* We need to wait for the last thread to exit before we are able to
9682 * fork() in order to BGSAVE or BGREWRITEAOF. */
9683 static void waitEmptyIOJobsQueue(void) {
9685 int io_processed_len
;
9688 if (listLength(server
.io_newjobs
) == 0 &&
9689 listLength(server
.io_processing
) == 0 &&
9690 server
.io_active_threads
== 0)
9695 /* While waiting for empty jobs queue condition we post-process some
9696 * finished jobs, as I/O threads may be hanging trying to write against
9697 * the io_ready_pipe_write FD but there are so much pending jobs that
9699 io_processed_len
= listLength(server
.io_processed
);
9701 if (io_processed_len
) {
9702 vmThreadedIOCompletedJob(NULL
,server
.io_ready_pipe_read
,NULL
,0);
9703 usleep(1000); /* 1 millisecond */
9705 usleep(10000); /* 10 milliseconds */
/* Open server.vm_swap_file again in "r+b" mode and make server.vm_fp and
 * server.vm_fd refer to the new stream. rewriteAppendOnlyFileBackground()
 * calls this in the fork()ed child when VM is enabled. */
9710 static void vmReopenSwapFile(void) {
9711 /* Note: we don't close the old one as we are in the child process
9712 * and don't want to mess at all with the original file object. */
9713 server
.vm_fp
= fopen(server
.vm_swap_file
,"r+b");
/* A failed re-open is logged as a warning. */
9714 if (server
.vm_fp
== NULL
) {
9715 redisLog(REDIS_WARNING
,"Can't re-open the VM swap file: %s. Exiting.",
9716 server
.vm_swap_file
);
/* Keep the cached descriptor in sync with the new stream. */
9719 server
.vm_fd
= fileno(server
.vm_fp
);
9722 /* This function must be called while with threaded IO locked */
9723 static void queueIOJob(iojob
*j
) {
9724 redisLog(REDIS_DEBUG
,"Queued IO Job %p type %d about key '%s'\n",
9725 (void*)j
, j
->type
, (char*)j
->key
->ptr
);
9726 listAddNodeTail(server
.io_newjobs
,j
);
9727 if (server
.io_active_threads
< server
.vm_max_threads
)
9731 static int vmSwapObjectThreaded(robj
*key
, robj
*val
, redisDb
*db
) {
9734 assert(key
->storage
== REDIS_VM_MEMORY
);
9735 assert(key
->refcount
== 1);
9737 j
= zmalloc(sizeof(*j
));
9738 j
->type
= REDIS_IOJOB_PREPARE_SWAP
;
9744 j
->thread
= (pthread_t
) -1;
9745 key
->storage
= REDIS_VM_SWAPPING
;
9753 /* ============ Virtual Memory - Blocking clients on missing keys =========== */
9755 /* This function makes the client 'c' wait for the key 'key' to be loaded.
9756 * If there is not already a job loading the key, it is created.
9757 * The key is added to the io_keys list in the client structure, and also
9758 * in the hash table mapping swapped keys to waiting clients, that is,
9759 * server.io_waited_keys. */
9760 static int waitForSwappedKey(redisClient
*c
, robj
*key
) {
9761 struct dictEntry
*de
;
9765 /* If the key does not exist or is already in RAM we don't need to
9766 * block the client at all. */
9767 de
= dictFind(c
->db
->dict
,key
);
9768 if (de
== NULL
) return 0;
9769 o
= dictGetEntryKey(de
);
9770 if (o
->storage
== REDIS_VM_MEMORY
) {
9772 } else if (o
->storage
== REDIS_VM_SWAPPING
) {
9773 /* We were swapping the key, undo it! */
9774 vmCancelThreadedIOJob(o
);
9778 /* OK: the key is either swapped, or being loaded just now. */
9780 /* Add the key to the list of keys this client is waiting for.
9781 * This maps clients to keys they are waiting for. */
9782 listAddNodeTail(c
->io_keys
,key
);
9785 /* Add the client to the swapped keys => clients waiting map. */
9786 de
= dictFind(c
->db
->io_keys
,key
);
9790 /* For every key we take a list of clients blocked for it */
9792 retval
= dictAdd(c
->db
->io_keys
,key
,l
);
9794 assert(retval
== DICT_OK
);
9796 l
= dictGetEntryVal(de
);
9798 listAddNodeTail(l
,c
);
9800 /* Are we already loading the key from disk? If not create a job */
9801 if (o
->storage
== REDIS_VM_SWAPPED
) {
9804 o
->storage
= REDIS_VM_LOADING
;
9805 j
= zmalloc(sizeof(*j
));
9806 j
->type
= REDIS_IOJOB_LOAD
;
9809 j
->key
->vtype
= o
->vtype
;
9810 j
->page
= o
->vm
.page
;
9813 j
->thread
= (pthread_t
) -1;
9821 /* Preload keys for any command with first, last and step values for
9822 * the command keys prototype, as defined in the command table. */
9823 static void waitForMultipleSwappedKeys(redisClient
*c
, struct redisCommand
*cmd
, int argc
, robj
**argv
) {
9825 if (cmd
->vm_firstkey
== 0) return;
9826 last
= cmd
->vm_lastkey
;
9827 if (last
< 0) last
= argc
+last
;
9828 for (j
= cmd
->vm_firstkey
; j
<= last
; j
+= cmd
->vm_keystep
) {
9829 redisAssert(j
< argc
);
9830 waitForSwappedKey(c
,argv
[j
]);
9834 /* Preload keys needed for the ZUNIONSTORE and ZINTERSTORE commands.
9835 * Note that the number of keys to preload is user-defined, so we need to
9836 * apply a sanity check against argc. */
9837 static void zunionInterBlockClientOnSwappedKeys(redisClient
*c
, struct redisCommand
*cmd
, int argc
, robj
**argv
) {
9841 num
= atoi(argv
[2]->ptr
);
9842 if (num
> (argc
-3)) return;
9843 for (i
= 0; i
< num
; i
++) {
9844 waitForSwappedKey(c
,argv
[3+i
]);
9848 /* Preload keys needed to execute the entire MULTI/EXEC block.
9850 * This function is called by blockClientOnSwappedKeys when EXEC is issued,
9851 * and will block the client when any command requires a swapped out value. */
9852 static void execBlockClientOnSwappedKeys(redisClient
*c
, struct redisCommand
*cmd
, int argc
, robj
**argv
) {
9854 struct redisCommand
*mcmd
;
9857 REDIS_NOTUSED(argc
);
9858 REDIS_NOTUSED(argv
);
9860 if (!(c
->flags
& REDIS_MULTI
)) return;
9861 for (i
= 0; i
< c
->mstate
.count
; i
++) {
9862 mcmd
= c
->mstate
.commands
[i
].cmd
;
9863 margc
= c
->mstate
.commands
[i
].argc
;
9864 margv
= c
->mstate
.commands
[i
].argv
;
9866 if (mcmd
->vm_preload_proc
!= NULL
) {
9867 mcmd
->vm_preload_proc(c
,mcmd
,margc
,margv
);
9869 waitForMultipleSwappedKeys(c
,mcmd
,margc
,margv
);
9874 /* Is this client attempting to run a command against swapped keys?
9875 * If so, block it ASAP, load the keys in background, then resume it.
9877 * The important idea about this function is that it can fail! If keys will
9878 * still be swapped when the client is resumed, the key lookups will
9879 * just block loading keys from disk. In practical terms this should only
9880 * happen with SORT BY command or if there is a bug in this function.
9882 * Return 1 if the client is marked as blocked, 0 if the client can
9883 * continue as the keys it is going to access appear to be in memory. */
9884 static int blockClientOnSwappedKeys(redisClient
*c
, struct redisCommand
*cmd
) {
9885 if (cmd
->vm_preload_proc
!= NULL
) {
9886 cmd
->vm_preload_proc(c
,cmd
,c
->argc
,c
->argv
);
9888 waitForMultipleSwappedKeys(c
,cmd
,c
->argc
,c
->argv
);
9891 /* If the client was blocked for at least one key, mark it as blocked. */
9892 if (listLength(c
->io_keys
)) {
9893 c
->flags
|= REDIS_IO_WAIT
;
9894 aeDeleteFileEvent(server
.el
,c
->fd
,AE_READABLE
);
9895 server
.vm_blocked_clients
++;
9902 /* Remove the 'key' from the list of blocked keys for a given client.
9904 * The function returns 1 when there are no longer blocking keys after
9905 * the current one was removed (and the client can be unblocked). */
9906 static int dontWaitForSwappedKey(redisClient
*c
, robj
*key
) {
9910 struct dictEntry
*de
;
9912 /* Remove the key from the list of keys this client is waiting for. */
9913 listRewind(c
->io_keys
,&li
);
9914 while ((ln
= listNext(&li
)) != NULL
) {
9915 if (equalStringObjects(ln
->value
,key
)) {
9916 listDelNode(c
->io_keys
,ln
);
9922 /* Remove the client from the key => waiting clients map. */
9923 de
= dictFind(c
->db
->io_keys
,key
);
9925 l
= dictGetEntryVal(de
);
9926 ln
= listSearchKey(l
,c
);
9929 if (listLength(l
) == 0)
9930 dictDelete(c
->db
->io_keys
,key
);
9932 return listLength(c
->io_keys
) == 0;
9935 static void handleClientsBlockedOnSwappedKey(redisDb
*db
, robj
*key
) {
9936 struct dictEntry
*de
;
9941 de
= dictFind(db
->io_keys
,key
);
9944 l
= dictGetEntryVal(de
);
9945 len
= listLength(l
);
9946 /* Note: we can't use something like while(listLength(l)) as the list
9947 * can be freed by the calling function when we remove the last element. */
9950 redisClient
*c
= ln
->value
;
9952 if (dontWaitForSwappedKey(c
,key
)) {
9953 /* Put the client in the list of clients ready to go as we
9954 * loaded all the keys about it. */
9955 listAddNodeTail(server
.io_ready_clients
,c
);
9960 /* =========================== Remote Configuration ========================= */
9962 static void configSetCommand(redisClient
*c
) {
9963 robj
*o
= getDecodedObject(c
->argv
[3]);
9966 if (!strcasecmp(c
->argv
[2]->ptr
,"dbfilename")) {
9967 zfree(server
.dbfilename
);
9968 server
.dbfilename
= zstrdup(o
->ptr
);
9969 } else if (!strcasecmp(c
->argv
[2]->ptr
,"requirepass")) {
9970 zfree(server
.requirepass
);
9971 server
.requirepass
= zstrdup(o
->ptr
);
9972 } else if (!strcasecmp(c
->argv
[2]->ptr
,"masterauth")) {
9973 zfree(server
.masterauth
);
9974 server
.masterauth
= zstrdup(o
->ptr
);
9975 } else if (!strcasecmp(c
->argv
[2]->ptr
,"maxmemory")) {
9976 if (getLongLongFromObject(o
,&ll
) == REDIS_ERR
||
9977 ll
< 0) goto badfmt
;
9978 server
.maxmemory
= ll
;
9979 } else if (!strcasecmp(c
->argv
[2]->ptr
,"timeout")) {
9980 if (getLongLongFromObject(o
,&ll
) == REDIS_ERR
||
9981 ll
< 0 || ll
> LONG_MAX
) goto badfmt
;
9982 server
.maxidletime
= ll
;
9983 } else if (!strcasecmp(c
->argv
[2]->ptr
,"appendfsync")) {
9984 if (!strcasecmp(o
->ptr
,"no")) {
9985 server
.appendfsync
= APPENDFSYNC_NO
;
9986 } else if (!strcasecmp(o
->ptr
,"everysec")) {
9987 server
.appendfsync
= APPENDFSYNC_EVERYSEC
;
9988 } else if (!strcasecmp(o
->ptr
,"always")) {
9989 server
.appendfsync
= APPENDFSYNC_ALWAYS
;
9993 } else if (!strcasecmp(c
->argv
[2]->ptr
,"appendonly")) {
9994 int old
= server
.appendonly
;
9995 int new = yesnotoi(o
->ptr
);
9997 if (new == -1) goto badfmt
;
10002 if (startAppendOnly() == REDIS_ERR
) {
10003 addReplySds(c
,sdscatprintf(sdsempty(),
10004 "-ERR Unable to turn on AOF. Check server logs.\r\n"));
10010 } else if (!strcasecmp(c
->argv
[2]->ptr
,"save")) {
10012 sds
*v
= sdssplitlen(o
->ptr
,sdslen(o
->ptr
)," ",1,&vlen
);
10014 /* Perform sanity check before setting the new config:
10015 * - Even number of args
10016 * - Seconds >= 1, changes >= 0 */
10018 sdsfreesplitres(v
,vlen
);
10021 for (j
= 0; j
< vlen
; j
++) {
10025 val
= strtoll(v
[j
], &eptr
, 10);
10026 if (eptr
[0] != '\0' ||
10027 ((j
& 1) == 0 && val
< 1) ||
10028 ((j
& 1) == 1 && val
< 0)) {
10029 sdsfreesplitres(v
,vlen
);
10033 /* Finally set the new config */
10034 resetServerSaveParams();
10035 for (j
= 0; j
< vlen
; j
+= 2) {
10039 seconds
= strtoll(v
[j
],NULL
,10);
10040 changes
= strtoll(v
[j
+1],NULL
,10);
10041 appendServerSaveParams(seconds
, changes
);
10043 sdsfreesplitres(v
,vlen
);
10045 addReplySds(c
,sdscatprintf(sdsempty(),
10046 "-ERR not supported CONFIG parameter %s\r\n",
10047 (char*)c
->argv
[2]->ptr
));
10052 addReply(c
,shared
.ok
);
10055 badfmt
: /* Bad format errors */
10056 addReplySds(c
,sdscatprintf(sdsempty(),
10057 "-ERR invalid argument '%s' for CONFIG SET '%s'\r\n",
10059 (char*)c
->argv
[2]->ptr
));
10063 static void configGetCommand(redisClient
*c
) {
10064 robj
*o
= getDecodedObject(c
->argv
[2]);
10065 robj
*lenobj
= createObject(REDIS_STRING
,NULL
);
10066 char *pattern
= o
->ptr
;
10069 addReply(c
,lenobj
);
10070 decrRefCount(lenobj
);
10072 if (stringmatch(pattern
,"dbfilename",0)) {
10073 addReplyBulkCString(c
,"dbfilename");
10074 addReplyBulkCString(c
,server
.dbfilename
);
10077 if (stringmatch(pattern
,"requirepass",0)) {
10078 addReplyBulkCString(c
,"requirepass");
10079 addReplyBulkCString(c
,server
.requirepass
);
10082 if (stringmatch(pattern
,"masterauth",0)) {
10083 addReplyBulkCString(c
,"masterauth");
10084 addReplyBulkCString(c
,server
.masterauth
);
10087 if (stringmatch(pattern
,"maxmemory",0)) {
10090 ll2string(buf
,128,server
.maxmemory
);
10091 addReplyBulkCString(c
,"maxmemory");
10092 addReplyBulkCString(c
,buf
);
10095 if (stringmatch(pattern
,"timeout",0)) {
10098 ll2string(buf
,128,server
.maxidletime
);
10099 addReplyBulkCString(c
,"timeout");
10100 addReplyBulkCString(c
,buf
);
10103 if (stringmatch(pattern
,"appendonly",0)) {
10104 addReplyBulkCString(c
,"appendonly");
10105 addReplyBulkCString(c
,server
.appendonly
? "yes" : "no");
10108 if (stringmatch(pattern
,"appendfsync",0)) {
10111 switch(server
.appendfsync
) {
10112 case APPENDFSYNC_NO
: policy
= "no"; break;
10113 case APPENDFSYNC_EVERYSEC
: policy
= "everysec"; break;
10114 case APPENDFSYNC_ALWAYS
: policy
= "always"; break;
10115 default: policy
= "unknown"; break; /* too harmless to panic */
10117 addReplyBulkCString(c
,"appendfsync");
10118 addReplyBulkCString(c
,policy
);
10121 if (stringmatch(pattern
,"save",0)) {
10122 sds buf
= sdsempty();
10125 for (j
= 0; j
< server
.saveparamslen
; j
++) {
10126 buf
= sdscatprintf(buf
,"%ld %d",
10127 server
.saveparams
[j
].seconds
,
10128 server
.saveparams
[j
].changes
);
10129 if (j
!= server
.saveparamslen
-1)
10130 buf
= sdscatlen(buf
," ",1);
10132 addReplyBulkCString(c
,"save");
10133 addReplyBulkCString(c
,buf
);
10138 lenobj
->ptr
= sdscatprintf(sdsempty(),"*%d\r\n",matches
*2);
/* CONFIG command entry point: dispatches on the sub-command found in
 * argv[1], compared case-insensitively with strcasecmp().
 *
 * "set"       requires argc == 4 and is handed to configSetCommand().
 * "get"       requires argc == 3 and is handed to configGetCommand().
 * "resetstat" requires argc == 2; it zeroes the stat_numcommands,
 *             stat_numconnections and stat_expiredkeys counters, sets
 *             stat_starttime to the current time and replies with shared.ok.
 *
 * A wrong argument count jumps to the badarity error reply, which names the
 * offending sub-command. A separate error reply states that the sub-command
 * must be one of GET, SET, RESETSTAT. */
10141 static void configCommand(redisClient
*c
) {
10142 if (!strcasecmp(c
->argv
[1]->ptr
,"set")) {
10143 if (c
->argc
!= 4) goto badarity
;
10144 configSetCommand(c
);
10145 } else if (!strcasecmp(c
->argv
[1]->ptr
,"get")) {
10146 if (c
->argc
!= 3) goto badarity
;
10147 configGetCommand(c
);
10148 } else if (!strcasecmp(c
->argv
[1]->ptr
,"resetstat")) {
10149 if (c
->argc
!= 2) goto badarity
;
/* Reset the statistics counters and restart the uptime reference. */
10150 server
.stat_numcommands
= 0;
10151 server
.stat_numconnections
= 0;
10152 server
.stat_expiredkeys
= 0;
10153 server
.stat_starttime
= time(NULL
);
10154 addReply(c
,shared
.ok
);
10156 addReplySds(c
,sdscatprintf(sdsempty(),
10157 "-ERR CONFIG subcommand must be one of GET, SET, RESETSTAT\r\n"));
/* Target of the badarity jumps: report the wrong argument count. */
10162 addReplySds(c
,sdscatprintf(sdsempty(),
10163 "-ERR Wrong number of arguments for CONFIG %s\r\n",
10164 (char*) c
->argv
[1]->ptr
));
10167 /* =========================== Pubsub implementation ======================== */
/* Receives a pubsubPattern through an untyped pointer and drops the
 * reference it holds on its pattern object with decrRefCount().
 * NOTE(review): presumably installed as the free callback of a list of
 * pubsubPattern entries -- confirm where it is registered. */
10169 static void freePubsubPattern(void *p
) {
10170 pubsubPattern
*pat
= p
;
10172 decrRefCount(pat
->pattern
);
/* Compares two pubsubPattern entries passed as untyped pointers.
 * Returns non-zero only when both entries refer to the same client and
 * their pattern objects compare equal according to equalStringObjects();
 * returns 0 otherwise. */
10176 static int listMatchPubsubPattern(void *a
, void *b
) {
10177 pubsubPattern
*pa
= a
, *pb
= b
;
10179 return (pa
->client
== pb
->client
) &&
10180 (equalStringObjects(pa
->pattern
,pb
->pattern
));
10183 /* Subscribe a client to a channel. Returns 1 if the operation succeeded, or
10184 * 0 if the client was already subscribed to that channel. */
10185 static int pubsubSubscribeChannel(redisClient
*c
, robj
*channel
) {
10186 struct dictEntry
*de
;
10187 list
*clients
= NULL
;
10190 /* Add the channel to the client -> channels hash table */
10191 if (dictAdd(c
->pubsub_channels
,channel
,NULL
) == DICT_OK
) {
10193 incrRefCount(channel
);
10194 /* Add the client to the channel -> list of clients hash table */
10195 de
= dictFind(server
.pubsub_channels
,channel
);
10197 clients
= listCreate();
10198 dictAdd(server
.pubsub_channels
,channel
,clients
);
10199 incrRefCount(channel
);
10201 clients
= dictGetEntryVal(de
);
10203 listAddNodeTail(clients
,c
);
10205 /* Notify the client */
10206 addReply(c
,shared
.mbulk3
);
10207 addReply(c
,shared
.subscribebulk
);
10208 addReplyBulk(c
,channel
);
10209 addReplyLongLong(c
,dictSize(c
->pubsub_channels
)+listLength(c
->pubsub_patterns
));
10213 /* Unsubscribe a client from a channel. Returns 1 if the operation succeeded, or
10214 * 0 if the client was not subscribed to the specified channel. */
10215 static int pubsubUnsubscribeChannel(redisClient
*c
, robj
*channel
, int notify
) {
10216 struct dictEntry
*de
;
10221 /* Remove the channel from the client -> channels hash table */
10222 incrRefCount(channel
); /* channel may be just a pointer to the same object
10223 we have in the hash tables. Protect it... */
10224 if (dictDelete(c
->pubsub_channels
,channel
) == DICT_OK
) {
10226 /* Remove the client from the channel -> clients list hash table */
10227 de
= dictFind(server
.pubsub_channels
,channel
);
10228 assert(de
!= NULL
);
10229 clients
= dictGetEntryVal(de
);
10230 ln
= listSearchKey(clients
,c
);
10231 assert(ln
!= NULL
);
10232 listDelNode(clients
,ln
);
10233 if (listLength(clients
) == 0) {
10234 /* Free the list and the associated hash entry altogether if this was
10235 * the last client, so that it will not be possible to abuse
10236 * Redis PUBSUB by creating millions of channels. */
10237 dictDelete(server
.pubsub_channels
,channel
);
10240 /* Notify the client */
10242 addReply(c
,shared
.mbulk3
);
10243 addReply(c
,shared
.unsubscribebulk
);
10244 addReplyBulk(c
,channel
);
10245 addReplyLongLong(c
,dictSize(c
->pubsub_channels
)+
10246 listLength(c
->pubsub_patterns
));
10249 decrRefCount(channel
); /* it is finally safe to release it */
10253 /* Subscribe a client to a pattern. Returns 1 if the operation succeeded, or 0 if the client was already subscribed to that pattern. */
10254 static int pubsubSubscribePattern(redisClient
*c
, robj
*pattern
) {
10257 if (listSearchKey(c
->pubsub_patterns
,pattern
) == NULL
) {
10259 pubsubPattern
*pat
;
10260 listAddNodeTail(c
->pubsub_patterns
,pattern
);
10261 incrRefCount(pattern
);
10262 pat
= zmalloc(sizeof(*pat
));
10263 pat
->pattern
= getDecodedObject(pattern
);
10265 listAddNodeTail(server
.pubsub_patterns
,pat
);
10267 /* Notify the client */
10268 addReply(c
,shared
.mbulk3
);
10269 addReply(c
,shared
.psubscribebulk
);
10270 addReplyBulk(c
,pattern
);
10271 addReplyLongLong(c
,dictSize(c
->pubsub_channels
)+listLength(c
->pubsub_patterns
));
10275 /* Unsubscribe a client from a pattern. Returns 1 if the operation succeeded, or
10276 * 0 if the client was not subscribed to the specified pattern. */
10277 static int pubsubUnsubscribePattern(redisClient
*c
, robj
*pattern
, int notify
) {
10282 incrRefCount(pattern
); /* Protect the object. May be the same we remove */
10283 if ((ln
= listSearchKey(c
->pubsub_patterns
,pattern
)) != NULL
) {
10285 listDelNode(c
->pubsub_patterns
,ln
);
10287 pat
.pattern
= pattern
;
10288 ln
= listSearchKey(server
.pubsub_patterns
,&pat
);
10289 listDelNode(server
.pubsub_patterns
,ln
);
10291 /* Notify the client */
10293 addReply(c
,shared
.mbulk3
);
10294 addReply(c
,shared
.punsubscribebulk
);
10295 addReplyBulk(c
,pattern
);
10296 addReplyLongLong(c
,dictSize(c
->pubsub_channels
)+
10297 listLength(c
->pubsub_patterns
));
10299 decrRefCount(pattern
);
10303 /* Unsubscribe from all the channels. Return the number of channels the
10304 * client was subscribed to. */
10305 static int pubsubUnsubscribeAllChannels(redisClient
*c
, int notify
) {
10306 dictIterator
*di
= dictGetIterator(c
->pubsub_channels
);
10310 while((de
= dictNext(di
)) != NULL
) {
10311 robj
*channel
= dictGetEntryKey(de
);
10313 count
+= pubsubUnsubscribeChannel(c
,channel
,notify
);
10315 dictReleaseIterator(di
);
10319 /* Unsubscribe from all the patterns. Return the number of patterns the
10320 * client was subscribed to. */
10321 static int pubsubUnsubscribeAllPatterns(redisClient
*c
, int notify
) {
10326 listRewind(c
->pubsub_patterns
,&li
);
10327 while ((ln
= listNext(&li
)) != NULL
) {
10328 robj
*pattern
= ln
->value
;
10330 count
+= pubsubUnsubscribePattern(c
,pattern
,notify
);
10335 /* Publish a message */
10336 static int pubsubPublishMessage(robj
*channel
, robj
*message
) {
10338 struct dictEntry
*de
;
10342 /* Send to clients listening for that channel */
10343 de
= dictFind(server
.pubsub_channels
,channel
);
10345 list
*list
= dictGetEntryVal(de
);
10349 listRewind(list
,&li
);
10350 while ((ln
= listNext(&li
)) != NULL
) {
10351 redisClient
*c
= ln
->value
;
10353 addReply(c
,shared
.mbulk3
);
10354 addReply(c
,shared
.messagebulk
);
10355 addReplyBulk(c
,channel
);
10356 addReplyBulk(c
,message
);
10360 /* Send to clients listening to matching channels */
10361 if (listLength(server
.pubsub_patterns
)) {
10362 listRewind(server
.pubsub_patterns
,&li
);
10363 channel
= getDecodedObject(channel
);
10364 while ((ln
= listNext(&li
)) != NULL
) {
10365 pubsubPattern
*pat
= ln
->value
;
10367 if (stringmatchlen((char*)pat
->pattern
->ptr
,
10368 sdslen(pat
->pattern
->ptr
),
10369 (char*)channel
->ptr
,
10370 sdslen(channel
->ptr
),0)) {
10371 addReply(pat
->client
,shared
.mbulk4
);
10372 addReply(pat
->client
,shared
.pmessagebulk
);
10373 addReplyBulk(pat
->client
,pat
->pattern
);
10374 addReplyBulk(pat
->client
,channel
);
10375 addReplyBulk(pat
->client
,message
);
10379 decrRefCount(channel
);
/* SUBSCRIBE: subscribes the client to every channel named in argv[1]
 * through argv[argc-1], calling pubsubSubscribeChannel() once per argument. */
10384 static void subscribeCommand(redisClient
*c
) {
10387 for (j
= 1; j
< c
->argc
; j
++)
10388 pubsubSubscribeChannel(c
,c
->argv
[j
]);
/* UNSUBSCRIBE: with no arguments besides the command name (argc == 1) the
 * client is unsubscribed from all its channels through
 * pubsubUnsubscribeAllChannels(); each channel named in argv[1..argc-1] is
 * unsubscribed with pubsubUnsubscribeChannel(). Both calls pass 1 as the
 * 'notify' argument. */
10391 static void unsubscribeCommand(redisClient
*c
) {
10392 if (c
->argc
== 1) {
10393 pubsubUnsubscribeAllChannels(c
,1);
10398 for (j
= 1; j
< c
->argc
; j
++)
10399 pubsubUnsubscribeChannel(c
,c
->argv
[j
],1);
/* PSUBSCRIBE: subscribes the client to every pattern named in argv[1]
 * through argv[argc-1], calling pubsubSubscribePattern() once per argument. */
10403 static void psubscribeCommand(redisClient
*c
) {
10406 for (j
= 1; j
< c
->argc
; j
++)
10407 pubsubSubscribePattern(c
,c
->argv
[j
]);
/* PUNSUBSCRIBE: with no arguments besides the command name (argc == 1) the
 * client is unsubscribed from all its patterns through
 * pubsubUnsubscribeAllPatterns(); each pattern named in argv[1..argc-1] is
 * unsubscribed with pubsubUnsubscribePattern(). Both calls pass 1 as the
 * 'notify' argument. */
10410 static void punsubscribeCommand(redisClient
*c
) {
10411 if (c
->argc
== 1) {
10412 pubsubUnsubscribeAllPatterns(c
,1);
10417 for (j
= 1; j
< c
->argc
; j
++)
10418 pubsubUnsubscribePattern(c
,c
->argv
[j
],1);
/* PUBLISH: hands the channel in argv[1] and the message in argv[2] to
 * pubsubPublishMessage() and replies to the client with the integer it
 * returns (kept in 'receivers'). */
10422 static void publishCommand(redisClient
*c
) {
10423 int receivers
= pubsubPublishMessage(c
->argv
[1],c
->argv
[2]);
10424 addReplyLongLong(c
,receivers
);
10427 /* ===================== WATCH (CAS alike for MULTI/EXEC) ===================
10429 * The implementation uses a per-DB hash table mapping keys to list of clients
10430 * WATCHing those keys, so that given a key that is going to be modified
10431 * we can mark all the associated clients as dirty.
10433 * Also every client contains a list of WATCHed keys so that it's possible to
10434 * un-watch such keys when the client is freed or when UNWATCH is called. */
10436 /* In the client->watched_keys list we need to use watchedKey structures
10437 * as in order to identify a key in Redis we need both the key name and the
10439 typedef struct watchedKey
{
10444 /* Watch for the specified key */
10445 static void watchForKey(redisClient
*c
, robj
*key
) {
10446 list
*clients
= NULL
;
10451 /* Check if we are already watching for this key */
10452 listRewind(c
->watched_keys
,&li
);
10453 while((ln
= listNext(&li
))) {
10454 wk
= listNodeValue(ln
);
10455 if (wk
->db
== c
->db
&& equalStringObjects(key
,wk
->key
))
10456 return; /* Key already watched */
10458 /* This key is not already watched in this DB. Let's add it */
10459 clients
= dictFetchValue(c
->db
->watched_keys
,key
);
10461 clients
= listCreate();
10462 dictAdd(c
->db
->watched_keys
,key
,clients
);
10465 listAddNodeTail(clients
,c
);
10466 /* Add the new key to the list of keys watched by this client */
10467 wk
= zmalloc(sizeof(*wk
));
10471 listAddNodeTail(c
->watched_keys
,wk
);
10474 /* Unwatch all the keys watched by this client. Clearing the EXEC dirty
10475 * flag is up to the caller. */
10476 static void unwatchAllKeys(redisClient
*c
) {
10480 if (listLength(c
->watched_keys
) == 0) return;
10481 listRewind(c
->watched_keys
,&li
);
10482 while((ln
= listNext(&li
))) {
10486 /* Lookup the watched key -> clients list and remove the client
10488 wk
= listNodeValue(ln
);
10489 clients
= dictFetchValue(wk
->db
->watched_keys
, wk
->key
);
10490 assert(clients
!= NULL
);
10491 listDelNode(clients
,listSearchKey(clients
,c
));
10492 /* Remove the entry altogether if this was the only client */
10493 if (listLength(clients
) == 0)
10494 dictDelete(wk
->db
->watched_keys
, wk
->key
);
10495 /* Remove this watched key from the client->watched list */
10496 listDelNode(c
->watched_keys
,ln
);
10497 decrRefCount(wk
->key
);
10502 /* "Touch" a key, so that if this key is being WATCHed by some client the
10503 * next EXEC will fail. */
10504 static void touchWatchedKey(redisDb
*db
, robj
*key
) {
10509 if (dictSize(db
->watched_keys
) == 0) return;
10510 clients
= dictFetchValue(db
->watched_keys
, key
);
10511 if (!clients
) return;
10513 /* Mark all the clients watching this key as REDIS_DIRTY_CAS */
10514 /* Walk the list of clients watching this key */
10515 listRewind(clients
,&li
);
10516 while((ln
= listNext(&li
))) {
10517 redisClient
*c
= listNodeValue(ln
);
10519 c
->flags
|= REDIS_DIRTY_CAS
;
10523 /* On FLUSHDB or FLUSHALL all the watched keys that are present before the
10524 * flush but will be deleted as effect of the flushing operation should
10525 * be touched. "dbid" is the DB that's getting the flush. -1 if it is
10526 * a FLUSHALL operation (all the DBs flushed). */
10527 static void touchWatchedKeysOnFlush(int dbid
) {
10531 /* For every client, check all the watched keys */
10532 listRewind(server
.clients
,&li1
);
10533 while((ln
= listNext(&li1
))) {
10534 redisClient
*c
= listNodeValue(ln
);
10535 listRewind(c
->watched_keys
,&li2
);
10536 while((ln
= listNext(&li2
))) {
10537 watchedKey
*wk
= listNodeValue(ln
);
10539 /* For every watched key matching the specified DB, if the
10540 * key exists, mark the client as dirty, as the key will be
10542 if (dbid
== -1 || wk
->db
->id
== dbid
) {
10543 if (dictFind(wk
->db
->dict
, wk
->key
) != NULL
)
10544 c
->flags
|= REDIS_DIRTY_CAS
;
/* WATCH: when the client has the REDIS_MULTI flag set, an error reply is
 * queued because WATCH is not allowed inside MULTI. Every key named in
 * argv[1..argc-1] is registered with watchForKey() and the client then
 * receives shared.ok.
 * NOTE(review): the MULTI error path presumably returns before any key is
 * watched -- confirm against the complete function body. */
10550 static void watchCommand(redisClient
*c
) {
10553 if (c
->flags
& REDIS_MULTI
) {
10554 addReplySds(c
,sdsnew("-ERR WATCH inside MULTI is not allowed\r\n"));
10557 for (j
= 1; j
< c
->argc
; j
++)
10558 watchForKey(c
,c
->argv
[j
]);
10559 addReply(c
,shared
.ok
);
10562 static void unwatchCommand(redisClient
*c
) {
10564 c
->flags
&= (~REDIS_DIRTY_CAS
);
10565 addReply(c
,shared
.ok
);
10568 /* ================================= Debugging ============================== */
10570 /* Compute the SHA1 of the 'len' bytes found at 'ptr'.
10571 * The SHA1 is then xored against the 20 bytes pointed to by digest.
10572 * Since xor is commutative, this operation is used in order to
10573 * "add" digests relative to unordered elements.
10575 * So digest(a,b,c,d) will be the same as digest(b,a,c,d) */
10576 static void xorDigest(unsigned char *digest
, void *ptr
, size_t len
) {
10578 unsigned char hash
[20], *s
= ptr
;
10582 SHA1Update(&ctx
,s
,len
);
10583 SHA1Final(hash
,&ctx
);
10585 for (j
= 0; j
< 20; j
++)
10586 digest
[j
] ^= hash
[j
];
/* Object flavour of xorDigest(): obtains a decoded version of 'o' with
 * getDecodedObject() and passes its string payload (o->ptr, sdslen(o->ptr)
 * bytes) to xorDigest() together with 'digest'. */
10589 static void xorObjectDigest(unsigned char *digest
, robj
*o
) {
10590 o
= getDecodedObject(o
);
10591 xorDigest(digest
,o
->ptr
,sdslen(o
->ptr
));
10595 /* This function, instead of just computing the SHA1 and xoring it
10596 * against digest, also performs the digest of "digest" itself and
10597 * replace the old value with the new one.
10599 * So the final digest will be:
10601 * digest = SHA1(digest xor SHA1(data))
10603 * This function is used every time we want to preserve the order so
10604 * that digest(a,b,c,d) will be different than digest(b,c,d,a)
10606 * Also note that mixdigest("foo") followed by mixdigest("bar")
10607 * will lead to a different digest compared to "fo", "obar".
10609 static void mixDigest(unsigned char *digest
, void *ptr
, size_t len
) {
10613 xorDigest(digest
,s
,len
);
10615 SHA1Update(&ctx
,digest
,20);
10616 SHA1Final(digest
,&ctx
);
/* Object flavour of mixDigest(): obtains a decoded version of 'o' with
 * getDecodedObject() and passes its string payload (o->ptr, sdslen(o->ptr)
 * bytes) to mixDigest() together with 'digest'. */
10619 static void mixObjectDigest(unsigned char *digest
, robj
*o
) {
10620 o
= getDecodedObject(o
);
10621 mixDigest(digest
,o
->ptr
,sdslen(o
->ptr
));
10625 /* Compute the dataset digest. Since keys, sets elements, hashes elements
10626 * are not ordered, we use a trick: every aggregate digest is the xor
10627 * of the digests of their elements. This way the order will not change
10628 * the result. For list instead we use a feedback entering the output digest
10629 * as input in order to ensure that a different ordered list will result in
10630 * a different digest. */
10631 static void computeDatasetDigest(unsigned char *final
) {
10632 unsigned char digest
[20];
10634 dictIterator
*di
= NULL
;
10639 memset(final
,0,20); /* Start with a clean result */
10641 for (j
= 0; j
< server
.dbnum
; j
++) {
10642 redisDb
*db
= server
.db
+j
;
10644 if (dictSize(db
->dict
) == 0) continue;
10645 di
= dictGetIterator(db
->dict
);
10647 /* hash the DB id, so the same dataset moved in a different
10648 * DB will lead to a different digest */
10650 mixDigest(final
,&aux
,sizeof(aux
));
10652 /* Iterate this DB writing every entry */
10653 while((de
= dictNext(di
)) != NULL
) {
10654 robj
*key
, *o
, *kcopy
;
10657 memset(digest
,0,20); /* This key-val digest */
10658 key
= dictGetEntryKey(de
);
10660 if (!server
.vm_enabled
) {
10661 mixObjectDigest(digest
,key
);
10662 o
= dictGetEntryVal(de
);
10664 /* Don't work with the key directly as when VM is active
10665 * this is unsafe: TODO: fix decrRefCount to check if the
10666 * count really reached 0 to avoid this mess */
10667 kcopy
= dupStringObject(key
);
10668 mixObjectDigest(digest
,kcopy
);
10669 o
= lookupKeyRead(db
,kcopy
);
10670 decrRefCount(kcopy
);
10672 aux
= htonl(o
->type
);
10673 mixDigest(digest
,&aux
,sizeof(aux
));
10674 expiretime
= getExpire(db
,key
);
10676 /* Save the key and associated value */
10677 if (o
->type
== REDIS_STRING
) {
10678 mixObjectDigest(digest
,o
);
10679 } else if (o
->type
== REDIS_LIST
) {
10680 list
*list
= o
->ptr
;
10684 listRewind(list
,&li
);
10685 while((ln
= listNext(&li
))) {
10686 robj
*eleobj
= listNodeValue(ln
);
10688 mixObjectDigest(digest
,eleobj
);
10690 } else if (o
->type
== REDIS_SET
) {
10691 dict
*set
= o
->ptr
;
10692 dictIterator
*di
= dictGetIterator(set
);
10695 while((de
= dictNext(di
)) != NULL
) {
10696 robj
*eleobj
= dictGetEntryKey(de
);
10698 xorObjectDigest(digest
,eleobj
);
10700 dictReleaseIterator(di
);
10701 } else if (o
->type
== REDIS_ZSET
) {
10703 dictIterator
*di
= dictGetIterator(zs
->dict
);
10706 while((de
= dictNext(di
)) != NULL
) {
10707 robj
*eleobj
= dictGetEntryKey(de
);
10708 double *score
= dictGetEntryVal(de
);
10709 unsigned char eledigest
[20];
10711 snprintf(buf
,sizeof(buf
),"%.17g",*score
);
10712 memset(eledigest
,0,20);
10713 mixObjectDigest(eledigest
,eleobj
);
10714 mixDigest(eledigest
,buf
,strlen(buf
));
10715 xorDigest(digest
,eledigest
,20);
10717 dictReleaseIterator(di
);
10718 } else if (o
->type
== REDIS_HASH
) {
10722 hi
= hashInitIterator(o
);
10723 while (hashNext(hi
) != REDIS_ERR
) {
10724 unsigned char eledigest
[20];
10726 memset(eledigest
,0,20);
10727 obj
= hashCurrent(hi
,REDIS_HASH_KEY
);
10728 mixObjectDigest(eledigest
,obj
);
10730 obj
= hashCurrent(hi
,REDIS_HASH_VALUE
);
10731 mixObjectDigest(eledigest
,obj
);
10733 xorDigest(digest
,eledigest
,20);
10735 hashReleaseIterator(hi
);
10737 redisPanic("Unknown object type");
10739 /* If the key has an expire, add it to the mix */
10740 if (expiretime
!= -1) xorDigest(digest
,"!!expire!!",10);
10741 /* We can finally xor the key-val digest to the final digest */
10742 xorDigest(final
,digest
,20);
10744 dictReleaseIterator(di
);
10748 static void debugCommand(redisClient
*c
) {
10749 if (!strcasecmp(c
->argv
[1]->ptr
,"segfault")) {
10750 *((char*)-1) = 'x';
10751 } else if (!strcasecmp(c
->argv
[1]->ptr
,"reload")) {
10752 if (rdbSave(server
.dbfilename
) != REDIS_OK
) {
10753 addReply(c
,shared
.err
);
10757 if (rdbLoad(server
.dbfilename
) != REDIS_OK
) {
10758 addReply(c
,shared
.err
);
10761 redisLog(REDIS_WARNING
,"DB reloaded by DEBUG RELOAD");
10762 addReply(c
,shared
.ok
);
10763 } else if (!strcasecmp(c
->argv
[1]->ptr
,"loadaof")) {
10765 if (loadAppendOnlyFile(server
.appendfilename
) != REDIS_OK
) {
10766 addReply(c
,shared
.err
);
10769 redisLog(REDIS_WARNING
,"Append Only File loaded by DEBUG LOADAOF");
10770 addReply(c
,shared
.ok
);
10771 } else if (!strcasecmp(c
->argv
[1]->ptr
,"object") && c
->argc
== 3) {
10772 dictEntry
*de
= dictFind(c
->db
->dict
,c
->argv
[2]);
10776 addReply(c
,shared
.nokeyerr
);
10779 key
= dictGetEntryKey(de
);
10780 val
= dictGetEntryVal(de
);
10781 if (!server
.vm_enabled
|| (key
->storage
== REDIS_VM_MEMORY
||
10782 key
->storage
== REDIS_VM_SWAPPING
)) {
10786 if (val
->encoding
< (sizeof(strencoding
)/sizeof(char*))) {
10787 strenc
= strencoding
[val
->encoding
];
10789 snprintf(buf
,64,"unknown encoding %d\n", val
->encoding
);
10792 addReplySds(c
,sdscatprintf(sdsempty(),
10793 "+Key at:%p refcount:%d, value at:%p refcount:%d "
10794 "encoding:%s serializedlength:%lld\r\n",
10795 (void*)key
, key
->refcount
, (void*)val
, val
->refcount
,
10796 strenc
, (long long) rdbSavedObjectLen(val
,NULL
)));
10798 addReplySds(c
,sdscatprintf(sdsempty(),
10799 "+Key at:%p refcount:%d, value swapped at: page %llu "
10800 "using %llu pages\r\n",
10801 (void*)key
, key
->refcount
, (unsigned long long) key
->vm
.page
,
10802 (unsigned long long) key
->vm
.usedpages
));
10804 } else if (!strcasecmp(c
->argv
[1]->ptr
,"swapin") && c
->argc
== 3) {
10805 lookupKeyRead(c
->db
,c
->argv
[2]);
10806 addReply(c
,shared
.ok
);
10807 } else if (!strcasecmp(c
->argv
[1]->ptr
,"swapout") && c
->argc
== 3) {
10808 dictEntry
*de
= dictFind(c
->db
->dict
,c
->argv
[2]);
10811 if (!server
.vm_enabled
) {
10812 addReplySds(c
,sdsnew("-ERR Virtual Memory is disabled\r\n"));
10816 addReply(c
,shared
.nokeyerr
);
10819 key
= dictGetEntryKey(de
);
10820 val
= dictGetEntryVal(de
);
10821 /* If the key is shared we want to create a copy */
10822 if (key
->refcount
> 1) {
10823 robj
*newkey
= dupStringObject(key
);
10825 key
= dictGetEntryKey(de
) = newkey
;
10828 if (key
->storage
!= REDIS_VM_MEMORY
) {
10829 addReplySds(c
,sdsnew("-ERR This key is not in memory\r\n"));
10830 } else if (vmSwapObjectBlocking(key
,val
) == REDIS_OK
) {
10831 dictGetEntryVal(de
) = NULL
;
10832 addReply(c
,shared
.ok
);
10834 addReply(c
,shared
.err
);
10836 } else if (!strcasecmp(c
->argv
[1]->ptr
,"populate") && c
->argc
== 3) {
10841 if (getLongFromObjectOrReply(c
, c
->argv
[2], &keys
, NULL
) != REDIS_OK
)
10843 for (j
= 0; j
< keys
; j
++) {
10844 snprintf(buf
,sizeof(buf
),"key:%lu",j
);
10845 key
= createStringObject(buf
,strlen(buf
));
10846 if (lookupKeyRead(c
->db
,key
) != NULL
) {
10850 snprintf(buf
,sizeof(buf
),"value:%lu",j
);
10851 val
= createStringObject(buf
,strlen(buf
));
10852 dictAdd(c
->db
->dict
,key
,val
);
10854 addReply(c
,shared
.ok
);
10855 } else if (!strcasecmp(c
->argv
[1]->ptr
,"digest") && c
->argc
== 2) {
10856 unsigned char digest
[20];
10857 sds d
= sdsnew("+");
10860 computeDatasetDigest(digest
);
10861 for (j
= 0; j
< 20; j
++)
10862 d
= sdscatprintf(d
, "%02x",digest
[j
]);
10864 d
= sdscatlen(d
,"\r\n",2);
10867 addReplySds(c
,sdsnew(
10868 "-ERR Syntax error, try DEBUG [SEGFAULT|OBJECT <key>|SWAPIN <key>|SWAPOUT <key>|RELOAD]\r\n"));
10872 static void _redisAssert(char *estr
, char *file
, int line
) {
10873 redisLog(REDIS_WARNING
,"=== ASSERTION FAILED ===");
10874 redisLog(REDIS_WARNING
,"==> %s:%d '%s' is not true",file
,line
,estr
);
10875 #ifdef HAVE_BACKTRACE
10876 redisLog(REDIS_WARNING
,"(forcing SIGSEGV in order to print the stack trace)");
10877 *((char*)-1) = 'x';
10881 static void _redisPanic(char *msg
, char *file
, int line
) {
10882 redisLog(REDIS_WARNING
,"!!! Software Failure. Press left mouse button to continue");
10883 redisLog(REDIS_WARNING
,"Guru Meditation: %s #%s:%d",msg
,file
,line
);
10884 #ifdef HAVE_BACKTRACE
10885 redisLog(REDIS_WARNING
,"(forcing SIGSEGV in order to print the stack trace)");
10886 *((char*)-1) = 'x';
10890 /* =================================== Main! ================================ */
/* Read the kernel setting stored in /proc/sys/vm/overcommit_memory.
 *
 * Returns the integer found at the start of the file, or -1 when the file
 * cannot be opened, cannot be read, or does not start with a number. */
int linuxOvercommitMemoryValue(void) {
    FILE *fp = fopen("/proc/sys/vm/overcommit_memory","r");
    char buf[64];
    char *end;
    long value;

    if (!fp) return -1;
    if (fgets(buf,sizeof(buf),fp) == NULL) {
        fclose(fp);
        return -1;
    }
    fclose(fp);

    /* atoi() would map unparsable content to 0, and 0 is exactly the value
     * the caller warns about: report a parse failure as -1 instead. */
    value = strtol(buf,&end,10);
    if (end == buf) return -1;
    return (int)value;
}
10907 void linuxOvercommitMemoryWarning(void) {
10908 if (linuxOvercommitMemoryValue() == 0) {
10909 redisLog(REDIS_WARNING
,"WARNING overcommit_memory is set to 0! Background save may fail under low memory condition. To fix this issue add 'vm.overcommit_memory = 1' to /etc/sysctl.conf and then reboot or run the command 'sysctl vm.overcommit_memory=1' for this to take effect.");
10912 #endif /* __linux__ */
10914 static void daemonize(void) {
10918 if (fork() != 0) exit(0); /* parent exits */
10919 setsid(); /* create a new session */
10921 /* Every output goes to /dev/null. If Redis is daemonized but
10922 * the 'logfile' is set to 'stdout' in the configuration file
10923 * it will not log at all. */
10924 if ((fd
= open("/dev/null", O_RDWR
, 0)) != -1) {
10925 dup2(fd
, STDIN_FILENO
);
10926 dup2(fd
, STDOUT_FILENO
);
10927 dup2(fd
, STDERR_FILENO
);
10928 if (fd
> STDERR_FILENO
) close(fd
);
10930 /* Try to write the pid file */
10931 fp
= fopen(server
.pidfile
,"w");
10933 fprintf(fp
,"%d\n",getpid());
10938 static void version() {
10939 printf("Redis server version %s (%s:%d)\n", REDIS_VERSION
,
10940 REDIS_GIT_SHA1
, atoi(REDIS_GIT_DIRTY
) > 0);
/* Print the command line synopsis on standard error, then terminate the
 * process with exit status 1. */
static void usage(void) {
    fprintf(stderr,"Usage: ./redis-server [/path/to/redis.conf]\n");
    fprintf(stderr," ./redis-server - (read config from stdin)\n");
    exit(1);
}
10950 int main(int argc
, char **argv
) {
10953 initServerConfig();
10954 sortCommandTable();
10956 if (strcmp(argv
[1], "-v") == 0 ||
10957 strcmp(argv
[1], "--version") == 0) version();
10958 if (strcmp(argv
[1], "--help") == 0) usage();
10959 resetServerSaveParams();
10960 loadServerConfig(argv
[1]);
10961 } else if ((argc
> 2)) {
10964 redisLog(REDIS_WARNING
,"Warning: no config file specified, using the default config. In order to specify a config file use 'redis-server /path/to/redis.conf'");
10966 if (server
.daemonize
) daemonize();
10968 redisLog(REDIS_NOTICE
,"Server started, Redis version " REDIS_VERSION
);
10970 linuxOvercommitMemoryWarning();
10972 start
= time(NULL
);
10973 if (server
.appendonly
) {
10974 if (loadAppendOnlyFile(server
.appendfilename
) == REDIS_OK
)
10975 redisLog(REDIS_NOTICE
,"DB loaded from append only file: %ld seconds",time(NULL
)-start
);
10977 if (rdbLoad(server
.dbfilename
) == REDIS_OK
)
10978 redisLog(REDIS_NOTICE
,"DB loaded from disk: %ld seconds",time(NULL
)-start
);
10980 redisLog(REDIS_NOTICE
,"The server is now ready to accept connections on port %d", server
.port
);
10981 aeSetBeforeSleepProc(server
.el
,beforeSleep
);
10983 aeDeleteEventLoop(server
.el
);
10987 /* ============================= Backtrace support ========================= */
10989 #ifdef HAVE_BACKTRACE
10990 static char *findFuncName(void *pointer
, unsigned long *offset
);
10992 static void *getMcontextEip(ucontext_t
*uc
) {
10993 #if defined(__FreeBSD__)
10994 return (void*) uc
->uc_mcontext
.mc_eip
;
10995 #elif defined(__dietlibc__)
10996 return (void*) uc
->uc_mcontext
.eip
;
10997 #elif defined(__APPLE__) && !defined(MAC_OS_X_VERSION_10_6)
10999 return (void*) uc
->uc_mcontext
->__ss
.__rip
;
11001 return (void*) uc
->uc_mcontext
->__ss
.__eip
;
11003 #elif defined(__APPLE__) && defined(MAC_OS_X_VERSION_10_6)
11004 #if defined(_STRUCT_X86_THREAD_STATE64) && !defined(__i386__)
11005 return (void*) uc
->uc_mcontext
->__ss
.__rip
;
11007 return (void*) uc
->uc_mcontext
->__ss
.__eip
;
11009 #elif defined(__i386__) || defined(__X86_64__) || defined(__x86_64__)
11010 return (void*) uc
->uc_mcontext
.gregs
[REG_EIP
]; /* Linux 32/64 bit */
11011 #elif defined(__ia64__) /* Linux IA64 */
11012 return (void*) uc
->uc_mcontext
.sc_ip
;
11018 static void segvHandler(int sig
, siginfo_t
*info
, void *secret
) {
11020 char **messages
= NULL
;
11021 int i
, trace_size
= 0;
11022 unsigned long offset
=0;
11023 ucontext_t
*uc
= (ucontext_t
*) secret
;
11025 REDIS_NOTUSED(info
);
11027 redisLog(REDIS_WARNING
,
11028 "======= Ooops! Redis %s got signal: -%d- =======", REDIS_VERSION
, sig
);
11029 infostring
= genRedisInfoString();
11030 redisLog(REDIS_WARNING
, "%s",infostring
);
11031 /* It's not safe to sdsfree() the returned string under memory
11032 * corruption conditions. Let it leak as we are going to abort */
11034 trace_size
= backtrace(trace
, 100);
11035 /* overwrite sigaction with caller's address */
11036 if (getMcontextEip(uc
) != NULL
) {
11037 trace
[1] = getMcontextEip(uc
);
11039 messages
= backtrace_symbols(trace
, trace_size
);
11041 for (i
=1; i
<trace_size
; ++i
) {
11042 char *fn
= findFuncName(trace
[i
], &offset
), *p
;
11044 p
= strchr(messages
[i
],'+');
11045 if (!fn
|| (p
&& ((unsigned long)strtol(p
+1,NULL
,10)) < offset
)) {
11046 redisLog(REDIS_WARNING
,"%s", messages
[i
]);
11048 redisLog(REDIS_WARNING
,"%d redis-server %p %s + %d", i
, trace
[i
], fn
, (unsigned int)offset
);
11051 /* free(messages); Don't call free() with possibly corrupted memory. */
11055 static void sigtermHandler(int sig
) {
11056 REDIS_NOTUSED(sig
);
11058 redisLog(REDIS_WARNING
,"SIGTERM received, scheduling shutting down...");
11059 server
.shutdown_asap
= 1;
11062 static void setupSigSegvAction(void) {
11063 struct sigaction act
;
11065 sigemptyset (&act
.sa_mask
);
11066 /* When the SA_SIGINFO flag is set in sa_flags then sa_sigaction
11067 * is used. Otherwise, sa_handler is used */
11068 act
.sa_flags
= SA_NODEFER
| SA_ONSTACK
| SA_RESETHAND
| SA_SIGINFO
;
11069 act
.sa_sigaction
= segvHandler
;
11070 sigaction (SIGSEGV
, &act
, NULL
);
11071 sigaction (SIGBUS
, &act
, NULL
);
11072 sigaction (SIGFPE
, &act
, NULL
);
11073 sigaction (SIGILL
, &act
, NULL
);
11074 sigaction (SIGBUS
, &act
, NULL
);
11076 act
.sa_flags
= SA_NODEFER
| SA_ONSTACK
| SA_RESETHAND
;
11077 act
.sa_handler
= sigtermHandler
;
11078 sigaction (SIGTERM
, &act
, NULL
);
11082 #include "staticsymbols.h"
11083 /* This function try to convert a pointer into a function name. It's used in
11084 * oreder to provide a backtrace under segmentation fault that's able to
11085 * display functions declared as static (otherwise the backtrace is useless). */
11086 static char *findFuncName(void *pointer
, unsigned long *offset
){
11088 unsigned long off
, minoff
= 0;
11090 /* Try to match against the Symbol with the smallest offset */
11091 for (i
=0; symsTable
[i
].pointer
; i
++) {
11092 unsigned long lp
= (unsigned long) pointer
;
11094 if (lp
!= (unsigned long)-1 && lp
>= symsTable
[i
].pointer
) {
11095 off
=lp
-symsTable
[i
].pointer
;
11096 if (ret
< 0 || off
< minoff
) {
11102 if (ret
== -1) return NULL
;
11104 return symsTable
[ret
].name
;
11106 #else /* HAVE_BACKTRACE */
/* Variant used when backtrace support is not compiled in: no signal
 * handler is installed. */
static void setupSigSegvAction(void) {
    /* Intentionally empty. */
}
11109 #endif /* HAVE_BACKTRACE */