2 * Copyright (c) 2009-2010, Salvatore Sanfilippo <antirez at gmail dot com>
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions are met:
8 * * Redistributions of source code must retain the above copyright notice,
9 * this list of conditions and the following disclaimer.
10 * * Redistributions in binary form must reproduce the above copyright
11 * notice, this list of conditions and the following disclaimer in the
12 * documentation and/or other materials provided with the distribution.
13 * * Neither the name of Redis nor the names of its contributors may be used
14 * to endorse or promote products derived from this software without
15 * specific prior written permission.
17 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
18 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
21 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
22 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
23 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
24 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
25 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
26 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
27 * POSSIBILITY OF SUCH DAMAGE.
30 #define REDIS_VERSION "2.1.0"
45 #endif /* HAVE_BACKTRACE */
53 #include <arpa/inet.h>
57 #include <sys/resource.h>
65 #include "solarisfixes.h"
69 #include "ae.h" /* Event driven programming library */
70 #include "sds.h" /* Dynamic safe strings */
71 #include "anet.h" /* Networking the easy way */
72 #include "dict.h" /* Hash tables */
73 #include "adlist.h" /* Linked lists */
74 #include "zmalloc.h" /* total memory usage aware version of malloc/free */
75 #include "lzf.h" /* LZF compression library */
76 #include "pqsort.h" /* Partial qsort for SORT+LIMIT */
77 #include "zipmap.h" /* Compact dictionary-alike data structure */
78 #include "sha1.h" /* SHA1 is used for DEBUG DIGEST */
79 #include "release.h" /* Release and/or git repository information */
85 /* Static server configuration */
86 #define REDIS_SERVERPORT 6379 /* TCP port */
87 #define REDIS_MAXIDLETIME (60*5) /* default client timeout */
88 #define REDIS_IOBUF_LEN 1024
89 #define REDIS_LOADBUF_LEN 1024
90 #define REDIS_STATIC_ARGS 8
91 #define REDIS_DEFAULT_DBNUM 16
92 #define REDIS_CONFIGLINE_MAX 1024
93 #define REDIS_OBJFREELIST_MAX 1000000 /* Max number of objects to cache */
94 #define REDIS_MAX_SYNC_TIME 60 /* Slave can't take more to sync */
95 #define REDIS_EXPIRELOOKUPS_PER_CRON 10 /* lookup 10 expires per loop */
96 #define REDIS_MAX_WRITE_PER_EVENT (1024*64)
97 #define REDIS_REQUEST_MAX_SIZE (1024*1024*256) /* max bytes in inline command */
99 /* If more than REDIS_WRITEV_THRESHOLD write packets are pending use writev */
100 #define REDIS_WRITEV_THRESHOLD 3
101 /* Max number of iovecs used for each writev call */
102 #define REDIS_WRITEV_IOVEC_COUNT 256
104 /* Hash table parameters */
105 #define REDIS_HT_MINFILL 10 /* Minimal hash table fill 10% */
108 #define REDIS_CMD_BULK 1 /* Bulk write command */
109 #define REDIS_CMD_INLINE 2 /* Inline command */
110 /* REDIS_CMD_DENYOOM deserves a longer comment: all the commands marked with
111 this flag will return an error when the 'maxmemory' option is set in the
112 config file and the server is using more than maxmemory bytes of memory.
113 In short these commands are denied on low memory conditions. */
114 #define REDIS_CMD_DENYOOM 4
115 #define REDIS_CMD_FORCE_REPLICATION 8 /* Force replication even if dirty is 0 */
118 #define REDIS_STRING 0
124 /* Objects encoding. Some kinds of objects like Strings and Hashes can be
125 * internally represented in multiple ways. The 'encoding' field of the object
126 * is set to one of these values for this object. */
127 #define REDIS_ENCODING_RAW 0 /* Raw representation */
128 #define REDIS_ENCODING_INT 1 /* Encoded as integer */
129 #define REDIS_ENCODING_ZIPMAP 2 /* Encoded as zipmap */
130 #define REDIS_ENCODING_HT 3 /* Encoded as an hash table */
132 static char* strencoding
[] = {
133 "raw", "int", "zipmap", "hashtable"
136 /* Object types only used for dumping to disk */
137 #define REDIS_EXPIRETIME 253
138 #define REDIS_SELECTDB 254
139 #define REDIS_EOF 255
141 /* Defines related to the dump file format. To store 32 bits lengths for short
142 * keys requires a lot of space, so we check the most significant 2 bits of
143 * the first byte to interpret the length:
145 * 00|000000 => if the two MSB are 00 the len is the 6 bits of this byte
146 * 01|000000 00000000 => 01, the len is 14 bits, 6 bits + 8 bits of next byte
147 * 10|000000 [32 bit integer] => if it's 10, a full 32 bit len will follow
148 * 11|000000 this means: specially encoded object will follow. The six bits
149 * number specify the kind of object that follows.
150 * See the REDIS_RDB_ENC_* defines.
152 * Lengths up to 63 are stored using a single byte, most DB keys, and many
153 * values, will fit inside. */
154 #define REDIS_RDB_6BITLEN 0
155 #define REDIS_RDB_14BITLEN 1
156 #define REDIS_RDB_32BITLEN 2
157 #define REDIS_RDB_ENCVAL 3
158 #define REDIS_RDB_LENERR UINT_MAX
160 /* When a length of a string object stored on disk has the first two bits
161 * set, the remaining six bits specify a special encoding for the object
162 * according to the following defines: */
163 #define REDIS_RDB_ENC_INT8 0 /* 8 bit signed integer */
164 #define REDIS_RDB_ENC_INT16 1 /* 16 bit signed integer */
165 #define REDIS_RDB_ENC_INT32 2 /* 32 bit signed integer */
166 #define REDIS_RDB_ENC_LZF 3 /* string compressed with LZF */
168 /* Virtual memory object->where field. */
169 #define REDIS_VM_MEMORY 0 /* The object is on memory */
170 #define REDIS_VM_SWAPPED 1 /* The object is on disk */
171 #define REDIS_VM_SWAPPING 2 /* Redis is swapping this object on disk */
172 #define REDIS_VM_LOADING 3 /* Redis is loading this object from disk */
174 /* Virtual memory static configuration stuff.
175 * Check vmFindContiguousPages() to know more about these magic numbers. */
176 #define REDIS_VM_MAX_NEAR_PAGES 65536
177 #define REDIS_VM_MAX_RANDOM_JUMP 4096
178 #define REDIS_VM_MAX_THREADS 32
179 #define REDIS_THREAD_STACK_SIZE (1024*1024*4)
180 /* The following is the *percentage* of completed I/O jobs to process when the
181 * handler is called. While Virtual Memory I/O operations are performed by
182 * threads, these operations must be processed by the main thread when completed
183 * in order to take effect. */
184 #define REDIS_MAX_COMPLETED_JOBS_PROCESSED 1
187 #define REDIS_SLAVE 1 /* This client is a slave server */
188 #define REDIS_MASTER 2 /* This client is a master server */
189 #define REDIS_MONITOR 4 /* This client is a slave monitor, see MONITOR */
190 #define REDIS_MULTI 8 /* This client is in a MULTI context */
191 #define REDIS_BLOCKED 16 /* The client is waiting in a blocking operation */
192 #define REDIS_IO_WAIT 32 /* The client is waiting for Virtual Memory I/O */
193 #define REDIS_DIRTY_CAS 64 /* Watched keys modified. EXEC will fail. */
195 /* Slave replication state - slave side */
196 #define REDIS_REPL_NONE 0 /* No active replication */
197 #define REDIS_REPL_CONNECT 1 /* Must connect to master */
198 #define REDIS_REPL_CONNECTED 2 /* Connected to master */
200 /* Slave replication state - from the point of view of master
201 * Note that in SEND_BULK and ONLINE state the slave receives new updates
202 * in its output queue. In the WAIT_BGSAVE state instead the server is waiting
203 * to start the next background saving in order to send updates to it. */
204 #define REDIS_REPL_WAIT_BGSAVE_START 3 /* master waits bgsave to start feeding it */
205 #define REDIS_REPL_WAIT_BGSAVE_END 4 /* master waits bgsave to start bulk DB transmission */
206 #define REDIS_REPL_SEND_BULK 5 /* master is sending the bulk DB */
207 #define REDIS_REPL_ONLINE 6 /* bulk DB already transmitted, receive updates */
209 /* List related stuff */
213 /* Sort operations */
214 #define REDIS_SORT_GET 0
215 #define REDIS_SORT_ASC 1
216 #define REDIS_SORT_DESC 2
217 #define REDIS_SORTKEY_MAX 1024
220 #define REDIS_DEBUG 0
221 #define REDIS_VERBOSE 1
222 #define REDIS_NOTICE 2
223 #define REDIS_WARNING 3
225 /* Anti-warning macro... */
226 #define REDIS_NOTUSED(V) ((void) V)
228 #define ZSKIPLIST_MAXLEVEL 32 /* Should be enough for 2^32 elements */
229 #define ZSKIPLIST_P 0.25 /* Skiplist P = 1/4 */
231 /* Append only defines */
232 #define APPENDFSYNC_NO 0
233 #define APPENDFSYNC_ALWAYS 1
234 #define APPENDFSYNC_EVERYSEC 2
236 /* Hashes related defaults */
237 #define REDIS_HASH_MAX_ZIPMAP_ENTRIES 64
238 #define REDIS_HASH_MAX_ZIPMAP_VALUE 512
240 /* Assertion helpers. We can print the stacktrace, so our assert is defined this way: redisAssert(_e) evaluates _e and, when it is false, calls _redisAssert() with the stringified expression, __FILE__ and __LINE__ and then _exit(1); redisPanic(_e) unconditionally calls _redisPanic() with the stringified argument, __FILE__ and __LINE__ and then _exit(1). Both expand to a single fully parenthesized expression so they stay intact inside larger expressions. */
241 #define redisAssert(_e) ((_e)?(void)0 : (_redisAssert(#_e,__FILE__,__LINE__),_exit(1)))
242 #define redisPanic(_e) (_redisPanic(#_e,__FILE__,__LINE__),_exit(1))
243 static void _redisAssert(char *estr
, char *file
, int line
);
244 static void _redisPanic(char *msg
, char *file
, int line
);
246 /*================================= Data types ============================== */
248 /* A redis object, that is a type able to hold a string / list / set */
250 /* The VM object structure */
251 struct redisObjectVM
{
252 off_t page
; /* the page at which the object is stored on disk */
253 off_t usedpages
; /* number of pages used on disk */
254 time_t atime
; /* Last access time */
257 /* The actual Redis Object */
258 typedef struct redisObject
{
261 unsigned char encoding
;
262 unsigned char storage
; /* If this object is a key, where is the value?
263 * REDIS_VM_MEMORY, REDIS_VM_SWAPPED, ... */
264 unsigned char vtype
; /* If this object is a key, and value is swapped out,
265 * this is the type of the swapped out object. */
267 /* VM fields, these are only allocated if VM is active, otherwise the
268 * object allocation function will just allocate
269 * sizeof(redisObject) minus sizeof(redisObjectVM), so using
270 * Redis without VM active will not have any overhead. */
271 struct redisObjectVM vm
;
274 /* Macro used to initialize a Redis object allocated on the stack.
275 * Note that this macro is kept near the structure definition to make sure
276 * we'll update it when the structure is changed, to avoid bugs like
277 * bug #85 introduced exactly in this way. */
278 #define initStaticStringObject(_var,_ptr) do { \
280 _var.type = REDIS_STRING; \
281 _var.encoding = REDIS_ENCODING_RAW; \
283 if (server.vm_enabled) _var.storage = REDIS_VM_MEMORY; \
286 typedef struct redisDb
{
287 dict
*dict
; /* The keyspace for this DB */
288 dict
*expires
; /* Timeout of keys with a timeout set */
289 dict
*blocking_keys
; /* Keys with clients waiting for data (BLPOP) */
290 dict
*io_keys
; /* Keys with clients waiting for VM I/O */
291 dict
*watched_keys
; /* WATCHED keys for MULTI/EXEC CAS */
295 /* Client MULTI/EXEC state */
296 typedef struct multiCmd
{
299 struct redisCommand
*cmd
;
302 typedef struct multiState
{
303 multiCmd
*commands
; /* Array of MULTI commands */
304 int count
; /* Total number of MULTI commands */
307 /* With multiplexing we need to keep per-client state.
308 * Clients are kept in a linked list. */
309 typedef struct redisClient
{
314 robj
**argv
, **mbargv
;
316 int bulklen
; /* bulk read len. -1 if not in bulk read mode */
317 int multibulk
; /* multi bulk command format active */
320 time_t lastinteraction
; /* time of the last interaction, used for timeout */
321 int flags
; /* REDIS_SLAVE | REDIS_MONITOR | REDIS_MULTI ... */
322 int slaveseldb
; /* slave selected db, if this client is a slave */
323 int authenticated
; /* when requirepass is non-NULL */
324 int replstate
; /* replication state if this is a slave */
325 int repldbfd
; /* replication DB file descriptor */
326 long repldboff
; /* replication DB file offset */
327 off_t repldbsize
; /* replication DB file size */
328 multiState mstate
; /* MULTI/EXEC state */
329 robj
**blocking_keys
; /* The key we are waiting to terminate a blocking
330 * operation such as BLPOP. Otherwise NULL. */
331 int blocking_keys_num
; /* Number of blocking keys */
332 time_t blockingto
; /* Blocking operation timeout. If UNIX current time
333 * is >= blockingto then the operation timed out. */
334 list
*io_keys
; /* Keys this client is waiting to be loaded from the
335 * swap file in order to continue. */
336 list
*watched_keys
; /* Keys WATCHED for MULTI/EXEC CAS */
337 dict
*pubsub_channels
; /* channels a client is interested in (SUBSCRIBE) */
338 list
*pubsub_patterns
; /* patterns a client is interested in (SUBSCRIBE) */
346 /* Global server state structure */
351 long long dirty
; /* changes to DB from the last save */
353 list
*slaves
, *monitors
;
354 char neterr
[ANET_ERR_LEN
];
356 int cronloops
; /* number of times the cron function run */
357 list
*objfreelist
; /* A list of freed objects to avoid malloc() */
358 time_t lastsave
; /* Unix time of last successful save */
359 /* Fields used only for stats */
360 time_t stat_starttime
; /* server start time */
361 long long stat_numcommands
; /* number of processed commands */
362 long long stat_numconnections
; /* number of connections received */
363 long long stat_expiredkeys
; /* number of expired keys */
377 pid_t bgsavechildpid
;
378 pid_t bgrewritechildpid
;
379 sds bgrewritebuf
; /* buffer taken by parent during append only rewrite */
380 sds aofbuf
; /* AOF buffer, written before entering the event loop */
381 struct saveparam
*saveparams
;
386 char *appendfilename
;
390 /* Replication related */
395 redisClient
*master
; /* client that is master for this slave */
397 unsigned int maxclients
;
398 unsigned long long maxmemory
;
399 unsigned int blpop_blocked_clients
;
400 unsigned int vm_blocked_clients
;
401 /* Sort parameters - qsort_r() is only available under BSD so we
402 * have to take this state global, in order to pass it to sortCompare() */
406 /* Virtual memory configuration */
411 unsigned long long vm_max_memory
;
413 size_t hash_max_zipmap_entries
;
414 size_t hash_max_zipmap_value
;
415 /* Virtual memory state */
418 off_t vm_next_page
; /* Next probably empty page */
419 off_t vm_near_pages
; /* Number of pages allocated sequentially */
420 unsigned char *vm_bitmap
; /* Bitmap of free/used pages */
421 time_t unixtime
; /* Unix time sampled every second. */
422 /* Virtual memory I/O threads stuff */
423 /* An I/O thread processes an element taken from the io_newjobs queue and
424 * puts the result of the operation in the io_processed list. While the
425 * job is being processed, it's put on the io_processing queue. */
426 list
*io_newjobs
; /* List of VM I/O jobs yet to be processed */
427 list
*io_processing
; /* List of VM I/O jobs being processed */
428 list
*io_processed
; /* List of VM I/O jobs already processed */
429 list
*io_ready_clients
; /* Clients ready to be unblocked. All keys loaded */
430 pthread_mutex_t io_mutex
; /* lock to access io_jobs/io_done/io_thread_job */
431 pthread_mutex_t obj_freelist_mutex
; /* safe redis objects creation/free */
432 pthread_mutex_t io_swapfile_mutex
; /* So we can lseek + write */
433 pthread_attr_t io_threads_attr
; /* attributes for threads creation */
434 int io_active_threads
; /* Number of running I/O threads */
435 int vm_max_threads
; /* Max number of I/O threads running at the same time */
436 /* Our main thread is blocked on the event loop, looking for sockets ready
437 * to be read or written, so when a threaded I/O operation is ready to be
438 * processed by the main thread, the I/O thread will use a unix pipe to
439 * awake the main thread. The following are the two pipe FDs. */
440 int io_ready_pipe_read
;
441 int io_ready_pipe_write
;
442 /* Virtual memory stats */
443 unsigned long long vm_stats_used_pages
;
444 unsigned long long vm_stats_swapped_objects
;
445 unsigned long long vm_stats_swapouts
;
446 unsigned long long vm_stats_swapins
;
448 dict
*pubsub_channels
; /* Map channels to list of subscribed clients */
449 list
*pubsub_patterns
; /* A list of pubsub_patterns */
454 typedef struct pubsubPattern
{
459 typedef void redisCommandProc(redisClient
*c
);
460 typedef void redisVmPreloadProc(redisClient
*c
, struct redisCommand
*cmd
, int argc
, robj
**argv
);
461 struct redisCommand
{
463 redisCommandProc
*proc
;
466 /* Use a function to determine which keys need to be loaded
467 * in the background prior to executing this command. Takes precedence
468 * over vm_firstkey and others, ignored when NULL */
469 redisVmPreloadProc
*vm_preload_proc
;
470 /* What keys should be loaded in background when calling this command? */
471 int vm_firstkey
; /* The first argument that's a key (0 = no keys) */
472 int vm_lastkey
; /* The last argument that's a key */
473 int vm_keystep
; /* The step between first and last key */
476 struct redisFunctionSym
{
478 unsigned long pointer
;
481 typedef struct _redisSortObject
{
489 typedef struct _redisSortOperation
{
492 } redisSortOperation
;
494 /* ZSETs use a specialized version of Skiplists */
496 typedef struct zskiplistNode
{
497 struct zskiplistNode
**forward
;
498 struct zskiplistNode
*backward
;
504 typedef struct zskiplist
{
505 struct zskiplistNode
*header
, *tail
;
506 unsigned long length
;
510 typedef struct zset
{
515 /* Our shared "common" objects */
517 #define REDIS_SHARED_INTEGERS 10000
518 struct sharedObjectsStruct
{
519 robj
*crlf
, *ok
, *err
, *emptybulk
, *czero
, *cone
, *pong
, *space
,
520 *colon
, *nullbulk
, *nullmultibulk
, *queued
,
521 *emptymultibulk
, *wrongtypeerr
, *nokeyerr
, *syntaxerr
, *sameobjecterr
,
522 *outofrangeerr
, *plus
,
523 *select0
, *select1
, *select2
, *select3
, *select4
,
524 *select5
, *select6
, *select7
, *select8
, *select9
,
525 *messagebulk
, *pmessagebulk
, *subscribebulk
, *unsubscribebulk
, *mbulk3
,
526 *mbulk4
, *psubscribebulk
, *punsubscribebulk
,
527 *integers
[REDIS_SHARED_INTEGERS
];
530 /* Global vars that are actually used as constants. The following double
531 * values are used for double on-disk serialization, and are initialized
532 * at runtime to avoid strange compiler optimizations. */
534 static double R_Zero
, R_PosInf
, R_NegInf
, R_Nan
;
536 /* VM threaded I/O request message */
537 #define REDIS_IOJOB_LOAD 0 /* Load from disk to memory */
538 #define REDIS_IOJOB_PREPARE_SWAP 1 /* Compute needed pages */
539 #define REDIS_IOJOB_DO_SWAP 2 /* Swap from memory to disk */
540 typedef struct iojob
{
541 int type
; /* Request type, REDIS_IOJOB_* */
542 redisDb
*db
;/* Redis database */
543 robj
*key
; /* This I/O request is about swapping this key */
544 robj
*val
; /* the value to swap for REDIS_IOJOB_*_SWAP, otherwise this
545 * field is populated by the I/O thread for REDIS_IOJOB_LOAD. */
546 off_t page
; /* Swap page where to read/write the object */
547 off_t pages
; /* Swap pages needed to save object. PREPARE_SWAP return val */
548 int canceled
; /* True if this command was canceled by blocking side of VM */
549 pthread_t thread
; /* ID of the thread processing this entry */
552 /*================================ Prototypes =============================== */
554 static void freeStringObject(robj
*o
);
555 static void freeListObject(robj
*o
);
556 static void freeSetObject(robj
*o
);
557 static void decrRefCount(void *o
);
558 static robj
*createObject(int type
, void *ptr
);
559 static void freeClient(redisClient
*c
);
560 static int rdbLoad(char *filename
);
561 static void addReply(redisClient
*c
, robj
*obj
);
562 static void addReplySds(redisClient
*c
, sds s
);
563 static void incrRefCount(robj
*o
);
564 static int rdbSaveBackground(char *filename
);
565 static robj
*createStringObject(char *ptr
, size_t len
);
566 static robj
*dupStringObject(robj
*o
);
567 static void replicationFeedSlaves(list
*slaves
, int dictid
, robj
**argv
, int argc
);
568 static void replicationFeedMonitors(list
*monitors
, int dictid
, robj
**argv
, int argc
);
569 static void flushAppendOnlyFile(void);
570 static void feedAppendOnlyFile(struct redisCommand
*cmd
, int dictid
, robj
**argv
, int argc
);
571 static int syncWithMaster(void);
572 static robj
*tryObjectEncoding(robj
*o
);
573 static robj
*getDecodedObject(robj
*o
);
574 static int removeExpire(redisDb
*db
, robj
*key
);
575 static int expireIfNeeded(redisDb
*db
, robj
*key
);
576 static int deleteIfVolatile(redisDb
*db
, robj
*key
);
577 static int deleteIfSwapped(redisDb
*db
, robj
*key
);
578 static int deleteKey(redisDb
*db
, robj
*key
);
579 static time_t getExpire(redisDb
*db
, robj
*key
);
580 static int setExpire(redisDb
*db
, robj
*key
, time_t when
);
581 static void updateSlavesWaitingBgsave(int bgsaveerr
);
582 static void freeMemoryIfNeeded(void);
583 static int processCommand(redisClient
*c
);
584 static void setupSigSegvAction(void);
585 static void rdbRemoveTempFile(pid_t childpid
);
586 static void aofRemoveTempFile(pid_t childpid
);
587 static size_t stringObjectLen(robj
*o
);
588 static void processInputBuffer(redisClient
*c
);
589 static zskiplist
*zslCreate(void);
590 static void zslFree(zskiplist
*zsl
);
591 static void zslInsert(zskiplist
*zsl
, double score
, robj
*obj
);
592 static void sendReplyToClientWritev(aeEventLoop
*el
, int fd
, void *privdata
, int mask
);
593 static void initClientMultiState(redisClient
*c
);
594 static void freeClientMultiState(redisClient
*c
);
595 static void queueMultiCommand(redisClient
*c
, struct redisCommand
*cmd
);
596 static void unblockClientWaitingData(redisClient
*c
);
597 static int handleClientsWaitingListPush(redisClient
*c
, robj
*key
, robj
*ele
);
598 static void vmInit(void);
599 static void vmMarkPagesFree(off_t page
, off_t count
);
600 static robj
*vmLoadObject(robj
*key
);
601 static robj
*vmPreviewObject(robj
*key
);
602 static int vmSwapOneObjectBlocking(void);
603 static int vmSwapOneObjectThreaded(void);
604 static int vmCanSwapOut(void);
605 static int tryFreeOneObjectFromFreelist(void);
606 static void acceptHandler(aeEventLoop
*el
, int fd
, void *privdata
, int mask
);
607 static void vmThreadedIOCompletedJob(aeEventLoop
*el
, int fd
, void *privdata
, int mask
);
608 static void vmCancelThreadedIOJob(robj
*o
);
609 static void lockThreadedIO(void);
610 static void unlockThreadedIO(void);
611 static int vmSwapObjectThreaded(robj
*key
, robj
*val
, redisDb
*db
);
612 static void freeIOJob(iojob
*j
);
613 static void queueIOJob(iojob
*j
);
614 static int vmWriteObjectOnSwap(robj
*o
, off_t page
);
615 static robj
*vmReadObjectFromSwap(off_t page
, int type
);
616 static void waitEmptyIOJobsQueue(void);
617 static void vmReopenSwapFile(void);
618 static int vmFreePage(off_t page
);
619 static void zunionInterBlockClientOnSwappedKeys(redisClient
*c
, struct redisCommand
*cmd
, int argc
, robj
**argv
);
620 static void execBlockClientOnSwappedKeys(redisClient
*c
, struct redisCommand
*cmd
, int argc
, robj
**argv
);
621 static int blockClientOnSwappedKeys(redisClient
*c
, struct redisCommand
*cmd
);
622 static int dontWaitForSwappedKey(redisClient
*c
, robj
*key
);
623 static void handleClientsBlockedOnSwappedKey(redisDb
*db
, robj
*key
);
624 static void readQueryFromClient(aeEventLoop
*el
, int fd
, void *privdata
, int mask
);
625 static struct redisCommand
*lookupCommand(char *name
);
626 static void call(redisClient
*c
, struct redisCommand
*cmd
);
627 static void resetClient(redisClient
*c
);
628 static void convertToRealHash(robj
*o
);
629 static int pubsubUnsubscribeAllChannels(redisClient
*c
, int notify
);
630 static int pubsubUnsubscribeAllPatterns(redisClient
*c
, int notify
);
631 static void freePubsubPattern(void *p
);
632 static int listMatchPubsubPattern(void *a
, void *b
);
633 static int compareStringObjects(robj
*a
, robj
*b
);
634 static int equalStringObjects(robj
*a
, robj
*b
);
636 static int rewriteAppendOnlyFileBackground(void);
637 static int vmSwapObjectBlocking(robj
*key
, robj
*val
);
638 static int prepareForShutdown(void); /* explicit (void): an empty list would leave the parameters unspecified, unlike the other no-argument prototypes in this section */
639 static void touchWatchedKey(redisDb
*db
, robj
*key
);
640 static void unwatchAllKeys(redisClient
*c
);
642 static void authCommand(redisClient
*c
);
643 static void pingCommand(redisClient
*c
);
644 static void echoCommand(redisClient
*c
);
645 static void setCommand(redisClient
*c
);
646 static void setnxCommand(redisClient
*c
);
647 static void setexCommand(redisClient
*c
);
648 static void getCommand(redisClient
*c
);
649 static void delCommand(redisClient
*c
);
650 static void existsCommand(redisClient
*c
);
651 static void incrCommand(redisClient
*c
);
652 static void decrCommand(redisClient
*c
);
653 static void incrbyCommand(redisClient
*c
);
654 static void decrbyCommand(redisClient
*c
);
655 static void selectCommand(redisClient
*c
);
656 static void randomkeyCommand(redisClient
*c
);
657 static void keysCommand(redisClient
*c
);
658 static void dbsizeCommand(redisClient
*c
);
659 static void lastsaveCommand(redisClient
*c
);
660 static void saveCommand(redisClient
*c
);
661 static void bgsaveCommand(redisClient
*c
);
662 static void bgrewriteaofCommand(redisClient
*c
);
663 static void shutdownCommand(redisClient
*c
);
664 static void moveCommand(redisClient
*c
);
665 static void renameCommand(redisClient
*c
);
666 static void renamenxCommand(redisClient
*c
);
667 static void lpushCommand(redisClient
*c
);
668 static void rpushCommand(redisClient
*c
);
669 static void lpopCommand(redisClient
*c
);
670 static void rpopCommand(redisClient
*c
);
671 static void llenCommand(redisClient
*c
);
672 static void lindexCommand(redisClient
*c
);
673 static void lrangeCommand(redisClient
*c
);
674 static void ltrimCommand(redisClient
*c
);
675 static void typeCommand(redisClient
*c
);
676 static void lsetCommand(redisClient
*c
);
677 static void saddCommand(redisClient
*c
);
678 static void sremCommand(redisClient
*c
);
679 static void smoveCommand(redisClient
*c
);
680 static void sismemberCommand(redisClient
*c
);
681 static void scardCommand(redisClient
*c
);
682 static void spopCommand(redisClient
*c
);
683 static void srandmemberCommand(redisClient
*c
);
684 static void sinterCommand(redisClient
*c
);
685 static void sinterstoreCommand(redisClient
*c
);
686 static void sunionCommand(redisClient
*c
);
687 static void sunionstoreCommand(redisClient
*c
);
688 static void sdiffCommand(redisClient
*c
);
689 static void sdiffstoreCommand(redisClient
*c
);
690 static void syncCommand(redisClient
*c
);
691 static void flushdbCommand(redisClient
*c
);
692 static void flushallCommand(redisClient
*c
);
693 static void sortCommand(redisClient
*c
);
694 static void lremCommand(redisClient
*c
);
695 static void rpoplpushcommand(redisClient
*c
);
696 static void infoCommand(redisClient
*c
);
697 static void mgetCommand(redisClient
*c
);
698 static void monitorCommand(redisClient
*c
);
699 static void expireCommand(redisClient
*c
);
700 static void expireatCommand(redisClient
*c
);
701 static void getsetCommand(redisClient
*c
);
702 static void ttlCommand(redisClient
*c
);
703 static void slaveofCommand(redisClient
*c
);
704 static void debugCommand(redisClient
*c
);
705 static void msetCommand(redisClient
*c
);
706 static void msetnxCommand(redisClient
*c
);
707 static void zaddCommand(redisClient
*c
);
708 static void zincrbyCommand(redisClient
*c
);
709 static void zrangeCommand(redisClient
*c
);
710 static void zrangebyscoreCommand(redisClient
*c
);
711 static void zcountCommand(redisClient
*c
);
712 static void zrevrangeCommand(redisClient
*c
);
713 static void zcardCommand(redisClient
*c
);
714 static void zremCommand(redisClient
*c
);
715 static void zscoreCommand(redisClient
*c
);
716 static void zremrangebyscoreCommand(redisClient
*c
);
717 static void multiCommand(redisClient
*c
);
718 static void execCommand(redisClient
*c
);
719 static void discardCommand(redisClient
*c
);
720 static void blpopCommand(redisClient
*c
);
721 static void brpopCommand(redisClient
*c
);
722 static void appendCommand(redisClient
*c
);
723 static void substrCommand(redisClient
*c
);
724 static void zrankCommand(redisClient
*c
);
725 static void zrevrankCommand(redisClient
*c
);
726 static void hsetCommand(redisClient
*c
);
727 static void hsetnxCommand(redisClient
*c
);
728 static void hgetCommand(redisClient
*c
);
729 static void hmsetCommand(redisClient
*c
);
730 static void hmgetCommand(redisClient
*c
);
731 static void hdelCommand(redisClient
*c
);
732 static void hlenCommand(redisClient
*c
);
733 static void zremrangebyrankCommand(redisClient
*c
);
734 static void zunionstoreCommand(redisClient
*c
);
735 static void zinterstoreCommand(redisClient
*c
);
736 static void hkeysCommand(redisClient
*c
);
737 static void hvalsCommand(redisClient
*c
);
738 static void hgetallCommand(redisClient
*c
);
739 static void hexistsCommand(redisClient
*c
);
740 static void configCommand(redisClient
*c
);
741 static void hincrbyCommand(redisClient
*c
);
742 static void subscribeCommand(redisClient
*c
);
743 static void unsubscribeCommand(redisClient
*c
);
744 static void psubscribeCommand(redisClient
*c
);
745 static void punsubscribeCommand(redisClient
*c
);
746 static void publishCommand(redisClient
*c
);
747 static void watchCommand(redisClient
*c
);
748 static void unwatchCommand(redisClient
*c
);
750 /*================================= Globals ================================= */
753 static struct redisServer server
; /* server global state */
754 static struct redisCommand cmdTable
[] = {
755 {"get",getCommand
,2,REDIS_CMD_INLINE
,NULL
,1,1,1},
756 {"set",setCommand
,3,REDIS_CMD_BULK
|REDIS_CMD_DENYOOM
,NULL
,0,0,0},
757 {"setnx",setnxCommand
,3,REDIS_CMD_BULK
|REDIS_CMD_DENYOOM
,NULL
,0,0,0},
758 {"setex",setexCommand
,4,REDIS_CMD_BULK
|REDIS_CMD_DENYOOM
,NULL
,0,0,0},
759 {"append",appendCommand
,3,REDIS_CMD_BULK
|REDIS_CMD_DENYOOM
,NULL
,1,1,1},
760 {"substr",substrCommand
,4,REDIS_CMD_INLINE
,NULL
,1,1,1},
761 {"del",delCommand
,-2,REDIS_CMD_INLINE
,NULL
,0,0,0},
762 {"exists",existsCommand
,2,REDIS_CMD_INLINE
,NULL
,1,1,1},
763 {"incr",incrCommand
,2,REDIS_CMD_INLINE
|REDIS_CMD_DENYOOM
,NULL
,1,1,1},
764 {"decr",decrCommand
,2,REDIS_CMD_INLINE
|REDIS_CMD_DENYOOM
,NULL
,1,1,1},
765 {"mget",mgetCommand
,-2,REDIS_CMD_INLINE
,NULL
,1,-1,1},
766 {"rpush",rpushCommand
,3,REDIS_CMD_BULK
|REDIS_CMD_DENYOOM
,NULL
,1,1,1},
767 {"lpush",lpushCommand
,3,REDIS_CMD_BULK
|REDIS_CMD_DENYOOM
,NULL
,1,1,1},
768 {"rpop",rpopCommand
,2,REDIS_CMD_INLINE
,NULL
,1,1,1},
769 {"lpop",lpopCommand
,2,REDIS_CMD_INLINE
,NULL
,1,1,1},
770 {"brpop",brpopCommand
,-3,REDIS_CMD_INLINE
,NULL
,1,1,1},
771 {"blpop",blpopCommand
,-3,REDIS_CMD_INLINE
,NULL
,1,1,1},
772 {"llen",llenCommand
,2,REDIS_CMD_INLINE
,NULL
,1,1,1},
773 {"lindex",lindexCommand
,3,REDIS_CMD_INLINE
,NULL
,1,1,1},
774 {"lset",lsetCommand
,4,REDIS_CMD_BULK
|REDIS_CMD_DENYOOM
,NULL
,1,1,1},
775 {"lrange",lrangeCommand
,4,REDIS_CMD_INLINE
,NULL
,1,1,1},
776 {"ltrim",ltrimCommand
,4,REDIS_CMD_INLINE
,NULL
,1,1,1},
777 {"lrem",lremCommand
,4,REDIS_CMD_BULK
,NULL
,1,1,1},
778 {"rpoplpush",rpoplpushcommand
,3,REDIS_CMD_INLINE
|REDIS_CMD_DENYOOM
,NULL
,1,2,1},
779 {"sadd",saddCommand
,3,REDIS_CMD_BULK
|REDIS_CMD_DENYOOM
,NULL
,1,1,1},
780 {"srem",sremCommand
,3,REDIS_CMD_BULK
,NULL
,1,1,1},
781 {"smove",smoveCommand
,4,REDIS_CMD_BULK
,NULL
,1,2,1},
782 {"sismember",sismemberCommand
,3,REDIS_CMD_BULK
,NULL
,1,1,1},
783 {"scard",scardCommand
,2,REDIS_CMD_INLINE
,NULL
,1,1,1},
784 {"spop",spopCommand
,2,REDIS_CMD_INLINE
,NULL
,1,1,1},
785 {"srandmember",srandmemberCommand
,2,REDIS_CMD_INLINE
,NULL
,1,1,1},
786 {"sinter",sinterCommand
,-2,REDIS_CMD_INLINE
|REDIS_CMD_DENYOOM
,NULL
,1,-1,1},
787 {"sinterstore",sinterstoreCommand
,-3,REDIS_CMD_INLINE
|REDIS_CMD_DENYOOM
,NULL
,2,-1,1},
788 {"sunion",sunionCommand
,-2,REDIS_CMD_INLINE
|REDIS_CMD_DENYOOM
,NULL
,1,-1,1},
789 {"sunionstore",sunionstoreCommand
,-3,REDIS_CMD_INLINE
|REDIS_CMD_DENYOOM
,NULL
,2,-1,1},
790 {"sdiff",sdiffCommand
,-2,REDIS_CMD_INLINE
|REDIS_CMD_DENYOOM
,NULL
,1,-1,1},
791 {"sdiffstore",sdiffstoreCommand
,-3,REDIS_CMD_INLINE
|REDIS_CMD_DENYOOM
,NULL
,2,-1,1},
792 {"smembers",sinterCommand
,2,REDIS_CMD_INLINE
,NULL
,1,1,1},
793 {"zadd",zaddCommand
,4,REDIS_CMD_BULK
|REDIS_CMD_DENYOOM
,NULL
,1,1,1},
794 {"zincrby",zincrbyCommand
,4,REDIS_CMD_BULK
|REDIS_CMD_DENYOOM
,NULL
,1,1,1},
795 {"zrem",zremCommand
,3,REDIS_CMD_BULK
,NULL
,1,1,1},
796 {"zremrangebyscore",zremrangebyscoreCommand
,4,REDIS_CMD_INLINE
,NULL
,1,1,1},
797 {"zremrangebyrank",zremrangebyrankCommand
,4,REDIS_CMD_INLINE
,NULL
,1,1,1},
798 {"zunionstore",zunionstoreCommand
,-4,REDIS_CMD_INLINE
|REDIS_CMD_DENYOOM
,zunionInterBlockClientOnSwappedKeys
,0,0,0},
799 {"zinterstore",zinterstoreCommand
,-4,REDIS_CMD_INLINE
|REDIS_CMD_DENYOOM
,zunionInterBlockClientOnSwappedKeys
,0,0,0},
800 {"zrange",zrangeCommand
,-4,REDIS_CMD_INLINE
,NULL
,1,1,1},
801 {"zrangebyscore",zrangebyscoreCommand
,-4,REDIS_CMD_INLINE
,NULL
,1,1,1},
802 {"zcount",zcountCommand
,4,REDIS_CMD_INLINE
,NULL
,1,1,1},
803 {"zrevrange",zrevrangeCommand
,-4,REDIS_CMD_INLINE
,NULL
,1,1,1},
804 {"zcard",zcardCommand
,2,REDIS_CMD_INLINE
,NULL
,1,1,1},
805 {"zscore",zscoreCommand
,3,REDIS_CMD_BULK
|REDIS_CMD_DENYOOM
,NULL
,1,1,1},
806 {"zrank",zrankCommand
,3,REDIS_CMD_BULK
,NULL
,1,1,1},
807 {"zrevrank",zrevrankCommand
,3,REDIS_CMD_BULK
,NULL
,1,1,1},
808 {"hset",hsetCommand
,4,REDIS_CMD_BULK
|REDIS_CMD_DENYOOM
,NULL
,1,1,1},
809 {"hsetnx",hsetnxCommand
,4,REDIS_CMD_BULK
|REDIS_CMD_DENYOOM
,NULL
,1,1,1},
810 {"hget",hgetCommand
,3,REDIS_CMD_BULK
,NULL
,1,1,1},
811 {"hmset",hmsetCommand
,-4,REDIS_CMD_BULK
|REDIS_CMD_DENYOOM
,NULL
,1,1,1},
812 {"hmget",hmgetCommand
,-3,REDIS_CMD_BULK
,NULL
,1,1,1},
813 {"hincrby",hincrbyCommand
,4,REDIS_CMD_INLINE
|REDIS_CMD_DENYOOM
,NULL
,1,1,1},
814 {"hdel",hdelCommand
,3,REDIS_CMD_BULK
,NULL
,1,1,1},
815 {"hlen",hlenCommand
,2,REDIS_CMD_INLINE
,NULL
,1,1,1},
816 {"hkeys",hkeysCommand
,2,REDIS_CMD_INLINE
,NULL
,1,1,1},
817 {"hvals",hvalsCommand
,2,REDIS_CMD_INLINE
,NULL
,1,1,1},
818 {"hgetall",hgetallCommand
,2,REDIS_CMD_INLINE
,NULL
,1,1,1},
819 {"hexists",hexistsCommand
,3,REDIS_CMD_BULK
,NULL
,1,1,1},
820 {"incrby",incrbyCommand
,3,REDIS_CMD_INLINE
|REDIS_CMD_DENYOOM
,NULL
,1,1,1},
821 {"decrby",decrbyCommand
,3,REDIS_CMD_INLINE
|REDIS_CMD_DENYOOM
,NULL
,1,1,1},
822 {"getset",getsetCommand
,3,REDIS_CMD_BULK
|REDIS_CMD_DENYOOM
,NULL
,1,1,1},
823 {"mset",msetCommand
,-3,REDIS_CMD_BULK
|REDIS_CMD_DENYOOM
,NULL
,1,-1,2},
824 {"msetnx",msetnxCommand
,-3,REDIS_CMD_BULK
|REDIS_CMD_DENYOOM
,NULL
,1,-1,2},
825 {"randomkey",randomkeyCommand
,1,REDIS_CMD_INLINE
,NULL
,0,0,0},
826 {"select",selectCommand
,2,REDIS_CMD_INLINE
,NULL
,0,0,0},
827 {"move",moveCommand
,3,REDIS_CMD_INLINE
,NULL
,1,1,1},
828 {"rename",renameCommand
,3,REDIS_CMD_INLINE
,NULL
,1,1,1},
829 {"renamenx",renamenxCommand
,3,REDIS_CMD_INLINE
,NULL
,1,1,1},
830 {"expire",expireCommand
,3,REDIS_CMD_INLINE
,NULL
,0,0,0},
831 {"expireat",expireatCommand
,3,REDIS_CMD_INLINE
,NULL
,0,0,0},
832 {"keys",keysCommand
,2,REDIS_CMD_INLINE
,NULL
,0,0,0},
833 {"dbsize",dbsizeCommand
,1,REDIS_CMD_INLINE
,NULL
,0,0,0},
834 {"auth",authCommand
,2,REDIS_CMD_INLINE
,NULL
,0,0,0},
835 {"ping",pingCommand
,1,REDIS_CMD_INLINE
,NULL
,0,0,0},
836 {"echo",echoCommand
,2,REDIS_CMD_BULK
,NULL
,0,0,0},
837 {"save",saveCommand
,1,REDIS_CMD_INLINE
,NULL
,0,0,0},
838 {"bgsave",bgsaveCommand
,1,REDIS_CMD_INLINE
,NULL
,0,0,0},
839 {"bgrewriteaof",bgrewriteaofCommand
,1,REDIS_CMD_INLINE
,NULL
,0,0,0},
840 {"shutdown",shutdownCommand
,1,REDIS_CMD_INLINE
,NULL
,0,0,0},
841 {"lastsave",lastsaveCommand
,1,REDIS_CMD_INLINE
,NULL
,0,0,0},
842 {"type",typeCommand
,2,REDIS_CMD_INLINE
,NULL
,1,1,1},
843 {"multi",multiCommand
,1,REDIS_CMD_INLINE
,NULL
,0,0,0},
844 {"exec",execCommand
,1,REDIS_CMD_INLINE
|REDIS_CMD_DENYOOM
,execBlockClientOnSwappedKeys
,0,0,0},
845 {"discard",discardCommand
,1,REDIS_CMD_INLINE
,NULL
,0,0,0},
846 {"sync",syncCommand
,1,REDIS_CMD_INLINE
,NULL
,0,0,0},
847 {"flushdb",flushdbCommand
,1,REDIS_CMD_INLINE
,NULL
,0,0,0},
848 {"flushall",flushallCommand
,1,REDIS_CMD_INLINE
,NULL
,0,0,0},
849 {"sort",sortCommand
,-2,REDIS_CMD_INLINE
|REDIS_CMD_DENYOOM
,NULL
,1,1,1},
850 {"info",infoCommand
,1,REDIS_CMD_INLINE
,NULL
,0,0,0},
851 {"monitor",monitorCommand
,1,REDIS_CMD_INLINE
,NULL
,0,0,0},
852 {"ttl",ttlCommand
,2,REDIS_CMD_INLINE
,NULL
,1,1,1},
853 {"slaveof",slaveofCommand
,3,REDIS_CMD_INLINE
,NULL
,0,0,0},
854 {"debug",debugCommand
,-2,REDIS_CMD_INLINE
,NULL
,0,0,0},
855 {"config",configCommand
,-2,REDIS_CMD_BULK
,NULL
,0,0,0},
856 {"subscribe",subscribeCommand
,-2,REDIS_CMD_INLINE
,NULL
,0,0,0},
857 {"unsubscribe",unsubscribeCommand
,-1,REDIS_CMD_INLINE
,NULL
,0,0,0},
858 {"psubscribe",psubscribeCommand
,-2,REDIS_CMD_INLINE
,NULL
,0,0,0},
859 {"punsubscribe",punsubscribeCommand
,-1,REDIS_CMD_INLINE
,NULL
,0,0,0},
860 {"publish",publishCommand
,3,REDIS_CMD_BULK
|REDIS_CMD_FORCE_REPLICATION
,NULL
,0,0,0},
861 {"watch",watchCommand
,-2,REDIS_CMD_INLINE
,NULL
,0,0,0},
862 {"unwatch",unwatchCommand
,1,REDIS_CMD_INLINE
,NULL
,0,0,0},
863 {NULL
,NULL
,0,0,NULL
,0,0,0}
866 /*============================ Utility functions ============================ */
868 /* Glob-style pattern matching. */
869 static int stringmatchlen(const char *pattern
, int patternLen
,
870 const char *string
, int stringLen
, int nocase
)
875 while (pattern
[1] == '*') {
880 return 1; /* match */
882 if (stringmatchlen(pattern
+1, patternLen
-1,
883 string
, stringLen
, nocase
))
884 return 1; /* match */
888 return 0; /* no match */
892 return 0; /* no match */
902 not = pattern
[0] == '^';
909 if (pattern
[0] == '\\') {
912 if (pattern
[0] == string
[0])
914 } else if (pattern
[0] == ']') {
916 } else if (patternLen
== 0) {
920 } else if (pattern
[1] == '-' && patternLen
>= 3) {
921 int start
= pattern
[0];
922 int end
= pattern
[2];
930 start
= tolower(start
);
936 if (c
>= start
&& c
<= end
)
940 if (pattern
[0] == string
[0])
943 if (tolower((int)pattern
[0]) == tolower((int)string
[0]))
953 return 0; /* no match */
959 if (patternLen
>= 2) {
966 if (pattern
[0] != string
[0])
967 return 0; /* no match */
969 if (tolower((int)pattern
[0]) != tolower((int)string
[0]))
970 return 0; /* no match */
978 if (stringLen
== 0) {
979 while(*pattern
== '*') {
986 if (patternLen
== 0 && stringLen
== 0)
991 static int stringmatch(const char *pattern
, const char *string
, int nocase
) {
992 return stringmatchlen(pattern
,strlen(pattern
),string
,strlen(string
),nocase
);
995 /* Convert a string representing an amount of memory into the number of
996 * bytes, so for instance memtoll("1gb") will return 1073741824 that is
999 * On parsing error, if err is not NULL, *err is set to 1, otherwise it's
1001 static long long memtoll(const char *p
, int *err
) {
1004 long mul
; /* unit multiplier */
1006 unsigned int digits
;
1009 /* Search the first non digit character. */
1012 while(*u
&& isdigit(*u
)) u
++;
1013 if (*u
== '\0' || !strcasecmp(u
,"b")) {
1015 } else if (!strcasecmp(u
,"k")) {
1017 } else if (!strcasecmp(u
,"kb")) {
1019 } else if (!strcasecmp(u
,"m")) {
1021 } else if (!strcasecmp(u
,"mb")) {
1023 } else if (!strcasecmp(u
,"g")) {
1024 mul
= 1000L*1000*1000;
1025 } else if (!strcasecmp(u
,"gb")) {
1026 mul
= 1024L*1024*1024;
1032 if (digits
>= sizeof(buf
)) {
1036 memcpy(buf
,p
,digits
);
1038 val
= strtoll(buf
,NULL
,10);
1042 /* Convert a long long into a string. Returns the number of
1043 * characters needed to represent the number, that can be shorter if passed
1044 * buffer length is not enough to store the whole number. */
/* Parameters: s is the destination buffer, len is its size in bytes and
 * value is the number to convert. A zero-sized destination is rejected
 * up front by returning 0. */
1045 static int ll2string(char *s
, size_t len
, long long value
) {
1047 unsigned long long v
;
1050 if (len
== 0) return 0;
/* v is meant to hold the magnitude of value.
 * NOTE(review): -value is evaluated as a signed long long before it is
 * stored in the unsigned v, so value == LLONG_MIN overflows (undefined
 * behavior). Negating after the conversion, e.g.
 * -(unsigned long long)value, would avoid it -- confirm. */
1051 v
= (value
< 0) ? -value
: value
;
1052 p
= buf
+31; /* point to the last character */
/* The sign is stored in front of the characters already placed at p,
 * i.e. the text is built backwards from the end of buf. */
1057 if (value
< 0) *p
-- = '-';
1060 if (l
+1 > len
) l
= len
-1; /* Make sure it fits, including the nul term */
1066 static void redisLog(int level
, const char *fmt
, ...) {
1070 fp
= (server
.logfile
== NULL
) ? stdout
: fopen(server
.logfile
,"a");
1074 if (level
>= server
.verbosity
) {
1080 strftime(buf
,64,"%d %b %H:%M:%S",localtime(&now
));
1081 fprintf(fp
,"[%d] %s %c ",(int)getpid(),buf
,c
[level
]);
1082 vfprintf(fp
, fmt
, ap
);
1088 if (server
.logfile
) fclose(fp
);
1091 /*====================== Hash table type implementation ==================== */
1093 /* This is a hash table type that uses the SDS dynamic strings library as
1094 * keys and redis objects as values (objects can hold SDS strings,
1097 static void dictVanillaFree(void *privdata
, void *val
)
1099 DICT_NOTUSED(privdata
);
1103 static void dictListDestructor(void *privdata
, void *val
)
1105 DICT_NOTUSED(privdata
);
1106 listRelease((list
*)val
);
1109 static int sdsDictKeyCompare(void *privdata
, const void *key1
,
1113 DICT_NOTUSED(privdata
);
1115 l1
= sdslen((sds
)key1
);
1116 l2
= sdslen((sds
)key2
);
1117 if (l1
!= l2
) return 0;
1118 return memcmp(key1
, key2
, l1
) == 0;
1121 static void dictRedisObjectDestructor(void *privdata
, void *val
)
1123 DICT_NOTUSED(privdata
);
1125 if (val
== NULL
) return; /* Values of swapped out keys as set to NULL */
1129 static int dictObjKeyCompare(void *privdata
, const void *key1
,
1132 const robj
*o1
= key1
, *o2
= key2
;
1133 return sdsDictKeyCompare(privdata
,o1
->ptr
,o2
->ptr
);
1136 static unsigned int dictObjHash(const void *key
) {
1137 const robj
*o
= key
;
1138 return dictGenHashFunction(o
->ptr
, sdslen((sds
)o
->ptr
));
1141 static int dictEncObjKeyCompare(void *privdata
, const void *key1
,
1144 robj
*o1
= (robj
*) key1
, *o2
= (robj
*) key2
;
1147 if (o1
->encoding
== REDIS_ENCODING_INT
&&
1148 o2
->encoding
== REDIS_ENCODING_INT
)
1149 return o1
->ptr
== o2
->ptr
;
1151 o1
= getDecodedObject(o1
);
1152 o2
= getDecodedObject(o2
);
1153 cmp
= sdsDictKeyCompare(privdata
,o1
->ptr
,o2
->ptr
);
1159 static unsigned int dictEncObjHash(const void *key
) {
1160 robj
*o
= (robj
*) key
;
1162 if (o
->encoding
== REDIS_ENCODING_RAW
) {
1163 return dictGenHashFunction(o
->ptr
, sdslen((sds
)o
->ptr
));
1165 if (o
->encoding
== REDIS_ENCODING_INT
) {
1169 len
= ll2string(buf
,32,(long)o
->ptr
);
1170 return dictGenHashFunction((unsigned char*)buf
, len
);
1174 o
= getDecodedObject(o
);
1175 hash
= dictGenHashFunction(o
->ptr
, sdslen((sds
)o
->ptr
));
1182 /* Sets type and expires */
1183 static dictType setDictType
= {
1184 dictEncObjHash
, /* hash function */
1187 dictEncObjKeyCompare
, /* key compare */
1188 dictRedisObjectDestructor
, /* key destructor */
1189 NULL
/* val destructor */
1192 /* Sorted sets hash (note: a skiplist is used in addition to the hash table) */
1193 static dictType zsetDictType
= {
1194 dictEncObjHash
, /* hash function */
1197 dictEncObjKeyCompare
, /* key compare */
1198 dictRedisObjectDestructor
, /* key destructor */
1199 dictVanillaFree
/* val destructor of malloc(sizeof(double)) */
1203 static dictType dbDictType
= {
1204 dictObjHash
, /* hash function */
1207 dictObjKeyCompare
, /* key compare */
1208 dictRedisObjectDestructor
, /* key destructor */
1209 dictRedisObjectDestructor
/* val destructor */
1213 static dictType keyptrDictType
= {
1214 dictObjHash
, /* hash function */
1217 dictObjKeyCompare
, /* key compare */
1218 dictRedisObjectDestructor
, /* key destructor */
1219 NULL
/* val destructor */
1222 /* Hash type hash table (note that small hashes are represented with zipmaps) */
1223 static dictType hashDictType
= {
1224 dictEncObjHash
, /* hash function */
1227 dictEncObjKeyCompare
, /* key compare */
1228 dictRedisObjectDestructor
, /* key destructor */
1229 dictRedisObjectDestructor
/* val destructor */
1232 /* Keylist hash table type has unencoded redis objects as keys and
1233 * lists as values. It's used for blocking operations (BLPOP) and to
1234 * map swapped keys to a list of clients waiting for these keys to be loaded. */
1235 static dictType keylistDictType
= {
1236 dictObjHash
, /* hash function */
1239 dictObjKeyCompare
, /* key compare */
1240 dictRedisObjectDestructor
, /* key destructor */
1241 dictListDestructor
/* val destructor */
1244 static void version();
1246 /* ========================= Random utility functions ======================= */
1248 /* Redis generally does not try to recover from out of memory conditions
1249 * when allocating objects or strings, it is not clear if it will be possible
1250 * to report this condition to the client since the networking layer itself
1251 * is based on heap allocation for send buffers, so we simply abort.
1252 * At least the code will be simpler to read... */
1253 static void oom(const char *msg
) {
1254 redisLog(REDIS_WARNING
, "%s: Out of memory\n",msg
);
1259 /* ====================== Redis server networking stuff ===================== */
1260 static void closeTimedoutClients(void) {
1263 time_t now
= time(NULL
);
1266 listRewind(server
.clients
,&li
);
1267 while ((ln
= listNext(&li
)) != NULL
) {
1268 c
= listNodeValue(ln
);
1269 if (server
.maxidletime
&&
1270 !(c
->flags
& REDIS_SLAVE
) && /* no timeout for slaves */
1271 !(c
->flags
& REDIS_MASTER
) && /* no timeout for masters */
1272 dictSize(c
->pubsub_channels
) == 0 && /* no timeout for pubsub */
1273 listLength(c
->pubsub_patterns
) == 0 &&
1274 (now
- c
->lastinteraction
> server
.maxidletime
))
1276 redisLog(REDIS_VERBOSE
,"Closing idle client");
1278 } else if (c
->flags
& REDIS_BLOCKED
) {
1279 if (c
->blockingto
!= 0 && c
->blockingto
< now
) {
1280 addReply(c
,shared
.nullmultibulk
);
1281 unblockClientWaitingData(c
);
1287 static int htNeedsResize(dict
*dict
) {
1288 long long size
, used
;
1290 size
= dictSlots(dict
);
1291 used
= dictSize(dict
);
1292 return (size
&& used
&& size
> DICT_HT_INITIAL_SIZE
&&
1293 (used
*100/size
< REDIS_HT_MINFILL
));
1296 /* If the percentage of used slots in the HT falls below REDIS_HT_MINFILL
1297 * we resize the hash table to save memory */
1298 static void tryResizeHashTables(void) {
1301 for (j
= 0; j
< server
.dbnum
; j
++) {
1302 if (htNeedsResize(server
.db
[j
].dict
))
1303 dictResize(server
.db
[j
].dict
);
1304 if (htNeedsResize(server
.db
[j
].expires
))
1305 dictResize(server
.db
[j
].expires
);
1309 /* Our hash table implementation performs rehashing incrementally while
1310 * we write/read from the hash table. Still if the server is idle, the hash
1311 * table will use two tables for a long time. So we try to use 1 millisecond
1312 * of CPU time at every serverCron() loop in order to rehash some key. */
1313 static void incrementallyRehash(void) {
1316 for (j
= 0; j
< server
.dbnum
; j
++) {
1317 if (dictIsRehashing(server
.db
[j
].dict
)) {
1318 dictRehashMilliseconds(server
.db
[j
].dict
,1);
1319 break; /* already used our millisecond for this loop... */
1324 /* A background saving child (BGSAVE) terminated its work. Handle this. */
/* statloc is the raw wait status of the finished child. It is decoded
 * with WEXITSTATUS() and WIFSIGNALED(): a clean exit (not signalled, exit
 * code 0) logs success and refreshes server.lastsave, a non-zero exit code
 * logs an error, and the signal case logs the terminating signal and calls
 * rdbRemoveTempFile() for the child pid. Afterwards server.bgsavechildpid
 * is reset to -1 and updateSlavesWaitingBgsave() is told the outcome. */
1325 void backgroundSaveDoneHandler(int statloc
) {
1326 int exitcode
= WEXITSTATUS(statloc
);
1327 int bysignal
= WIFSIGNALED(statloc
);
1329 if (!bysignal
&& exitcode
== 0) {
1330 redisLog(REDIS_NOTICE
,
1331 "Background saving terminated with success");
1333 server
.lastsave
= time(NULL
);
1334 } else if (!bysignal
&& exitcode
!= 0) {
1335 redisLog(REDIS_WARNING
, "Background saving error");
1337 redisLog(REDIS_WARNING
,
1338 "Background saving terminated by signal %d", WTERMSIG(statloc
));
1339 rdbRemoveTempFile(server
.bgsavechildpid
);
1341 server
.bgsavechildpid
= -1;
1342 /* Possibly there are slaves waiting for a BGSAVE in order to be served
1343 * (the first stage of SYNC is a bulk transfer of dump.rdb) */
/* NOTE(review): the status handed to the slaves depends on exitcode only,
 * not on bysignal. WEXITSTATUS() is only meaningful when the child exited
 * normally, so a child killed by a signal may still yield exitcode == 0
 * here and be reported as REDIS_OK -- confirm, and consider testing
 * (!bysignal && exitcode == 0) instead. */
1344 updateSlavesWaitingBgsave(exitcode
== 0 ? REDIS_OK
: REDIS_ERR
);
1347 /* A background append only file rewriting (BGREWRITEAOF) terminated its work.
1349 void backgroundRewriteDoneHandler(int statloc
) {
1350 int exitcode
= WEXITSTATUS(statloc
);
1351 int bysignal
= WIFSIGNALED(statloc
);
1353 if (!bysignal
&& exitcode
== 0) {
1357 redisLog(REDIS_NOTICE
,
1358 "Background append only file rewriting terminated with success");
1359 /* Now it's time to flush the differences accumulated by the parent */
1360 snprintf(tmpfile
,256,"temp-rewriteaof-bg-%d.aof", (int) server
.bgrewritechildpid
);
1361 fd
= open(tmpfile
,O_WRONLY
|O_APPEND
);
1363 redisLog(REDIS_WARNING
, "Not able to open the temp append only file produced by the child: %s", strerror(errno
));
1366 /* Flush our data... */
1367 if (write(fd
,server
.bgrewritebuf
,sdslen(server
.bgrewritebuf
)) !=
1368 (signed) sdslen(server
.bgrewritebuf
)) {
1369 redisLog(REDIS_WARNING
, "Error or short write trying to flush the parent diff of the append log file in the child temp file: %s", strerror(errno
));
1373 redisLog(REDIS_NOTICE
,"Parent diff flushed into the new append log file with success (%lu bytes)",sdslen(server
.bgrewritebuf
));
1374 /* Now our work is to rename the temp file into the stable file. And
1375 * switch the file descriptor used by the server for append only. */
1376 if (rename(tmpfile
,server
.appendfilename
) == -1) {
1377 redisLog(REDIS_WARNING
,"Can't rename the temp append only file into the stable one: %s", strerror(errno
));
1381 /* Mission completed... almost */
1382 redisLog(REDIS_NOTICE
,"Append only file successfully rewritten.");
1383 if (server
.appendfd
!= -1) {
1384 /* If append only is actually enabled... */
1385 close(server
.appendfd
);
1386 server
.appendfd
= fd
;
1388 server
.appendseldb
= -1; /* Make sure it will issue SELECT */
1389 redisLog(REDIS_NOTICE
,"The new append only file was selected for future appends.");
1391 /* If append only is disabled we just generate a dump in this
1392 * format. Why not? */
1395 } else if (!bysignal
&& exitcode
!= 0) {
1396 redisLog(REDIS_WARNING
, "Background append only file rewriting error");
1398 redisLog(REDIS_WARNING
,
1399 "Background append only file rewriting terminated by signal %d",
1403 sdsfree(server
.bgrewritebuf
);
1404 server
.bgrewritebuf
= sdsempty();
1405 aofRemoveTempFile(server
.bgrewritechildpid
);
1406 server
.bgrewritechildpid
= -1;
1409 /* This function is called once a background process of some kind terminates,
1410 * as we want to avoid resizing the hash tables when there is a child in order
1411 * to play well with copy-on-write (otherwise when a resize happens lots of
1412 * memory pages are copied). The goal of this function is to update the ability
1413 * for dict.c to resize the hash tables according to whether or not we have
1414 * running children. */
1415 static void updateDictResizePolicy(void) {
1416 if (server
.bgsavechildpid
== -1 && server
.bgrewritechildpid
== -1)
1419 dictDisableResize();
1422 static int serverCron(struct aeEventLoop
*eventLoop
, long long id
, void *clientData
) {
1423 int j
, loops
= server
.cronloops
++;
1424 REDIS_NOTUSED(eventLoop
);
1426 REDIS_NOTUSED(clientData
);
1428 /* We take a cached value of the unix time in the global state because
1429 * with virtual memory and aging we need to store the current time
1430 * in objects at every object access, and accuracy is not needed.
1431 * To access a global var is faster than calling time(NULL) */
1432 server
.unixtime
= time(NULL
);
1434 /* We received a SIGTERM, shutting down here in a safe way, as it is
1435 * not ok doing so inside the signal handler. */
1436 if (server
.shutdown_asap
) {
1437 if (prepareForShutdown() == REDIS_OK
) exit(0);
1438 redisLog(REDIS_WARNING
,"SIGTERM received but errors trying to shut down the server, check the logs for more information");
1441 /* Show some info about non-empty databases */
1442 for (j
= 0; j
< server
.dbnum
; j
++) {
1443 long long size
, used
, vkeys
;
1445 size
= dictSlots(server
.db
[j
].dict
);
1446 used
= dictSize(server
.db
[j
].dict
);
1447 vkeys
= dictSize(server
.db
[j
].expires
);
1448 if (!(loops
% 50) && (used
|| vkeys
)) {
1449 redisLog(REDIS_VERBOSE
,"DB %d: %lld keys (%lld volatile) in %lld slots HT.",j
,used
,vkeys
,size
);
1450 /* dictPrintStats(server.dict); */
1454 /* We don't want to resize the hash tables while a background saving
1455 * is in progress: the saving child is created using fork() that is
1456 * implemented with a copy-on-write semantic in most modern systems, so
1457 * if we resize the HT while there is the saving child at work actually
1458 * a lot of memory movements in the parent will cause a lot of pages
1460 if (server
.bgsavechildpid
== -1 && server
.bgrewritechildpid
== -1) {
1461 if (!(loops
% 10)) tryResizeHashTables();
1462 if (server
.activerehashing
) incrementallyRehash();
1465 /* Show information about connected clients */
1466 if (!(loops
% 50)) {
1467 redisLog(REDIS_VERBOSE
,"%d clients connected (%d slaves), %zu bytes in use",
1468 listLength(server
.clients
)-listLength(server
.slaves
),
1469 listLength(server
.slaves
),
1470 zmalloc_used_memory());
1473 /* Close connections of timedout clients */
1474 if ((server
.maxidletime
&& !(loops
% 100)) || server
.blpop_blocked_clients
)
1475 closeTimedoutClients();
1477 /* Check if a background saving or AOF rewrite in progress terminated */
1478 if (server
.bgsavechildpid
!= -1 || server
.bgrewritechildpid
!= -1) {
1482 if ((pid
= wait3(&statloc
,WNOHANG
,NULL
)) != 0) {
1483 if (pid
== server
.bgsavechildpid
) {
1484 backgroundSaveDoneHandler(statloc
);
1486 backgroundRewriteDoneHandler(statloc
);
1488 updateDictResizePolicy();
1491 /* If there is not a background saving in progress check if
1492 * we have to save now */
1493 time_t now
= time(NULL
);
1494 for (j
= 0; j
< server
.saveparamslen
; j
++) {
1495 struct saveparam
*sp
= server
.saveparams
+j
;
1497 if (server
.dirty
>= sp
->changes
&&
1498 now
-server
.lastsave
> sp
->seconds
) {
1499 redisLog(REDIS_NOTICE
,"%d changes in %d seconds. Saving...",
1500 sp
->changes
, sp
->seconds
);
1501 rdbSaveBackground(server
.dbfilename
);
1507 /* Try to expire a few timed out keys. The algorithm used is adaptive and
1508 * will use few CPU cycles if there are few expiring keys, otherwise
1509 * it will get more aggressive to avoid that too much memory is used by
1510 * keys that can be removed from the keyspace. */
1511 for (j
= 0; j
< server
.dbnum
; j
++) {
1513 redisDb
*db
= server
.db
+j
;
1515 /* Continue to expire if at the end of the cycle more than 25%
1516 * of the keys were expired. */
1518 long num
= dictSize(db
->expires
);
1519 time_t now
= time(NULL
);
1522 if (num
> REDIS_EXPIRELOOKUPS_PER_CRON
)
1523 num
= REDIS_EXPIRELOOKUPS_PER_CRON
;
1528 if ((de
= dictGetRandomKey(db
->expires
)) == NULL
) break;
1529 t
= (time_t) dictGetEntryVal(de
);
1531 deleteKey(db
,dictGetEntryKey(de
));
1533 server
.stat_expiredkeys
++;
1536 } while (expired
> REDIS_EXPIRELOOKUPS_PER_CRON
/4);
1539 /* Swap a few keys on disk if we are over the memory limit and VM
1540 * is enabled. Try to free objects from the free list first. */
1541 if (vmCanSwapOut()) {
1542 while (server
.vm_enabled
&& zmalloc_used_memory() >
1543 server
.vm_max_memory
)
1547 if (tryFreeOneObjectFromFreelist() == REDIS_OK
) continue;
1548 retval
= (server
.vm_max_threads
== 0) ?
1549 vmSwapOneObjectBlocking() :
1550 vmSwapOneObjectThreaded();
1551 if (retval
== REDIS_ERR
&& !(loops
% 300) &&
1552 zmalloc_used_memory() >
1553 (server
.vm_max_memory
+server
.vm_max_memory
/10))
1555 redisLog(REDIS_WARNING
,"WARNING: vm-max-memory limit exceeded by more than 10%% but unable to swap more objects out!");
1557 /* Note that when using threaded I/O we free just one object,
1558 * because anyway when the I/O thread in charge to swap this
1559 * object out will finish, the handler of completed jobs
1560 * will try to swap more objects if we are still out of memory. */
1561 if (retval
== REDIS_ERR
|| server
.vm_max_threads
> 0) break;
1565 /* Check if we should connect to a MASTER */
1566 if (server
.replstate
== REDIS_REPL_CONNECT
&& !(loops
% 10)) {
1567 redisLog(REDIS_NOTICE
,"Connecting to MASTER...");
1568 if (syncWithMaster() == REDIS_OK
) {
1569 redisLog(REDIS_NOTICE
,"MASTER <-> SLAVE sync succeeded");
1570 if (server
.appendonly
) rewriteAppendOnlyFileBackground();
1576 /* This function gets called every time Redis is entering the
1577 * main loop of the event driven library, that is, before sleeping
1578 * while waiting for ready file descriptors. */
1579 static void beforeSleep(struct aeEventLoop
*eventLoop
) {
1580 REDIS_NOTUSED(eventLoop
);
1582 /* Awake clients that got all the swapped keys they requested */
1583 if (server
.vm_enabled
&& listLength(server
.io_ready_clients
)) {
1587 listRewind(server
.io_ready_clients
,&li
);
1588 while((ln
= listNext(&li
))) {
1589 redisClient
*c
= ln
->value
;
1590 struct redisCommand
*cmd
;
1592 /* Resume the client. */
1593 listDelNode(server
.io_ready_clients
,ln
);
1594 c
->flags
&= (~REDIS_IO_WAIT
);
1595 server
.vm_blocked_clients
--;
1596 aeCreateFileEvent(server
.el
, c
->fd
, AE_READABLE
,
1597 readQueryFromClient
, c
);
1598 cmd
= lookupCommand(c
->argv
[0]->ptr
);
1599 assert(cmd
!= NULL
);
1602 /* There may be more data to process in the input buffer. */
1603 if (c
->querybuf
&& sdslen(c
->querybuf
) > 0)
1604 processInputBuffer(c
);
1607 /* Write the AOF buffer on disk */
1608 flushAppendOnlyFile();
1611 static void createSharedObjects(void) {
1614 shared
.crlf
= createObject(REDIS_STRING
,sdsnew("\r\n"));
1615 shared
.ok
= createObject(REDIS_STRING
,sdsnew("+OK\r\n"));
1616 shared
.err
= createObject(REDIS_STRING
,sdsnew("-ERR\r\n"));
1617 shared
.emptybulk
= createObject(REDIS_STRING
,sdsnew("$0\r\n\r\n"));
1618 shared
.czero
= createObject(REDIS_STRING
,sdsnew(":0\r\n"));
1619 shared
.cone
= createObject(REDIS_STRING
,sdsnew(":1\r\n"));
1620 shared
.nullbulk
= createObject(REDIS_STRING
,sdsnew("$-1\r\n"));
1621 shared
.nullmultibulk
= createObject(REDIS_STRING
,sdsnew("*-1\r\n"));
1622 shared
.emptymultibulk
= createObject(REDIS_STRING
,sdsnew("*0\r\n"));
1623 shared
.pong
= createObject(REDIS_STRING
,sdsnew("+PONG\r\n"));
1624 shared
.queued
= createObject(REDIS_STRING
,sdsnew("+QUEUED\r\n"));
1625 shared
.wrongtypeerr
= createObject(REDIS_STRING
,sdsnew(
1626 "-ERR Operation against a key holding the wrong kind of value\r\n"));
1627 shared
.nokeyerr
= createObject(REDIS_STRING
,sdsnew(
1628 "-ERR no such key\r\n"));
1629 shared
.syntaxerr
= createObject(REDIS_STRING
,sdsnew(
1630 "-ERR syntax error\r\n"));
1631 shared
.sameobjecterr
= createObject(REDIS_STRING
,sdsnew(
1632 "-ERR source and destination objects are the same\r\n"));
1633 shared
.outofrangeerr
= createObject(REDIS_STRING
,sdsnew(
1634 "-ERR index out of range\r\n"));
1635 shared
.space
= createObject(REDIS_STRING
,sdsnew(" "));
1636 shared
.colon
= createObject(REDIS_STRING
,sdsnew(":"));
1637 shared
.plus
= createObject(REDIS_STRING
,sdsnew("+"));
1638 shared
.select0
= createStringObject("select 0\r\n",10);
1639 shared
.select1
= createStringObject("select 1\r\n",10);
1640 shared
.select2
= createStringObject("select 2\r\n",10);
1641 shared
.select3
= createStringObject("select 3\r\n",10);
1642 shared
.select4
= createStringObject("select 4\r\n",10);
1643 shared
.select5
= createStringObject("select 5\r\n",10);
1644 shared
.select6
= createStringObject("select 6\r\n",10);
1645 shared
.select7
= createStringObject("select 7\r\n",10);
1646 shared
.select8
= createStringObject("select 8\r\n",10);
1647 shared
.select9
= createStringObject("select 9\r\n",10);
1648 shared
.messagebulk
= createStringObject("$7\r\nmessage\r\n",13);
1649 shared
.pmessagebulk
= createStringObject("$8\r\npmessage\r\n",14);
1650 shared
.subscribebulk
= createStringObject("$9\r\nsubscribe\r\n",15);
1651 shared
.unsubscribebulk
= createStringObject("$11\r\nunsubscribe\r\n",18);
1652 shared
.psubscribebulk
= createStringObject("$10\r\npsubscribe\r\n",17);
1653 shared
.punsubscribebulk
= createStringObject("$12\r\npunsubscribe\r\n",19);
1654 shared
.mbulk3
= createStringObject("*3\r\n",4);
1655 shared
.mbulk4
= createStringObject("*4\r\n",4);
1656 for (j
= 0; j
< REDIS_SHARED_INTEGERS
; j
++) {
1657 shared
.integers
[j
] = createObject(REDIS_STRING
,(void*)(long)j
);
1658 shared
.integers
[j
]->encoding
= REDIS_ENCODING_INT
;
1662 static void appendServerSaveParams(time_t seconds
, int changes
) {
1663 server
.saveparams
= zrealloc(server
.saveparams
,sizeof(struct saveparam
)*(server
.saveparamslen
+1));
1664 server
.saveparams
[server
.saveparamslen
].seconds
= seconds
;
1665 server
.saveparams
[server
.saveparamslen
].changes
= changes
;
1666 server
.saveparamslen
++;
1669 static void resetServerSaveParams() {
1670 zfree(server
.saveparams
);
1671 server
.saveparams
= NULL
;
1672 server
.saveparamslen
= 0;
1675 static void initServerConfig() {
1676 server
.dbnum
= REDIS_DEFAULT_DBNUM
;
1677 server
.port
= REDIS_SERVERPORT
;
1678 server
.verbosity
= REDIS_VERBOSE
;
1679 server
.maxidletime
= REDIS_MAXIDLETIME
;
1680 server
.saveparams
= NULL
;
1681 server
.logfile
= NULL
; /* NULL = log on standard output */
1682 server
.bindaddr
= NULL
;
1683 server
.glueoutputbuf
= 1;
1684 server
.daemonize
= 0;
1685 server
.appendonly
= 0;
1686 server
.appendfsync
= APPENDFSYNC_EVERYSEC
;
1687 server
.lastfsync
= time(NULL
);
1688 server
.appendfd
= -1;
1689 server
.appendseldb
= -1; /* Make sure the first time will not match */
1690 server
.pidfile
= zstrdup("/var/run/redis.pid");
1691 server
.dbfilename
= zstrdup("dump.rdb");
1692 server
.appendfilename
= zstrdup("appendonly.aof");
1693 server
.requirepass
= NULL
;
1694 server
.rdbcompression
= 1;
1695 server
.activerehashing
= 1;
1696 server
.maxclients
= 0;
1697 server
.blpop_blocked_clients
= 0;
1698 server
.maxmemory
= 0;
1699 server
.vm_enabled
= 0;
1700 server
.vm_swap_file
= zstrdup("/tmp/redis-%p.vm");
1701 server
.vm_page_size
= 256; /* 256 bytes per page */
1702 server
.vm_pages
= 1024*1024*100; /* 104 millions of pages */
1703 server
.vm_max_memory
= 1024LL*1024*1024*1; /* 1 GB of RAM */
1704 server
.vm_max_threads
= 4;
1705 server
.vm_blocked_clients
= 0;
1706 server
.hash_max_zipmap_entries
= REDIS_HASH_MAX_ZIPMAP_ENTRIES
;
1707 server
.hash_max_zipmap_value
= REDIS_HASH_MAX_ZIPMAP_VALUE
;
1708 server
.shutdown_asap
= 0;
1710 resetServerSaveParams();
1712 appendServerSaveParams(60*60,1); /* save after 1 hour and 1 change */
1713 appendServerSaveParams(300,100); /* save after 5 minutes and 100 changes */
1714 appendServerSaveParams(60,10000); /* save after 1 minute and 10000 changes */
1715 /* Replication related */
1717 server
.masterauth
= NULL
;
1718 server
.masterhost
= NULL
;
1719 server
.masterport
= 6379;
1720 server
.master
= NULL
;
1721 server
.replstate
= REDIS_REPL_NONE
;
1723 /* Double constants initialization */
1725 R_PosInf
= 1.0/R_Zero
;
1726 R_NegInf
= -1.0/R_Zero
;
1727 R_Nan
= R_Zero
/R_Zero
;
/* initServer: one-time runtime initialisation of the global 'server' state.
 *
 * As far as this listing shows, it:
 *  - ignores SIGHUP and SIGPIPE and installs the SIGSEGV handler;
 *  - opens /dev/null for writing (server.devnull);
 *  - creates the clients, slaves, monitors and free-object lists, the shared
 *    objects and the event loop;
 *  - allocates server.dbnum databases and opens the listening TCP socket;
 *  - gives every database its dictionaries and its numeric id;
 *  - sets up the Pub/Sub tables, background-job pids, AOF buffers, counters
 *    and timestamps;
 *  - registers serverCron as a time event and acceptHandler on the listening
 *    socket;
 *  - opens the append-only file when server.appendonly is set;
 *  - calls vmInit() when server.vm_enabled is set.
 *
 * NOTE(review): this listing has dropped lines (the embedded numbers skip,
 * e.g. 1731-1732, 1740-1741, 1752-1753, 1787-1791), so what happens after
 * each error log is not visible here. */
1730 static void initServer() {
1733 signal(SIGHUP
, SIG_IGN
);
1734 signal(SIGPIPE
, SIG_IGN
);
1735 setupSigSegvAction();
1737 server
.devnull
= fopen("/dev/null","w");
1738 if (server
.devnull
== NULL
) {
/* NOTE(review): fopen() reports failure through errno. server.neterr is the
 * error buffer handed to anetTcpServer() further down, and nothing visible
 * here fills it before this point, so this message likely prints stale or
 * empty text. strerror(errno) looks like the intended argument - confirm. */
1739 redisLog(REDIS_WARNING
, "Can't open /dev/null: %s", server
.neterr
);
1742 server
.clients
= listCreate();
1743 server
.slaves
= listCreate();
1744 server
.monitors
= listCreate();
1745 server
.objfreelist
= listCreate();
1746 createSharedObjects();
1747 server
.el
= aeCreateEventLoop();
1748 server
.db
= zmalloc(sizeof(redisDb
)*server
.dbnum
);
/* Here server.neterr is the right buffer: it is passed to anetTcpServer(),
 * and the failure log below prints it. */
1749 server
.fd
= anetTcpServer(server
.neterr
, server
.port
, server
.bindaddr
);
1750 if (server
.fd
== -1) {
1751 redisLog(REDIS_WARNING
, "Opening TCP port: %s", server
.neterr
);
/* Per-database setup: key space, expires, blocking keys and watched keys. */
1754 for (j
= 0; j
< server
.dbnum
; j
++) {
1755 server
.db
[j
].dict
= dictCreate(&dbDictType
,NULL
);
1756 server
.db
[j
].expires
= dictCreate(&keyptrDictType
,NULL
);
1757 server
.db
[j
].blocking_keys
= dictCreate(&keylistDictType
,NULL
);
1758 server
.db
[j
].watched_keys
= dictCreate(&keylistDictType
,NULL
);
/* io_keys is only created when virtual memory is enabled. */
1759 if (server
.vm_enabled
)
1760 server
.db
[j
].io_keys
= dictCreate(&keylistDictType
,NULL
);
1761 server
.db
[j
].id
= j
;
/* Pub/Sub: a channels dictionary plus a patterns list with its own free and
 * match callbacks. */
1763 server
.pubsub_channels
= dictCreate(&keylistDictType
,NULL
);
1764 server
.pubsub_patterns
= listCreate();
1765 listSetFreeMethod(server
.pubsub_patterns
,freePubsubPattern
);
1766 listSetMatchMethod(server
.pubsub_patterns
,listMatchPubsubPattern
);
1767 server
.cronloops
= 0;
/* -1 marks "no background child" for both the save and the rewrite pid. */
1768 server
.bgsavechildpid
= -1;
1769 server
.bgrewritechildpid
= -1;
1770 server
.bgrewritebuf
= sdsempty();
1771 server
.aofbuf
= sdsempty();
1772 server
.lastsave
= time(NULL
);
1774 server
.stat_numcommands
= 0;
1775 server
.stat_numconnections
= 0;
1776 server
.stat_expiredkeys
= 0;
1777 server
.stat_starttime
= time(NULL
);
1778 server
.unixtime
= time(NULL
);
/* Register the periodic cron handler and the accept handler for the
 * listening socket. Failing to register the latter goes through oom(). */
1779 aeCreateTimeEvent(server
.el
, 1, serverCron
, NULL
, NULL
);
1780 if (aeCreateFileEvent(server
.el
, server
.fd
, AE_READABLE
,
1781 acceptHandler
, NULL
) == AE_ERR
) oom("creating file event");
/* The append-only file is opened for appending and created with mode 0644
 * if it does not exist. */
1783 if (server
.appendonly
) {
1784 server
.appendfd
= open(server
.appendfilename
,O_WRONLY
|O_APPEND
|O_CREAT
,0644);
1785 if (server
.appendfd
== -1) {
1786 redisLog(REDIS_WARNING
, "Can't open the append-only file: %s",
1792 if (server
.vm_enabled
) vmInit();
1795 /* Empty the whole database */
1796 static long long emptyDb() {
1798 long long removed
= 0;
1800 for (j
= 0; j
< server
.dbnum
; j
++) {
1801 removed
+= dictSize(server
.db
[j
].dict
);
1802 dictEmpty(server
.db
[j
].dict
);
1803 dictEmpty(server
.db
[j
].expires
);
/* Convert a configuration boolean into an integer, ignoring case.
 *
 * Returns 1 for "yes", 0 for "no" and -1 for any other string, so that
 * callers can reject the directive (they compare the result with -1). */
static int yesnotoi(char *s) {
    if (!strcasecmp(s,"yes")) return 1;
    else if (!strcasecmp(s,"no")) return 0;
    else return -1;
}
1814 /* I agree, this is a very rudimentary way to load a configuration...
1815 it will be improved later if the config gets more complex */
1816 static void loadServerConfig(char *filename
) {
1818 char buf
[REDIS_CONFIGLINE_MAX
+1], *err
= NULL
;
1822 if (filename
[0] == '-' && filename
[1] == '\0')
1825 if ((fp
= fopen(filename
,"r")) == NULL
) {
1826 redisLog(REDIS_WARNING
, "Fatal error, can't open config file '%s'", filename
);
1831 while(fgets(buf
,REDIS_CONFIGLINE_MAX
+1,fp
) != NULL
) {
1837 line
= sdstrim(line
," \t\r\n");
1839 /* Skip comments and blank lines*/
1840 if (line
[0] == '#' || line
[0] == '\0') {
1845 /* Split into arguments */
1846 argv
= sdssplitlen(line
,sdslen(line
)," ",1,&argc
);
1847 sdstolower(argv
[0]);
1849 /* Execute config directives */
1850 if (!strcasecmp(argv
[0],"timeout") && argc
== 2) {
1851 server
.maxidletime
= atoi(argv
[1]);
1852 if (server
.maxidletime
< 0) {
1853 err
= "Invalid timeout value"; goto loaderr
;
1855 } else if (!strcasecmp(argv
[0],"port") && argc
== 2) {
1856 server
.port
= atoi(argv
[1]);
1857 if (server
.port
< 1 || server
.port
> 65535) {
1858 err
= "Invalid port"; goto loaderr
;
1860 } else if (!strcasecmp(argv
[0],"bind") && argc
== 2) {
1861 server
.bindaddr
= zstrdup(argv
[1]);
1862 } else if (!strcasecmp(argv
[0],"save") && argc
== 3) {
1863 int seconds
= atoi(argv
[1]);
1864 int changes
= atoi(argv
[2]);
1865 if (seconds
< 1 || changes
< 0) {
1866 err
= "Invalid save parameters"; goto loaderr
;
1868 appendServerSaveParams(seconds
,changes
);
1869 } else if (!strcasecmp(argv
[0],"dir") && argc
== 2) {
1870 if (chdir(argv
[1]) == -1) {
1871 redisLog(REDIS_WARNING
,"Can't chdir to '%s': %s",
1872 argv
[1], strerror(errno
));
1875 } else if (!strcasecmp(argv
[0],"loglevel") && argc
== 2) {
1876 if (!strcasecmp(argv
[1],"debug")) server
.verbosity
= REDIS_DEBUG
;
1877 else if (!strcasecmp(argv
[1],"verbose")) server
.verbosity
= REDIS_VERBOSE
;
1878 else if (!strcasecmp(argv
[1],"notice")) server
.verbosity
= REDIS_NOTICE
;
1879 else if (!strcasecmp(argv
[1],"warning")) server
.verbosity
= REDIS_WARNING
;
1881 err
= "Invalid log level. Must be one of debug, notice, warning";
1884 } else if (!strcasecmp(argv
[0],"logfile") && argc
== 2) {
1887 server
.logfile
= zstrdup(argv
[1]);
1888 if (!strcasecmp(server
.logfile
,"stdout")) {
1889 zfree(server
.logfile
);
1890 server
.logfile
= NULL
;
1892 if (server
.logfile
) {
1893 /* Test if we are able to open the file. The server will not
1894 * be able to abort just for this problem later... */
1895 logfp
= fopen(server
.logfile
,"a");
1896 if (logfp
== NULL
) {
1897 err
= sdscatprintf(sdsempty(),
1898 "Can't open the log file: %s", strerror(errno
));
1903 } else if (!strcasecmp(argv
[0],"databases") && argc
== 2) {
1904 server
.dbnum
= atoi(argv
[1]);
1905 if (server
.dbnum
< 1) {
1906 err
= "Invalid number of databases"; goto loaderr
;
1908 } else if (!strcasecmp(argv
[0],"include") && argc
== 2) {
1909 loadServerConfig(argv
[1]);
1910 } else if (!strcasecmp(argv
[0],"maxclients") && argc
== 2) {
1911 server
.maxclients
= atoi(argv
[1]);
1912 } else if (!strcasecmp(argv
[0],"maxmemory") && argc
== 2) {
1913 server
.maxmemory
= memtoll(argv
[1],NULL
);
1914 } else if (!strcasecmp(argv
[0],"slaveof") && argc
== 3) {
1915 server
.masterhost
= sdsnew(argv
[1]);
1916 server
.masterport
= atoi(argv
[2]);
1917 server
.replstate
= REDIS_REPL_CONNECT
;
1918 } else if (!strcasecmp(argv
[0],"masterauth") && argc
== 2) {
1919 server
.masterauth
= zstrdup(argv
[1]);
1920 } else if (!strcasecmp(argv
[0],"glueoutputbuf") && argc
== 2) {
1921 if ((server
.glueoutputbuf
= yesnotoi(argv
[1])) == -1) {
1922 err
= "argument must be 'yes' or 'no'"; goto loaderr
;
1924 } else if (!strcasecmp(argv
[0],"rdbcompression") && argc
== 2) {
1925 if ((server
.rdbcompression
= yesnotoi(argv
[1])) == -1) {
1926 err
= "argument must be 'yes' or 'no'"; goto loaderr
;
1928 } else if (!strcasecmp(argv
[0],"activerehashing") && argc
== 2) {
1929 if ((server
.activerehashing
= yesnotoi(argv
[1])) == -1) {
1930 err
= "argument must be 'yes' or 'no'"; goto loaderr
;
1932 } else if (!strcasecmp(argv
[0],"daemonize") && argc
== 2) {
1933 if ((server
.daemonize
= yesnotoi(argv
[1])) == -1) {
1934 err
= "argument must be 'yes' or 'no'"; goto loaderr
;
1936 } else if (!strcasecmp(argv
[0],"appendonly") && argc
== 2) {
1937 if ((server
.appendonly
= yesnotoi(argv
[1])) == -1) {
1938 err
= "argument must be 'yes' or 'no'"; goto loaderr
;
1940 } else if (!strcasecmp(argv
[0],"appendfilename") && argc
== 2) {
1941 zfree(server
.appendfilename
);
1942 server
.appendfilename
= zstrdup(argv
[1]);
1943 } else if (!strcasecmp(argv
[0],"appendfsync") && argc
== 2) {
1944 if (!strcasecmp(argv
[1],"no")) {
1945 server
.appendfsync
= APPENDFSYNC_NO
;
1946 } else if (!strcasecmp(argv
[1],"always")) {
1947 server
.appendfsync
= APPENDFSYNC_ALWAYS
;
1948 } else if (!strcasecmp(argv
[1],"everysec")) {
1949 server
.appendfsync
= APPENDFSYNC_EVERYSEC
;
1951 err
= "argument must be 'no', 'always' or 'everysec'";
1954 } else if (!strcasecmp(argv
[0],"requirepass") && argc
== 2) {
1955 server
.requirepass
= zstrdup(argv
[1]);
1956 } else if (!strcasecmp(argv
[0],"pidfile") && argc
== 2) {
1957 zfree(server
.pidfile
);
1958 server
.pidfile
= zstrdup(argv
[1]);
1959 } else if (!strcasecmp(argv
[0],"dbfilename") && argc
== 2) {
1960 zfree(server
.dbfilename
);
1961 server
.dbfilename
= zstrdup(argv
[1]);
1962 } else if (!strcasecmp(argv
[0],"vm-enabled") && argc
== 2) {
1963 if ((server
.vm_enabled
= yesnotoi(argv
[1])) == -1) {
1964 err
= "argument must be 'yes' or 'no'"; goto loaderr
;
1966 } else if (!strcasecmp(argv
[0],"vm-swap-file") && argc
== 2) {
1967 zfree(server
.vm_swap_file
);
1968 server
.vm_swap_file
= zstrdup(argv
[1]);
1969 } else if (!strcasecmp(argv
[0],"vm-max-memory") && argc
== 2) {
1970 server
.vm_max_memory
= memtoll(argv
[1],NULL
);
1971 } else if (!strcasecmp(argv
[0],"vm-page-size") && argc
== 2) {
1972 server
.vm_page_size
= memtoll(argv
[1], NULL
);
1973 } else if (!strcasecmp(argv
[0],"vm-pages") && argc
== 2) {
1974 server
.vm_pages
= memtoll(argv
[1], NULL
);
1975 } else if (!strcasecmp(argv
[0],"vm-max-threads") && argc
== 2) {
1976 server
.vm_max_threads
= strtoll(argv
[1], NULL
, 10);
1977 } else if (!strcasecmp(argv
[0],"hash-max-zipmap-entries") && argc
== 2){
1978 server
.hash_max_zipmap_entries
= memtoll(argv
[1], NULL
);
1979 } else if (!strcasecmp(argv
[0],"hash-max-zipmap-value") && argc
== 2){
1980 server
.hash_max_zipmap_value
= memtoll(argv
[1], NULL
);
1982 err
= "Bad directive or wrong number of arguments"; goto loaderr
;
1984 for (j
= 0; j
< argc
; j
++)
1989 if (fp
!= stdin
) fclose(fp
);
1993 fprintf(stderr
, "\n*** FATAL CONFIG FILE ERROR ***\n");
1994 fprintf(stderr
, "Reading the configuration file, at line %d\n", linenum
);
1995 fprintf(stderr
, ">>> '%s'\n", line
);
1996 fprintf(stderr
, "%s\n", err
);
2000 static void freeClientArgv(redisClient
*c
) {
2003 for (j
= 0; j
< c
->argc
; j
++)
2004 decrRefCount(c
->argv
[j
]);
2005 for (j
= 0; j
< c
->mbargc
; j
++)
2006 decrRefCount(c
->mbargv
[j
]);
2011 static void freeClient(redisClient
*c
) {
2014 /* Note that if the client we are freeing is blocked in a blocking
2015 * call, we have to set querybuf to NULL *before* calling
2016 * unblockClientWaitingData(), so that processInputBuffer() does not get
2017 * called. It is also important to remove the file events after
2018 * this, because this call adds the READABLE event. */
2019 sdsfree(c
->querybuf
);
2021 if (c
->flags
& REDIS_BLOCKED
)
2022 unblockClientWaitingData(c
);
2024 /* UNWATCH all the keys */
2026 listRelease(c
->watched_keys
);
2027 /* Unsubscribe from all the pubsub channels */
2028 pubsubUnsubscribeAllChannels(c
,0);
2029 pubsubUnsubscribeAllPatterns(c
,0);
2030 dictRelease(c
->pubsub_channels
);
2031 listRelease(c
->pubsub_patterns
);
2032 /* Obvious cleanup */
2033 aeDeleteFileEvent(server
.el
,c
->fd
,AE_READABLE
);
2034 aeDeleteFileEvent(server
.el
,c
->fd
,AE_WRITABLE
);
2035 listRelease(c
->reply
);
2038 /* Remove from the list of clients */
2039 ln
= listSearchKey(server
.clients
,c
);
2040 redisAssert(ln
!= NULL
);
2041 listDelNode(server
.clients
,ln
);
2042 /* Remove from the list of clients that are now ready to be restarted
2043 * after waiting for swapped keys */
2044 if (c
->flags
& REDIS_IO_WAIT
&& listLength(c
->io_keys
) == 0) {
2045 ln
= listSearchKey(server
.io_ready_clients
,c
);
2047 listDelNode(server
.io_ready_clients
,ln
);
2048 server
.vm_blocked_clients
--;
2051 /* Remove from the list of clients waiting for swapped keys */
2052 while (server
.vm_enabled
&& listLength(c
->io_keys
)) {
2053 ln
= listFirst(c
->io_keys
);
2054 dontWaitForSwappedKey(c
,ln
->value
);
2056 listRelease(c
->io_keys
);
2057 /* Master/slave cleanup */
2058 if (c
->flags
& REDIS_SLAVE
) {
2059 if (c
->replstate
== REDIS_REPL_SEND_BULK
&& c
->repldbfd
!= -1)
2061 list
*l
= (c
->flags
& REDIS_MONITOR
) ? server
.monitors
: server
.slaves
;
2062 ln
= listSearchKey(l
,c
);
2063 redisAssert(ln
!= NULL
);
2066 if (c
->flags
& REDIS_MASTER
) {
2067 server
.master
= NULL
;
2068 server
.replstate
= REDIS_REPL_CONNECT
;
2070 /* Release memory */
2073 freeClientMultiState(c
);
2077 #define GLUEREPLY_UP_TO (1024)
2078 static void glueReplyBuffersIfNeeded(redisClient
*c
) {
2080 char buf
[GLUEREPLY_UP_TO
];
2085 listRewind(c
->reply
,&li
);
2086 while((ln
= listNext(&li
))) {
2090 objlen
= sdslen(o
->ptr
);
2091 if (copylen
+ objlen
<= GLUEREPLY_UP_TO
) {
2092 memcpy(buf
+copylen
,o
->ptr
,objlen
);
2094 listDelNode(c
->reply
,ln
);
2096 if (copylen
== 0) return;
2100 /* Now the output buffer is empty, add the new single element */
2101 o
= createObject(REDIS_STRING
,sdsnewlen(buf
,copylen
));
2102 listAddNodeHead(c
->reply
,o
);
2105 static void sendReplyToClient(aeEventLoop
*el
, int fd
, void *privdata
, int mask
) {
2106 redisClient
*c
= privdata
;
2107 int nwritten
= 0, totwritten
= 0, objlen
;
2110 REDIS_NOTUSED(mask
);
2112 /* Use writev() if we have enough buffers to send */
2113 if (!server
.glueoutputbuf
&&
2114 listLength(c
->reply
) > REDIS_WRITEV_THRESHOLD
&&
2115 !(c
->flags
& REDIS_MASTER
))
2117 sendReplyToClientWritev(el
, fd
, privdata
, mask
);
2121 while(listLength(c
->reply
)) {
2122 if (server
.glueoutputbuf
&& listLength(c
->reply
) > 1)
2123 glueReplyBuffersIfNeeded(c
);
2125 o
= listNodeValue(listFirst(c
->reply
));
2126 objlen
= sdslen(o
->ptr
);
2129 listDelNode(c
->reply
,listFirst(c
->reply
));
2133 if (c
->flags
& REDIS_MASTER
) {
2134 /* Don't reply to a master */
2135 nwritten
= objlen
- c
->sentlen
;
2137 nwritten
= write(fd
, ((char*)o
->ptr
)+c
->sentlen
, objlen
- c
->sentlen
);
2138 if (nwritten
<= 0) break;
2140 c
->sentlen
+= nwritten
;
2141 totwritten
+= nwritten
;
2142 /* If we fully sent the object on head go to the next one */
2143 if (c
->sentlen
== objlen
) {
2144 listDelNode(c
->reply
,listFirst(c
->reply
));
2147 /* Note that we avoid sending more than REDIS_MAX_WRITE_PER_EVENT
2148 * bytes: in a single-threaded server it's a good idea to serve
2149 * other clients as well, even if a very large request comes from a
2150 * super fast link that is always able to accept data (in a real-world
2151 * scenario think about 'KEYS *' against the loopback interface) */
2152 if (totwritten
> REDIS_MAX_WRITE_PER_EVENT
) break;
2154 if (nwritten
== -1) {
2155 if (errno
== EAGAIN
) {
2158 redisLog(REDIS_VERBOSE
,
2159 "Error writing to client: %s", strerror(errno
));
2164 if (totwritten
> 0) c
->lastinteraction
= time(NULL
);
2165 if (listLength(c
->reply
) == 0) {
2167 aeDeleteFileEvent(server
.el
,c
->fd
,AE_WRITABLE
);
2171 static void sendReplyToClientWritev(aeEventLoop
*el
, int fd
, void *privdata
, int mask
)
2173 redisClient
*c
= privdata
;
2174 int nwritten
= 0, totwritten
= 0, objlen
, willwrite
;
2176 struct iovec iov
[REDIS_WRITEV_IOVEC_COUNT
];
2177 int offset
, ion
= 0;
2179 REDIS_NOTUSED(mask
);
2182 while (listLength(c
->reply
)) {
2183 offset
= c
->sentlen
;
2187 /* fill-in the iov[] array */
2188 for(node
= listFirst(c
->reply
); node
; node
= listNextNode(node
)) {
2189 o
= listNodeValue(node
);
2190 objlen
= sdslen(o
->ptr
);
2192 if (totwritten
+ objlen
- offset
> REDIS_MAX_WRITE_PER_EVENT
)
2195 if(ion
== REDIS_WRITEV_IOVEC_COUNT
)
2196 break; /* no more iovecs */
2198 iov
[ion
].iov_base
= ((char*)o
->ptr
) + offset
;
2199 iov
[ion
].iov_len
= objlen
- offset
;
2200 willwrite
+= objlen
- offset
;
2201 offset
= 0; /* just for the first item */
2208 /* write all collected blocks at once */
2209 if((nwritten
= writev(fd
, iov
, ion
)) < 0) {
2210 if (errno
!= EAGAIN
) {
2211 redisLog(REDIS_VERBOSE
,
2212 "Error writing to client: %s", strerror(errno
));
2219 totwritten
+= nwritten
;
2220 offset
= c
->sentlen
;
2222 /* remove written robjs from c->reply */
2223 while (nwritten
&& listLength(c
->reply
)) {
2224 o
= listNodeValue(listFirst(c
->reply
));
2225 objlen
= sdslen(o
->ptr
);
2227 if(nwritten
>= objlen
- offset
) {
2228 listDelNode(c
->reply
, listFirst(c
->reply
));
2229 nwritten
-= objlen
- offset
;
2233 c
->sentlen
+= nwritten
;
2241 c
->lastinteraction
= time(NULL
);
2243 if (listLength(c
->reply
) == 0) {
2245 aeDeleteFileEvent(server
.el
,c
->fd
,AE_WRITABLE
);
2249 static struct redisCommand
*lookupCommand(char *name
) {
2251 while(cmdTable
[j
].name
!= NULL
) {
2252 if (!strcasecmp(name
,cmdTable
[j
].name
)) return &cmdTable
[j
];
2258 /* resetClient prepares the client to process the next command */
2259 static void resetClient(redisClient
*c
) {
2265 /* Call() is the core of Redis execution of a command */
2266 static void call(redisClient
*c
, struct redisCommand
*cmd
) {
2269 dirty
= server
.dirty
;
2271 dirty
= server
.dirty
-dirty
;
2273 if (server
.appendonly
&& dirty
)
2274 feedAppendOnlyFile(cmd
,c
->db
->id
,c
->argv
,c
->argc
);
2275 if ((dirty
|| cmd
->flags
& REDIS_CMD_FORCE_REPLICATION
) &&
2276 listLength(server
.slaves
))
2277 replicationFeedSlaves(server
.slaves
,c
->db
->id
,c
->argv
,c
->argc
);
2278 if (listLength(server
.monitors
))
2279 replicationFeedMonitors(server
.monitors
,c
->db
->id
,c
->argv
,c
->argc
);
2280 server
.stat_numcommands
++;
2283 /* If this function gets called we have already read a whole
2284 * command; its arguments are in the client argv/argc fields.
2285 * processCommand() executes the command or prepares the
2286 * server for a bulk read from the client.
2288 * If 1 is returned the client is still alive and valid
2289 * and other operations can be performed by the caller. Otherwise,
2290 * if 0 is returned, the client was destroyed (i.e. after QUIT). */
2291 static int processCommand(redisClient
*c
) {
2292 struct redisCommand
*cmd
;
2294 /* Free some memory if needed (maxmemory setting) */
2295 if (server
.maxmemory
) freeMemoryIfNeeded();
2297 /* Handle the multi bulk command type. This is an alternative protocol
2298 * supported by Redis in order to receive commands that are composed of
2299 * multiple binary-safe "bulk" arguments. The latency of processing is
2300 * a bit higher but this allows things like multi-sets, so if this
2301 * protocol is used only for MSET and similar commands this is a big win. */
2302 if (c
->multibulk
== 0 && c
->argc
== 1 && ((char*)(c
->argv
[0]->ptr
))[0] == '*') {
2303 c
->multibulk
= atoi(((char*)c
->argv
[0]->ptr
)+1);
2304 if (c
->multibulk
<= 0) {
2308 decrRefCount(c
->argv
[c
->argc
-1]);
2312 } else if (c
->multibulk
) {
2313 if (c
->bulklen
== -1) {
2314 if (((char*)c
->argv
[0]->ptr
)[0] != '$') {
2315 addReplySds(c
,sdsnew("-ERR multi bulk protocol error\r\n"));
2319 int bulklen
= atoi(((char*)c
->argv
[0]->ptr
)+1);
2320 decrRefCount(c
->argv
[0]);
2321 if (bulklen
< 0 || bulklen
> 1024*1024*1024) {
2323 addReplySds(c
,sdsnew("-ERR invalid bulk write count\r\n"));
2328 c
->bulklen
= bulklen
+2; /* add two bytes for CR+LF */
2332 c
->mbargv
= zrealloc(c
->mbargv
,(sizeof(robj
*))*(c
->mbargc
+1));
2333 c
->mbargv
[c
->mbargc
] = c
->argv
[0];
2337 if (c
->multibulk
== 0) {
2341 /* Here we need to swap the multi-bulk argc/argv with the
2342 * normal argc/argv of the client structure. */
2344 c
->argv
= c
->mbargv
;
2345 c
->mbargv
= auxargv
;
2348 c
->argc
= c
->mbargc
;
2349 c
->mbargc
= auxargc
;
2351 /* We need to set bulklen to something different than -1
2352 * in order for the code below to process the command without
2353 * to try to read the last argument of a bulk command as
2354 * a special argument. */
2356 /* continue below and process the command */
2363 /* -- end of multi bulk commands processing -- */
2365 /* The QUIT command is handled as a special case. Normal command
2366 * procs are unable to close the client connection safely */
2367 if (!strcasecmp(c
->argv
[0]->ptr
,"quit")) {
2372 /* Now lookup the command and check ASAP about trivial error conditions
2373 * such wrong arity, bad command name and so forth. */
2374 cmd
= lookupCommand(c
->argv
[0]->ptr
);
2377 sdscatprintf(sdsempty(), "-ERR unknown command '%s'\r\n",
2378 (char*)c
->argv
[0]->ptr
));
2381 } else if ((cmd
->arity
> 0 && cmd
->arity
!= c
->argc
) ||
2382 (c
->argc
< -cmd
->arity
)) {
2384 sdscatprintf(sdsempty(),
2385 "-ERR wrong number of arguments for '%s' command\r\n",
2389 } else if (cmd
->flags
& REDIS_CMD_BULK
&& c
->bulklen
== -1) {
2390 /* This is a bulk command, we have to read the last argument yet. */
2391 int bulklen
= atoi(c
->argv
[c
->argc
-1]->ptr
);
2393 decrRefCount(c
->argv
[c
->argc
-1]);
2394 if (bulklen
< 0 || bulklen
> 1024*1024*1024) {
2396 addReplySds(c
,sdsnew("-ERR invalid bulk write count\r\n"));
2401 c
->bulklen
= bulklen
+2; /* add two bytes for CR+LF */
2402 /* It is possible that the bulk read is already in the
2403 * buffer. Check this condition and handle it accordingly.
2404 * This is just a fast path, alternative to call processInputBuffer().
2405 * It's a good idea since the code is small and this condition
2406 * happens most of the times. */
2407 if ((signed)sdslen(c
->querybuf
) >= c
->bulklen
) {
2408 c
->argv
[c
->argc
] = createStringObject(c
->querybuf
,c
->bulklen
-2);
2410 c
->querybuf
= sdsrange(c
->querybuf
,c
->bulklen
,-1);
2412 /* Otherwise return... there is to read the last argument
2413 * from the socket. */
2417 /* Let's try to encode the bulk object to save space. */
2418 if (cmd
->flags
& REDIS_CMD_BULK
)
2419 c
->argv
[c
->argc
-1] = tryObjectEncoding(c
->argv
[c
->argc
-1]);
2421 /* Check if the user is authenticated */
2422 if (server
.requirepass
&& !c
->authenticated
&& cmd
->proc
!= authCommand
) {
2423 addReplySds(c
,sdsnew("-ERR operation not permitted\r\n"));
2428 /* Handle the maxmemory directive */
2429 if (server
.maxmemory
&& (cmd
->flags
& REDIS_CMD_DENYOOM
) &&
2430 zmalloc_used_memory() > server
.maxmemory
)
2432 addReplySds(c
,sdsnew("-ERR command not allowed when used memory > 'maxmemory'\r\n"));
2437 /* Only allow SUBSCRIBE and UNSUBSCRIBE in the context of Pub/Sub */
2438 if ((dictSize(c
->pubsub_channels
) > 0 || listLength(c
->pubsub_patterns
) > 0)
2440 cmd
->proc
!= subscribeCommand
&& cmd
->proc
!= unsubscribeCommand
&&
2441 cmd
->proc
!= psubscribeCommand
&& cmd
->proc
!= punsubscribeCommand
) {
2442 addReplySds(c
,sdsnew("-ERR only (P)SUBSCRIBE / (P)UNSUBSCRIBE / QUIT allowed in this context\r\n"));
2447 /* Exec the command */
2448 if (c
->flags
& REDIS_MULTI
&& cmd
->proc
!= execCommand
&& cmd
->proc
!= discardCommand
) {
2449 queueMultiCommand(c
,cmd
);
2450 addReply(c
,shared
.queued
);
2452 if (server
.vm_enabled
&& server
.vm_max_threads
> 0 &&
2453 blockClientOnSwappedKeys(c
,cmd
)) return 1;
2457 /* Prepare the client for the next command */
2462 static void replicationFeedSlaves(list
*slaves
, int dictid
, robj
**argv
, int argc
) {
2467 /* We need 1+(ARGS*3) objects since commands are using the new protocol
2468 * and we need 1 object for the first "*<count>\r\n" multibulk count, then
2469 * for every additional object we have "$<count>\r\n" + object + "\r\n". */
2470 robj
*static_outv
[REDIS_STATIC_ARGS
*3+1];
2473 if (argc
<= REDIS_STATIC_ARGS
) {
2476 outv
= zmalloc(sizeof(robj
*)*(argc
*3+1));
2479 lenobj
= createObject(REDIS_STRING
,
2480 sdscatprintf(sdsempty(), "*%d\r\n", argc
));
2481 lenobj
->refcount
= 0;
2482 outv
[outc
++] = lenobj
;
2483 for (j
= 0; j
< argc
; j
++) {
2484 lenobj
= createObject(REDIS_STRING
,
2485 sdscatprintf(sdsempty(),"$%lu\r\n",
2486 (unsigned long) stringObjectLen(argv
[j
])));
2487 lenobj
->refcount
= 0;
2488 outv
[outc
++] = lenobj
;
2489 outv
[outc
++] = argv
[j
];
2490 outv
[outc
++] = shared
.crlf
;
2493 /* Increment all the refcounts at start and decrement at end in order to
2494 * be sure to free objects if there is no slave in a replication state
2495 * able to be fed with commands */
2496 for (j
= 0; j
< outc
; j
++) incrRefCount(outv
[j
]);
2497 listRewind(slaves
,&li
);
2498 while((ln
= listNext(&li
))) {
2499 redisClient
*slave
= ln
->value
;
2501 /* Don't feed slaves that are still waiting for BGSAVE to start */
2502 if (slave
->replstate
== REDIS_REPL_WAIT_BGSAVE_START
) continue;
2504 /* Feed all the other slaves, MONITORs and so on */
2505 if (slave
->slaveseldb
!= dictid
) {
2509 case 0: selectcmd
= shared
.select0
; break;
2510 case 1: selectcmd
= shared
.select1
; break;
2511 case 2: selectcmd
= shared
.select2
; break;
2512 case 3: selectcmd
= shared
.select3
; break;
2513 case 4: selectcmd
= shared
.select4
; break;
2514 case 5: selectcmd
= shared
.select5
; break;
2515 case 6: selectcmd
= shared
.select6
; break;
2516 case 7: selectcmd
= shared
.select7
; break;
2517 case 8: selectcmd
= shared
.select8
; break;
2518 case 9: selectcmd
= shared
.select9
; break;
2520 selectcmd
= createObject(REDIS_STRING
,
2521 sdscatprintf(sdsempty(),"select %d\r\n",dictid
));
2522 selectcmd
->refcount
= 0;
2525 addReply(slave
,selectcmd
);
2526 slave
->slaveseldb
= dictid
;
2528 for (j
= 0; j
< outc
; j
++) addReply(slave
,outv
[j
]);
2530 for (j
= 0; j
< outc
; j
++) decrRefCount(outv
[j
]);
2531 if (outv
!= static_outv
) zfree(outv
);
2534 static sds
sdscatrepr(sds s
, char *p
, size_t len
) {
2535 s
= sdscatlen(s
,"\"",1);
2540 s
= sdscatprintf(s
,"\\%c",*p
);
2542 case '\n': s
= sdscatlen(s
,"\\n",1); break;
2543 case '\r': s
= sdscatlen(s
,"\\r",1); break;
2544 case '\t': s
= sdscatlen(s
,"\\t",1); break;
2545 case '\a': s
= sdscatlen(s
,"\\a",1); break;
2546 case '\b': s
= sdscatlen(s
,"\\b",1); break;
2549 s
= sdscatprintf(s
,"%c",*p
);
2551 s
= sdscatprintf(s
,"\\x%02x",(unsigned char)*p
);
2556 return sdscatlen(s
,"\"",1);
/* replicationFeedMonitors: describe the command argv/argc, executed against
 * database 'dictid', as one text line and queue that line with addReply()
 * on every client found in the 'monitors' list.
 *
 * The line is built in this order: "+", the gettimeofday() timestamp as
 * "<seconds>.<microseconds> ", "(db N) " when dictid is not 0, then the
 * arguments (integer-encoded objects printed with "%ld", all the others
 * passed through sdscatrepr()), and a final "\r\n". A single space is
 * appended as separator between arguments.
 *
 * NOTE(review): this listing has dropped lines (the embedded numbers skip,
 * e.g. 2560-2562, 2574, 2577-2578), so the local declarations and the guard,
 * if any, around the separator append are not visible here. */
2559 static void replicationFeedMonitors(list
*monitors
, int dictid
, robj
**argv
, int argc
) {
2563 sds cmdrepr
= sdsnew("+");
2567 gettimeofday(&tv
,NULL
);
/* NOTE(review): "%ld.%ld" prints tv_usec without zero padding, so 5
 * microseconds is rendered as ".5" instead of ".000005" and the value does
 * not read as decimal seconds. "%ld.%06ld" would fix it - confirm that no
 * consumer depends on the current text before changing. */
2568 cmdrepr
= sdscatprintf(cmdrepr
,"%ld.%ld ",(long)tv
.tv_sec
,(long)tv
.tv_usec
);
2569 if (dictid
!= 0) cmdrepr
= sdscatprintf(cmdrepr
,"(db %d) ", dictid
);
2571 for (j
= 0; j
< argc
; j
++) {
/* For integer-encoded objects the pointer field itself is printed as a
 * long; otherwise ptr is treated as an sds string and escaped. */
2572 if (argv
[j
]->encoding
== REDIS_ENCODING_INT
) {
2573 cmdrepr
= sdscatprintf(cmdrepr
, "%ld", (long)argv
[j
]->ptr
);
2575 cmdrepr
= sdscatrepr(cmdrepr
,(char*)argv
[j
]->ptr
,
2576 sdslen(argv
[j
]->ptr
));
2579 cmdrepr
= sdscatlen(cmdrepr
," ",1);
2581 cmdrepr
= sdscatlen(cmdrepr
,"\r\n",2);
/* The finished line is wrapped in a single string object; that same object
 * is passed to addReply() for every monitor. */
2582 cmdobj
= createObject(REDIS_STRING
,cmdrepr
);
2584 listRewind(monitors
,&li
);
2585 while((ln
= listNext(&li
))) {
2586 redisClient
*monitor
= ln
->value
;
2587 addReply(monitor
,cmdobj
);
2589 decrRefCount(cmdobj
);
2592 static void processInputBuffer(redisClient
*c
) {
2594 /* Before processing the input buffer, make sure the client is not
2595 * waiting for a blocking operation such as BLPOP. Note that on the first
2596 * iteration the client is never blocked, otherwise processInputBuffer()
2597 * would not be called at all, but after the execution of the first commands
2598 * in the input buffer the client may be blocked, and the "goto again"
2599 * will try to reiterate. The following line will make it return asap. */
2600 if (c
->flags
& REDIS_BLOCKED
|| c
->flags
& REDIS_IO_WAIT
) return;
2601 if (c
->bulklen
== -1) {
2602 /* Read the first line of the query */
2603 char *p
= strchr(c
->querybuf
,'\n');
2610 query
= c
->querybuf
;
2611 c
->querybuf
= sdsempty();
2612 querylen
= 1+(p
-(query
));
2613 if (sdslen(query
) > querylen
) {
2614 /* leave data after the first line of the query in the buffer */
2615 c
->querybuf
= sdscatlen(c
->querybuf
,query
+querylen
,sdslen(query
)-querylen
);
2617 *p
= '\0'; /* remove "\n" */
2618 if (*(p
-1) == '\r') *(p
-1) = '\0'; /* and "\r" if any */
2619 sdsupdatelen(query
);
2621 /* Now we can split the query in arguments */
2622 argv
= sdssplitlen(query
,sdslen(query
)," ",1,&argc
);
2625 if (c
->argv
) zfree(c
->argv
);
2626 c
->argv
= zmalloc(sizeof(robj
*)*argc
);
2628 for (j
= 0; j
< argc
; j
++) {
2629 if (sdslen(argv
[j
])) {
2630 c
->argv
[c
->argc
] = createObject(REDIS_STRING
,argv
[j
]);
2638 /* Execute the command. If the client is still valid
2639 * after processCommand() return and there is something
2640 * on the query buffer try to process the next command. */
2641 if (processCommand(c
) && sdslen(c
->querybuf
)) goto again
;
2643 /* Nothing to process, argc == 0. Just process the query
2644 * buffer if it's not empty or return to the caller */
2645 if (sdslen(c
->querybuf
)) goto again
;
2648 } else if (sdslen(c
->querybuf
) >= REDIS_REQUEST_MAX_SIZE
) {
2649 redisLog(REDIS_VERBOSE
, "Client protocol error");
2654 /* Bulk read handling. Note that if we are at this point
2655 the client already sent a command terminated with a newline,
2656 we are reading the bulk data that is actually the last
2657 argument of the command. */
2658 int qbl
= sdslen(c
->querybuf
);
2660 if (c
->bulklen
<= qbl
) {
2661 /* Copy everything but the final CRLF as final argument */
2662 c
->argv
[c
->argc
] = createStringObject(c
->querybuf
,c
->bulklen
-2);
2664 c
->querybuf
= sdsrange(c
->querybuf
,c
->bulklen
,-1);
2665 /* Process the command. If the client is still valid after
2666 * the processing and there is more data in the buffer
2667 * try to parse it. */
2668 if (processCommand(c
) && sdslen(c
->querybuf
)) goto again
;
2674 static void readQueryFromClient(aeEventLoop
*el
, int fd
, void *privdata
, int mask
) {
2675 redisClient
*c
= (redisClient
*) privdata
;
2676 char buf
[REDIS_IOBUF_LEN
];
2679 REDIS_NOTUSED(mask
);
2681 nread
= read(fd
, buf
, REDIS_IOBUF_LEN
);
2683 if (errno
== EAGAIN
) {
2686 redisLog(REDIS_VERBOSE
, "Reading from client: %s",strerror(errno
));
2690 } else if (nread
== 0) {
2691 redisLog(REDIS_VERBOSE
, "Client closed connection");
2696 c
->querybuf
= sdscatlen(c
->querybuf
, buf
, nread
);
2697 c
->lastinteraction
= time(NULL
);
2701 processInputBuffer(c
);
2704 static int selectDb(redisClient
*c
, int id
) {
2705 if (id
< 0 || id
>= server
.dbnum
)
2707 c
->db
= &server
.db
[id
];
2711 static void *dupClientReplyValue(void *o
) {
2712 incrRefCount((robj
*)o
);
2716 static int listMatchObjects(void *a
, void *b
) {
2717 return equalStringObjects(a
,b
);
2720 static redisClient
*createClient(int fd
) {
2721 redisClient
*c
= zmalloc(sizeof(*c
));
2723 anetNonBlock(NULL
,fd
);
2724 anetTcpNoDelay(NULL
,fd
);
2725 if (!c
) return NULL
;
2728 c
->querybuf
= sdsempty();
2737 c
->lastinteraction
= time(NULL
);
2738 c
->authenticated
= 0;
2739 c
->replstate
= REDIS_REPL_NONE
;
2740 c
->reply
= listCreate();
2741 listSetFreeMethod(c
->reply
,decrRefCount
);
2742 listSetDupMethod(c
->reply
,dupClientReplyValue
);
2743 c
->blocking_keys
= NULL
;
2744 c
->blocking_keys_num
= 0;
2745 c
->io_keys
= listCreate();
2746 listSetFreeMethod(c
->io_keys
,decrRefCount
);
2747 c
->pubsub_channels
= dictCreate(&setDictType
,NULL
);
2748 c
->pubsub_patterns
= listCreate();
2749 listSetFreeMethod(c
->pubsub_patterns
,decrRefCount
);
2750 listSetMatchMethod(c
->pubsub_patterns
,listMatchObjects
);
2751 if (aeCreateFileEvent(server
.el
, c
->fd
, AE_READABLE
,
2752 readQueryFromClient
, c
) == AE_ERR
) {
2756 listAddNodeTail(server
.clients
,c
);
2757 initClientMultiState(c
);
2761 static void addReply(redisClient
*c
, robj
*obj
) {
2762 if (listLength(c
->reply
) == 0 &&
2763 (c
->replstate
== REDIS_REPL_NONE
||
2764 c
->replstate
== REDIS_REPL_ONLINE
) &&
2765 aeCreateFileEvent(server
.el
, c
->fd
, AE_WRITABLE
,
2766 sendReplyToClient
, c
) == AE_ERR
) return;
2768 if (server
.vm_enabled
&& obj
->storage
!= REDIS_VM_MEMORY
) {
2769 obj
= dupStringObject(obj
);
2770 obj
->refcount
= 0; /* getDecodedObject() will increment the refcount */
2772 listAddNodeTail(c
->reply
,getDecodedObject(obj
));
2775 static void addReplySds(redisClient
*c
, sds s
) {
2776 robj
*o
= createObject(REDIS_STRING
,s
);
2781 static void addReplyDouble(redisClient
*c
, double d
) {
2784 snprintf(buf
,sizeof(buf
),"%.17g",d
);
2785 addReplySds(c
,sdscatprintf(sdsempty(),"$%lu\r\n%s\r\n",
2786 (unsigned long) strlen(buf
),buf
));
2789 static void addReplyLongLong(redisClient
*c
, long long ll
) {
2794 addReply(c
,shared
.czero
);
2796 } else if (ll
== 1) {
2797 addReply(c
,shared
.cone
);
2801 len
= ll2string(buf
+1,sizeof(buf
)-1,ll
);
2804 addReplySds(c
,sdsnewlen(buf
,len
+3));
2807 static void addReplyUlong(redisClient
*c
, unsigned long ul
) {
2812 addReply(c
,shared
.czero
);
2814 } else if (ul
== 1) {
2815 addReply(c
,shared
.cone
);
2818 len
= snprintf(buf
,sizeof(buf
),":%lu\r\n",ul
);
2819 addReplySds(c
,sdsnewlen(buf
,len
));
2822 static void addReplyBulkLen(redisClient
*c
, robj
*obj
) {
2826 if (obj
->encoding
== REDIS_ENCODING_RAW
) {
2827 len
= sdslen(obj
->ptr
);
2829 long n
= (long)obj
->ptr
;
2831 /* Compute how many bytes will take this integer as a radix 10 string */
2837 while((n
= n
/10) != 0) {
2842 intlen
= ll2string(buf
+1,sizeof(buf
)-1,(long long)len
);
2843 buf
[intlen
+1] = '\r';
2844 buf
[intlen
+2] = '\n';
2845 addReplySds(c
,sdsnewlen(buf
,intlen
+3));
2848 static void addReplyBulk(redisClient
*c
, robj
*obj
) {
2849 addReplyBulkLen(c
,obj
);
2851 addReply(c
,shared
.crlf
);
2854 /* In the CONFIG command we need to add vanilla C string as bulk replies */
2855 static void addReplyBulkCString(redisClient
*c
, char *s
) {
2857 addReply(c
,shared
.nullbulk
);
2859 robj
*o
= createStringObject(s
,strlen(s
));
/* Event-loop callback that accepts one connection on the listening socket
 * 'fd'. 'mask' and 'privdata' are unused. On success a client is created
 * for the new descriptor; if server.maxclients is set and the client list
 * is longer than it, a best-effort "-ERR max number of clients reached"
 * line is written straight to the socket. server.stat_numconnections is
 * incremented at the end of the visible code.
 *
 * NOTE(review): the result of anetAccept() is compared with AE_ERR, a name
 * that looks like it belongs to the event library rather than to anet --
 * confirm both error constants have the same value. */
2865 static void acceptHandler(aeEventLoop
*el
, int fd
, void *privdata
, int mask
) {
2870 REDIS_NOTUSED(mask
);
2871 REDIS_NOTUSED(privdata
);
2873 cfd
= anetAccept(server
.neterr
, fd
, cip
, &cport
);
2874 if (cfd
== AE_ERR
) {
2875 redisLog(REDIS_VERBOSE
,"Accepting client connection: %s", server
.neterr
);
2878 redisLog(REDIS_VERBOSE
,"Accepted %s:%d", cip
, cport
);
2879 if ((c
= createClient(cfd
)) == NULL
) {
2880 redisLog(REDIS_WARNING
,"Error allocating resoures for the client");
2881 close(cfd
); /* May be already closed, just ignore errors */
2884 /* If maxclient directive is set and this is one client more... close the
2885 * connection. Note that we create the client instead to check before
2886 * for this condition, since now the socket is already set in nonblocking
2887 * mode and we can send an error for free using the Kernel I/O */
2888 if (server
.maxclients
&& listLength(server
.clients
) > server
.maxclients
) {
2889 char *err
= "-ERR max number of clients reached\r\n";
2891 /* That's a best effort error message, don't check write errors */
2892 if (write(c
->fd
,err
,strlen(err
)) == -1) {
2893 /* Nothing to do, Just to avoid the warning... */
2898 server
.stat_numconnections
++;
2901 /* ======================= Redis objects implementation ===================== */
/* Allocate a Redis object of the given 'type' pointing to 'ptr'.
 * An object is taken from server.objfreelist when the list is not empty,
 * otherwise it is allocated with zmalloc(). When VM is enabled the free
 * list is accessed with server.obj_freelist_mutex held, and the mutex is
 * released on both paths. When VM is disabled the allocation is smaller by
 * sizeof(struct redisObjectVM), so the VM fields must only be touched when
 * server.vm_enabled is set (as the code at the end does). The new object
 * starts with REDIS_ENCODING_RAW encoding. */
2903 static robj
*createObject(int type
, void *ptr
) {
2906 if (server
.vm_enabled
) pthread_mutex_lock(&server
.obj_freelist_mutex
);
2907 if (listLength(server
.objfreelist
)) {
2908 listNode
*head
= listFirst(server
.objfreelist
);
2909 o
= listNodeValue(head
);
2910 listDelNode(server
.objfreelist
,head
);
2911 if (server
.vm_enabled
) pthread_mutex_unlock(&server
.obj_freelist_mutex
);
2913 if (server
.vm_enabled
) {
2914 pthread_mutex_unlock(&server
.obj_freelist_mutex
);
2915 o
= zmalloc(sizeof(*o
));
2917 o
= zmalloc(sizeof(*o
)-sizeof(struct redisObjectVM
));
2921 o
->encoding
= REDIS_ENCODING_RAW
;
2924 if (server
.vm_enabled
) {
2925 /* Note that this code may run in the context of an I/O thread
2926 * and accessing to server.unixtime in theory is an error
2927 * (no locks). But in practice this is safe, and even if we read
2928 * garbage Redis will not fail, as it's just a statistical info */
2929 o
->vm
.atime
= server
.unixtime
;
2930 o
->storage
= REDIS_VM_MEMORY
;
2935 static robj
*createStringObject(char *ptr
, size_t len
) {
2936 return createObject(REDIS_STRING
,sdsnewlen(ptr
,len
));
/* Return a string object representing 'value'.
 * Values in [0, REDIS_SHARED_INTEGERS) return the matching entry of
 * shared.integers with its reference count incremented. Other values that
 * fit in a long are stored directly in o->ptr with REDIS_ENCODING_INT;
 * anything else is stored as an sds string built by sdsfromlonglong(). */
2939 static robj
*createStringObjectFromLongLong(long long value
) {
2941 if (value
>= 0 && value
< REDIS_SHARED_INTEGERS
) {
2942 incrRefCount(shared
.integers
[value
]);
2943 o
= shared
.integers
[value
];
2945 if (value
>= LONG_MIN
&& value
<= LONG_MAX
) {
2946 o
= createObject(REDIS_STRING
, NULL
);
2947 o
->encoding
= REDIS_ENCODING_INT
;
2948 o
->ptr
= (void*)((long)value
);
2950 o
= createObject(REDIS_STRING
,sdsfromlonglong(value
));
2956 static robj
*dupStringObject(robj
*o
) {
2957 assert(o
->encoding
== REDIS_ENCODING_RAW
);
2958 return createStringObject(o
->ptr
,sdslen(o
->ptr
));
2961 static robj
*createListObject(void) {
2962 list
*l
= listCreate();
2964 listSetFreeMethod(l
,decrRefCount
);
2965 return createObject(REDIS_LIST
,l
);
2968 static robj
*createSetObject(void) {
2969 dict
*d
= dictCreate(&setDictType
,NULL
);
2970 return createObject(REDIS_SET
,d
);
/* Create an empty REDIS_HASH object. The value starts as a zipmap created
 * by zipmapNew() and the encoding is set to REDIS_ENCODING_ZIPMAP. The end
 * of the body is not present in this listing. */
2973 static robj
*createHashObject(void) {
2974 /* All the Hashes start as zipmaps. Will be automatically converted
2975 * into hash tables if there are enough elements or big elements
2977 unsigned char *zm
= zipmapNew();
2978 robj
*o
= createObject(REDIS_HASH
,zm
);
2979 o
->encoding
= REDIS_ENCODING_ZIPMAP
;
2983 static robj
*createZsetObject(void) {
2984 zset
*zs
= zmalloc(sizeof(*zs
));
2986 zs
->dict
= dictCreate(&zsetDictType
,NULL
);
2987 zs
->zsl
= zslCreate();
2988 return createObject(REDIS_ZSET
,zs
);
/* Release the payload of a string object. Only the raw-encoding test is
 * visible; the statement it guards is not present in this listing
 * (presumably it frees the sds string in o->ptr -- confirm). */
2991 static void freeStringObject(robj
*o
) {
2992 if (o
->encoding
== REDIS_ENCODING_RAW
) {
2997 static void freeListObject(robj
*o
) {
2998 listRelease((list
*) o
->ptr
);
3001 static void freeSetObject(robj
*o
) {
3002 dictRelease((dict
*) o
->ptr
);
/* Release the payload of a REDIS_ZSET object. Only the release of the
 * dict (zs->dict) is visible; the declaration of 'zs' and the remaining
 * cleanup are not present in this listing. */
3005 static void freeZsetObject(robj
*o
) {
3008 dictRelease(zs
->dict
);
/* Release the payload of a REDIS_HASH object according to its encoding.
 * REDIS_ENCODING_HT releases o->ptr as a dict; the handling of
 * REDIS_ENCODING_ZIPMAP is not present in this listing. The panic message
 * is for an encoding that is neither of the two. */
3013 static void freeHashObject(robj
*o
) {
3014 switch (o
->encoding
) {
3015 case REDIS_ENCODING_HT
:
3016 dictRelease((dict
*) o
->ptr
);
3018 case REDIS_ENCODING_ZIPMAP
:
3022 redisPanic("Unknown hash encoding type");
/* Only the signature is present in this listing. Presumably this
 * increments o->refcount, the counter that decrRefCount() decrements --
 * confirm against the full source. */
3027 static void incrRefCount(robj
*o
) {
/* Drop one reference to the object 'obj' (declared void* so that the
 * function can be installed as a list free method).
 *
 * Panics if the reference count is already <= 0.
 *
 * VM path (server.vm_enabled and storage is SWAPPED or LOADING): a pending
 * load job is cancelled, the object is asserted to be a string, its
 * payload is freed, its swap pages are marked free and the object is
 * pushed on server.objfreelist under obj_freelist_mutex unless the list
 * is already longer than REDIS_OBJFREELIST_MAX or the push fails (the
 * fallback statement is not present in this listing).
 *
 * In-memory path: when the count drops to zero a pending swap-out job is
 * cancelled, the payload is freed by the type-specific function and the
 * object is recycled the same way; the mutex is only taken when VM is
 * enabled. */
3031 static void decrRefCount(void *obj
) {
3034 if (o
->refcount
<= 0) redisPanic("decrRefCount against refcount <= 0");
3035 /* Object is a key of a swapped out value, or in the process of being
3037 if (server
.vm_enabled
&&
3038 (o
->storage
== REDIS_VM_SWAPPED
|| o
->storage
== REDIS_VM_LOADING
))
3040 if (o
->storage
== REDIS_VM_LOADING
) vmCancelThreadedIOJob(obj
);
3041 redisAssert(o
->type
== REDIS_STRING
);
3042 freeStringObject(o
);
3043 vmMarkPagesFree(o
->vm
.page
,o
->vm
.usedpages
);
3044 pthread_mutex_lock(&server
.obj_freelist_mutex
);
3045 if (listLength(server
.objfreelist
) > REDIS_OBJFREELIST_MAX
||
3046 !listAddNodeHead(server
.objfreelist
,o
))
3048 pthread_mutex_unlock(&server
.obj_freelist_mutex
);
3049 server
.vm_stats_swapped_objects
--;
3052 /* Object is in memory, or in the process of being swapped out. */
3053 if (--(o
->refcount
) == 0) {
3054 if (server
.vm_enabled
&& o
->storage
== REDIS_VM_SWAPPING
)
3055 vmCancelThreadedIOJob(obj
);
3057 case REDIS_STRING
: freeStringObject(o
); break;
3058 case REDIS_LIST
: freeListObject(o
); break;
3059 case REDIS_SET
: freeSetObject(o
); break;
3060 case REDIS_ZSET
: freeZsetObject(o
); break;
3061 case REDIS_HASH
: freeHashObject(o
); break;
3062 default: redisPanic("Unknown object type"); break;
3064 if (server
.vm_enabled
) pthread_mutex_lock(&server
.obj_freelist_mutex
);
3065 if (listLength(server
.objfreelist
) > REDIS_OBJFREELIST_MAX
||
3066 !listAddNodeHead(server
.objfreelist
,o
))
3068 if (server
.vm_enabled
) pthread_mutex_unlock(&server
.obj_freelist_mutex
);
/* Look up 'key' in db->dict and return the associated value.
 *
 * NOTE(review): the local 'robj *key' declared below shadows the 'key'
 * parameter; from that point on 'key' is the key object stored in the
 * dictionary entry, which is the one carrying the VM storage state.
 *
 * With VM enabled: if the stored key is in MEMORY or SWAPPING state a
 * pending swap-out job is cancelled and key->vm.atime is refreshed with
 * server.unixtime. In the other visible branch the value is asserted to
 * be NULL, loaded with vmLoadObject(), stored back into the dict entry,
 * and clients blocked on the key are notified when the key was in LOADING
 * state. */
3072 static robj
*lookupKey(redisDb
*db
, robj
*key
) {
3073 dictEntry
*de
= dictFind(db
->dict
,key
);
3075 robj
*key
= dictGetEntryKey(de
);
3076 robj
*val
= dictGetEntryVal(de
);
3078 if (server
.vm_enabled
) {
3079 if (key
->storage
== REDIS_VM_MEMORY
||
3080 key
->storage
== REDIS_VM_SWAPPING
)
3082 /* If we were swapping the object out, stop it, this key
3084 if (key
->storage
== REDIS_VM_SWAPPING
)
3085 vmCancelThreadedIOJob(key
);
3086 /* Update the access time of the key for the aging algorithm. */
3087 key
->vm
.atime
= server
.unixtime
;
3089 int notify
= (key
->storage
== REDIS_VM_LOADING
);
3091 /* Our value was swapped on disk. Bring it at home. */
3092 redisAssert(val
== NULL
);
3093 val
= vmLoadObject(key
);
3094 dictGetEntryVal(de
) = val
;
3096 /* Clients blocked by the VM subsystem may be waiting for
3098 if (notify
) handleClientsBlockedOnSwappedKey(db
,key
);
3107 static robj
*lookupKeyRead(redisDb
*db
, robj
*key
) {
3108 expireIfNeeded(db
,key
);
3109 return lookupKey(db
,key
);
3112 static robj
*lookupKeyWrite(redisDb
*db
, robj
*key
) {
3113 deleteIfVolatile(db
,key
);
3114 touchWatchedKey(db
,key
);
3115 return lookupKey(db
,key
);
/* Read-lookup of 'key' in the client's current db. When nothing is found
 * the object 'reply' is sent to the client. The return statement is not
 * present in this listing. */
3118 static robj
*lookupKeyReadOrReply(redisClient
*c
, robj
*key
, robj
*reply
) {
3119 robj
*o
= lookupKeyRead(c
->db
, key
);
3120 if (!o
) addReply(c
,reply
);
/* Write-lookup of 'key' in the client's current db. When nothing is found
 * the object 'reply' is sent to the client. The return statement is not
 * present in this listing. */
3124 static robj
*lookupKeyWriteOrReply(redisClient
*c
, robj
*key
, robj
*reply
) {
3125 robj
*o
= lookupKeyWrite(c
->db
, key
);
3126 if (!o
) addReply(c
,reply
);
/* Compare the type of 'o' with 'type'; on mismatch shared.wrongtypeerr is
 * sent to the client. The return values are not present in this listing. */
3130 static int checkType(redisClient
*c
, robj
*o
, int type
) {
3131 if (o
->type
!= type
) {
3132 addReply(c
,shared
.wrongtypeerr
);
/* Remove 'key' from 'db': first from db->expires (only when that dict is
 * not empty), then from db->dict. Returns non-zero when the key was
 * present in db->dict (dictDelete() returned DICT_OK). The reference-count
 * protection described in the comment below happens on lines not present
 * in this listing. */
3138 static int deleteKey(redisDb
*db
, robj
*key
) {
3141 /* We need to protect key from destruction: after the first dictDelete()
3142 * it may happen that 'key' is no longer valid if we don't increment
3143 * it's count. This may happen when we get the object reference directly
3144 * from the hash table with dictRandomKey() or dict iterators */
3146 if (dictSize(db
->expires
)) dictDelete(db
->expires
,key
);
3147 retval
= dictDelete(db
->dict
,key
);
3150 return retval
== DICT_OK
;
/* Implementation note: strtol() alone is not enough, so the parsed value is
 * converted back with ll2string() and compared byte by byte with 's'; any
 * input whose canonical decimal form differs from 's' is rejected by that
 * round trip. 'longval' may be NULL when the caller only needs the check. */
3153 /* Check if the nul-terminated string 's' can be represented by a long
3154 * (that is, is a number that fits into long without any other space or
3155 * character before or after the digits).
3157 * If so, the function returns REDIS_OK and *longval is set to the value
3158 * of the number. Otherwise REDIS_ERR is returned */
3159 static int isStringRepresentableAsLong(sds s
, long *longval
) {
3160 char buf
[32], *endptr
;
3164 value
= strtol(s
, &endptr
, 10);
3165 if (endptr
[0] != '\0') return REDIS_ERR
;
3166 slen
= ll2string(buf
,32,value
);
3168 /* If the number converted back into a string is not identical
3169 * then it's not possible to encode the string as integer */
3170 if (sdslen(s
) != (unsigned)slen
|| memcmp(buf
,s
,slen
)) return REDIS_ERR
;
3171 if (longval
) *longval
= value
;
/* Returns the object to use in place of 'o'. 'o' itself is returned
 * untouched when it is already encoded, when it has more than one
 * reference, or when its string is not representable as a long. Values in
 * [0, REDIS_SHARED_INTEGERS) return the matching shared integer with its
 * reference count incremented; other values switch 'o' to
 * REDIS_ENCODING_INT and store the number in o->ptr. The declarations of
 * 's' and 'value' are not present in this listing. */
3175 /* Try to encode a string object in order to save space */
3176 static robj
*tryObjectEncoding(robj
*o
) {
3180 if (o
->encoding
!= REDIS_ENCODING_RAW
)
3181 return o
; /* Already encoded */
3183 /* It's not safe to encode shared objects: shared objects can be shared
3184 * everywhere in the "object space" of Redis. Encoded objects can only
3185 * appear as "values" (and not, for instance, as keys) */
3186 if (o
->refcount
> 1) return o
;
3188 /* Currently we try to encode only strings */
3189 redisAssert(o
->type
== REDIS_STRING
);
3191 /* Check if we can represent this string as a long integer */
3192 if (isStringRepresentableAsLong(s
,&value
) == REDIS_ERR
) return o
;
3194 /* Ok, this object can be encoded */
3195 if (value
>= 0 && value
< REDIS_SHARED_INTEGERS
) {
3197 incrRefCount(shared
.integers
[value
]);
3198 return shared
.integers
[value
];
3200 o
->encoding
= REDIS_ENCODING_INT
;
3202 o
->ptr
= (void*) value
;
/* For integer-encoded strings the number stored in o->ptr is rendered
 * with ll2string() into a 32-byte buffer and copied into a new string
 * object. Any other non-raw encoding triggers the panic below. */
3207 /* Get a decoded version of an encoded object (returned as a new object).
3208 * If the object is already raw-encoded just increment the ref count. */
3209 static robj
*getDecodedObject(robj
*o
) {
3212 if (o
->encoding
== REDIS_ENCODING_RAW
) {
3216 if (o
->type
== REDIS_STRING
&& o
->encoding
== REDIS_ENCODING_INT
) {
3219 ll2string(buf
,32,(long)o
->ptr
);
3220 dec
= createStringObject(buf
,strlen(buf
));
3223 redisPanic("Unknown encoding type");
3227 /* Compare two string objects via strcmp() or alike.
3228 * Note that the objects may be integer-encoded. In such a case we
3229 * use ll2string() to get a string representation of the numbers on the stack
3230 * and compare the strings, it's much faster than calling getDecodedObject().
3232 * Important note: if objects are not integer encoded, but binary-safe strings,
3233 * sdscmp() from sds.c will apply memcmp() so this function can be considered
/* Three-way comparison of two string objects (both asserted to be
 * REDIS_STRING). Identical pointers compare equal immediately. Each
 * non-raw operand is rendered with ll2string() into a stack buffer; the
 * final comparison uses sdscmp() when 'bothsds' is set and strcmp()
 * otherwise. The lines assigning 'astr', 'bstr' and 'bothsds' are not
 * present in this listing. */
3235 static int compareStringObjects(robj
*a
, robj
*b
) {
3236 redisAssert(a
->type
== REDIS_STRING
&& b
->type
== REDIS_STRING
);
3237 char bufa
[128], bufb
[128], *astr
, *bstr
;
3240 if (a
== b
) return 0;
3241 if (a
->encoding
!= REDIS_ENCODING_RAW
) {
3242 ll2string(bufa
,sizeof(bufa
),(long) a
->ptr
);
3248 if (b
->encoding
!= REDIS_ENCODING_RAW
) {
3249 ll2string(bufb
,sizeof(bufb
),(long) b
->ptr
);
3255 return bothsds
? sdscmp(astr
,bstr
) : strcmp(astr
,bstr
);
3258 /* Equal string objects return 1 if the two objects are the same from the
3259 * point of view of a string comparison, otherwise 0 is returned. Note that
3260 * this function is faster then checking for (compareStringObject(a,b) == 0)
3261 * because it can perform some more optimization. */
3262 static int equalStringObjects(robj
*a
, robj
*b
) {
3263 if (a
->encoding
!= REDIS_ENCODING_RAW
&& b
->encoding
!= REDIS_ENCODING_RAW
){
3264 return a
->ptr
== b
->ptr
;
3266 return compareStringObjects(a
,b
) == 0;
/* Length in bytes of the string held by 'o' (asserted to be REDIS_STRING).
 * Raw strings return sdslen(); on the other visible path the number stored
 * in o->ptr is rendered with ll2string() and the value returned by that
 * call is used as the length. */
3270 static size_t stringObjectLen(robj
*o
) {
3271 redisAssert(o
->type
== REDIS_STRING
);
3272 if (o
->encoding
== REDIS_ENCODING_RAW
) {
3273 return sdslen(o
->ptr
);
3277 return ll2string(buf
,32,(long)o
->ptr
);
/* Convert the string object 'o' into a double. Raw strings are parsed
 * with strtod() and rejected with REDIS_ERR when any character is left
 * unparsed; integer-encoded strings use the long stored in o->ptr. Other
 * encodings panic. The handling of a NULL 'o' and the store into *target
 * are not present in this listing.
 *
 * NOTE(review): only trailing characters are checked after strtod();
 * whether an empty string is rejected elsewhere cannot be told from the
 * visible lines. */
3281 static int getDoubleFromObject(robj
*o
, double *target
) {
3288 redisAssert(o
->type
== REDIS_STRING
);
3289 if (o
->encoding
== REDIS_ENCODING_RAW
) {
3290 value
= strtod(o
->ptr
, &eptr
);
3291 if (eptr
[0] != '\0') return REDIS_ERR
;
3292 } else if (o
->encoding
== REDIS_ENCODING_INT
) {
3293 value
= (long)o
->ptr
;
3295 redisPanic("Unknown string encoding");
/* Like getDoubleFromObject(), but on failure an error is sent to the
 * client: "-ERR <msg>" in one branch, the default "-ERR value is not a
 * double" in the other. The test selecting between them (presumably
 * msg != NULL -- confirm) is not present in this listing. */
3303 static int getDoubleFromObjectOrReply(redisClient
*c
, robj
*o
, double *target
, const char *msg
) {
3305 if (getDoubleFromObject(o
, &value
) != REDIS_OK
) {
3307 addReplySds(c
, sdscatprintf(sdsempty(), "-ERR %s\r\n", msg
));
3309 addReplySds(c
, sdsnew("-ERR value is not a double\r\n"));
/* Convert the string object 'o' into a long long. Raw strings are parsed
 * with strtoll() in base 10 and rejected with REDIS_ERR when any character
 * is left unparsed; integer-encoded strings use the long stored in o->ptr.
 * Other encodings panic. The store into *target is not present in this
 * listing.
 *
 * NOTE(review): no errno/ERANGE check is visible after strtoll(), so an
 * out-of-range input would be accepted with a saturated value unless it
 * is handled on a missing line. */
3318 static int getLongLongFromObject(robj
*o
, long long *target
) {
3325 redisAssert(o
->type
== REDIS_STRING
);
3326 if (o
->encoding
== REDIS_ENCODING_RAW
) {
3327 value
= strtoll(o
->ptr
, &eptr
, 10);
3328 if (eptr
[0] != '\0') return REDIS_ERR
;
3329 } else if (o
->encoding
== REDIS_ENCODING_INT
) {
3330 value
= (long)o
->ptr
;
3332 redisPanic("Unknown string encoding");
/* Like getLongLongFromObject(), but on failure an error is sent to the
 * client: "-ERR <msg>" in one branch, the default "-ERR value is not an
 * integer" in the other. The test selecting between them is not present
 * in this listing. */
3340 static int getLongLongFromObjectOrReply(redisClient
*c
, robj
*o
, long long *target
, const char *msg
) {
3342 if (getLongLongFromObject(o
, &value
) != REDIS_OK
) {
3344 addReplySds(c
, sdscatprintf(sdsempty(), "-ERR %s\r\n", msg
));
3346 addReplySds(c
, sdsnew("-ERR value is not an integer\r\n"));
/* Parse 'o' as a long long through getLongLongFromObjectOrReply() (which
 * already replies on failure), then check that the value fits in a long.
 * An out-of-range value produces "-ERR <msg>" or the default "-ERR value
 * is out of range"; the test selecting between them is not present in
 * this listing. */
3355 static int getLongFromObjectOrReply(redisClient
*c
, robj
*o
, long *target
, const char *msg
) {
3358 if (getLongLongFromObjectOrReply(c
, o
, &value
, msg
) != REDIS_OK
) return REDIS_ERR
;
3359 if (value
< LONG_MIN
|| value
> LONG_MAX
) {
3361 addReplySds(c
, sdscatprintf(sdsempty(), "-ERR %s\r\n", msg
));
3363 addReplySds(c
, sdsnew("-ERR value is out of range\r\n"));
3372 /*============================ RDB saving/loading =========================== */
/* Write the one-byte type code 'type' to 'fp'. Returns -1 when fwrite()
 * writes nothing; the success return is not present in this listing. */
3374 static int rdbSaveType(FILE *fp
, unsigned char type
) {
3375 if (fwrite(&type
,1,1,fp
) == 0) return -1;
/* Write the time 't' to 'fp' as 4 raw bytes. The value is first narrowed
 * to int32_t, so times outside the 32-bit signed range are not preserved,
 * and the bytes are written in host order. Returns -1 when fwrite() writes
 * nothing; the success return is not present in this listing. */
3379 static int rdbSaveTime(FILE *fp
, time_t t
) {
3380 int32_t t32
= (int32_t) t
;
3381 if (fwrite(&t32
,4,1,fp
) == 0) return -1;
/* Write 'len' to 'fp' using the shortest of three layouts; the top two
 * bits of the first byte carry the layout code (REDIS_RDB_*BITLEN << 6):
 *   - one byte for the smallest lengths (the test selecting this branch is
 *     not present in this listing);
 *   - two bytes for lengths below 1<<14 (the assignment of buf[1] is not
 *     present in this listing);
 *   - a marker byte followed by 4 bytes otherwise. Original line 3402,
 *     between the marker write and the 4-byte write, is missing, so the
 *     byte order of those 4 bytes cannot be confirmed from here.
 * Every failed fwrite() returns -1. */
3385 /* check rdbLoadLen() comments for more info */
3386 static int rdbSaveLen(FILE *fp
, uint32_t len
) {
3387 unsigned char buf
[2];
3390 /* Save a 6 bit len */
3391 buf
[0] = (len
&0xFF)|(REDIS_RDB_6BITLEN
<<6);
3392 if (fwrite(buf
,1,1,fp
) == 0) return -1;
3393 } else if (len
< (1<<14)) {
3394 /* Save a 14 bit len */
3395 buf
[0] = ((len
>>8)&0xFF)|(REDIS_RDB_14BITLEN
<<6);
3397 if (fwrite(buf
,2,1,fp
) == 0) return -1;
3399 /* Save a 32 bit len */
3400 buf
[0] = (REDIS_RDB_32BITLEN
<<6);
3401 if (fwrite(buf
,1,1,fp
) == 0) return -1;
3403 if (fwrite(&len
,4,1,fp
) == 0) return -1;
3408 /* Encode 'value' as an integer if possible (if integer will fit the
3409 * supported range). If the function successfully encoded the integer
3410 * then the (up to 5 bytes) encoded representation is written in the
3411 * string pointed by 'enc' and the length is returned. Otherwise
/* Encode 'value' into 'enc' when it fits a signed 8, 16 or 32 bit range.
 * enc[0] carries REDIS_RDB_ENCVAL in its top two bits plus the width code;
 * the value follows least-significant byte first (1, 2 or 4 bytes), so
 * 'enc' must have room for 5 bytes. The return statements are not present
 * in this listing. */
3413 static int rdbEncodeInteger(long long value
, unsigned char *enc
) {
3414 /* Finally check if it fits in our ranges */
3415 if (value
>= -(1<<7) && value
<= (1<<7)-1) {
3416 enc
[0] = (REDIS_RDB_ENCVAL
<<6)|REDIS_RDB_ENC_INT8
;
3417 enc
[1] = value
&0xFF;
3419 } else if (value
>= -(1<<15) && value
<= (1<<15)-1) {
3420 enc
[0] = (REDIS_RDB_ENCVAL
<<6)|REDIS_RDB_ENC_INT16
;
3421 enc
[1] = value
&0xFF;
3422 enc
[2] = (value
>>8)&0xFF;
3424 } else if (value
>= -((long long)1<<31) && value
<= ((long long)1<<31)-1) {
3425 enc
[0] = (REDIS_RDB_ENCVAL
<<6)|REDIS_RDB_ENC_INT32
;
3426 enc
[1] = value
&0xFF;
3427 enc
[2] = (value
>>8)&0xFF;
3428 enc
[3] = (value
>>16)&0xFF;
3429 enc
[4] = (value
>>24)&0xFF;
/* Returns 0 when 's' is not the canonical decimal form of an integer,
 * otherwise the result of rdbEncodeInteger(). The check parses 's' with
 * strtoll(), prints the value back with ll2string() and requires the
 * result to match 's' exactly over 'len' bytes.
 *
 * NOTE(review): strtoll() and the endptr test read 's' as a NUL-terminated
 * string even though a length is passed in -- confirm every caller passes
 * a terminated buffer. */
3436 /* String objects in the form "2391" "-100" without any space and with a
3437 * range of values that can fit in an 8, 16 or 32 bit signed value can be
3438 * encoded as integers to save space */
3439 static int rdbTryIntegerEncoding(char *s
, size_t len
, unsigned char *enc
) {
3441 char *endptr
, buf
[32];
3443 /* Check if it's possible to encode this value as a number */
3444 value
= strtoll(s
, &endptr
, 10);
3445 if (endptr
[0] != '\0') return 0;
3446 ll2string(buf
,32,value
);
3448 /* If the number converted back into a string is not identical
3449 * then it's not possible to encode the string as integer */
3450 if (strlen(buf
) != len
|| memcmp(buf
,s
,len
)) return 0;
3452 return rdbEncodeInteger(value
,enc
);
/* Try to save the 'len' bytes at 's' in LZF-compressed form. Returns 0
 * without writing when the input is 4 bytes or shorter or when the output
 * buffer cannot be allocated. When lzf_compress() succeeds the record
 * written is: marker byte (REDIS_RDB_ENCVAL with REDIS_RDB_ENC_LZF),
 * compressed length, original length, compressed bytes; any failed write
 * jumps to the 'writeerr' label. The computation of 'outlen', the handling
 * of comprlen == 0 and the code after the last write (including 'writeerr')
 * are not present in this listing. */
3455 static int rdbSaveLzfStringObject(FILE *fp
, unsigned char *s
, size_t len
) {
3456 size_t comprlen
, outlen
;
3460 /* We require at least four bytes compression for this to be worth it */
3461 if (len
<= 4) return 0;
3463 if ((out
= zmalloc(outlen
+1)) == NULL
) return 0;
3464 comprlen
= lzf_compress(s
, len
, out
, outlen
);
3465 if (comprlen
== 0) {
3469 /* Data compressed! Let's save it on disk */
3470 byte
= (REDIS_RDB_ENCVAL
<<6)|REDIS_RDB_ENC_LZF
;
3471 if (fwrite(&byte
,1,1,fp
) == 0) goto writeerr
;
3472 if (rdbSaveLen(fp
,comprlen
) == -1) goto writeerr
;
3473 if (rdbSaveLen(fp
,len
) == -1) goto writeerr
;
3474 if (fwrite(out
,comprlen
,1,fp
) == 0) goto writeerr
;
/* The three strategies are tried in order: integer encoding (the guard in
 * front of it is not present in this listing), LZF compression when
 * server.rdbcompression is set and len > 20, and finally the verbatim
 * [len][data] form. Returns -1 on write errors; the success return at the
 * end is not present in this listing. */
3483 /* Save a string object as [len][data] on disk. If the object is a string
3484 * representation of an integer value we try to save it in a special form */
3485 static int rdbSaveRawString(FILE *fp
, unsigned char *s
, size_t len
) {
3488 /* Try integer encoding */
3490 unsigned char buf
[5];
3491 if ((enclen
= rdbTryIntegerEncoding((char*)s
,len
,buf
)) > 0) {
3492 if (fwrite(buf
,enclen
,1,fp
) == 0) return -1;
3497 /* Try LZF compression - under 20 bytes it's unable to compress even
3498 * aaaaaaaaaaaaaaaaaa so skip it */
3499 if (server
.rdbcompression
&& len
> 20) {
3502 retval
= rdbSaveLzfStringObject(fp
,s
,len
);
3503 if (retval
== -1) return -1;
3504 if (retval
> 0) return 0;
3505 /* retval == 0 means data can't be compressed, save the old way */
3508 /* Store verbatim */
3509 if (rdbSaveLen(fp
,len
) == -1) return -1;
3510 if (len
&& fwrite(s
,len
,1,fp
) == 0) return -1;
/* Save the string object 'obj'. Integer-encoded objects are first offered
 * to rdbEncodeInteger() and written directly when that succeeds. Otherwise
 * a non-raw object is decoded with getDecodedObject() and its sds payload
 * is passed to rdbSaveRawString(); raw objects are passed as they are. The
 * release of the decoded copy and the return statement are not present in
 * this listing. */
3514 /* Like rdbSaveRawString() but handle encoded objects */
3515 static int rdbSaveStringObject(FILE *fp
, robj
*obj
) {
3518 /* Avoid to decode the object, then encode it again, if the
3519 * object is already integer encoded. */
3520 if (obj
->encoding
== REDIS_ENCODING_INT
) {
3521 long val
= (long) obj
->ptr
;
3522 unsigned char buf
[5];
3525 if ((enclen
= rdbEncodeInteger(val
,buf
)) > 0) {
3526 if (fwrite(buf
,enclen
,1,fp
) == 0) return -1;
3529 /* otherwise... fall through and continue with the usual
3533 /* Avoid incr/decr ref count business when possible.
3534 * This plays well with copy-on-write given that we are probably
3535 * in a child process (BGSAVE). Also this makes sure key objects
3536 * of swapped objects are not incRefCount-ed (an assert does not allow
3537 * this in order to avoid bugs) */
3538 if (obj
->encoding
!= REDIS_ENCODING_RAW
) {
3539 obj
= getDecodedObject(obj
);
3540 retval
= rdbSaveRawString(fp
,obj
->ptr
,sdslen(obj
->ptr
));
3543 retval
= rdbSaveRawString(fp
,obj
->ptr
,sdslen(obj
->ptr
));
3548 /* Save a double value. Doubles are saved as strings prefixed by an unsigned
3549 * 8 bit integer specifying the length of the representation.
3550 * This 8 bit integer has special values in order to specify the following
/* Write 'val' to 'fp' as a one-byte length followed by its text form.
 * Non-finite values use a special first byte instead: 255 for negative and
 * 254 for positive infinity on the visible branch (the first branch of the
 * chain is not present in this listing). Finite values are printed at
 * buf+1, either with ll2string() when the value is integral and inside the
 * safe range below, or with "%.17g", and buf[0] receives the text length.
 * Returns -1 when fwrite() writes nothing.
 *
 * NOTE(review): ll2string() is given sizeof(buf) as the available space
 * although it writes at buf+1; sizeof(buf)-1, as used in the snprintf()
 * call, would match the real room. Harmless for a 128-byte buffer, but
 * inconsistent. */
3556 static int rdbSaveDoubleValue(FILE *fp
, double val
) {
3557 unsigned char buf
[128];
3563 } else if (!isfinite(val
)) {
3565 buf
[0] = (val
< 0) ? 255 : 254;
3567 #if (DBL_MANT_DIG >= 52) && (LLONG_MAX == 0x7fffffffffffffffLL)
3568 /* Check if the float is in a safe range to be casted into a
3569 * long long. We are assuming that long long is 64 bit here.
3570 * Also we are assuming that there are no implementations around where
3571 * double has precision < 52 bit.
3573 * Under this assumptions we test if a double is inside an interval
3574 * where casting to long long is safe. Then using two castings we
3575 * make sure the decimal part is zero. If all this is true we use
3576 * integer printing function that is much faster. */
3577 double min
= -4503599627370495; /* -((2^52)-1) */
3578 double max
= 4503599627370496; /* 2^52 */
3579 if (val
> min
&& val
< max
&& val
== ((double)((long long)val
)))
3580 ll2string((char*)buf
+1,sizeof(buf
),(long long)val
);
3583 snprintf((char*)buf
+1,sizeof(buf
)-1,"%.17g",val
);
3584 buf
[0] = strlen((char*)buf
+1);
3587 if (fwrite(buf
,len
,1,fp
) == 0) return -1;
/* Serialize the value 'o' to 'fp' according to its type:
 *   - string: rdbSaveStringObject();
 *   - list / set: element count, then every element as a string object;
 *   - sorted set: element count, then (member, score) pairs;
 *   - hash: pair count, then (field, value) pairs, read from the zipmap
 *     or from the dict depending on the encoding.
 * Any failed write returns -1; unknown types panic.
 *
 * NOTE(review): inside the dict-based loops the error path returns -1
 * directly, while dictReleaseIterator(di) is only reached after the loop,
 * so the iterator is not released when a write fails. */
3591 /* Save a Redis object. */
3592 static int rdbSaveObject(FILE *fp
, robj
*o
) {
3593 if (o
->type
== REDIS_STRING
) {
3594 /* Save a string value */
3595 if (rdbSaveStringObject(fp
,o
) == -1) return -1;
3596 } else if (o
->type
== REDIS_LIST
) {
3597 /* Save a list value */
3598 list
*list
= o
->ptr
;
3602 if (rdbSaveLen(fp
,listLength(list
)) == -1) return -1;
3603 listRewind(list
,&li
);
3604 while((ln
= listNext(&li
))) {
3605 robj
*eleobj
= listNodeValue(ln
);
3607 if (rdbSaveStringObject(fp
,eleobj
) == -1) return -1;
3609 } else if (o
->type
== REDIS_SET
) {
3610 /* Save a set value */
3612 dictIterator
*di
= dictGetIterator(set
);
3615 if (rdbSaveLen(fp
,dictSize(set
)) == -1) return -1;
3616 while((de
= dictNext(di
)) != NULL
) {
3617 robj
*eleobj
= dictGetEntryKey(de
);
3619 if (rdbSaveStringObject(fp
,eleobj
) == -1) return -1;
3621 dictReleaseIterator(di
);
3622 } else if (o
->type
== REDIS_ZSET
) {
3623 /* Save a sorted set value */
3625 dictIterator
*di
= dictGetIterator(zs
->dict
);
3628 if (rdbSaveLen(fp
,dictSize(zs
->dict
)) == -1) return -1;
3629 while((de
= dictNext(di
)) != NULL
) {
3630 robj
*eleobj
= dictGetEntryKey(de
);
3631 double *score
= dictGetEntryVal(de
);
3633 if (rdbSaveStringObject(fp
,eleobj
) == -1) return -1;
3634 if (rdbSaveDoubleValue(fp
,*score
) == -1) return -1;
3636 dictReleaseIterator(di
);
3637 } else if (o
->type
== REDIS_HASH
) {
3638 /* Save a hash value */
3639 if (o
->encoding
== REDIS_ENCODING_ZIPMAP
) {
3640 unsigned char *p
= zipmapRewind(o
->ptr
);
3641 unsigned int count
= zipmapLen(o
->ptr
);
3642 unsigned char *key
, *val
;
3643 unsigned int klen
, vlen
;
3645 if (rdbSaveLen(fp
,count
) == -1) return -1;
3646 while((p
= zipmapNext(p
,&key
,&klen
,&val
,&vlen
)) != NULL
) {
3647 if (rdbSaveRawString(fp
,key
,klen
) == -1) return -1;
3648 if (rdbSaveRawString(fp
,val
,vlen
) == -1) return -1;
3651 dictIterator
*di
= dictGetIterator(o
->ptr
);
3654 if (rdbSaveLen(fp
,dictSize((dict
*)o
->ptr
)) == -1) return -1;
3655 while((de
= dictNext(di
)) != NULL
) {
3656 robj
*key
= dictGetEntryKey(de
);
3657 robj
*val
= dictGetEntryVal(de
);
3659 if (rdbSaveStringObject(fp
,key
) == -1) return -1;
3660 if (rdbSaveStringObject(fp
,val
) == -1) return -1;
3662 dictReleaseIterator(di
);
3665 redisPanic("Unknown object type");
/* When 'fp' is NULL the object is written to server.devnull instead. The
 * lines around the assert (original 3676 and 3678) are not present in this
 * listing.
 *
 * NOTE(review): two problems are visible on the assert line:
 *  1. The result is compared with 1, while every error return visible in
 *     rdbSaveObject() is -1, so this check cannot catch a write error;
 *     "!= -1" looks like the intent.
 *  2. The call to rdbSaveObject() lives inside assert(); if the file is
 *     ever built with NDEBUG the call disappears together with the check
 *     and nothing is written at all. */
3670 /* Return the length the object will have on disk if saved with
3671 * the rdbSaveObject() function. Currently we use a trick to get
3672 * this length with very little changes to the code. In the future
3673 * we could switch to a faster solution. */
3674 static off_t
rdbSavedObjectLen(robj
*o
, FILE *fp
) {
3675 if (fp
== NULL
) fp
= server
.devnull
;
3677 assert(rdbSaveObject(fp
,o
) != 1);
3681 /* Return the number of pages required to save this object in the swap file */
3682 static off_t
rdbSavedObjectPages(robj
*o
, FILE *fp
) {
3683 off_t bytes
= rdbSavedObjectLen(o
,fp
);
3685 return (bytes
+(server
.vm_page_size
-1))/server
.vm_page_size
;
/* Outline of the visible code: with VM enabled, wait for the I/O jobs
 * queue to drain; write to "temp-<pid>.rdb"; emit the "REDIS0001" header;
 * for every non-empty database emit a SELECTDB opcode plus the db index
 * and then every key. Keys carrying an expire time that is already in the
 * past are skipped, the others get an EXPIRETIME record first. Values that
 * are in memory (or being swapped out) are saved directly; swapped values
 * are saved from a preview obtained with vmPreviewObject(), using
 * key->vtype as the type. After the EOF opcode the temp file is renamed
 * onto 'filename' and server.lastsave is updated. Write failures jump to
 * 'werr', which logs the error and releases the iterator if one is open.
 * The flush/close of the file and the return statements are not present
 * in this listing. */
3688 /* Save the DB on disk. Return REDIS_ERR on error, REDIS_OK on success */
3689 static int rdbSave(char *filename
) {
3690 dictIterator
*di
= NULL
;
3695 time_t now
= time(NULL
);
3697 /* Wait for I/O threads to terminate, just in case this is a
3698 * foreground-saving, to avoid seeking the swap file descriptor at the
3700 if (server
.vm_enabled
)
3701 waitEmptyIOJobsQueue();
3703 snprintf(tmpfile
,256,"temp-%d.rdb", (int) getpid());
3704 fp
= fopen(tmpfile
,"w");
3706 redisLog(REDIS_WARNING
, "Failed saving the DB: %s", strerror(errno
));
3709 if (fwrite("REDIS0001",9,1,fp
) == 0) goto werr
;
3710 for (j
= 0; j
< server
.dbnum
; j
++) {
3711 redisDb
*db
= server
.db
+j
;
3713 if (dictSize(d
) == 0) continue;
3714 di
= dictGetIterator(d
);
3720 /* Write the SELECT DB opcode */
3721 if (rdbSaveType(fp
,REDIS_SELECTDB
) == -1) goto werr
;
3722 if (rdbSaveLen(fp
,j
) == -1) goto werr
;
3724 /* Iterate this DB writing every entry */
3725 while((de
= dictNext(di
)) != NULL
) {
3726 robj
*key
= dictGetEntryKey(de
);
3727 robj
*o
= dictGetEntryVal(de
);
3728 time_t expiretime
= getExpire(db
,key
);
3730 /* Save the expire time */
3731 if (expiretime
!= -1) {
3732 /* If this key is already expired skip it */
3733 if (expiretime
< now
) continue;
3734 if (rdbSaveType(fp
,REDIS_EXPIRETIME
) == -1) goto werr
;
3735 if (rdbSaveTime(fp
,expiretime
) == -1) goto werr
;
3737 /* Save the key and associated value. This requires special
3738 * handling if the value is swapped out. */
3739 if (!server
.vm_enabled
|| key
->storage
== REDIS_VM_MEMORY
||
3740 key
->storage
== REDIS_VM_SWAPPING
) {
3741 /* Save type, key, value */
3742 if (rdbSaveType(fp
,o
->type
) == -1) goto werr
;
3743 if (rdbSaveStringObject(fp
,key
) == -1) goto werr
;
3744 if (rdbSaveObject(fp
,o
) == -1) goto werr
;
3746 /* REDIS_VM_SWAPPED or REDIS_VM_LOADING */
3748 /* Get a preview of the object in memory */
3749 po
= vmPreviewObject(key
);
3750 /* Save type, key, value */
3751 if (rdbSaveType(fp
,key
->vtype
) == -1) goto werr
;
3752 if (rdbSaveStringObject(fp
,key
) == -1) goto werr
;
3753 if (rdbSaveObject(fp
,po
) == -1) goto werr
;
3754 /* Remove the loaded object from memory */
3758 dictReleaseIterator(di
);
3761 if (rdbSaveType(fp
,REDIS_EOF
) == -1) goto werr
;
3763 /* Make sure data will not remain on the OS's output buffers */
3768 /* Use RENAME to make sure the DB file is changed atomically only
3769 * if the generate DB file is ok. */
3770 if (rename(tmpfile
,filename
) == -1) {
3771 redisLog(REDIS_WARNING
,"Error moving temp DB file on the final destination: %s", strerror(errno
));
3775 redisLog(REDIS_NOTICE
,"DB saved on disk");
3777 server
.lastsave
= time(NULL
);
3783 redisLog(REDIS_WARNING
,"Write error saving DB on disk: %s", strerror(errno
));
3784 if (di
) dictReleaseIterator(di
);
/* Start a background save of the dataset to 'filename'.
 * Returns REDIS_ERR immediately when server.bgsavechildpid is not -1, i.e.
 * a save is already running. With VM enabled the I/O jobs queue is drained
 * before fork(). The child (fork() == 0) reopens the swap file when VM is
 * enabled and runs rdbSave(); the lines dealing with its result are not
 * present in this listing. In the parent a failed fork is logged,
 * otherwise the child pid is logged and recorded and
 * updateDictResizePolicy() is called. */
3788 static int rdbSaveBackground(char *filename
) {
3791 if (server
.bgsavechildpid
!= -1) return REDIS_ERR
;
3792 if (server
.vm_enabled
) waitEmptyIOJobsQueue();
3793 if ((childpid
= fork()) == 0) {
3795 if (server
.vm_enabled
) vmReopenSwapFile();
3797 if (rdbSave(filename
) == REDIS_OK
) {
3804 if (childpid
== -1) {
3805 redisLog(REDIS_WARNING
,"Can't save in background: fork: %s",
3809 redisLog(REDIS_NOTICE
,"Background saving started by pid %d",childpid
);
3810 server
.bgsavechildpid
= childpid
;
3811 updateDictResizePolicy();
3814 return REDIS_OK
; /* unreached */
/* Build the name "temp-<childpid>.rdb", the same pattern rdbSave() uses
 * for its temporary file. The statement acting on the name (presumably an
 * unlink -- confirm) is not present in this listing. */
3817 static void rdbRemoveTempFile(pid_t childpid
) {
3820 snprintf(tmpfile
,256,"temp-%d.rdb", (int) childpid
);
/* Read a one-byte type code from 'fp'. Returns -1 when nothing could be
 * read; the declaration of 'type' and the success return are not present
 * in this listing. */
3824 static int rdbLoadType(FILE *fp
) {
3826 if (fread(&type
,1,1,fp
) == 0) return -1;
/* Read a 4-byte time value from 'fp' and return it as time_t. Returns -1
 * when the read fails, which is indistinguishable from a stored value of
 * -1. The declaration of 't32' is not present in this listing. */
3830 static time_t rdbLoadTime(FILE *fp
) {
3832 if (fread(&t32
,4,1,fp
) == 0) return -1;
3833 return (time_t) t32
;
/* The top two bits of the first byte select the layout. Any failed read
 * returns REDIS_RDB_LENERR. 'isencoded' may be NULL. The return
 * statements of the 6-bit and encoded-value branches and the conversion
 * of the 32-bit value are not present in this listing. */
3836 /* Load an encoded length from the DB, see the REDIS_RDB_* defines on the top
3837 * of this file for a description of how this are stored on disk.
3839 * isencoded is set to 1 if the length read is not actually a length but
3840 * an "encoding type", check the above comments for more info */
3841 static uint32_t rdbLoadLen(FILE *fp
, int *isencoded
) {
3842 unsigned char buf
[2];
3846 if (isencoded
) *isencoded
= 0;
3847 if (fread(buf
,1,1,fp
) == 0) return REDIS_RDB_LENERR
;
3848 type
= (buf
[0]&0xC0)>>6;
3849 if (type
== REDIS_RDB_6BITLEN
) {
3850 /* Read a 6 bit len */
3852 } else if (type
== REDIS_RDB_ENCVAL
) {
3853 /* Read a 6 bit len encoding type */
3854 if (isencoded
) *isencoded
= 1;
3856 } else if (type
== REDIS_RDB_14BITLEN
) {
3857 /* Read a 14 bit len */
3858 if (fread(buf
+1,1,1,fp
) == 0) return REDIS_RDB_LENERR
;
3859 return ((buf
[0]&0x3F)<<8)|buf
[1];
3861 /* Read a 32 bit len */
3862 if (fread(&len
,4,1,fp
) == 0) return REDIS_RDB_LENERR
;
/* The value is read least-significant byte first: 1 byte for INT8
 * (sign-extended through a signed char cast), 2 bytes for INT16, 4 bytes
 * for INT32. Returns NULL when a read fails; unknown encodings panic. The
 * declarations of 'v' and 'val' and the test on 'encode' are not present
 * in this listing.
 *
 * NOTE(review): in "enc[3]<<24" the unsigned char is promoted to int
 * before shifting, so a byte >= 0x80 shifts a one into the sign bit, which
 * is undefined behaviour in C. Casting to an unsigned 32-bit type before
 * the shift avoids it. */
3867 /* Load an integer-encoded object from file 'fp', with the specified
3868 * encoding type 'enctype'. If encode is true the function may return
3869 * an integer-encoded object as reply, otherwise the returned object
3870 * will always be encoded as a raw string. */
3871 static robj
*rdbLoadIntegerObject(FILE *fp
, int enctype
, int encode
) {
3872 unsigned char enc
[4];
3875 if (enctype
== REDIS_RDB_ENC_INT8
) {
3876 if (fread(enc
,1,1,fp
) == 0) return NULL
;
3877 val
= (signed char)enc
[0];
3878 } else if (enctype
== REDIS_RDB_ENC_INT16
) {
3880 if (fread(enc
,2,1,fp
) == 0) return NULL
;
3881 v
= enc
[0]|(enc
[1]<<8);
3883 } else if (enctype
== REDIS_RDB_ENC_INT32
) {
3885 if (fread(enc
,4,1,fp
) == 0) return NULL
;
3886 v
= enc
[0]|(enc
[1]<<8)|(enc
[2]<<16)|(enc
[3]<<24);
3889 val
= 0; /* anti-warning */
3890 redisPanic("Unknown RDB integer encoding type");
3893 return createStringObjectFromLongLong(val
);
3895 return createObject(REDIS_STRING
,sdsfromlonglong(val
));
/* Load an LZF-compressed string from 'fp'. The record holds the
 * compressed length, the original length and the compressed bytes. The
 * compressed data is read into a temporary buffer and decompressed into a
 * newly allocated sds string of the original length, which becomes the
 * payload of the returned string object. Length read errors return NULL;
 * later failures jump to the 'err' label, whose code (and the release of
 * the temporary buffer) is not present in this listing.
 *
 * NOTE(review): both lengths come straight from the file and are used as
 * allocation sizes; no upper bound is visible here. */
3898 static robj
*rdbLoadLzfStringObject(FILE*fp
) {
3899 unsigned int len
, clen
;
3900 unsigned char *c
= NULL
;
3903 if ((clen
= rdbLoadLen(fp
,NULL
)) == REDIS_RDB_LENERR
) return NULL
;
3904 if ((len
= rdbLoadLen(fp
,NULL
)) == REDIS_RDB_LENERR
) return NULL
;
3905 if ((c
= zmalloc(clen
)) == NULL
) goto err
;
3906 if ((val
= sdsnewlen(NULL
,len
)) == NULL
) goto err
;
3907 if (fread(c
,clen
,1,fp
) == 0) goto err
;
3908 if (lzf_decompress(c
,clen
,val
,len
) == 0) goto err
;
3910 return createObject(REDIS_STRING
,val
);
/* Load a string object from 'fp'. The length field is read first; when it
 * turns out to be an encoding type, integer encodings are delegated to
 * rdbLoadIntegerObject() (passing 'encode' through) and LZF to
 * rdbLoadLzfStringObject(), while unknown encodings panic. Otherwise 'len'
 * bytes are read into a new sds string that becomes the payload of the
 * returned object. A length read error returns NULL. The handling of a
 * failed payload read is not present in this listing. */
3917 static robj
*rdbGenericLoadStringObject(FILE*fp
, int encode
) {
3922 len
= rdbLoadLen(fp
,&isencoded
);
3925 case REDIS_RDB_ENC_INT8
:
3926 case REDIS_RDB_ENC_INT16
:
3927 case REDIS_RDB_ENC_INT32
:
3928 return rdbLoadIntegerObject(fp
,len
,encode
);
3929 case REDIS_RDB_ENC_LZF
:
3930 return rdbLoadLzfStringObject(fp
);
3932 redisPanic("Unknown RDB encoding type");
3936 if (len
== REDIS_RDB_LENERR
) return NULL
;
3937 val
= sdsnewlen(NULL
,len
);
3938 if (len
&& fread(val
,len
,1,fp
) == 0) {
3942 return createObject(REDIS_STRING
,val
);
3945 static robj
*rdbLoadStringObject(FILE *fp
) {
3946 return rdbGenericLoadStringObject(fp
,0);
3949 static robj
*rdbLoadEncodedStringObject(FILE *fp
) {
3950 return rdbGenericLoadStringObject(fp
,1);
/* Read a double saved by rdbSaveDoubleValue(): a one-byte length, where
 * 255, 254 and 253 stand for -inf, +inf and NaN, followed otherwise by
 * that many bytes of text parsed with sscanf("%lg"). Returns -1 on read
 * errors and 0 for the special values.
 *
 * NOTE(review): 'len' is read as a single byte from the file, so up to 252
 * bytes can be read into 'buf'. The declaration of 'buf' is not present in
 * this listing -- confirm it has room for 252 bytes plus the terminator.
 * The result of sscanf() is also not checked on the visible line. */
3953 /* For information about double serialization check rdbSaveDoubleValue() */
3954 static int rdbLoadDoubleValue(FILE *fp
, double *val
) {
3958 if (fread(&len
,1,1,fp
) == 0) return -1;
3960 case 255: *val
= R_NegInf
; return 0;
3961 case 254: *val
= R_PosInf
; return 0;
3962 case 253: *val
= R_Nan
; return 0;
3964 if (fread(buf
,len
,1,fp
) == 0) return -1;
3966 sscanf(buf
, "%lg", val
);
/* Per-type behaviour visible below:
 *   - string: loaded and passed through tryObjectEncoding();
 *   - list / set: element count, then each element appended to the list
 *     or added to the dict (pre-expanded for large sets);
 *   - sorted set: each member is added to both the dict and the skiplist
 *     with a heap-allocated score, and gets one extra reference for the
 *     skiplist;
 *   - hash: starts as a zipmap and is converted to a real hash table when
 *     there are more than server.hash_max_zipmap_entries pairs or a field
 *     or value is longer than server.hash_max_zipmap_value.
 * The loop headers and the final return are not present in this listing.
 *
 * NOTE(review):
 *  1. The debug log prints ftell(fp) with "%d"; ftell() returns long, so
 *     the conversion does not match the argument ("%ld" would).
 *  2. The "return NULL" error paths inside the container branches leave
 *     the partially built object 'o' allocated, and in the sorted-set
 *     branch also 'score' and possibly 'ele'. Presumably callers treat a
 *     NULL result as fatal -- confirm before relying on it. */
3971 /* Load a Redis object of the specified type from the specified file.
3972 * On success a newly allocated object is returned, otherwise NULL. */
3973 static robj
*rdbLoadObject(int type
, FILE *fp
) {
3976 redisLog(REDIS_DEBUG
,"LOADING OBJECT %d (at %d)\n",type
,ftell(fp
));
3977 if (type
== REDIS_STRING
) {
3978 /* Read string value */
3979 if ((o
= rdbLoadEncodedStringObject(fp
)) == NULL
) return NULL
;
3980 o
= tryObjectEncoding(o
);
3981 } else if (type
== REDIS_LIST
|| type
== REDIS_SET
) {
3982 /* Read list/set value */
3985 if ((listlen
= rdbLoadLen(fp
,NULL
)) == REDIS_RDB_LENERR
) return NULL
;
3986 o
= (type
== REDIS_LIST
) ? createListObject() : createSetObject();
3987 /* It's faster to expand the dict to the right size asap in order
3988 * to avoid rehashing */
3989 if (type
== REDIS_SET
&& listlen
> DICT_HT_INITIAL_SIZE
)
3990 dictExpand(o
->ptr
,listlen
);
3991 /* Load every single element of the list/set */
3995 if ((ele
= rdbLoadEncodedStringObject(fp
)) == NULL
) return NULL
;
3996 ele
= tryObjectEncoding(ele
);
3997 if (type
== REDIS_LIST
) {
3998 listAddNodeTail((list
*)o
->ptr
,ele
);
4000 dictAdd((dict
*)o
->ptr
,ele
,NULL
);
4003 } else if (type
== REDIS_ZSET
) {
4004 /* Read sorted set value */
4008 if ((zsetlen
= rdbLoadLen(fp
,NULL
)) == REDIS_RDB_LENERR
) return NULL
;
4009 o
= createZsetObject();
4011 /* Load every single element of the sorted set */
4014 double *score
= zmalloc(sizeof(double));
4016 if ((ele
= rdbLoadEncodedStringObject(fp
)) == NULL
) return NULL
;
4017 ele
= tryObjectEncoding(ele
);
4018 if (rdbLoadDoubleValue(fp
,score
) == -1) return NULL
;
4019 dictAdd(zs
->dict
,ele
,score
);
4020 zslInsert(zs
->zsl
,*score
,ele
);
4021 incrRefCount(ele
); /* added to skiplist */
4023 } else if (type
== REDIS_HASH
) {
4026 if ((hashlen
= rdbLoadLen(fp
,NULL
)) == REDIS_RDB_LENERR
) return NULL
;
4027 o
= createHashObject();
4028 /* Too many entries? Use an hash table. */
4029 if (hashlen
> server
.hash_max_zipmap_entries
)
4030 convertToRealHash(o
);
4031 /* Load every key/value, then set it into the zipmap or hash
4032 * table, as needed. */
4036 if ((key
= rdbLoadStringObject(fp
)) == NULL
) return NULL
;
4037 if ((val
= rdbLoadStringObject(fp
)) == NULL
) return NULL
;
4038 /* If we are using a zipmap and there are too big values
4039 * the object is converted to real hash table encoding. */
4040 if (o
->encoding
!= REDIS_ENCODING_HT
&&
4041 (sdslen(key
->ptr
) > server
.hash_max_zipmap_value
||
4042 sdslen(val
->ptr
) > server
.hash_max_zipmap_value
))
4044 convertToRealHash(o
);
4047 if (o
->encoding
== REDIS_ENCODING_ZIPMAP
) {
4048 unsigned char *zm
= o
->ptr
;
4050 zm
= zipmapSet(zm
,key
->ptr
,sdslen(key
->ptr
),
4051 val
->ptr
,sdslen(val
->ptr
),NULL
);
4056 key
= tryObjectEncoding(key
);
4057 val
= tryObjectEncoding(val
);
4058 dictAdd((dict
*)o
->ptr
,key
,val
);
4062 redisPanic("Unknown object type");
4067 static int rdbLoad(char *filename
) {
4070 int type
, retval
, rdbver
;
4071 int swap_all_values
= 0;
4072 dict
*d
= server
.db
[0].dict
;
4073 redisDb
*db
= server
.db
+0;
4075 time_t expiretime
, now
= time(NULL
);
4076 long long loadedkeys
= 0;
4078 fp
= fopen(filename
,"r");
4079 if (!fp
) return REDIS_ERR
;
4080 if (fread(buf
,9,1,fp
) == 0) goto eoferr
;
4082 if (memcmp(buf
,"REDIS",5) != 0) {
4084 redisLog(REDIS_WARNING
,"Wrong signature trying to load DB from file");
4087 rdbver
= atoi(buf
+5);
4090 redisLog(REDIS_WARNING
,"Can't handle RDB format version %d",rdbver
);
4098 if ((type
= rdbLoadType(fp
)) == -1) goto eoferr
;
4099 if (type
== REDIS_EXPIRETIME
) {
4100 if ((expiretime
= rdbLoadTime(fp
)) == -1) goto eoferr
;
4101 /* We read the time so we need to read the object type again */
4102 if ((type
= rdbLoadType(fp
)) == -1) goto eoferr
;
4104 if (type
== REDIS_EOF
) break;
4105 /* Handle SELECT DB opcode as a special case */
4106 if (type
== REDIS_SELECTDB
) {
4107 if ((dbid
= rdbLoadLen(fp
,NULL
)) == REDIS_RDB_LENERR
)
4109 if (dbid
>= (unsigned)server
.dbnum
) {
4110 redisLog(REDIS_WARNING
,"FATAL: Data file was created with a Redis server configured to handle more than %d databases. Exiting\n", server
.dbnum
);
4113 db
= server
.db
+dbid
;
4118 if ((key
= rdbLoadStringObject(fp
)) == NULL
) goto eoferr
;
4120 if ((val
= rdbLoadObject(type
,fp
)) == NULL
) goto eoferr
;
4121 /* Check if the key already expired */
4122 if (expiretime
!= -1 && expiretime
< now
) {
4127 /* Add the new object in the hash table */
4128 retval
= dictAdd(d
,key
,val
);
4129 if (retval
== DICT_ERR
) {
4130 redisLog(REDIS_WARNING
,"Loading DB, duplicated key (%s) found! Unrecoverable error, exiting now.", key
->ptr
);
4134 /* Set the expire time if needed */
4135 if (expiretime
!= -1) setExpire(db
,key
,expiretime
);
4137 /* Handle swapping while loading big datasets when VM is on */
4139 /* If we detect we are hopeless about fitting something in memory
4140 * we just swap every new key on disk. Directly...
4141 * Note that's important to check for this condition before resorting
4142 * to random sampling, otherwise we may try to swap already
4144 if (swap_all_values
) {
4145 dictEntry
*de
= dictFind(d
,key
);
4147 /* de may be NULL since the key already expired */
4149 key
= dictGetEntryKey(de
);
4150 val
= dictGetEntryVal(de
);
4152 if (vmSwapObjectBlocking(key
,val
) == REDIS_OK
) {
4153 dictGetEntryVal(de
) = NULL
;
4159 /* If we have still some hope of having some value fitting memory
4160 * then we try random sampling. */
4161 if (!swap_all_values
&& server
.vm_enabled
&& (loadedkeys
% 5000) == 0) {
4162 while (zmalloc_used_memory() > server
.vm_max_memory
) {
4163 if (vmSwapOneObjectBlocking() == REDIS_ERR
) break;
4165 if (zmalloc_used_memory() > server
.vm_max_memory
)
4166 swap_all_values
= 1; /* We are already using too much mem */
4172 eoferr
: /* unexpected end of file is handled here with a fatal exit */
4173 redisLog(REDIS_WARNING
,"Short read or OOM loading DB. Unrecoverable error, aborting now.");
4175 return REDIS_ERR
; /* Just to avoid warning */
4178 /*================================== Shutdown =============================== */
4179 static int prepareForShutdown() {
4180 redisLog(REDIS_WARNING
,"User requested shutdown, saving DB...");
4181 /* Kill the saving child if there is a background saving in progress.
4182 We want to avoid race conditions, for instance our saving child may
4183 overwrite the synchronous saving done by SHUTDOWN. */
4184 if (server
.bgsavechildpid
!= -1) {
4185 redisLog(REDIS_WARNING
,"There is a live saving child. Killing it!");
4186 kill(server
.bgsavechildpid
,SIGKILL
);
4187 rdbRemoveTempFile(server
.bgsavechildpid
);
4189 if (server
.appendonly
) {
4190 /* Append only file: fsync() the AOF and exit */
4191 fsync(server
.appendfd
);
4192 if (server
.vm_enabled
) unlink(server
.vm_swap_file
);
4194 /* Snapshotting. Perform a SYNC SAVE and exit */
4195 if (rdbSave(server
.dbfilename
) == REDIS_OK
) {
4196 if (server
.daemonize
)
4197 unlink(server
.pidfile
);
4198 redisLog(REDIS_WARNING
,"%zu bytes used at exit",zmalloc_used_memory());
4200 /* Ooops.. error saving! The best we can do is to continue
4201 * operating. Note that if there was a background saving process,
4202 * in the next cron() Redis will be notified that the background
4203 * saving aborted, handling special stuff like slaves pending for
4204 * synchronization... */
4205 redisLog(REDIS_WARNING
,"Error trying to save the DB, can't exit");
4209 redisLog(REDIS_WARNING
,"Server exit now, bye bye...");
4213 /*================================== Commands =============================== */
4215 static void authCommand(redisClient
*c
) {
4216 if (!server
.requirepass
|| !strcmp(c
->argv
[1]->ptr
, server
.requirepass
)) {
4217 c
->authenticated
= 1;
4218 addReply(c
,shared
.ok
);
4220 c
->authenticated
= 0;
4221 addReplySds(c
,sdscatprintf(sdsempty(),"-ERR invalid password\r\n"));
4225 static void pingCommand(redisClient
*c
) {
4226 addReply(c
,shared
.pong
);
4229 static void echoCommand(redisClient
*c
) {
4230 addReplyBulk(c
,c
->argv
[1]);
4233 /*=================================== Strings =============================== */
4235 static void setGenericCommand(redisClient
*c
, int nx
, robj
*key
, robj
*val
, robj
*expire
) {
4237 long seconds
= 0; /* initialized to avoid an harmness warning */
4240 if (getLongFromObjectOrReply(c
, expire
, &seconds
, NULL
) != REDIS_OK
)
4243 addReplySds(c
,sdsnew("-ERR invalid expire time in SETEX\r\n"));
4248 touchWatchedKey(c
->db
,key
);
4249 if (nx
) deleteIfVolatile(c
->db
,key
);
4250 retval
= dictAdd(c
->db
->dict
,key
,val
);
4251 if (retval
== DICT_ERR
) {
4253 /* If the key is about a swapped value, we want a new key object
4254 * to overwrite the old. So we delete the old key in the database.
4255 * This will also make sure that swap pages about the old object
4256 * will be marked as free. */
4257 if (server
.vm_enabled
&& deleteIfSwapped(c
->db
,key
))
4259 dictReplace(c
->db
->dict
,key
,val
);
4262 addReply(c
,shared
.czero
);
4270 removeExpire(c
->db
,key
);
4271 if (expire
) setExpire(c
->db
,key
,time(NULL
)+seconds
);
4272 addReply(c
, nx
? shared
.cone
: shared
.ok
);
4275 static void setCommand(redisClient
*c
) {
4276 setGenericCommand(c
,0,c
->argv
[1],c
->argv
[2],NULL
);
4279 static void setnxCommand(redisClient
*c
) {
4280 setGenericCommand(c
,1,c
->argv
[1],c
->argv
[2],NULL
);
4283 static void setexCommand(redisClient
*c
) {
4284 setGenericCommand(c
,0,c
->argv
[1],c
->argv
[3],c
->argv
[2]);
4287 static int getGenericCommand(redisClient
*c
) {
4290 if ((o
= lookupKeyReadOrReply(c
,c
->argv
[1],shared
.nullbulk
)) == NULL
)
4293 if (o
->type
!= REDIS_STRING
) {
4294 addReply(c
,shared
.wrongtypeerr
);
4302 static void getCommand(redisClient
*c
) {
4303 getGenericCommand(c
);
4306 static void getsetCommand(redisClient
*c
) {
4307 if (getGenericCommand(c
) == REDIS_ERR
) return;
4308 if (dictAdd(c
->db
->dict
,c
->argv
[1],c
->argv
[2]) == DICT_ERR
) {
4309 dictReplace(c
->db
->dict
,c
->argv
[1],c
->argv
[2]);
4311 incrRefCount(c
->argv
[1]);
4313 incrRefCount(c
->argv
[2]);
4315 removeExpire(c
->db
,c
->argv
[1]);
4318 static void mgetCommand(redisClient
*c
) {
4321 addReplySds(c
,sdscatprintf(sdsempty(),"*%d\r\n",c
->argc
-1));
4322 for (j
= 1; j
< c
->argc
; j
++) {
4323 robj
*o
= lookupKeyRead(c
->db
,c
->argv
[j
]);
4325 addReply(c
,shared
.nullbulk
);
4327 if (o
->type
!= REDIS_STRING
) {
4328 addReply(c
,shared
.nullbulk
);
4336 static void msetGenericCommand(redisClient
*c
, int nx
) {
4337 int j
, busykeys
= 0;
4339 if ((c
->argc
% 2) == 0) {
4340 addReplySds(c
,sdsnew("-ERR wrong number of arguments for MSET\r\n"));
4343 /* Handle the NX flag. The MSETNX semantic is to return zero and
4344 * set nothing at all if at least one key already exists. */
4346 for (j
= 1; j
< c
->argc
; j
+= 2) {
4347 if (lookupKeyWrite(c
->db
,c
->argv
[j
]) != NULL
) {
4353 addReply(c
, shared
.czero
);
4357 for (j
= 1; j
< c
->argc
; j
+= 2) {
4360 c
->argv
[j
+1] = tryObjectEncoding(c
->argv
[j
+1]);
4361 retval
= dictAdd(c
->db
->dict
,c
->argv
[j
],c
->argv
[j
+1]);
4362 if (retval
== DICT_ERR
) {
4363 dictReplace(c
->db
->dict
,c
->argv
[j
],c
->argv
[j
+1]);
4364 incrRefCount(c
->argv
[j
+1]);
4366 incrRefCount(c
->argv
[j
]);
4367 incrRefCount(c
->argv
[j
+1]);
4369 removeExpire(c
->db
,c
->argv
[j
]);
4371 server
.dirty
+= (c
->argc
-1)/2;
4372 addReply(c
, nx
? shared
.cone
: shared
.ok
);
4375 static void msetCommand(redisClient
*c
) {
4376 msetGenericCommand(c
,0);
4379 static void msetnxCommand(redisClient
*c
) {
4380 msetGenericCommand(c
,1);
4383 static void incrDecrCommand(redisClient
*c
, long long incr
) {
4388 o
= lookupKeyWrite(c
->db
,c
->argv
[1]);
4389 if (o
!= NULL
&& checkType(c
,o
,REDIS_STRING
)) return;
4390 if (getLongLongFromObjectOrReply(c
,o
,&value
,NULL
) != REDIS_OK
) return;
4393 o
= createStringObjectFromLongLong(value
);
4394 retval
= dictAdd(c
->db
->dict
,c
->argv
[1],o
);
4395 if (retval
== DICT_ERR
) {
4396 dictReplace(c
->db
->dict
,c
->argv
[1],o
);
4397 removeExpire(c
->db
,c
->argv
[1]);
4399 incrRefCount(c
->argv
[1]);
4402 addReply(c
,shared
.colon
);
4404 addReply(c
,shared
.crlf
);
4407 static void incrCommand(redisClient
*c
) {
4408 incrDecrCommand(c
,1);
4411 static void decrCommand(redisClient
*c
) {
4412 incrDecrCommand(c
,-1);
4415 static void incrbyCommand(redisClient
*c
) {
4418 if (getLongLongFromObjectOrReply(c
, c
->argv
[2], &incr
, NULL
) != REDIS_OK
) return;
4419 incrDecrCommand(c
,incr
);
4422 static void decrbyCommand(redisClient
*c
) {
4425 if (getLongLongFromObjectOrReply(c
, c
->argv
[2], &incr
, NULL
) != REDIS_OK
) return;
4426 incrDecrCommand(c
,-incr
);
4429 static void appendCommand(redisClient
*c
) {
4434 o
= lookupKeyWrite(c
->db
,c
->argv
[1]);
4436 /* Create the key */
4437 retval
= dictAdd(c
->db
->dict
,c
->argv
[1],c
->argv
[2]);
4438 incrRefCount(c
->argv
[1]);
4439 incrRefCount(c
->argv
[2]);
4440 totlen
= stringObjectLen(c
->argv
[2]);
4444 de
= dictFind(c
->db
->dict
,c
->argv
[1]);
4447 o
= dictGetEntryVal(de
);
4448 if (o
->type
!= REDIS_STRING
) {
4449 addReply(c
,shared
.wrongtypeerr
);
4452 /* If the object is specially encoded or shared we have to make
4454 if (o
->refcount
!= 1 || o
->encoding
!= REDIS_ENCODING_RAW
) {
4455 robj
*decoded
= getDecodedObject(o
);
4457 o
= createStringObject(decoded
->ptr
, sdslen(decoded
->ptr
));
4458 decrRefCount(decoded
);
4459 dictReplace(c
->db
->dict
,c
->argv
[1],o
);
4462 if (c
->argv
[2]->encoding
== REDIS_ENCODING_RAW
) {
4463 o
->ptr
= sdscatlen(o
->ptr
,
4464 c
->argv
[2]->ptr
, sdslen(c
->argv
[2]->ptr
));
4466 o
->ptr
= sdscatprintf(o
->ptr
, "%ld",
4467 (unsigned long) c
->argv
[2]->ptr
);
4469 totlen
= sdslen(o
->ptr
);
4472 addReplySds(c
,sdscatprintf(sdsempty(),":%lu\r\n",(unsigned long)totlen
));
4475 static void substrCommand(redisClient
*c
) {
4477 long start
= atoi(c
->argv
[2]->ptr
);
4478 long end
= atoi(c
->argv
[3]->ptr
);
4479 size_t rangelen
, strlen
;
4482 if ((o
= lookupKeyReadOrReply(c
,c
->argv
[1],shared
.nullbulk
)) == NULL
||
4483 checkType(c
,o
,REDIS_STRING
)) return;
4485 o
= getDecodedObject(o
);
4486 strlen
= sdslen(o
->ptr
);
4488 /* convert negative indexes */
4489 if (start
< 0) start
= strlen
+start
;
4490 if (end
< 0) end
= strlen
+end
;
4491 if (start
< 0) start
= 0;
4492 if (end
< 0) end
= 0;
4494 /* indexes sanity checks */
4495 if (start
> end
|| (size_t)start
>= strlen
) {
4496 /* Out of range start or start > end result in null reply */
4497 addReply(c
,shared
.nullbulk
);
4501 if ((size_t)end
>= strlen
) end
= strlen
-1;
4502 rangelen
= (end
-start
)+1;
4504 /* Return the result */
4505 addReplySds(c
,sdscatprintf(sdsempty(),"$%zu\r\n",rangelen
));
4506 range
= sdsnewlen((char*)o
->ptr
+start
,rangelen
);
4507 addReplySds(c
,range
);
4508 addReply(c
,shared
.crlf
);
4512 /* ========================= Type agnostic commands ========================= */
4514 static void delCommand(redisClient
*c
) {
4517 for (j
= 1; j
< c
->argc
; j
++) {
4518 if (deleteKey(c
->db
,c
->argv
[j
])) {
4519 touchWatchedKey(c
->db
,c
->argv
[j
]);
4524 addReplyLongLong(c
,deleted
);
4527 static void existsCommand(redisClient
*c
) {
4528 expireIfNeeded(c
->db
,c
->argv
[1]);
4529 if (dictFind(c
->db
->dict
,c
->argv
[1])) {
4530 addReply(c
, shared
.cone
);
4532 addReply(c
, shared
.czero
);
4536 static void selectCommand(redisClient
*c
) {
4537 int id
= atoi(c
->argv
[1]->ptr
);
4539 if (selectDb(c
,id
) == REDIS_ERR
) {
4540 addReplySds(c
,sdsnew("-ERR invalid DB index\r\n"));
4542 addReply(c
,shared
.ok
);
4546 static void randomkeyCommand(redisClient
*c
) {
4551 de
= dictGetRandomKey(c
->db
->dict
);
4552 if (!de
|| expireIfNeeded(c
->db
,dictGetEntryKey(de
)) == 0) break;
4556 addReply(c
,shared
.nullbulk
);
4560 key
= dictGetEntryKey(de
);
4561 if (server
.vm_enabled
) {
4562 key
= dupStringObject(key
);
4563 addReplyBulk(c
,key
);
4566 addReplyBulk(c
,key
);
4570 static void keysCommand(redisClient
*c
) {
4573 sds pattern
= c
->argv
[1]->ptr
;
4574 int plen
= sdslen(pattern
);
4575 unsigned long numkeys
= 0;
4576 robj
*lenobj
= createObject(REDIS_STRING
,NULL
);
4578 di
= dictGetIterator(c
->db
->dict
);
4580 decrRefCount(lenobj
);
4581 while((de
= dictNext(di
)) != NULL
) {
4582 robj
*keyobj
= dictGetEntryKey(de
);
4584 sds key
= keyobj
->ptr
;
4585 if ((pattern
[0] == '*' && pattern
[1] == '\0') ||
4586 stringmatchlen(pattern
,plen
,key
,sdslen(key
),0)) {
4587 if (expireIfNeeded(c
->db
,keyobj
) == 0) {
4588 addReplyBulk(c
,keyobj
);
4593 dictReleaseIterator(di
);
4594 lenobj
->ptr
= sdscatprintf(sdsempty(),"*%lu\r\n",numkeys
);
4597 static void dbsizeCommand(redisClient
*c
) {
4599 sdscatprintf(sdsempty(),":%lu\r\n",dictSize(c
->db
->dict
)));
4602 static void lastsaveCommand(redisClient
*c
) {
4604 sdscatprintf(sdsempty(),":%lu\r\n",server
.lastsave
));
4607 static void typeCommand(redisClient
*c
) {
4611 o
= lookupKeyRead(c
->db
,c
->argv
[1]);
4616 case REDIS_STRING
: type
= "+string"; break;
4617 case REDIS_LIST
: type
= "+list"; break;
4618 case REDIS_SET
: type
= "+set"; break;
4619 case REDIS_ZSET
: type
= "+zset"; break;
4620 case REDIS_HASH
: type
= "+hash"; break;
4621 default: type
= "+unknown"; break;
4624 addReplySds(c
,sdsnew(type
));
4625 addReply(c
,shared
.crlf
);
4628 static void saveCommand(redisClient
*c
) {
4629 if (server
.bgsavechildpid
!= -1) {
4630 addReplySds(c
,sdsnew("-ERR background save in progress\r\n"));
4633 if (rdbSave(server
.dbfilename
) == REDIS_OK
) {
4634 addReply(c
,shared
.ok
);
4636 addReply(c
,shared
.err
);
4640 static void bgsaveCommand(redisClient
*c
) {
4641 if (server
.bgsavechildpid
!= -1) {
4642 addReplySds(c
,sdsnew("-ERR background save already in progress\r\n"));
4645 if (rdbSaveBackground(server
.dbfilename
) == REDIS_OK
) {
4646 char *status
= "+Background saving started\r\n";
4647 addReplySds(c
,sdsnew(status
));
4649 addReply(c
,shared
.err
);
4653 static void shutdownCommand(redisClient
*c
) {
4654 if (prepareForShutdown() == REDIS_OK
)
4656 addReplySds(c
, sdsnew("-ERR Errors trying to SHUTDOWN. Check logs.\r\n"));
4659 static void renameGenericCommand(redisClient
*c
, int nx
) {
4662 /* To use the same key as src and dst is probably an error */
4663 if (sdscmp(c
->argv
[1]->ptr
,c
->argv
[2]->ptr
) == 0) {
4664 addReply(c
,shared
.sameobjecterr
);
4668 if ((o
= lookupKeyWriteOrReply(c
,c
->argv
[1],shared
.nokeyerr
)) == NULL
)
4672 deleteIfVolatile(c
->db
,c
->argv
[2]);
4673 if (dictAdd(c
->db
->dict
,c
->argv
[2],o
) == DICT_ERR
) {
4676 addReply(c
,shared
.czero
);
4679 dictReplace(c
->db
->dict
,c
->argv
[2],o
);
4681 incrRefCount(c
->argv
[2]);
4683 deleteKey(c
->db
,c
->argv
[1]);
4685 addReply(c
,nx
? shared
.cone
: shared
.ok
);
4688 static void renameCommand(redisClient
*c
) {
4689 renameGenericCommand(c
,0);
4692 static void renamenxCommand(redisClient
*c
) {
4693 renameGenericCommand(c
,1);
4696 static void moveCommand(redisClient
*c
) {
4701 /* Obtain source and target DB pointers */
4704 if (selectDb(c
,atoi(c
->argv
[2]->ptr
)) == REDIS_ERR
) {
4705 addReply(c
,shared
.outofrangeerr
);
4709 selectDb(c
,srcid
); /* Back to the source DB */
4711 /* If the user is moving using as target the same
4712 * DB as the source DB it is probably an error. */
4714 addReply(c
,shared
.sameobjecterr
);
4718 /* Check if the element exists and get a reference */
4719 o
= lookupKeyWrite(c
->db
,c
->argv
[1]);
4721 addReply(c
,shared
.czero
);
4725 /* Try to add the element to the target DB */
4726 deleteIfVolatile(dst
,c
->argv
[1]);
4727 if (dictAdd(dst
->dict
,c
->argv
[1],o
) == DICT_ERR
) {
4728 addReply(c
,shared
.czero
);
4731 incrRefCount(c
->argv
[1]);
4734 /* OK! key moved, free the entry in the source DB */
4735 deleteKey(src
,c
->argv
[1]);
4737 addReply(c
,shared
.cone
);
4740 /* =================================== Lists ================================ */
4741 static void pushGenericCommand(redisClient
*c
, int where
) {
4745 lobj
= lookupKeyWrite(c
->db
,c
->argv
[1]);
4747 if (handleClientsWaitingListPush(c
,c
->argv
[1],c
->argv
[2])) {
4748 addReply(c
,shared
.cone
);
4751 lobj
= createListObject();
4753 if (where
== REDIS_HEAD
) {
4754 listAddNodeHead(list
,c
->argv
[2]);
4756 listAddNodeTail(list
,c
->argv
[2]);
4758 dictAdd(c
->db
->dict
,c
->argv
[1],lobj
);
4759 incrRefCount(c
->argv
[1]);
4760 incrRefCount(c
->argv
[2]);
4762 if (lobj
->type
!= REDIS_LIST
) {
4763 addReply(c
,shared
.wrongtypeerr
);
4766 if (handleClientsWaitingListPush(c
,c
->argv
[1],c
->argv
[2])) {
4767 addReply(c
,shared
.cone
);
4771 if (where
== REDIS_HEAD
) {
4772 listAddNodeHead(list
,c
->argv
[2]);
4774 listAddNodeTail(list
,c
->argv
[2]);
4776 incrRefCount(c
->argv
[2]);
4779 addReplyLongLong(c
,listLength(list
));
4782 static void lpushCommand(redisClient
*c
) {
4783 pushGenericCommand(c
,REDIS_HEAD
);
4786 static void rpushCommand(redisClient
*c
) {
4787 pushGenericCommand(c
,REDIS_TAIL
);
4790 static void llenCommand(redisClient
*c
) {
4794 if ((o
= lookupKeyReadOrReply(c
,c
->argv
[1],shared
.czero
)) == NULL
||
4795 checkType(c
,o
,REDIS_LIST
)) return;
4798 addReplyUlong(c
,listLength(l
));
4801 static void lindexCommand(redisClient
*c
) {
4803 int index
= atoi(c
->argv
[2]->ptr
);
4807 if ((o
= lookupKeyReadOrReply(c
,c
->argv
[1],shared
.nullbulk
)) == NULL
||
4808 checkType(c
,o
,REDIS_LIST
)) return;
4811 ln
= listIndex(list
, index
);
4813 addReply(c
,shared
.nullbulk
);
4815 robj
*ele
= listNodeValue(ln
);
4816 addReplyBulk(c
,ele
);
4820 static void lsetCommand(redisClient
*c
) {
4822 int index
= atoi(c
->argv
[2]->ptr
);
4826 if ((o
= lookupKeyWriteOrReply(c
,c
->argv
[1],shared
.nokeyerr
)) == NULL
||
4827 checkType(c
,o
,REDIS_LIST
)) return;
4830 ln
= listIndex(list
, index
);
4832 addReply(c
,shared
.outofrangeerr
);
4834 robj
*ele
= listNodeValue(ln
);
4837 listNodeValue(ln
) = c
->argv
[3];
4838 incrRefCount(c
->argv
[3]);
4839 addReply(c
,shared
.ok
);
4844 static void popGenericCommand(redisClient
*c
, int where
) {
4849 if ((o
= lookupKeyWriteOrReply(c
,c
->argv
[1],shared
.nullbulk
)) == NULL
||
4850 checkType(c
,o
,REDIS_LIST
)) return;
4853 if (where
== REDIS_HEAD
)
4854 ln
= listFirst(list
);
4856 ln
= listLast(list
);
4859 addReply(c
,shared
.nullbulk
);
4861 robj
*ele
= listNodeValue(ln
);
4862 addReplyBulk(c
,ele
);
4863 listDelNode(list
,ln
);
4864 if (listLength(list
) == 0) deleteKey(c
->db
,c
->argv
[1]);
4869 static void lpopCommand(redisClient
*c
) {
4870 popGenericCommand(c
,REDIS_HEAD
);
4873 static void rpopCommand(redisClient
*c
) {
4874 popGenericCommand(c
,REDIS_TAIL
);
4877 static void lrangeCommand(redisClient
*c
) {
4879 int start
= atoi(c
->argv
[2]->ptr
);
4880 int end
= atoi(c
->argv
[3]->ptr
);
4887 if ((o
= lookupKeyReadOrReply(c
,c
->argv
[1],shared
.emptymultibulk
)) == NULL
4888 || checkType(c
,o
,REDIS_LIST
)) return;
4890 llen
= listLength(list
);
4892 /* convert negative indexes */
4893 if (start
< 0) start
= llen
+start
;
4894 if (end
< 0) end
= llen
+end
;
4895 if (start
< 0) start
= 0;
4896 if (end
< 0) end
= 0;
4898 /* indexes sanity checks */
4899 if (start
> end
|| start
>= llen
) {
4900 /* Out of range start or start > end result in empty list */
4901 addReply(c
,shared
.emptymultibulk
);
4904 if (end
>= llen
) end
= llen
-1;
4905 rangelen
= (end
-start
)+1;
4907 /* Return the result in form of a multi-bulk reply */
4908 ln
= listIndex(list
, start
);
4909 addReplySds(c
,sdscatprintf(sdsempty(),"*%d\r\n",rangelen
));
4910 for (j
= 0; j
< rangelen
; j
++) {
4911 ele
= listNodeValue(ln
);
4912 addReplyBulk(c
,ele
);
4917 static void ltrimCommand(redisClient
*c
) {
4919 int start
= atoi(c
->argv
[2]->ptr
);
4920 int end
= atoi(c
->argv
[3]->ptr
);
4922 int j
, ltrim
, rtrim
;
4926 if ((o
= lookupKeyWriteOrReply(c
,c
->argv
[1],shared
.ok
)) == NULL
||
4927 checkType(c
,o
,REDIS_LIST
)) return;
4929 llen
= listLength(list
);
4931 /* convert negative indexes */
4932 if (start
< 0) start
= llen
+start
;
4933 if (end
< 0) end
= llen
+end
;
4934 if (start
< 0) start
= 0;
4935 if (end
< 0) end
= 0;
4937 /* indexes sanity checks */
4938 if (start
> end
|| start
>= llen
) {
4939 /* Out of range start or start > end result in empty list */
4943 if (end
>= llen
) end
= llen
-1;
4948 /* Remove list elements to perform the trim */
4949 for (j
= 0; j
< ltrim
; j
++) {
4950 ln
= listFirst(list
);
4951 listDelNode(list
,ln
);
4953 for (j
= 0; j
< rtrim
; j
++) {
4954 ln
= listLast(list
);
4955 listDelNode(list
,ln
);
4957 if (listLength(list
) == 0) deleteKey(c
->db
,c
->argv
[1]);
4959 addReply(c
,shared
.ok
);
4962 static void lremCommand(redisClient
*c
) {
4965 listNode
*ln
, *next
;
4966 int toremove
= atoi(c
->argv
[2]->ptr
);
4970 if ((o
= lookupKeyWriteOrReply(c
,c
->argv
[1],shared
.czero
)) == NULL
||
4971 checkType(c
,o
,REDIS_LIST
)) return;
4975 toremove
= -toremove
;
4978 ln
= fromtail
? list
->tail
: list
->head
;
4980 robj
*ele
= listNodeValue(ln
);
4982 next
= fromtail
? ln
->prev
: ln
->next
;
4983 if (equalStringObjects(ele
,c
->argv
[3])) {
4984 listDelNode(list
,ln
);
4987 if (toremove
&& removed
== toremove
) break;
4991 if (listLength(list
) == 0) deleteKey(c
->db
,c
->argv
[1]);
4992 addReplySds(c
,sdscatprintf(sdsempty(),":%d\r\n",removed
));
4995 /* This is the semantic of this command:
4996 * RPOPLPUSH srclist dstlist:
4997 * IF LLEN(srclist) > 0
4998 * element = RPOP srclist
4999 * LPUSH dstlist element
5006 * The idea is to be able to get an element from a list in a reliable way
5007 * since the element is not just returned but pushed against another list
5008 * as well. This command was originally proposed by Ezra Zygmuntowicz.
5010 static void rpoplpushcommand(redisClient
*c
) {
5015 if ((sobj
= lookupKeyWriteOrReply(c
,c
->argv
[1],shared
.nullbulk
)) == NULL
||
5016 checkType(c
,sobj
,REDIS_LIST
)) return;
5017 srclist
= sobj
->ptr
;
5018 ln
= listLast(srclist
);
5021 addReply(c
,shared
.nullbulk
);
5023 robj
*dobj
= lookupKeyWrite(c
->db
,c
->argv
[2]);
5024 robj
*ele
= listNodeValue(ln
);
5027 if (dobj
&& dobj
->type
!= REDIS_LIST
) {
5028 addReply(c
,shared
.wrongtypeerr
);
5032 /* Add the element to the target list (unless it's directly
5033 * passed to some BLPOP-ing client */
5034 if (!handleClientsWaitingListPush(c
,c
->argv
[2],ele
)) {
5036 /* Create the list if the key does not exist */
5037 dobj
= createListObject();
5038 dictAdd(c
->db
->dict
,c
->argv
[2],dobj
);
5039 incrRefCount(c
->argv
[2]);
5041 dstlist
= dobj
->ptr
;
5042 listAddNodeHead(dstlist
,ele
);
5046 /* Send the element to the client as reply as well */
5047 addReplyBulk(c
,ele
);
5049 /* Finally remove the element from the source list */
5050 listDelNode(srclist
,ln
);
5051 if (listLength(srclist
) == 0) deleteKey(c
->db
,c
->argv
[1]);
5056 /* ==================================== Sets ================================ */
5058 static void saddCommand(redisClient
*c
) {
5061 set
= lookupKeyWrite(c
->db
,c
->argv
[1]);
5063 set
= createSetObject();
5064 dictAdd(c
->db
->dict
,c
->argv
[1],set
);
5065 incrRefCount(c
->argv
[1]);
5067 if (set
->type
!= REDIS_SET
) {
5068 addReply(c
,shared
.wrongtypeerr
);
5072 if (dictAdd(set
->ptr
,c
->argv
[2],NULL
) == DICT_OK
) {
5073 incrRefCount(c
->argv
[2]);
5075 addReply(c
,shared
.cone
);
5077 addReply(c
,shared
.czero
);
5081 static void sremCommand(redisClient
*c
) {
5084 if ((set
= lookupKeyWriteOrReply(c
,c
->argv
[1],shared
.czero
)) == NULL
||
5085 checkType(c
,set
,REDIS_SET
)) return;
5087 if (dictDelete(set
->ptr
,c
->argv
[2]) == DICT_OK
) {
5089 if (htNeedsResize(set
->ptr
)) dictResize(set
->ptr
);
5090 if (dictSize((dict
*)set
->ptr
) == 0) deleteKey(c
->db
,c
->argv
[1]);
5091 addReply(c
,shared
.cone
);
5093 addReply(c
,shared
.czero
);
5097 static void smoveCommand(redisClient
*c
) {
5098 robj
*srcset
, *dstset
;
5100 srcset
= lookupKeyWrite(c
->db
,c
->argv
[1]);
5101 dstset
= lookupKeyWrite(c
->db
,c
->argv
[2]);
5103 /* If the source key does not exist return 0, if it's of the wrong type
5105 if (srcset
== NULL
|| srcset
->type
!= REDIS_SET
) {
5106 addReply(c
, srcset
? shared
.wrongtypeerr
: shared
.czero
);
5109 /* Error if the destination key is not a set as well */
5110 if (dstset
&& dstset
->type
!= REDIS_SET
) {
5111 addReply(c
,shared
.wrongtypeerr
);
5114 /* Remove the element from the source set */
5115 if (dictDelete(srcset
->ptr
,c
->argv
[3]) == DICT_ERR
) {
5116 /* Key not found in the src set! return zero */
5117 addReply(c
,shared
.czero
);
5120 if (dictSize((dict
*)srcset
->ptr
) == 0 && srcset
!= dstset
)
5121 deleteKey(c
->db
,c
->argv
[1]);
5123 /* Add the element to the destination set */
5125 dstset
= createSetObject();
5126 dictAdd(c
->db
->dict
,c
->argv
[2],dstset
);
5127 incrRefCount(c
->argv
[2]);
5129 if (dictAdd(dstset
->ptr
,c
->argv
[3],NULL
) == DICT_OK
)
5130 incrRefCount(c
->argv
[3]);
5131 addReply(c
,shared
.cone
);
5134 static void sismemberCommand(redisClient
*c
) {
5137 if ((set
= lookupKeyReadOrReply(c
,c
->argv
[1],shared
.czero
)) == NULL
||
5138 checkType(c
,set
,REDIS_SET
)) return;
5140 if (dictFind(set
->ptr
,c
->argv
[2]))
5141 addReply(c
,shared
.cone
);
5143 addReply(c
,shared
.czero
);
5146 static void scardCommand(redisClient
*c
) {
5150 if ((o
= lookupKeyReadOrReply(c
,c
->argv
[1],shared
.czero
)) == NULL
||
5151 checkType(c
,o
,REDIS_SET
)) return;
5154 addReplyUlong(c
,dictSize(s
));
5157 static void spopCommand(redisClient
*c
) {
5161 if ((set
= lookupKeyWriteOrReply(c
,c
->argv
[1],shared
.nullbulk
)) == NULL
||
5162 checkType(c
,set
,REDIS_SET
)) return;
5164 de
= dictGetRandomKey(set
->ptr
);
5166 addReply(c
,shared
.nullbulk
);
5168 robj
*ele
= dictGetEntryKey(de
);
5170 addReplyBulk(c
,ele
);
5171 dictDelete(set
->ptr
,ele
);
5172 if (htNeedsResize(set
->ptr
)) dictResize(set
->ptr
);
5173 if (dictSize((dict
*)set
->ptr
) == 0) deleteKey(c
->db
,c
->argv
[1]);
5178 static void srandmemberCommand(redisClient
*c
) {
5182 if ((set
= lookupKeyReadOrReply(c
,c
->argv
[1],shared
.nullbulk
)) == NULL
||
5183 checkType(c
,set
,REDIS_SET
)) return;
5185 de
= dictGetRandomKey(set
->ptr
);
5187 addReply(c
,shared
.nullbulk
);
5189 robj
*ele
= dictGetEntryKey(de
);
5191 addReplyBulk(c
,ele
);
5195 static int qsortCompareSetsByCardinality(const void *s1
, const void *s2
) {
5196 dict
**d1
= (void*) s1
, **d2
= (void*) s2
;
5198 return dictSize(*d1
)-dictSize(*d2
);
5201 static void sinterGenericCommand(redisClient
*c
, robj
**setskeys
, unsigned long setsnum
, robj
*dstkey
) {
5202 dict
**dv
= zmalloc(sizeof(dict
*)*setsnum
);
5205 robj
*lenobj
= NULL
, *dstset
= NULL
;
5206 unsigned long j
, cardinality
= 0;
5208 for (j
= 0; j
< setsnum
; j
++) {
5212 lookupKeyWrite(c
->db
,setskeys
[j
]) :
5213 lookupKeyRead(c
->db
,setskeys
[j
]);
5217 if (deleteKey(c
->db
,dstkey
))
5219 addReply(c
,shared
.czero
);
5221 addReply(c
,shared
.emptymultibulk
);
5225 if (setobj
->type
!= REDIS_SET
) {
5227 addReply(c
,shared
.wrongtypeerr
);
5230 dv
[j
] = setobj
->ptr
;
5232 /* Sort sets from the smallest to largest, this will improve our
5233 * algorithm's performance */
5234 qsort(dv
,setsnum
,sizeof(dict
*),qsortCompareSetsByCardinality
);
5236 /* The first thing we should output is the total number of elements...
5237 * since this is a multi-bulk write, but at this stage we don't know
5238 * the intersection set size, so we use a trick, append an empty object
5239 * to the output list and save the pointer to later modify it with the
5242 lenobj
= createObject(REDIS_STRING
,NULL
);
5244 decrRefCount(lenobj
);
5246 /* If we have a target key where to store the resulting set
5247 * create this key with an empty set inside */
5248 dstset
= createSetObject();
5251 /* Iterate all the elements of the first (smallest) set, and test
5252 * the element against all the other sets, if at least one set does
5253 * not include the element it is discarded */
5254 di
= dictGetIterator(dv
[0]);
5256 while((de
= dictNext(di
)) != NULL
) {
5259 for (j
= 1; j
< setsnum
; j
++)
5260 if (dictFind(dv
[j
],dictGetEntryKey(de
)) == NULL
) break;
5262 continue; /* at least one set does not contain the member */
5263 ele
= dictGetEntryKey(de
);
5265 addReplyBulk(c
,ele
);
5268 dictAdd(dstset
->ptr
,ele
,NULL
);
5272 dictReleaseIterator(di
);
5275 /* Store the resulting set into the target, if the intersection
5276 * is not an empty set. */
5277 deleteKey(c
->db
,dstkey
);
5278 if (dictSize((dict
*)dstset
->ptr
) > 0) {
5279 dictAdd(c
->db
->dict
,dstkey
,dstset
);
5280 incrRefCount(dstkey
);
5281 addReplyLongLong(c
,dictSize((dict
*)dstset
->ptr
));
5283 decrRefCount(dstset
);
5284 addReply(c
,shared
.czero
);
5288 lenobj
->ptr
= sdscatprintf(sdsempty(),"*%lu\r\n",cardinality
);
5293 static void sinterCommand(redisClient
*c
) {
5294 sinterGenericCommand(c
,c
->argv
+1,c
->argc
-1,NULL
);
5297 static void sinterstoreCommand(redisClient
*c
) {
5298 sinterGenericCommand(c
,c
->argv
+2,c
->argc
-2,c
->argv
[1]);
5301 #define REDIS_OP_UNION 0
5302 #define REDIS_OP_DIFF 1
5303 #define REDIS_OP_INTER 2
5305 static void sunionDiffGenericCommand(redisClient
*c
, robj
**setskeys
, int setsnum
, robj
*dstkey
, int op
) {
5306 dict
**dv
= zmalloc(sizeof(dict
*)*setsnum
);
5309 robj
*dstset
= NULL
;
5310 int j
, cardinality
= 0;
5312 for (j
= 0; j
< setsnum
; j
++) {
5316 lookupKeyWrite(c
->db
,setskeys
[j
]) :
5317 lookupKeyRead(c
->db
,setskeys
[j
]);
5322 if (setobj
->type
!= REDIS_SET
) {
5324 addReply(c
,shared
.wrongtypeerr
);
5327 dv
[j
] = setobj
->ptr
;
5330 /* We need a temp set object to store our union. If the dstkey
5331 * is not NULL (that is, we are inside an SUNIONSTORE operation) then
5332 * this set object will be the resulting object to set into the target key*/
5333 dstset
= createSetObject();
5335 /* Iterate all the elements of all the sets, add every element a single
5336 * time to the result set */
5337 for (j
= 0; j
< setsnum
; j
++) {
5338 if (op
== REDIS_OP_DIFF
&& j
== 0 && !dv
[j
]) break; /* result set is empty */
5339 if (!dv
[j
]) continue; /* non existing keys are like empty sets */
5341 di
= dictGetIterator(dv
[j
]);
5343 while((de
= dictNext(di
)) != NULL
) {
5346 /* dictAdd will not add the same element multiple times */
5347 ele
= dictGetEntryKey(de
);
5348 if (op
== REDIS_OP_UNION
|| j
== 0) {
5349 if (dictAdd(dstset
->ptr
,ele
,NULL
) == DICT_OK
) {
5353 } else if (op
== REDIS_OP_DIFF
) {
5354 if (dictDelete(dstset
->ptr
,ele
) == DICT_OK
) {
5359 dictReleaseIterator(di
);
5361 /* result set is empty? Exit asap. */
5362 if (op
== REDIS_OP_DIFF
&& cardinality
== 0) break;
5365 /* Output the content of the resulting set, if not in STORE mode */
5367 addReplySds(c
,sdscatprintf(sdsempty(),"*%d\r\n",cardinality
));
5368 di
= dictGetIterator(dstset
->ptr
);
5369 while((de
= dictNext(di
)) != NULL
) {
5372 ele
= dictGetEntryKey(de
);
5373 addReplyBulk(c
,ele
);
5375 dictReleaseIterator(di
);
5376 decrRefCount(dstset
);
5378 /* If we have a target key where to store the resulting set
5379 * create this key with the result set inside */
5380 deleteKey(c
->db
,dstkey
);
5381 if (dictSize((dict
*)dstset
->ptr
) > 0) {
5382 dictAdd(c
->db
->dict
,dstkey
,dstset
);
5383 incrRefCount(dstkey
);
5384 addReplyLongLong(c
,dictSize((dict
*)dstset
->ptr
));
5386 decrRefCount(dstset
);
5387 addReply(c
,shared
.czero
);
5394 static void sunionCommand(redisClient
*c
) {
5395 sunionDiffGenericCommand(c
,c
->argv
+1,c
->argc
-1,NULL
,REDIS_OP_UNION
);
5398 static void sunionstoreCommand(redisClient
*c
) {
5399 sunionDiffGenericCommand(c
,c
->argv
+2,c
->argc
-2,c
->argv
[1],REDIS_OP_UNION
);
5402 static void sdiffCommand(redisClient
*c
) {
5403 sunionDiffGenericCommand(c
,c
->argv
+1,c
->argc
-1,NULL
,REDIS_OP_DIFF
);
5406 static void sdiffstoreCommand(redisClient
*c
) {
5407 sunionDiffGenericCommand(c
,c
->argv
+2,c
->argc
-2,c
->argv
[1],REDIS_OP_DIFF
);
5410 /* ==================================== ZSets =============================== */
5412 /* ZSETs are ordered sets using two data structures to hold the same elements
5413 * in order to get O(log(N)) INSERT and REMOVE operations into a sorted
5416 * The elements are added to a hash table mapping Redis objects to scores.
5417 * At the same time the elements are added to a skip list mapping scores
5418 * to Redis objects (so objects are sorted by scores in this "view"). */
5420 /* This skiplist implementation is almost a C translation of the original
5421 * algorithm described by William Pugh in "Skip Lists: A Probabilistic
5422 * Alternative to Balanced Trees", modified in three ways:
5423 * a) this implementation allows for repeated values.
5424 * b) the comparison is not just by key (our 'score') but by satellite data.
5425 * c) there is a back pointer, so it's a doubly linked list with the back
5426 * pointers being only at "level 1". This allows to traverse the list
5427 * from tail to head, useful for ZREVRANGE. */
5429 static zskiplistNode
*zslCreateNode(int level
, double score
, robj
*obj
) {
5430 zskiplistNode
*zn
= zmalloc(sizeof(*zn
));
5432 zn
->forward
= zmalloc(sizeof(zskiplistNode
*) * level
);
5434 zn
->span
= zmalloc(sizeof(unsigned int) * (level
- 1));
5442 static zskiplist
*zslCreate(void) {
5446 zsl
= zmalloc(sizeof(*zsl
));
5449 zsl
->header
= zslCreateNode(ZSKIPLIST_MAXLEVEL
,0,NULL
);
5450 for (j
= 0; j
< ZSKIPLIST_MAXLEVEL
; j
++) {
5451 zsl
->header
->forward
[j
] = NULL
;
5453 /* span has space for ZSKIPLIST_MAXLEVEL-1 elements */
5454 if (j
< ZSKIPLIST_MAXLEVEL
-1)
5455 zsl
->header
->span
[j
] = 0;
5457 zsl
->header
->backward
= NULL
;
5462 static void zslFreeNode(zskiplistNode
*node
) {
5463 decrRefCount(node
->obj
);
5464 zfree(node
->forward
);
5469 static void zslFree(zskiplist
*zsl
) {
5470 zskiplistNode
*node
= zsl
->header
->forward
[0], *next
;
5472 zfree(zsl
->header
->forward
);
5473 zfree(zsl
->header
->span
);
5476 next
= node
->forward
[0];
5483 static int zslRandomLevel(void) {
5485 while ((random()&0xFFFF) < (ZSKIPLIST_P
* 0xFFFF))
5487 return (level
<ZSKIPLIST_MAXLEVEL
) ? level
: ZSKIPLIST_MAXLEVEL
;
5490 static void zslInsert(zskiplist
*zsl
, double score
, robj
*obj
) {
5491 zskiplistNode
*update
[ZSKIPLIST_MAXLEVEL
], *x
;
5492 unsigned int rank
[ZSKIPLIST_MAXLEVEL
];
5496 for (i
= zsl
->level
-1; i
>= 0; i
--) {
5497 /* store rank that is crossed to reach the insert position */
5498 rank
[i
] = i
== (zsl
->level
-1) ? 0 : rank
[i
+1];
5500 while (x
->forward
[i
] &&
5501 (x
->forward
[i
]->score
< score
||
5502 (x
->forward
[i
]->score
== score
&&
5503 compareStringObjects(x
->forward
[i
]->obj
,obj
) < 0))) {
5504 rank
[i
] += i
> 0 ? x
->span
[i
-1] : 1;
5509 /* we assume the key is not already inside, since we allow duplicated
5510 * scores, and the re-insertion of score and redis object should never
5511 * happen since the caller of zslInsert() should test in the hash table
5512 * if the element is already inside or not. */
5513 level
= zslRandomLevel();
5514 if (level
> zsl
->level
) {
5515 for (i
= zsl
->level
; i
< level
; i
++) {
5517 update
[i
] = zsl
->header
;
5518 update
[i
]->span
[i
-1] = zsl
->length
;
5522 x
= zslCreateNode(level
,score
,obj
);
5523 for (i
= 0; i
< level
; i
++) {
5524 x
->forward
[i
] = update
[i
]->forward
[i
];
5525 update
[i
]->forward
[i
] = x
;
5527 /* update span covered by update[i] as x is inserted here */
5529 x
->span
[i
-1] = update
[i
]->span
[i
-1] - (rank
[0] - rank
[i
]);
5530 update
[i
]->span
[i
-1] = (rank
[0] - rank
[i
]) + 1;
5534 /* increment span for untouched levels */
5535 for (i
= level
; i
< zsl
->level
; i
++) {
5536 update
[i
]->span
[i
-1]++;
5539 x
->backward
= (update
[0] == zsl
->header
) ? NULL
: update
[0];
5541 x
->forward
[0]->backward
= x
;
5547 /* Internal function used by zslDelete, zslDeleteByScore and zslDeleteByRank */
5548 void zslDeleteNode(zskiplist
*zsl
, zskiplistNode
*x
, zskiplistNode
**update
) {
5550 for (i
= 0; i
< zsl
->level
; i
++) {
5551 if (update
[i
]->forward
[i
] == x
) {
5553 update
[i
]->span
[i
-1] += x
->span
[i
-1] - 1;
5555 update
[i
]->forward
[i
] = x
->forward
[i
];
5557 /* invariant: i > 0, because update[0]->forward[0]
5558 * is always equal to x */
5559 update
[i
]->span
[i
-1] -= 1;
5562 if (x
->forward
[0]) {
5563 x
->forward
[0]->backward
= x
->backward
;
5565 zsl
->tail
= x
->backward
;
5567 while(zsl
->level
> 1 && zsl
->header
->forward
[zsl
->level
-1] == NULL
)
5572 /* Delete an element with matching score/object from the skiplist. */
5573 static int zslDelete(zskiplist
*zsl
, double score
, robj
*obj
) {
5574 zskiplistNode
*update
[ZSKIPLIST_MAXLEVEL
], *x
;
5578 for (i
= zsl
->level
-1; i
>= 0; i
--) {
5579 while (x
->forward
[i
] &&
5580 (x
->forward
[i
]->score
< score
||
5581 (x
->forward
[i
]->score
== score
&&
5582 compareStringObjects(x
->forward
[i
]->obj
,obj
) < 0)))
5586 /* We may have multiple elements with the same score, what we need
5587 * is to find the element with both the right score and object. */
5589 if (x
&& score
== x
->score
&& equalStringObjects(x
->obj
,obj
)) {
5590 zslDeleteNode(zsl
, x
, update
);
5594 return 0; /* not found */
5596 return 0; /* not found */
5599 /* Delete all the elements with score between min and max from the skiplist.
5600 * Min and max are inclusive, so a score >= min && score <= max is deleted.
5601 * Note that this function takes the reference to the hash table view of the
5602 * sorted set, in order to remove the elements from the hash table too. */
5603 static unsigned long zslDeleteRangeByScore(zskiplist
*zsl
, double min
, double max
, dict
*dict
) {
5604 zskiplistNode
*update
[ZSKIPLIST_MAXLEVEL
], *x
;
5605 unsigned long removed
= 0;
5609 for (i
= zsl
->level
-1; i
>= 0; i
--) {
5610 while (x
->forward
[i
] && x
->forward
[i
]->score
< min
)
5614 /* We may have multiple elements with the same score, what we need
5615 * is to find the element with both the right score and object. */
5617 while (x
&& x
->score
<= max
) {
5618 zskiplistNode
*next
= x
->forward
[0];
5619 zslDeleteNode(zsl
, x
, update
);
5620 dictDelete(dict
,x
->obj
);
5625 return removed
; /* not found */
5628 /* Delete all the elements with rank between start and end from the skiplist.
5629 * Start and end are inclusive. Note that start and end need to be 1-based */
5630 static unsigned long zslDeleteRangeByRank(zskiplist
*zsl
, unsigned int start
, unsigned int end
, dict
*dict
) {
5631 zskiplistNode
*update
[ZSKIPLIST_MAXLEVEL
], *x
;
5632 unsigned long traversed
= 0, removed
= 0;
5636 for (i
= zsl
->level
-1; i
>= 0; i
--) {
5637 while (x
->forward
[i
] && (traversed
+ (i
> 0 ? x
->span
[i
-1] : 1)) < start
) {
5638 traversed
+= i
> 0 ? x
->span
[i
-1] : 1;
5646 while (x
&& traversed
<= end
) {
5647 zskiplistNode
*next
= x
->forward
[0];
5648 zslDeleteNode(zsl
, x
, update
);
5649 dictDelete(dict
,x
->obj
);
5658 /* Find the first node having a score equal or greater than the specified one.
5659 * Returns NULL if there is no match. */
5660 static zskiplistNode
*zslFirstWithScore(zskiplist
*zsl
, double score
) {
5665 for (i
= zsl
->level
-1; i
>= 0; i
--) {
5666 while (x
->forward
[i
] && x
->forward
[i
]->score
< score
)
5669 /* We may have multiple elements with the same score, what we need
5670 * is to find the element with both the right score and object. */
5671 return x
->forward
[0];
5674 /* Find the rank for an element by both score and key.
5675 * Returns 0 when the element cannot be found, rank otherwise.
5676 * Note that the rank is 1-based due to the span of zsl->header to the
5678 static unsigned long zslGetRank(zskiplist
*zsl
, double score
, robj
*o
) {
5680 unsigned long rank
= 0;
5684 for (i
= zsl
->level
-1; i
>= 0; i
--) {
5685 while (x
->forward
[i
] &&
5686 (x
->forward
[i
]->score
< score
||
5687 (x
->forward
[i
]->score
== score
&&
5688 compareStringObjects(x
->forward
[i
]->obj
,o
) <= 0))) {
5689 rank
+= i
> 0 ? x
->span
[i
-1] : 1;
5693 /* x might be equal to zsl->header, so test if obj is non-NULL */
5694 if (x
->obj
&& equalStringObjects(x
->obj
,o
)) {
5701 /* Finds an element by its rank. The rank argument needs to be 1-based. */
5702 zskiplistNode
* zslGetElementByRank(zskiplist
*zsl
, unsigned long rank
) {
5704 unsigned long traversed
= 0;
5708 for (i
= zsl
->level
-1; i
>= 0; i
--) {
5709 while (x
->forward
[i
] && (traversed
+ (i
>0 ? x
->span
[i
-1] : 1)) <= rank
)
5711 traversed
+= i
> 0 ? x
->span
[i
-1] : 1;
5714 if (traversed
== rank
) {
5721 /* The actual Z-commands implementations */
5723 /* This generic command implements both ZADD and ZINCRBY.
5724 * scoreval is the score if the operation is a ZADD (doincrement == 0) or
5725 * the increment if the operation is a ZINCRBY (doincrement == 1). */
5726 static void zaddGenericCommand(redisClient
*c
, robj
*key
, robj
*ele
, double scoreval
, int doincrement
) {
5731 zsetobj
= lookupKeyWrite(c
->db
,key
);
5732 if (zsetobj
== NULL
) {
5733 zsetobj
= createZsetObject();
5734 dictAdd(c
->db
->dict
,key
,zsetobj
);
5737 if (zsetobj
->type
!= REDIS_ZSET
) {
5738 addReply(c
,shared
.wrongtypeerr
);
5744 /* Ok now since we implement both ZADD and ZINCRBY here the code
5745 * needs to handle the two different conditions. It's all about setting
5746 * '*score', that is, the new score to set, to the right value. */
5747 score
= zmalloc(sizeof(double));
5751 /* Read the old score. If the element was not present starts from 0 */
5752 de
= dictFind(zs
->dict
,ele
);
5754 double *oldscore
= dictGetEntryVal(de
);
5755 *score
= *oldscore
+ scoreval
;
5763 /* What follows is a simple remove and re-insert operation that is common
5764 * to both ZADD and ZINCRBY... */
5765 if (dictAdd(zs
->dict
,ele
,score
) == DICT_OK
) {
5766 /* case 1: New element */
5767 incrRefCount(ele
); /* added to hash */
5768 zslInsert(zs
->zsl
,*score
,ele
);
5769 incrRefCount(ele
); /* added to skiplist */
5772 addReplyDouble(c
,*score
);
5774 addReply(c
,shared
.cone
);
5779 /* case 2: Score update operation */
5780 de
= dictFind(zs
->dict
,ele
);
5781 redisAssert(de
!= NULL
);
5782 oldscore
= dictGetEntryVal(de
);
5783 if (*score
!= *oldscore
) {
5786 /* Remove and insert the element in the skip list with new score */
5787 deleted
= zslDelete(zs
->zsl
,*oldscore
,ele
);
5788 redisAssert(deleted
!= 0);
5789 zslInsert(zs
->zsl
,*score
,ele
);
5791 /* Update the score in the hash table */
5792 dictReplace(zs
->dict
,ele
,score
);
5798 addReplyDouble(c
,*score
);
5800 addReply(c
,shared
.czero
);
5804 static void zaddCommand(redisClient
*c
) {
5807 if (getDoubleFromObjectOrReply(c
, c
->argv
[2], &scoreval
, NULL
) != REDIS_OK
) return;
5808 zaddGenericCommand(c
,c
->argv
[1],c
->argv
[3],scoreval
,0);
5811 static void zincrbyCommand(redisClient
*c
) {
5814 if (getDoubleFromObjectOrReply(c
, c
->argv
[2], &scoreval
, NULL
) != REDIS_OK
) return;
5815 zaddGenericCommand(c
,c
->argv
[1],c
->argv
[3],scoreval
,1);
5818 static void zremCommand(redisClient
*c
) {
5825 if ((zsetobj
= lookupKeyWriteOrReply(c
,c
->argv
[1],shared
.czero
)) == NULL
||
5826 checkType(c
,zsetobj
,REDIS_ZSET
)) return;
5829 de
= dictFind(zs
->dict
,c
->argv
[2]);
5831 addReply(c
,shared
.czero
);
5834 /* Delete from the skiplist */
5835 oldscore
= dictGetEntryVal(de
);
5836 deleted
= zslDelete(zs
->zsl
,*oldscore
,c
->argv
[2]);
5837 redisAssert(deleted
!= 0);
5839 /* Delete from the hash table */
5840 dictDelete(zs
->dict
,c
->argv
[2]);
5841 if (htNeedsResize(zs
->dict
)) dictResize(zs
->dict
);
5842 if (dictSize(zs
->dict
) == 0) deleteKey(c
->db
,c
->argv
[1]);
5844 addReply(c
,shared
.cone
);
5847 static void zremrangebyscoreCommand(redisClient
*c
) {
5854 if ((getDoubleFromObjectOrReply(c
, c
->argv
[2], &min
, NULL
) != REDIS_OK
) ||
5855 (getDoubleFromObjectOrReply(c
, c
->argv
[3], &max
, NULL
) != REDIS_OK
)) return;
5857 if ((zsetobj
= lookupKeyWriteOrReply(c
,c
->argv
[1],shared
.czero
)) == NULL
||
5858 checkType(c
,zsetobj
,REDIS_ZSET
)) return;
5861 deleted
= zslDeleteRangeByScore(zs
->zsl
,min
,max
,zs
->dict
);
5862 if (htNeedsResize(zs
->dict
)) dictResize(zs
->dict
);
5863 if (dictSize(zs
->dict
) == 0) deleteKey(c
->db
,c
->argv
[1]);
5864 server
.dirty
+= deleted
;
5865 addReplyLongLong(c
,deleted
);
5868 static void zremrangebyrankCommand(redisClient
*c
) {
5876 if ((getLongFromObjectOrReply(c
, c
->argv
[2], &start
, NULL
) != REDIS_OK
) ||
5877 (getLongFromObjectOrReply(c
, c
->argv
[3], &end
, NULL
) != REDIS_OK
)) return;
5879 if ((zsetobj
= lookupKeyWriteOrReply(c
,c
->argv
[1],shared
.czero
)) == NULL
||
5880 checkType(c
,zsetobj
,REDIS_ZSET
)) return;
5882 llen
= zs
->zsl
->length
;
5884 /* convert negative indexes */
5885 if (start
< 0) start
= llen
+start
;
5886 if (end
< 0) end
= llen
+end
;
5887 if (start
< 0) start
= 0;
5888 if (end
< 0) end
= 0;
5890 /* indexes sanity checks */
5891 if (start
> end
|| start
>= llen
) {
5892 addReply(c
,shared
.czero
);
5895 if (end
>= llen
) end
= llen
-1;
5897 /* increment start and end because zsl*Rank functions
5898 * use 1-based rank */
5899 deleted
= zslDeleteRangeByRank(zs
->zsl
,start
+1,end
+1,zs
->dict
);
5900 if (htNeedsResize(zs
->dict
)) dictResize(zs
->dict
);
5901 if (dictSize(zs
->dict
) == 0) deleteKey(c
->db
,c
->argv
[1]);
5902 server
.dirty
+= deleted
;
5903 addReplyLongLong(c
, deleted
);
5911 static int qsortCompareZsetopsrcByCardinality(const void *s1
, const void *s2
) {
5912 zsetopsrc
*d1
= (void*) s1
, *d2
= (void*) s2
;
5913 unsigned long size1
, size2
;
5914 size1
= d1
->dict
? dictSize(d1
->dict
) : 0;
5915 size2
= d2
->dict
? dictSize(d2
->dict
) : 0;
5916 return size1
- size2
;
5919 #define REDIS_AGGR_SUM 1
5920 #define REDIS_AGGR_MIN 2
5921 #define REDIS_AGGR_MAX 3
5923 inline static void zunionInterAggregate(double *target
, double val
, int aggregate
) {
5924 if (aggregate
== REDIS_AGGR_SUM
) {
5925 *target
= *target
+ val
;
5926 } else if (aggregate
== REDIS_AGGR_MIN
) {
5927 *target
= val
< *target
? val
: *target
;
5928 } else if (aggregate
== REDIS_AGGR_MAX
) {
5929 *target
= val
> *target
? val
: *target
;
5932 redisPanic("Unknown ZUNION/INTER aggregate type");
5936 static void zunionInterGenericCommand(redisClient
*c
, robj
*dstkey
, int op
) {
5938 int aggregate
= REDIS_AGGR_SUM
;
5945 /* expect zsetnum input keys to be given */
5946 zsetnum
= atoi(c
->argv
[2]->ptr
);
5948 addReplySds(c
,sdsnew("-ERR at least 1 input key is needed for ZUNIONSTORE/ZINTERSTORE\r\n"));
5952 /* test if the expected number of keys would overflow */
5953 if (3+zsetnum
> c
->argc
) {
5954 addReply(c
,shared
.syntaxerr
);
5958 /* read keys to be used for input */
5959 src
= zmalloc(sizeof(zsetopsrc
) * zsetnum
);
5960 for (i
= 0, j
= 3; i
< zsetnum
; i
++, j
++) {
5961 robj
*zsetobj
= lookupKeyWrite(c
->db
,c
->argv
[j
]);
5965 if (zsetobj
->type
!= REDIS_ZSET
) {
5967 addReply(c
,shared
.wrongtypeerr
);
5970 src
[i
].dict
= ((zset
*)zsetobj
->ptr
)->dict
;
5973 /* default all weights to 1 */
5974 src
[i
].weight
= 1.0;
5977 /* parse optional extra arguments */
5979 int remaining
= c
->argc
- j
;
5982 if (remaining
>= (zsetnum
+ 1) && !strcasecmp(c
->argv
[j
]->ptr
,"weights")) {
5984 for (i
= 0; i
< zsetnum
; i
++, j
++, remaining
--) {
5985 if (getDoubleFromObjectOrReply(c
, c
->argv
[j
], &src
[i
].weight
, NULL
) != REDIS_OK
)
5988 } else if (remaining
>= 2 && !strcasecmp(c
->argv
[j
]->ptr
,"aggregate")) {
5990 if (!strcasecmp(c
->argv
[j
]->ptr
,"sum")) {
5991 aggregate
= REDIS_AGGR_SUM
;
5992 } else if (!strcasecmp(c
->argv
[j
]->ptr
,"min")) {
5993 aggregate
= REDIS_AGGR_MIN
;
5994 } else if (!strcasecmp(c
->argv
[j
]->ptr
,"max")) {
5995 aggregate
= REDIS_AGGR_MAX
;
5998 addReply(c
,shared
.syntaxerr
);
6004 addReply(c
,shared
.syntaxerr
);
6010 /* sort sets from the smallest to largest, this will improve our
6011 * algorithm's performance */
6012 qsort(src
,zsetnum
,sizeof(zsetopsrc
), qsortCompareZsetopsrcByCardinality
);
6014 dstobj
= createZsetObject();
6015 dstzset
= dstobj
->ptr
;
6017 if (op
== REDIS_OP_INTER
) {
6018 /* skip going over all entries if the smallest zset is NULL or empty */
6019 if (src
[0].dict
&& dictSize(src
[0].dict
) > 0) {
6020 /* precondition: as src[0].dict is non-empty and the zsets are ordered
6021 * from small to large, all src[i > 0].dict are non-empty too */
6022 di
= dictGetIterator(src
[0].dict
);
6023 while((de
= dictNext(di
)) != NULL
) {
6024 double *score
= zmalloc(sizeof(double)), value
;
6025 *score
= src
[0].weight
* (*(double*)dictGetEntryVal(de
));
6027 for (j
= 1; j
< zsetnum
; j
++) {
6028 dictEntry
*other
= dictFind(src
[j
].dict
,dictGetEntryKey(de
));
6030 value
= src
[j
].weight
* (*(double*)dictGetEntryVal(other
));
6031 zunionInterAggregate(score
, value
, aggregate
);
6037 /* skip entry when not present in every source dict */
6041 robj
*o
= dictGetEntryKey(de
);
6042 dictAdd(dstzset
->dict
,o
,score
);
6043 incrRefCount(o
); /* added to dictionary */
6044 zslInsert(dstzset
->zsl
,*score
,o
);
6045 incrRefCount(o
); /* added to skiplist */
6048 dictReleaseIterator(di
);
6050 } else if (op
== REDIS_OP_UNION
) {
6051 for (i
= 0; i
< zsetnum
; i
++) {
6052 if (!src
[i
].dict
) continue;
6054 di
= dictGetIterator(src
[i
].dict
);
6055 while((de
= dictNext(di
)) != NULL
) {
6056 /* skip key when already processed */
6057 if (dictFind(dstzset
->dict
,dictGetEntryKey(de
)) != NULL
) continue;
6059 double *score
= zmalloc(sizeof(double)), value
;
6060 *score
= src
[i
].weight
* (*(double*)dictGetEntryVal(de
));
6062 /* because the zsets are sorted by size, it's only possible
6063 * for sets at larger indices to hold this entry */
6064 for (j
= (i
+1); j
< zsetnum
; j
++) {
6065 dictEntry
*other
= dictFind(src
[j
].dict
,dictGetEntryKey(de
));
6067 value
= src
[j
].weight
* (*(double*)dictGetEntryVal(other
));
6068 zunionInterAggregate(score
, value
, aggregate
);
6072 robj
*o
= dictGetEntryKey(de
);
6073 dictAdd(dstzset
->dict
,o
,score
);
6074 incrRefCount(o
); /* added to dictionary */
6075 zslInsert(dstzset
->zsl
,*score
,o
);
6076 incrRefCount(o
); /* added to skiplist */
6078 dictReleaseIterator(di
);
6081 /* unknown operator */
6082 redisAssert(op
== REDIS_OP_INTER
|| op
== REDIS_OP_UNION
);
6085 deleteKey(c
->db
,dstkey
);
6086 if (dstzset
->zsl
->length
) {
6087 dictAdd(c
->db
->dict
,dstkey
,dstobj
);
6088 incrRefCount(dstkey
);
6089 addReplyLongLong(c
, dstzset
->zsl
->length
);
6092 decrRefCount(dstobj
);
6093 addReply(c
, shared
.czero
);
6098 static void zunionstoreCommand(redisClient
*c
) {
6099 zunionInterGenericCommand(c
,c
->argv
[1], REDIS_OP_UNION
);
6102 static void zinterstoreCommand(redisClient
*c
) {
6103 zunionInterGenericCommand(c
,c
->argv
[1], REDIS_OP_INTER
);
6106 static void zrangeGenericCommand(redisClient
*c
, int reverse
) {
6118 if ((getLongFromObjectOrReply(c
, c
->argv
[2], &start
, NULL
) != REDIS_OK
) ||
6119 (getLongFromObjectOrReply(c
, c
->argv
[3], &end
, NULL
) != REDIS_OK
)) return;
6121 if (c
->argc
== 5 && !strcasecmp(c
->argv
[4]->ptr
,"withscores")) {
6123 } else if (c
->argc
>= 5) {
6124 addReply(c
,shared
.syntaxerr
);
6128 if ((o
= lookupKeyReadOrReply(c
,c
->argv
[1],shared
.emptymultibulk
)) == NULL
6129 || checkType(c
,o
,REDIS_ZSET
)) return;
6134 /* convert negative indexes */
6135 if (start
< 0) start
= llen
+start
;
6136 if (end
< 0) end
= llen
+end
;
6137 if (start
< 0) start
= 0;
6138 if (end
< 0) end
= 0;
6140 /* indexes sanity checks */
6141 if (start
> end
|| start
>= llen
) {
6142 /* Out of range start or start > end result in empty list */
6143 addReply(c
,shared
.emptymultibulk
);
6146 if (end
>= llen
) end
= llen
-1;
6147 rangelen
= (end
-start
)+1;
6149 /* check if starting point is trivial, before searching
6150 * the element in log(N) time */
6152 ln
= start
== 0 ? zsl
->tail
: zslGetElementByRank(zsl
, llen
-start
);
6155 zsl
->header
->forward
[0] : zslGetElementByRank(zsl
, start
+1);
6158 /* Return the result in form of a multi-bulk reply */
6159 addReplySds(c
,sdscatprintf(sdsempty(),"*%d\r\n",
6160 withscores
? (rangelen
*2) : rangelen
));
6161 for (j
= 0; j
< rangelen
; j
++) {
6163 addReplyBulk(c
,ele
);
6165 addReplyDouble(c
,ln
->score
);
6166 ln
= reverse
? ln
->backward
: ln
->forward
[0];
6170 static void zrangeCommand(redisClient
*c
) {
6171 zrangeGenericCommand(c
,0);
6174 static void zrevrangeCommand(redisClient
*c
) {
6175 zrangeGenericCommand(c
,1);
6178 /* This command implements both ZRANGEBYSCORE and ZCOUNT.
6179 * If justcount is non-zero, just the count is returned. */
6180 static void genericZrangebyscoreCommand(redisClient
*c
, int justcount
) {
6183 int minex
= 0, maxex
= 0; /* are min or max exclusive? */
6184 int offset
= 0, limit
= -1;
6188 /* Parse the min-max interval. If one of the values is prefixed
6189 * by the "(" character, it's considered "open". For instance
6190 * ZRANGEBYSCORE zset (1.5 (2.5 will match min < x < max
6191 * ZRANGEBYSCORE zset 1.5 2.5 will instead match min <= x <= max */
6192 if (((char*)c
->argv
[2]->ptr
)[0] == '(') {
6193 min
= strtod((char*)c
->argv
[2]->ptr
+1,NULL
);
6196 min
= strtod(c
->argv
[2]->ptr
,NULL
);
6198 if (((char*)c
->argv
[3]->ptr
)[0] == '(') {
6199 max
= strtod((char*)c
->argv
[3]->ptr
+1,NULL
);
6202 max
= strtod(c
->argv
[3]->ptr
,NULL
);
6205 /* Parse "WITHSCORES": note that if the command was called with
6206 * the name ZCOUNT then we are sure that c->argc == 4, so we'll never
6207 * enter the following paths to parse WITHSCORES and LIMIT. */
6208 if (c
->argc
== 5 || c
->argc
== 8) {
6209 if (strcasecmp(c
->argv
[c
->argc
-1]->ptr
,"withscores") == 0)
6214 if (c
->argc
!= (4 + withscores
) && c
->argc
!= (7 + withscores
))
6218 sdsnew("-ERR wrong number of arguments for ZRANGEBYSCORE\r\n"));
6223 if (c
->argc
== (7 + withscores
) && strcasecmp(c
->argv
[4]->ptr
,"limit")) {
6224 addReply(c
,shared
.syntaxerr
);
6226 } else if (c
->argc
== (7 + withscores
)) {
6227 offset
= atoi(c
->argv
[5]->ptr
);
6228 limit
= atoi(c
->argv
[6]->ptr
);
6229 if (offset
< 0) offset
= 0;
6232 /* Ok, lookup the key and get the range */
6233 o
= lookupKeyRead(c
->db
,c
->argv
[1]);
6235 addReply(c
,justcount
? shared
.czero
: shared
.emptymultibulk
);
6237 if (o
->type
!= REDIS_ZSET
) {
6238 addReply(c
,shared
.wrongtypeerr
);
6240 zset
*zsetobj
= o
->ptr
;
6241 zskiplist
*zsl
= zsetobj
->zsl
;
6243 robj
*ele
, *lenobj
= NULL
;
6244 unsigned long rangelen
= 0;
6246 /* Get the first node with the score >= min, or with
6247 * score > min if 'minex' is true. */
6248 ln
= zslFirstWithScore(zsl
,min
);
6249 while (minex
&& ln
&& ln
->score
== min
) ln
= ln
->forward
[0];
6252 /* No element matching the specified interval */
6253 addReply(c
,justcount
? shared
.czero
: shared
.emptymultibulk
);
6257 /* We don't know in advance how many matching elements there
6258 * are in the list, so we push this object that will represent
6259 * the multi-bulk length in the output buffer, and will "fix"
6262 lenobj
= createObject(REDIS_STRING
,NULL
);
6264 decrRefCount(lenobj
);
6267 while(ln
&& (maxex
? (ln
->score
< max
) : (ln
->score
<= max
))) {
6270 ln
= ln
->forward
[0];
6273 if (limit
== 0) break;
6276 addReplyBulk(c
,ele
);
6278 addReplyDouble(c
,ln
->score
);
6280 ln
= ln
->forward
[0];
6282 if (limit
> 0) limit
--;
6285 addReplyLongLong(c
,(long)rangelen
);
6287 lenobj
->ptr
= sdscatprintf(sdsempty(),"*%lu\r\n",
6288 withscores
? (rangelen
*2) : rangelen
);
6294 static void zrangebyscoreCommand(redisClient
*c
) {
6295 genericZrangebyscoreCommand(c
,0);
6298 static void zcountCommand(redisClient
*c
) {
6299 genericZrangebyscoreCommand(c
,1);
6302 static void zcardCommand(redisClient
*c
) {
6306 if ((o
= lookupKeyReadOrReply(c
,c
->argv
[1],shared
.czero
)) == NULL
||
6307 checkType(c
,o
,REDIS_ZSET
)) return;
6310 addReplyUlong(c
,zs
->zsl
->length
);
6313 static void zscoreCommand(redisClient
*c
) {
6318 if ((o
= lookupKeyReadOrReply(c
,c
->argv
[1],shared
.nullbulk
)) == NULL
||
6319 checkType(c
,o
,REDIS_ZSET
)) return;
6322 de
= dictFind(zs
->dict
,c
->argv
[2]);
6324 addReply(c
,shared
.nullbulk
);
6326 double *score
= dictGetEntryVal(de
);
6328 addReplyDouble(c
,*score
);
6332 static void zrankGenericCommand(redisClient
*c
, int reverse
) {
6340 if ((o
= lookupKeyReadOrReply(c
,c
->argv
[1],shared
.nullbulk
)) == NULL
||
6341 checkType(c
,o
,REDIS_ZSET
)) return;
6345 de
= dictFind(zs
->dict
,c
->argv
[2]);
6347 addReply(c
,shared
.nullbulk
);
6351 score
= dictGetEntryVal(de
);
6352 rank
= zslGetRank(zsl
, *score
, c
->argv
[2]);
6355 addReplyLongLong(c
, zsl
->length
- rank
);
6357 addReplyLongLong(c
, rank
-1);
6360 addReply(c
,shared
.nullbulk
);
6364 static void zrankCommand(redisClient
*c
) {
6365 zrankGenericCommand(c
, 0);
6368 static void zrevrankCommand(redisClient
*c
) {
6369 zrankGenericCommand(c
, 1);
6372 /* ========================= Hashes utility functions ======================= */
6373 #define REDIS_HASH_KEY 1
6374 #define REDIS_HASH_VALUE 2
6376 /* Check the length of a number of objects to see if we need to convert a
6377 * zipmap to a real hash. Note that we only check string encoded objects
6378 * as their string length can be queried in constant time. */
6379 static void hashTryConversion(robj
*subject
, robj
**argv
, int start
, int end
) {
6381 if (subject
->encoding
!= REDIS_ENCODING_ZIPMAP
) return;
6383 for (i
= start
; i
<= end
; i
++) {
6384 if (argv
[i
]->encoding
== REDIS_ENCODING_RAW
&&
6385 sdslen(argv
[i
]->ptr
) > server
.hash_max_zipmap_value
)
6387 convertToRealHash(subject
);
6393 /* Encode given objects in-place when the hash uses a dict. */
6394 static void hashTryObjectEncoding(robj
*subject
, robj
**o1
, robj
**o2
) {
6395 if (subject
->encoding
== REDIS_ENCODING_HT
) {
6396 if (o1
) *o1
= tryObjectEncoding(*o1
);
6397 if (o2
) *o2
= tryObjectEncoding(*o2
);
6401 /* Get the value from a hash identified by key. Returns either a string
6402 * object or NULL if the value cannot be found. The refcount of the object
6403 * is always increased by 1 when the value was found. */
6404 static robj
*hashGet(robj
*o
, robj
*key
) {
6406 if (o
->encoding
== REDIS_ENCODING_ZIPMAP
) {
6409 key
= getDecodedObject(key
);
6410 if (zipmapGet(o
->ptr
,key
->ptr
,sdslen(key
->ptr
),&v
,&vlen
)) {
6411 value
= createStringObject((char*)v
,vlen
);
6415 dictEntry
*de
= dictFind(o
->ptr
,key
);
6417 value
= dictGetEntryVal(de
);
6418 incrRefCount(value
);
6424 /* Test if the key exists in the given hash. Returns 1 if the key
6425 * exists and 0 when it doesn't. */
6426 static int hashExists(robj
*o
, robj
*key
) {
6427 if (o
->encoding
== REDIS_ENCODING_ZIPMAP
) {
6428 key
= getDecodedObject(key
);
6429 if (zipmapExists(o
->ptr
,key
->ptr
,sdslen(key
->ptr
))) {
6435 if (dictFind(o
->ptr
,key
) != NULL
) {
6442 /* Add an element, discard the old if the key already exists.
6443 * Return 0 on insert and 1 on update. */
6444 static int hashSet(robj
*o
, robj
*key
, robj
*value
) {
6446 if (o
->encoding
== REDIS_ENCODING_ZIPMAP
) {
6447 key
= getDecodedObject(key
);
6448 value
= getDecodedObject(value
);
6449 o
->ptr
= zipmapSet(o
->ptr
,
6450 key
->ptr
,sdslen(key
->ptr
),
6451 value
->ptr
,sdslen(value
->ptr
), &update
);
6453 decrRefCount(value
);
6455 /* Check if the zipmap needs to be upgraded to a real hash table */
6456 if (zipmapLen(o
->ptr
) > server
.hash_max_zipmap_entries
)
6457 convertToRealHash(o
);
6459 if (dictReplace(o
->ptr
,key
,value
)) {
6466 incrRefCount(value
);
6471 /* Delete an element from a hash.
6472 * Return 1 on deleted and 0 on not found. */
6473 static int hashDelete(robj
*o
, robj
*key
) {
6475 if (o
->encoding
== REDIS_ENCODING_ZIPMAP
) {
6476 key
= getDecodedObject(key
);
6477 o
->ptr
= zipmapDel(o
->ptr
,key
->ptr
,sdslen(key
->ptr
), &deleted
);
6480 deleted
= dictDelete((dict
*)o
->ptr
,key
) == DICT_OK
;
6481 /* Always check if the dictionary needs a resize after a delete. */
6482 if (deleted
&& htNeedsResize(o
->ptr
)) dictResize(o
->ptr
);
6487 /* Return the number of elements in a hash. */
6488 static unsigned long hashLength(robj
*o
) {
6489 return (o
->encoding
== REDIS_ENCODING_ZIPMAP
) ?
6490 zipmapLen((unsigned char*)o
->ptr
) : dictSize((dict
*)o
->ptr
);
6493 /* Structure to hold hash iteration abstraction. Note that iteration over
6494 * hashes involves both fields and values. Because it is possible that
6495 * not both are required, store pointers in the iterator to avoid
6496 * unnecessary memory allocation for fields/values. */
6500 unsigned char *zk
, *zv
;
6501 unsigned int zklen
, zvlen
;
6507 static hashIterator
*hashInitIterator(robj
*subject
) {
6508 hashIterator
*hi
= zmalloc(sizeof(hashIterator
));
6509 hi
->encoding
= subject
->encoding
;
6510 if (hi
->encoding
== REDIS_ENCODING_ZIPMAP
) {
6511 hi
->zi
= zipmapRewind(subject
->ptr
);
6512 } else if (hi
->encoding
== REDIS_ENCODING_HT
) {
6513 hi
->di
= dictGetIterator(subject
->ptr
);
6520 static void hashReleaseIterator(hashIterator
*hi
) {
6521 if (hi
->encoding
== REDIS_ENCODING_HT
) {
6522 dictReleaseIterator(hi
->di
);
6527 /* Move to the next entry in the hash. Return REDIS_OK when the next entry
6528 * could be found and REDIS_ERR when the iterator reaches the end. */
6529 static int hashNext(hashIterator
*hi
) {
6530 if (hi
->encoding
== REDIS_ENCODING_ZIPMAP
) {
6531 if ((hi
->zi
= zipmapNext(hi
->zi
, &hi
->zk
, &hi
->zklen
,
6532 &hi
->zv
, &hi
->zvlen
)) == NULL
) return REDIS_ERR
;
6534 if ((hi
->de
= dictNext(hi
->di
)) == NULL
) return REDIS_ERR
;
6539 /* Get key or value object at current iteration position.
6540 * This increases the refcount of the field object by 1. */
6541 static robj
*hashCurrent(hashIterator
*hi
, int what
) {
6543 if (hi
->encoding
== REDIS_ENCODING_ZIPMAP
) {
6544 if (what
& REDIS_HASH_KEY
) {
6545 o
= createStringObject((char*)hi
->zk
,hi
->zklen
);
6547 o
= createStringObject((char*)hi
->zv
,hi
->zvlen
);
6550 if (what
& REDIS_HASH_KEY
) {
6551 o
= dictGetEntryKey(hi
->de
);
6553 o
= dictGetEntryVal(hi
->de
);
6560 static robj
*hashLookupWriteOrCreate(redisClient
*c
, robj
*key
) {
6561 robj
*o
= lookupKeyWrite(c
->db
,key
);
6563 o
= createHashObject();
6564 dictAdd(c
->db
->dict
,key
,o
);
6567 if (o
->type
!= REDIS_HASH
) {
6568 addReply(c
,shared
.wrongtypeerr
);
6575 /* ============================= Hash commands ============================== */
6576 static void hsetCommand(redisClient
*c
) {
6580 if ((o
= hashLookupWriteOrCreate(c
,c
->argv
[1])) == NULL
) return;
6581 hashTryConversion(o
,c
->argv
,2,3);
6582 hashTryObjectEncoding(o
,&c
->argv
[2], &c
->argv
[3]);
6583 update
= hashSet(o
,c
->argv
[2],c
->argv
[3]);
6584 addReply(c
, update
? shared
.czero
: shared
.cone
);
6588 static void hsetnxCommand(redisClient
*c
) {
6590 if ((o
= hashLookupWriteOrCreate(c
,c
->argv
[1])) == NULL
) return;
6591 hashTryConversion(o
,c
->argv
,2,3);
6593 if (hashExists(o
, c
->argv
[2])) {
6594 addReply(c
, shared
.czero
);
6596 hashTryObjectEncoding(o
,&c
->argv
[2], &c
->argv
[3]);
6597 hashSet(o
,c
->argv
[2],c
->argv
[3]);
6598 addReply(c
, shared
.cone
);
6603 static void hmsetCommand(redisClient
*c
) {
6607 if ((c
->argc
% 2) == 1) {
6608 addReplySds(c
,sdsnew("-ERR wrong number of arguments for HMSET\r\n"));
6612 if ((o
= hashLookupWriteOrCreate(c
,c
->argv
[1])) == NULL
) return;
6613 hashTryConversion(o
,c
->argv
,2,c
->argc
-1);
6614 for (i
= 2; i
< c
->argc
; i
+= 2) {
6615 hashTryObjectEncoding(o
,&c
->argv
[i
], &c
->argv
[i
+1]);
6616 hashSet(o
,c
->argv
[i
],c
->argv
[i
+1]);
6618 addReply(c
, shared
.ok
);
6622 static void hincrbyCommand(redisClient
*c
) {
6623 long long value
, incr
;
6624 robj
*o
, *current
, *new;
6626 if (getLongLongFromObjectOrReply(c
,c
->argv
[3],&incr
,NULL
) != REDIS_OK
) return;
6627 if ((o
= hashLookupWriteOrCreate(c
,c
->argv
[1])) == NULL
) return;
6628 if ((current
= hashGet(o
,c
->argv
[2])) != NULL
) {
6629 if (getLongLongFromObjectOrReply(c
,current
,&value
,
6630 "hash value is not an integer") != REDIS_OK
) {
6631 decrRefCount(current
);
6634 decrRefCount(current
);
6640 new = createStringObjectFromLongLong(value
);
6641 hashTryObjectEncoding(o
,&c
->argv
[2],NULL
);
6642 hashSet(o
,c
->argv
[2],new);
6644 addReplyLongLong(c
,value
);
6648 static void hgetCommand(redisClient
*c
) {
6650 if ((o
= lookupKeyReadOrReply(c
,c
->argv
[1],shared
.nullbulk
)) == NULL
||
6651 checkType(c
,o
,REDIS_HASH
)) return;
6653 if ((value
= hashGet(o
,c
->argv
[2])) != NULL
) {
6654 addReplyBulk(c
,value
);
6655 decrRefCount(value
);
6657 addReply(c
,shared
.nullbulk
);
6661 static void hmgetCommand(redisClient
*c
) {
6664 o
= lookupKeyRead(c
->db
,c
->argv
[1]);
6665 if (o
!= NULL
&& o
->type
!= REDIS_HASH
) {
6666 addReply(c
,shared
.wrongtypeerr
);
6669 /* Note the check for o != NULL happens inside the loop. This is
6670 * done because objects that cannot be found are considered to be
6671 * an empty hash. The reply should then be a series of NULLs. */
6672 addReplySds(c
,sdscatprintf(sdsempty(),"*%d\r\n",c
->argc
-2));
6673 for (i
= 2; i
< c
->argc
; i
++) {
6674 if (o
!= NULL
&& (value
= hashGet(o
,c
->argv
[i
])) != NULL
) {
6675 addReplyBulk(c
,value
);
6676 decrRefCount(value
);
6678 addReply(c
,shared
.nullbulk
);
6683 static void hdelCommand(redisClient
*c
) {
6685 if ((o
= lookupKeyWriteOrReply(c
,c
->argv
[1],shared
.czero
)) == NULL
||
6686 checkType(c
,o
,REDIS_HASH
)) return;
6688 if (hashDelete(o
,c
->argv
[2])) {
6689 if (hashLength(o
) == 0) deleteKey(c
->db
,c
->argv
[1]);
6690 addReply(c
,shared
.cone
);
6693 addReply(c
,shared
.czero
);
6697 static void hlenCommand(redisClient
*c
) {
6699 if ((o
= lookupKeyReadOrReply(c
,c
->argv
[1],shared
.czero
)) == NULL
||
6700 checkType(c
,o
,REDIS_HASH
)) return;
6702 addReplyUlong(c
,hashLength(o
));
6705 static void genericHgetallCommand(redisClient
*c
, int flags
) {
6706 robj
*o
, *lenobj
, *obj
;
6707 unsigned long count
= 0;
6710 if ((o
= lookupKeyReadOrReply(c
,c
->argv
[1],shared
.emptymultibulk
)) == NULL
6711 || checkType(c
,o
,REDIS_HASH
)) return;
6713 lenobj
= createObject(REDIS_STRING
,NULL
);
6715 decrRefCount(lenobj
);
6717 hi
= hashInitIterator(o
);
6718 while (hashNext(hi
) != REDIS_ERR
) {
6719 if (flags
& REDIS_HASH_KEY
) {
6720 obj
= hashCurrent(hi
,REDIS_HASH_KEY
);
6721 addReplyBulk(c
,obj
);
6725 if (flags
& REDIS_HASH_VALUE
) {
6726 obj
= hashCurrent(hi
,REDIS_HASH_VALUE
);
6727 addReplyBulk(c
,obj
);
6732 hashReleaseIterator(hi
);
6734 lenobj
->ptr
= sdscatprintf(sdsempty(),"*%lu\r\n",count
);
/* HKEYS: thin wrapper that calls genericHgetallCommand() with only the
 * REDIS_HASH_KEY flag set, i.e. the flag that function tests before
 * replying with the key of each iterated entry. */
6737 static void hkeysCommand(redisClient
*c
) {
6738 genericHgetallCommand(c
,REDIS_HASH_KEY
);
/* HVALS: thin wrapper that calls genericHgetallCommand() with only the
 * REDIS_HASH_VALUE flag set, i.e. the flag that function tests before
 * replying with the value of each iterated entry. */
6741 static void hvalsCommand(redisClient
*c
) {
6742 genericHgetallCommand(c
,REDIS_HASH_VALUE
);
/* HGETALL: thin wrapper that calls genericHgetallCommand() with both
 * REDIS_HASH_KEY and REDIS_HASH_VALUE set, so both the key branch and the
 * value branch of that function run for each iterated entry. */
6745 static void hgetallCommand(redisClient
*c
) {
6746 genericHgetallCommand(c
,REDIS_HASH_KEY
|REDIS_HASH_VALUE
);
6749 static void hexistsCommand(redisClient
*c
) {
6751 if ((o
= lookupKeyReadOrReply(c
,c
->argv
[1],shared
.czero
)) == NULL
||
6752 checkType(c
,o
,REDIS_HASH
)) return;
6754 addReply(c
, hashExists(o
,c
->argv
[2]) ? shared
.cone
: shared
.czero
);
6757 static void convertToRealHash(robj
*o
) {
6758 unsigned char *key
, *val
, *p
, *zm
= o
->ptr
;
6759 unsigned int klen
, vlen
;
6760 dict
*dict
= dictCreate(&hashDictType
,NULL
);
6762 assert(o
->type
== REDIS_HASH
&& o
->encoding
!= REDIS_ENCODING_HT
);
6763 p
= zipmapRewind(zm
);
6764 while((p
= zipmapNext(p
,&key
,&klen
,&val
,&vlen
)) != NULL
) {
6765 robj
*keyobj
, *valobj
;
6767 keyobj
= createStringObject((char*)key
,klen
);
6768 valobj
= createStringObject((char*)val
,vlen
);
6769 keyobj
= tryObjectEncoding(keyobj
);
6770 valobj
= tryObjectEncoding(valobj
);
6771 dictAdd(dict
,keyobj
,valobj
);
6773 o
->encoding
= REDIS_ENCODING_HT
;
6778 /* ========================= Non type-specific commands ==================== */
6780 static void flushdbCommand(redisClient
*c
) {
6781 server
.dirty
+= dictSize(c
->db
->dict
);
6782 dictEmpty(c
->db
->dict
);
6783 dictEmpty(c
->db
->expires
);
6784 addReply(c
,shared
.ok
);
6787 static void flushallCommand(redisClient
*c
) {
6788 server
.dirty
+= emptyDb();
6789 addReply(c
,shared
.ok
);
6790 if (server
.bgsavechildpid
!= -1) {
6791 kill(server
.bgsavechildpid
,SIGKILL
);
6792 rdbRemoveTempFile(server
.bgsavechildpid
);
6794 rdbSave(server
.dbfilename
);
6798 static redisSortOperation
*createSortOperation(int type
, robj
*pattern
) {
6799 redisSortOperation
*so
= zmalloc(sizeof(*so
));
6801 so
->pattern
= pattern
;
6805 /* Return the value associated to the key with a name obtained
6806 * substituting the first occurrence of '*' in 'pattern' with 'subst'.
6807 * The returned object will always have its refcount increased by 1
6808 * when it is non-NULL. */
6809 static robj
*lookupKeyByPattern(redisDb
*db
, robj
*pattern
, robj
*subst
) {
6812 robj keyobj
, fieldobj
, *o
;
6813 int prefixlen
, sublen
, postfixlen
, fieldlen
;
6814 /* Exploit the internal sds representation to create an sds string allocated on the stack in order to make this function faster */
6818 char buf
[REDIS_SORTKEY_MAX
+1];
6819 } keyname
, fieldname
;
6821 /* If the pattern is "#" return the substitution object itself in order
6822 * to implement the "SORT ... GET #" feature. */
6823 spat
= pattern
->ptr
;
6824 if (spat
[0] == '#' && spat
[1] == '\0') {
6825 incrRefCount(subst
);
6829 /* The substitution object may be specially encoded. If so we create
6830 * a decoded object on the fly. Otherwise getDecodedObject will just
6831 * increment the ref count, that we'll decrement later. */
6832 subst
= getDecodedObject(subst
);
6835 if (sdslen(spat
)+sdslen(ssub
)-1 > REDIS_SORTKEY_MAX
) return NULL
;
6836 p
= strchr(spat
,'*');
6838 decrRefCount(subst
);
6842 /* Find out if we're dealing with a hash dereference. */
6843 if ((f
= strstr(p
+1, "->")) != NULL
) {
6844 fieldlen
= sdslen(spat
)-(f
-spat
);
6845 /* this also copies \0 character */
6846 memcpy(fieldname
.buf
,f
+2,fieldlen
-1);
6847 fieldname
.len
= fieldlen
-2;
6853 sublen
= sdslen(ssub
);
6854 postfixlen
= sdslen(spat
)-(prefixlen
+1)-fieldlen
;
6855 memcpy(keyname
.buf
,spat
,prefixlen
);
6856 memcpy(keyname
.buf
+prefixlen
,ssub
,sublen
);
6857 memcpy(keyname
.buf
+prefixlen
+sublen
,p
+1,postfixlen
);
6858 keyname
.buf
[prefixlen
+sublen
+postfixlen
] = '\0';
6859 keyname
.len
= prefixlen
+sublen
+postfixlen
;
6860 decrRefCount(subst
);
6862 /* Lookup substituted key */
6863 initStaticStringObject(keyobj
,((char*)&keyname
)+(sizeof(long)*2));
6864 o
= lookupKeyRead(db
,&keyobj
);
6865 if (o
== NULL
) return NULL
;
6868 if (o
->type
!= REDIS_HASH
|| fieldname
.len
< 1) return NULL
;
6870 /* Retrieve value from hash by the field name. This operation
6871 * already increases the refcount of the returned object. */
6872 initStaticStringObject(fieldobj
,((char*)&fieldname
)+(sizeof(long)*2));
6873 o
= hashGet(o
, &fieldobj
);
6875 if (o
->type
!= REDIS_STRING
) return NULL
;
6877 /* Every object that this function returns needs to have its refcount
6878 * increased. sortCommand decreases it again. */
6885 /* sortCompare() is used by qsort in sortCommand(). Given that qsort_r with
6886 * the additional parameter is not standard but BSD-specific, we have to
6887 * pass sorting parameters via the global 'server' structure */
6888 static int sortCompare(const void *s1
, const void *s2
) {
6889 const redisSortObject
*so1
= s1
, *so2
= s2
;
6892 if (!server
.sort_alpha
) {
6893 /* Numeric sorting. Here it's trivial as we precomputed scores */
6894 if (so1
->u
.score
> so2
->u
.score
) {
6896 } else if (so1
->u
.score
< so2
->u
.score
) {
6902 /* Alphanumeric sorting */
6903 if (server
.sort_bypattern
) {
6904 if (!so1
->u
.cmpobj
|| !so2
->u
.cmpobj
) {
6905 /* At least one compare object is NULL */
6906 if (so1
->u
.cmpobj
== so2
->u
.cmpobj
)
6908 else if (so1
->u
.cmpobj
== NULL
)
6913 /* We have both the objects, use strcoll */
6914 cmp
= strcoll(so1
->u
.cmpobj
->ptr
,so2
->u
.cmpobj
->ptr
);
6917 /* Compare elements directly. */
6918 cmp
= compareStringObjects(so1
->obj
,so2
->obj
);
6921 return server
.sort_desc
? -cmp
: cmp
;
6924 /* The SORT command is the most complex command in Redis. Warning: this code
6925 * is optimized for speed and a bit less for readability */
6926 static void sortCommand(redisClient
*c
) {
6929 int desc
= 0, alpha
= 0;
6930 int limit_start
= 0, limit_count
= -1, start
, end
;
6931 int j
, dontsort
= 0, vectorlen
;
6932 int getop
= 0; /* GET operation counter */
6933 robj
*sortval
, *sortby
= NULL
, *storekey
= NULL
;
6934 redisSortObject
*vector
; /* Resulting vector to sort */
6936 /* Lookup the key to sort. It must be of the right types */
6937 sortval
= lookupKeyRead(c
->db
,c
->argv
[1]);
6938 if (sortval
== NULL
) {
6939 addReply(c
,shared
.emptymultibulk
);
6942 if (sortval
->type
!= REDIS_SET
&& sortval
->type
!= REDIS_LIST
&&
6943 sortval
->type
!= REDIS_ZSET
)
6945 addReply(c
,shared
.wrongtypeerr
);
6949 /* Create a list of operations to perform for every sorted element.
6950 * Operations can be GET/DEL/INCR/DECR */
6951 operations
= listCreate();
6952 listSetFreeMethod(operations
,zfree
);
6955 /* Now we need to protect sortval incrementing its count, in the future
6956 * SORT may have options able to overwrite/delete keys during the sorting
6957 * and the sorted key itself may get destroyed */
6958 incrRefCount(sortval
);
6960 /* The SORT command has an SQL-alike syntax, parse it */
6961 while(j
< c
->argc
) {
6962 int leftargs
= c
->argc
-j
-1;
6963 if (!strcasecmp(c
->argv
[j
]->ptr
,"asc")) {
6965 } else if (!strcasecmp(c
->argv
[j
]->ptr
,"desc")) {
6967 } else if (!strcasecmp(c
->argv
[j
]->ptr
,"alpha")) {
6969 } else if (!strcasecmp(c
->argv
[j
]->ptr
,"limit") && leftargs
>= 2) {
6970 limit_start
= atoi(c
->argv
[j
+1]->ptr
);
6971 limit_count
= atoi(c
->argv
[j
+2]->ptr
);
6973 } else if (!strcasecmp(c
->argv
[j
]->ptr
,"store") && leftargs
>= 1) {
6974 storekey
= c
->argv
[j
+1];
6976 } else if (!strcasecmp(c
->argv
[j
]->ptr
,"by") && leftargs
>= 1) {
6977 sortby
= c
->argv
[j
+1];
6978 /* If the BY pattern does not contain '*', i.e. it is constant,
6979 * we don't need to sort nor to lookup the weight keys. */
6980 if (strchr(c
->argv
[j
+1]->ptr
,'*') == NULL
) dontsort
= 1;
6982 } else if (!strcasecmp(c
->argv
[j
]->ptr
,"get") && leftargs
>= 1) {
6983 listAddNodeTail(operations
,createSortOperation(
6984 REDIS_SORT_GET
,c
->argv
[j
+1]));
6988 decrRefCount(sortval
);
6989 listRelease(operations
);
6990 addReply(c
,shared
.syntaxerr
);
6996 /* Load the sorting vector with all the objects to sort */
6997 switch(sortval
->type
) {
6998 case REDIS_LIST
: vectorlen
= listLength((list
*)sortval
->ptr
); break;
6999 case REDIS_SET
: vectorlen
= dictSize((dict
*)sortval
->ptr
); break;
7000 case REDIS_ZSET
: vectorlen
= dictSize(((zset
*)sortval
->ptr
)->dict
); break;
7001 default: vectorlen
= 0; redisPanic("Bad SORT type"); /* Avoid GCC warning */
7003 vector
= zmalloc(sizeof(redisSortObject
)*vectorlen
);
7006 if (sortval
->type
== REDIS_LIST
) {
7007 list
*list
= sortval
->ptr
;
7011 listRewind(list
,&li
);
7012 while((ln
= listNext(&li
))) {
7013 robj
*ele
= ln
->value
;
7014 vector
[j
].obj
= ele
;
7015 vector
[j
].u
.score
= 0;
7016 vector
[j
].u
.cmpobj
= NULL
;
7024 if (sortval
->type
== REDIS_SET
) {
7027 zset
*zs
= sortval
->ptr
;
7031 di
= dictGetIterator(set
);
7032 while((setele
= dictNext(di
)) != NULL
) {
7033 vector
[j
].obj
= dictGetEntryKey(setele
);
7034 vector
[j
].u
.score
= 0;
7035 vector
[j
].u
.cmpobj
= NULL
;
7038 dictReleaseIterator(di
);
7040 redisAssert(j
== vectorlen
);
7042 /* Now it's time to load the right scores in the sorting vector */
7043 if (dontsort
== 0) {
7044 for (j
= 0; j
< vectorlen
; j
++) {
7047 /* lookup value to sort by */
7048 byval
= lookupKeyByPattern(c
->db
,sortby
,vector
[j
].obj
);
7049 if (!byval
) continue;
7051 /* use object itself to sort by */
7052 byval
= vector
[j
].obj
;
7056 if (sortby
) vector
[j
].u
.cmpobj
= getDecodedObject(byval
);
7058 if (byval
->encoding
== REDIS_ENCODING_RAW
) {
7059 vector
[j
].u
.score
= strtod(byval
->ptr
,NULL
);
7060 } else if (byval
->encoding
== REDIS_ENCODING_INT
) {
7061 /* Don't need to decode the object if it's
7062 * integer-encoded (the only encoding supported) so
7063 * far. We can just cast it */
7064 vector
[j
].u
.score
= (long)byval
->ptr
;
7066 redisAssert(1 != 1);
7070 /* when the object was retrieved using lookupKeyByPattern,
7071 * its refcount needs to be decreased. */
7073 decrRefCount(byval
);
7078 /* We are ready to sort the vector... perform a bit of sanity check
7079 * on the LIMIT option too. We'll use a partial version of quicksort. */
7080 start
= (limit_start
< 0) ? 0 : limit_start
;
7081 end
= (limit_count
< 0) ? vectorlen
-1 : start
+limit_count
-1;
7082 if (start
>= vectorlen
) {
7083 start
= vectorlen
-1;
7086 if (end
>= vectorlen
) end
= vectorlen
-1;
7088 if (dontsort
== 0) {
7089 server
.sort_desc
= desc
;
7090 server
.sort_alpha
= alpha
;
7091 server
.sort_bypattern
= sortby
? 1 : 0;
7092 if (sortby
&& (start
!= 0 || end
!= vectorlen
-1))
7093 pqsort(vector
,vectorlen
,sizeof(redisSortObject
),sortCompare
, start
,end
);
7095 qsort(vector
,vectorlen
,sizeof(redisSortObject
),sortCompare
);
7098 /* Send command output to the output buffer, performing the specified
7099 * GET/DEL/INCR/DECR operations if any. */
7100 outputlen
= getop
? getop
*(end
-start
+1) : end
-start
+1;
7101 if (storekey
== NULL
) {
7102 /* STORE option not specified, send the sorting result to client */
7103 addReplySds(c
,sdscatprintf(sdsempty(),"*%d\r\n",outputlen
));
7104 for (j
= start
; j
<= end
; j
++) {
7108 if (!getop
) addReplyBulk(c
,vector
[j
].obj
);
7109 listRewind(operations
,&li
);
7110 while((ln
= listNext(&li
))) {
7111 redisSortOperation
*sop
= ln
->value
;
7112 robj
*val
= lookupKeyByPattern(c
->db
,sop
->pattern
,
7115 if (sop
->type
== REDIS_SORT_GET
) {
7117 addReply(c
,shared
.nullbulk
);
7119 addReplyBulk(c
,val
);
7123 redisAssert(sop
->type
== REDIS_SORT_GET
); /* always fails */
7128 robj
*listObject
= createListObject();
7129 list
*listPtr
= (list
*) listObject
->ptr
;
7131 /* STORE option specified, set the sorting result as a List object */
7132 for (j
= start
; j
<= end
; j
++) {
7137 listAddNodeTail(listPtr
,vector
[j
].obj
);
7138 incrRefCount(vector
[j
].obj
);
7140 listRewind(operations
,&li
);
7141 while((ln
= listNext(&li
))) {
7142 redisSortOperation
*sop
= ln
->value
;
7143 robj
*val
= lookupKeyByPattern(c
->db
,sop
->pattern
,
7146 if (sop
->type
== REDIS_SORT_GET
) {
7148 listAddNodeTail(listPtr
,createStringObject("",0));
7150 /* We should do a incrRefCount on val because it is
7151 * added to the list, but also a decrRefCount because
7152 * it is returned by lookupKeyByPattern. This results
7153 * in doing nothing at all. */
7154 listAddNodeTail(listPtr
,val
);
7157 redisAssert(sop
->type
== REDIS_SORT_GET
); /* always fails */
7161 if (dictReplace(c
->db
->dict
,storekey
,listObject
)) {
7162 incrRefCount(storekey
);
7164 /* Note: we add 1 because the DB is dirty anyway since even if the
7165 * SORT result is empty a new key is set and maybe the old content
7167 server
.dirty
+= 1+outputlen
;
7168 addReplySds(c
,sdscatprintf(sdsempty(),":%d\r\n",outputlen
));
7172 decrRefCount(sortval
);
7173 listRelease(operations
);
7174 for (j
= 0; j
< vectorlen
; j
++) {
7175 if (alpha
&& vector
[j
].u
.cmpobj
)
7176 decrRefCount(vector
[j
].u
.cmpobj
);
/* Convert an amount of bytes into a human readable string in the form
 * of 100B, 2G, 100M, 4K, and so forth.
 *
 * s: destination buffer. It is written with sprintf(), so no bounds check
 *    is performed here; the longest string produced is "16384.00P"
 *    (9 characters plus the terminating NUL).
 * n: number of bytes to describe.
 *
 * Values below 1024 are printed as an integer followed by 'B'. Larger
 * values are divided by the matching power of 1024 and printed with two
 * decimals followed by K, M, G, T or P. The last branch has no upper bound
 * so that 's' is always written: if the chain stopped at the G branch, any
 * value of 1 TiB or more would leave the caller's buffer untouched. */
static void bytesToHuman(char *s, unsigned long long n) {
    double d;

    if (n < 1024) {
        /* Bytes */
        sprintf(s,"%lluB",n);
    } else if (n < (1024*1024)) {
        d = (double)n/(1024);
        sprintf(s,"%.2fK",d);
    } else if (n < (1024LL*1024*1024)) {
        d = (double)n/(1024*1024);
        sprintf(s,"%.2fM",d);
    } else if (n < (1024LL*1024*1024*1024)) {
        d = (double)n/(1024LL*1024*1024);
        sprintf(s,"%.2fG",d);
    } else if (n < (1024LL*1024*1024*1024*1024)) {
        d = (double)n/(1024LL*1024*1024*1024);
        sprintf(s,"%.2fT",d);
    } else {
        /* Catch-all: everything from 1 PiB up to ULLONG_MAX. */
        d = (double)n/(1024LL*1024*1024*1024*1024);
        sprintf(s,"%.2fP",d);
    }
}
7202 /* Create the string returned by the INFO command. This is decoupled
7203 * from the INFO command itself as we need to report the same information
7204 * on memory corruption problems. */
7205 static sds
genRedisInfoString(void) {
7207 time_t uptime
= time(NULL
)-server
.stat_starttime
;
7211 bytesToHuman(hmem
,zmalloc_used_memory());
7212 info
= sdscatprintf(sdsempty(),
7213 "redis_version:%s\r\n"
7214 "redis_git_sha1:%s\r\n"
7215 "redis_git_dirty:%d\r\n"
7217 "multiplexing_api:%s\r\n"
7218 "process_id:%ld\r\n"
7219 "uptime_in_seconds:%ld\r\n"
7220 "uptime_in_days:%ld\r\n"
7221 "connected_clients:%d\r\n"
7222 "connected_slaves:%d\r\n"
7223 "blocked_clients:%d\r\n"
7224 "used_memory:%zu\r\n"
7225 "used_memory_human:%s\r\n"
7226 "changes_since_last_save:%lld\r\n"
7227 "bgsave_in_progress:%d\r\n"
7228 "last_save_time:%ld\r\n"
7229 "bgrewriteaof_in_progress:%d\r\n"
7230 "total_connections_received:%lld\r\n"
7231 "total_commands_processed:%lld\r\n"
7232 "expired_keys:%lld\r\n"
7233 "hash_max_zipmap_entries:%zu\r\n"
7234 "hash_max_zipmap_value:%zu\r\n"
7235 "pubsub_channels:%ld\r\n"
7236 "pubsub_patterns:%u\r\n"
7241 strtol(REDIS_GIT_DIRTY
,NULL
,10) > 0,
7242 (sizeof(long) == 8) ? "64" : "32",
7247 listLength(server
.clients
)-listLength(server
.slaves
),
7248 listLength(server
.slaves
),
7249 server
.blpop_blocked_clients
,
7250 zmalloc_used_memory(),
7253 server
.bgsavechildpid
!= -1,
7255 server
.bgrewritechildpid
!= -1,
7256 server
.stat_numconnections
,
7257 server
.stat_numcommands
,
7258 server
.stat_expiredkeys
,
7259 server
.hash_max_zipmap_entries
,
7260 server
.hash_max_zipmap_value
,
7261 dictSize(server
.pubsub_channels
),
7262 listLength(server
.pubsub_patterns
),
7263 server
.vm_enabled
!= 0,
7264 server
.masterhost
== NULL
? "master" : "slave"
7266 if (server
.masterhost
) {
7267 info
= sdscatprintf(info
,
7268 "master_host:%s\r\n"
7269 "master_port:%d\r\n"
7270 "master_link_status:%s\r\n"
7271 "master_last_io_seconds_ago:%d\r\n"
7274 (server
.replstate
== REDIS_REPL_CONNECTED
) ?
7276 server
.master
? ((int)(time(NULL
)-server
.master
->lastinteraction
)) : -1
7279 if (server
.vm_enabled
) {
7281 info
= sdscatprintf(info
,
7282 "vm_conf_max_memory:%llu\r\n"
7283 "vm_conf_page_size:%llu\r\n"
7284 "vm_conf_pages:%llu\r\n"
7285 "vm_stats_used_pages:%llu\r\n"
7286 "vm_stats_swapped_objects:%llu\r\n"
7287 "vm_stats_swappin_count:%llu\r\n"
7288 "vm_stats_swappout_count:%llu\r\n"
7289 "vm_stats_io_newjobs_len:%lu\r\n"
7290 "vm_stats_io_processing_len:%lu\r\n"
7291 "vm_stats_io_processed_len:%lu\r\n"
7292 "vm_stats_io_active_threads:%lu\r\n"
7293 "vm_stats_blocked_clients:%lu\r\n"
7294 ,(unsigned long long) server
.vm_max_memory
,
7295 (unsigned long long) server
.vm_page_size
,
7296 (unsigned long long) server
.vm_pages
,
7297 (unsigned long long) server
.vm_stats_used_pages
,
7298 (unsigned long long) server
.vm_stats_swapped_objects
,
7299 (unsigned long long) server
.vm_stats_swapins
,
7300 (unsigned long long) server
.vm_stats_swapouts
,
7301 (unsigned long) listLength(server
.io_newjobs
),
7302 (unsigned long) listLength(server
.io_processing
),
7303 (unsigned long) listLength(server
.io_processed
),
7304 (unsigned long) server
.io_active_threads
,
7305 (unsigned long) server
.vm_blocked_clients
7309 for (j
= 0; j
< server
.dbnum
; j
++) {
7310 long long keys
, vkeys
;
7312 keys
= dictSize(server
.db
[j
].dict
);
7313 vkeys
= dictSize(server
.db
[j
].expires
);
7314 if (keys
|| vkeys
) {
7315 info
= sdscatprintf(info
, "db%d:keys=%lld,expires=%lld\r\n",
7322 static void infoCommand(redisClient
*c
) {
7323 sds info
= genRedisInfoString();
7324 addReplySds(c
,sdscatprintf(sdsempty(),"$%lu\r\n",
7325 (unsigned long)sdslen(info
)));
7326 addReplySds(c
,info
);
7327 addReply(c
,shared
.crlf
);
7330 static void monitorCommand(redisClient
*c
) {
7331 /* ignore MONITOR if already slave or in monitor mode */
7332 if (c
->flags
& REDIS_SLAVE
) return;
7334 c
->flags
|= (REDIS_SLAVE
|REDIS_MONITOR
);
7336 listAddNodeTail(server
.monitors
,c
);
7337 addReply(c
,shared
.ok
);
7340 /* ================================= Expire ================================= */
7341 static int removeExpire(redisDb
*db
, robj
*key
) {
7342 if (dictDelete(db
->expires
,key
) == DICT_OK
) {
7349 static int setExpire(redisDb
*db
, robj
*key
, time_t when
) {
7350 if (dictAdd(db
->expires
,key
,(void*)when
) == DICT_ERR
) {
7358 /* Return the expire time of the specified key, or -1 if no expire
7359 * is associated with this key (i.e. the key is non volatile) */
7360 static time_t getExpire(redisDb
*db
, robj
*key
) {
7363 /* No expire? return ASAP */
7364 if (dictSize(db
->expires
) == 0 ||
7365 (de
= dictFind(db
->expires
,key
)) == NULL
) return -1;
7367 return (time_t) dictGetEntryVal(de
);
7370 static int expireIfNeeded(redisDb
*db
, robj
*key
) {
7374 /* No expire? return ASAP */
7375 if (dictSize(db
->expires
) == 0 ||
7376 (de
= dictFind(db
->expires
,key
)) == NULL
) return 0;
7378 /* Lookup the expire */
7379 when
= (time_t) dictGetEntryVal(de
);
7380 if (time(NULL
) <= when
) return 0;
7382 /* Delete the key */
7383 dictDelete(db
->expires
,key
);
7384 server
.stat_expiredkeys
++;
7385 return dictDelete(db
->dict
,key
) == DICT_OK
;
7388 static int deleteIfVolatile(redisDb
*db
, robj
*key
) {
7391 /* No expire? return ASAP */
7392 if (dictSize(db
->expires
) == 0 ||
7393 (de
= dictFind(db
->expires
,key
)) == NULL
) return 0;
7395 /* Delete the key */
7397 server
.stat_expiredkeys
++;
7398 dictDelete(db
->expires
,key
);
7399 return dictDelete(db
->dict
,key
) == DICT_OK
;
7402 static void expireGenericCommand(redisClient
*c
, robj
*key
, robj
*param
, long offset
) {
7406 if (getLongFromObjectOrReply(c
, param
, &seconds
, NULL
) != REDIS_OK
) return;
7410 de
= dictFind(c
->db
->dict
,key
);
7412 addReply(c
,shared
.czero
);
7416 if (deleteKey(c
->db
,key
)) server
.dirty
++;
7417 addReply(c
, shared
.cone
);
7420 time_t when
= time(NULL
)+seconds
;
7421 if (setExpire(c
->db
,key
,when
)) {
7422 addReply(c
,shared
.cone
);
7425 addReply(c
,shared
.czero
);
/* EXPIRE: forwards the key (argv[1]) and the time argument (argv[2]) to
 * expireGenericCommand() with an offset of 0. */
7431 static void expireCommand(redisClient
*c
) {
7432 expireGenericCommand(c
,c
->argv
[1],c
->argv
[2],0);
/* EXPIREAT: forwards the key (argv[1]) and the time argument (argv[2]) to
 * expireGenericCommand(), passing the current time as the offset.
 * NOTE(review): presumably the offset is used to turn the absolute
 * timestamp into a relative number of seconds; the statement that uses
 * 'offset' is not visible in expireGenericCommand() here, so confirm. */
7435 static void expireatCommand(redisClient
*c
) {
7436 expireGenericCommand(c
,c
->argv
[1],c
->argv
[2],time(NULL
));
7439 static void ttlCommand(redisClient
*c
) {
7443 expire
= getExpire(c
->db
,c
->argv
[1]);
7445 ttl
= (int) (expire
-time(NULL
));
7446 if (ttl
< 0) ttl
= -1;
7448 addReplySds(c
,sdscatprintf(sdsempty(),":%d\r\n",ttl
));
7451 /* ================================ MULTI/EXEC ============================== */
7453 /* Client state initialization for MULTI/EXEC */
7454 static void initClientMultiState(redisClient
*c
) {
7455 c
->mstate
.commands
= NULL
;
7456 c
->mstate
.count
= 0;
7459 /* Release all the resources associated with MULTI/EXEC state */
7460 static void freeClientMultiState(redisClient
*c
) {
7463 for (j
= 0; j
< c
->mstate
.count
; j
++) {
7465 multiCmd
*mc
= c
->mstate
.commands
+j
;
7467 for (i
= 0; i
< mc
->argc
; i
++)
7468 decrRefCount(mc
->argv
[i
]);
7471 zfree(c
->mstate
.commands
);
7474 /* Add a new command into the MULTI commands queue */
7475 static void queueMultiCommand(redisClient
*c
, struct redisCommand
*cmd
) {
7479 c
->mstate
.commands
= zrealloc(c
->mstate
.commands
,
7480 sizeof(multiCmd
)*(c
->mstate
.count
+1));
7481 mc
= c
->mstate
.commands
+c
->mstate
.count
;
7484 mc
->argv
= zmalloc(sizeof(robj
*)*c
->argc
);
7485 memcpy(mc
->argv
,c
->argv
,sizeof(robj
*)*c
->argc
);
7486 for (j
= 0; j
< c
->argc
; j
++)
7487 incrRefCount(mc
->argv
[j
]);
/* MULTI: set the REDIS_MULTI flag on the client and reply with shared.ok.
 * No check is made for the flag being already set, so issuing MULTI twice
 * simply sets the same flag again. */
7491 static void multiCommand(redisClient
*c
) {
7492 c
->flags
|= REDIS_MULTI
;
7493 addReply(c
,shared
.ok
);
7496 static void discardCommand(redisClient
*c
) {
7497 if (!(c
->flags
& REDIS_MULTI
)) {
7498 addReplySds(c
,sdsnew("-ERR DISCARD without MULTI\r\n"));
7502 freeClientMultiState(c
);
7503 initClientMultiState(c
);
7504 c
->flags
&= (~REDIS_MULTI
);
7505 addReply(c
,shared
.ok
);
7508 /* Send a MULTI command to all the slaves and AOF file. Check the execCommand
7509 * implememntation for more information. */
7510 static void execCommandReplicateMulti(redisClient
*c
) {
7511 struct redisCommand
*cmd
;
7512 robj
*multistring
= createStringObject("MULTI",5);
7514 cmd
= lookupCommand("multi");
7515 if (server
.appendonly
)
7516 feedAppendOnlyFile(cmd
,c
->db
->id
,&multistring
,1);
7517 if (listLength(server
.slaves
))
7518 replicationFeedSlaves(server
.slaves
,c
->db
->id
,&multistring
,1);
7519 decrRefCount(multistring
);
7522 static void execCommand(redisClient
*c
) {
7527 if (!(c
->flags
& REDIS_MULTI
)) {
7528 addReplySds(c
,sdsnew("-ERR EXEC without MULTI\r\n"));
7532 /* Check if we need to abort the EXEC if some WATCHed key was touched.
7533 * A failed EXEC will return a multi bulk nil object. */
7534 if (c
->flags
& REDIS_DIRTY_CAS
) {
7535 freeClientMultiState(c
);
7536 initClientMultiState(c
);
7537 c
->flags
&= ~(REDIS_MULTI
|REDIS_DIRTY_CAS
);
7539 addReply(c
,shared
.nullmultibulk
);
7543 /* Replicate a MULTI request now that we are sure the block is executed.
7544 * This way we'll deliver the MULTI/..../EXEC block as a whole and
7545 * both the AOF and the replication link will have the same consistency
7546 * and atomicity guarantees. */
7547 execCommandReplicateMulti(c
);
7549 /* Exec all the queued commands */
7550 orig_argv
= c
->argv
;
7551 orig_argc
= c
->argc
;
7552 addReplySds(c
,sdscatprintf(sdsempty(),"*%d\r\n",c
->mstate
.count
));
7553 for (j
= 0; j
< c
->mstate
.count
; j
++) {
7554 c
->argc
= c
->mstate
.commands
[j
].argc
;
7555 c
->argv
= c
->mstate
.commands
[j
].argv
;
7556 call(c
,c
->mstate
.commands
[j
].cmd
);
7558 c
->argv
= orig_argv
;
7559 c
->argc
= orig_argc
;
7560 freeClientMultiState(c
);
7561 initClientMultiState(c
);
7562 c
->flags
&= (~REDIS_MULTI
);
7564 /* Make sure the EXEC command is always replicated / AOF, since we
7565 * always send the MULTI command (we can't know beforehand if the
7566 * next operations will contain at least a modification to the DB). */
7570 /* =========================== Blocking Operations ========================= */
7572 /* Currently Redis blocking operations support is limited to list POP ops,
7573 * so the current implementation is not fully generic, but it is also not
7574 * completely specific so it will not require a rewrite to support new
7575 * kind of blocking operations in the future.
7577 * Still it's important to note that list blocking operations can be already
7578 * used as a notification mechanism in order to implement other blocking
7579 * operations at application level, so there must be a very strong evidence
7580 * of usefulness and generality before new blocking operations are implemented.
7582 * This is how the current blocking POP works, we use BLPOP as example:
7583 * - If the user calls BLPOP and the key exists and contains a non empty list
7584 * then LPOP is called instead. So BLPOP is semantically the same as LPOP
7585 * if there is no need to block.
7586 * - If instead BLPOP is called and the key does not exist or the list is
7587 * empty we need to block. In order to do so we remove the notification for
7588 * new data to read in the client socket (so that we'll not serve new
7589 * requests if the blocking request is not served). Also we put the client
7590 * in a dictionary (db->blocking_keys) mapping keys to a list of clients
7591 * blocking for these keys.
7592 * - If a PUSH operation against a key with blocked clients waiting is
7593 * performed, we serve the first in the list: basically instead of pushing
7594 * the new element inside the list we return it to the (first / oldest)
7595 * blocking client, unblock the client, and remove it from the list.
7597 * The above comment and the source code should be enough in order to understand
7598 * the implementation and modify / fix it later.
7601 /* Set a client in blocking mode for the specified key, with the specified
7603 static void blockForKeys(redisClient
*c
, robj
**keys
, int numkeys
, time_t timeout
) {
7608 c
->blocking_keys
= zmalloc(sizeof(robj
*)*numkeys
);
7609 c
->blocking_keys_num
= numkeys
;
7610 c
->blockingto
= timeout
;
7611 for (j
= 0; j
< numkeys
; j
++) {
7612 /* Add the key in the client structure, to map clients -> keys */
7613 c
->blocking_keys
[j
] = keys
[j
];
7614 incrRefCount(keys
[j
]);
7616 /* And in the other "side", to map keys -> clients */
7617 de
= dictFind(c
->db
->blocking_keys
,keys
[j
]);
7621 /* For every key we take a list of clients blocked for it */
7623 retval
= dictAdd(c
->db
->blocking_keys
,keys
[j
],l
);
7624 incrRefCount(keys
[j
]);
7625 assert(retval
== DICT_OK
);
7627 l
= dictGetEntryVal(de
);
7629 listAddNodeTail(l
,c
);
7631 /* Mark the client as a blocked client */
7632 c
->flags
|= REDIS_BLOCKED
;
7633 server
.blpop_blocked_clients
++;
7636 /* Unblock a client that's waiting in a blocking operation such as BLPOP */
7637 static void unblockClientWaitingData(redisClient
*c
) {
7642 assert(c
->blocking_keys
!= NULL
);
7643 /* The client may wait for multiple keys, so unblock it for every key. */
7644 for (j
= 0; j
< c
->blocking_keys_num
; j
++) {
7645 /* Remove this client from the list of clients waiting for this key. */
7646 de
= dictFind(c
->db
->blocking_keys
,c
->blocking_keys
[j
]);
7648 l
= dictGetEntryVal(de
);
7649 listDelNode(l
,listSearchKey(l
,c
));
7650 /* If the list is empty we need to remove it to avoid wasting memory */
7651 if (listLength(l
) == 0)
7652 dictDelete(c
->db
->blocking_keys
,c
->blocking_keys
[j
]);
7653 decrRefCount(c
->blocking_keys
[j
]);
7655 /* Cleanup the client structure */
7656 zfree(c
->blocking_keys
);
7657 c
->blocking_keys
= NULL
;
7658 c
->flags
&= (~REDIS_BLOCKED
);
7659 server
.blpop_blocked_clients
--;
7660 /* We want to process data if there is some command waiting
7661 * in the input buffer. Note that this is safe even if
7662 * unblockClientWaitingData() gets called from freeClient() because
7663 * freeClient() will be smart enough to call this function
7664 * *after* c->querybuf was set to NULL. */
7665 if (c
->querybuf
&& sdslen(c
->querybuf
) > 0) processInputBuffer(c
);
7668 /* This should be called from any function PUSHing into lists.
7669 * 'c' is the "pushing client", 'key' is the key it is pushing data against,
7670 * 'ele' is the element pushed.
7672 * If the function returns 0 there was no client waiting for a list push
7675 * If the function returns 1 there was a client waiting for a list push
7676 * against this key, the element was passed to this client thus it's not
7677 * needed to actually add it to the list and the caller should return asap. */
7678 static int handleClientsWaitingListPush(redisClient
*c
, robj
*key
, robj
*ele
) {
7679 struct dictEntry
*de
;
7680 redisClient
*receiver
;
7684 de
= dictFind(c
->db
->blocking_keys
,key
);
7685 if (de
== NULL
) return 0;
7686 l
= dictGetEntryVal(de
);
7689 receiver
= ln
->value
;
7691 addReplySds(receiver
,sdsnew("*2\r\n"));
7692 addReplyBulk(receiver
,key
);
7693 addReplyBulk(receiver
,ele
);
7694 unblockClientWaitingData(receiver
);
7698 /* Blocking RPOP/LPOP */
7699 static void blockingPopGenericCommand(redisClient
*c
, int where
) {
7704 for (j
= 1; j
< c
->argc
-1; j
++) {
7705 o
= lookupKeyWrite(c
->db
,c
->argv
[j
]);
7707 if (o
->type
!= REDIS_LIST
) {
7708 addReply(c
,shared
.wrongtypeerr
);
7711 list
*list
= o
->ptr
;
7712 if (listLength(list
) != 0) {
7713 /* If the list contains elements fall back to the usual
7714 * non-blocking POP operation */
7715 robj
*argv
[2], **orig_argv
;
7718 /* We need to alter the command arguments before to call
7719 * popGenericCommand() as the command takes a single key. */
7720 orig_argv
= c
->argv
;
7721 orig_argc
= c
->argc
;
7722 argv
[1] = c
->argv
[j
];
7726 /* Also the return value is different, we need to output
7727 * the multi bulk reply header and the key name. The
7728 * "real" command will add the last element (the value)
7729 * for us. If this souds like an hack to you it's just
7730 * because it is... */
7731 addReplySds(c
,sdsnew("*2\r\n"));
7732 addReplyBulk(c
,argv
[1]);
7733 popGenericCommand(c
,where
);
7735 /* Fix the client structure with the original stuff */
7736 c
->argv
= orig_argv
;
7737 c
->argc
= orig_argc
;
7743 /* If the list is empty or the key does not exists we must block */
7744 timeout
= strtol(c
->argv
[c
->argc
-1]->ptr
,NULL
,10);
7745 if (timeout
> 0) timeout
+= time(NULL
);
7746 blockForKeys(c
,c
->argv
+1,c
->argc
-2,timeout
);
7749 static void blpopCommand(redisClient
*c
) {
7750 blockingPopGenericCommand(c
,REDIS_HEAD
);
7753 static void brpopCommand(redisClient
*c
) {
7754 blockingPopGenericCommand(c
,REDIS_TAIL
);
7757 /* =============================== Replication ============================= */
7759 static int syncWrite(int fd
, char *ptr
, ssize_t size
, int timeout
) {
7760 ssize_t nwritten
, ret
= size
;
7761 time_t start
= time(NULL
);
7765 if (aeWait(fd
,AE_WRITABLE
,1000) & AE_WRITABLE
) {
7766 nwritten
= write(fd
,ptr
,size
);
7767 if (nwritten
== -1) return -1;
7771 if ((time(NULL
)-start
) > timeout
) {
7779 static int syncRead(int fd
, char *ptr
, ssize_t size
, int timeout
) {
7780 ssize_t nread
, totread
= 0;
7781 time_t start
= time(NULL
);
7785 if (aeWait(fd
,AE_READABLE
,1000) & AE_READABLE
) {
7786 nread
= read(fd
,ptr
,size
);
7787 if (nread
== -1) return -1;
7792 if ((time(NULL
)-start
) > timeout
) {
/* Read a line from 'fd' one byte at a time, storing at most 'size'-1
 * characters plus the nul terminator into 'ptr'. The trailing "\n" (and
 * the "\r" before it, if any) is not stored.
 *
 * Returns the number of characters stored before the newline was seen or
 * the buffer got full, or -1 if syncRead() fails. */
static int syncReadLine(int fd, char *ptr, ssize_t size, int timeout) {
    ssize_t nread = 0;

    size--; /* Reserve room for the nul terminator. */
    while(size) {
        char c;

        if (syncRead(fd,&c,1,timeout) == -1) return -1;
        if (c == '\n') {
            *ptr = '\0';
            if (nread && *(ptr-1) == '\r') *(ptr-1) = '\0';
            return nread;
        } else {
            *ptr++ = c;
            *ptr = '\0';
            nread++;
            /* Account for the byte just stored: without this the loop never
             * stops on long lines and writes past the end of the buffer. */
            size--;
        }
    }
    return nread;
}
7821 static void syncCommand(redisClient
*c
) {
7822 /* ignore SYNC if aleady slave or in monitor mode */
7823 if (c
->flags
& REDIS_SLAVE
) return;
7825 /* SYNC can't be issued when the server has pending data to send to
7826 * the client about already issued commands. We need a fresh reply
7827 * buffer registering the differences between the BGSAVE and the current
7828 * dataset, so that we can copy to other slaves if needed. */
7829 if (listLength(c
->reply
) != 0) {
7830 addReplySds(c
,sdsnew("-ERR SYNC is invalid with pending input\r\n"));
7834 redisLog(REDIS_NOTICE
,"Slave ask for synchronization");
7835 /* Here we need to check if there is a background saving operation
7836 * in progress, or if it is required to start one */
7837 if (server
.bgsavechildpid
!= -1) {
7838 /* Ok a background save is in progress. Let's check if it is a good
7839 * one for replication, i.e. if there is another slave that is
7840 * registering differences since the server forked to save */
7845 listRewind(server
.slaves
,&li
);
7846 while((ln
= listNext(&li
))) {
7848 if (slave
->replstate
== REDIS_REPL_WAIT_BGSAVE_END
) break;
7851 /* Perfect, the server is already registering differences for
7852 * another slave. Set the right state, and copy the buffer. */
7853 listRelease(c
->reply
);
7854 c
->reply
= listDup(slave
->reply
);
7855 c
->replstate
= REDIS_REPL_WAIT_BGSAVE_END
;
7856 redisLog(REDIS_NOTICE
,"Waiting for end of BGSAVE for SYNC");
7858 /* No way, we need to wait for the next BGSAVE in order to
7859 * register differences */
7860 c
->replstate
= REDIS_REPL_WAIT_BGSAVE_START
;
7861 redisLog(REDIS_NOTICE
,"Waiting for next BGSAVE for SYNC");
7864 /* Ok we don't have a BGSAVE in progress, let's start one */
7865 redisLog(REDIS_NOTICE
,"Starting BGSAVE for SYNC");
7866 if (rdbSaveBackground(server
.dbfilename
) != REDIS_OK
) {
7867 redisLog(REDIS_NOTICE
,"Replication failed, can't BGSAVE");
7868 addReplySds(c
,sdsnew("-ERR Unalbe to perform background save\r\n"));
7871 c
->replstate
= REDIS_REPL_WAIT_BGSAVE_END
;
7874 c
->flags
|= REDIS_SLAVE
;
7876 listAddNodeTail(server
.slaves
,c
);
7880 static void sendBulkToSlave(aeEventLoop
*el
, int fd
, void *privdata
, int mask
) {
7881 redisClient
*slave
= privdata
;
7883 REDIS_NOTUSED(mask
);
7884 char buf
[REDIS_IOBUF_LEN
];
7885 ssize_t nwritten
, buflen
;
7887 if (slave
->repldboff
== 0) {
7888 /* Write the bulk write count before to transfer the DB. In theory here
7889 * we don't know how much room there is in the output buffer of the
7890 * socket, but in pratice SO_SNDLOWAT (the minimum count for output
7891 * operations) will never be smaller than the few bytes we need. */
7894 bulkcount
= sdscatprintf(sdsempty(),"$%lld\r\n",(unsigned long long)
7896 if (write(fd
,bulkcount
,sdslen(bulkcount
)) != (signed)sdslen(bulkcount
))
7904 lseek(slave
->repldbfd
,slave
->repldboff
,SEEK_SET
);
7905 buflen
= read(slave
->repldbfd
,buf
,REDIS_IOBUF_LEN
);
7907 redisLog(REDIS_WARNING
,"Read error sending DB to slave: %s",
7908 (buflen
== 0) ? "premature EOF" : strerror(errno
));
7912 if ((nwritten
= write(fd
,buf
,buflen
)) == -1) {
7913 redisLog(REDIS_VERBOSE
,"Write error sending DB to slave: %s",
7918 slave
->repldboff
+= nwritten
;
7919 if (slave
->repldboff
== slave
->repldbsize
) {
7920 close(slave
->repldbfd
);
7921 slave
->repldbfd
= -1;
7922 aeDeleteFileEvent(server
.el
,slave
->fd
,AE_WRITABLE
);
7923 slave
->replstate
= REDIS_REPL_ONLINE
;
7924 if (aeCreateFileEvent(server
.el
, slave
->fd
, AE_WRITABLE
,
7925 sendReplyToClient
, slave
) == AE_ERR
) {
7929 addReplySds(slave
,sdsempty());
7930 redisLog(REDIS_NOTICE
,"Synchronization with slave succeeded");
7934 /* This function is called at the end of every backgrond saving.
7935 * The argument bgsaveerr is REDIS_OK if the background saving succeeded
7936 * otherwise REDIS_ERR is passed to the function.
7938 * The goal of this function is to handle slaves waiting for a successful
7939 * background saving in order to perform non-blocking synchronization. */
7940 static void updateSlavesWaitingBgsave(int bgsaveerr
) {
7942 int startbgsave
= 0;
7945 listRewind(server
.slaves
,&li
);
7946 while((ln
= listNext(&li
))) {
7947 redisClient
*slave
= ln
->value
;
7949 if (slave
->replstate
== REDIS_REPL_WAIT_BGSAVE_START
) {
7951 slave
->replstate
= REDIS_REPL_WAIT_BGSAVE_END
;
7952 } else if (slave
->replstate
== REDIS_REPL_WAIT_BGSAVE_END
) {
7953 struct redis_stat buf
;
7955 if (bgsaveerr
!= REDIS_OK
) {
7957 redisLog(REDIS_WARNING
,"SYNC failed. BGSAVE child returned an error");
7960 if ((slave
->repldbfd
= open(server
.dbfilename
,O_RDONLY
)) == -1 ||
7961 redis_fstat(slave
->repldbfd
,&buf
) == -1) {
7963 redisLog(REDIS_WARNING
,"SYNC failed. Can't open/stat DB after BGSAVE: %s", strerror(errno
));
7966 slave
->repldboff
= 0;
7967 slave
->repldbsize
= buf
.st_size
;
7968 slave
->replstate
= REDIS_REPL_SEND_BULK
;
7969 aeDeleteFileEvent(server
.el
,slave
->fd
,AE_WRITABLE
);
7970 if (aeCreateFileEvent(server
.el
, slave
->fd
, AE_WRITABLE
, sendBulkToSlave
, slave
) == AE_ERR
) {
7977 if (rdbSaveBackground(server
.dbfilename
) != REDIS_OK
) {
7980 listRewind(server
.slaves
,&li
);
7981 redisLog(REDIS_WARNING
,"SYNC failed. BGSAVE failed");
7982 while((ln
= listNext(&li
))) {
7983 redisClient
*slave
= ln
->value
;
7985 if (slave
->replstate
== REDIS_REPL_WAIT_BGSAVE_START
)
7992 static int syncWithMaster(void) {
7993 char buf
[1024], tmpfile
[256], authcmd
[1024];
7995 int fd
= anetTcpConnect(NULL
,server
.masterhost
,server
.masterport
);
7996 int dfd
, maxtries
= 5;
7999 redisLog(REDIS_WARNING
,"Unable to connect to MASTER: %s",
8004 /* AUTH with the master if required. */
8005 if(server
.masterauth
) {
8006 snprintf(authcmd
, 1024, "AUTH %s\r\n", server
.masterauth
);
8007 if (syncWrite(fd
, authcmd
, strlen(server
.masterauth
)+7, 5) == -1) {
8009 redisLog(REDIS_WARNING
,"Unable to AUTH to MASTER: %s",
8013 /* Read the AUTH result. */
8014 if (syncReadLine(fd
,buf
,1024,3600) == -1) {
8016 redisLog(REDIS_WARNING
,"I/O error reading auth result from MASTER: %s",
8020 if (buf
[0] != '+') {
8022 redisLog(REDIS_WARNING
,"Cannot AUTH to MASTER, is the masterauth password correct?");
8027 /* Issue the SYNC command */
8028 if (syncWrite(fd
,"SYNC \r\n",7,5) == -1) {
8030 redisLog(REDIS_WARNING
,"I/O error writing to MASTER: %s",
8034 /* Read the bulk write count */
8035 if (syncReadLine(fd
,buf
,1024,3600) == -1) {
8037 redisLog(REDIS_WARNING
,"I/O error reading bulk count from MASTER: %s",
8041 if (buf
[0] != '$') {
8043 redisLog(REDIS_WARNING
,"Bad protocol from MASTER, the first byte is not '$', are you sure the host and port are right?");
8046 dumpsize
= strtol(buf
+1,NULL
,10);
8047 redisLog(REDIS_NOTICE
,"Receiving %ld bytes data dump from MASTER",dumpsize
);
8048 /* Read the bulk write data on a temp file */
8050 snprintf(tmpfile
,256,
8051 "temp-%d.%ld.rdb",(int)time(NULL
),(long int)getpid());
8052 dfd
= open(tmpfile
,O_CREAT
|O_WRONLY
|O_EXCL
,0644);
8053 if (dfd
!= -1) break;
8058 redisLog(REDIS_WARNING
,"Opening the temp file needed for MASTER <-> SLAVE synchronization: %s",strerror(errno
));
8062 int nread
, nwritten
;
8064 nread
= read(fd
,buf
,(dumpsize
< 1024)?dumpsize
:1024);
8066 redisLog(REDIS_WARNING
,"I/O error trying to sync with MASTER: %s",
8072 nwritten
= write(dfd
,buf
,nread
);
8073 if (nwritten
== -1) {
8074 redisLog(REDIS_WARNING
,"Write error writing to the DB dump file needed for MASTER <-> SLAVE synchrnonization: %s", strerror(errno
));
8082 if (rename(tmpfile
,server
.dbfilename
) == -1) {
8083 redisLog(REDIS_WARNING
,"Failed trying to rename the temp DB into dump.rdb in MASTER <-> SLAVE synchronization: %s", strerror(errno
));
8089 if (rdbLoad(server
.dbfilename
) != REDIS_OK
) {
8090 redisLog(REDIS_WARNING
,"Failed trying to load the MASTER synchronization DB from disk");
8094 server
.master
= createClient(fd
);
8095 server
.master
->flags
|= REDIS_MASTER
;
8096 server
.master
->authenticated
= 1;
8097 server
.replstate
= REDIS_REPL_CONNECTED
;
8101 static void slaveofCommand(redisClient
*c
) {
8102 if (!strcasecmp(c
->argv
[1]->ptr
,"no") &&
8103 !strcasecmp(c
->argv
[2]->ptr
,"one")) {
8104 if (server
.masterhost
) {
8105 sdsfree(server
.masterhost
);
8106 server
.masterhost
= NULL
;
8107 if (server
.master
) freeClient(server
.master
);
8108 server
.replstate
= REDIS_REPL_NONE
;
8109 redisLog(REDIS_NOTICE
,"MASTER MODE enabled (user request)");
8112 sdsfree(server
.masterhost
);
8113 server
.masterhost
= sdsdup(c
->argv
[1]->ptr
);
8114 server
.masterport
= atoi(c
->argv
[2]->ptr
);
8115 if (server
.master
) freeClient(server
.master
);
8116 server
.replstate
= REDIS_REPL_CONNECT
;
8117 redisLog(REDIS_NOTICE
,"SLAVE OF %s:%d enabled (user request)",
8118 server
.masterhost
, server
.masterport
);
8120 addReply(c
,shared
.ok
);
8123 /* ============================ Maxmemory directive ======================== */
8125 /* Try to free one object form the pre-allocated objects free list.
8126 * This is useful under low mem conditions as by default we take 1 million
8127 * free objects allocated. On success REDIS_OK is returned, otherwise
8129 static int tryFreeOneObjectFromFreelist(void) {
8132 if (server
.vm_enabled
) pthread_mutex_lock(&server
.obj_freelist_mutex
);
8133 if (listLength(server
.objfreelist
)) {
8134 listNode
*head
= listFirst(server
.objfreelist
);
8135 o
= listNodeValue(head
);
8136 listDelNode(server
.objfreelist
,head
);
8137 if (server
.vm_enabled
) pthread_mutex_unlock(&server
.obj_freelist_mutex
);
8141 if (server
.vm_enabled
) pthread_mutex_unlock(&server
.obj_freelist_mutex
);
8146 /* This function gets called when 'maxmemory' is set on the config file to limit
8147 * the max memory used by the server, and we are out of memory.
8148 * This function will try to, in order:
8150 * - Free objects from the free list
8151 * - Try to remove keys with an EXPIRE set
8153 * It is not possible to free enough memory to reach used-memory < maxmemory
8154 * the server will start refusing commands that will enlarge even more the
8157 static void freeMemoryIfNeeded(void) {
8158 while (server
.maxmemory
&& zmalloc_used_memory() > server
.maxmemory
) {
8159 int j
, k
, freed
= 0;
8161 if (tryFreeOneObjectFromFreelist() == REDIS_OK
) continue;
8162 for (j
= 0; j
< server
.dbnum
; j
++) {
8164 robj
*minkey
= NULL
;
8165 struct dictEntry
*de
;
8167 if (dictSize(server
.db
[j
].expires
)) {
8169 /* From a sample of three keys drop the one nearest to
8170 * the natural expire */
8171 for (k
= 0; k
< 3; k
++) {
8174 de
= dictGetRandomKey(server
.db
[j
].expires
);
8175 t
= (time_t) dictGetEntryVal(de
);
8176 if (minttl
== -1 || t
< minttl
) {
8177 minkey
= dictGetEntryKey(de
);
8181 deleteKey(server
.db
+j
,minkey
);
8184 if (!freed
) return; /* nothing to free... */
8188 /* ============================== Append Only file ========================== */
8190 /* Write the append only file buffer on disk.
8192 * Since we are required to write the AOF before replying to the client,
8193 * and the only way the client socket can get a write is entering when the
8194 * the event loop, we accumulate all the AOF writes in a memory
8195 * buffer and write it on disk using this function just before entering
8196 * the event loop again. */
8197 static void flushAppendOnlyFile(void) {
8201 if (sdslen(server
.aofbuf
) == 0) return;
8203 /* We want to perform a single write. This should be guaranteed atomic
8204 * at least if the filesystem we are writing is a real physical one.
8205 * While this will save us against the server being killed I don't think
8206 * there is much to do about the whole server stopping for power problems
8208 nwritten
= write(server
.appendfd
,server
.aofbuf
,sdslen(server
.aofbuf
));
8209 if (nwritten
!= (signed)sdslen(server
.aofbuf
)) {
8210 /* Ooops, we are in troubles. The best thing to do for now is
8211 * aborting instead of giving the illusion that everything is
8212 * working as expected. */
8213 if (nwritten
== -1) {
8214 redisLog(REDIS_WARNING
,"Exiting on error writing to the append-only file: %s",strerror(errno
));
8216 redisLog(REDIS_WARNING
,"Exiting on short write while writing to the append-only file: %s",strerror(errno
));
8220 sdsfree(server
.aofbuf
);
8221 server
.aofbuf
= sdsempty();
8223 /* Fsync if needed */
8225 if (server
.appendfsync
== APPENDFSYNC_ALWAYS
||
8226 (server
.appendfsync
== APPENDFSYNC_EVERYSEC
&&
8227 now
-server
.lastfsync
> 1))
8229 /* aof_fsync is defined as fdatasync() for Linux in order to avoid
8230 * flushing metadata. */
8231 aof_fsync(server
.appendfd
); /* Let's try to get this data on the disk */
8232 server
.lastfsync
= now
;
8236 static sds
catAppendOnlyGenericCommand(sds buf
, int argc
, robj
**argv
) {
8238 buf
= sdscatprintf(buf
,"*%d\r\n",argc
);
8239 for (j
= 0; j
< argc
; j
++) {
8240 robj
*o
= getDecodedObject(argv
[j
]);
8241 buf
= sdscatprintf(buf
,"$%lu\r\n",(unsigned long)sdslen(o
->ptr
));
8242 buf
= sdscatlen(buf
,o
->ptr
,sdslen(o
->ptr
));
8243 buf
= sdscatlen(buf
,"\r\n",2);
8249 static sds
catAppendOnlyExpireAtCommand(sds buf
, robj
*key
, robj
*seconds
) {
8254 /* Make sure we can use strtol */
8255 seconds
= getDecodedObject(seconds
);
8256 when
= time(NULL
)+strtol(seconds
->ptr
,NULL
,10);
8257 decrRefCount(seconds
);
8259 argv
[0] = createStringObject("EXPIREAT",8);
8261 argv
[2] = createObject(REDIS_STRING
,
8262 sdscatprintf(sdsempty(),"%ld",when
));
8263 buf
= catAppendOnlyGenericCommand(buf
, argc
, argv
);
8264 decrRefCount(argv
[0]);
8265 decrRefCount(argv
[2]);
8269 static void feedAppendOnlyFile(struct redisCommand
*cmd
, int dictid
, robj
**argv
, int argc
) {
8270 sds buf
= sdsempty();
8273 /* The DB this command was targetting is not the same as the last command
8274 * we appendend. To issue a SELECT command is needed. */
8275 if (dictid
!= server
.appendseldb
) {
8278 snprintf(seldb
,sizeof(seldb
),"%d",dictid
);
8279 buf
= sdscatprintf(buf
,"*2\r\n$6\r\nSELECT\r\n$%lu\r\n%s\r\n",
8280 (unsigned long)strlen(seldb
),seldb
);
8281 server
.appendseldb
= dictid
;
8284 if (cmd
->proc
== expireCommand
) {
8285 /* Translate EXPIRE into EXPIREAT */
8286 buf
= catAppendOnlyExpireAtCommand(buf
,argv
[1],argv
[2]);
8287 } else if (cmd
->proc
== setexCommand
) {
8288 /* Translate SETEX to SET and EXPIREAT */
8289 tmpargv
[0] = createStringObject("SET",3);
8290 tmpargv
[1] = argv
[1];
8291 tmpargv
[2] = argv
[3];
8292 buf
= catAppendOnlyGenericCommand(buf
,3,tmpargv
);
8293 decrRefCount(tmpargv
[0]);
8294 buf
= catAppendOnlyExpireAtCommand(buf
,argv
[1],argv
[2]);
8296 buf
= catAppendOnlyGenericCommand(buf
,argc
,argv
);
8299 /* Append to the AOF buffer. This will be flushed on disk just before
8300 * of re-entering the event loop, so before the client will get a
8301 * positive reply about the operation performed. */
8302 server
.aofbuf
= sdscatlen(server
.aofbuf
,buf
,sdslen(buf
));
8304 /* If a background append only file rewriting is in progress we want to
8305 * accumulate the differences between the child DB and the current one
8306 * in a buffer, so that when the child process will do its work we
8307 * can append the differences to the new append only file. */
8308 if (server
.bgrewritechildpid
!= -1)
8309 server
.bgrewritebuf
= sdscatlen(server
.bgrewritebuf
,buf
,sdslen(buf
));
8314 /* In Redis commands are always executed in the context of a client, so in
8315 * order to load the append only file we need to create a fake client. */
8316 static struct redisClient
*createFakeClient(void) {
8317 struct redisClient
*c
= zmalloc(sizeof(*c
));
8321 c
->querybuf
= sdsempty();
8325 /* We set the fake client as a slave waiting for the synchronization
8326 * so that Redis will not try to send replies to this client. */
8327 c
->replstate
= REDIS_REPL_WAIT_BGSAVE_START
;
8328 c
->reply
= listCreate();
8329 listSetFreeMethod(c
->reply
,decrRefCount
);
8330 listSetDupMethod(c
->reply
,dupClientReplyValue
);
8331 initClientMultiState(c
);
8335 static void freeFakeClient(struct redisClient
*c
) {
8336 sdsfree(c
->querybuf
);
8337 listRelease(c
->reply
);
8338 freeClientMultiState(c
);
8342 /* Replay the append log file. On error REDIS_OK is returned. On non fatal
8343 * error (the append only file is zero-length) REDIS_ERR is returned. On
8344 * fatal error an error message is logged and the program exists. */
8345 int loadAppendOnlyFile(char *filename
) {
8346 struct redisClient
*fakeClient
;
8347 FILE *fp
= fopen(filename
,"r");
8348 struct redis_stat sb
;
8349 unsigned long long loadedkeys
= 0;
8350 int appendonly
= server
.appendonly
;
8352 if (redis_fstat(fileno(fp
),&sb
) != -1 && sb
.st_size
== 0)
8356 redisLog(REDIS_WARNING
,"Fatal error: can't open the append log file for reading: %s",strerror(errno
));
8360 /* Temporarily disable AOF, to prevent EXEC from feeding a MULTI
8361 * to the same file we're about to read. */
8362 server
.appendonly
= 0;
8364 fakeClient
= createFakeClient();
8371 struct redisCommand
*cmd
;
8373 if (fgets(buf
,sizeof(buf
),fp
) == NULL
) {
8379 if (buf
[0] != '*') goto fmterr
;
8381 argv
= zmalloc(sizeof(robj
*)*argc
);
8382 for (j
= 0; j
< argc
; j
++) {
8383 if (fgets(buf
,sizeof(buf
),fp
) == NULL
) goto readerr
;
8384 if (buf
[0] != '$') goto fmterr
;
8385 len
= strtol(buf
+1,NULL
,10);
8386 argsds
= sdsnewlen(NULL
,len
);
8387 if (len
&& fread(argsds
,len
,1,fp
) == 0) goto fmterr
;
8388 argv
[j
] = createObject(REDIS_STRING
,argsds
);
8389 if (fread(buf
,2,1,fp
) == 0) goto fmterr
; /* discard CRLF */
8392 /* Command lookup */
8393 cmd
= lookupCommand(argv
[0]->ptr
);
8395 redisLog(REDIS_WARNING
,"Unknown command '%s' reading the append only file", argv
[0]->ptr
);
8398 /* Try object encoding */
8399 if (cmd
->flags
& REDIS_CMD_BULK
)
8400 argv
[argc
-1] = tryObjectEncoding(argv
[argc
-1]);
8401 /* Run the command in the context of a fake client */
8402 fakeClient
->argc
= argc
;
8403 fakeClient
->argv
= argv
;
8404 cmd
->proc(fakeClient
);
8405 /* Discard the reply objects list from the fake client */
8406 while(listLength(fakeClient
->reply
))
8407 listDelNode(fakeClient
->reply
,listFirst(fakeClient
->reply
));
8408 /* Clean up, ready for the next command */
8409 for (j
= 0; j
< argc
; j
++) decrRefCount(argv
[j
]);
8411 /* Handle swapping while loading big datasets when VM is on */
8413 if (server
.vm_enabled
&& (loadedkeys
% 5000) == 0) {
8414 while (zmalloc_used_memory() > server
.vm_max_memory
) {
8415 if (vmSwapOneObjectBlocking() == REDIS_ERR
) break;
8420 /* This point can only be reached when EOF is reached without errors.
8421 * If the client is in the middle of a MULTI/EXEC, log error and quit. */
8422 if (fakeClient
->flags
& REDIS_MULTI
) goto readerr
;
8425 freeFakeClient(fakeClient
);
8426 server
.appendonly
= appendonly
;
8431 redisLog(REDIS_WARNING
,"Unexpected end of file reading the append only file");
8433 redisLog(REDIS_WARNING
,"Unrecoverable error reading the append only file: %s", strerror(errno
));
8437 redisLog(REDIS_WARNING
,"Bad file format reading the append only file");
8441 /* Write an object into a file in the bulk format $<count>\r\n<payload>\r\n */
8442 static int fwriteBulkObject(FILE *fp
, robj
*obj
) {
8446 /* Avoid the incr/decr ref count business if possible to help
8447 * copy-on-write (we are often in a child process when this function
8449 * Also makes sure that key objects don't get incrRefCount-ed when VM
8451 if (obj
->encoding
!= REDIS_ENCODING_RAW
) {
8452 obj
= getDecodedObject(obj
);
8455 snprintf(buf
,sizeof(buf
),"$%ld\r\n",(long)sdslen(obj
->ptr
));
8456 if (fwrite(buf
,strlen(buf
),1,fp
) == 0) goto err
;
8457 if (sdslen(obj
->ptr
) && fwrite(obj
->ptr
,sdslen(obj
->ptr
),1,fp
) == 0)
8459 if (fwrite("\r\n",2,1,fp
) == 0) goto err
;
8460 if (decrrc
) decrRefCount(obj
);
8463 if (decrrc
) decrRefCount(obj
);
/* Write the binary-safe string 's' of 'len' bytes into 'fp' in the bulk
 * format $<count>\r\n<payload>\r\n.
 * Returns 1 on success, 0 on write error. */
static int fwriteBulkString(FILE *fp, char *s, unsigned long len) {
    char buf[128];

    /* 'len' is unsigned: the matching conversion is %lu, not %ld. */
    snprintf(buf,sizeof(buf),"$%lu\r\n",len);
    if (fwrite(buf,strlen(buf),1,fp) == 0) return 0;
    /* fwrite() returns 0 items for an empty payload too, so only write
     * (and check) the payload when there is one. */
    if (len && fwrite(s,len,1,fp) == 0) return 0;
    if (fwrite("\r\n",2,1,fp) == 0) return 0;
    return 1;
}
8479 /* Write a double value in bulk format $<count>\r\n<payload>\r\n */
8480 static int fwriteBulkDouble(FILE *fp
, double d
) {
8481 char buf
[128], dbuf
[128];
8483 snprintf(dbuf
,sizeof(dbuf
),"%.17g\r\n",d
);
8484 snprintf(buf
,sizeof(buf
),"$%lu\r\n",(unsigned long)strlen(dbuf
)-2);
8485 if (fwrite(buf
,strlen(buf
),1,fp
) == 0) return 0;
8486 if (fwrite(dbuf
,strlen(dbuf
),1,fp
) == 0) return 0;
8490 /* Write a long value in bulk format $<count>\r\n<payload>\r\n */
8491 static int fwriteBulkLong(FILE *fp
, long l
) {
8492 char buf
[128], lbuf
[128];
8494 snprintf(lbuf
,sizeof(lbuf
),"%ld\r\n",l
);
8495 snprintf(buf
,sizeof(buf
),"$%lu\r\n",(unsigned long)strlen(lbuf
)-2);
8496 if (fwrite(buf
,strlen(buf
),1,fp
) == 0) return 0;
8497 if (fwrite(lbuf
,strlen(lbuf
),1,fp
) == 0) return 0;
8501 /* Write a sequence of commands able to fully rebuild the dataset into
8502 * "filename". Used both by REWRITEAOF and BGREWRITEAOF. */
8503 static int rewriteAppendOnlyFile(char *filename
) {
8504 dictIterator
*di
= NULL
;
8509 time_t now
= time(NULL
);
8511 /* Note that we have to use a different temp name here compared to the
8512 * one used by rewriteAppendOnlyFileBackground() function. */
8513 snprintf(tmpfile
,256,"temp-rewriteaof-%d.aof", (int) getpid());
8514 fp
= fopen(tmpfile
,"w");
8516 redisLog(REDIS_WARNING
, "Failed rewriting the append only file: %s", strerror(errno
));
8519 for (j
= 0; j
< server
.dbnum
; j
++) {
8520 char selectcmd
[] = "*2\r\n$6\r\nSELECT\r\n";
8521 redisDb
*db
= server
.db
+j
;
8523 if (dictSize(d
) == 0) continue;
8524 di
= dictGetIterator(d
);
8530 /* SELECT the new DB */
8531 if (fwrite(selectcmd
,sizeof(selectcmd
)-1,1,fp
) == 0) goto werr
;
8532 if (fwriteBulkLong(fp
,j
) == 0) goto werr
;
8534 /* Iterate this DB writing every entry */
8535 while((de
= dictNext(di
)) != NULL
) {
8540 key
= dictGetEntryKey(de
);
8541 /* If the value for this key is swapped, load a preview in memory.
8542 * We use a "swapped" flag to remember if we need to free the
8543 * value object instead to just increment the ref count anyway
8544 * in order to avoid copy-on-write of pages if we are forked() */
8545 if (!server
.vm_enabled
|| key
->storage
== REDIS_VM_MEMORY
||
8546 key
->storage
== REDIS_VM_SWAPPING
) {
8547 o
= dictGetEntryVal(de
);
8550 o
= vmPreviewObject(key
);
8553 expiretime
= getExpire(db
,key
);
8555 /* Save the key and associated value */
8556 if (o
->type
== REDIS_STRING
) {
8557 /* Emit a SET command */
8558 char cmd
[]="*3\r\n$3\r\nSET\r\n";
8559 if (fwrite(cmd
,sizeof(cmd
)-1,1,fp
) == 0) goto werr
;
8561 if (fwriteBulkObject(fp
,key
) == 0) goto werr
;
8562 if (fwriteBulkObject(fp
,o
) == 0) goto werr
;
8563 } else if (o
->type
== REDIS_LIST
) {
8564 /* Emit the RPUSHes needed to rebuild the list */
8565 list
*list
= o
->ptr
;
8569 listRewind(list
,&li
);
8570 while((ln
= listNext(&li
))) {
8571 char cmd
[]="*3\r\n$5\r\nRPUSH\r\n";
8572 robj
*eleobj
= listNodeValue(ln
);
8574 if (fwrite(cmd
,sizeof(cmd
)-1,1,fp
) == 0) goto werr
;
8575 if (fwriteBulkObject(fp
,key
) == 0) goto werr
;
8576 if (fwriteBulkObject(fp
,eleobj
) == 0) goto werr
;
8578 } else if (o
->type
== REDIS_SET
) {
8579 /* Emit the SADDs needed to rebuild the set */
8581 dictIterator
*di
= dictGetIterator(set
);
8584 while((de
= dictNext(di
)) != NULL
) {
8585 char cmd
[]="*3\r\n$4\r\nSADD\r\n";
8586 robj
*eleobj
= dictGetEntryKey(de
);
8588 if (fwrite(cmd
,sizeof(cmd
)-1,1,fp
) == 0) goto werr
;
8589 if (fwriteBulkObject(fp
,key
) == 0) goto werr
;
8590 if (fwriteBulkObject(fp
,eleobj
) == 0) goto werr
;
8592 dictReleaseIterator(di
);
8593 } else if (o
->type
== REDIS_ZSET
) {
8594 /* Emit the ZADDs needed to rebuild the sorted set */
8596 dictIterator
*di
= dictGetIterator(zs
->dict
);
8599 while((de
= dictNext(di
)) != NULL
) {
8600 char cmd
[]="*4\r\n$4\r\nZADD\r\n";
8601 robj
*eleobj
= dictGetEntryKey(de
);
8602 double *score
= dictGetEntryVal(de
);
8604 if (fwrite(cmd
,sizeof(cmd
)-1,1,fp
) == 0) goto werr
;
8605 if (fwriteBulkObject(fp
,key
) == 0) goto werr
;
8606 if (fwriteBulkDouble(fp
,*score
) == 0) goto werr
;
8607 if (fwriteBulkObject(fp
,eleobj
) == 0) goto werr
;
8609 dictReleaseIterator(di
);
8610 } else if (o
->type
== REDIS_HASH
) {
8611 char cmd
[]="*4\r\n$4\r\nHSET\r\n";
8613 /* Emit the HSETs needed to rebuild the hash */
8614 if (o
->encoding
== REDIS_ENCODING_ZIPMAP
) {
8615 unsigned char *p
= zipmapRewind(o
->ptr
);
8616 unsigned char *field
, *val
;
8617 unsigned int flen
, vlen
;
8619 while((p
= zipmapNext(p
,&field
,&flen
,&val
,&vlen
)) != NULL
) {
8620 if (fwrite(cmd
,sizeof(cmd
)-1,1,fp
) == 0) goto werr
;
8621 if (fwriteBulkObject(fp
,key
) == 0) goto werr
;
8622 if (fwriteBulkString(fp
,(char*)field
,flen
) == -1)
8624 if (fwriteBulkString(fp
,(char*)val
,vlen
) == -1)
8628 dictIterator
*di
= dictGetIterator(o
->ptr
);
8631 while((de
= dictNext(di
)) != NULL
) {
8632 robj
*field
= dictGetEntryKey(de
);
8633 robj
*val
= dictGetEntryVal(de
);
8635 if (fwrite(cmd
,sizeof(cmd
)-1,1,fp
) == 0) goto werr
;
8636 if (fwriteBulkObject(fp
,key
) == 0) goto werr
;
8637 if (fwriteBulkObject(fp
,field
) == -1) return -1;
8638 if (fwriteBulkObject(fp
,val
) == -1) return -1;
8640 dictReleaseIterator(di
);
8643 redisPanic("Unknown object type");
8645 /* Save the expire time */
8646 if (expiretime
!= -1) {
8647 char cmd
[]="*3\r\n$8\r\nEXPIREAT\r\n";
8648 /* If this key is already expired skip it */
8649 if (expiretime
< now
) continue;
8650 if (fwrite(cmd
,sizeof(cmd
)-1,1,fp
) == 0) goto werr
;
8651 if (fwriteBulkObject(fp
,key
) == 0) goto werr
;
8652 if (fwriteBulkLong(fp
,expiretime
) == 0) goto werr
;
8654 if (swapped
) decrRefCount(o
);
8656 dictReleaseIterator(di
);
8659 /* Make sure data will not remain on the OS's output buffers */
8664 /* Use RENAME to make sure the DB file is changed atomically only
8665 * if the generate DB file is ok. */
8666 if (rename(tmpfile
,filename
) == -1) {
8667 redisLog(REDIS_WARNING
,"Error moving temp append only file on the final destination: %s", strerror(errno
));
8671 redisLog(REDIS_NOTICE
,"SYNC append only file rewrite performed");
8677 redisLog(REDIS_WARNING
,"Write error writing append only file on disk: %s", strerror(errno
));
8678 if (di
) dictReleaseIterator(di
);
8682 /* This is how rewriting of the append only file in background works:
8684 * 1) The user calls BGREWRITEAOF
8685 * 2) Redis calls this function, that forks():
8686 * 2a) the child rewrite the append only file in a temp file.
8687 * 2b) the parent accumulates differences in server.bgrewritebuf.
8688 * 3) When the child finished '2a' exists.
8689 * 4) The parent will trap the exit code, if it's OK, will append the
8690 * data accumulated into server.bgrewritebuf into the temp file, and
8691 * finally will rename(2) the temp file in the actual file name.
8692 * The the new file is reopened as the new append only file. Profit!
8694 static int rewriteAppendOnlyFileBackground(void) {
8697 if (server
.bgrewritechildpid
!= -1) return REDIS_ERR
;
8698 if (server
.vm_enabled
) waitEmptyIOJobsQueue();
8699 if ((childpid
= fork()) == 0) {
8703 if (server
.vm_enabled
) vmReopenSwapFile();
8705 snprintf(tmpfile
,256,"temp-rewriteaof-bg-%d.aof", (int) getpid());
8706 if (rewriteAppendOnlyFile(tmpfile
) == REDIS_OK
) {
8713 if (childpid
== -1) {
8714 redisLog(REDIS_WARNING
,
8715 "Can't rewrite append only file in background: fork: %s",
8719 redisLog(REDIS_NOTICE
,
8720 "Background append only file rewriting started by pid %d",childpid
);
8721 server
.bgrewritechildpid
= childpid
;
8722 updateDictResizePolicy();
8723 /* We set appendseldb to -1 in order to force the next call to the
8724 * feedAppendOnlyFile() to issue a SELECT command, so the differences
8725 * accumulated by the parent into server.bgrewritebuf will start
8726 * with a SELECT statement and it will be safe to merge. */
8727 server
.appendseldb
= -1;
8730 return REDIS_OK
; /* unreached */
8733 static void bgrewriteaofCommand(redisClient
*c
) {
8734 if (server
.bgrewritechildpid
!= -1) {
8735 addReplySds(c
,sdsnew("-ERR background append only file rewriting already in progress\r\n"));
8738 if (rewriteAppendOnlyFileBackground() == REDIS_OK
) {
8739 char *status
= "+Background append only file rewriting started\r\n";
8740 addReplySds(c
,sdsnew(status
));
8742 addReply(c
,shared
.err
);
/* Remove the temporary file written by the background AOF rewrite child
 * with the given pid. The name is built with the same pattern used by
 * rewriteAppendOnlyFileBackground(). The unlink(2) result is ignored. */
static void aofRemoveTempFile(pid_t childpid) {
    char path[256];

    snprintf(path,sizeof(path),"temp-rewriteaof-bg-%d.aof",(int)childpid);
    unlink(path);
}
/* Virtual Memory is composed mainly of two subsystems:
 * - Blocking Virtual Memory
 * - Threaded Virtual Memory I/O
 * The two parts are not fully decoupled, but functions are split among two
 * different sections of the source code (delimited by comments) in order to
 * make clearer which functionality belongs to the blocking VM and which to
 * the threaded (not blocking) VM.
 *
 * Redis VM is a blocking VM (one that blocks reading swapped values from
 * disk into memory when a value swapped out is needed in memory) that is made
 * unblocking by trying to examine the command argument vector in order to
 * load in background values that will likely be needed in order to exec
 * the command. The command is executed only once all the relevant keys
 * are loaded into memory.
 *
 * This is basically almost as simple as a blocking VM, but almost as
 * parallel as a fully non-blocking VM.
 */
8774 /* Called when the user switches from "appendonly yes" to "appendonly no"
8775 * at runtime using the CONFIG command. */
8776 static void stopAppendOnly(void) {
8777 flushAppendOnlyFile();
8778 fsync(server
.appendfd
);
8779 close(server
.appendfd
);
8781 server
.appendfd
= -1;
8782 server
.appendseldb
= -1;
8783 server
.appendonly
= 0;
8784 /* rewrite operation in progress? kill it, wait child exit */
8785 if (server
.bgsavechildpid
!= -1) {
8788 if (kill(server
.bgsavechildpid
,SIGKILL
) != -1)
8789 wait3(&statloc
,0,NULL
);
8790 /* reset the buffer accumulating changes while the child saves */
8791 sdsfree(server
.bgrewritebuf
);
8792 server
.bgrewritebuf
= sdsempty();
8793 server
.bgsavechildpid
= -1;
8797 /* Called when the user switches from "appendonly no" to "appendonly yes"
8798 * at runtime using the CONFIG command. */
8799 static int startAppendOnly(void) {
8800 server
.appendonly
= 1;
8801 server
.lastfsync
= time(NULL
);
8802 server
.appendfd
= open(server
.appendfilename
,O_WRONLY
|O_APPEND
|O_CREAT
,0644);
8803 if (server
.appendfd
== -1) {
8804 redisLog(REDIS_WARNING
,"Used tried to switch on AOF via CONFIG, but I can't open the AOF file: %s",strerror(errno
));
8807 if (rewriteAppendOnlyFileBackground() == REDIS_ERR
) {
8808 server
.appendonly
= 0;
8809 close(server
.appendfd
);
8810 redisLog(REDIS_WARNING
,"Used tried to switch on AOF via CONFIG, I can't trigger a background AOF rewrite operation. Check the above logs for more info about the error.",strerror(errno
));
8816 /* =================== Virtual Memory - Blocking Side ====================== */
8818 static void vmInit(void) {
8824 if (server
.vm_max_threads
!= 0)
8825 zmalloc_enable_thread_safeness(); /* we need thread safe zmalloc() */
8827 redisLog(REDIS_NOTICE
,"Using '%s' as swap file",server
.vm_swap_file
);
8828 /* Try to open the old swap file, otherwise create it */
8829 if ((server
.vm_fp
= fopen(server
.vm_swap_file
,"r+b")) == NULL
) {
8830 server
.vm_fp
= fopen(server
.vm_swap_file
,"w+b");
8832 if (server
.vm_fp
== NULL
) {
8833 redisLog(REDIS_WARNING
,
8834 "Can't open the swap file: %s. Exiting.",
8838 server
.vm_fd
= fileno(server
.vm_fp
);
8839 /* Lock the swap file for writing, this is useful in order to avoid
8840 * another instance to use the same swap file for a config error. */
8841 fl
.l_type
= F_WRLCK
;
8842 fl
.l_whence
= SEEK_SET
;
8843 fl
.l_start
= fl
.l_len
= 0;
8844 if (fcntl(server
.vm_fd
,F_SETLK
,&fl
) == -1) {
8845 redisLog(REDIS_WARNING
,
8846 "Can't lock the swap file at '%s': %s. Make sure it is not used by another Redis instance.", server
.vm_swap_file
, strerror(errno
));
8850 server
.vm_next_page
= 0;
8851 server
.vm_near_pages
= 0;
8852 server
.vm_stats_used_pages
= 0;
8853 server
.vm_stats_swapped_objects
= 0;
8854 server
.vm_stats_swapouts
= 0;
8855 server
.vm_stats_swapins
= 0;
8856 totsize
= server
.vm_pages
*server
.vm_page_size
;
8857 redisLog(REDIS_NOTICE
,"Allocating %lld bytes of swap file",totsize
);
8858 if (ftruncate(server
.vm_fd
,totsize
) == -1) {
8859 redisLog(REDIS_WARNING
,"Can't ftruncate swap file: %s. Exiting.",
8863 redisLog(REDIS_NOTICE
,"Swap file allocated with success");
8865 server
.vm_bitmap
= zmalloc((server
.vm_pages
+7)/8);
8866 redisLog(REDIS_VERBOSE
,"Allocated %lld bytes page table for %lld pages",
8867 (long long) (server
.vm_pages
+7)/8, server
.vm_pages
);
8868 memset(server
.vm_bitmap
,0,(server
.vm_pages
+7)/8);
8870 /* Initialize threaded I/O (used by Virtual Memory) */
8871 server
.io_newjobs
= listCreate();
8872 server
.io_processing
= listCreate();
8873 server
.io_processed
= listCreate();
8874 server
.io_ready_clients
= listCreate();
8875 pthread_mutex_init(&server
.io_mutex
,NULL
);
8876 pthread_mutex_init(&server
.obj_freelist_mutex
,NULL
);
8877 pthread_mutex_init(&server
.io_swapfile_mutex
,NULL
);
8878 server
.io_active_threads
= 0;
8879 if (pipe(pipefds
) == -1) {
8880 redisLog(REDIS_WARNING
,"Unable to intialized VM: pipe(2): %s. Exiting."
8884 server
.io_ready_pipe_read
= pipefds
[0];
8885 server
.io_ready_pipe_write
= pipefds
[1];
8886 redisAssert(anetNonBlock(NULL
,server
.io_ready_pipe_read
) != ANET_ERR
);
8887 /* LZF requires a lot of stack */
8888 pthread_attr_init(&server
.io_threads_attr
);
8889 pthread_attr_getstacksize(&server
.io_threads_attr
, &stacksize
);
8890 while (stacksize
< REDIS_THREAD_STACK_SIZE
) stacksize
*= 2;
8891 pthread_attr_setstacksize(&server
.io_threads_attr
, stacksize
);
8892 /* Listen for events in the threaded I/O pipe */
8893 if (aeCreateFileEvent(server
.el
, server
.io_ready_pipe_read
, AE_READABLE
,
8894 vmThreadedIOCompletedJob
, NULL
) == AE_ERR
)
8895 oom("creating file event");
8898 /* Mark the page as used */
8899 static void vmMarkPageUsed(off_t page
) {
8900 off_t byte
= page
/8;
8902 redisAssert(vmFreePage(page
) == 1);
8903 server
.vm_bitmap
[byte
] |= 1<<bit
;
8906 /* Mark N contiguous pages as used, with 'page' being the first. */
8907 static void vmMarkPagesUsed(off_t page
, off_t count
) {
8910 for (j
= 0; j
< count
; j
++)
8911 vmMarkPageUsed(page
+j
);
8912 server
.vm_stats_used_pages
+= count
;
8913 redisLog(REDIS_DEBUG
,"Mark USED pages: %lld pages at %lld\n",
8914 (long long)count
, (long long)page
);
8917 /* Mark the page as free */
8918 static void vmMarkPageFree(off_t page
) {
8919 off_t byte
= page
/8;
8921 redisAssert(vmFreePage(page
) == 0);
8922 server
.vm_bitmap
[byte
] &= ~(1<<bit
);
8925 /* Mark N contiguous pages as free, with 'page' being the first. */
8926 static void vmMarkPagesFree(off_t page
, off_t count
) {
8929 for (j
= 0; j
< count
; j
++)
8930 vmMarkPageFree(page
+j
);
8931 server
.vm_stats_used_pages
-= count
;
8932 redisLog(REDIS_DEBUG
,"Mark FREE pages: %lld pages at %lld\n",
8933 (long long)count
, (long long)page
);
8936 /* Test if the page is free */
8937 static int vmFreePage(off_t page
) {
8938 off_t byte
= page
/8;
8940 return (server
.vm_bitmap
[byte
] & (1<<bit
)) == 0;
8943 /* Find N contiguous free pages storing the first page of the cluster in *first.
8944 * Returns REDIS_OK if it was able to find N contiguous pages, otherwise
8945 * REDIS_ERR is returned.
8947 * This function uses a simple algorithm: we try to allocate
8948 * REDIS_VM_MAX_NEAR_PAGES sequentially, when we reach this limit we start
8949 * again from the start of the swap file searching for free spaces.
8951 * If it looks pretty clear that there are no free pages near our offset
8952 * we try to find less populated places doing a forward jump of
8953 * REDIS_VM_MAX_RANDOM_JUMP, then we start scanning again a few pages
8954 * without hurry, and then we jump again and so forth...
8956 * This function can be improved using a free list to avoid to guess
8957 * too much, since we could collect data about freed pages.
8959 * note: I implemented this function just after watching an episode of
8960 * Battlestar Galactica, where the hybrid was continuing to say "JUMP!"
8962 static int vmFindContiguousPages(off_t
*first
, off_t n
) {
8963 off_t base
, offset
= 0, since_jump
= 0, numfree
= 0;
8965 if (server
.vm_near_pages
== REDIS_VM_MAX_NEAR_PAGES
) {
8966 server
.vm_near_pages
= 0;
8967 server
.vm_next_page
= 0;
8969 server
.vm_near_pages
++; /* Yet another try for pages near to the old ones */
8970 base
= server
.vm_next_page
;
8972 while(offset
< server
.vm_pages
) {
8973 off_t
this = base
+offset
;
8975 /* If we overflow, restart from page zero */
8976 if (this >= server
.vm_pages
) {
8977 this -= server
.vm_pages
;
8979 /* Just overflowed, what we found on tail is no longer
8980 * interesting, as it's no longer contiguous. */
8984 if (vmFreePage(this)) {
8985 /* This is a free page */
8987 /* Already got N free pages? Return to the caller, with success */
8989 *first
= this-(n
-1);
8990 server
.vm_next_page
= this+1;
8991 redisLog(REDIS_DEBUG
, "FOUND CONTIGUOUS PAGES: %lld pages at %lld\n", (long long) n
, (long long) *first
);
8995 /* The current one is not a free page */
8999 /* Fast-forward if the current page is not free and we already
9000 * searched enough near this place. */
9002 if (!numfree
&& since_jump
>= REDIS_VM_MAX_RANDOM_JUMP
/4) {
9003 offset
+= random() % REDIS_VM_MAX_RANDOM_JUMP
;
9005 /* Note that even if we rewind after the jump, we are don't need
9006 * to make sure numfree is set to zero as we only jump *if* it
9007 * is set to zero. */
9009 /* Otherwise just check the next page */
9016 /* Write the specified object at the specified page of the swap file */
9017 static int vmWriteObjectOnSwap(robj
*o
, off_t page
) {
9018 if (server
.vm_enabled
) pthread_mutex_lock(&server
.io_swapfile_mutex
);
9019 if (fseeko(server
.vm_fp
,page
*server
.vm_page_size
,SEEK_SET
) == -1) {
9020 if (server
.vm_enabled
) pthread_mutex_unlock(&server
.io_swapfile_mutex
);
9021 redisLog(REDIS_WARNING
,
9022 "Critical VM problem in vmWriteObjectOnSwap(): can't seek: %s",
9026 rdbSaveObject(server
.vm_fp
,o
);
9027 fflush(server
.vm_fp
);
9028 if (server
.vm_enabled
) pthread_mutex_unlock(&server
.io_swapfile_mutex
);
9032 /* Swap the 'val' object relative to 'key' into disk. Store all the information
9033 * needed to later retrieve the object into the key object.
9034 * If we can't find enough contiguous empty pages to swap the object on disk
9035 * REDIS_ERR is returned. */
9036 static int vmSwapObjectBlocking(robj
*key
, robj
*val
) {
9037 off_t pages
= rdbSavedObjectPages(val
,NULL
);
9040 assert(key
->storage
== REDIS_VM_MEMORY
);
9041 assert(key
->refcount
== 1);
9042 if (vmFindContiguousPages(&page
,pages
) == REDIS_ERR
) return REDIS_ERR
;
9043 if (vmWriteObjectOnSwap(val
,page
) == REDIS_ERR
) return REDIS_ERR
;
9044 key
->vm
.page
= page
;
9045 key
->vm
.usedpages
= pages
;
9046 key
->storage
= REDIS_VM_SWAPPED
;
9047 key
->vtype
= val
->type
;
9048 decrRefCount(val
); /* Deallocate the object from memory. */
9049 vmMarkPagesUsed(page
,pages
);
9050 redisLog(REDIS_DEBUG
,"VM: object %s swapped out at %lld (%lld pages)",
9051 (unsigned char*) key
->ptr
,
9052 (unsigned long long) page
, (unsigned long long) pages
);
9053 server
.vm_stats_swapped_objects
++;
9054 server
.vm_stats_swapouts
++;
9058 static robj
*vmReadObjectFromSwap(off_t page
, int type
) {
9061 if (server
.vm_enabled
) pthread_mutex_lock(&server
.io_swapfile_mutex
);
9062 if (fseeko(server
.vm_fp
,page
*server
.vm_page_size
,SEEK_SET
) == -1) {
9063 redisLog(REDIS_WARNING
,
9064 "Unrecoverable VM problem in vmReadObjectFromSwap(): can't seek: %s",
9068 o
= rdbLoadObject(type
,server
.vm_fp
);
9070 redisLog(REDIS_WARNING
, "Unrecoverable VM problem in vmReadObjectFromSwap(): can't load object from swap file: %s", strerror(errno
));
9073 if (server
.vm_enabled
) pthread_mutex_unlock(&server
.io_swapfile_mutex
);
9077 /* Load the value object relative to the 'key' object from swap to memory.
9078 * The newly allocated object is returned.
9080 * If preview is true the unserialized object is returned to the caller but
9081 * no changes are made to the key object, nor the pages are marked as freed */
9082 static robj
*vmGenericLoadObject(robj
*key
, int preview
) {
9085 redisAssert(key
->storage
== REDIS_VM_SWAPPED
|| key
->storage
== REDIS_VM_LOADING
);
9086 val
= vmReadObjectFromSwap(key
->vm
.page
,key
->vtype
);
9088 key
->storage
= REDIS_VM_MEMORY
;
9089 key
->vm
.atime
= server
.unixtime
;
9090 vmMarkPagesFree(key
->vm
.page
,key
->vm
.usedpages
);
9091 redisLog(REDIS_DEBUG
, "VM: object %s loaded from disk",
9092 (unsigned char*) key
->ptr
);
9093 server
.vm_stats_swapped_objects
--;
9095 redisLog(REDIS_DEBUG
, "VM: object %s previewed from disk",
9096 (unsigned char*) key
->ptr
);
9098 server
.vm_stats_swapins
++;
9102 /* Plain object loading, from swap to memory */
9103 static robj
*vmLoadObject(robj
*key
) {
9104 /* If we are loading the object in background, stop it, we
9105 * need to load this object synchronously ASAP. */
9106 if (key
->storage
== REDIS_VM_LOADING
)
9107 vmCancelThreadedIOJob(key
);
9108 return vmGenericLoadObject(key
,0);
9111 /* Just load the value on disk, without to modify the key.
9112 * This is useful when we want to perform some operation on the value
9113 * without to really bring it from swap to memory, like while saving the
9114 * dataset or rewriting the append only log. */
9115 static robj
*vmPreviewObject(robj
*key
) {
9116 return vmGenericLoadObject(key
,1);
9119 /* How a good candidate is this object for swapping?
9120 * The better candidate it is, the greater the returned value.
9122 * Currently we try to perform a fast estimation of the object size in
9123 * memory, and combine it with aging informations.
9125 * Basically swappability = idle-time * log(estimated size)
9127 * Bigger objects are preferred over smaller objects, but not
9128 * proportionally, this is why we use the logarithm. This algorithm is
9129 * just a first try and will probably be tuned later. */
9130 static double computeObjectSwappability(robj
*o
) {
9131 time_t age
= server
.unixtime
- o
->vm
.atime
;
9135 struct dictEntry
*de
;
9138 if (age
<= 0) return 0;
9141 if (o
->encoding
!= REDIS_ENCODING_RAW
) {
9144 asize
= sdslen(o
->ptr
)+sizeof(*o
)+sizeof(long)*2;
9149 listNode
*ln
= listFirst(l
);
9151 asize
= sizeof(list
);
9153 robj
*ele
= ln
->value
;
9156 elesize
= (ele
->encoding
== REDIS_ENCODING_RAW
) ?
9157 (sizeof(*o
)+sdslen(ele
->ptr
)) :
9159 asize
+= (sizeof(listNode
)+elesize
)*listLength(l
);
9164 z
= (o
->type
== REDIS_ZSET
);
9165 d
= z
? ((zset
*)o
->ptr
)->dict
: o
->ptr
;
9167 asize
= sizeof(dict
)+(sizeof(struct dictEntry
*)*dictSlots(d
));
9168 if (z
) asize
+= sizeof(zset
)-sizeof(dict
);
9173 de
= dictGetRandomKey(d
);
9174 ele
= dictGetEntryKey(de
);
9175 elesize
= (ele
->encoding
== REDIS_ENCODING_RAW
) ?
9176 (sizeof(*o
)+sdslen(ele
->ptr
)) :
9178 asize
+= (sizeof(struct dictEntry
)+elesize
)*dictSize(d
);
9179 if (z
) asize
+= sizeof(zskiplistNode
)*dictSize(d
);
9183 if (o
->encoding
== REDIS_ENCODING_ZIPMAP
) {
9184 unsigned char *p
= zipmapRewind((unsigned char*)o
->ptr
);
9185 unsigned int len
= zipmapLen((unsigned char*)o
->ptr
);
9186 unsigned int klen
, vlen
;
9187 unsigned char *key
, *val
;
9189 if ((p
= zipmapNext(p
,&key
,&klen
,&val
,&vlen
)) == NULL
) {
9193 asize
= len
*(klen
+vlen
+3);
9194 } else if (o
->encoding
== REDIS_ENCODING_HT
) {
9196 asize
= sizeof(dict
)+(sizeof(struct dictEntry
*)*dictSlots(d
));
9201 de
= dictGetRandomKey(d
);
9202 ele
= dictGetEntryKey(de
);
9203 elesize
= (ele
->encoding
== REDIS_ENCODING_RAW
) ?
9204 (sizeof(*o
)+sdslen(ele
->ptr
)) :
9206 ele
= dictGetEntryVal(de
);
9207 elesize
= (ele
->encoding
== REDIS_ENCODING_RAW
) ?
9208 (sizeof(*o
)+sdslen(ele
->ptr
)) :
9210 asize
+= (sizeof(struct dictEntry
)+elesize
)*dictSize(d
);
9215 return (double)age
*log(1+asize
);
9218 /* Try to swap an object that's a good candidate for swapping.
9219 * Returns REDIS_OK if the object was swapped, REDIS_ERR if it's not possible
9220 * to swap any object at all.
9222 * If 'usethreaded' is true, Redis will try to swap the object in background
9223 * using I/O threads. */
9224 static int vmSwapOneObject(int usethreads
) {
9226 struct dictEntry
*best
= NULL
;
9227 double best_swappability
= 0;
9228 redisDb
*best_db
= NULL
;
9231 for (j
= 0; j
< server
.dbnum
; j
++) {
9232 redisDb
*db
= server
.db
+j
;
9233 /* Why maxtries is set to 100?
9234 * Because this way (usually) we'll find 1 object even if just 1% - 2%
9235 * are swappable objects */
9238 if (dictSize(db
->dict
) == 0) continue;
9239 for (i
= 0; i
< 5; i
++) {
9241 double swappability
;
9243 if (maxtries
) maxtries
--;
9244 de
= dictGetRandomKey(db
->dict
);
9245 key
= dictGetEntryKey(de
);
9246 val
= dictGetEntryVal(de
);
9247 /* Only swap objects that are currently in memory.
9249 * Also don't swap shared objects if threaded VM is on, as we
9250 * try to ensure that the main thread does not touch the
9251 * object while the I/O thread is using it, but we can't
9252 * control other keys without adding additional mutex. */
9253 if (key
->storage
!= REDIS_VM_MEMORY
||
9254 (server
.vm_max_threads
!= 0 && val
->refcount
!= 1)) {
9255 if (maxtries
) i
--; /* don't count this try */
9258 swappability
= computeObjectSwappability(val
);
9259 if (!best
|| swappability
> best_swappability
) {
9261 best_swappability
= swappability
;
9266 if (best
== NULL
) return REDIS_ERR
;
9267 key
= dictGetEntryKey(best
);
9268 val
= dictGetEntryVal(best
);
9270 redisLog(REDIS_DEBUG
,"Key with best swappability: %s, %f",
9271 key
->ptr
, best_swappability
);
9273 /* Unshare the key if needed */
9274 if (key
->refcount
> 1) {
9275 robj
*newkey
= dupStringObject(key
);
9277 key
= dictGetEntryKey(best
) = newkey
;
9281 vmSwapObjectThreaded(key
,val
,best_db
);
9284 if (vmSwapObjectBlocking(key
,val
) == REDIS_OK
) {
9285 dictGetEntryVal(best
) = NULL
;
/* Swap one object out synchronously. See vmSwapOneObject() for the return
 * value. Declared with (void) so that it has a real prototype. */
static int vmSwapOneObjectBlocking(void) {
    return vmSwapOneObject(0);
}
/* Swap one object out using the I/O threads. See vmSwapOneObject() for the
 * return value. Declared with (void) so that it has a real prototype. */
static int vmSwapOneObjectThreaded(void) {
    return vmSwapOneObject(1);
}
9301 /* Return true if it's safe to swap out objects in a given moment.
9302 * Basically we don't want to swap objects out while there is a BGSAVE
9303 * or a BGAEOREWRITE running in backgroud. */
9304 static int vmCanSwapOut(void) {
9305 return (server
.bgsavechildpid
== -1 && server
.bgrewritechildpid
== -1);
9308 /* Delete a key if swapped. Returns 1 if the key was found, was swapped
9309 * and was deleted. Otherwise 0 is returned. */
9310 static int deleteIfSwapped(redisDb
*db
, robj
*key
) {
9314 if ((de
= dictFind(db
->dict
,key
)) == NULL
) return 0;
9315 foundkey
= dictGetEntryKey(de
);
9316 if (foundkey
->storage
== REDIS_VM_MEMORY
) return 0;
9321 /* =================== Virtual Memory - Threaded I/O ======================= */
9323 static void freeIOJob(iojob
*j
) {
9324 if ((j
->type
== REDIS_IOJOB_PREPARE_SWAP
||
9325 j
->type
== REDIS_IOJOB_DO_SWAP
||
9326 j
->type
== REDIS_IOJOB_LOAD
) && j
->val
!= NULL
)
9327 decrRefCount(j
->val
);
9328 /* We don't decrRefCount the j->key field as we did't incremented
9329 * the count creating IO Jobs. This is because the key field here is
9330 * just used as an indentifier and if a key is removed the Job should
9331 * never be touched again. */
9335 /* Every time a thread finished a Job, it writes a byte into the write side
9336 * of an unix pipe in order to "awake" the main thread, and this function
9338 static void vmThreadedIOCompletedJob(aeEventLoop
*el
, int fd
, void *privdata
,
9342 int retval
, processed
= 0, toprocess
= -1, trytoswap
= 1;
9344 REDIS_NOTUSED(mask
);
9345 REDIS_NOTUSED(privdata
);
9347 /* For every byte we read in the read side of the pipe, there is one
9348 * I/O job completed to process. */
9349 while((retval
= read(fd
,buf
,1)) == 1) {
9353 struct dictEntry
*de
;
9355 redisLog(REDIS_DEBUG
,"Processing I/O completed job");
9357 /* Get the processed element (the oldest one) */
9359 assert(listLength(server
.io_processed
) != 0);
9360 if (toprocess
== -1) {
9361 toprocess
= (listLength(server
.io_processed
)*REDIS_MAX_COMPLETED_JOBS_PROCESSED
)/100;
9362 if (toprocess
<= 0) toprocess
= 1;
9364 ln
= listFirst(server
.io_processed
);
9366 listDelNode(server
.io_processed
,ln
);
9368 /* If this job is marked as canceled, just ignore it */
9373 /* Post process it in the main thread, as there are things we
9374 * can do just here to avoid race conditions and/or invasive locks */
9375 redisLog(REDIS_DEBUG
,"Job %p type: %d, key at %p (%s) refcount: %d\n", (void*) j
, j
->type
, (void*)j
->key
, (char*)j
->key
->ptr
, j
->key
->refcount
);
9376 de
= dictFind(j
->db
->dict
,j
->key
);
9378 key
= dictGetEntryKey(de
);
9379 if (j
->type
== REDIS_IOJOB_LOAD
) {
9382 /* Key loaded, bring it at home */
9383 key
->storage
= REDIS_VM_MEMORY
;
9384 key
->vm
.atime
= server
.unixtime
;
9385 vmMarkPagesFree(key
->vm
.page
,key
->vm
.usedpages
);
9386 redisLog(REDIS_DEBUG
, "VM: object %s loaded from disk (threaded)",
9387 (unsigned char*) key
->ptr
);
9388 server
.vm_stats_swapped_objects
--;
9389 server
.vm_stats_swapins
++;
9390 dictGetEntryVal(de
) = j
->val
;
9391 incrRefCount(j
->val
);
9394 /* Handle clients waiting for this key to be loaded. */
9395 handleClientsBlockedOnSwappedKey(db
,key
);
9396 } else if (j
->type
== REDIS_IOJOB_PREPARE_SWAP
) {
9397 /* Now we know the amount of pages required to swap this object.
9398 * Let's find some space for it, and queue this task again
9399 * rebranded as REDIS_IOJOB_DO_SWAP. */
9400 if (!vmCanSwapOut() ||
9401 vmFindContiguousPages(&j
->page
,j
->pages
) == REDIS_ERR
)
9403 /* Ooops... no space or we can't swap as there is
9404 * a fork()ed Redis trying to save stuff on disk. */
9406 key
->storage
= REDIS_VM_MEMORY
; /* undo operation */
9408 /* Note that we need to mark this pages as used now,
9409 * if the job will be canceled, we'll mark them as freed
9411 vmMarkPagesUsed(j
->page
,j
->pages
);
9412 j
->type
= REDIS_IOJOB_DO_SWAP
;
9417 } else if (j
->type
== REDIS_IOJOB_DO_SWAP
) {
9420 /* Key swapped. We can finally free some memory. */
9421 if (key
->storage
!= REDIS_VM_SWAPPING
) {
9422 printf("key->storage: %d\n",key
->storage
);
9423 printf("key->name: %s\n",(char*)key
->ptr
);
9424 printf("key->refcount: %d\n",key
->refcount
);
9425 printf("val: %p\n",(void*)j
->val
);
9426 printf("val->type: %d\n",j
->val
->type
);
9427 printf("val->ptr: %s\n",(char*)j
->val
->ptr
);
9429 redisAssert(key
->storage
== REDIS_VM_SWAPPING
);
9430 val
= dictGetEntryVal(de
);
9431 key
->vm
.page
= j
->page
;
9432 key
->vm
.usedpages
= j
->pages
;
9433 key
->storage
= REDIS_VM_SWAPPED
;
9434 key
->vtype
= j
->val
->type
;
9435 decrRefCount(val
); /* Deallocate the object from memory. */
9436 dictGetEntryVal(de
) = NULL
;
9437 redisLog(REDIS_DEBUG
,
9438 "VM: object %s swapped out at %lld (%lld pages) (threaded)",
9439 (unsigned char*) key
->ptr
,
9440 (unsigned long long) j
->page
, (unsigned long long) j
->pages
);
9441 server
.vm_stats_swapped_objects
++;
9442 server
.vm_stats_swapouts
++;
9444 /* Put a few more swap requests in queue if we are still
9446 if (trytoswap
&& vmCanSwapOut() &&
9447 zmalloc_used_memory() > server
.vm_max_memory
)
9452 more
= listLength(server
.io_newjobs
) <
9453 (unsigned) server
.vm_max_threads
;
9455 /* Don't waste CPU time if swappable objects are rare. */
9456 if (vmSwapOneObjectThreaded() == REDIS_ERR
) {
9464 if (processed
== toprocess
) return;
9466 if (retval
< 0 && errno
!= EAGAIN
) {
9467 redisLog(REDIS_WARNING
,
9468 "WARNING: read(2) error in vmThreadedIOCompletedJob() %s",
9473 static void lockThreadedIO(void) {
9474 pthread_mutex_lock(&server
.io_mutex
);
9477 static void unlockThreadedIO(void) {
9478 pthread_mutex_unlock(&server
.io_mutex
);
9481 /* Remove the specified object from the threaded I/O queue if still not
9482 * processed, otherwise make sure to flag it as canceled. */
9483 static void vmCancelThreadedIOJob(robj
*o
) {
9485 server
.io_newjobs
, /* 0 */
9486 server
.io_processing
, /* 1 */
9487 server
.io_processed
/* 2 */
9491 assert(o
->storage
== REDIS_VM_LOADING
|| o
->storage
== REDIS_VM_SWAPPING
);
9494 /* Search for a matching key in one of the queues */
9495 for (i
= 0; i
< 3; i
++) {
9499 listRewind(lists
[i
],&li
);
9500 while ((ln
= listNext(&li
)) != NULL
) {
9501 iojob
*job
= ln
->value
;
9503 if (job
->canceled
) continue; /* Skip this, already canceled. */
9504 if (job
->key
== o
) {
9505 redisLog(REDIS_DEBUG
,"*** CANCELED %p (%s) (type %d) (LIST ID %d)\n",
9506 (void*)job
, (char*)o
->ptr
, job
->type
, i
);
9507 /* Mark the pages as free since the swap didn't happened
9508 * or happened but is now discarded. */
9509 if (i
!= 1 && job
->type
== REDIS_IOJOB_DO_SWAP
)
9510 vmMarkPagesFree(job
->page
,job
->pages
);
9511 /* Cancel the job. It depends on the list the job is
9514 case 0: /* io_newjobs */
9515 /* If the job was yet not processed the best thing to do
9516 * is to remove it from the queue at all */
9518 listDelNode(lists
[i
],ln
);
9520 case 1: /* io_processing */
9521 /* Oh Shi- the thread is messing with the Job:
9523 * Probably it's accessing the object if this is a
9524 * PREPARE_SWAP or DO_SWAP job.
9525 * If it's a LOAD job it may be reading from disk and
9526 * if we don't wait for the job to terminate before to
9527 * cancel it, maybe in a few microseconds data can be
9528 * corrupted in this pages. So the short story is:
9530 * Better to wait for the job to move into the
9531 * next queue (processed)... */
9533 /* We try again and again until the job is completed. */
9535 /* But let's wait some time for the I/O thread
9536 * to finish with this job. After all this condition
9537 * should be very rare. */
9540 case 2: /* io_processed */
9541 /* The job was already processed, that's easy...
9542 * just mark it as canceled so that we'll ignore it
9543 * when processing completed jobs. */
9547 /* Finally we have to adjust the storage type of the object
9548 * in order to "UNDO" the operaiton. */
9549 if (o
->storage
== REDIS_VM_LOADING
)
9550 o
->storage
= REDIS_VM_SWAPPED
;
9551 else if (o
->storage
== REDIS_VM_SWAPPING
)
9552 o
->storage
= REDIS_VM_MEMORY
;
9559 assert(1 != 1); /* We should never reach this */
9562 static void *IOThreadEntryPoint(void *arg
) {
9567 pthread_detach(pthread_self());
9569 /* Get a new job to process */
9571 if (listLength(server
.io_newjobs
) == 0) {
9572 /* No new jobs in queue, exit. */
9573 redisLog(REDIS_DEBUG
,"Thread %ld exiting, nothing to do",
9574 (long) pthread_self());
9575 server
.io_active_threads
--;
9579 ln
= listFirst(server
.io_newjobs
);
9581 listDelNode(server
.io_newjobs
,ln
);
9582 /* Add the job in the processing queue */
9583 j
->thread
= pthread_self();
9584 listAddNodeTail(server
.io_processing
,j
);
9585 ln
= listLast(server
.io_processing
); /* We use ln later to remove it */
9587 redisLog(REDIS_DEBUG
,"Thread %ld got a new job (type %d): %p about key '%s'",
9588 (long) pthread_self(), j
->type
, (void*)j
, (char*)j
->key
->ptr
);
9590 /* Process the Job */
9591 if (j
->type
== REDIS_IOJOB_LOAD
) {
9592 j
->val
= vmReadObjectFromSwap(j
->page
,j
->key
->vtype
);
9593 } else if (j
->type
== REDIS_IOJOB_PREPARE_SWAP
) {
9594 FILE *fp
= fopen("/dev/null","w+");
9595 j
->pages
= rdbSavedObjectPages(j
->val
,fp
);
9597 } else if (j
->type
== REDIS_IOJOB_DO_SWAP
) {
9598 if (vmWriteObjectOnSwap(j
->val
,j
->page
) == REDIS_ERR
)
9602 /* Done: insert the job into the processed queue */
9603 redisLog(REDIS_DEBUG
,"Thread %ld completed the job: %p (key %s)",
9604 (long) pthread_self(), (void*)j
, (char*)j
->key
->ptr
);
9606 listDelNode(server
.io_processing
,ln
);
9607 listAddNodeTail(server
.io_processed
,j
);
9610 /* Signal the main thread there is new stuff to process */
9611 assert(write(server
.io_ready_pipe_write
,"x",1) == 1);
9613 return NULL
; /* never reached */
9616 static void spawnIOThread(void) {
9618 sigset_t mask
, omask
;
9622 sigaddset(&mask
,SIGCHLD
);
9623 sigaddset(&mask
,SIGHUP
);
9624 sigaddset(&mask
,SIGPIPE
);
9625 pthread_sigmask(SIG_SETMASK
, &mask
, &omask
);
9626 while ((err
= pthread_create(&thread
,&server
.io_threads_attr
,IOThreadEntryPoint
,NULL
)) != 0) {
9627 redisLog(REDIS_WARNING
,"Unable to spawn an I/O thread: %s",
9631 pthread_sigmask(SIG_SETMASK
, &omask
, NULL
);
9632 server
.io_active_threads
++;
9635 /* We need to wait for the last thread to exit before we are able to
9636 * fork() in order to BGSAVE or BGREWRITEAOF. */
9637 static void waitEmptyIOJobsQueue(void) {
9639 int io_processed_len
;
9642 if (listLength(server
.io_newjobs
) == 0 &&
9643 listLength(server
.io_processing
) == 0 &&
9644 server
.io_active_threads
== 0)
9649 /* While waiting for empty jobs queue condition we post-process some
9650 * finshed job, as I/O threads may be hanging trying to write against
9651 * the io_ready_pipe_write FD but there are so much pending jobs that
9653 io_processed_len
= listLength(server
.io_processed
);
9655 if (io_processed_len
) {
9656 vmThreadedIOCompletedJob(NULL
,server
.io_ready_pipe_read
,NULL
,0);
9657 usleep(1000); /* 1 millisecond */
9659 usleep(10000); /* 10 milliseconds */
9664 static void vmReopenSwapFile(void) {
9665 /* Note: we don't close the old one as we are in the child process
9666 * and don't want to mess at all with the original file object. */
9667 server
.vm_fp
= fopen(server
.vm_swap_file
,"r+b");
9668 if (server
.vm_fp
== NULL
) {
9669 redisLog(REDIS_WARNING
,"Can't re-open the VM swap file: %s. Exiting.",
9670 server
.vm_swap_file
);
9673 server
.vm_fd
= fileno(server
.vm_fp
);
9676 /* This function must be called while with threaded IO locked */
9677 static void queueIOJob(iojob
*j
) {
9678 redisLog(REDIS_DEBUG
,"Queued IO Job %p type %d about key '%s'\n",
9679 (void*)j
, j
->type
, (char*)j
->key
->ptr
);
9680 listAddNodeTail(server
.io_newjobs
,j
);
9681 if (server
.io_active_threads
< server
.vm_max_threads
)
9685 static int vmSwapObjectThreaded(robj
*key
, robj
*val
, redisDb
*db
) {
9688 assert(key
->storage
== REDIS_VM_MEMORY
);
9689 assert(key
->refcount
== 1);
9691 j
= zmalloc(sizeof(*j
));
9692 j
->type
= REDIS_IOJOB_PREPARE_SWAP
;
9698 j
->thread
= (pthread_t
) -1;
9699 key
->storage
= REDIS_VM_SWAPPING
;
9707 /* ============ Virtual Memory - Blocking clients on missing keys =========== */
9709 /* This function makes the clinet 'c' waiting for the key 'key' to be loaded.
9710 * If there is not already a job loading the key, it is craeted.
9711 * The key is added to the io_keys list in the client structure, and also
9712 * in the hash table mapping swapped keys to waiting clients, that is,
9713 * server.io_waited_keys. */
9714 static int waitForSwappedKey(redisClient
*c
, robj
*key
) {
9715 struct dictEntry
*de
;
9719 /* If the key does not exist or is already in RAM we don't need to
9720 * block the client at all. */
9721 de
= dictFind(c
->db
->dict
,key
);
9722 if (de
== NULL
) return 0;
9723 o
= dictGetEntryKey(de
);
9724 if (o
->storage
== REDIS_VM_MEMORY
) {
9726 } else if (o
->storage
== REDIS_VM_SWAPPING
) {
9727 /* We were swapping the key, undo it! */
9728 vmCancelThreadedIOJob(o
);
9732 /* OK: the key is either swapped, or being loaded just now. */
9734 /* Add the key to the list of keys this client is waiting for.
9735 * This maps clients to keys they are waiting for. */
9736 listAddNodeTail(c
->io_keys
,key
);
9739 /* Add the client to the swapped keys => clients waiting map. */
9740 de
= dictFind(c
->db
->io_keys
,key
);
9744 /* For every key we take a list of clients blocked for it */
9746 retval
= dictAdd(c
->db
->io_keys
,key
,l
);
9748 assert(retval
== DICT_OK
);
9750 l
= dictGetEntryVal(de
);
9752 listAddNodeTail(l
,c
);
9754 /* Are we already loading the key from disk? If not create a job */
9755 if (o
->storage
== REDIS_VM_SWAPPED
) {
9758 o
->storage
= REDIS_VM_LOADING
;
9759 j
= zmalloc(sizeof(*j
));
9760 j
->type
= REDIS_IOJOB_LOAD
;
9763 j
->key
->vtype
= o
->vtype
;
9764 j
->page
= o
->vm
.page
;
9767 j
->thread
= (pthread_t
) -1;
9775 /* Preload keys for any command with first, last and step values for
9776 * the command keys prototype, as defined in the command table. */
9777 static void waitForMultipleSwappedKeys(redisClient
*c
, struct redisCommand
*cmd
, int argc
, robj
**argv
) {
9779 if (cmd
->vm_firstkey
== 0) return;
9780 last
= cmd
->vm_lastkey
;
9781 if (last
< 0) last
= argc
+last
;
9782 for (j
= cmd
->vm_firstkey
; j
<= last
; j
+= cmd
->vm_keystep
) {
9783 redisAssert(j
< argc
);
9784 waitForSwappedKey(c
,argv
[j
]);
9788 /* Preload keys needed for the ZUNIONSTORE and ZINTERSTORE commands.
9789 * Note that the number of keys to preload is user-defined, so we need to
9790 * apply a sanity check against argc. */
9791 static void zunionInterBlockClientOnSwappedKeys(redisClient
*c
, struct redisCommand
*cmd
, int argc
, robj
**argv
) {
9795 num
= atoi(argv
[2]->ptr
);
9796 if (num
> (argc
-3)) return;
9797 for (i
= 0; i
< num
; i
++) {
9798 waitForSwappedKey(c
,argv
[3+i
]);
9802 /* Preload keys needed to execute the entire MULTI/EXEC block.
9804 * This function is called by blockClientOnSwappedKeys when EXEC is issued,
9805 * and will block the client when any command requires a swapped out value. */
9806 static void execBlockClientOnSwappedKeys(redisClient
*c
, struct redisCommand
*cmd
, int argc
, robj
**argv
) {
9808 struct redisCommand
*mcmd
;
9811 REDIS_NOTUSED(argc
);
9812 REDIS_NOTUSED(argv
);
9814 if (!(c
->flags
& REDIS_MULTI
)) return;
9815 for (i
= 0; i
< c
->mstate
.count
; i
++) {
9816 mcmd
= c
->mstate
.commands
[i
].cmd
;
9817 margc
= c
->mstate
.commands
[i
].argc
;
9818 margv
= c
->mstate
.commands
[i
].argv
;
9820 if (mcmd
->vm_preload_proc
!= NULL
) {
9821 mcmd
->vm_preload_proc(c
,mcmd
,margc
,margv
);
9823 waitForMultipleSwappedKeys(c
,mcmd
,margc
,margv
);
9828 /* Is this client attempting to run a command against swapped keys?
9829 * If so, block it ASAP, load the keys in background, then resume it.
9831 * The important idea about this function is that it can fail! If keys will
9832 * still be swapped when the client is resumed, this key lookups will
9833 * just block loading keys from disk. In practical terms this should only
9834 * happen with SORT BY command or if there is a bug in this function.
9836 * Return 1 if the client is marked as blocked, 0 if the client can
9837 * continue as the keys it is going to access appear to be in memory. */
9838 static int blockClientOnSwappedKeys(redisClient
*c
, struct redisCommand
*cmd
) {
9839 if (cmd
->vm_preload_proc
!= NULL
) {
9840 cmd
->vm_preload_proc(c
,cmd
,c
->argc
,c
->argv
);
9842 waitForMultipleSwappedKeys(c
,cmd
,c
->argc
,c
->argv
);
9845 /* If the client was blocked for at least one key, mark it as blocked. */
9846 if (listLength(c
->io_keys
)) {
9847 c
->flags
|= REDIS_IO_WAIT
;
9848 aeDeleteFileEvent(server
.el
,c
->fd
,AE_READABLE
);
9849 server
.vm_blocked_clients
++;
9856 /* Remove the 'key' from the list of blocked keys for a given client.
9858 * The function returns 1 when there are no longer blocking keys after
9859 * the current one was removed (and the client can be unblocked). */
9860 static int dontWaitForSwappedKey(redisClient
*c
, robj
*key
) {
9864 struct dictEntry
*de
;
9866 /* Remove the key from the list of keys this client is waiting for. */
9867 listRewind(c
->io_keys
,&li
);
9868 while ((ln
= listNext(&li
)) != NULL
) {
9869 if (equalStringObjects(ln
->value
,key
)) {
9870 listDelNode(c
->io_keys
,ln
);
9876 /* Remove the client from the key => waiting clients map. */
9877 de
= dictFind(c
->db
->io_keys
,key
);
9879 l
= dictGetEntryVal(de
);
9880 ln
= listSearchKey(l
,c
);
9883 if (listLength(l
) == 0)
9884 dictDelete(c
->db
->io_keys
,key
);
9886 return listLength(c
->io_keys
) == 0;
9889 static void handleClientsBlockedOnSwappedKey(redisDb
*db
, robj
*key
) {
9890 struct dictEntry
*de
;
9895 de
= dictFind(db
->io_keys
,key
);
9898 l
= dictGetEntryVal(de
);
9899 len
= listLength(l
);
9900 /* Note: we can't use something like while(listLength(l)) as the list
9901 * can be freed by the called function (dontWaitForSwappedKey) when the last element is removed. */
9904 redisClient
*c
= ln
->value
;
9906 if (dontWaitForSwappedKey(c
,key
)) {
9907 /* Put the client in the list of clients ready to go as we
9908 * loaded all the keys about it. */
9909 listAddNodeTail(server
.io_ready_clients
,c
);
9914 /* =========================== Remote Configuration ========================= */
9916 static void configSetCommand(redisClient
*c
) {
9917 robj
*o
= getDecodedObject(c
->argv
[3]);
9920 if (!strcasecmp(c
->argv
[2]->ptr
,"dbfilename")) {
9921 zfree(server
.dbfilename
);
9922 server
.dbfilename
= zstrdup(o
->ptr
);
9923 } else if (!strcasecmp(c
->argv
[2]->ptr
,"requirepass")) {
9924 zfree(server
.requirepass
);
9925 server
.requirepass
= zstrdup(o
->ptr
);
9926 } else if (!strcasecmp(c
->argv
[2]->ptr
,"masterauth")) {
9927 zfree(server
.masterauth
);
9928 server
.masterauth
= zstrdup(o
->ptr
);
9929 } else if (!strcasecmp(c
->argv
[2]->ptr
,"maxmemory")) {
9930 if (getLongLongFromObject(o
,&ll
) == REDIS_ERR
||
9931 ll
< 0) goto badfmt
;
9932 server
.maxmemory
= ll
;
9933 } else if (!strcasecmp(c
->argv
[2]->ptr
,"timeout")) {
9934 if (getLongLongFromObject(o
,&ll
) == REDIS_ERR
||
9935 ll
< 0 || ll
> LONG_MAX
) goto badfmt
;
9936 server
.maxidletime
= ll
;
9937 } else if (!strcasecmp(c
->argv
[2]->ptr
,"appendfsync")) {
9938 if (!strcasecmp(o
->ptr
,"no")) {
9939 server
.appendfsync
= APPENDFSYNC_NO
;
9940 } else if (!strcasecmp(o
->ptr
,"everysec")) {
9941 server
.appendfsync
= APPENDFSYNC_EVERYSEC
;
9942 } else if (!strcasecmp(o
->ptr
,"always")) {
9943 server
.appendfsync
= APPENDFSYNC_ALWAYS
;
9947 } else if (!strcasecmp(c
->argv
[2]->ptr
,"appendonly")) {
9948 int old
= server
.appendonly
;
9949 int new = yesnotoi(o
->ptr
);
9951 if (new == -1) goto badfmt
;
9956 if (startAppendOnly() == REDIS_ERR
) {
9957 addReplySds(c
,sdscatprintf(sdsempty(),
9958 "-ERR Unable to turn on AOF. Check server logs.\r\n"));
9964 } else if (!strcasecmp(c
->argv
[2]->ptr
,"save")) {
9966 sds
*v
= sdssplitlen(o
->ptr
,sdslen(o
->ptr
)," ",1,&vlen
);
9968 /* Perform sanity check before setting the new config:
9969 * - Even number of args
9970 * - Seconds >= 1, changes >= 0 */
9972 sdsfreesplitres(v
,vlen
);
9975 for (j
= 0; j
< vlen
; j
++) {
9979 val
= strtoll(v
[j
], &eptr
, 10);
9980 if (eptr
[0] != '\0' ||
9981 ((j
& 1) == 0 && val
< 1) ||
9982 ((j
& 1) == 1 && val
< 0)) {
9983 sdsfreesplitres(v
,vlen
);
9987 /* Finally set the new config */
9988 resetServerSaveParams();
9989 for (j
= 0; j
< vlen
; j
+= 2) {
9993 seconds
= strtoll(v
[j
],NULL
,10);
9994 changes
= strtoll(v
[j
+1],NULL
,10);
9995 appendServerSaveParams(seconds
, changes
);
9997 sdsfreesplitres(v
,vlen
);
9999 addReplySds(c
,sdscatprintf(sdsempty(),
10000 "-ERR not supported CONFIG parameter %s\r\n",
10001 (char*)c
->argv
[2]->ptr
));
10006 addReply(c
,shared
.ok
);
10009 badfmt
: /* Bad format errors */
10010 addReplySds(c
,sdscatprintf(sdsempty(),
10011 "-ERR invalid argument '%s' for CONFIG SET '%s'\r\n",
10013 (char*)c
->argv
[2]->ptr
));
10017 static void configGetCommand(redisClient
*c
) {
10018 robj
*o
= getDecodedObject(c
->argv
[2]);
10019 robj
*lenobj
= createObject(REDIS_STRING
,NULL
);
10020 char *pattern
= o
->ptr
;
10023 addReply(c
,lenobj
);
10024 decrRefCount(lenobj
);
10026 if (stringmatch(pattern
,"dbfilename",0)) {
10027 addReplyBulkCString(c
,"dbfilename");
10028 addReplyBulkCString(c
,server
.dbfilename
);
10031 if (stringmatch(pattern
,"requirepass",0)) {
10032 addReplyBulkCString(c
,"requirepass");
10033 addReplyBulkCString(c
,server
.requirepass
);
10036 if (stringmatch(pattern
,"masterauth",0)) {
10037 addReplyBulkCString(c
,"masterauth");
10038 addReplyBulkCString(c
,server
.masterauth
);
10041 if (stringmatch(pattern
,"maxmemory",0)) {
10044 ll2string(buf
,128,server
.maxmemory
);
10045 addReplyBulkCString(c
,"maxmemory");
10046 addReplyBulkCString(c
,buf
);
10049 if (stringmatch(pattern
,"timeout",0)) {
10052 ll2string(buf
,128,server
.maxidletime
);
10053 addReplyBulkCString(c
,"timeout");
10054 addReplyBulkCString(c
,buf
);
10057 if (stringmatch(pattern
,"appendonly",0)) {
10058 addReplyBulkCString(c
,"appendonly");
10059 addReplyBulkCString(c
,server
.appendonly
? "yes" : "no");
10062 if (stringmatch(pattern
,"appendfsync",0)) {
10065 switch(server
.appendfsync
) {
10066 case APPENDFSYNC_NO
: policy
= "no"; break;
10067 case APPENDFSYNC_EVERYSEC
: policy
= "everysec"; break;
10068 case APPENDFSYNC_ALWAYS
: policy
= "always"; break;
10069 default: policy
= "unknown"; break; /* too harmless to panic */
10071 addReplyBulkCString(c
,"appendfsync");
10072 addReplyBulkCString(c
,policy
);
10075 if (stringmatch(pattern
,"save",0)) {
10076 sds buf
= sdsempty();
10079 for (j
= 0; j
< server
.saveparamslen
; j
++) {
10080 buf
= sdscatprintf(buf
,"%ld %d",
10081 server
.saveparams
[j
].seconds
,
10082 server
.saveparams
[j
].changes
);
10083 if (j
!= server
.saveparamslen
-1)
10084 buf
= sdscatlen(buf
," ",1);
10086 addReplyBulkCString(c
,"save");
10087 addReplyBulkCString(c
,buf
);
10092 lenobj
->ptr
= sdscatprintf(sdsempty(),"*%d\r\n",matches
*2);
10095 static void configCommand(redisClient
*c
) {
10096 if (!strcasecmp(c
->argv
[1]->ptr
,"set")) {
10097 if (c
->argc
!= 4) goto badarity
;
10098 configSetCommand(c
);
10099 } else if (!strcasecmp(c
->argv
[1]->ptr
,"get")) {
10100 if (c
->argc
!= 3) goto badarity
;
10101 configGetCommand(c
);
10102 } else if (!strcasecmp(c
->argv
[1]->ptr
,"resetstat")) {
10103 if (c
->argc
!= 2) goto badarity
;
10104 server
.stat_numcommands
= 0;
10105 server
.stat_numconnections
= 0;
10106 server
.stat_expiredkeys
= 0;
10107 server
.stat_starttime
= time(NULL
);
10108 addReply(c
,shared
.ok
);
10110 addReplySds(c
,sdscatprintf(sdsempty(),
10111 "-ERR CONFIG subcommand must be one of GET, SET, RESETSTAT\r\n"));
10116 addReplySds(c
,sdscatprintf(sdsempty(),
10117 "-ERR Wrong number of arguments for CONFIG %s\r\n",
10118 (char*) c
->argv
[1]->ptr
));
10121 /* =========================== Pubsub implementation ======================== */
10123 static void freePubsubPattern(void *p
) {
10124 pubsubPattern
*pat
= p
;
10126 decrRefCount(pat
->pattern
);
10130 static int listMatchPubsubPattern(void *a
, void *b
) {
10131 pubsubPattern
*pa
= a
, *pb
= b
;
10133 return (pa
->client
== pb
->client
) &&
10134 (equalStringObjects(pa
->pattern
,pb
->pattern
));
10137 /* Subscribe a client to a channel. Returns 1 if the operation succeeded, or
10138 * 0 if the client was already subscribed to that channel. */
10139 static int pubsubSubscribeChannel(redisClient
*c
, robj
*channel
) {
10140 struct dictEntry
*de
;
10141 list
*clients
= NULL
;
10144 /* Add the channel to the client -> channels hash table */
10145 if (dictAdd(c
->pubsub_channels
,channel
,NULL
) == DICT_OK
) {
10147 incrRefCount(channel
);
10148 /* Add the client to the channel -> list of clients hash table */
10149 de
= dictFind(server
.pubsub_channels
,channel
);
10151 clients
= listCreate();
10152 dictAdd(server
.pubsub_channels
,channel
,clients
);
10153 incrRefCount(channel
);
10155 clients
= dictGetEntryVal(de
);
10157 listAddNodeTail(clients
,c
);
10159 /* Notify the client */
10160 addReply(c
,shared
.mbulk3
);
10161 addReply(c
,shared
.subscribebulk
);
10162 addReplyBulk(c
,channel
);
10163 addReplyLongLong(c
,dictSize(c
->pubsub_channels
)+listLength(c
->pubsub_patterns
));
10167 /* Unsubscribe a client from a channel. Returns 1 if the operation succeeded, or
10168 * 0 if the client was not subscribed to the specified channel. */
10169 static int pubsubUnsubscribeChannel(redisClient
*c
, robj
*channel
, int notify
) {
10170 struct dictEntry
*de
;
10175 /* Remove the channel from the client -> channels hash table */
10176 incrRefCount(channel
); /* channel may be just a pointer to the same object
10177 we have in the hash tables. Protect it... */
10178 if (dictDelete(c
->pubsub_channels
,channel
) == DICT_OK
) {
10180 /* Remove the client from the channel -> clients list hash table */
10181 de
= dictFind(server
.pubsub_channels
,channel
);
10182 assert(de
!= NULL
);
10183 clients
= dictGetEntryVal(de
);
10184 ln
= listSearchKey(clients
,c
);
10185 assert(ln
!= NULL
);
10186 listDelNode(clients
,ln
);
10187 if (listLength(clients
) == 0) {
10188 /* Free the list and associated hash entry at all if this was
10189 * the latest client, so that it will be possible to abuse
10190 * Redis PUBSUB creating millions of channels. */
10191 dictDelete(server
.pubsub_channels
,channel
);
10194 /* Notify the client */
10196 addReply(c
,shared
.mbulk3
);
10197 addReply(c
,shared
.unsubscribebulk
);
10198 addReplyBulk(c
,channel
);
10199 addReplyLongLong(c
,dictSize(c
->pubsub_channels
)+
10200 listLength(c
->pubsub_patterns
));
10203 decrRefCount(channel
); /* it is finally safe to release it */
10207 /* Subscribe a client to a pattern. Returns 1 if the operation succeeded, or 0 if the clinet was already subscribed to that pattern. */
10208 static int pubsubSubscribePattern(redisClient
*c
, robj
*pattern
) {
10211 if (listSearchKey(c
->pubsub_patterns
,pattern
) == NULL
) {
10213 pubsubPattern
*pat
;
10214 listAddNodeTail(c
->pubsub_patterns
,pattern
);
10215 incrRefCount(pattern
);
10216 pat
= zmalloc(sizeof(*pat
));
10217 pat
->pattern
= getDecodedObject(pattern
);
10219 listAddNodeTail(server
.pubsub_patterns
,pat
);
10221 /* Notify the client */
10222 addReply(c
,shared
.mbulk3
);
10223 addReply(c
,shared
.psubscribebulk
);
10224 addReplyBulk(c
,pattern
);
10225 addReplyLongLong(c
,dictSize(c
->pubsub_channels
)+listLength(c
->pubsub_patterns
));
10229 /* Unsubscribe a client from a channel. Returns 1 if the operation succeeded, or
10230 * 0 if the client was not subscribed to the specified channel. */
10231 static int pubsubUnsubscribePattern(redisClient
*c
, robj
*pattern
, int notify
) {
10236 incrRefCount(pattern
); /* Protect the object. May be the same we remove */
10237 if ((ln
= listSearchKey(c
->pubsub_patterns
,pattern
)) != NULL
) {
10239 listDelNode(c
->pubsub_patterns
,ln
);
10241 pat
.pattern
= pattern
;
10242 ln
= listSearchKey(server
.pubsub_patterns
,&pat
);
10243 listDelNode(server
.pubsub_patterns
,ln
);
10245 /* Notify the client */
10247 addReply(c
,shared
.mbulk3
);
10248 addReply(c
,shared
.punsubscribebulk
);
10249 addReplyBulk(c
,pattern
);
10250 addReplyLongLong(c
,dictSize(c
->pubsub_channels
)+
10251 listLength(c
->pubsub_patterns
));
10253 decrRefCount(pattern
);
10257 /* Unsubscribe from all the channels. Return the number of channels the
10258 * client was subscribed from. */
10259 static int pubsubUnsubscribeAllChannels(redisClient
*c
, int notify
) {
10260 dictIterator
*di
= dictGetIterator(c
->pubsub_channels
);
10264 while((de
= dictNext(di
)) != NULL
) {
10265 robj
*channel
= dictGetEntryKey(de
);
10267 count
+= pubsubUnsubscribeChannel(c
,channel
,notify
);
10269 dictReleaseIterator(di
);
10273 /* Unsubscribe from all the patterns. Return the number of patterns the
10274 * client was subscribed from. */
10275 static int pubsubUnsubscribeAllPatterns(redisClient
*c
, int notify
) {
10280 listRewind(c
->pubsub_patterns
,&li
);
10281 while ((ln
= listNext(&li
)) != NULL
) {
10282 robj
*pattern
= ln
->value
;
10284 count
+= pubsubUnsubscribePattern(c
,pattern
,notify
);
10289 /* Publish a message */
10290 static int pubsubPublishMessage(robj
*channel
, robj
*message
) {
10292 struct dictEntry
*de
;
10296 /* Send to clients listening for that channel */
10297 de
= dictFind(server
.pubsub_channels
,channel
);
10299 list
*list
= dictGetEntryVal(de
);
10303 listRewind(list
,&li
);
10304 while ((ln
= listNext(&li
)) != NULL
) {
10305 redisClient
*c
= ln
->value
;
10307 addReply(c
,shared
.mbulk3
);
10308 addReply(c
,shared
.messagebulk
);
10309 addReplyBulk(c
,channel
);
10310 addReplyBulk(c
,message
);
10314 /* Send to clients listening to matching channels */
10315 if (listLength(server
.pubsub_patterns
)) {
10316 listRewind(server
.pubsub_patterns
,&li
);
10317 channel
= getDecodedObject(channel
);
10318 while ((ln
= listNext(&li
)) != NULL
) {
10319 pubsubPattern
*pat
= ln
->value
;
10321 if (stringmatchlen((char*)pat
->pattern
->ptr
,
10322 sdslen(pat
->pattern
->ptr
),
10323 (char*)channel
->ptr
,
10324 sdslen(channel
->ptr
),0)) {
10325 addReply(pat
->client
,shared
.mbulk4
);
10326 addReply(pat
->client
,shared
.pmessagebulk
);
10327 addReplyBulk(pat
->client
,pat
->pattern
);
10328 addReplyBulk(pat
->client
,channel
);
10329 addReplyBulk(pat
->client
,message
);
10333 decrRefCount(channel
);
10338 static void subscribeCommand(redisClient
*c
) {
10341 for (j
= 1; j
< c
->argc
; j
++)
10342 pubsubSubscribeChannel(c
,c
->argv
[j
]);
10345 static void unsubscribeCommand(redisClient
*c
) {
10346 if (c
->argc
== 1) {
10347 pubsubUnsubscribeAllChannels(c
,1);
10352 for (j
= 1; j
< c
->argc
; j
++)
10353 pubsubUnsubscribeChannel(c
,c
->argv
[j
],1);
10357 static void psubscribeCommand(redisClient
*c
) {
10360 for (j
= 1; j
< c
->argc
; j
++)
10361 pubsubSubscribePattern(c
,c
->argv
[j
]);
10364 static void punsubscribeCommand(redisClient
*c
) {
10365 if (c
->argc
== 1) {
10366 pubsubUnsubscribeAllPatterns(c
,1);
10371 for (j
= 1; j
< c
->argc
; j
++)
10372 pubsubUnsubscribePattern(c
,c
->argv
[j
],1);
10376 static void publishCommand(redisClient
*c
) {
10377 int receivers
= pubsubPublishMessage(c
->argv
[1],c
->argv
[2]);
10378 addReplyLongLong(c
,receivers
);
10381 /* ===================== WATCH (CAS alike for MULTI/EXEC) ===================
10383 * The implementation uses a per-DB hash table mapping keys to list of clients
10384 * WATCHing those keys, so that given a key that is going to be modified
10385 * we can mark all the associated clients as dirty.
10387 * Also every client contains a list of WATCHed keys so that's possible to
10388 * un-watch such keys when the client is freed or when UNWATCH is called. */
10390 /* In the client->watched_keys list we use watchedKey structures because
10391 * identifying a key in Redis requires both the key name and its DB. */
10393 typedef struct watchedKey
{
10398 /* Watch for the specified key */
10399 static void watchForKey(redisClient
*c
, robj
*key
) {
10400 list
*clients
= NULL
;
10405 /* Check if we are already watching for this key */
10406 listRewind(c
->watched_keys
,&li
);
10407 while((ln
= listNext(&li
))) {
10408 wk
= listNodeValue(ln
);
10409 if (wk
->db
== c
->db
&& equalStringObjects(key
,wk
->key
))
10410 return; /* Key already watched */
10412 /* This key is not already watched in this DB. Let's add it */
10413 clients
= dictFetchValue(c
->db
->watched_keys
,key
);
10415 clients
= listCreate();
10416 dictAdd(c
->db
->watched_keys
,key
,clients
);
10419 listAddNodeTail(clients
,c
);
10420 /* Add the new key to the list of keys watched by this client */
10421 wk
= zmalloc(sizeof(*wk
));
10425 listAddNodeTail(c
->watched_keys
,wk
);
10428 /* Unwatch all the keys watched by this client. Clearing the EXEC dirty
10429 * flag is up to the caller. */
10430 static void unwatchAllKeys(redisClient
*c
) {
10434 if (listLength(c
->watched_keys
) == 0) return;
10435 listRewind(c
->watched_keys
,&li
);
10436 while((ln
= listNext(&li
))) {
10440 /* Lookup the watched key -> clients list and remove the client from it. */
10442 wk
= listNodeValue(ln
);
10443 clients
= dictFetchValue(wk
->db
->watched_keys
, wk
->key
);
10444 assert(clients
!= NULL
);
10445 listDelNode(clients
,listSearchKey(clients
,c
));
10446 /* Kill the entry at all if this was the only client */
10447 if (listLength(clients
) == 0)
10448 dictDelete(wk
->db
->watched_keys
, wk
->key
);
10449 /* Remove this watched key from the client->watched list */
10450 listDelNode(c
->watched_keys
,ln
);
10451 decrRefCount(wk
->key
);
10456 /* "Touch" a key, so that if this key is being WATCHed by soem client the
10457 * next EXEC will fail. */
10458 static void touchWatchedKey(redisDb
*db
, robj
*key
) {
10463 if (dictSize(db
->watched_keys
) == 0) return;
10464 clients
= dictFetchValue(db
->watched_keys
, key
);
10465 if (!clients
) return;
10467 /* Mark all the clients watching this key as REDIS_DIRTY_CAS */
10468 /* Check if we are already watching for this key */
10469 listRewind(clients
,&li
);
10470 while((ln
= listNext(&li
))) {
10471 redisClient
*c
= listNodeValue(ln
);
10473 c
->flags
|= REDIS_DIRTY_CAS
;
10477 static void watchCommand(redisClient
*c
) {
10480 for (j
= 1; j
< c
->argc
; j
++)
10481 watchForKey(c
,c
->argv
[j
]);
10482 addReply(c
,shared
.ok
);
10485 static void unwatchCommand(redisClient
*c
) {
10487 c
->flags
&= (~REDIS_DIRTY_CAS
);
10488 addReply(c
,shared
.ok
);
10491 /* ================================= Debugging ============================== */
10493 /* Compute the sha1 of string at 's' with 'len' bytes long.
10494 * The SHA1 is then xored againt the string pointed by digest.
10495 * Since xor is commutative, this operation is used in order to
10496 * "add" digests relative to unordered elements.
10498 * So digest(a,b,c,d) will be the same of digest(b,a,c,d) */
10499 static void xorDigest(unsigned char *digest
, void *ptr
, size_t len
) {
10501 unsigned char hash
[20], *s
= ptr
;
10505 SHA1Update(&ctx
,s
,len
);
10506 SHA1Final(hash
,&ctx
);
10508 for (j
= 0; j
< 20; j
++)
10509 digest
[j
] ^= hash
[j
];
10512 static void xorObjectDigest(unsigned char *digest
, robj
*o
) {
10513 o
= getDecodedObject(o
);
10514 xorDigest(digest
,o
->ptr
,sdslen(o
->ptr
));
10518 /* This function instead of just computing the SHA1 and xoring it
10519 * against diget, also perform the digest of "digest" itself and
10520 * replace the old value with the new one.
10522 * So the final digest will be:
10524 * digest = SHA1(digest xor SHA1(data))
10526 * This function is used every time we want to preserve the order so
10527 * that digest(a,b,c,d) will be different than digest(b,c,d,a)
10529 * Also note that mixdigest("foo") followed by mixdigest("bar")
10530 * will lead to a different digest compared to "fo", "obar".
10532 static void mixDigest(unsigned char *digest
, void *ptr
, size_t len
) {
10536 xorDigest(digest
,s
,len
);
10538 SHA1Update(&ctx
,digest
,20);
10539 SHA1Final(digest
,&ctx
);
10542 static void mixObjectDigest(unsigned char *digest
, robj
*o
) {
10543 o
= getDecodedObject(o
);
10544 mixDigest(digest
,o
->ptr
,sdslen(o
->ptr
));
10548 /* Compute the dataset digest. Since keys, sets elements, hashes elements
10549 * are not ordered, we use a trick: every aggregate digest is the xor
10550 * of the digests of their elements. This way the order will not change
10551 * the result. For list instead we use a feedback entering the output digest
10552 * as input in order to ensure that a different ordered list will result in
10553 * a different digest. */
10554 static void computeDatasetDigest(unsigned char *final
) {
10555 unsigned char digest
[20];
10557 dictIterator
*di
= NULL
;
10562 memset(final
,0,20); /* Start with a clean result */
10564 for (j
= 0; j
< server
.dbnum
; j
++) {
10565 redisDb
*db
= server
.db
+j
;
10567 if (dictSize(db
->dict
) == 0) continue;
10568 di
= dictGetIterator(db
->dict
);
10570 /* hash the DB id, so the same dataset moved in a different
10571 * DB will lead to a different digest */
10573 mixDigest(final
,&aux
,sizeof(aux
));
10575 /* Iterate this DB writing every entry */
10576 while((de
= dictNext(di
)) != NULL
) {
10577 robj
*key
, *o
, *kcopy
;
10580 memset(digest
,0,20); /* This key-val digest */
10581 key
= dictGetEntryKey(de
);
10583 if (!server
.vm_enabled
) {
10584 mixObjectDigest(digest
,key
);
10585 o
= dictGetEntryVal(de
);
10587 /* Don't work with the key directly as when VM is active
10588 * this is unsafe: TODO: fix decrRefCount to check if the
10589 * count really reached 0 to avoid this mess */
10590 kcopy
= dupStringObject(key
);
10591 mixObjectDigest(digest
,kcopy
);
10592 o
= lookupKeyRead(db
,kcopy
);
10593 decrRefCount(kcopy
);
10595 aux
= htonl(o
->type
);
10596 mixDigest(digest
,&aux
,sizeof(aux
));
10597 expiretime
= getExpire(db
,key
);
10599 /* Save the key and associated value */
10600 if (o
->type
== REDIS_STRING
) {
10601 mixObjectDigest(digest
,o
);
10602 } else if (o
->type
== REDIS_LIST
) {
10603 list
*list
= o
->ptr
;
10607 listRewind(list
,&li
);
10608 while((ln
= listNext(&li
))) {
10609 robj
*eleobj
= listNodeValue(ln
);
10611 mixObjectDigest(digest
,eleobj
);
10613 } else if (o
->type
== REDIS_SET
) {
10614 dict
*set
= o
->ptr
;
10615 dictIterator
*di
= dictGetIterator(set
);
10618 while((de
= dictNext(di
)) != NULL
) {
10619 robj
*eleobj
= dictGetEntryKey(de
);
10621 xorObjectDigest(digest
,eleobj
);
10623 dictReleaseIterator(di
);
10624 } else if (o
->type
== REDIS_ZSET
) {
10626 dictIterator
*di
= dictGetIterator(zs
->dict
);
10629 while((de
= dictNext(di
)) != NULL
) {
10630 robj
*eleobj
= dictGetEntryKey(de
);
10631 double *score
= dictGetEntryVal(de
);
10632 unsigned char eledigest
[20];
10634 snprintf(buf
,sizeof(buf
),"%.17g",*score
);
10635 memset(eledigest
,0,20);
10636 mixObjectDigest(eledigest
,eleobj
);
10637 mixDigest(eledigest
,buf
,strlen(buf
));
10638 xorDigest(digest
,eledigest
,20);
10640 dictReleaseIterator(di
);
10641 } else if (o
->type
== REDIS_HASH
) {
10645 hi
= hashInitIterator(o
);
10646 while (hashNext(hi
) != REDIS_ERR
) {
10647 unsigned char eledigest
[20];
10649 memset(eledigest
,0,20);
10650 obj
= hashCurrent(hi
,REDIS_HASH_KEY
);
10651 mixObjectDigest(eledigest
,obj
);
10653 obj
= hashCurrent(hi
,REDIS_HASH_VALUE
);
10654 mixObjectDigest(eledigest
,obj
);
10656 xorDigest(digest
,eledigest
,20);
10658 hashReleaseIterator(hi
);
10660 redisPanic("Unknown object type");
10662 /* If the key has an expire, add it to the mix */
10663 if (expiretime
!= -1) xorDigest(digest
,"!!expire!!",10);
10664 /* We can finally xor the key-val digest to the final digest */
10665 xorDigest(final
,digest
,20);
10667 dictReleaseIterator(di
);
10671 static void debugCommand(redisClient
*c
) {
10672 if (!strcasecmp(c
->argv
[1]->ptr
,"segfault")) {
10673 *((char*)-1) = 'x';
10674 } else if (!strcasecmp(c
->argv
[1]->ptr
,"reload")) {
10675 if (rdbSave(server
.dbfilename
) != REDIS_OK
) {
10676 addReply(c
,shared
.err
);
10680 if (rdbLoad(server
.dbfilename
) != REDIS_OK
) {
10681 addReply(c
,shared
.err
);
10684 redisLog(REDIS_WARNING
,"DB reloaded by DEBUG RELOAD");
10685 addReply(c
,shared
.ok
);
10686 } else if (!strcasecmp(c
->argv
[1]->ptr
,"loadaof")) {
10688 if (loadAppendOnlyFile(server
.appendfilename
) != REDIS_OK
) {
10689 addReply(c
,shared
.err
);
10692 redisLog(REDIS_WARNING
,"Append Only File loaded by DEBUG LOADAOF");
10693 addReply(c
,shared
.ok
);
10694 } else if (!strcasecmp(c
->argv
[1]->ptr
,"object") && c
->argc
== 3) {
10695 dictEntry
*de
= dictFind(c
->db
->dict
,c
->argv
[2]);
10699 addReply(c
,shared
.nokeyerr
);
10702 key
= dictGetEntryKey(de
);
10703 val
= dictGetEntryVal(de
);
10704 if (!server
.vm_enabled
|| (key
->storage
== REDIS_VM_MEMORY
||
10705 key
->storage
== REDIS_VM_SWAPPING
)) {
10709 if (val
->encoding
< (sizeof(strencoding
)/sizeof(char*))) {
10710 strenc
= strencoding
[val
->encoding
];
10712 snprintf(buf
,64,"unknown encoding %d\n", val
->encoding
);
10715 addReplySds(c
,sdscatprintf(sdsempty(),
10716 "+Key at:%p refcount:%d, value at:%p refcount:%d "
10717 "encoding:%s serializedlength:%lld\r\n",
10718 (void*)key
, key
->refcount
, (void*)val
, val
->refcount
,
10719 strenc
, (long long) rdbSavedObjectLen(val
,NULL
)));
10721 addReplySds(c
,sdscatprintf(sdsempty(),
10722 "+Key at:%p refcount:%d, value swapped at: page %llu "
10723 "using %llu pages\r\n",
10724 (void*)key
, key
->refcount
, (unsigned long long) key
->vm
.page
,
10725 (unsigned long long) key
->vm
.usedpages
));
10727 } else if (!strcasecmp(c
->argv
[1]->ptr
,"swapin") && c
->argc
== 3) {
10728 lookupKeyRead(c
->db
,c
->argv
[2]);
10729 addReply(c
,shared
.ok
);
10730 } else if (!strcasecmp(c
->argv
[1]->ptr
,"swapout") && c
->argc
== 3) {
10731 dictEntry
*de
= dictFind(c
->db
->dict
,c
->argv
[2]);
10734 if (!server
.vm_enabled
) {
10735 addReplySds(c
,sdsnew("-ERR Virtual Memory is disabled\r\n"));
10739 addReply(c
,shared
.nokeyerr
);
10742 key
= dictGetEntryKey(de
);
10743 val
= dictGetEntryVal(de
);
10744 /* If the key is shared we want to create a copy */
10745 if (key
->refcount
> 1) {
10746 robj
*newkey
= dupStringObject(key
);
10748 key
= dictGetEntryKey(de
) = newkey
;
10751 if (key
->storage
!= REDIS_VM_MEMORY
) {
10752 addReplySds(c
,sdsnew("-ERR This key is not in memory\r\n"));
10753 } else if (vmSwapObjectBlocking(key
,val
) == REDIS_OK
) {
10754 dictGetEntryVal(de
) = NULL
;
10755 addReply(c
,shared
.ok
);
10757 addReply(c
,shared
.err
);
10759 } else if (!strcasecmp(c
->argv
[1]->ptr
,"populate") && c
->argc
== 3) {
10764 if (getLongFromObjectOrReply(c
, c
->argv
[2], &keys
, NULL
) != REDIS_OK
)
10766 for (j
= 0; j
< keys
; j
++) {
10767 snprintf(buf
,sizeof(buf
),"key:%lu",j
);
10768 key
= createStringObject(buf
,strlen(buf
));
10769 if (lookupKeyRead(c
->db
,key
) != NULL
) {
10773 snprintf(buf
,sizeof(buf
),"value:%lu",j
);
10774 val
= createStringObject(buf
,strlen(buf
));
10775 dictAdd(c
->db
->dict
,key
,val
);
10777 addReply(c
,shared
.ok
);
10778 } else if (!strcasecmp(c
->argv
[1]->ptr
,"digest") && c
->argc
== 2) {
10779 unsigned char digest
[20];
10780 sds d
= sdsnew("+");
10783 computeDatasetDigest(digest
);
10784 for (j
= 0; j
< 20; j
++)
10785 d
= sdscatprintf(d
, "%02x",digest
[j
]);
10787 d
= sdscatlen(d
,"\r\n",2);
10790 addReplySds(c
,sdsnew(
10791 "-ERR Syntax error, try DEBUG [SEGFAULT|OBJECT <key>|SWAPIN <key>|SWAPOUT <key>|RELOAD]\r\n"));
10795 static void _redisAssert(char *estr
, char *file
, int line
) {
10796 redisLog(REDIS_WARNING
,"=== ASSERTION FAILED ===");
10797 redisLog(REDIS_WARNING
,"==> %s:%d '%s' is not true",file
,line
,estr
);
10798 #ifdef HAVE_BACKTRACE
10799 redisLog(REDIS_WARNING
,"(forcing SIGSEGV in order to print the stack trace)");
10800 *((char*)-1) = 'x';
10804 static void _redisPanic(char *msg
, char *file
, int line
) {
10805 redisLog(REDIS_WARNING
,"!!! Software Failure. Press left mouse button to continue");
10806 redisLog(REDIS_WARNING
,"Guru Meditation: %s #%s:%d",msg
,file
,line
);
10807 #ifdef HAVE_BACKTRACE
10808 redisLog(REDIS_WARNING
,"(forcing SIGSEGV in order to print the stack trace)");
10809 *((char*)-1) = 'x';
10813 /* =================================== Main! ================================ */
10816 int linuxOvercommitMemoryValue(void) {
10817 FILE *fp
= fopen("/proc/sys/vm/overcommit_memory","r");
10820 if (!fp
) return -1;
10821 if (fgets(buf
,64,fp
) == NULL
) {
10830 void linuxOvercommitMemoryWarning(void) {
10831 if (linuxOvercommitMemoryValue() == 0) {
10832 redisLog(REDIS_WARNING
,"WARNING overcommit_memory is set to 0! Background save may fail under low memory condition. To fix this issue add 'vm.overcommit_memory = 1' to /etc/sysctl.conf and then reboot or run the command 'sysctl vm.overcommit_memory=1' for this to take effect.");
10835 #endif /* __linux__ */
10837 static void daemonize(void) {
10841 if (fork() != 0) exit(0); /* parent exits */
10842 setsid(); /* create a new session */
10844 /* Every output goes to /dev/null. If Redis is daemonized but
10845 * the 'logfile' is set to 'stdout' in the configuration file
10846 * it will not log at all. */
10847 if ((fd
= open("/dev/null", O_RDWR
, 0)) != -1) {
10848 dup2(fd
, STDIN_FILENO
);
10849 dup2(fd
, STDOUT_FILENO
);
10850 dup2(fd
, STDERR_FILENO
);
10851 if (fd
> STDERR_FILENO
) close(fd
);
10853 /* Try to write the pid file */
10854 fp
= fopen(server
.pidfile
,"w");
10856 fprintf(fp
,"%d\n",getpid());
10861 static void version() {
10862 printf("Redis server version %s\n", REDIS_VERSION
);
/* Print the command line help on the standard error and terminate the
 * process with status 1. Declared with a (void) parameter list so that the
 * definition is a real prototype. */
static void usage(void) {
    fprintf(stderr,"Usage: ./redis-server [/path/to/redis.conf]\n");
    fprintf(stderr," ./redis-server - (read config from stdin)\n");
    exit(1);
}
/* Server entry point.
 *
 * Visible sequence:
 *  1. initServerConfig() installs the default configuration.
 *  2. argv[1] is checked for "-v" / "--version" (version()) and "--help"
 *     (usage()); otherwise the save parameters are reset and argv[1] is
 *     loaded as the configuration file. A branch for argc > 2 follows,
 *     then a warning that no configuration file was given.
 *     NOTE(review): the opening condition that guards the argv[1] accesses
 *     and the bodies of the other branches are not visible here.
 *  3. daemonize() is called when server.daemonize is set.
 *  4. The dataset is loaded with loadAppendOnlyFile() when
 *     server.appendonly is set; rdbLoad() is also present, presumably as
 *     the alternative branch - the joining "else" is not visible.
 *  5. The ready message is logged, beforeSleep is registered on the event
 *     loop, and the event loop is deleted at the end. The call that runs
 *     the loop and the return statement are not visible here. */
10872 int main(int argc
, char **argv
) {
10875 initServerConfig();
10877 if (strcmp(argv
[1], "-v") == 0 ||
10878 strcmp(argv
[1], "--version") == 0) version();
10879 if (strcmp(argv
[1], "--help") == 0) usage();
10880 resetServerSaveParams();
10881 loadServerConfig(argv
[1]);
10882 } else if ((argc
> 2)) {
10885 redisLog(REDIS_WARNING
,"Warning: no config file specified, using the default config. In order to specify a config file use 'redis-server /path/to/redis.conf'");
10887 if (server
.daemonize
) daemonize();
10889 redisLog(REDIS_NOTICE
,"Server started, Redis version " REDIS_VERSION
);
10891 linuxOvercommitMemoryWarning();
/* 'start' is the reference point for the load time reported below. */
10893 start
= time(NULL
);
10894 if (server
.appendonly
) {
/* NOTE(review): "%ld" is paired with the difference of two time() values;
 * confirm that this difference has type long on every supported target. */
10895 if (loadAppendOnlyFile(server
.appendfilename
) == REDIS_OK
)
10896 redisLog(REDIS_NOTICE
,"DB loaded from append only file: %ld seconds",time(NULL
)-start
);
10898 if (rdbLoad(server
.dbfilename
) == REDIS_OK
)
10899 redisLog(REDIS_NOTICE
,"DB loaded from disk: %ld seconds",time(NULL
)-start
);
10901 redisLog(REDIS_NOTICE
,"The server is now ready to accept connections on port %d", server
.port
);
10902 aeSetBeforeSleepProc(server
.el
,beforeSleep
);
10904 aeDeleteEventLoop(server
.el
);
10908 /* ============================= Backtrace support ========================= */
10910 #ifdef HAVE_BACKTRACE
/* Forward declaration: segvHandler() calls findFuncName() before its
 * definition appears later in this file. */
10911 static char *findFuncName(void *pointer
, unsigned long *offset
);
/* Return, as a void pointer, the instruction pointer stored in the signal
 * context 'uc'.
 *
 * The field holding that value differs per platform, so the accessor is
 * chosen with preprocessor checks: FreeBSD, dietlibc, Apple (before and
 * from Mac OS X 10.6, each with a __rip and an __eip variant), Linux on
 * x86 / x86_64, and Linux on IA64. Some of the inner #if / #else / #endif
 * lines and the fallback for unlisted platforms are not visible in this
 * excerpt.
 *
 * NOTE(review): the Linux branch is selected for the x86_64 macros as well
 * but indexes gregs[] with REG_EIP; confirm that REG_EIP is defined for
 * 64 bit builds (glibc names the 64 bit register REG_RIP). */
10913 static void *getMcontextEip(ucontext_t
*uc
) {
10914 #if defined(__FreeBSD__)
10915 return (void*) uc
->uc_mcontext
.mc_eip
;
10916 #elif defined(__dietlibc__)
10917 return (void*) uc
->uc_mcontext
.eip
;
10918 #elif defined(__APPLE__) && !defined(MAC_OS_X_VERSION_10_6)
10920 return (void*) uc
->uc_mcontext
->__ss
.__rip
;
10922 return (void*) uc
->uc_mcontext
->__ss
.__eip
;
10924 #elif defined(__APPLE__) && defined(MAC_OS_X_VERSION_10_6)
10925 #if defined(_STRUCT_X86_THREAD_STATE64) && !defined(__i386__)
10926 return (void*) uc
->uc_mcontext
->__ss
.__rip
;
10928 return (void*) uc
->uc_mcontext
->__ss
.__eip
;
10930 #elif defined(__i386__) || defined(__X86_64__) || defined(__x86_64__)
10931 return (void*) uc
->uc_mcontext
.gregs
[REG_EIP
]; /* Linux 32/64 bit */
10932 #elif defined(__ia64__) /* Linux IA64 */
10933 return (void*) uc
->uc_mcontext
.sc_ip
;
/* Crash signal handler (three-argument sa_sigaction form).
 *
 * sig    - number of the signal being handled; it is written to the log.
 * info   - unused.
 * secret - the context pointer passed to the handler, used as ucontext_t*.
 *
 * Visible steps:
 *  1. Log a banner with REDIS_VERSION and the signal number, then the text
 *     returned by genRedisInfoString() (intentionally never freed).
 *  2. Capture up to 100 return addresses with backtrace() and, when
 *     getMcontextEip() yields a non-NULL address, store it in trace[1].
 *  3. Turn the addresses into strings with backtrace_symbols() and log one
 *     line per frame starting from index 1. findFuncName() is consulted
 *     for every frame; the raw backtrace_symbols() string is logged when
 *     no name was found or when the number following '+' in that string
 *     is smaller than the offset findFuncName() reported.
 *
 * The declarations of 'trace' and 'infostring', the branch that joins the
 * two logging calls inside the loop and the end of the function are not
 * visible in this excerpt.
 *
 * NOTE(review): redisLog(), genRedisInfoString() and backtrace_symbols()
 * are presumably not async-signal-safe; confirm that this best-effort
 * behaviour on the crash path is intended. */
10939 static void segvHandler(int sig
, siginfo_t
*info
, void *secret
) {
10941 char **messages
= NULL
;
10942 int i
, trace_size
= 0;
10943 unsigned long offset
=0;
10944 ucontext_t
*uc
= (ucontext_t
*) secret
;
10946 REDIS_NOTUSED(info
);
10948 redisLog(REDIS_WARNING
,
10949 "======= Ooops! Redis %s got signal: -%d- =======", REDIS_VERSION
, sig
);
10950 infostring
= genRedisInfoString();
10951 redisLog(REDIS_WARNING
, "%s",infostring
);
10952 /* It's not safe to sdsfree() the returned string under memory
10953 * corruption conditions. Let it leak as we are going to abort */
10955 trace_size
= backtrace(trace
, 100);
10956 /* overwrite sigaction with caller's address */
10957 if (getMcontextEip(uc
) != NULL
) {
10958 trace
[1] = getMcontextEip(uc
);
10960 messages
= backtrace_symbols(trace
, trace_size
);
/* Frame 0 is skipped: the loop starts at index 1. */
10962 for (i
=1; i
<trace_size
; ++i
) {
10963 char *fn
= findFuncName(trace
[i
], &offset
), *p
;
/* 'p' points at the '+' in the backtrace_symbols() text, or is NULL when
 * the text contains none. */
10965 p
= strchr(messages
[i
],'+');
10966 if (!fn
|| (p
&& ((unsigned long)strtol(p
+1,NULL
,10)) < offset
)) {
10967 redisLog(REDIS_WARNING
,"%s", messages
[i
]);
/* NOTE(review): the last conversion is "%d" while the matching argument is
 * cast to unsigned int; "%u" would match the argument type. */
10969 redisLog(REDIS_WARNING
,"%d redis-server %p %s + %d", i
, trace
[i
], fn
, (unsigned int)offset
);
10972 /* free(messages); Don't call free() with possibly corrupted memory. */
10976 static void sigtermHandler(int sig
) {
10977 REDIS_NOTUSED(sig
);
10979 redisLog(REDIS_WARNING
,"SIGTERM received, scheduling shutting down...");
10980 server
.shutdown_asap
= 1;
10983 static void setupSigSegvAction(void) {
10984 struct sigaction act
;
10986 sigemptyset (&act
.sa_mask
);
10987 /* When the SA_SIGINFO flag is set in sa_flags then sa_sigaction
10988 * is used. Otherwise, sa_handler is used */
10989 act
.sa_flags
= SA_NODEFER
| SA_ONSTACK
| SA_RESETHAND
| SA_SIGINFO
;
10990 act
.sa_sigaction
= segvHandler
;
10991 sigaction (SIGSEGV
, &act
, NULL
);
10992 sigaction (SIGBUS
, &act
, NULL
);
10993 sigaction (SIGFPE
, &act
, NULL
);
10994 sigaction (SIGILL
, &act
, NULL
);
10995 sigaction (SIGBUS
, &act
, NULL
);
10997 act
.sa_flags
= SA_NODEFER
| SA_ONSTACK
| SA_RESETHAND
;
10998 act
.sa_handler
= sigtermHandler
;
10999 sigaction (SIGTERM
, &act
, NULL
);
11003 #include "staticsymbols.h"
11004 /* This function tries to convert a pointer into a function name. It's used in
11005 * order to provide a backtrace under segmentation fault that's able to
11006 * display functions declared as static (otherwise the backtrace is useless). */
/* Look up the symbol that best matches the code address 'pointer'.
 *
 * symsTable is walked until an entry whose 'pointer' field is zero. An
 * entry is a candidate when the address is not (unsigned long)-1 and is
 * greater than or equal to the entry's address; 'off' is the distance
 * between the two. A candidate is taken when none was selected yet
 * (ret < 0) or when its distance is smaller than 'minoff'.
 *
 * Returns the 'name' field of the selected entry, or NULL when 'ret' is
 * still -1 after the scan.
 *
 * The declarations of 'i' and 'ret', the statements executed when a
 * candidate is taken and the write to '*offset' are not visible in this
 * excerpt. */
11007 static char *findFuncName(void *pointer
, unsigned long *offset
){
11009 unsigned long off
, minoff
= 0;
11011 /* Try to match against the Symbol with the smallest offset */
11012 for (i
=0; symsTable
[i
].pointer
; i
++) {
11013 unsigned long lp
= (unsigned long) pointer
;
11015 if (lp
!= (unsigned long)-1 && lp
>= symsTable
[i
].pointer
) {
11016 off
=lp
-symsTable
[i
].pointer
;
11017 if (ret
< 0 || off
< minoff
) {
11023 if (ret
== -1) return NULL
;
11025 return symsTable
[ret
].name
;
11027 #else /* HAVE_BACKTRACE */
/* Variant of setupSigSegvAction() compiled when HAVE_BACKTRACE is not
 * defined. Its body is not visible in this excerpt. */
11028 static void setupSigSegvAction(void) {
11030 #endif /* HAVE_BACKTRACE */