2 * Copyright (c) 2009-2010, Salvatore Sanfilippo <antirez at gmail dot com>
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions are met:
8 * * Redistributions of source code must retain the above copyright notice,
9 * this list of conditions and the following disclaimer.
10 * * Redistributions in binary form must reproduce the above copyright
11 * notice, this list of conditions and the following disclaimer in the
12 * documentation and/or other materials provided with the distribution.
13 * * Neither the name of Redis nor the names of its contributors may be used
14 * to endorse or promote products derived from this software without
15 * specific prior written permission.
17 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
18 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
21 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
22 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
23 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
24 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
25 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
26 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
27 * POSSIBILITY OF SUCH DAMAGE.
30 #define REDIS_VERSION "2.1.1"
45 #endif /* HAVE_BACKTRACE */
53 #include <arpa/inet.h>
57 #include <sys/resource.h>
65 #include "solarisfixes.h"
69 #include "ae.h" /* Event driven programming library */
70 #include "sds.h" /* Dynamic safe strings */
71 #include "anet.h" /* Networking the easy way */
72 #include "dict.h" /* Hash tables */
73 #include "adlist.h" /* Linked lists */
74 #include "zmalloc.h" /* total memory usage aware version of malloc/free */
75 #include "lzf.h" /* LZF compression library */
76 #include "pqsort.h" /* Partial qsort for SORT+LIMIT */
77 #include "zipmap.h" /* Compact dictionary-alike data structure */
78 #include "sha1.h" /* SHA1 is used for DEBUG DIGEST */
79 #include "release.h" /* Release and/or git repository information */
85 /* Static server configuration */
86 #define REDIS_SERVERPORT 6379 /* TCP port */
87 #define REDIS_MAXIDLETIME (60*5) /* default client timeout */
88 #define REDIS_IOBUF_LEN 1024
89 #define REDIS_LOADBUF_LEN 1024
90 #define REDIS_STATIC_ARGS 8
91 #define REDIS_DEFAULT_DBNUM 16
92 #define REDIS_CONFIGLINE_MAX 1024
93 #define REDIS_OBJFREELIST_MAX 1000000 /* Max number of objects to cache */
94 #define REDIS_MAX_SYNC_TIME 60 /* Slave can't take more to sync */
95 #define REDIS_EXPIRELOOKUPS_PER_CRON 10 /* lookup 10 expires per loop */
96 #define REDIS_MAX_WRITE_PER_EVENT (1024*64)
97 #define REDIS_REQUEST_MAX_SIZE (1024*1024*256) /* max bytes in inline command */
/* If more than REDIS_WRITEV_THRESHOLD write packets are pending, use writev */
100 #define REDIS_WRITEV_THRESHOLD 3
101 /* Max number of iovecs used for each writev call */
102 #define REDIS_WRITEV_IOVEC_COUNT 256
104 /* Hash table parameters */
105 #define REDIS_HT_MINFILL 10 /* Minimal hash table fill 10% */
108 #define REDIS_CMD_BULK 1 /* Bulk write command */
109 #define REDIS_CMD_INLINE 2 /* Inline command */
/* REDIS_CMD_DENYOOM deserves a longer comment: all the commands marked with
   this flag will return an error when the 'maxmemory' option is set in the
   config file and the server is using more than maxmemory bytes of memory.
   In short these commands are denied on low memory conditions. */
114 #define REDIS_CMD_DENYOOM 4
115 #define REDIS_CMD_FORCE_REPLICATION 8 /* Force replication even if dirty is 0 */
118 #define REDIS_STRING 0
124 /* Objects encoding. Some kind of objects like Strings and Hashes can be
125 * internally represented in multiple ways. The 'encoding' field of the object
126 * is set to one of this fields for this object. */
127 #define REDIS_ENCODING_RAW 0 /* Raw representation */
128 #define REDIS_ENCODING_INT 1 /* Encoded as integer */
129 #define REDIS_ENCODING_ZIPMAP 2 /* Encoded as zipmap */
130 #define REDIS_ENCODING_HT 3 /* Encoded as an hash table */
132 static char* strencoding
[] = {
133 "raw", "int", "zipmap", "hashtable"
136 /* Object types only used for dumping to disk */
137 #define REDIS_EXPIRETIME 253
138 #define REDIS_SELECTDB 254
139 #define REDIS_EOF 255
/* Defines related to the dump file format. To store 32 bits lengths for short
 * keys requires a lot of space, so we check the most significant 2 bits of
 * the first byte to interpret the length:
 * 00|000000 => if the two MSB are 00 the len is the 6 bits of this byte
 * 01|000000 00000000 => 01, the len is 14 bits, 6 bits + 8 bits of next byte
 * 10|000000 [32 bit integer] => if it's 10, a full 32 bit len will follow
 * 11|000000 this means: specially encoded object will follow. The six bits
 *           number specify the kind of object that follows.
 *           See the REDIS_RDB_ENC_* defines.
 * Lengths up to 63 are stored using a single byte, most DB keys, and many
 * values, will fit inside. */
154 #define REDIS_RDB_6BITLEN 0
155 #define REDIS_RDB_14BITLEN 1
156 #define REDIS_RDB_32BITLEN 2
157 #define REDIS_RDB_ENCVAL 3
158 #define REDIS_RDB_LENERR UINT_MAX
/* When a length of a string object stored on disk has the first two bits
 * set, the remaining six bits specify a special encoding for the object
 * accordingly to the following defines: */
163 #define REDIS_RDB_ENC_INT8 0 /* 8 bit signed integer */
164 #define REDIS_RDB_ENC_INT16 1 /* 16 bit signed integer */
165 #define REDIS_RDB_ENC_INT32 2 /* 32 bit signed integer */
166 #define REDIS_RDB_ENC_LZF 3 /* string compressed with FASTLZ */
168 /* Virtual memory object->where field. */
169 #define REDIS_VM_MEMORY 0 /* The object is on memory */
170 #define REDIS_VM_SWAPPED 1 /* The object is on disk */
171 #define REDIS_VM_SWAPPING 2 /* Redis is swapping this object on disk */
172 #define REDIS_VM_LOADING 3 /* Redis is loading this object from disk */
174 /* Virtual memory static configuration stuff.
175 * Check vmFindContiguousPages() to know more about this magic numbers. */
176 #define REDIS_VM_MAX_NEAR_PAGES 65536
177 #define REDIS_VM_MAX_RANDOM_JUMP 4096
178 #define REDIS_VM_MAX_THREADS 32
179 #define REDIS_THREAD_STACK_SIZE (1024*1024*4)
/* The following is the *percentage* of completed I/O jobs to process when the
 * handler is called. While Virtual Memory I/O operations are performed by
 * threads, these operations must be processed by the main thread when completed
 * in order to take effect. */
184 #define REDIS_MAX_COMPLETED_JOBS_PROCESSED 1
187 #define REDIS_SLAVE 1 /* This client is a slave server */
188 #define REDIS_MASTER 2 /* This client is a master server */
189 #define REDIS_MONITOR 4 /* This client is a slave monitor, see MONITOR */
190 #define REDIS_MULTI 8 /* This client is in a MULTI context */
191 #define REDIS_BLOCKED 16 /* The client is waiting in a blocking operation */
192 #define REDIS_IO_WAIT 32 /* The client is waiting for Virtual Memory I/O */
193 #define REDIS_DIRTY_CAS 64 /* Watched keys modified. EXEC will fail. */
195 /* Slave replication state - slave side */
196 #define REDIS_REPL_NONE 0 /* No active replication */
197 #define REDIS_REPL_CONNECT 1 /* Must connect to master */
198 #define REDIS_REPL_CONNECTED 2 /* Connected to master */
200 /* Slave replication state - from the point of view of master
201 * Note that in SEND_BULK and ONLINE state the slave receives new updates
202 * in its output queue. In the WAIT_BGSAVE state instead the server is waiting
203 * to start the next background saving in order to send updates to it. */
204 #define REDIS_REPL_WAIT_BGSAVE_START 3 /* master waits bgsave to start feeding it */
205 #define REDIS_REPL_WAIT_BGSAVE_END 4 /* master waits bgsave to start bulk DB transmission */
206 #define REDIS_REPL_SEND_BULK 5 /* master is sending the bulk DB */
207 #define REDIS_REPL_ONLINE 6 /* bulk DB already transmitted, receive updates */
209 /* List related stuff */
213 /* Sort operations */
214 #define REDIS_SORT_GET 0
215 #define REDIS_SORT_ASC 1
216 #define REDIS_SORT_DESC 2
217 #define REDIS_SORTKEY_MAX 1024
220 #define REDIS_DEBUG 0
221 #define REDIS_VERBOSE 1
222 #define REDIS_NOTICE 2
223 #define REDIS_WARNING 3
/* Anti-warning macro: evaluates V and discards the result, so the compiler
 * does not complain about an otherwise unused variable or parameter.
 * The argument is parenthesized so that a compound expression such as
 * `a + 1` is cast to void as a whole instead of only its first operand. */
#define REDIS_NOTUSED(V) ((void) (V))
228 #define ZSKIPLIST_MAXLEVEL 32 /* Should be enough for 2^32 elements */
229 #define ZSKIPLIST_P 0.25 /* Skiplist P = 1/4 */
231 /* Append only defines */
232 #define APPENDFSYNC_NO 0
233 #define APPENDFSYNC_ALWAYS 1
234 #define APPENDFSYNC_EVERYSEC 2
236 /* Hashes related defaults */
237 #define REDIS_HASH_MAX_ZIPMAP_ENTRIES 64
238 #define REDIS_HASH_MAX_ZIPMAP_VALUE 512
/* We can print the stacktrace, so our assert is defined this way.
 *
 * redisAssert(_e): when _e evaluates to false, calls _redisAssert() with the
 * stringified expression, the current file and line, then calls _exit(1).
 * When _e is true the macro evaluates to (void)0.
 *
 * redisPanic(_e): unconditionally calls _redisPanic() with the stringified
 * argument, the current file and line, then calls _exit(1).
 *
 * Both replacement lists are fully parenthesized so each macro behaves as a
 * single expression. Without the outer parentheses, using redisPanic() as
 * the last operand of ?: would let the `,_exit(1)` part escape the
 * conditional and run on every path. */
#define redisAssert(_e) ((_e)?(void)0 : (_redisAssert(#_e,__FILE__,__LINE__),_exit(1)))
#define redisPanic(_e) (_redisPanic(#_e,__FILE__,__LINE__),_exit(1))
243 static void _redisAssert(char *estr
, char *file
, int line
);
244 static void _redisPanic(char *msg
, char *file
, int line
);
246 /*================================= Data types ============================== */
248 /* A redis object, that is a type able to hold a string / list / set */
250 /* The VM object structure */
251 struct redisObjectVM
{
252 off_t page
; /* the page at witch the object is stored on disk */
253 off_t usedpages
; /* number of pages used on disk */
254 time_t atime
; /* Last access time */
257 /* The actual Redis Object */
258 typedef struct redisObject
{
261 unsigned char encoding
;
262 unsigned char storage
; /* If this object is a key, where is the value?
263 * REDIS_VM_MEMORY, REDIS_VM_SWAPPED, ... */
264 unsigned char vtype
; /* If this object is a key, and value is swapped out,
265 * this is the type of the swapped out object. */
/* VM fields, these are only allocated if VM is active, otherwise the
 * object allocation function will just allocate
 * sizeof(redisObject) minus sizeof(redisObjectVM), so using
 * Redis without VM active will not have any overhead. */
271 struct redisObjectVM vm
;
/* Macro used to initialize a Redis object allocated on the stack.
 * Note that this macro is kept near the structure definition to make sure
 * we'll update it when the structure is changed, to avoid bugs like
 * bug #85 introduced exactly in this way. */
278 #define initStaticStringObject(_var,_ptr) do { \
280 _var.type = REDIS_STRING; \
281 _var.encoding = REDIS_ENCODING_RAW; \
283 if (server.vm_enabled) _var.storage = REDIS_VM_MEMORY; \
286 typedef struct redisDb
{
287 dict
*dict
; /* The keyspace for this DB */
288 dict
*expires
; /* Timeout of keys with a timeout set */
289 dict
*blocking_keys
; /* Keys with clients waiting for data (BLPOP) */
290 dict
*io_keys
; /* Keys with clients waiting for VM I/O */
291 dict
*watched_keys
; /* WATCHED keys for MULTI/EXEC CAS */
295 /* Client MULTI/EXEC state */
296 typedef struct multiCmd
{
299 struct redisCommand
*cmd
;
302 typedef struct multiState
{
303 multiCmd
*commands
; /* Array of MULTI commands */
304 int count
; /* Total number of MULTI commands */
/* With multiplexing we need to take per-client state.
 * Clients are taken in a linked list. */
309 typedef struct redisClient
{
314 robj
**argv
, **mbargv
;
316 int bulklen
; /* bulk read len. -1 if not in bulk read mode */
317 int multibulk
; /* multi bulk command format active */
320 time_t lastinteraction
; /* time of the last interaction, used for timeout */
321 int flags
; /* REDIS_SLAVE | REDIS_MONITOR | REDIS_MULTI ... */
322 int slaveseldb
; /* slave selected db, if this client is a slave */
323 int authenticated
; /* when requirepass is non-NULL */
324 int replstate
; /* replication state if this is a slave */
325 int repldbfd
; /* replication DB file descriptor */
326 long repldboff
; /* replication DB file offset */
327 off_t repldbsize
; /* replication DB file size */
328 multiState mstate
; /* MULTI/EXEC state */
329 robj
**blocking_keys
; /* The key we are waiting to terminate a blocking
330 * operation such as BLPOP. Otherwise NULL. */
331 int blocking_keys_num
; /* Number of blocking keys */
332 time_t blockingto
; /* Blocking operation timeout. If UNIX current time
333 * is >= blockingto then the operation timed out. */
334 list
*io_keys
; /* Keys this client is waiting to be loaded from the
335 * swap file in order to continue. */
336 list
*watched_keys
; /* Keys WATCHED for MULTI/EXEC CAS */
337 dict
*pubsub_channels
; /* channels a client is interested in (SUBSCRIBE) */
338 list
*pubsub_patterns
; /* patterns a client is interested in (SUBSCRIBE) */
346 /* Global server state structure */
351 long long dirty
; /* changes to DB from the last save */
353 list
*slaves
, *monitors
;
354 char neterr
[ANET_ERR_LEN
];
356 int cronloops
; /* number of times the cron function run */
357 list
*objfreelist
; /* A list of freed objects to avoid malloc() */
358 time_t lastsave
; /* Unix time of last save succeeede */
359 /* Fields used only for stats */
360 time_t stat_starttime
; /* server start time */
361 long long stat_numcommands
; /* number of processed commands */
362 long long stat_numconnections
; /* number of connections received */
363 long long stat_expiredkeys
; /* number of expired keys */
377 pid_t bgsavechildpid
;
378 pid_t bgrewritechildpid
;
379 sds bgrewritebuf
; /* buffer taken by parent during oppend only rewrite */
380 sds aofbuf
; /* AOF buffer, written before entering the event loop */
381 struct saveparam
*saveparams
;
386 char *appendfilename
;
390 /* Replication related */
395 redisClient
*master
; /* client that is master for this slave */
397 unsigned int maxclients
;
398 unsigned long long maxmemory
;
399 unsigned int blpop_blocked_clients
;
400 unsigned int vm_blocked_clients
;
401 /* Sort parameters - qsort_r() is only available under BSD so we
402 * have to take this state global, in order to pass it to sortCompare() */
406 /* Virtual memory configuration */
411 unsigned long long vm_max_memory
;
413 size_t hash_max_zipmap_entries
;
414 size_t hash_max_zipmap_value
;
415 /* Virtual memory state */
418 off_t vm_next_page
; /* Next probably empty page */
419 off_t vm_near_pages
; /* Number of pages allocated sequentially */
420 unsigned char *vm_bitmap
; /* Bitmap of free/used pages */
421 time_t unixtime
; /* Unix time sampled every second. */
422 /* Virtual memory I/O threads stuff */
423 /* An I/O thread process an element taken from the io_jobs queue and
424 * put the result of the operation in the io_done list. While the
425 * job is being processed, it's put on io_processing queue. */
426 list
*io_newjobs
; /* List of VM I/O jobs yet to be processed */
427 list
*io_processing
; /* List of VM I/O jobs being processed */
428 list
*io_processed
; /* List of VM I/O jobs already processed */
429 list
*io_ready_clients
; /* Clients ready to be unblocked. All keys loaded */
430 pthread_mutex_t io_mutex
; /* lock to access io_jobs/io_done/io_thread_job */
431 pthread_mutex_t obj_freelist_mutex
; /* safe redis objects creation/free */
432 pthread_mutex_t io_swapfile_mutex
; /* So we can lseek + write */
433 pthread_attr_t io_threads_attr
; /* attributes for threads creation */
434 int io_active_threads
; /* Number of running I/O threads */
435 int vm_max_threads
; /* Max number of I/O threads running at the same time */
/* Our main thread is blocked on the event loop, looking for sockets ready
 * to be read or written, so when a threaded I/O operation is ready to be
 * processed by the main thread, the I/O thread will use a unix pipe to
 * awake the main thread. The following are the two pipe FDs. */
440 int io_ready_pipe_read
;
441 int io_ready_pipe_write
;
442 /* Virtual memory stats */
443 unsigned long long vm_stats_used_pages
;
444 unsigned long long vm_stats_swapped_objects
;
445 unsigned long long vm_stats_swapouts
;
446 unsigned long long vm_stats_swapins
;
448 dict
*pubsub_channels
; /* Map channels to list of subscribed clients */
449 list
*pubsub_patterns
; /* A list of pubsub_patterns */
454 typedef struct pubsubPattern
{
459 typedef void redisCommandProc(redisClient
*c
);
460 typedef void redisVmPreloadProc(redisClient
*c
, struct redisCommand
*cmd
, int argc
, robj
**argv
);
461 struct redisCommand
{
463 redisCommandProc
*proc
;
466 /* Use a function to determine which keys need to be loaded
467 * in the background prior to executing this command. Takes precedence
468 * over vm_firstkey and others, ignored when NULL */
469 redisVmPreloadProc
*vm_preload_proc
;
470 /* What keys should be loaded in background when calling this command? */
471 int vm_firstkey
; /* The first argument that's a key (0 = no keys) */
472 int vm_lastkey
; /* THe last argument that's a key */
473 int vm_keystep
; /* The step between first and last key */
476 struct redisFunctionSym
{
478 unsigned long pointer
;
481 typedef struct _redisSortObject
{
489 typedef struct _redisSortOperation
{
492 } redisSortOperation
;
494 /* ZSETs use a specialized version of Skiplists */
496 typedef struct zskiplistNode
{
497 struct zskiplistNode
**forward
;
498 struct zskiplistNode
*backward
;
504 typedef struct zskiplist
{
505 struct zskiplistNode
*header
, *tail
;
506 unsigned long length
;
510 typedef struct zset
{
515 /* Our shared "common" objects */
517 #define REDIS_SHARED_INTEGERS 10000
518 struct sharedObjectsStruct
{
519 robj
*crlf
, *ok
, *err
, *emptybulk
, *czero
, *cone
, *pong
, *space
,
520 *colon
, *nullbulk
, *nullmultibulk
, *queued
,
521 *emptymultibulk
, *wrongtypeerr
, *nokeyerr
, *syntaxerr
, *sameobjecterr
,
522 *outofrangeerr
, *plus
,
523 *select0
, *select1
, *select2
, *select3
, *select4
,
524 *select5
, *select6
, *select7
, *select8
, *select9
,
525 *messagebulk
, *pmessagebulk
, *subscribebulk
, *unsubscribebulk
, *mbulk3
,
526 *mbulk4
, *psubscribebulk
, *punsubscribebulk
,
527 *integers
[REDIS_SHARED_INTEGERS
];
/* Global vars that are actually used as constants. The following double
 * values are used for double on-disk serialization, and are initialized
 * at runtime to avoid strange compiler optimizations. */
534 static double R_Zero
, R_PosInf
, R_NegInf
, R_Nan
;
536 /* VM threaded I/O request message */
537 #define REDIS_IOJOB_LOAD 0 /* Load from disk to memory */
538 #define REDIS_IOJOB_PREPARE_SWAP 1 /* Compute needed pages */
539 #define REDIS_IOJOB_DO_SWAP 2 /* Swap from memory to disk */
540 typedef struct iojob
{
541 int type
; /* Request type, REDIS_IOJOB_* */
542 redisDb
*db
;/* Redis database */
543 robj
*key
; /* This I/O request is about swapping this key */
544 robj
*val
; /* the value to swap for REDIS_IOREQ_*_SWAP, otherwise this
545 * field is populated by the I/O thread for REDIS_IOREQ_LOAD. */
546 off_t page
; /* Swap page where to read/write the object */
547 off_t pages
; /* Swap pages needed to save object. PREPARE_SWAP return val */
548 int canceled
; /* True if this command was canceled by blocking side of VM */
549 pthread_t thread
; /* ID of the thread processing this entry */
552 /*================================ Prototypes =============================== */
554 static void freeStringObject(robj
*o
);
555 static void freeListObject(robj
*o
);
556 static void freeSetObject(robj
*o
);
557 static void decrRefCount(void *o
);
558 static robj
*createObject(int type
, void *ptr
);
559 static void freeClient(redisClient
*c
);
560 static int rdbLoad(char *filename
);
561 static void addReply(redisClient
*c
, robj
*obj
);
562 static void addReplySds(redisClient
*c
, sds s
);
563 static void incrRefCount(robj
*o
);
564 static int rdbSaveBackground(char *filename
);
565 static robj
*createStringObject(char *ptr
, size_t len
);
566 static robj
*dupStringObject(robj
*o
);
567 static void replicationFeedSlaves(list
*slaves
, int dictid
, robj
**argv
, int argc
);
568 static void replicationFeedMonitors(list
*monitors
, int dictid
, robj
**argv
, int argc
);
569 static void flushAppendOnlyFile(void);
570 static void feedAppendOnlyFile(struct redisCommand
*cmd
, int dictid
, robj
**argv
, int argc
);
571 static int syncWithMaster(void);
572 static robj
*tryObjectEncoding(robj
*o
);
573 static robj
*getDecodedObject(robj
*o
);
574 static int removeExpire(redisDb
*db
, robj
*key
);
575 static int expireIfNeeded(redisDb
*db
, robj
*key
);
576 static int deleteIfVolatile(redisDb
*db
, robj
*key
);
577 static int deleteIfSwapped(redisDb
*db
, robj
*key
);
578 static int deleteKey(redisDb
*db
, robj
*key
);
579 static time_t getExpire(redisDb
*db
, robj
*key
);
580 static int setExpire(redisDb
*db
, robj
*key
, time_t when
);
581 static void updateSlavesWaitingBgsave(int bgsaveerr
);
582 static void freeMemoryIfNeeded(void);
583 static int processCommand(redisClient
*c
);
584 static void setupSigSegvAction(void);
585 static void rdbRemoveTempFile(pid_t childpid
);
586 static void aofRemoveTempFile(pid_t childpid
);
587 static size_t stringObjectLen(robj
*o
);
588 static void processInputBuffer(redisClient
*c
);
589 static zskiplist
*zslCreate(void);
590 static void zslFree(zskiplist
*zsl
);
591 static void zslInsert(zskiplist
*zsl
, double score
, robj
*obj
);
592 static void sendReplyToClientWritev(aeEventLoop
*el
, int fd
, void *privdata
, int mask
);
593 static void initClientMultiState(redisClient
*c
);
594 static void freeClientMultiState(redisClient
*c
);
595 static void queueMultiCommand(redisClient
*c
, struct redisCommand
*cmd
);
596 static void unblockClientWaitingData(redisClient
*c
);
597 static int handleClientsWaitingListPush(redisClient
*c
, robj
*key
, robj
*ele
);
598 static void vmInit(void);
599 static void vmMarkPagesFree(off_t page
, off_t count
);
600 static robj
*vmLoadObject(robj
*key
);
601 static robj
*vmPreviewObject(robj
*key
);
602 static int vmSwapOneObjectBlocking(void);
603 static int vmSwapOneObjectThreaded(void);
604 static int vmCanSwapOut(void);
605 static int tryFreeOneObjectFromFreelist(void);
606 static void acceptHandler(aeEventLoop
*el
, int fd
, void *privdata
, int mask
);
607 static void vmThreadedIOCompletedJob(aeEventLoop
*el
, int fd
, void *privdata
, int mask
);
608 static void vmCancelThreadedIOJob(robj
*o
);
609 static void lockThreadedIO(void);
610 static void unlockThreadedIO(void);
611 static int vmSwapObjectThreaded(robj
*key
, robj
*val
, redisDb
*db
);
612 static void freeIOJob(iojob
*j
);
613 static void queueIOJob(iojob
*j
);
614 static int vmWriteObjectOnSwap(robj
*o
, off_t page
);
615 static robj
*vmReadObjectFromSwap(off_t page
, int type
);
616 static void waitEmptyIOJobsQueue(void);
617 static void vmReopenSwapFile(void);
618 static int vmFreePage(off_t page
);
619 static void zunionInterBlockClientOnSwappedKeys(redisClient
*c
, struct redisCommand
*cmd
, int argc
, robj
**argv
);
620 static void execBlockClientOnSwappedKeys(redisClient
*c
, struct redisCommand
*cmd
, int argc
, robj
**argv
);
621 static int blockClientOnSwappedKeys(redisClient
*c
, struct redisCommand
*cmd
);
622 static int dontWaitForSwappedKey(redisClient
*c
, robj
*key
);
623 static void handleClientsBlockedOnSwappedKey(redisDb
*db
, robj
*key
);
624 static void readQueryFromClient(aeEventLoop
*el
, int fd
, void *privdata
, int mask
);
625 static struct redisCommand
*lookupCommand(char *name
);
626 static void call(redisClient
*c
, struct redisCommand
*cmd
);
627 static void resetClient(redisClient
*c
);
628 static void convertToRealHash(robj
*o
);
629 static int pubsubUnsubscribeAllChannels(redisClient
*c
, int notify
);
630 static int pubsubUnsubscribeAllPatterns(redisClient
*c
, int notify
);
631 static void freePubsubPattern(void *p
);
632 static int listMatchPubsubPattern(void *a
, void *b
);
633 static int compareStringObjects(robj
*a
, robj
*b
);
634 static int equalStringObjects(robj
*a
, robj
*b
);
636 static int rewriteAppendOnlyFileBackground(void);
637 static int vmSwapObjectBlocking(robj
*key
, robj
*val
);
/* Forward declaration. Takes no arguments and returns an int status.
 * Declared with (void), like the other zero-argument forward declarations
 * in this list, so that it is a real prototype and a call that passes
 * arguments is diagnosed by the compiler. */
static int prepareForShutdown(void);
639 static void touchWatchedKey(redisDb
*db
, robj
*key
);
640 static void touchWatchedKeysOnFlush(int dbid
);
641 static void unwatchAllKeys(redisClient
*c
);
643 static void authCommand(redisClient
*c
);
644 static void pingCommand(redisClient
*c
);
645 static void echoCommand(redisClient
*c
);
646 static void setCommand(redisClient
*c
);
647 static void setnxCommand(redisClient
*c
);
648 static void setexCommand(redisClient
*c
);
649 static void getCommand(redisClient
*c
);
650 static void delCommand(redisClient
*c
);
651 static void existsCommand(redisClient
*c
);
652 static void incrCommand(redisClient
*c
);
653 static void decrCommand(redisClient
*c
);
654 static void incrbyCommand(redisClient
*c
);
655 static void decrbyCommand(redisClient
*c
);
656 static void selectCommand(redisClient
*c
);
657 static void randomkeyCommand(redisClient
*c
);
658 static void keysCommand(redisClient
*c
);
659 static void dbsizeCommand(redisClient
*c
);
660 static void lastsaveCommand(redisClient
*c
);
661 static void saveCommand(redisClient
*c
);
662 static void bgsaveCommand(redisClient
*c
);
663 static void bgrewriteaofCommand(redisClient
*c
);
664 static void shutdownCommand(redisClient
*c
);
665 static void moveCommand(redisClient
*c
);
666 static void renameCommand(redisClient
*c
);
667 static void renamenxCommand(redisClient
*c
);
668 static void lpushCommand(redisClient
*c
);
669 static void rpushCommand(redisClient
*c
);
670 static void lpopCommand(redisClient
*c
);
671 static void rpopCommand(redisClient
*c
);
672 static void llenCommand(redisClient
*c
);
673 static void lindexCommand(redisClient
*c
);
674 static void lrangeCommand(redisClient
*c
);
675 static void ltrimCommand(redisClient
*c
);
676 static void typeCommand(redisClient
*c
);
677 static void lsetCommand(redisClient
*c
);
678 static void saddCommand(redisClient
*c
);
679 static void sremCommand(redisClient
*c
);
680 static void smoveCommand(redisClient
*c
);
681 static void sismemberCommand(redisClient
*c
);
682 static void scardCommand(redisClient
*c
);
683 static void spopCommand(redisClient
*c
);
684 static void srandmemberCommand(redisClient
*c
);
685 static void sinterCommand(redisClient
*c
);
686 static void sinterstoreCommand(redisClient
*c
);
687 static void sunionCommand(redisClient
*c
);
688 static void sunionstoreCommand(redisClient
*c
);
689 static void sdiffCommand(redisClient
*c
);
690 static void sdiffstoreCommand(redisClient
*c
);
691 static void syncCommand(redisClient
*c
);
692 static void flushdbCommand(redisClient
*c
);
693 static void flushallCommand(redisClient
*c
);
694 static void sortCommand(redisClient
*c
);
695 static void lremCommand(redisClient
*c
);
696 static void rpoplpushcommand(redisClient
*c
);
697 static void infoCommand(redisClient
*c
);
698 static void mgetCommand(redisClient
*c
);
699 static void monitorCommand(redisClient
*c
);
700 static void expireCommand(redisClient
*c
);
701 static void expireatCommand(redisClient
*c
);
702 static void getsetCommand(redisClient
*c
);
703 static void ttlCommand(redisClient
*c
);
704 static void slaveofCommand(redisClient
*c
);
705 static void debugCommand(redisClient
*c
);
706 static void msetCommand(redisClient
*c
);
707 static void msetnxCommand(redisClient
*c
);
708 static void zaddCommand(redisClient
*c
);
709 static void zincrbyCommand(redisClient
*c
);
710 static void zrangeCommand(redisClient
*c
);
711 static void zrangebyscoreCommand(redisClient
*c
);
712 static void zcountCommand(redisClient
*c
);
713 static void zrevrangeCommand(redisClient
*c
);
714 static void zcardCommand(redisClient
*c
);
715 static void zremCommand(redisClient
*c
);
716 static void zscoreCommand(redisClient
*c
);
717 static void zremrangebyscoreCommand(redisClient
*c
);
718 static void multiCommand(redisClient
*c
);
719 static void execCommand(redisClient
*c
);
720 static void discardCommand(redisClient
*c
);
721 static void blpopCommand(redisClient
*c
);
722 static void brpopCommand(redisClient
*c
);
723 static void appendCommand(redisClient
*c
);
724 static void substrCommand(redisClient
*c
);
725 static void zrankCommand(redisClient
*c
);
726 static void zrevrankCommand(redisClient
*c
);
727 static void hsetCommand(redisClient
*c
);
728 static void hsetnxCommand(redisClient
*c
);
729 static void hgetCommand(redisClient
*c
);
730 static void hmsetCommand(redisClient
*c
);
731 static void hmgetCommand(redisClient
*c
);
732 static void hdelCommand(redisClient
*c
);
733 static void hlenCommand(redisClient
*c
);
734 static void zremrangebyrankCommand(redisClient
*c
);
735 static void zunionstoreCommand(redisClient
*c
);
736 static void zinterstoreCommand(redisClient
*c
);
737 static void hkeysCommand(redisClient
*c
);
738 static void hvalsCommand(redisClient
*c
);
739 static void hgetallCommand(redisClient
*c
);
740 static void hexistsCommand(redisClient
*c
);
741 static void configCommand(redisClient
*c
);
742 static void hincrbyCommand(redisClient
*c
);
743 static void subscribeCommand(redisClient
*c
);
744 static void unsubscribeCommand(redisClient
*c
);
745 static void psubscribeCommand(redisClient
*c
);
746 static void punsubscribeCommand(redisClient
*c
);
747 static void publishCommand(redisClient
*c
);
748 static void watchCommand(redisClient
*c
);
749 static void unwatchCommand(redisClient
*c
);
751 /*================================= Globals ================================= */
754 static struct redisServer server
; /* server global state */
755 static struct redisCommand
*commandTable
;
756 static unsigned int commandTableSize
;
757 static struct redisCommand readonlyCommandTable
[] = {
758 {"get",getCommand
,2,REDIS_CMD_INLINE
,NULL
,1,1,1},
759 {"set",setCommand
,3,REDIS_CMD_BULK
|REDIS_CMD_DENYOOM
,NULL
,0,0,0},
760 {"setnx",setnxCommand
,3,REDIS_CMD_BULK
|REDIS_CMD_DENYOOM
,NULL
,0,0,0},
761 {"setex",setexCommand
,4,REDIS_CMD_BULK
|REDIS_CMD_DENYOOM
,NULL
,0,0,0},
762 {"append",appendCommand
,3,REDIS_CMD_BULK
|REDIS_CMD_DENYOOM
,NULL
,1,1,1},
763 {"substr",substrCommand
,4,REDIS_CMD_INLINE
,NULL
,1,1,1},
764 {"del",delCommand
,-2,REDIS_CMD_INLINE
,NULL
,0,0,0},
765 {"exists",existsCommand
,2,REDIS_CMD_INLINE
,NULL
,1,1,1},
766 {"incr",incrCommand
,2,REDIS_CMD_INLINE
|REDIS_CMD_DENYOOM
,NULL
,1,1,1},
767 {"decr",decrCommand
,2,REDIS_CMD_INLINE
|REDIS_CMD_DENYOOM
,NULL
,1,1,1},
768 {"mget",mgetCommand
,-2,REDIS_CMD_INLINE
,NULL
,1,-1,1},
769 {"rpush",rpushCommand
,3,REDIS_CMD_BULK
|REDIS_CMD_DENYOOM
,NULL
,1,1,1},
770 {"lpush",lpushCommand
,3,REDIS_CMD_BULK
|REDIS_CMD_DENYOOM
,NULL
,1,1,1},
771 {"rpop",rpopCommand
,2,REDIS_CMD_INLINE
,NULL
,1,1,1},
772 {"lpop",lpopCommand
,2,REDIS_CMD_INLINE
,NULL
,1,1,1},
773 {"brpop",brpopCommand
,-3,REDIS_CMD_INLINE
,NULL
,1,1,1},
774 {"blpop",blpopCommand
,-3,REDIS_CMD_INLINE
,NULL
,1,1,1},
775 {"llen",llenCommand
,2,REDIS_CMD_INLINE
,NULL
,1,1,1},
776 {"lindex",lindexCommand
,3,REDIS_CMD_INLINE
,NULL
,1,1,1},
777 {"lset",lsetCommand
,4,REDIS_CMD_BULK
|REDIS_CMD_DENYOOM
,NULL
,1,1,1},
778 {"lrange",lrangeCommand
,4,REDIS_CMD_INLINE
,NULL
,1,1,1},
779 {"ltrim",ltrimCommand
,4,REDIS_CMD_INLINE
,NULL
,1,1,1},
780 {"lrem",lremCommand
,4,REDIS_CMD_BULK
,NULL
,1,1,1},
781 {"rpoplpush",rpoplpushcommand
,3,REDIS_CMD_INLINE
|REDIS_CMD_DENYOOM
,NULL
,1,2,1},
782 {"sadd",saddCommand
,3,REDIS_CMD_BULK
|REDIS_CMD_DENYOOM
,NULL
,1,1,1},
783 {"srem",sremCommand
,3,REDIS_CMD_BULK
,NULL
,1,1,1},
784 {"smove",smoveCommand
,4,REDIS_CMD_BULK
,NULL
,1,2,1},
785 {"sismember",sismemberCommand
,3,REDIS_CMD_BULK
,NULL
,1,1,1},
786 {"scard",scardCommand
,2,REDIS_CMD_INLINE
,NULL
,1,1,1},
787 {"spop",spopCommand
,2,REDIS_CMD_INLINE
,NULL
,1,1,1},
788 {"srandmember",srandmemberCommand
,2,REDIS_CMD_INLINE
,NULL
,1,1,1},
789 {"sinter",sinterCommand
,-2,REDIS_CMD_INLINE
|REDIS_CMD_DENYOOM
,NULL
,1,-1,1},
790 {"sinterstore",sinterstoreCommand
,-3,REDIS_CMD_INLINE
|REDIS_CMD_DENYOOM
,NULL
,2,-1,1},
791 {"sunion",sunionCommand
,-2,REDIS_CMD_INLINE
|REDIS_CMD_DENYOOM
,NULL
,1,-1,1},
792 {"sunionstore",sunionstoreCommand
,-3,REDIS_CMD_INLINE
|REDIS_CMD_DENYOOM
,NULL
,2,-1,1},
793 {"sdiff",sdiffCommand
,-2,REDIS_CMD_INLINE
|REDIS_CMD_DENYOOM
,NULL
,1,-1,1},
794 {"sdiffstore",sdiffstoreCommand
,-3,REDIS_CMD_INLINE
|REDIS_CMD_DENYOOM
,NULL
,2,-1,1},
795 {"smembers",sinterCommand
,2,REDIS_CMD_INLINE
,NULL
,1,1,1},
796 {"zadd",zaddCommand
,4,REDIS_CMD_BULK
|REDIS_CMD_DENYOOM
,NULL
,1,1,1},
797 {"zincrby",zincrbyCommand
,4,REDIS_CMD_BULK
|REDIS_CMD_DENYOOM
,NULL
,1,1,1},
798 {"zrem",zremCommand
,3,REDIS_CMD_BULK
,NULL
,1,1,1},
799 {"zremrangebyscore",zremrangebyscoreCommand
,4,REDIS_CMD_INLINE
,NULL
,1,1,1},
800 {"zremrangebyrank",zremrangebyrankCommand
,4,REDIS_CMD_INLINE
,NULL
,1,1,1},
801 {"zunionstore",zunionstoreCommand
,-4,REDIS_CMD_INLINE
|REDIS_CMD_DENYOOM
,zunionInterBlockClientOnSwappedKeys
,0,0,0},
802 {"zinterstore",zinterstoreCommand
,-4,REDIS_CMD_INLINE
|REDIS_CMD_DENYOOM
,zunionInterBlockClientOnSwappedKeys
,0,0,0},
803 {"zrange",zrangeCommand
,-4,REDIS_CMD_INLINE
,NULL
,1,1,1},
804 {"zrangebyscore",zrangebyscoreCommand
,-4,REDIS_CMD_INLINE
,NULL
,1,1,1},
805 {"zcount",zcountCommand
,4,REDIS_CMD_INLINE
,NULL
,1,1,1},
806 {"zrevrange",zrevrangeCommand
,-4,REDIS_CMD_INLINE
,NULL
,1,1,1},
807 {"zcard",zcardCommand
,2,REDIS_CMD_INLINE
,NULL
,1,1,1},
808 {"zscore",zscoreCommand
,3,REDIS_CMD_BULK
|REDIS_CMD_DENYOOM
,NULL
,1,1,1},
809 {"zrank",zrankCommand
,3,REDIS_CMD_BULK
,NULL
,1,1,1},
810 {"zrevrank",zrevrankCommand
,3,REDIS_CMD_BULK
,NULL
,1,1,1},
811 {"hset",hsetCommand
,4,REDIS_CMD_BULK
|REDIS_CMD_DENYOOM
,NULL
,1,1,1},
812 {"hsetnx",hsetnxCommand
,4,REDIS_CMD_BULK
|REDIS_CMD_DENYOOM
,NULL
,1,1,1},
813 {"hget",hgetCommand
,3,REDIS_CMD_BULK
,NULL
,1,1,1},
814 {"hmset",hmsetCommand
,-4,REDIS_CMD_BULK
|REDIS_CMD_DENYOOM
,NULL
,1,1,1},
815 {"hmget",hmgetCommand
,-3,REDIS_CMD_BULK
,NULL
,1,1,1},
816 {"hincrby",hincrbyCommand
,4,REDIS_CMD_INLINE
|REDIS_CMD_DENYOOM
,NULL
,1,1,1},
817 {"hdel",hdelCommand
,3,REDIS_CMD_BULK
,NULL
,1,1,1},
818 {"hlen",hlenCommand
,2,REDIS_CMD_INLINE
,NULL
,1,1,1},
819 {"hkeys",hkeysCommand
,2,REDIS_CMD_INLINE
,NULL
,1,1,1},
820 {"hvals",hvalsCommand
,2,REDIS_CMD_INLINE
,NULL
,1,1,1},
821 {"hgetall",hgetallCommand
,2,REDIS_CMD_INLINE
,NULL
,1,1,1},
822 {"hexists",hexistsCommand
,3,REDIS_CMD_BULK
,NULL
,1,1,1},
823 {"incrby",incrbyCommand
,3,REDIS_CMD_INLINE
|REDIS_CMD_DENYOOM
,NULL
,1,1,1},
824 {"decrby",decrbyCommand
,3,REDIS_CMD_INLINE
|REDIS_CMD_DENYOOM
,NULL
,1,1,1},
825 {"getset",getsetCommand
,3,REDIS_CMD_BULK
|REDIS_CMD_DENYOOM
,NULL
,1,1,1},
826 {"mset",msetCommand
,-3,REDIS_CMD_BULK
|REDIS_CMD_DENYOOM
,NULL
,1,-1,2},
827 {"msetnx",msetnxCommand
,-3,REDIS_CMD_BULK
|REDIS_CMD_DENYOOM
,NULL
,1,-1,2},
828 {"randomkey",randomkeyCommand
,1,REDIS_CMD_INLINE
,NULL
,0,0,0},
829 {"select",selectCommand
,2,REDIS_CMD_INLINE
,NULL
,0,0,0},
830 {"move",moveCommand
,3,REDIS_CMD_INLINE
,NULL
,1,1,1},
831 {"rename",renameCommand
,3,REDIS_CMD_INLINE
,NULL
,1,1,1},
832 {"renamenx",renamenxCommand
,3,REDIS_CMD_INLINE
,NULL
,1,1,1},
833 {"expire",expireCommand
,3,REDIS_CMD_INLINE
,NULL
,0,0,0},
834 {"expireat",expireatCommand
,3,REDIS_CMD_INLINE
,NULL
,0,0,0},
835 {"keys",keysCommand
,2,REDIS_CMD_INLINE
,NULL
,0,0,0},
836 {"dbsize",dbsizeCommand
,1,REDIS_CMD_INLINE
,NULL
,0,0,0},
837 {"auth",authCommand
,2,REDIS_CMD_INLINE
,NULL
,0,0,0},
838 {"ping",pingCommand
,1,REDIS_CMD_INLINE
,NULL
,0,0,0},
839 {"echo",echoCommand
,2,REDIS_CMD_BULK
,NULL
,0,0,0},
840 {"save",saveCommand
,1,REDIS_CMD_INLINE
,NULL
,0,0,0},
841 {"bgsave",bgsaveCommand
,1,REDIS_CMD_INLINE
,NULL
,0,0,0},
842 {"bgrewriteaof",bgrewriteaofCommand
,1,REDIS_CMD_INLINE
,NULL
,0,0,0},
843 {"shutdown",shutdownCommand
,1,REDIS_CMD_INLINE
,NULL
,0,0,0},
844 {"lastsave",lastsaveCommand
,1,REDIS_CMD_INLINE
,NULL
,0,0,0},
845 {"type",typeCommand
,2,REDIS_CMD_INLINE
,NULL
,1,1,1},
846 {"multi",multiCommand
,1,REDIS_CMD_INLINE
,NULL
,0,0,0},
847 {"exec",execCommand
,1,REDIS_CMD_INLINE
|REDIS_CMD_DENYOOM
,execBlockClientOnSwappedKeys
,0,0,0},
848 {"discard",discardCommand
,1,REDIS_CMD_INLINE
,NULL
,0,0,0},
849 {"sync",syncCommand
,1,REDIS_CMD_INLINE
,NULL
,0,0,0},
850 {"flushdb",flushdbCommand
,1,REDIS_CMD_INLINE
,NULL
,0,0,0},
851 {"flushall",flushallCommand
,1,REDIS_CMD_INLINE
,NULL
,0,0,0},
852 {"sort",sortCommand
,-2,REDIS_CMD_INLINE
|REDIS_CMD_DENYOOM
,NULL
,1,1,1},
853 {"info",infoCommand
,1,REDIS_CMD_INLINE
,NULL
,0,0,0},
854 {"monitor",monitorCommand
,1,REDIS_CMD_INLINE
,NULL
,0,0,0},
855 {"ttl",ttlCommand
,2,REDIS_CMD_INLINE
,NULL
,1,1,1},
856 {"slaveof",slaveofCommand
,3,REDIS_CMD_INLINE
,NULL
,0,0,0},
857 {"debug",debugCommand
,-2,REDIS_CMD_INLINE
,NULL
,0,0,0},
858 {"config",configCommand
,-2,REDIS_CMD_BULK
,NULL
,0,0,0},
859 {"subscribe",subscribeCommand
,-2,REDIS_CMD_INLINE
,NULL
,0,0,0},
860 {"unsubscribe",unsubscribeCommand
,-1,REDIS_CMD_INLINE
,NULL
,0,0,0},
861 {"psubscribe",psubscribeCommand
,-2,REDIS_CMD_INLINE
,NULL
,0,0,0},
862 {"punsubscribe",punsubscribeCommand
,-1,REDIS_CMD_INLINE
,NULL
,0,0,0},
863 {"publish",publishCommand
,3,REDIS_CMD_BULK
|REDIS_CMD_FORCE_REPLICATION
,NULL
,0,0,0},
864 {"watch",watchCommand
,-2,REDIS_CMD_INLINE
,NULL
,0,0,0},
865 {"unwatch",unwatchCommand
,1,REDIS_CMD_INLINE
,NULL
,0,0,0},
866 {NULL
,NULL
,0,0,NULL
,0,0,0}
869 /*============================ Utility functions ============================ */
871 /* Glob-style pattern matching. */
872 static int stringmatchlen(const char *pattern
, int patternLen
,
873 const char *string
, int stringLen
, int nocase
)
878 while (pattern
[1] == '*') {
883 return 1; /* match */
885 if (stringmatchlen(pattern
+1, patternLen
-1,
886 string
, stringLen
, nocase
))
887 return 1; /* match */
891 return 0; /* no match */
895 return 0; /* no match */
905 not = pattern
[0] == '^';
912 if (pattern
[0] == '\\') {
915 if (pattern
[0] == string
[0])
917 } else if (pattern
[0] == ']') {
919 } else if (patternLen
== 0) {
923 } else if (pattern
[1] == '-' && patternLen
>= 3) {
924 int start
= pattern
[0];
925 int end
= pattern
[2];
933 start
= tolower(start
);
939 if (c
>= start
&& c
<= end
)
943 if (pattern
[0] == string
[0])
946 if (tolower((int)pattern
[0]) == tolower((int)string
[0]))
956 return 0; /* no match */
962 if (patternLen
>= 2) {
969 if (pattern
[0] != string
[0])
970 return 0; /* no match */
972 if (tolower((int)pattern
[0]) != tolower((int)string
[0]))
973 return 0; /* no match */
981 if (stringLen
== 0) {
982 while(*pattern
== '*') {
989 if (patternLen
== 0 && stringLen
== 0)
/* Glob-style match between two nul-terminated strings. Both lengths are
 * measured with strlen() and the actual matching is delegated to
 * stringmatchlen(), to which nocase is forwarded unchanged. */
static int stringmatch(const char *pattern, const char *string, int nocase) {
    int patternLen = strlen(pattern);
    int stringLen = strlen(string);

    return stringmatchlen(pattern,patternLen,string,stringLen,nocase);
}
998 /* Convert a string representing an amount of memory into the number of
999 * bytes, so for instance memtoll("1gb") will return 1073741824 that is
1002 * On parsing error, if *err is not NULL, it's set to 1, otherwise it's
1004 static long long memtoll(const char *p
, int *err
) {
1007 long mul
; /* unit multiplier */
1009 unsigned int digits
;
1012 /* Search the first non digit character. */
1015 while(*u
&& isdigit(*u
)) u
++;
1016 if (*u
== '\0' || !strcasecmp(u
,"b")) {
1018 } else if (!strcasecmp(u
,"k")) {
1020 } else if (!strcasecmp(u
,"kb")) {
1022 } else if (!strcasecmp(u
,"m")) {
1024 } else if (!strcasecmp(u
,"mb")) {
1026 } else if (!strcasecmp(u
,"g")) {
1027 mul
= 1000L*1000*1000;
1028 } else if (!strcasecmp(u
,"gb")) {
1029 mul
= 1024L*1024*1024;
1035 if (digits
>= sizeof(buf
)) {
1039 memcpy(buf
,p
,digits
);
1041 val
= strtoll(buf
,NULL
,10);
/* Convert a long long into its decimal string representation.
 *
 * s     - destination buffer
 * len   - size of s in bytes, nul terminator included
 * value - number to convert
 *
 * Returns the number of characters stored in s, not counting the nul
 * terminator. If the buffer is too small only the first len-1 characters of
 * the representation are stored, so the result can be shorter than the
 * whole number. When len is 0 nothing is written and 0 is returned. */
static int ll2string(char *s, size_t len, long long value) {
    char buf[32], *p;
    unsigned long long v;
    size_t l;

    if (len == 0) return 0;
    /* Compute the magnitude in unsigned arithmetic: negating LLONG_MIN as a
     * signed long long overflows, which is undefined behavior. */
    v = (value < 0) ? 0ULL - (unsigned long long)value
                    : (unsigned long long)value;
    p = buf+31; /* point to the last character */
    /* Emit the digits right to left, least significant first. */
    do {
        *p-- = (char)('0'+(v%10));
        v /= 10;
    } while(v);
    if (value < 0) *p-- = '-';
    p++; /* step back onto the first character written */
    l = (size_t)(32-(p-buf));
    if (l+1 > len) l = len-1; /* Make sure it fits, including the nul term */
    memcpy(s,p,l);
    s[l] = '\0';
    return (int)l;
}
1069 static void redisLog(int level
, const char *fmt
, ...) {
1073 fp
= (server
.logfile
== NULL
) ? stdout
: fopen(server
.logfile
,"a");
1077 if (level
>= server
.verbosity
) {
1083 strftime(buf
,64,"%d %b %H:%M:%S",localtime(&now
));
1084 fprintf(fp
,"[%d] %s %c ",(int)getpid(),buf
,c
[level
]);
1085 vfprintf(fp
, fmt
, ap
);
1091 if (server
.logfile
) fclose(fp
);
1094 /*====================== Hash table type implementation ==================== */
1096 /* This is a hash table type that uses the SDS dynamic strings library as
1097 * keys and redis objects as values (objects can hold SDS strings,
1100 static void dictVanillaFree(void *privdata
, void *val
)
1102 DICT_NOTUSED(privdata
);
1106 static void dictListDestructor(void *privdata
, void *val
)
1108 DICT_NOTUSED(privdata
);
1109 listRelease((list
*)val
);
1112 static int sdsDictKeyCompare(void *privdata
, const void *key1
,
1116 DICT_NOTUSED(privdata
);
1118 l1
= sdslen((sds
)key1
);
1119 l2
= sdslen((sds
)key2
);
1120 if (l1
!= l2
) return 0;
1121 return memcmp(key1
, key2
, l1
) == 0;
1124 static void dictRedisObjectDestructor(void *privdata
, void *val
)
1126 DICT_NOTUSED(privdata
);
1128 if (val
== NULL
) return; /* Values of swapped out keys as set to NULL */
1132 static int dictObjKeyCompare(void *privdata
, const void *key1
,
1135 const robj
*o1
= key1
, *o2
= key2
;
1136 return sdsDictKeyCompare(privdata
,o1
->ptr
,o2
->ptr
);
1139 static unsigned int dictObjHash(const void *key
) {
1140 const robj
*o
= key
;
1141 return dictGenHashFunction(o
->ptr
, sdslen((sds
)o
->ptr
));
1144 static int dictEncObjKeyCompare(void *privdata
, const void *key1
,
1147 robj
*o1
= (robj
*) key1
, *o2
= (robj
*) key2
;
1150 if (o1
->encoding
== REDIS_ENCODING_INT
&&
1151 o2
->encoding
== REDIS_ENCODING_INT
)
1152 return o1
->ptr
== o2
->ptr
;
1154 o1
= getDecodedObject(o1
);
1155 o2
= getDecodedObject(o2
);
1156 cmp
= sdsDictKeyCompare(privdata
,o1
->ptr
,o2
->ptr
);
1162 static unsigned int dictEncObjHash(const void *key
) {
1163 robj
*o
= (robj
*) key
;
1165 if (o
->encoding
== REDIS_ENCODING_RAW
) {
1166 return dictGenHashFunction(o
->ptr
, sdslen((sds
)o
->ptr
));
1168 if (o
->encoding
== REDIS_ENCODING_INT
) {
1172 len
= ll2string(buf
,32,(long)o
->ptr
);
1173 return dictGenHashFunction((unsigned char*)buf
, len
);
1177 o
= getDecodedObject(o
);
1178 hash
= dictGenHashFunction(o
->ptr
, sdslen((sds
)o
->ptr
));
1185 /* Sets type and expires */
1186 static dictType setDictType
= {
1187 dictEncObjHash
, /* hash function */
1190 dictEncObjKeyCompare
, /* key compare */
1191 dictRedisObjectDestructor
, /* key destructor */
1192 NULL
/* val destructor */
1195 /* Sorted sets hash (note: a skiplist is used in addition to the hash table) */
1196 static dictType zsetDictType
= {
1197 dictEncObjHash
, /* hash function */
1200 dictEncObjKeyCompare
, /* key compare */
1201 dictRedisObjectDestructor
, /* key destructor */
1202 dictVanillaFree
/* val destructor of malloc(sizeof(double)) */
1206 static dictType dbDictType
= {
1207 dictObjHash
, /* hash function */
1210 dictObjKeyCompare
, /* key compare */
1211 dictRedisObjectDestructor
, /* key destructor */
1212 dictRedisObjectDestructor
/* val destructor */
1216 static dictType keyptrDictType
= {
1217 dictObjHash
, /* hash function */
1220 dictObjKeyCompare
, /* key compare */
1221 dictRedisObjectDestructor
, /* key destructor */
1222 NULL
/* val destructor */
1225 /* Hash type hash table (note that small hashes are represented with zipmaps) */
1226 static dictType hashDictType
= {
1227 dictEncObjHash
, /* hash function */
1230 dictEncObjKeyCompare
, /* key compare */
1231 dictRedisObjectDestructor
, /* key destructor */
1232 dictRedisObjectDestructor
/* val destructor */
1235 /* Keylist hash table type has unencoded redis objects as keys and
1236 * lists as values. It's used for blocking operations (BLPOP) and to
1237 * map swapped keys to a list of clients waiting for these keys to be loaded. */
1238 static dictType keylistDictType
= {
1239 dictObjHash
, /* hash function */
1242 dictObjKeyCompare
, /* key compare */
1243 dictRedisObjectDestructor
, /* key destructor */
1244 dictListDestructor
/* val destructor */
1247 static void version();
1249 /* ========================= Random utility functions ======================= */
1251 /* Redis generally does not try to recover from out of memory conditions
1252 * when allocating objects or strings, it is not clear if it will be possible
1253 * to report this condition to the client since the networking layer itself
1254 * is based on heap allocation for send buffers, so we simply abort.
1255 * At least the code will be simpler to read... */
1256 static void oom(const char *msg
) {
1257 redisLog(REDIS_WARNING
, "%s: Out of memory\n",msg
);
1262 /* ====================== Redis server networking stuff ===================== */
1263 static void closeTimedoutClients(void) {
1266 time_t now
= time(NULL
);
1269 listRewind(server
.clients
,&li
);
1270 while ((ln
= listNext(&li
)) != NULL
) {
1271 c
= listNodeValue(ln
);
1272 if (server
.maxidletime
&&
1273 !(c
->flags
& REDIS_SLAVE
) && /* no timeout for slaves */
1274 !(c
->flags
& REDIS_MASTER
) && /* no timeout for masters */
1275 dictSize(c
->pubsub_channels
) == 0 && /* no timeout for pubsub */
1276 listLength(c
->pubsub_patterns
) == 0 &&
1277 (now
- c
->lastinteraction
> server
.maxidletime
))
1279 redisLog(REDIS_VERBOSE
,"Closing idle client");
1281 } else if (c
->flags
& REDIS_BLOCKED
) {
1282 if (c
->blockingto
!= 0 && c
->blockingto
< now
) {
1283 addReply(c
,shared
.nullmultibulk
);
1284 unblockClientWaitingData(c
);
1290 static int htNeedsResize(dict
*dict
) {
1291 long long size
, used
;
1293 size
= dictSlots(dict
);
1294 used
= dictSize(dict
);
1295 return (size
&& used
&& size
> DICT_HT_INITIAL_SIZE
&&
1296 (used
*100/size
< REDIS_HT_MINFILL
));
1299 /* If the percentage of used slots in the HT reaches REDIS_HT_MINFILL
1300 * we resize the hash table to save memory */
1301 static void tryResizeHashTables(void) {
1304 for (j
= 0; j
< server
.dbnum
; j
++) {
1305 if (htNeedsResize(server
.db
[j
].dict
))
1306 dictResize(server
.db
[j
].dict
);
1307 if (htNeedsResize(server
.db
[j
].expires
))
1308 dictResize(server
.db
[j
].expires
);
1312 /* Our hash table implementation performs rehashing incrementally while
1313 * we write/read from the hash table. Still if the server is idle, the hash
1314 * table will use two tables for a long time. So we try to use 1 millisecond
1315 * of CPU time at every serverCron() loop in order to rehash some key. */
1316 static void incrementallyRehash(void) {
1319 for (j
= 0; j
< server
.dbnum
; j
++) {
1320 if (dictIsRehashing(server
.db
[j
].dict
)) {
1321 dictRehashMilliseconds(server
.db
[j
].dict
,1);
1322 break; /* already used our millisecond for this loop... */
1327 /* A background saving child (BGSAVE) terminated its work. Handle this. */
1328 void backgroundSaveDoneHandler(int statloc
) {
1329 int exitcode
= WEXITSTATUS(statloc
);
1330 int bysignal
= WIFSIGNALED(statloc
);
1332 if (!bysignal
&& exitcode
== 0) {
1333 redisLog(REDIS_NOTICE
,
1334 "Background saving terminated with success");
1336 server
.lastsave
= time(NULL
);
1337 } else if (!bysignal
&& exitcode
!= 0) {
1338 redisLog(REDIS_WARNING
, "Background saving error");
1340 redisLog(REDIS_WARNING
,
1341 "Background saving terminated by signal %d", WTERMSIG(statloc
));
1342 rdbRemoveTempFile(server
.bgsavechildpid
);
1344 server
.bgsavechildpid
= -1;
1345 /* Possibly there are slaves waiting for a BGSAVE in order to be served
1346 * (the first stage of SYNC is a bulk transfer of dump.rdb) */
1347 updateSlavesWaitingBgsave(exitcode
== 0 ? REDIS_OK
: REDIS_ERR
);
1350 /* A background append only file rewriting (BGREWRITEAOF) terminated its work.
1352 void backgroundRewriteDoneHandler(int statloc
) {
1353 int exitcode
= WEXITSTATUS(statloc
);
1354 int bysignal
= WIFSIGNALED(statloc
);
1356 if (!bysignal
&& exitcode
== 0) {
1360 redisLog(REDIS_NOTICE
,
1361 "Background append only file rewriting terminated with success");
1362 /* Now it's time to flush the differences accumulated by the parent */
1363 snprintf(tmpfile
,256,"temp-rewriteaof-bg-%d.aof", (int) server
.bgrewritechildpid
);
1364 fd
= open(tmpfile
,O_WRONLY
|O_APPEND
);
1366 redisLog(REDIS_WARNING
, "Not able to open the temp append only file produced by the child: %s", strerror(errno
));
1369 /* Flush our data... */
1370 if (write(fd
,server
.bgrewritebuf
,sdslen(server
.bgrewritebuf
)) !=
1371 (signed) sdslen(server
.bgrewritebuf
)) {
1372 redisLog(REDIS_WARNING
, "Error or short write trying to flush the parent diff of the append log file in the child temp file: %s", strerror(errno
));
1376 redisLog(REDIS_NOTICE
,"Parent diff flushed into the new append log file with success (%lu bytes)",sdslen(server
.bgrewritebuf
));
1377 /* Now our work is to rename the temp file into the stable file. And
1378 * switch the file descriptor used by the server for append only. */
1379 if (rename(tmpfile
,server
.appendfilename
) == -1) {
1380 redisLog(REDIS_WARNING
,"Can't rename the temp append only file into the stable one: %s", strerror(errno
));
1384 /* Mission completed... almost */
1385 redisLog(REDIS_NOTICE
,"Append only file successfully rewritten.");
1386 if (server
.appendfd
!= -1) {
1387 /* If append only is actually enabled... */
1388 close(server
.appendfd
);
1389 server
.appendfd
= fd
;
1391 server
.appendseldb
= -1; /* Make sure it will issue SELECT */
1392 redisLog(REDIS_NOTICE
,"The new append only file was selected for future appends.");
1394 /* If append only is disabled we just generate a dump in this
1395 * format. Why not? */
1398 } else if (!bysignal
&& exitcode
!= 0) {
1399 redisLog(REDIS_WARNING
, "Background append only file rewriting error");
1401 redisLog(REDIS_WARNING
,
1402 "Background append only file rewriting terminated by signal %d",
1406 sdsfree(server
.bgrewritebuf
);
1407 server
.bgrewritebuf
= sdsempty();
1408 aofRemoveTempFile(server
.bgrewritechildpid
);
1409 server
.bgrewritechildpid
= -1;
1412 /* This function is called once a background process of some kind terminates,
1413 * as we want to avoid resizing the hash tables when there is a child in order
1414 * to play well with copy-on-write (otherwise when a resize happens lots of
1415 * memory pages are copied). The goal of this function is to update the ability
1416 * for dict.c to resize the hash tables according to whether or not we have
1417 * running children. */
1418 static void updateDictResizePolicy(void) {
1419 if (server
.bgsavechildpid
== -1 && server
.bgrewritechildpid
== -1)
1422 dictDisableResize();
1425 static int serverCron(struct aeEventLoop
*eventLoop
, long long id
, void *clientData
) {
1426 int j
, loops
= server
.cronloops
++;
1427 REDIS_NOTUSED(eventLoop
);
1429 REDIS_NOTUSED(clientData
);
1431 /* We take a cached value of the unix time in the global state because
1432 * with virtual memory and aging there is the need to store the current time
1433 * in objects at every object access, and accuracy is not needed.
1434 * To access a global var is faster than calling time(NULL) */
1435 server
.unixtime
= time(NULL
);
1437 /* We received a SIGTERM, shutting down here in a safe way, as it is
1438 * not ok doing so inside the signal handler. */
1439 if (server
.shutdown_asap
) {
1440 if (prepareForShutdown() == REDIS_OK
) exit(0);
1441 redisLog(REDIS_WARNING
,"SIGTERM received but errors trying to shut down the server, check the logs for more information");
1444 /* Show some info about non-empty databases */
1445 for (j
= 0; j
< server
.dbnum
; j
++) {
1446 long long size
, used
, vkeys
;
1448 size
= dictSlots(server
.db
[j
].dict
);
1449 used
= dictSize(server
.db
[j
].dict
);
1450 vkeys
= dictSize(server
.db
[j
].expires
);
1451 if (!(loops
% 50) && (used
|| vkeys
)) {
1452 redisLog(REDIS_VERBOSE
,"DB %d: %lld keys (%lld volatile) in %lld slots HT.",j
,used
,vkeys
,size
);
1453 /* dictPrintStats(server.dict); */
1457 /* We don't want to resize the hash tables while a background saving
1458 * is in progress: the saving child is created using fork() that is
1459 * implemented with a copy-on-write semantic in most modern systems, so
1460 * if we resize the HT while there is the saving child at work actually
1461 * a lot of memory movements in the parent will cause a lot of pages
1463 if (server
.bgsavechildpid
== -1 && server
.bgrewritechildpid
== -1) {
1464 if (!(loops
% 10)) tryResizeHashTables();
1465 if (server
.activerehashing
) incrementallyRehash();
1468 /* Show information about connected clients */
1469 if (!(loops
% 50)) {
1470 redisLog(REDIS_VERBOSE
,"%d clients connected (%d slaves), %zu bytes in use",
1471 listLength(server
.clients
)-listLength(server
.slaves
),
1472 listLength(server
.slaves
),
1473 zmalloc_used_memory());
1476 /* Close connections of timedout clients */
1477 if ((server
.maxidletime
&& !(loops
% 100)) || server
.blpop_blocked_clients
)
1478 closeTimedoutClients();
1480 /* Check if a background saving or AOF rewrite in progress terminated */
1481 if (server
.bgsavechildpid
!= -1 || server
.bgrewritechildpid
!= -1) {
1485 if ((pid
= wait3(&statloc
,WNOHANG
,NULL
)) != 0) {
1486 if (pid
== server
.bgsavechildpid
) {
1487 backgroundSaveDoneHandler(statloc
);
1489 backgroundRewriteDoneHandler(statloc
);
1491 updateDictResizePolicy();
1494 /* If there is not a background saving in progress check if
1495 * we have to save now */
1496 time_t now
= time(NULL
);
1497 for (j
= 0; j
< server
.saveparamslen
; j
++) {
1498 struct saveparam
*sp
= server
.saveparams
+j
;
1500 if (server
.dirty
>= sp
->changes
&&
1501 now
-server
.lastsave
> sp
->seconds
) {
1502 redisLog(REDIS_NOTICE
,"%d changes in %d seconds. Saving...",
1503 sp
->changes
, sp
->seconds
);
1504 rdbSaveBackground(server
.dbfilename
);
1510 /* Try to expire a few timed out keys. The algorithm used is adaptive and
1511 * will use few CPU cycles if there are few expiring keys, otherwise
1512 * it will get more aggressive to avoid that too much memory is used by
1513 * keys that can be removed from the keyspace. */
1514 for (j
= 0; j
< server
.dbnum
; j
++) {
1516 redisDb
*db
= server
.db
+j
;
1518 /* Continue to expire if at the end of the cycle more than 25%
1519 * of the keys were expired. */
1521 long num
= dictSize(db
->expires
);
1522 time_t now
= time(NULL
);
1525 if (num
> REDIS_EXPIRELOOKUPS_PER_CRON
)
1526 num
= REDIS_EXPIRELOOKUPS_PER_CRON
;
1531 if ((de
= dictGetRandomKey(db
->expires
)) == NULL
) break;
1532 t
= (time_t) dictGetEntryVal(de
);
1534 deleteKey(db
,dictGetEntryKey(de
));
1536 server
.stat_expiredkeys
++;
1539 } while (expired
> REDIS_EXPIRELOOKUPS_PER_CRON
/4);
1542 /* Swap a few keys on disk if we are over the memory limit and VM
1543 * is enabled. Try to free objects from the free list first. */
1544 if (vmCanSwapOut()) {
1545 while (server
.vm_enabled
&& zmalloc_used_memory() >
1546 server
.vm_max_memory
)
1550 if (tryFreeOneObjectFromFreelist() == REDIS_OK
) continue;
1551 retval
= (server
.vm_max_threads
== 0) ?
1552 vmSwapOneObjectBlocking() :
1553 vmSwapOneObjectThreaded();
1554 if (retval
== REDIS_ERR
&& !(loops
% 300) &&
1555 zmalloc_used_memory() >
1556 (server
.vm_max_memory
+server
.vm_max_memory
/10))
1558 redisLog(REDIS_WARNING
,"WARNING: vm-max-memory limit exceeded by more than 10%% but unable to swap more objects out!");
1560 /* Note that when using threaded I/O we free just one object,
1561 * because anyway when the I/O thread in charge to swap this
1562 * object out will finish, the handler of completed jobs
1563 * will try to swap more objects if we are still out of memory. */
1564 if (retval
== REDIS_ERR
|| server
.vm_max_threads
> 0) break;
1568 /* Check if we should connect to a MASTER */
1569 if (server
.replstate
== REDIS_REPL_CONNECT
&& !(loops
% 10)) {
1570 redisLog(REDIS_NOTICE
,"Connecting to MASTER...");
1571 if (syncWithMaster() == REDIS_OK
) {
1572 redisLog(REDIS_NOTICE
,"MASTER <-> SLAVE sync succeeded");
1573 if (server
.appendonly
) rewriteAppendOnlyFileBackground();
1579 /* This function gets called every time Redis is entering the
1580 * main loop of the event driven library, that is, before going to sleep
1581 * waiting for ready file descriptors. */
1582 static void beforeSleep(struct aeEventLoop
*eventLoop
) {
1583 REDIS_NOTUSED(eventLoop
);
1585 /* Awake clients that got all the swapped keys they requested */
1586 if (server
.vm_enabled
&& listLength(server
.io_ready_clients
)) {
1590 listRewind(server
.io_ready_clients
,&li
);
1591 while((ln
= listNext(&li
))) {
1592 redisClient
*c
= ln
->value
;
1593 struct redisCommand
*cmd
;
1595 /* Resume the client. */
1596 listDelNode(server
.io_ready_clients
,ln
);
1597 c
->flags
&= (~REDIS_IO_WAIT
);
1598 server
.vm_blocked_clients
--;
1599 aeCreateFileEvent(server
.el
, c
->fd
, AE_READABLE
,
1600 readQueryFromClient
, c
);
1601 cmd
= lookupCommand(c
->argv
[0]->ptr
);
1602 assert(cmd
!= NULL
);
1605 /* There may be more data to process in the input buffer. */
1606 if (c
->querybuf
&& sdslen(c
->querybuf
) > 0)
1607 processInputBuffer(c
);
1610 /* Write the AOF buffer on disk */
1611 flushAppendOnlyFile();
1614 static void createSharedObjects(void) {
1617 shared
.crlf
= createObject(REDIS_STRING
,sdsnew("\r\n"));
1618 shared
.ok
= createObject(REDIS_STRING
,sdsnew("+OK\r\n"));
1619 shared
.err
= createObject(REDIS_STRING
,sdsnew("-ERR\r\n"));
1620 shared
.emptybulk
= createObject(REDIS_STRING
,sdsnew("$0\r\n\r\n"));
1621 shared
.czero
= createObject(REDIS_STRING
,sdsnew(":0\r\n"));
1622 shared
.cone
= createObject(REDIS_STRING
,sdsnew(":1\r\n"));
1623 shared
.nullbulk
= createObject(REDIS_STRING
,sdsnew("$-1\r\n"));
1624 shared
.nullmultibulk
= createObject(REDIS_STRING
,sdsnew("*-1\r\n"));
1625 shared
.emptymultibulk
= createObject(REDIS_STRING
,sdsnew("*0\r\n"));
1626 shared
.pong
= createObject(REDIS_STRING
,sdsnew("+PONG\r\n"));
1627 shared
.queued
= createObject(REDIS_STRING
,sdsnew("+QUEUED\r\n"));
1628 shared
.wrongtypeerr
= createObject(REDIS_STRING
,sdsnew(
1629 "-ERR Operation against a key holding the wrong kind of value\r\n"));
1630 shared
.nokeyerr
= createObject(REDIS_STRING
,sdsnew(
1631 "-ERR no such key\r\n"));
1632 shared
.syntaxerr
= createObject(REDIS_STRING
,sdsnew(
1633 "-ERR syntax error\r\n"));
1634 shared
.sameobjecterr
= createObject(REDIS_STRING
,sdsnew(
1635 "-ERR source and destination objects are the same\r\n"));
1636 shared
.outofrangeerr
= createObject(REDIS_STRING
,sdsnew(
1637 "-ERR index out of range\r\n"));
1638 shared
.space
= createObject(REDIS_STRING
,sdsnew(" "));
1639 shared
.colon
= createObject(REDIS_STRING
,sdsnew(":"));
1640 shared
.plus
= createObject(REDIS_STRING
,sdsnew("+"));
1641 shared
.select0
= createStringObject("select 0\r\n",10);
1642 shared
.select1
= createStringObject("select 1\r\n",10);
1643 shared
.select2
= createStringObject("select 2\r\n",10);
1644 shared
.select3
= createStringObject("select 3\r\n",10);
1645 shared
.select4
= createStringObject("select 4\r\n",10);
1646 shared
.select5
= createStringObject("select 5\r\n",10);
1647 shared
.select6
= createStringObject("select 6\r\n",10);
1648 shared
.select7
= createStringObject("select 7\r\n",10);
1649 shared
.select8
= createStringObject("select 8\r\n",10);
1650 shared
.select9
= createStringObject("select 9\r\n",10);
1651 shared
.messagebulk
= createStringObject("$7\r\nmessage\r\n",13);
1652 shared
.pmessagebulk
= createStringObject("$8\r\npmessage\r\n",14);
1653 shared
.subscribebulk
= createStringObject("$9\r\nsubscribe\r\n",15);
1654 shared
.unsubscribebulk
= createStringObject("$11\r\nunsubscribe\r\n",18);
1655 shared
.psubscribebulk
= createStringObject("$10\r\npsubscribe\r\n",17);
1656 shared
.punsubscribebulk
= createStringObject("$12\r\npunsubscribe\r\n",19);
1657 shared
.mbulk3
= createStringObject("*3\r\n",4);
1658 shared
.mbulk4
= createStringObject("*4\r\n",4);
1659 for (j
= 0; j
< REDIS_SHARED_INTEGERS
; j
++) {
1660 shared
.integers
[j
] = createObject(REDIS_STRING
,(void*)(long)j
);
1661 shared
.integers
[j
]->encoding
= REDIS_ENCODING_INT
;
1665 static void appendServerSaveParams(time_t seconds
, int changes
) {
1666 server
.saveparams
= zrealloc(server
.saveparams
,sizeof(struct saveparam
)*(server
.saveparamslen
+1));
1667 server
.saveparams
[server
.saveparamslen
].seconds
= seconds
;
1668 server
.saveparams
[server
.saveparamslen
].changes
= changes
;
1669 server
.saveparamslen
++;
1672 static void resetServerSaveParams() {
1673 zfree(server
.saveparams
);
1674 server
.saveparams
= NULL
;
1675 server
.saveparamslen
= 0;
1678 static void initServerConfig() {
1679 server
.dbnum
= REDIS_DEFAULT_DBNUM
;
1680 server
.port
= REDIS_SERVERPORT
;
1681 server
.verbosity
= REDIS_VERBOSE
;
1682 server
.maxidletime
= REDIS_MAXIDLETIME
;
1683 server
.saveparams
= NULL
;
1684 server
.logfile
= NULL
; /* NULL = log on standard output */
1685 server
.bindaddr
= NULL
;
1686 server
.glueoutputbuf
= 1;
1687 server
.daemonize
= 0;
1688 server
.appendonly
= 0;
1689 server
.appendfsync
= APPENDFSYNC_EVERYSEC
;
1690 server
.lastfsync
= time(NULL
);
1691 server
.appendfd
= -1;
1692 server
.appendseldb
= -1; /* Make sure the first time will not match */
1693 server
.pidfile
= zstrdup("/var/run/redis.pid");
1694 server
.dbfilename
= zstrdup("dump.rdb");
1695 server
.appendfilename
= zstrdup("appendonly.aof");
1696 server
.requirepass
= NULL
;
1697 server
.rdbcompression
= 1;
1698 server
.activerehashing
= 1;
1699 server
.maxclients
= 0;
1700 server
.blpop_blocked_clients
= 0;
1701 server
.maxmemory
= 0;
1702 server
.vm_enabled
= 0;
1703 server
.vm_swap_file
= zstrdup("/tmp/redis-%p.vm");
1704 server
.vm_page_size
= 256; /* 256 bytes per page */
1705 server
.vm_pages
= 1024*1024*100; /* 104 millions of pages */
1706 server
.vm_max_memory
= 1024LL*1024*1024*1; /* 1 GB of RAM */
1707 server
.vm_max_threads
= 4;
1708 server
.vm_blocked_clients
= 0;
1709 server
.hash_max_zipmap_entries
= REDIS_HASH_MAX_ZIPMAP_ENTRIES
;
1710 server
.hash_max_zipmap_value
= REDIS_HASH_MAX_ZIPMAP_VALUE
;
1711 server
.shutdown_asap
= 0;
1713 resetServerSaveParams();
1715 appendServerSaveParams(60*60,1); /* save after 1 hour and 1 change */
1716 appendServerSaveParams(300,100); /* save after 5 minutes and 100 changes */
1717 appendServerSaveParams(60,10000); /* save after 1 minute and 10000 changes */
1718 /* Replication related */
1720 server
.masterauth
= NULL
;
1721 server
.masterhost
= NULL
;
1722 server
.masterport
= 6379;
1723 server
.master
= NULL
;
1724 server
.replstate
= REDIS_REPL_NONE
;
1726 /* Double constants initialization */
1728 R_PosInf
= 1.0/R_Zero
;
1729 R_NegInf
= -1.0/R_Zero
;
1730 R_Nan
= R_Zero
/R_Zero
;
1733 static void initServer() {
1736 signal(SIGHUP
, SIG_IGN
);
1737 signal(SIGPIPE
, SIG_IGN
);
1738 setupSigSegvAction();
1740 server
.devnull
= fopen("/dev/null","w");
1741 if (server
.devnull
== NULL
) {
1742 redisLog(REDIS_WARNING
, "Can't open /dev/null: %s", server
.neterr
);
1745 server
.clients
= listCreate();
1746 server
.slaves
= listCreate();
1747 server
.monitors
= listCreate();
1748 server
.objfreelist
= listCreate();
1749 createSharedObjects();
1750 server
.el
= aeCreateEventLoop();
1751 server
.db
= zmalloc(sizeof(redisDb
)*server
.dbnum
);
1752 server
.fd
= anetTcpServer(server
.neterr
, server
.port
, server
.bindaddr
);
1753 if (server
.fd
== -1) {
1754 redisLog(REDIS_WARNING
, "Opening TCP port: %s", server
.neterr
);
1757 for (j
= 0; j
< server
.dbnum
; j
++) {
1758 server
.db
[j
].dict
= dictCreate(&dbDictType
,NULL
);
1759 server
.db
[j
].expires
= dictCreate(&keyptrDictType
,NULL
);
1760 server
.db
[j
].blocking_keys
= dictCreate(&keylistDictType
,NULL
);
1761 server
.db
[j
].watched_keys
= dictCreate(&keylistDictType
,NULL
);
1762 if (server
.vm_enabled
)
1763 server
.db
[j
].io_keys
= dictCreate(&keylistDictType
,NULL
);
1764 server
.db
[j
].id
= j
;
1766 server
.pubsub_channels
= dictCreate(&keylistDictType
,NULL
);
1767 server
.pubsub_patterns
= listCreate();
1768 listSetFreeMethod(server
.pubsub_patterns
,freePubsubPattern
);
1769 listSetMatchMethod(server
.pubsub_patterns
,listMatchPubsubPattern
);
1770 server
.cronloops
= 0;
1771 server
.bgsavechildpid
= -1;
1772 server
.bgrewritechildpid
= -1;
1773 server
.bgrewritebuf
= sdsempty();
1774 server
.aofbuf
= sdsempty();
1775 server
.lastsave
= time(NULL
);
1777 server
.stat_numcommands
= 0;
1778 server
.stat_numconnections
= 0;
1779 server
.stat_expiredkeys
= 0;
1780 server
.stat_starttime
= time(NULL
);
1781 server
.unixtime
= time(NULL
);
1782 aeCreateTimeEvent(server
.el
, 1, serverCron
, NULL
, NULL
);
1783 if (aeCreateFileEvent(server
.el
, server
.fd
, AE_READABLE
,
1784 acceptHandler
, NULL
) == AE_ERR
) oom("creating file event");
1786 if (server
.appendonly
) {
1787 server
.appendfd
= open(server
.appendfilename
,O_WRONLY
|O_APPEND
|O_CREAT
,0644);
1788 if (server
.appendfd
== -1) {
1789 redisLog(REDIS_WARNING
, "Can't open the append-only file: %s",
1795 if (server
.vm_enabled
) vmInit();
1798 /* Empty the whole database */
1799 static long long emptyDb() {
1801 long long removed
= 0;
1803 for (j
= 0; j
< server
.dbnum
; j
++) {
1804 removed
+= dictSize(server
.db
[j
].dict
);
1805 dictEmpty(server
.db
[j
].dict
);
1806 dictEmpty(server
.db
[j
].expires
);
/* Convert a "yes" / "no" configuration argument (compared ignoring case)
 * into 1 / 0. Any other string yields -1, which callers treat as a
 * configuration error. */
static int yesnotoi(char *s) {
    if (strcasecmp(s,"yes") == 0) return 1;
    if (strcasecmp(s,"no") == 0) return 0;
    return -1;
}
1817 /* I agree, this is a very rudimentary way to load a configuration...
1818 will improve later if the config gets more complex */
1819 static void loadServerConfig(char *filename
) {
1821 char buf
[REDIS_CONFIGLINE_MAX
+1], *err
= NULL
;
1825 if (filename
[0] == '-' && filename
[1] == '\0')
1828 if ((fp
= fopen(filename
,"r")) == NULL
) {
1829 redisLog(REDIS_WARNING
, "Fatal error, can't open config file '%s'", filename
);
1834 while(fgets(buf
,REDIS_CONFIGLINE_MAX
+1,fp
) != NULL
) {
1840 line
= sdstrim(line
," \t\r\n");
1842 /* Skip comments and blank lines*/
1843 if (line
[0] == '#' || line
[0] == '\0') {
1848 /* Split into arguments */
1849 argv
= sdssplitlen(line
,sdslen(line
)," ",1,&argc
);
1850 sdstolower(argv
[0]);
1852 /* Execute config directives */
1853 if (!strcasecmp(argv
[0],"timeout") && argc
== 2) {
1854 server
.maxidletime
= atoi(argv
[1]);
1855 if (server
.maxidletime
< 0) {
1856 err
= "Invalid timeout value"; goto loaderr
;
1858 } else if (!strcasecmp(argv
[0],"port") && argc
== 2) {
1859 server
.port
= atoi(argv
[1]);
1860 if (server
.port
< 1 || server
.port
> 65535) {
1861 err
= "Invalid port"; goto loaderr
;
1863 } else if (!strcasecmp(argv
[0],"bind") && argc
== 2) {
1864 server
.bindaddr
= zstrdup(argv
[1]);
1865 } else if (!strcasecmp(argv
[0],"save") && argc
== 3) {
1866 int seconds
= atoi(argv
[1]);
1867 int changes
= atoi(argv
[2]);
1868 if (seconds
< 1 || changes
< 0) {
1869 err
= "Invalid save parameters"; goto loaderr
;
1871 appendServerSaveParams(seconds
,changes
);
1872 } else if (!strcasecmp(argv
[0],"dir") && argc
== 2) {
1873 if (chdir(argv
[1]) == -1) {
1874 redisLog(REDIS_WARNING
,"Can't chdir to '%s': %s",
1875 argv
[1], strerror(errno
));
1878 } else if (!strcasecmp(argv
[0],"loglevel") && argc
== 2) {
1879 if (!strcasecmp(argv
[1],"debug")) server
.verbosity
= REDIS_DEBUG
;
1880 else if (!strcasecmp(argv
[1],"verbose")) server
.verbosity
= REDIS_VERBOSE
;
1881 else if (!strcasecmp(argv
[1],"notice")) server
.verbosity
= REDIS_NOTICE
;
1882 else if (!strcasecmp(argv
[1],"warning")) server
.verbosity
= REDIS_WARNING
;
1884 err
= "Invalid log level. Must be one of debug, notice, warning";
1887 } else if (!strcasecmp(argv
[0],"logfile") && argc
== 2) {
1890 server
.logfile
= zstrdup(argv
[1]);
1891 if (!strcasecmp(server
.logfile
,"stdout")) {
1892 zfree(server
.logfile
);
1893 server
.logfile
= NULL
;
1895 if (server
.logfile
) {
1896 /* Test if we are able to open the file. The server will not
1897 * be able to abort just for this problem later... */
1898 logfp
= fopen(server
.logfile
,"a");
1899 if (logfp
== NULL
) {
1900 err
= sdscatprintf(sdsempty(),
1901 "Can't open the log file: %s", strerror(errno
));
1906 } else if (!strcasecmp(argv
[0],"databases") && argc
== 2) {
1907 server
.dbnum
= atoi(argv
[1]);
1908 if (server
.dbnum
< 1) {
1909 err
= "Invalid number of databases"; goto loaderr
;
1911 } else if (!strcasecmp(argv
[0],"include") && argc
== 2) {
1912 loadServerConfig(argv
[1]);
1913 } else if (!strcasecmp(argv
[0],"maxclients") && argc
== 2) {
1914 server
.maxclients
= atoi(argv
[1]);
1915 } else if (!strcasecmp(argv
[0],"maxmemory") && argc
== 2) {
1916 server
.maxmemory
= memtoll(argv
[1],NULL
);
1917 } else if (!strcasecmp(argv
[0],"slaveof") && argc
== 3) {
1918 server
.masterhost
= sdsnew(argv
[1]);
1919 server
.masterport
= atoi(argv
[2]);
1920 server
.replstate
= REDIS_REPL_CONNECT
;
1921 } else if (!strcasecmp(argv
[0],"masterauth") && argc
== 2) {
1922 server
.masterauth
= zstrdup(argv
[1]);
1923 } else if (!strcasecmp(argv
[0],"glueoutputbuf") && argc
== 2) {
1924 if ((server
.glueoutputbuf
= yesnotoi(argv
[1])) == -1) {
1925 err
= "argument must be 'yes' or 'no'"; goto loaderr
;
1927 } else if (!strcasecmp(argv
[0],"rdbcompression") && argc
== 2) {
1928 if ((server
.rdbcompression
= yesnotoi(argv
[1])) == -1) {
1929 err
= "argument must be 'yes' or 'no'"; goto loaderr
;
1931 } else if (!strcasecmp(argv
[0],"activerehashing") && argc
== 2) {
1932 if ((server
.activerehashing
= yesnotoi(argv
[1])) == -1) {
1933 err
= "argument must be 'yes' or 'no'"; goto loaderr
;
1935 } else if (!strcasecmp(argv
[0],"daemonize") && argc
== 2) {
1936 if ((server
.daemonize
= yesnotoi(argv
[1])) == -1) {
1937 err
= "argument must be 'yes' or 'no'"; goto loaderr
;
1939 } else if (!strcasecmp(argv
[0],"appendonly") && argc
== 2) {
1940 if ((server
.appendonly
= yesnotoi(argv
[1])) == -1) {
1941 err
= "argument must be 'yes' or 'no'"; goto loaderr
;
1943 } else if (!strcasecmp(argv
[0],"appendfilename") && argc
== 2) {
1944 zfree(server
.appendfilename
);
1945 server
.appendfilename
= zstrdup(argv
[1]);
1946 } else if (!strcasecmp(argv
[0],"appendfsync") && argc
== 2) {
1947 if (!strcasecmp(argv
[1],"no")) {
1948 server
.appendfsync
= APPENDFSYNC_NO
;
1949 } else if (!strcasecmp(argv
[1],"always")) {
1950 server
.appendfsync
= APPENDFSYNC_ALWAYS
;
1951 } else if (!strcasecmp(argv
[1],"everysec")) {
1952 server
.appendfsync
= APPENDFSYNC_EVERYSEC
;
1954 err
= "argument must be 'no', 'always' or 'everysec'";
1957 } else if (!strcasecmp(argv
[0],"requirepass") && argc
== 2) {
1958 server
.requirepass
= zstrdup(argv
[1]);
1959 } else if (!strcasecmp(argv
[0],"pidfile") && argc
== 2) {
1960 zfree(server
.pidfile
);
1961 server
.pidfile
= zstrdup(argv
[1]);
1962 } else if (!strcasecmp(argv
[0],"dbfilename") && argc
== 2) {
1963 zfree(server
.dbfilename
);
1964 server
.dbfilename
= zstrdup(argv
[1]);
1965 } else if (!strcasecmp(argv
[0],"vm-enabled") && argc
== 2) {
1966 if ((server
.vm_enabled
= yesnotoi(argv
[1])) == -1) {
1967 err
= "argument must be 'yes' or 'no'"; goto loaderr
;
1969 } else if (!strcasecmp(argv
[0],"vm-swap-file") && argc
== 2) {
1970 zfree(server
.vm_swap_file
);
1971 server
.vm_swap_file
= zstrdup(argv
[1]);
1972 } else if (!strcasecmp(argv
[0],"vm-max-memory") && argc
== 2) {
1973 server
.vm_max_memory
= memtoll(argv
[1],NULL
);
1974 } else if (!strcasecmp(argv
[0],"vm-page-size") && argc
== 2) {
1975 server
.vm_page_size
= memtoll(argv
[1], NULL
);
1976 } else if (!strcasecmp(argv
[0],"vm-pages") && argc
== 2) {
1977 server
.vm_pages
= memtoll(argv
[1], NULL
);
1978 } else if (!strcasecmp(argv
[0],"vm-max-threads") && argc
== 2) {
1979 server
.vm_max_threads
= strtoll(argv
[1], NULL
, 10);
1980 } else if (!strcasecmp(argv
[0],"hash-max-zipmap-entries") && argc
== 2){
1981 server
.hash_max_zipmap_entries
= memtoll(argv
[1], NULL
);
1982 } else if (!strcasecmp(argv
[0],"hash-max-zipmap-value") && argc
== 2){
1983 server
.hash_max_zipmap_value
= memtoll(argv
[1], NULL
);
1985 err
= "Bad directive or wrong number of arguments"; goto loaderr
;
1987 for (j
= 0; j
< argc
; j
++)
1992 if (fp
!= stdin
) fclose(fp
);
1996 fprintf(stderr
, "\n*** FATAL CONFIG FILE ERROR ***\n");
1997 fprintf(stderr
, "Reading the configuration file, at line %d\n", linenum
);
1998 fprintf(stderr
, ">>> '%s'\n", line
);
1999 fprintf(stderr
, "%s\n", err
);
2003 static void freeClientArgv(redisClient
*c
) {
2006 for (j
= 0; j
< c
->argc
; j
++)
2007 decrRefCount(c
->argv
[j
]);
2008 for (j
= 0; j
< c
->mbargc
; j
++)
2009 decrRefCount(c
->mbargv
[j
]);
2014 static void freeClient(redisClient
*c
) {
2017 /* Note that if the client we are freeing is blocked in a blocking
2018 * call, we have to set querybuf to NULL *before* calling
2019 * unblockClientWaitingData(), so that processInputBuffer() does not get
2020 * called. It is also important to remove the file events after
2021 * this, because this call adds the READABLE event. */
2022 sdsfree(c
->querybuf
);
2024 if (c
->flags
& REDIS_BLOCKED
)
2025 unblockClientWaitingData(c
);
2027 /* UNWATCH all the keys */
2029 listRelease(c
->watched_keys
);
2030 /* Unsubscribe from all the pubsub channels */
2031 pubsubUnsubscribeAllChannels(c
,0);
2032 pubsubUnsubscribeAllPatterns(c
,0);
2033 dictRelease(c
->pubsub_channels
);
2034 listRelease(c
->pubsub_patterns
);
2035 /* Obvious cleanup */
2036 aeDeleteFileEvent(server
.el
,c
->fd
,AE_READABLE
);
2037 aeDeleteFileEvent(server
.el
,c
->fd
,AE_WRITABLE
);
2038 listRelease(c
->reply
);
2041 /* Remove from the list of clients */
2042 ln
= listSearchKey(server
.clients
,c
);
2043 redisAssert(ln
!= NULL
);
2044 listDelNode(server
.clients
,ln
);
2045 /* Remove from the list of clients that are now ready to be restarted
2046 * after waiting for swapped keys */
2047 if (c
->flags
& REDIS_IO_WAIT
&& listLength(c
->io_keys
) == 0) {
2048 ln
= listSearchKey(server
.io_ready_clients
,c
);
2050 listDelNode(server
.io_ready_clients
,ln
);
2051 server
.vm_blocked_clients
--;
2054 /* Remove from the list of clients waiting for swapped keys */
2055 while (server
.vm_enabled
&& listLength(c
->io_keys
)) {
2056 ln
= listFirst(c
->io_keys
);
2057 dontWaitForSwappedKey(c
,ln
->value
);
2059 listRelease(c
->io_keys
);
2060 /* Master/slave cleanup */
2061 if (c
->flags
& REDIS_SLAVE
) {
2062 if (c
->replstate
== REDIS_REPL_SEND_BULK
&& c
->repldbfd
!= -1)
2064 list
*l
= (c
->flags
& REDIS_MONITOR
) ? server
.monitors
: server
.slaves
;
2065 ln
= listSearchKey(l
,c
);
2066 redisAssert(ln
!= NULL
);
2069 if (c
->flags
& REDIS_MASTER
) {
2070 server
.master
= NULL
;
2071 server
.replstate
= REDIS_REPL_CONNECT
;
2073 /* Release memory */
2076 freeClientMultiState(c
);
2080 #define GLUEREPLY_UP_TO (1024)
2081 static void glueReplyBuffersIfNeeded(redisClient
*c
) {
2083 char buf
[GLUEREPLY_UP_TO
];
2088 listRewind(c
->reply
,&li
);
2089 while((ln
= listNext(&li
))) {
2093 objlen
= sdslen(o
->ptr
);
2094 if (copylen
+ objlen
<= GLUEREPLY_UP_TO
) {
2095 memcpy(buf
+copylen
,o
->ptr
,objlen
);
2097 listDelNode(c
->reply
,ln
);
2099 if (copylen
== 0) return;
2103 /* Now the output buffer is empty, add the new single element */
2104 o
= createObject(REDIS_STRING
,sdsnewlen(buf
,copylen
));
2105 listAddNodeHead(c
->reply
,o
);
2108 static void sendReplyToClient(aeEventLoop
*el
, int fd
, void *privdata
, int mask
) {
2109 redisClient
*c
= privdata
;
2110 int nwritten
= 0, totwritten
= 0, objlen
;
2113 REDIS_NOTUSED(mask
);
2115 /* Use writev() if we have enough buffers to send */
2116 if (!server
.glueoutputbuf
&&
2117 listLength(c
->reply
) > REDIS_WRITEV_THRESHOLD
&&
2118 !(c
->flags
& REDIS_MASTER
))
2120 sendReplyToClientWritev(el
, fd
, privdata
, mask
);
2124 while(listLength(c
->reply
)) {
2125 if (server
.glueoutputbuf
&& listLength(c
->reply
) > 1)
2126 glueReplyBuffersIfNeeded(c
);
2128 o
= listNodeValue(listFirst(c
->reply
));
2129 objlen
= sdslen(o
->ptr
);
2132 listDelNode(c
->reply
,listFirst(c
->reply
));
2136 if (c
->flags
& REDIS_MASTER
) {
2137 /* Don't reply to a master */
2138 nwritten
= objlen
- c
->sentlen
;
2140 nwritten
= write(fd
, ((char*)o
->ptr
)+c
->sentlen
, objlen
- c
->sentlen
);
2141 if (nwritten
<= 0) break;
2143 c
->sentlen
+= nwritten
;
2144 totwritten
+= nwritten
;
2145 /* If we fully sent the object on head go to the next one */
2146 if (c
->sentlen
== objlen
) {
2147 listDelNode(c
->reply
,listFirst(c
->reply
));
2150 /* Note that we avoid sending more than REDIS_MAX_WRITE_PER_EVENT
2151 * bytes: in a single threaded server it's a good idea to serve
2152 * other clients as well, even if a very large request comes from a
2153 * super fast link that is always able to accept data (in a real world
2154 * scenario think about 'KEYS *' against the loopback interface) */
2155 if (totwritten
> REDIS_MAX_WRITE_PER_EVENT
) break;
2157 if (nwritten
== -1) {
2158 if (errno
== EAGAIN
) {
2161 redisLog(REDIS_VERBOSE
,
2162 "Error writing to client: %s", strerror(errno
));
2167 if (totwritten
> 0) c
->lastinteraction
= time(NULL
);
2168 if (listLength(c
->reply
) == 0) {
2170 aeDeleteFileEvent(server
.el
,c
->fd
,AE_WRITABLE
);
2174 static void sendReplyToClientWritev(aeEventLoop
*el
, int fd
, void *privdata
, int mask
)
2176 redisClient
*c
= privdata
;
2177 int nwritten
= 0, totwritten
= 0, objlen
, willwrite
;
2179 struct iovec iov
[REDIS_WRITEV_IOVEC_COUNT
];
2180 int offset
, ion
= 0;
2182 REDIS_NOTUSED(mask
);
2185 while (listLength(c
->reply
)) {
2186 offset
= c
->sentlen
;
2190 /* fill-in the iov[] array */
2191 for(node
= listFirst(c
->reply
); node
; node
= listNextNode(node
)) {
2192 o
= listNodeValue(node
);
2193 objlen
= sdslen(o
->ptr
);
2195 if (totwritten
+ objlen
- offset
> REDIS_MAX_WRITE_PER_EVENT
)
2198 if(ion
== REDIS_WRITEV_IOVEC_COUNT
)
2199 break; /* no more iovecs */
2201 iov
[ion
].iov_base
= ((char*)o
->ptr
) + offset
;
2202 iov
[ion
].iov_len
= objlen
- offset
;
2203 willwrite
+= objlen
- offset
;
2204 offset
= 0; /* just for the first item */
2211 /* write all collected blocks at once */
2212 if((nwritten
= writev(fd
, iov
, ion
)) < 0) {
2213 if (errno
!= EAGAIN
) {
2214 redisLog(REDIS_VERBOSE
,
2215 "Error writing to client: %s", strerror(errno
));
2222 totwritten
+= nwritten
;
2223 offset
= c
->sentlen
;
2225 /* remove written robjs from c->reply */
2226 while (nwritten
&& listLength(c
->reply
)) {
2227 o
= listNodeValue(listFirst(c
->reply
));
2228 objlen
= sdslen(o
->ptr
);
2230 if(nwritten
>= objlen
- offset
) {
2231 listDelNode(c
->reply
, listFirst(c
->reply
));
2232 nwritten
-= objlen
- offset
;
2236 c
->sentlen
+= nwritten
;
2244 c
->lastinteraction
= time(NULL
);
2246 if (listLength(c
->reply
) == 0) {
2248 aeDeleteFileEvent(server
.el
,c
->fd
,AE_WRITABLE
);
2252 static int qsortRedisCommands(const void *r1
, const void *r2
) {
2254 ((struct redisCommand
*)r1
)->name
,
2255 ((struct redisCommand
*)r2
)->name
);
2258 static void sortCommandTable() {
2259 int i
= 0, size
= 0;
2261 /* Determine and store the size of the command table */
2262 while(readonlyCommandTable
[i
++].name
!= NULL
) size
++;
2263 commandTableSize
= size
;
2265 /* Copy and sort the read-only version of the command table */
2266 commandTable
= (struct redisCommand
*)malloc(sizeof(readonlyCommandTable
));
2267 memcpy(commandTable
,readonlyCommandTable
,sizeof(readonlyCommandTable
));
2268 qsort(commandTable
,size
,sizeof(struct redisCommand
),qsortRedisCommands
);
2271 static struct redisCommand
*lookupCommand(char *name
) {
2272 struct redisCommand tmp
= {name
,NULL
,0,0,NULL
,0,0,0};
2277 sizeof(struct redisCommand
),
2278 qsortRedisCommands
);
2281 /* resetClient prepares the client to process the next command */
2282 static void resetClient(redisClient
*c
) {
2288 /* Call() is the core of Redis execution of a command */
2289 static void call(redisClient
*c
, struct redisCommand
*cmd
) {
2292 dirty
= server
.dirty
;
2294 dirty
= server
.dirty
-dirty
;
2296 if (server
.appendonly
&& dirty
)
2297 feedAppendOnlyFile(cmd
,c
->db
->id
,c
->argv
,c
->argc
);
2298 if ((dirty
|| cmd
->flags
& REDIS_CMD_FORCE_REPLICATION
) &&
2299 listLength(server
.slaves
))
2300 replicationFeedSlaves(server
.slaves
,c
->db
->id
,c
->argv
,c
->argc
);
2301 if (listLength(server
.monitors
))
2302 replicationFeedMonitors(server
.monitors
,c
->db
->id
,c
->argv
,c
->argc
);
2303 server
.stat_numcommands
++;
2306 /* If this function gets called we already read a whole
2307 * command, arguments are in the client argv/argc fields.
2308 * processCommand() executes the command or prepares the
2309 * server for a bulk read from the client.
2311 * If 1 is returned the client is still alive and valid and
2312 * other operations can be performed by the caller. Otherwise
2313 * if 0 is returned the client was destroyed (i.e. after QUIT). */
2314 static int processCommand(redisClient
*c
) {
2315 struct redisCommand
*cmd
;
2317 /* Free some memory if needed (maxmemory setting) */
2318 if (server
.maxmemory
) freeMemoryIfNeeded();
2320 /* Handle the multi bulk command type. This is an alternative protocol
2321 * supported by Redis in order to receive commands that are composed of
2322 * multiple binary-safe "bulk" arguments. The latency of processing is
2323 * a bit higher but this allows things like multi-sets, so if this
2324 * protocol is used only for MSET and similar commands this is a big win. */
2325 if (c
->multibulk
== 0 && c
->argc
== 1 && ((char*)(c
->argv
[0]->ptr
))[0] == '*') {
2326 c
->multibulk
= atoi(((char*)c
->argv
[0]->ptr
)+1);
2327 if (c
->multibulk
<= 0) {
2331 decrRefCount(c
->argv
[c
->argc
-1]);
2335 } else if (c
->multibulk
) {
2336 if (c
->bulklen
== -1) {
2337 if (((char*)c
->argv
[0]->ptr
)[0] != '$') {
2338 addReplySds(c
,sdsnew("-ERR multi bulk protocol error\r\n"));
2342 int bulklen
= atoi(((char*)c
->argv
[0]->ptr
)+1);
2343 decrRefCount(c
->argv
[0]);
2344 if (bulklen
< 0 || bulklen
> 1024*1024*1024) {
2346 addReplySds(c
,sdsnew("-ERR invalid bulk write count\r\n"));
2351 c
->bulklen
= bulklen
+2; /* add two bytes for CR+LF */
2355 c
->mbargv
= zrealloc(c
->mbargv
,(sizeof(robj
*))*(c
->mbargc
+1));
2356 c
->mbargv
[c
->mbargc
] = c
->argv
[0];
2360 if (c
->multibulk
== 0) {
2364 /* Here we need to swap the multi-bulk argc/argv with the
2365 * normal argc/argv of the client structure. */
2367 c
->argv
= c
->mbargv
;
2368 c
->mbargv
= auxargv
;
2371 c
->argc
= c
->mbargc
;
2372 c
->mbargc
= auxargc
;
2374 /* We need to set bulklen to something different than -1
2375 * in order for the code below to process the command without
2376 * to try to read the last argument of a bulk command as
2377 * a special argument. */
2379 /* continue below and process the command */
2386 /* -- end of multi bulk commands processing -- */
2388 /* The QUIT command is handled as a special case. Normal command
2389 * procs are unable to close the client connection safely */
2390 if (!strcasecmp(c
->argv
[0]->ptr
,"quit")) {
2395 /* Now lookup the command and check ASAP about trivial error conditions
2396 * such wrong arity, bad command name and so forth. */
2397 cmd
= lookupCommand(c
->argv
[0]->ptr
);
2400 sdscatprintf(sdsempty(), "-ERR unknown command '%s'\r\n",
2401 (char*)c
->argv
[0]->ptr
));
2404 } else if ((cmd
->arity
> 0 && cmd
->arity
!= c
->argc
) ||
2405 (c
->argc
< -cmd
->arity
)) {
2407 sdscatprintf(sdsempty(),
2408 "-ERR wrong number of arguments for '%s' command\r\n",
2412 } else if (cmd
->flags
& REDIS_CMD_BULK
&& c
->bulklen
== -1) {
2413 /* This is a bulk command, we have to read the last argument yet. */
2414 int bulklen
= atoi(c
->argv
[c
->argc
-1]->ptr
);
2416 decrRefCount(c
->argv
[c
->argc
-1]);
2417 if (bulklen
< 0 || bulklen
> 1024*1024*1024) {
2419 addReplySds(c
,sdsnew("-ERR invalid bulk write count\r\n"));
2424 c
->bulklen
= bulklen
+2; /* add two bytes for CR+LF */
2425 /* It is possible that the bulk read is already in the
2426 * buffer. Check this condition and handle it accordingly.
2427 * This is just a fast path, alternative to call processInputBuffer().
2428 * It's a good idea since the code is small and this condition
2429 * happens most of the times. */
2430 if ((signed)sdslen(c
->querybuf
) >= c
->bulklen
) {
2431 c
->argv
[c
->argc
] = createStringObject(c
->querybuf
,c
->bulklen
-2);
2433 c
->querybuf
= sdsrange(c
->querybuf
,c
->bulklen
,-1);
2435 /* Otherwise return... there is to read the last argument
2436 * from the socket. */
2440 /* Let's try to encode the bulk object to save space. */
2441 if (cmd
->flags
& REDIS_CMD_BULK
)
2442 c
->argv
[c
->argc
-1] = tryObjectEncoding(c
->argv
[c
->argc
-1]);
2444 /* Check if the user is authenticated */
2445 if (server
.requirepass
&& !c
->authenticated
&& cmd
->proc
!= authCommand
) {
2446 addReplySds(c
,sdsnew("-ERR operation not permitted\r\n"));
2451 /* Handle the maxmemory directive */
2452 if (server
.maxmemory
&& (cmd
->flags
& REDIS_CMD_DENYOOM
) &&
2453 zmalloc_used_memory() > server
.maxmemory
)
2455 addReplySds(c
,sdsnew("-ERR command not allowed when used memory > 'maxmemory'\r\n"));
2460 /* Only allow SUBSCRIBE and UNSUBSCRIBE in the context of Pub/Sub */
2461 if ((dictSize(c
->pubsub_channels
) > 0 || listLength(c
->pubsub_patterns
) > 0)
2463 cmd
->proc
!= subscribeCommand
&& cmd
->proc
!= unsubscribeCommand
&&
2464 cmd
->proc
!= psubscribeCommand
&& cmd
->proc
!= punsubscribeCommand
) {
2465 addReplySds(c
,sdsnew("-ERR only (P)SUBSCRIBE / (P)UNSUBSCRIBE / QUIT allowed in this context\r\n"));
2470 /* Exec the command */
2471 if (c
->flags
& REDIS_MULTI
&&
2472 cmd
->proc
!= execCommand
&& cmd
->proc
!= discardCommand
&&
2473 cmd
->proc
!= multiCommand
&& cmd
->proc
!= watchCommand
)
2475 queueMultiCommand(c
,cmd
);
2476 addReply(c
,shared
.queued
);
2478 if (server
.vm_enabled
&& server
.vm_max_threads
> 0 &&
2479 blockClientOnSwappedKeys(c
,cmd
)) return 1;
2483 /* Prepare the client for the next command */
2488 static void replicationFeedSlaves(list
*slaves
, int dictid
, robj
**argv
, int argc
) {
2493 /* We need 1+(ARGS*3) objects since commands are using the new protocol
2494 * and we need 1 object for the first "*<count>\r\n" multibulk count, then
2495 * for every additional object we have "$<count>\r\n" + object + "\r\n". */
2496 robj
*static_outv
[REDIS_STATIC_ARGS
*3+1];
2499 if (argc
<= REDIS_STATIC_ARGS
) {
2502 outv
= zmalloc(sizeof(robj
*)*(argc
*3+1));
2505 lenobj
= createObject(REDIS_STRING
,
2506 sdscatprintf(sdsempty(), "*%d\r\n", argc
));
2507 lenobj
->refcount
= 0;
2508 outv
[outc
++] = lenobj
;
2509 for (j
= 0; j
< argc
; j
++) {
2510 lenobj
= createObject(REDIS_STRING
,
2511 sdscatprintf(sdsempty(),"$%lu\r\n",
2512 (unsigned long) stringObjectLen(argv
[j
])));
2513 lenobj
->refcount
= 0;
2514 outv
[outc
++] = lenobj
;
2515 outv
[outc
++] = argv
[j
];
2516 outv
[outc
++] = shared
.crlf
;
2519 /* Increment all the refcounts at start and decrement at end in order to
2520 * be sure to free objects if there is no slave in a replication state
2521 * able to be fed with commands */
2522 for (j
= 0; j
< outc
; j
++) incrRefCount(outv
[j
]);
2523 listRewind(slaves
,&li
);
2524 while((ln
= listNext(&li
))) {
2525 redisClient
*slave
= ln
->value
;
2527 /* Don't feed slaves that are still waiting for BGSAVE to start */
2528 if (slave
->replstate
== REDIS_REPL_WAIT_BGSAVE_START
) continue;
2530 /* Feed all the other slaves, MONITORs and so on */
2531 if (slave
->slaveseldb
!= dictid
) {
2535 case 0: selectcmd
= shared
.select0
; break;
2536 case 1: selectcmd
= shared
.select1
; break;
2537 case 2: selectcmd
= shared
.select2
; break;
2538 case 3: selectcmd
= shared
.select3
; break;
2539 case 4: selectcmd
= shared
.select4
; break;
2540 case 5: selectcmd
= shared
.select5
; break;
2541 case 6: selectcmd
= shared
.select6
; break;
2542 case 7: selectcmd
= shared
.select7
; break;
2543 case 8: selectcmd
= shared
.select8
; break;
2544 case 9: selectcmd
= shared
.select9
; break;
2546 selectcmd
= createObject(REDIS_STRING
,
2547 sdscatprintf(sdsempty(),"select %d\r\n",dictid
));
2548 selectcmd
->refcount
= 0;
2551 addReply(slave
,selectcmd
);
2552 slave
->slaveseldb
= dictid
;
2554 for (j
= 0; j
< outc
; j
++) addReply(slave
,outv
[j
]);
2556 for (j
= 0; j
< outc
; j
++) decrRefCount(outv
[j
]);
2557 if (outv
!= static_outv
) zfree(outv
);
2560 static sds
sdscatrepr(sds s
, char *p
, size_t len
) {
2561 s
= sdscatlen(s
,"\"",1);
2566 s
= sdscatprintf(s
,"\\%c",*p
);
2568 case '\n': s
= sdscatlen(s
,"\\n",1); break;
2569 case '\r': s
= sdscatlen(s
,"\\r",1); break;
2570 case '\t': s
= sdscatlen(s
,"\\t",1); break;
2571 case '\a': s
= sdscatlen(s
,"\\a",1); break;
2572 case '\b': s
= sdscatlen(s
,"\\b",1); break;
2575 s
= sdscatprintf(s
,"%c",*p
);
2577 s
= sdscatprintf(s
,"\\x%02x",(unsigned char)*p
);
2582 return sdscatlen(s
,"\"",1);
2585 static void replicationFeedMonitors(list
*monitors
, int dictid
, robj
**argv
, int argc
) {
2589 sds cmdrepr
= sdsnew("+");
2593 gettimeofday(&tv
,NULL
);
2594 cmdrepr
= sdscatprintf(cmdrepr
,"%ld.%ld ",(long)tv
.tv_sec
,(long)tv
.tv_usec
);
2595 if (dictid
!= 0) cmdrepr
= sdscatprintf(cmdrepr
,"(db %d) ", dictid
);
2597 for (j
= 0; j
< argc
; j
++) {
2598 if (argv
[j
]->encoding
== REDIS_ENCODING_INT
) {
2599 cmdrepr
= sdscatprintf(cmdrepr
, "%ld", (long)argv
[j
]->ptr
);
2601 cmdrepr
= sdscatrepr(cmdrepr
,(char*)argv
[j
]->ptr
,
2602 sdslen(argv
[j
]->ptr
));
2605 cmdrepr
= sdscatlen(cmdrepr
," ",1);
2607 cmdrepr
= sdscatlen(cmdrepr
,"\r\n",2);
2608 cmdobj
= createObject(REDIS_STRING
,cmdrepr
);
2610 listRewind(monitors
,&li
);
2611 while((ln
= listNext(&li
))) {
2612 redisClient
*monitor
= ln
->value
;
2613 addReply(monitor
,cmdobj
);
2615 decrRefCount(cmdobj
);
2618 static void processInputBuffer(redisClient
*c
) {
2620 /* Before processing the input buffer, make sure the client is not
2621 * waiting for a blocking operation such as BLPOP. Note that in the first
2622 * iteration the client is never blocked, otherwise processInputBuffer()
2623 * would not be called at all, but after the execution of the first commands
2624 * in the input buffer the client may be blocked, and the "goto again"
2625 * will try to reiterate. The following line will make it return asap. */
2626 if (c
->flags
& REDIS_BLOCKED
|| c
->flags
& REDIS_IO_WAIT
) return;
2627 if (c
->bulklen
== -1) {
2628 /* Read the first line of the query */
2629 char *p
= strchr(c
->querybuf
,'\n');
2636 query
= c
->querybuf
;
2637 c
->querybuf
= sdsempty();
2638 querylen
= 1+(p
-(query
));
2639 if (sdslen(query
) > querylen
) {
2640 /* leave data after the first line of the query in the buffer */
2641 c
->querybuf
= sdscatlen(c
->querybuf
,query
+querylen
,sdslen(query
)-querylen
);
2643 *p
= '\0'; /* remove "\n" */
2644 if (*(p
-1) == '\r') *(p
-1) = '\0'; /* and "\r" if any */
2645 sdsupdatelen(query
);
2647 /* Now we can split the query in arguments */
2648 argv
= sdssplitlen(query
,sdslen(query
)," ",1,&argc
);
2651 if (c
->argv
) zfree(c
->argv
);
2652 c
->argv
= zmalloc(sizeof(robj
*)*argc
);
2654 for (j
= 0; j
< argc
; j
++) {
2655 if (sdslen(argv
[j
])) {
2656 c
->argv
[c
->argc
] = createObject(REDIS_STRING
,argv
[j
]);
2664 /* Execute the command. If the client is still valid
2665 * after processCommand() return and there is something
2666 * on the query buffer try to process the next command. */
2667 if (processCommand(c
) && sdslen(c
->querybuf
)) goto again
;
2669 /* Nothing to process, argc == 0. Just process the query
2670 * buffer if it's not empty or return to the caller */
2671 if (sdslen(c
->querybuf
)) goto again
;
2674 } else if (sdslen(c
->querybuf
) >= REDIS_REQUEST_MAX_SIZE
) {
2675 redisLog(REDIS_VERBOSE
, "Client protocol error");
2680 /* Bulk read handling. Note that if we are at this point
2681 the client already sent a command terminated with a newline,
2682 we are reading the bulk data that is actually the last
2683 argument of the command. */
2684 int qbl
= sdslen(c
->querybuf
);
2686 if (c
->bulklen
<= qbl
) {
2687 /* Copy everything but the final CRLF as final argument */
2688 c
->argv
[c
->argc
] = createStringObject(c
->querybuf
,c
->bulklen
-2);
2690 c
->querybuf
= sdsrange(c
->querybuf
,c
->bulklen
,-1);
2691 /* Process the command. If the client is still valid after
2692 * the processing and there is more data in the buffer
2693 * try to parse it. */
2694 if (processCommand(c
) && sdslen(c
->querybuf
)) goto again
;
2700 static void readQueryFromClient(aeEventLoop
*el
, int fd
, void *privdata
, int mask
) {
2701 redisClient
*c
= (redisClient
*) privdata
;
2702 char buf
[REDIS_IOBUF_LEN
];
2705 REDIS_NOTUSED(mask
);
2707 nread
= read(fd
, buf
, REDIS_IOBUF_LEN
);
2709 if (errno
== EAGAIN
) {
2712 redisLog(REDIS_VERBOSE
, "Reading from client: %s",strerror(errno
));
2716 } else if (nread
== 0) {
2717 redisLog(REDIS_VERBOSE
, "Client closed connection");
2722 c
->querybuf
= sdscatlen(c
->querybuf
, buf
, nread
);
2723 c
->lastinteraction
= time(NULL
);
2727 processInputBuffer(c
);
2730 static int selectDb(redisClient
*c
, int id
) {
2731 if (id
< 0 || id
>= server
.dbnum
)
2733 c
->db
= &server
.db
[id
];
2737 static void *dupClientReplyValue(void *o
) {
2738 incrRefCount((robj
*)o
);
/* Match method for lists of string objects: two elements match when
 * equalStringObjects() says so. */
static int listMatchObjects(void *a, void *b) {
    int same = equalStringObjects(a,b);

    return same;
}
2746 static redisClient
*createClient(int fd
) {
2747 redisClient
*c
= zmalloc(sizeof(*c
));
2749 anetNonBlock(NULL
,fd
);
2750 anetTcpNoDelay(NULL
,fd
);
2751 if (!c
) return NULL
;
2754 c
->querybuf
= sdsempty();
2763 c
->lastinteraction
= time(NULL
);
2764 c
->authenticated
= 0;
2765 c
->replstate
= REDIS_REPL_NONE
;
2766 c
->reply
= listCreate();
2767 listSetFreeMethod(c
->reply
,decrRefCount
);
2768 listSetDupMethod(c
->reply
,dupClientReplyValue
);
2769 c
->blocking_keys
= NULL
;
2770 c
->blocking_keys_num
= 0;
2771 c
->io_keys
= listCreate();
2772 c
->watched_keys
= listCreate();
2773 listSetFreeMethod(c
->io_keys
,decrRefCount
);
2774 c
->pubsub_channels
= dictCreate(&setDictType
,NULL
);
2775 c
->pubsub_patterns
= listCreate();
2776 listSetFreeMethod(c
->pubsub_patterns
,decrRefCount
);
2777 listSetMatchMethod(c
->pubsub_patterns
,listMatchObjects
);
2778 if (aeCreateFileEvent(server
.el
, c
->fd
, AE_READABLE
,
2779 readQueryFromClient
, c
) == AE_ERR
) {
2783 listAddNodeTail(server
.clients
,c
);
2784 initClientMultiState(c
);
2788 static void addReply(redisClient
*c
, robj
*obj
) {
2789 if (listLength(c
->reply
) == 0 &&
2790 (c
->replstate
== REDIS_REPL_NONE
||
2791 c
->replstate
== REDIS_REPL_ONLINE
) &&
2792 aeCreateFileEvent(server
.el
, c
->fd
, AE_WRITABLE
,
2793 sendReplyToClient
, c
) == AE_ERR
) return;
2795 if (server
.vm_enabled
&& obj
->storage
!= REDIS_VM_MEMORY
) {
2796 obj
= dupStringObject(obj
);
2797 obj
->refcount
= 0; /* getDecodedObject() will increment the refcount */
2799 listAddNodeTail(c
->reply
,getDecodedObject(obj
));
2802 static void addReplySds(redisClient
*c
, sds s
) {
2803 robj
*o
= createObject(REDIS_STRING
,s
);
2808 static void addReplyDouble(redisClient
*c
, double d
) {
2811 snprintf(buf
,sizeof(buf
),"%.17g",d
);
2812 addReplySds(c
,sdscatprintf(sdsempty(),"$%lu\r\n%s\r\n",
2813 (unsigned long) strlen(buf
),buf
));
2816 static void addReplyLongLong(redisClient
*c
, long long ll
) {
2821 addReply(c
,shared
.czero
);
2823 } else if (ll
== 1) {
2824 addReply(c
,shared
.cone
);
2828 len
= ll2string(buf
+1,sizeof(buf
)-1,ll
);
2831 addReplySds(c
,sdsnewlen(buf
,len
+3));
2834 static void addReplyUlong(redisClient
*c
, unsigned long ul
) {
2839 addReply(c
,shared
.czero
);
2841 } else if (ul
== 1) {
2842 addReply(c
,shared
.cone
);
2845 len
= snprintf(buf
,sizeof(buf
),":%lu\r\n",ul
);
2846 addReplySds(c
,sdsnewlen(buf
,len
));
2849 static void addReplyBulkLen(redisClient
*c
, robj
*obj
) {
2853 if (obj
->encoding
== REDIS_ENCODING_RAW
) {
2854 len
= sdslen(obj
->ptr
);
2856 long n
= (long)obj
->ptr
;
2858 /* Compute how many bytes will take this integer as a radix 10 string */
2864 while((n
= n
/10) != 0) {
2869 intlen
= ll2string(buf
+1,sizeof(buf
)-1,(long long)len
);
2870 buf
[intlen
+1] = '\r';
2871 buf
[intlen
+2] = '\n';
2872 addReplySds(c
,sdsnewlen(buf
,intlen
+3));
2875 static void addReplyBulk(redisClient
*c
, robj
*obj
) {
2876 addReplyBulkLen(c
,obj
);
2878 addReply(c
,shared
.crlf
);
2881 /* In the CONFIG command we need to add vanilla C string as bulk replies */
2882 static void addReplyBulkCString(redisClient
*c
, char *s
) {
2884 addReply(c
,shared
.nullbulk
);
2886 robj
*o
= createStringObject(s
,strlen(s
));
2892 static void acceptHandler(aeEventLoop
*el
, int fd
, void *privdata
, int mask
) {
2897 REDIS_NOTUSED(mask
);
2898 REDIS_NOTUSED(privdata
);
2900 cfd
= anetAccept(server
.neterr
, fd
, cip
, &cport
);
2901 if (cfd
== AE_ERR
) {
2902 redisLog(REDIS_VERBOSE
,"Accepting client connection: %s", server
.neterr
);
2905 redisLog(REDIS_VERBOSE
,"Accepted %s:%d", cip
, cport
);
2906 if ((c
= createClient(cfd
)) == NULL
) {
2907 redisLog(REDIS_WARNING
,"Error allocating resoures for the client");
2908 close(cfd
); /* May be already closed, just ingore errors */
2911 /* If maxclient directive is set and this is one client more... close the
2912 * connection. Note that we create the client instead to check before
2913 * for this condition, since now the socket is already set in nonblocking
2914 * mode and we can send an error for free using the Kernel I/O */
2915 if (server
.maxclients
&& listLength(server
.clients
) > server
.maxclients
) {
2916 char *err
= "-ERR max number of clients reached\r\n";
2918 /* That's a best effort error message, don't check write errors */
2919 if (write(c
->fd
,err
,strlen(err
)) == -1) {
2920 /* Nothing to do, Just to avoid the warning... */
2925 server
.stat_numconnections
++;
2928 /* ======================= Redis objects implementation ===================== */
2930 static robj
*createObject(int type
, void *ptr
) {
2933 if (server
.vm_enabled
) pthread_mutex_lock(&server
.obj_freelist_mutex
);
2934 if (listLength(server
.objfreelist
)) {
2935 listNode
*head
= listFirst(server
.objfreelist
);
2936 o
= listNodeValue(head
);
2937 listDelNode(server
.objfreelist
,head
);
2938 if (server
.vm_enabled
) pthread_mutex_unlock(&server
.obj_freelist_mutex
);
2940 if (server
.vm_enabled
) {
2941 pthread_mutex_unlock(&server
.obj_freelist_mutex
);
2942 o
= zmalloc(sizeof(*o
));
2944 o
= zmalloc(sizeof(*o
)-sizeof(struct redisObjectVM
));
2948 o
->encoding
= REDIS_ENCODING_RAW
;
2951 if (server
.vm_enabled
) {
2952 /* Note that this code may run in the context of an I/O thread
2953 * and accessing to server.unixtime in theory is an error
2954 * (no locks). But in practice this is safe, and even if we read
2955 * garbage Redis will not fail, as it's just a statistical info */
2956 o
->vm
.atime
= server
.unixtime
;
2957 o
->storage
= REDIS_VM_MEMORY
;
2962 static robj
*createStringObject(char *ptr
, size_t len
) {
2963 return createObject(REDIS_STRING
,sdsnewlen(ptr
,len
));
2966 static robj
*createStringObjectFromLongLong(long long value
) {
2968 if (value
>= 0 && value
< REDIS_SHARED_INTEGERS
) {
2969 incrRefCount(shared
.integers
[value
]);
2970 o
= shared
.integers
[value
];
2972 if (value
>= LONG_MIN
&& value
<= LONG_MAX
) {
2973 o
= createObject(REDIS_STRING
, NULL
);
2974 o
->encoding
= REDIS_ENCODING_INT
;
2975 o
->ptr
= (void*)((long)value
);
2977 o
= createObject(REDIS_STRING
,sdsfromlonglong(value
));
2983 static robj
*dupStringObject(robj
*o
) {
2984 assert(o
->encoding
== REDIS_ENCODING_RAW
);
2985 return createStringObject(o
->ptr
,sdslen(o
->ptr
));
2988 static robj
*createListObject(void) {
2989 list
*l
= listCreate();
2991 listSetFreeMethod(l
,decrRefCount
);
2992 return createObject(REDIS_LIST
,l
);
2995 static robj
*createSetObject(void) {
2996 dict
*d
= dictCreate(&setDictType
,NULL
);
2997 return createObject(REDIS_SET
,d
);
3000 static robj
*createHashObject(void) {
3001 /* All the Hashes start as zipmaps. Will be automatically converted
3002 * into hash tables if there are enough elements or big elements
3004 unsigned char *zm
= zipmapNew();
3005 robj
*o
= createObject(REDIS_HASH
,zm
);
3006 o
->encoding
= REDIS_ENCODING_ZIPMAP
;
3010 static robj
*createZsetObject(void) {
3011 zset
*zs
= zmalloc(sizeof(*zs
));
3013 zs
->dict
= dictCreate(&zsetDictType
,NULL
);
3014 zs
->zsl
= zslCreate();
3015 return createObject(REDIS_ZSET
,zs
);
3018 static void freeStringObject(robj
*o
) {
3019 if (o
->encoding
== REDIS_ENCODING_RAW
) {
3024 static void freeListObject(robj
*o
) {
3025 listRelease((list
*) o
->ptr
);
3028 static void freeSetObject(robj
*o
) {
3029 dictRelease((dict
*) o
->ptr
);
3032 static void freeZsetObject(robj
*o
) {
3035 dictRelease(zs
->dict
);
3040 static void freeHashObject(robj
*o
) {
3041 switch (o
->encoding
) {
3042 case REDIS_ENCODING_HT
:
3043 dictRelease((dict
*) o
->ptr
);
3045 case REDIS_ENCODING_ZIPMAP
:
3049 redisPanic("Unknown hash encoding type");
3054 static void incrRefCount(robj
*o
) {
3058 static void decrRefCount(void *obj
) {
3061 if (o
->refcount
<= 0) redisPanic("decrRefCount against refcount <= 0");
3062 /* Object is a key of a swapped out value, or in the process of being
3064 if (server
.vm_enabled
&&
3065 (o
->storage
== REDIS_VM_SWAPPED
|| o
->storage
== REDIS_VM_LOADING
))
3067 if (o
->storage
== REDIS_VM_LOADING
) vmCancelThreadedIOJob(obj
);
3068 redisAssert(o
->type
== REDIS_STRING
);
3069 freeStringObject(o
);
3070 vmMarkPagesFree(o
->vm
.page
,o
->vm
.usedpages
);
3071 pthread_mutex_lock(&server
.obj_freelist_mutex
);
3072 if (listLength(server
.objfreelist
) > REDIS_OBJFREELIST_MAX
||
3073 !listAddNodeHead(server
.objfreelist
,o
))
3075 pthread_mutex_unlock(&server
.obj_freelist_mutex
);
3076 server
.vm_stats_swapped_objects
--;
3079 /* Object is in memory, or in the process of being swapped out. */
3080 if (--(o
->refcount
) == 0) {
3081 if (server
.vm_enabled
&& o
->storage
== REDIS_VM_SWAPPING
)
3082 vmCancelThreadedIOJob(obj
);
3084 case REDIS_STRING
: freeStringObject(o
); break;
3085 case REDIS_LIST
: freeListObject(o
); break;
3086 case REDIS_SET
: freeSetObject(o
); break;
3087 case REDIS_ZSET
: freeZsetObject(o
); break;
3088 case REDIS_HASH
: freeHashObject(o
); break;
3089 default: redisPanic("Unknown object type"); break;
3091 if (server
.vm_enabled
) pthread_mutex_lock(&server
.obj_freelist_mutex
);
3092 if (listLength(server
.objfreelist
) > REDIS_OBJFREELIST_MAX
||
3093 !listAddNodeHead(server
.objfreelist
,o
))
3095 if (server
.vm_enabled
) pthread_mutex_unlock(&server
.obj_freelist_mutex
);
3099 static robj
*lookupKey(redisDb
*db
, robj
*key
) {
3100 dictEntry
*de
= dictFind(db
->dict
,key
);
3102 robj
*key
= dictGetEntryKey(de
);
3103 robj
*val
= dictGetEntryVal(de
);
3105 if (server
.vm_enabled
) {
3106 if (key
->storage
== REDIS_VM_MEMORY
||
3107 key
->storage
== REDIS_VM_SWAPPING
)
3109 /* If we were swapping the object out, stop it, this key
3111 if (key
->storage
== REDIS_VM_SWAPPING
)
3112 vmCancelThreadedIOJob(key
);
3113 /* Update the access time of the key for the aging algorithm. */
3114 key
->vm
.atime
= server
.unixtime
;
3116 int notify
= (key
->storage
== REDIS_VM_LOADING
);
3118 /* Our value was swapped on disk. Bring it at home. */
3119 redisAssert(val
== NULL
);
3120 val
= vmLoadObject(key
);
3121 dictGetEntryVal(de
) = val
;
3123 /* Clients blocked by the VM subsystem may be waiting for
3125 if (notify
) handleClientsBlockedOnSwappedKey(db
,key
);
3134 static robj
*lookupKeyRead(redisDb
*db
, robj
*key
) {
3135 expireIfNeeded(db
,key
);
3136 return lookupKey(db
,key
);
3139 static robj
*lookupKeyWrite(redisDb
*db
, robj
*key
) {
3140 deleteIfVolatile(db
,key
);
3141 touchWatchedKey(db
,key
);
3142 return lookupKey(db
,key
);
3145 static robj
*lookupKeyReadOrReply(redisClient
*c
, robj
*key
, robj
*reply
) {
3146 robj
*o
= lookupKeyRead(c
->db
, key
);
3147 if (!o
) addReply(c
,reply
);
3151 static robj
*lookupKeyWriteOrReply(redisClient
*c
, robj
*key
, robj
*reply
) {
3152 robj
*o
= lookupKeyWrite(c
->db
, key
);
3153 if (!o
) addReply(c
,reply
);
3157 static int checkType(redisClient
*c
, robj
*o
, int type
) {
3158 if (o
->type
!= type
) {
3159 addReply(c
,shared
.wrongtypeerr
);
3165 static int deleteKey(redisDb
*db
, robj
*key
) {
3168 /* We need to protect key from destruction: after the first dictDelete()
3169 * it may happen that 'key' is no longer valid if we don't increment
3170 * it's count. This may happen when we get the object reference directly
3171 * from the hash table with dictRandomKey() or dict iterators */
3173 if (dictSize(db
->expires
)) dictDelete(db
->expires
,key
);
3174 retval
= dictDelete(db
->dict
,key
);
3177 return retval
== DICT_OK
;
3180 /* Check if the nul-terminated string 's' can be represented by a long
3181 * (that is, is a number that fits into long without any other space or
3182 * character before or after the digits).
3184 * If so, the function returns REDIS_OK and *longval is set to the value
3185 * of the number. Otherwise REDIS_ERR is returned */
3186 static int isStringRepresentableAsLong(sds s
, long *longval
) {
3187 char buf
[32], *endptr
;
3191 value
= strtol(s
, &endptr
, 10);
3192 if (endptr
[0] != '\0') return REDIS_ERR
;
3193 slen
= ll2string(buf
,32,value
);
3195 /* If the number converted back into a string is not identical
3196 * then it's not possible to encode the string as integer */
3197 if (sdslen(s
) != (unsigned)slen
|| memcmp(buf
,s
,slen
)) return REDIS_ERR
;
3198 if (longval
) *longval
= value
;
3202 /* Try to encode a string object in order to save space */
3203 static robj
*tryObjectEncoding(robj
*o
) {
3207 if (o
->encoding
!= REDIS_ENCODING_RAW
)
3208 return o
; /* Already encoded */
3210 /* It's not safe to encode shared objects: shared objects can be shared
3211 * everywhere in the "object space" of Redis. Encoded objects can only
3212 * appear as "values" (and not, for instance, as keys) */
3213 if (o
->refcount
> 1) return o
;
3215 /* Currently we try to encode only strings */
3216 redisAssert(o
->type
== REDIS_STRING
);
3218 /* Check if we can represent this string as a long integer */
3219 if (isStringRepresentableAsLong(s
,&value
) == REDIS_ERR
) return o
;
3221 /* Ok, this object can be encoded */
3222 if (value
>= 0 && value
< REDIS_SHARED_INTEGERS
) {
3224 incrRefCount(shared
.integers
[value
]);
3225 return shared
.integers
[value
];
3227 o
->encoding
= REDIS_ENCODING_INT
;
3229 o
->ptr
= (void*) value
;
3234 /* Get a decoded version of an encoded object (returned as a new object).
3235 * If the object is already raw-encoded just increment the ref count. */
3236 static robj
*getDecodedObject(robj
*o
) {
3239 if (o
->encoding
== REDIS_ENCODING_RAW
) {
3243 if (o
->type
== REDIS_STRING
&& o
->encoding
== REDIS_ENCODING_INT
) {
3246 ll2string(buf
,32,(long)o
->ptr
);
3247 dec
= createStringObject(buf
,strlen(buf
));
3250 redisPanic("Unknown encoding type");
3254 /* Compare two string objects via strcmp() or alike.
3255 * Note that the objects may be integer-encoded. In such a case we
3256 * use ll2string() to get a string representation of the numbers on the stack
3257 * and compare the strings, it's much faster than calling getDecodedObject().
3259 * Important note: if objects are not integer encoded, but binary-safe strings,
3260 * sdscmp() from sds.c will apply memcmp() so this function can be considered
3262 static int compareStringObjects(robj
*a
, robj
*b
) {
3263 redisAssert(a
->type
== REDIS_STRING
&& b
->type
== REDIS_STRING
);
3264 char bufa
[128], bufb
[128], *astr
, *bstr
;
3267 if (a
== b
) return 0;
3268 if (a
->encoding
!= REDIS_ENCODING_RAW
) {
3269 ll2string(bufa
,sizeof(bufa
),(long) a
->ptr
);
3275 if (b
->encoding
!= REDIS_ENCODING_RAW
) {
3276 ll2string(bufb
,sizeof(bufb
),(long) b
->ptr
);
3282 return bothsds
? sdscmp(astr
,bstr
) : strcmp(astr
,bstr
);
3285 /* Equal string objects return 1 if the two objects are the same from the
3286 * point of view of a string comparison, otherwise 0 is returned. Note that
3287 * this function is faster then checking for (compareStringObject(a,b) == 0)
3288 * because it can perform some more optimization. */
3289 static int equalStringObjects(robj
*a
, robj
*b
) {
3290 if (a
->encoding
!= REDIS_ENCODING_RAW
&& b
->encoding
!= REDIS_ENCODING_RAW
){
3291 return a
->ptr
== b
->ptr
;
3293 return compareStringObjects(a
,b
) == 0;
3297 static size_t stringObjectLen(robj
*o
) {
3298 redisAssert(o
->type
== REDIS_STRING
);
3299 if (o
->encoding
== REDIS_ENCODING_RAW
) {
3300 return sdslen(o
->ptr
);
3304 return ll2string(buf
,32,(long)o
->ptr
);
3308 static int getDoubleFromObject(robj
*o
, double *target
) {
3315 redisAssert(o
->type
== REDIS_STRING
);
3316 if (o
->encoding
== REDIS_ENCODING_RAW
) {
3317 value
= strtod(o
->ptr
, &eptr
);
3318 if (eptr
[0] != '\0') return REDIS_ERR
;
3319 } else if (o
->encoding
== REDIS_ENCODING_INT
) {
3320 value
= (long)o
->ptr
;
3322 redisPanic("Unknown string encoding");
3330 static int getDoubleFromObjectOrReply(redisClient
*c
, robj
*o
, double *target
, const char *msg
) {
3332 if (getDoubleFromObject(o
, &value
) != REDIS_OK
) {
3334 addReplySds(c
, sdscatprintf(sdsempty(), "-ERR %s\r\n", msg
));
3336 addReplySds(c
, sdsnew("-ERR value is not a double\r\n"));
3345 static int getLongLongFromObject(robj
*o
, long long *target
) {
3352 redisAssert(o
->type
== REDIS_STRING
);
3353 if (o
->encoding
== REDIS_ENCODING_RAW
) {
3354 value
= strtoll(o
->ptr
, &eptr
, 10);
3355 if (eptr
[0] != '\0') return REDIS_ERR
;
3356 } else if (o
->encoding
== REDIS_ENCODING_INT
) {
3357 value
= (long)o
->ptr
;
3359 redisPanic("Unknown string encoding");
3367 static int getLongLongFromObjectOrReply(redisClient
*c
, robj
*o
, long long *target
, const char *msg
) {
3369 if (getLongLongFromObject(o
, &value
) != REDIS_OK
) {
3371 addReplySds(c
, sdscatprintf(sdsempty(), "-ERR %s\r\n", msg
));
3373 addReplySds(c
, sdsnew("-ERR value is not an integer\r\n"));
3382 static int getLongFromObjectOrReply(redisClient
*c
, robj
*o
, long *target
, const char *msg
) {
3385 if (getLongLongFromObjectOrReply(c
, o
, &value
, msg
) != REDIS_OK
) return REDIS_ERR
;
3386 if (value
< LONG_MIN
|| value
> LONG_MAX
) {
3388 addReplySds(c
, sdscatprintf(sdsempty(), "-ERR %s\r\n", msg
));
3390 addReplySds(c
, sdsnew("-ERR value is out of range\r\n"));
3399 /*============================ RDB saving/loading =========================== */
3401 static int rdbSaveType(FILE *fp
, unsigned char type
) {
3402 if (fwrite(&type
,1,1,fp
) == 0) return -1;
3406 static int rdbSaveTime(FILE *fp
, time_t t
) {
3407 int32_t t32
= (int32_t) t
;
3408 if (fwrite(&t32
,4,1,fp
) == 0) return -1;
3412 /* check rdbLoadLen() comments for more info */
3413 static int rdbSaveLen(FILE *fp
, uint32_t len
) {
3414 unsigned char buf
[2];
3417 /* Save a 6 bit len */
3418 buf
[0] = (len
&0xFF)|(REDIS_RDB_6BITLEN
<<6);
3419 if (fwrite(buf
,1,1,fp
) == 0) return -1;
3420 } else if (len
< (1<<14)) {
3421 /* Save a 14 bit len */
3422 buf
[0] = ((len
>>8)&0xFF)|(REDIS_RDB_14BITLEN
<<6);
3424 if (fwrite(buf
,2,1,fp
) == 0) return -1;
3426 /* Save a 32 bit len */
3427 buf
[0] = (REDIS_RDB_32BITLEN
<<6);
3428 if (fwrite(buf
,1,1,fp
) == 0) return -1;
3430 if (fwrite(&len
,4,1,fp
) == 0) return -1;
3435 /* Encode 'value' as an integer if possible (if integer will fit the
3436 * supported range). If the function successfully encoded the integer
3437 * then the (up to 5 bytes) encoded representation is written in the
3438 * string pointed by 'enc' and the length is returned. Otherwise
3440 static int rdbEncodeInteger(long long value
, unsigned char *enc
) {
3441 /* Finally check if it fits in our ranges */
3442 if (value
>= -(1<<7) && value
<= (1<<7)-1) {
3443 enc
[0] = (REDIS_RDB_ENCVAL
<<6)|REDIS_RDB_ENC_INT8
;
3444 enc
[1] = value
&0xFF;
3446 } else if (value
>= -(1<<15) && value
<= (1<<15)-1) {
3447 enc
[0] = (REDIS_RDB_ENCVAL
<<6)|REDIS_RDB_ENC_INT16
;
3448 enc
[1] = value
&0xFF;
3449 enc
[2] = (value
>>8)&0xFF;
3451 } else if (value
>= -((long long)1<<31) && value
<= ((long long)1<<31)-1) {
3452 enc
[0] = (REDIS_RDB_ENCVAL
<<6)|REDIS_RDB_ENC_INT32
;
3453 enc
[1] = value
&0xFF;
3454 enc
[2] = (value
>>8)&0xFF;
3455 enc
[3] = (value
>>16)&0xFF;
3456 enc
[4] = (value
>>24)&0xFF;
3463 /* String objects in the form "2391" "-100" without any space and with a
3464 * range of values that can fit in an 8, 16 or 32 bit signed value can be
3465 * encoded as integers to save space */
3466 static int rdbTryIntegerEncoding(char *s
, size_t len
, unsigned char *enc
) {
3468 char *endptr
, buf
[32];
3470 /* Check if it's possible to encode this value as a number */
3471 value
= strtoll(s
, &endptr
, 10);
3472 if (endptr
[0] != '\0') return 0;
3473 ll2string(buf
,32,value
);
3475 /* If the number converted back into a string is not identical
3476 * then it's not possible to encode the string as integer */
3477 if (strlen(buf
) != len
|| memcmp(buf
,s
,len
)) return 0;
3479 return rdbEncodeInteger(value
,enc
);
3482 static int rdbSaveLzfStringObject(FILE *fp
, unsigned char *s
, size_t len
) {
3483 size_t comprlen
, outlen
;
3487 /* We require at least four bytes compression for this to be worth it */
3488 if (len
<= 4) return 0;
3490 if ((out
= zmalloc(outlen
+1)) == NULL
) return 0;
3491 comprlen
= lzf_compress(s
, len
, out
, outlen
);
3492 if (comprlen
== 0) {
3496 /* Data compressed! Let's save it on disk */
3497 byte
= (REDIS_RDB_ENCVAL
<<6)|REDIS_RDB_ENC_LZF
;
3498 if (fwrite(&byte
,1,1,fp
) == 0) goto writeerr
;
3499 if (rdbSaveLen(fp
,comprlen
) == -1) goto writeerr
;
3500 if (rdbSaveLen(fp
,len
) == -1) goto writeerr
;
3501 if (fwrite(out
,comprlen
,1,fp
) == 0) goto writeerr
;
3510 /* Save a string object as [len][data] on disk. If the object is a string
3511 * representation of an integer value we try to save it in a special form */
3512 static int rdbSaveRawString(FILE *fp
, unsigned char *s
, size_t len
) {
3515 /* Try integer encoding */
3517 unsigned char buf
[5];
3518 if ((enclen
= rdbTryIntegerEncoding((char*)s
,len
,buf
)) > 0) {
3519 if (fwrite(buf
,enclen
,1,fp
) == 0) return -1;
3524 /* Try LZF compression - under 20 bytes it's unable to compress even
3525 * aaaaaaaaaaaaaaaaaa so skip it */
3526 if (server
.rdbcompression
&& len
> 20) {
3529 retval
= rdbSaveLzfStringObject(fp
,s
,len
);
3530 if (retval
== -1) return -1;
3531 if (retval
> 0) return 0;
3532 /* retval == 0 means data can't be compressed, save the old way */
3535 /* Store verbatim */
3536 if (rdbSaveLen(fp
,len
) == -1) return -1;
3537 if (len
&& fwrite(s
,len
,1,fp
) == 0) return -1;
3541 /* Like rdbSaveRawString() but handle encoded objects */
3542 static int rdbSaveStringObject(FILE *fp
, robj
*obj
) {
3545 /* Avoid to decode the object, then encode it again, if the
3546 * object is already integer encoded. */
3547 if (obj
->encoding
== REDIS_ENCODING_INT
) {
3548 long val
= (long) obj
->ptr
;
3549 unsigned char buf
[5];
3552 if ((enclen
= rdbEncodeInteger(val
,buf
)) > 0) {
3553 if (fwrite(buf
,enclen
,1,fp
) == 0) return -1;
3556 /* otherwise... fall through and continue with the usual
3560 /* Avoid incr/decr ref count business when possible.
3561 * This plays well with copy-on-write given that we are probably
3562 * in a child process (BGSAVE). Also this makes sure key objects
3563 * of swapped objects are not incRefCount-ed (an assert does not allow
3564 * this in order to avoid bugs) */
3565 if (obj
->encoding
!= REDIS_ENCODING_RAW
) {
3566 obj
= getDecodedObject(obj
);
3567 retval
= rdbSaveRawString(fp
,obj
->ptr
,sdslen(obj
->ptr
));
3570 retval
= rdbSaveRawString(fp
,obj
->ptr
,sdslen(obj
->ptr
));
3575 /* Save a double value. Doubles are saved as strings prefixed by an unsigned
3576 * 8 bit integer specifying the length of the representation.
3577 * This 8 bit integer has special values in order to specify the following
3583 static int rdbSaveDoubleValue(FILE *fp
, double val
) {
3584 unsigned char buf
[128];
3590 } else if (!isfinite(val
)) {
3592 buf
[0] = (val
< 0) ? 255 : 254;
3594 #if (DBL_MANT_DIG >= 52) && (LLONG_MAX == 0x7fffffffffffffffLL)
3595 /* Check if the float is in a safe range to be casted into a
3596 * long long. We are assuming that long long is 64 bit here.
3597 * Also we are assuming that there are no implementations around where
3598 * double has precision < 52 bit.
3600 * Under this assumptions we test if a double is inside an interval
3601 * where casting to long long is safe. Then using two castings we
3602 * make sure the decimal part is zero. If all this is true we use
3603 * integer printing function that is much faster. */
3604 double min
= -4503599627370495; /* (2^52)-1 */
3605 double max
= 4503599627370496; /* -(2^52) */
3606 if (val
> min
&& val
< max
&& val
== ((double)((long long)val
)))
3607 ll2string((char*)buf
+1,sizeof(buf
),(long long)val
);
3610 snprintf((char*)buf
+1,sizeof(buf
)-1,"%.17g",val
);
3611 buf
[0] = strlen((char*)buf
+1);
3614 if (fwrite(buf
,len
,1,fp
) == 0) return -1;
3618 /* Save a Redis object. */
3619 static int rdbSaveObject(FILE *fp
, robj
*o
) {
3620 if (o
->type
== REDIS_STRING
) {
3621 /* Save a string value */
3622 if (rdbSaveStringObject(fp
,o
) == -1) return -1;
3623 } else if (o
->type
== REDIS_LIST
) {
3624 /* Save a list value */
3625 list
*list
= o
->ptr
;
3629 if (rdbSaveLen(fp
,listLength(list
)) == -1) return -1;
3630 listRewind(list
,&li
);
3631 while((ln
= listNext(&li
))) {
3632 robj
*eleobj
= listNodeValue(ln
);
3634 if (rdbSaveStringObject(fp
,eleobj
) == -1) return -1;
3636 } else if (o
->type
== REDIS_SET
) {
3637 /* Save a set value */
3639 dictIterator
*di
= dictGetIterator(set
);
3642 if (rdbSaveLen(fp
,dictSize(set
)) == -1) return -1;
3643 while((de
= dictNext(di
)) != NULL
) {
3644 robj
*eleobj
= dictGetEntryKey(de
);
3646 if (rdbSaveStringObject(fp
,eleobj
) == -1) return -1;
3648 dictReleaseIterator(di
);
3649 } else if (o
->type
== REDIS_ZSET
) {
3650 /* Save a sorted set value */
3652 dictIterator
*di
= dictGetIterator(zs
->dict
);
3655 if (rdbSaveLen(fp
,dictSize(zs
->dict
)) == -1) return -1;
3656 while((de
= dictNext(di
)) != NULL
) {
3657 robj
*eleobj
= dictGetEntryKey(de
);
3658 double *score
= dictGetEntryVal(de
);
3660 if (rdbSaveStringObject(fp
,eleobj
) == -1) return -1;
3661 if (rdbSaveDoubleValue(fp
,*score
) == -1) return -1;
3663 dictReleaseIterator(di
);
3664 } else if (o
->type
== REDIS_HASH
) {
3665 /* Save a hash value */
3666 if (o
->encoding
== REDIS_ENCODING_ZIPMAP
) {
3667 unsigned char *p
= zipmapRewind(o
->ptr
);
3668 unsigned int count
= zipmapLen(o
->ptr
);
3669 unsigned char *key
, *val
;
3670 unsigned int klen
, vlen
;
3672 if (rdbSaveLen(fp
,count
) == -1) return -1;
3673 while((p
= zipmapNext(p
,&key
,&klen
,&val
,&vlen
)) != NULL
) {
3674 if (rdbSaveRawString(fp
,key
,klen
) == -1) return -1;
3675 if (rdbSaveRawString(fp
,val
,vlen
) == -1) return -1;
3678 dictIterator
*di
= dictGetIterator(o
->ptr
);
3681 if (rdbSaveLen(fp
,dictSize((dict
*)o
->ptr
)) == -1) return -1;
3682 while((de
= dictNext(di
)) != NULL
) {
3683 robj
*key
= dictGetEntryKey(de
);
3684 robj
*val
= dictGetEntryVal(de
);
3686 if (rdbSaveStringObject(fp
,key
) == -1) return -1;
3687 if (rdbSaveStringObject(fp
,val
) == -1) return -1;
3689 dictReleaseIterator(di
);
3692 redisPanic("Unknown object type");
3697 /* Return the length the object will have on disk if saved with
3698 * the rdbSaveObject() function. Currently we use a trick to get
3699 * this length with very little changes to the code. In the future
3700 * we could switch to a faster solution. */
3701 static off_t
rdbSavedObjectLen(robj
*o
, FILE *fp
) {
3702 if (fp
== NULL
) fp
= server
.devnull
;
3704 assert(rdbSaveObject(fp
,o
) != 1);
3708 /* Return the number of pages required to save this object in the swap file */
3709 static off_t
rdbSavedObjectPages(robj
*o
, FILE *fp
) {
3710 off_t bytes
= rdbSavedObjectLen(o
,fp
);
3712 return (bytes
+(server
.vm_page_size
-1))/server
.vm_page_size
;
3715 /* Save the DB on disk. Return REDIS_ERR on error, REDIS_OK on success */
3716 static int rdbSave(char *filename
) {
3717 dictIterator
*di
= NULL
;
3722 time_t now
= time(NULL
);
3724 /* Wait for I/O threads to terminate, just in case this is a
3725 * foreground-saving, to avoid seeking the swap file descriptor at the
3727 if (server
.vm_enabled
)
3728 waitEmptyIOJobsQueue();
3730 snprintf(tmpfile
,256,"temp-%d.rdb", (int) getpid());
3731 fp
= fopen(tmpfile
,"w");
3733 redisLog(REDIS_WARNING
, "Failed saving the DB: %s", strerror(errno
));
3736 if (fwrite("REDIS0001",9,1,fp
) == 0) goto werr
;
3737 for (j
= 0; j
< server
.dbnum
; j
++) {
3738 redisDb
*db
= server
.db
+j
;
3740 if (dictSize(d
) == 0) continue;
3741 di
= dictGetIterator(d
);
3747 /* Write the SELECT DB opcode */
3748 if (rdbSaveType(fp
,REDIS_SELECTDB
) == -1) goto werr
;
3749 if (rdbSaveLen(fp
,j
) == -1) goto werr
;
3751 /* Iterate this DB writing every entry */
3752 while((de
= dictNext(di
)) != NULL
) {
3753 robj
*key
= dictGetEntryKey(de
);
3754 robj
*o
= dictGetEntryVal(de
);
3755 time_t expiretime
= getExpire(db
,key
);
3757 /* Save the expire time */
3758 if (expiretime
!= -1) {
3759 /* If this key is already expired skip it */
3760 if (expiretime
< now
) continue;
3761 if (rdbSaveType(fp
,REDIS_EXPIRETIME
) == -1) goto werr
;
3762 if (rdbSaveTime(fp
,expiretime
) == -1) goto werr
;
3764 /* Save the key and associated value. This requires special
3765 * handling if the value is swapped out. */
3766 if (!server
.vm_enabled
|| key
->storage
== REDIS_VM_MEMORY
||
3767 key
->storage
== REDIS_VM_SWAPPING
) {
3768 /* Save type, key, value */
3769 if (rdbSaveType(fp
,o
->type
) == -1) goto werr
;
3770 if (rdbSaveStringObject(fp
,key
) == -1) goto werr
;
3771 if (rdbSaveObject(fp
,o
) == -1) goto werr
;
3773 /* REDIS_VM_SWAPPED or REDIS_VM_LOADING */
3775 /* Get a preview of the object in memory */
3776 po
= vmPreviewObject(key
);
3777 /* Save type, key, value */
3778 if (rdbSaveType(fp
,key
->vtype
) == -1) goto werr
;
3779 if (rdbSaveStringObject(fp
,key
) == -1) goto werr
;
3780 if (rdbSaveObject(fp
,po
) == -1) goto werr
;
3781 /* Remove the loaded object from memory */
3785 dictReleaseIterator(di
);
3788 if (rdbSaveType(fp
,REDIS_EOF
) == -1) goto werr
;
3790 /* Make sure data will not remain on the OS's output buffers */
3795 /* Use RENAME to make sure the DB file is changed atomically only
3796 * if the generate DB file is ok. */
3797 if (rename(tmpfile
,filename
) == -1) {
3798 redisLog(REDIS_WARNING
,"Error moving temp DB file on the final destination: %s", strerror(errno
));
3802 redisLog(REDIS_NOTICE
,"DB saved on disk");
3804 server
.lastsave
= time(NULL
);
3810 redisLog(REDIS_WARNING
,"Write error saving DB on disk: %s", strerror(errno
));
3811 if (di
) dictReleaseIterator(di
);
3815 static int rdbSaveBackground(char *filename
) {
3818 if (server
.bgsavechildpid
!= -1) return REDIS_ERR
;
3819 if (server
.vm_enabled
) waitEmptyIOJobsQueue();
3820 if ((childpid
= fork()) == 0) {
3822 if (server
.vm_enabled
) vmReopenSwapFile();
3824 if (rdbSave(filename
) == REDIS_OK
) {
3831 if (childpid
== -1) {
3832 redisLog(REDIS_WARNING
,"Can't save in background: fork: %s",
3836 redisLog(REDIS_NOTICE
,"Background saving started by pid %d",childpid
);
3837 server
.bgsavechildpid
= childpid
;
3838 updateDictResizePolicy();
3841 return REDIS_OK
; /* unreached */
3844 static void rdbRemoveTempFile(pid_t childpid
) {
3847 snprintf(tmpfile
,256,"temp-%d.rdb", (int) childpid
);
3851 static int rdbLoadType(FILE *fp
) {
3853 if (fread(&type
,1,1,fp
) == 0) return -1;
3857 static time_t rdbLoadTime(FILE *fp
) {
3859 if (fread(&t32
,4,1,fp
) == 0) return -1;
3860 return (time_t) t32
;
3863 /* Load an encoded length from the DB, see the REDIS_RDB_* defines on the top
3864 * of this file for a description of how these are stored on disk.
3866 * isencoded is set to 1 if the value read is not actually a length but
3867 * an "encoding type", check the above comments for more info */
3868 static uint32_t rdbLoadLen(FILE *fp
, int *isencoded
) {
3869 unsigned char buf
[2];
3873 if (isencoded
) *isencoded
= 0;
3874 if (fread(buf
,1,1,fp
) == 0) return REDIS_RDB_LENERR
;
3875 type
= (buf
[0]&0xC0)>>6;
3876 if (type
== REDIS_RDB_6BITLEN
) {
3877 /* Read a 6 bit len */
3879 } else if (type
== REDIS_RDB_ENCVAL
) {
3880 /* Read a 6 bit len encoding type */
3881 if (isencoded
) *isencoded
= 1;
3883 } else if (type
== REDIS_RDB_14BITLEN
) {
3884 /* Read a 14 bit len */
3885 if (fread(buf
+1,1,1,fp
) == 0) return REDIS_RDB_LENERR
;
3886 return ((buf
[0]&0x3F)<<8)|buf
[1];
3888 /* Read a 32 bit len */
3889 if (fread(&len
,4,1,fp
) == 0) return REDIS_RDB_LENERR
;
3894 /* Load an integer-encoded object from file 'fp', with the specified
3895 * encoding type 'enctype'. If encode is true the function may return
3896 * an integer-encoded object as reply, otherwise the returned object
3897 * will always be encoded as a raw string. */
3898 static robj
*rdbLoadIntegerObject(FILE *fp
, int enctype
, int encode
) {
3899 unsigned char enc
[4];
3902 if (enctype
== REDIS_RDB_ENC_INT8
) {
3903 if (fread(enc
,1,1,fp
) == 0) return NULL
;
3904 val
= (signed char)enc
[0];
3905 } else if (enctype
== REDIS_RDB_ENC_INT16
) {
3907 if (fread(enc
,2,1,fp
) == 0) return NULL
;
3908 v
= enc
[0]|(enc
[1]<<8);
3910 } else if (enctype
== REDIS_RDB_ENC_INT32
) {
3912 if (fread(enc
,4,1,fp
) == 0) return NULL
;
3913 v
= enc
[0]|(enc
[1]<<8)|(enc
[2]<<16)|(enc
[3]<<24);
3916 val
= 0; /* anti-warning */
3917 redisPanic("Unknown RDB integer encoding type");
3920 return createStringObjectFromLongLong(val
);
3922 return createObject(REDIS_STRING
,sdsfromlonglong(val
));
3925 static robj
*rdbLoadLzfStringObject(FILE*fp
) {
3926 unsigned int len
, clen
;
3927 unsigned char *c
= NULL
;
3930 if ((clen
= rdbLoadLen(fp
,NULL
)) == REDIS_RDB_LENERR
) return NULL
;
3931 if ((len
= rdbLoadLen(fp
,NULL
)) == REDIS_RDB_LENERR
) return NULL
;
3932 if ((c
= zmalloc(clen
)) == NULL
) goto err
;
3933 if ((val
= sdsnewlen(NULL
,len
)) == NULL
) goto err
;
3934 if (fread(c
,clen
,1,fp
) == 0) goto err
;
3935 if (lzf_decompress(c
,clen
,val
,len
) == 0) goto err
;
3937 return createObject(REDIS_STRING
,val
);
3944 static robj
*rdbGenericLoadStringObject(FILE*fp
, int encode
) {
3949 len
= rdbLoadLen(fp
,&isencoded
);
3952 case REDIS_RDB_ENC_INT8
:
3953 case REDIS_RDB_ENC_INT16
:
3954 case REDIS_RDB_ENC_INT32
:
3955 return rdbLoadIntegerObject(fp
,len
,encode
);
3956 case REDIS_RDB_ENC_LZF
:
3957 return rdbLoadLzfStringObject(fp
);
3959 redisPanic("Unknown RDB encoding type");
3963 if (len
== REDIS_RDB_LENERR
) return NULL
;
3964 val
= sdsnewlen(NULL
,len
);
3965 if (len
&& fread(val
,len
,1,fp
) == 0) {
3969 return createObject(REDIS_STRING
,val
);
3972 static robj
*rdbLoadStringObject(FILE *fp
) {
3973 return rdbGenericLoadStringObject(fp
,0);
3976 static robj
*rdbLoadEncodedStringObject(FILE *fp
) {
3977 return rdbGenericLoadStringObject(fp
,1);
3980 /* For information about double serialization check rdbSaveDoubleValue() */
3981 static int rdbLoadDoubleValue(FILE *fp
, double *val
) {
3985 if (fread(&len
,1,1,fp
) == 0) return -1;
3987 case 255: *val
= R_NegInf
; return 0;
3988 case 254: *val
= R_PosInf
; return 0;
3989 case 253: *val
= R_Nan
; return 0;
3991 if (fread(buf
,len
,1,fp
) == 0) return -1;
3993 sscanf(buf
, "%lg", val
);
3998 /* Load a Redis object of the specified type from the specified file.
3999 * On success a newly allocated object is returned, otherwise NULL. */
4000 static robj
*rdbLoadObject(int type
, FILE *fp
) {
4003 redisLog(REDIS_DEBUG
,"LOADING OBJECT %d (at %d)\n",type
,ftell(fp
));
4004 if (type
== REDIS_STRING
) {
4005 /* Read string value */
4006 if ((o
= rdbLoadEncodedStringObject(fp
)) == NULL
) return NULL
;
4007 o
= tryObjectEncoding(o
);
4008 } else if (type
== REDIS_LIST
|| type
== REDIS_SET
) {
4009 /* Read list/set value */
4012 if ((listlen
= rdbLoadLen(fp
,NULL
)) == REDIS_RDB_LENERR
) return NULL
;
4013 o
= (type
== REDIS_LIST
) ? createListObject() : createSetObject();
4014 /* It's faster to expand the dict to the right size asap in order
4015 * to avoid rehashing */
4016 if (type
== REDIS_SET
&& listlen
> DICT_HT_INITIAL_SIZE
)
4017 dictExpand(o
->ptr
,listlen
);
4018 /* Load every single element of the list/set */
4022 if ((ele
= rdbLoadEncodedStringObject(fp
)) == NULL
) return NULL
;
4023 ele
= tryObjectEncoding(ele
);
4024 if (type
== REDIS_LIST
) {
4025 listAddNodeTail((list
*)o
->ptr
,ele
);
4027 dictAdd((dict
*)o
->ptr
,ele
,NULL
);
4030 } else if (type
== REDIS_ZSET
) {
4031 /* Read list/set value */
4035 if ((zsetlen
= rdbLoadLen(fp
,NULL
)) == REDIS_RDB_LENERR
) return NULL
;
4036 o
= createZsetObject();
4038 /* Load every single element of the list/set */
4041 double *score
= zmalloc(sizeof(double));
4043 if ((ele
= rdbLoadEncodedStringObject(fp
)) == NULL
) return NULL
;
4044 ele
= tryObjectEncoding(ele
);
4045 if (rdbLoadDoubleValue(fp
,score
) == -1) return NULL
;
4046 dictAdd(zs
->dict
,ele
,score
);
4047 zslInsert(zs
->zsl
,*score
,ele
);
4048 incrRefCount(ele
); /* added to skiplist */
4050 } else if (type
== REDIS_HASH
) {
4053 if ((hashlen
= rdbLoadLen(fp
,NULL
)) == REDIS_RDB_LENERR
) return NULL
;
4054 o
= createHashObject();
4055 /* Too many entries? Use an hash table. */
4056 if (hashlen
> server
.hash_max_zipmap_entries
)
4057 convertToRealHash(o
);
4058 /* Load every key/value, then set it into the zipmap or hash
4059 * table, as needed. */
4063 if ((key
= rdbLoadStringObject(fp
)) == NULL
) return NULL
;
4064 if ((val
= rdbLoadStringObject(fp
)) == NULL
) return NULL
;
4065 /* If we are using a zipmap and there are too big values
4066 * the object is converted to real hash table encoding. */
4067 if (o
->encoding
!= REDIS_ENCODING_HT
&&
4068 (sdslen(key
->ptr
) > server
.hash_max_zipmap_value
||
4069 sdslen(val
->ptr
) > server
.hash_max_zipmap_value
))
4071 convertToRealHash(o
);
4074 if (o
->encoding
== REDIS_ENCODING_ZIPMAP
) {
4075 unsigned char *zm
= o
->ptr
;
4077 zm
= zipmapSet(zm
,key
->ptr
,sdslen(key
->ptr
),
4078 val
->ptr
,sdslen(val
->ptr
),NULL
);
4083 key
= tryObjectEncoding(key
);
4084 val
= tryObjectEncoding(val
);
4085 dictAdd((dict
*)o
->ptr
,key
,val
);
4089 redisPanic("Unknown object type");
4094 static int rdbLoad(char *filename
) {
4097 int type
, retval
, rdbver
;
4098 int swap_all_values
= 0;
4099 dict
*d
= server
.db
[0].dict
;
4100 redisDb
*db
= server
.db
+0;
4102 time_t expiretime
, now
= time(NULL
);
4103 long long loadedkeys
= 0;
4105 fp
= fopen(filename
,"r");
4106 if (!fp
) return REDIS_ERR
;
4107 if (fread(buf
,9,1,fp
) == 0) goto eoferr
;
4109 if (memcmp(buf
,"REDIS",5) != 0) {
4111 redisLog(REDIS_WARNING
,"Wrong signature trying to load DB from file");
4114 rdbver
= atoi(buf
+5);
4117 redisLog(REDIS_WARNING
,"Can't handle RDB format version %d",rdbver
);
4125 if ((type
= rdbLoadType(fp
)) == -1) goto eoferr
;
4126 if (type
== REDIS_EXPIRETIME
) {
4127 if ((expiretime
= rdbLoadTime(fp
)) == -1) goto eoferr
;
4128 /* We read the time so we need to read the object type again */
4129 if ((type
= rdbLoadType(fp
)) == -1) goto eoferr
;
4131 if (type
== REDIS_EOF
) break;
4132 /* Handle SELECT DB opcode as a special case */
4133 if (type
== REDIS_SELECTDB
) {
4134 if ((dbid
= rdbLoadLen(fp
,NULL
)) == REDIS_RDB_LENERR
)
4136 if (dbid
>= (unsigned)server
.dbnum
) {
4137 redisLog(REDIS_WARNING
,"FATAL: Data file was created with a Redis server configured to handle more than %d databases. Exiting\n", server
.dbnum
);
4140 db
= server
.db
+dbid
;
4145 if ((key
= rdbLoadStringObject(fp
)) == NULL
) goto eoferr
;
4147 if ((val
= rdbLoadObject(type
,fp
)) == NULL
) goto eoferr
;
4148 /* Check if the key already expired */
4149 if (expiretime
!= -1 && expiretime
< now
) {
4154 /* Add the new object in the hash table */
4155 retval
= dictAdd(d
,key
,val
);
4156 if (retval
== DICT_ERR
) {
4157 redisLog(REDIS_WARNING
,"Loading DB, duplicated key (%s) found! Unrecoverable error, exiting now.", key
->ptr
);
4161 /* Set the expire time if needed */
4162 if (expiretime
!= -1) setExpire(db
,key
,expiretime
);
4164 /* Handle swapping while loading big datasets when VM is on */
4166 /* If we detecter we are hopeless about fitting something in memory
4167 * we just swap every new key on disk. Directly...
4168 * Note that's important to check for this condition before resorting
4169 * to random sampling, otherwise we may try to swap already
4171 if (swap_all_values
) {
4172 dictEntry
*de
= dictFind(d
,key
);
4174 /* de may be NULL since the key already expired */
4176 key
= dictGetEntryKey(de
);
4177 val
= dictGetEntryVal(de
);
4179 if (vmSwapObjectBlocking(key
,val
) == REDIS_OK
) {
4180 dictGetEntryVal(de
) = NULL
;
4186 /* If we have still some hope of having some value fitting memory
4187 * then we try random sampling. */
4188 if (!swap_all_values
&& server
.vm_enabled
&& (loadedkeys
% 5000) == 0) {
4189 while (zmalloc_used_memory() > server
.vm_max_memory
) {
4190 if (vmSwapOneObjectBlocking() == REDIS_ERR
) break;
4192 if (zmalloc_used_memory() > server
.vm_max_memory
)
4193 swap_all_values
= 1; /* We are already using too much mem */
4199 eoferr
: /* unexpected end of file is handled here with a fatal exit */
4200 redisLog(REDIS_WARNING
,"Short read or OOM loading DB. Unrecoverable error, aborting now.");
4202 return REDIS_ERR
; /* Just to avoid warning */
4205 /*================================== Shutdown =============================== */
4206 static int prepareForShutdown() {
4207 redisLog(REDIS_WARNING
,"User requested shutdown, saving DB...");
4208 /* Kill the saving child if there is a background saving in progress.
4209 We want to avoid race conditions, for instance our saving child may
4210 overwrite the synchronous saving did by SHUTDOWN. */
4211 if (server
.bgsavechildpid
!= -1) {
4212 redisLog(REDIS_WARNING
,"There is a live saving child. Killing it!");
4213 kill(server
.bgsavechildpid
,SIGKILL
);
4214 rdbRemoveTempFile(server
.bgsavechildpid
);
4216 if (server
.appendonly
) {
4217 /* Append only file: fsync() the AOF and exit */
4218 fsync(server
.appendfd
);
4219 if (server
.vm_enabled
) unlink(server
.vm_swap_file
);
4221 /* Snapshotting. Perform a SYNC SAVE and exit */
4222 if (rdbSave(server
.dbfilename
) == REDIS_OK
) {
4223 if (server
.daemonize
)
4224 unlink(server
.pidfile
);
4225 redisLog(REDIS_WARNING
,"%zu bytes used at exit",zmalloc_used_memory());
4227 /* Ooops.. error saving! The best we can do is to continue
4228 * operating. Note that if there was a background saving process,
4229 * in the next cron() Redis will be notified that the background
4230 * saving aborted, handling special stuff like slaves pending for
4231 * synchronization... */
4232 redisLog(REDIS_WARNING
,"Error trying to save the DB, can't exit");
4236 redisLog(REDIS_WARNING
,"Server exit now, bye bye...");
4240 /*================================== Commands =============================== */
4242 static void authCommand(redisClient
*c
) {
4243 if (!server
.requirepass
|| !strcmp(c
->argv
[1]->ptr
, server
.requirepass
)) {
4244 c
->authenticated
= 1;
4245 addReply(c
,shared
.ok
);
4247 c
->authenticated
= 0;
4248 addReplySds(c
,sdscatprintf(sdsempty(),"-ERR invalid password\r\n"));
4252 static void pingCommand(redisClient
*c
) {
4253 addReply(c
,shared
.pong
);
4256 static void echoCommand(redisClient
*c
) {
4257 addReplyBulk(c
,c
->argv
[1]);
4260 /*=================================== Strings =============================== */
4262 static void setGenericCommand(redisClient
*c
, int nx
, robj
*key
, robj
*val
, robj
*expire
) {
4264 long seconds
= 0; /* initialized to avoid an harmness warning */
4267 if (getLongFromObjectOrReply(c
, expire
, &seconds
, NULL
) != REDIS_OK
)
4270 addReplySds(c
,sdsnew("-ERR invalid expire time in SETEX\r\n"));
4275 touchWatchedKey(c
->db
,key
);
4276 if (nx
) deleteIfVolatile(c
->db
,key
);
4277 retval
= dictAdd(c
->db
->dict
,key
,val
);
4278 if (retval
== DICT_ERR
) {
4280 /* If the key is about a swapped value, we want a new key object
4281 * to overwrite the old. So we delete the old key in the database.
4282 * This will also make sure that swap pages about the old object
4283 * will be marked as free. */
4284 if (server
.vm_enabled
&& deleteIfSwapped(c
->db
,key
))
4286 dictReplace(c
->db
->dict
,key
,val
);
4289 addReply(c
,shared
.czero
);
4297 removeExpire(c
->db
,key
);
4298 if (expire
) setExpire(c
->db
,key
,time(NULL
)+seconds
);
4299 addReply(c
, nx
? shared
.cone
: shared
.ok
);
4302 static void setCommand(redisClient
*c
) {
4303 setGenericCommand(c
,0,c
->argv
[1],c
->argv
[2],NULL
);
4306 static void setnxCommand(redisClient
*c
) {
4307 setGenericCommand(c
,1,c
->argv
[1],c
->argv
[2],NULL
);
4310 static void setexCommand(redisClient
*c
) {
4311 setGenericCommand(c
,0,c
->argv
[1],c
->argv
[3],c
->argv
[2]);
4314 static int getGenericCommand(redisClient
*c
) {
4317 if ((o
= lookupKeyReadOrReply(c
,c
->argv
[1],shared
.nullbulk
)) == NULL
)
4320 if (o
->type
!= REDIS_STRING
) {
4321 addReply(c
,shared
.wrongtypeerr
);
4329 static void getCommand(redisClient
*c
) {
4330 getGenericCommand(c
);
4333 static void getsetCommand(redisClient
*c
) {
4334 if (getGenericCommand(c
) == REDIS_ERR
) return;
4335 if (dictAdd(c
->db
->dict
,c
->argv
[1],c
->argv
[2]) == DICT_ERR
) {
4336 dictReplace(c
->db
->dict
,c
->argv
[1],c
->argv
[2]);
4338 incrRefCount(c
->argv
[1]);
4340 incrRefCount(c
->argv
[2]);
4342 removeExpire(c
->db
,c
->argv
[1]);
4345 static void mgetCommand(redisClient
*c
) {
4348 addReplySds(c
,sdscatprintf(sdsempty(),"*%d\r\n",c
->argc
-1));
4349 for (j
= 1; j
< c
->argc
; j
++) {
4350 robj
*o
= lookupKeyRead(c
->db
,c
->argv
[j
]);
4352 addReply(c
,shared
.nullbulk
);
4354 if (o
->type
!= REDIS_STRING
) {
4355 addReply(c
,shared
.nullbulk
);
4363 static void msetGenericCommand(redisClient
*c
, int nx
) {
4364 int j
, busykeys
= 0;
4366 if ((c
->argc
% 2) == 0) {
4367 addReplySds(c
,sdsnew("-ERR wrong number of arguments for MSET\r\n"));
4370 /* Handle the NX flag. The MSETNX semantic is to return zero and don't
4371 * set nothing at all if at least one already key exists. */
4373 for (j
= 1; j
< c
->argc
; j
+= 2) {
4374 if (lookupKeyWrite(c
->db
,c
->argv
[j
]) != NULL
) {
4380 addReply(c
, shared
.czero
);
4384 for (j
= 1; j
< c
->argc
; j
+= 2) {
4387 c
->argv
[j
+1] = tryObjectEncoding(c
->argv
[j
+1]);
4388 retval
= dictAdd(c
->db
->dict
,c
->argv
[j
],c
->argv
[j
+1]);
4389 if (retval
== DICT_ERR
) {
4390 dictReplace(c
->db
->dict
,c
->argv
[j
],c
->argv
[j
+1]);
4391 incrRefCount(c
->argv
[j
+1]);
4393 incrRefCount(c
->argv
[j
]);
4394 incrRefCount(c
->argv
[j
+1]);
4396 removeExpire(c
->db
,c
->argv
[j
]);
4398 server
.dirty
+= (c
->argc
-1)/2;
4399 addReply(c
, nx
? shared
.cone
: shared
.ok
);
4402 static void msetCommand(redisClient
*c
) {
4403 msetGenericCommand(c
,0);
4406 static void msetnxCommand(redisClient
*c
) {
4407 msetGenericCommand(c
,1);
4410 static void incrDecrCommand(redisClient
*c
, long long incr
) {
4415 o
= lookupKeyWrite(c
->db
,c
->argv
[1]);
4416 if (o
!= NULL
&& checkType(c
,o
,REDIS_STRING
)) return;
4417 if (getLongLongFromObjectOrReply(c
,o
,&value
,NULL
) != REDIS_OK
) return;
4420 o
= createStringObjectFromLongLong(value
);
4421 retval
= dictAdd(c
->db
->dict
,c
->argv
[1],o
);
4422 if (retval
== DICT_ERR
) {
4423 dictReplace(c
->db
->dict
,c
->argv
[1],o
);
4424 removeExpire(c
->db
,c
->argv
[1]);
4426 incrRefCount(c
->argv
[1]);
4429 addReply(c
,shared
.colon
);
4431 addReply(c
,shared
.crlf
);
4434 static void incrCommand(redisClient
*c
) {
4435 incrDecrCommand(c
,1);
4438 static void decrCommand(redisClient
*c
) {
4439 incrDecrCommand(c
,-1);
4442 static void incrbyCommand(redisClient
*c
) {
4445 if (getLongLongFromObjectOrReply(c
, c
->argv
[2], &incr
, NULL
) != REDIS_OK
) return;
4446 incrDecrCommand(c
,incr
);
4449 static void decrbyCommand(redisClient
*c
) {
4452 if (getLongLongFromObjectOrReply(c
, c
->argv
[2], &incr
, NULL
) != REDIS_OK
) return;
4453 incrDecrCommand(c
,-incr
);
4456 static void appendCommand(redisClient
*c
) {
4461 o
= lookupKeyWrite(c
->db
,c
->argv
[1]);
4463 /* Create the key */
4464 retval
= dictAdd(c
->db
->dict
,c
->argv
[1],c
->argv
[2]);
4465 incrRefCount(c
->argv
[1]);
4466 incrRefCount(c
->argv
[2]);
4467 totlen
= stringObjectLen(c
->argv
[2]);
4471 de
= dictFind(c
->db
->dict
,c
->argv
[1]);
4474 o
= dictGetEntryVal(de
);
4475 if (o
->type
!= REDIS_STRING
) {
4476 addReply(c
,shared
.wrongtypeerr
);
4479 /* If the object is specially encoded or shared we have to make
4481 if (o
->refcount
!= 1 || o
->encoding
!= REDIS_ENCODING_RAW
) {
4482 robj
*decoded
= getDecodedObject(o
);
4484 o
= createStringObject(decoded
->ptr
, sdslen(decoded
->ptr
));
4485 decrRefCount(decoded
);
4486 dictReplace(c
->db
->dict
,c
->argv
[1],o
);
4489 if (c
->argv
[2]->encoding
== REDIS_ENCODING_RAW
) {
4490 o
->ptr
= sdscatlen(o
->ptr
,
4491 c
->argv
[2]->ptr
, sdslen(c
->argv
[2]->ptr
));
4493 o
->ptr
= sdscatprintf(o
->ptr
, "%ld",
4494 (unsigned long) c
->argv
[2]->ptr
);
4496 totlen
= sdslen(o
->ptr
);
4499 addReplySds(c
,sdscatprintf(sdsempty(),":%lu\r\n",(unsigned long)totlen
));
4502 static void substrCommand(redisClient
*c
) {
4504 long start
= atoi(c
->argv
[2]->ptr
);
4505 long end
= atoi(c
->argv
[3]->ptr
);
4506 size_t rangelen
, strlen
;
4509 if ((o
= lookupKeyReadOrReply(c
,c
->argv
[1],shared
.nullbulk
)) == NULL
||
4510 checkType(c
,o
,REDIS_STRING
)) return;
4512 o
= getDecodedObject(o
);
4513 strlen
= sdslen(o
->ptr
);
4515 /* convert negative indexes */
4516 if (start
< 0) start
= strlen
+start
;
4517 if (end
< 0) end
= strlen
+end
;
4518 if (start
< 0) start
= 0;
4519 if (end
< 0) end
= 0;
4521 /* indexes sanity checks */
4522 if (start
> end
|| (size_t)start
>= strlen
) {
4523 /* Out of range start or start > end result in null reply */
4524 addReply(c
,shared
.nullbulk
);
4528 if ((size_t)end
>= strlen
) end
= strlen
-1;
4529 rangelen
= (end
-start
)+1;
4531 /* Return the result */
4532 addReplySds(c
,sdscatprintf(sdsempty(),"$%zu\r\n",rangelen
));
4533 range
= sdsnewlen((char*)o
->ptr
+start
,rangelen
);
4534 addReplySds(c
,range
);
4535 addReply(c
,shared
.crlf
);
4539 /* ========================= Type agnostic commands ========================= */
4541 static void delCommand(redisClient
*c
) {
4544 for (j
= 1; j
< c
->argc
; j
++) {
4545 if (deleteKey(c
->db
,c
->argv
[j
])) {
4546 touchWatchedKey(c
->db
,c
->argv
[j
]);
4551 addReplyLongLong(c
,deleted
);
4554 static void existsCommand(redisClient
*c
) {
4555 expireIfNeeded(c
->db
,c
->argv
[1]);
4556 if (dictFind(c
->db
->dict
,c
->argv
[1])) {
4557 addReply(c
, shared
.cone
);
4559 addReply(c
, shared
.czero
);
4563 static void selectCommand(redisClient
*c
) {
4564 int id
= atoi(c
->argv
[1]->ptr
);
4566 if (selectDb(c
,id
) == REDIS_ERR
) {
4567 addReplySds(c
,sdsnew("-ERR invalid DB index\r\n"));
4569 addReply(c
,shared
.ok
);
4573 static void randomkeyCommand(redisClient
*c
) {
4578 de
= dictGetRandomKey(c
->db
->dict
);
4579 if (!de
|| expireIfNeeded(c
->db
,dictGetEntryKey(de
)) == 0) break;
4583 addReply(c
,shared
.nullbulk
);
4587 key
= dictGetEntryKey(de
);
4588 if (server
.vm_enabled
) {
4589 key
= dupStringObject(key
);
4590 addReplyBulk(c
,key
);
4593 addReplyBulk(c
,key
);
4597 static void keysCommand(redisClient
*c
) {
4600 sds pattern
= c
->argv
[1]->ptr
;
4601 int plen
= sdslen(pattern
);
4602 unsigned long numkeys
= 0;
4603 robj
*lenobj
= createObject(REDIS_STRING
,NULL
);
4605 di
= dictGetIterator(c
->db
->dict
);
4607 decrRefCount(lenobj
);
4608 while((de
= dictNext(di
)) != NULL
) {
4609 robj
*keyobj
= dictGetEntryKey(de
);
4611 sds key
= keyobj
->ptr
;
4612 if ((pattern
[0] == '*' && pattern
[1] == '\0') ||
4613 stringmatchlen(pattern
,plen
,key
,sdslen(key
),0)) {
4614 if (expireIfNeeded(c
->db
,keyobj
) == 0) {
4615 addReplyBulk(c
,keyobj
);
4620 dictReleaseIterator(di
);
4621 lenobj
->ptr
= sdscatprintf(sdsempty(),"*%lu\r\n",numkeys
);
4624 static void dbsizeCommand(redisClient
*c
) {
4626 sdscatprintf(sdsempty(),":%lu\r\n",dictSize(c
->db
->dict
)));
4629 static void lastsaveCommand(redisClient
*c
) {
4631 sdscatprintf(sdsempty(),":%lu\r\n",server
.lastsave
));
4634 static void typeCommand(redisClient
*c
) {
4638 o
= lookupKeyRead(c
->db
,c
->argv
[1]);
4643 case REDIS_STRING
: type
= "+string"; break;
4644 case REDIS_LIST
: type
= "+list"; break;
4645 case REDIS_SET
: type
= "+set"; break;
4646 case REDIS_ZSET
: type
= "+zset"; break;
4647 case REDIS_HASH
: type
= "+hash"; break;
4648 default: type
= "+unknown"; break;
4651 addReplySds(c
,sdsnew(type
));
4652 addReply(c
,shared
.crlf
);
4655 static void saveCommand(redisClient
*c
) {
4656 if (server
.bgsavechildpid
!= -1) {
4657 addReplySds(c
,sdsnew("-ERR background save in progress\r\n"));
4660 if (rdbSave(server
.dbfilename
) == REDIS_OK
) {
4661 addReply(c
,shared
.ok
);
4663 addReply(c
,shared
.err
);
4667 static void bgsaveCommand(redisClient
*c
) {
4668 if (server
.bgsavechildpid
!= -1) {
4669 addReplySds(c
,sdsnew("-ERR background save already in progress\r\n"));
4672 if (rdbSaveBackground(server
.dbfilename
) == REDIS_OK
) {
4673 char *status
= "+Background saving started\r\n";
4674 addReplySds(c
,sdsnew(status
));
4676 addReply(c
,shared
.err
);
4680 static void shutdownCommand(redisClient
*c
) {
4681 if (prepareForShutdown() == REDIS_OK
)
4683 addReplySds(c
, sdsnew("-ERR Errors trying to SHUTDOWN. Check logs.\r\n"));
4686 static void renameGenericCommand(redisClient
*c
, int nx
) {
4689 /* To use the same key as src and dst is probably an error */
4690 if (sdscmp(c
->argv
[1]->ptr
,c
->argv
[2]->ptr
) == 0) {
4691 addReply(c
,shared
.sameobjecterr
);
4695 if ((o
= lookupKeyWriteOrReply(c
,c
->argv
[1],shared
.nokeyerr
)) == NULL
)
4699 deleteIfVolatile(c
->db
,c
->argv
[2]);
4700 if (dictAdd(c
->db
->dict
,c
->argv
[2],o
) == DICT_ERR
) {
4703 addReply(c
,shared
.czero
);
4706 dictReplace(c
->db
->dict
,c
->argv
[2],o
);
4708 incrRefCount(c
->argv
[2]);
4710 deleteKey(c
->db
,c
->argv
[1]);
4711 touchWatchedKey(c
->db
,c
->argv
[2]);
4713 addReply(c
,nx
? shared
.cone
: shared
.ok
);
4716 static void renameCommand(redisClient
*c
) {
4717 renameGenericCommand(c
,0);
4720 static void renamenxCommand(redisClient
*c
) {
4721 renameGenericCommand(c
,1);
4724 static void moveCommand(redisClient
*c
) {
4729 /* Obtain source and target DB pointers */
4732 if (selectDb(c
,atoi(c
->argv
[2]->ptr
)) == REDIS_ERR
) {
4733 addReply(c
,shared
.outofrangeerr
);
4737 selectDb(c
,srcid
); /* Back to the source DB */
4739 /* If the user is moving using as target the same
4740 * DB as the source DB it is probably an error. */
4742 addReply(c
,shared
.sameobjecterr
);
4746 /* Check if the element exists and get a reference */
4747 o
= lookupKeyWrite(c
->db
,c
->argv
[1]);
4749 addReply(c
,shared
.czero
);
4753 /* Try to add the element to the target DB */
4754 deleteIfVolatile(dst
,c
->argv
[1]);
4755 if (dictAdd(dst
->dict
,c
->argv
[1],o
) == DICT_ERR
) {
4756 addReply(c
,shared
.czero
);
4759 incrRefCount(c
->argv
[1]);
4762 /* OK! key moved, free the entry in the source DB */
4763 deleteKey(src
,c
->argv
[1]);
4765 addReply(c
,shared
.cone
);
4768 /* =================================== Lists ================================ */
4769 static void pushGenericCommand(redisClient
*c
, int where
) {
4773 lobj
= lookupKeyWrite(c
->db
,c
->argv
[1]);
4775 if (handleClientsWaitingListPush(c
,c
->argv
[1],c
->argv
[2])) {
4776 addReply(c
,shared
.cone
);
4779 lobj
= createListObject();
4781 if (where
== REDIS_HEAD
) {
4782 listAddNodeHead(list
,c
->argv
[2]);
4784 listAddNodeTail(list
,c
->argv
[2]);
4786 dictAdd(c
->db
->dict
,c
->argv
[1],lobj
);
4787 incrRefCount(c
->argv
[1]);
4788 incrRefCount(c
->argv
[2]);
4790 if (lobj
->type
!= REDIS_LIST
) {
4791 addReply(c
,shared
.wrongtypeerr
);
4794 if (handleClientsWaitingListPush(c
,c
->argv
[1],c
->argv
[2])) {
4795 addReply(c
,shared
.cone
);
4799 if (where
== REDIS_HEAD
) {
4800 listAddNodeHead(list
,c
->argv
[2]);
4802 listAddNodeTail(list
,c
->argv
[2]);
4804 incrRefCount(c
->argv
[2]);
4807 addReplyLongLong(c
,listLength(list
));
4810 static void lpushCommand(redisClient
*c
) {
4811 pushGenericCommand(c
,REDIS_HEAD
);
4814 static void rpushCommand(redisClient
*c
) {
4815 pushGenericCommand(c
,REDIS_TAIL
);
4818 static void llenCommand(redisClient
*c
) {
4822 if ((o
= lookupKeyReadOrReply(c
,c
->argv
[1],shared
.czero
)) == NULL
||
4823 checkType(c
,o
,REDIS_LIST
)) return;
4826 addReplyUlong(c
,listLength(l
));
4829 static void lindexCommand(redisClient
*c
) {
4831 int index
= atoi(c
->argv
[2]->ptr
);
4835 if ((o
= lookupKeyReadOrReply(c
,c
->argv
[1],shared
.nullbulk
)) == NULL
||
4836 checkType(c
,o
,REDIS_LIST
)) return;
4839 ln
= listIndex(list
, index
);
4841 addReply(c
,shared
.nullbulk
);
4843 robj
*ele
= listNodeValue(ln
);
4844 addReplyBulk(c
,ele
);
4848 static void lsetCommand(redisClient
*c
) {
4850 int index
= atoi(c
->argv
[2]->ptr
);
4854 if ((o
= lookupKeyWriteOrReply(c
,c
->argv
[1],shared
.nokeyerr
)) == NULL
||
4855 checkType(c
,o
,REDIS_LIST
)) return;
4858 ln
= listIndex(list
, index
);
4860 addReply(c
,shared
.outofrangeerr
);
4862 robj
*ele
= listNodeValue(ln
);
4865 listNodeValue(ln
) = c
->argv
[3];
4866 incrRefCount(c
->argv
[3]);
4867 addReply(c
,shared
.ok
);
4872 static void popGenericCommand(redisClient
*c
, int where
) {
4877 if ((o
= lookupKeyWriteOrReply(c
,c
->argv
[1],shared
.nullbulk
)) == NULL
||
4878 checkType(c
,o
,REDIS_LIST
)) return;
4881 if (where
== REDIS_HEAD
)
4882 ln
= listFirst(list
);
4884 ln
= listLast(list
);
4887 addReply(c
,shared
.nullbulk
);
4889 robj
*ele
= listNodeValue(ln
);
4890 addReplyBulk(c
,ele
);
4891 listDelNode(list
,ln
);
4892 if (listLength(list
) == 0) deleteKey(c
->db
,c
->argv
[1]);
4897 static void lpopCommand(redisClient
*c
) {
4898 popGenericCommand(c
,REDIS_HEAD
);
4901 static void rpopCommand(redisClient
*c
) {
4902 popGenericCommand(c
,REDIS_TAIL
);
4905 static void lrangeCommand(redisClient
*c
) {
4907 int start
= atoi(c
->argv
[2]->ptr
);
4908 int end
= atoi(c
->argv
[3]->ptr
);
4915 if ((o
= lookupKeyReadOrReply(c
,c
->argv
[1],shared
.emptymultibulk
)) == NULL
4916 || checkType(c
,o
,REDIS_LIST
)) return;
4918 llen
= listLength(list
);
4920 /* convert negative indexes */
4921 if (start
< 0) start
= llen
+start
;
4922 if (end
< 0) end
= llen
+end
;
4923 if (start
< 0) start
= 0;
4924 if (end
< 0) end
= 0;
4926 /* indexes sanity checks */
4927 if (start
> end
|| start
>= llen
) {
4928 /* Out of range start or start > end result in empty list */
4929 addReply(c
,shared
.emptymultibulk
);
4932 if (end
>= llen
) end
= llen
-1;
4933 rangelen
= (end
-start
)+1;
4935 /* Return the result in form of a multi-bulk reply */
4936 ln
= listIndex(list
, start
);
4937 addReplySds(c
,sdscatprintf(sdsempty(),"*%d\r\n",rangelen
));
4938 for (j
= 0; j
< rangelen
; j
++) {
4939 ele
= listNodeValue(ln
);
4940 addReplyBulk(c
,ele
);
4945 static void ltrimCommand(redisClient
*c
) {
4947 int start
= atoi(c
->argv
[2]->ptr
);
4948 int end
= atoi(c
->argv
[3]->ptr
);
4950 int j
, ltrim
, rtrim
;
4954 if ((o
= lookupKeyWriteOrReply(c
,c
->argv
[1],shared
.ok
)) == NULL
||
4955 checkType(c
,o
,REDIS_LIST
)) return;
4957 llen
= listLength(list
);
4959 /* convert negative indexes */
4960 if (start
< 0) start
= llen
+start
;
4961 if (end
< 0) end
= llen
+end
;
4962 if (start
< 0) start
= 0;
4963 if (end
< 0) end
= 0;
4965 /* indexes sanity checks */
4966 if (start
> end
|| start
>= llen
) {
4967 /* Out of range start or start > end result in empty list */
4971 if (end
>= llen
) end
= llen
-1;
4976 /* Remove list elements to perform the trim */
4977 for (j
= 0; j
< ltrim
; j
++) {
4978 ln
= listFirst(list
);
4979 listDelNode(list
,ln
);
4981 for (j
= 0; j
< rtrim
; j
++) {
4982 ln
= listLast(list
);
4983 listDelNode(list
,ln
);
4985 if (listLength(list
) == 0) deleteKey(c
->db
,c
->argv
[1]);
4987 addReply(c
,shared
.ok
);
4990 static void lremCommand(redisClient
*c
) {
4993 listNode
*ln
, *next
;
4994 int toremove
= atoi(c
->argv
[2]->ptr
);
4998 if ((o
= lookupKeyWriteOrReply(c
,c
->argv
[1],shared
.czero
)) == NULL
||
4999 checkType(c
,o
,REDIS_LIST
)) return;
5003 toremove
= -toremove
;
5006 ln
= fromtail
? list
->tail
: list
->head
;
5008 robj
*ele
= listNodeValue(ln
);
5010 next
= fromtail
? ln
->prev
: ln
->next
;
5011 if (equalStringObjects(ele
,c
->argv
[3])) {
5012 listDelNode(list
,ln
);
5015 if (toremove
&& removed
== toremove
) break;
5019 if (listLength(list
) == 0) deleteKey(c
->db
,c
->argv
[1]);
5020 addReplySds(c
,sdscatprintf(sdsempty(),":%d\r\n",removed
));
5023 /* This is the semantic of this command:
5024 * RPOPLPUSH srclist dstlist:
5025 * IF LLEN(srclist) > 0
5026 * element = RPOP srclist
5027 * LPUSH dstlist element
5034 * The idea is to be able to get an element from a list in a reliable way
5035 * since the element is not just returned but pushed against another list
5036 * as well. This command was originally proposed by Ezra Zygmuntowicz.
5038 static void rpoplpushcommand(redisClient
*c
) {
5043 if ((sobj
= lookupKeyWriteOrReply(c
,c
->argv
[1],shared
.nullbulk
)) == NULL
||
5044 checkType(c
,sobj
,REDIS_LIST
)) return;
5045 srclist
= sobj
->ptr
;
5046 ln
= listLast(srclist
);
5049 addReply(c
,shared
.nullbulk
);
5051 robj
*dobj
= lookupKeyWrite(c
->db
,c
->argv
[2]);
5052 robj
*ele
= listNodeValue(ln
);
5055 if (dobj
&& dobj
->type
!= REDIS_LIST
) {
5056 addReply(c
,shared
.wrongtypeerr
);
5060 /* Add the element to the target list (unless it's directly
5061 * passed to some BLPOP-ing client */
5062 if (!handleClientsWaitingListPush(c
,c
->argv
[2],ele
)) {
5064 /* Create the list if the key does not exist */
5065 dobj
= createListObject();
5066 dictAdd(c
->db
->dict
,c
->argv
[2],dobj
);
5067 incrRefCount(c
->argv
[2]);
5069 dstlist
= dobj
->ptr
;
5070 listAddNodeHead(dstlist
,ele
);
5074 /* Send the element to the client as reply as well */
5075 addReplyBulk(c
,ele
);
5077 /* Finally remove the element from the source list */
5078 listDelNode(srclist
,ln
);
5079 if (listLength(srclist
) == 0) deleteKey(c
->db
,c
->argv
[1]);
5084 /* ==================================== Sets ================================ */
5086 static void saddCommand(redisClient
*c
) {
5089 set
= lookupKeyWrite(c
->db
,c
->argv
[1]);
5091 set
= createSetObject();
5092 dictAdd(c
->db
->dict
,c
->argv
[1],set
);
5093 incrRefCount(c
->argv
[1]);
5095 if (set
->type
!= REDIS_SET
) {
5096 addReply(c
,shared
.wrongtypeerr
);
5100 if (dictAdd(set
->ptr
,c
->argv
[2],NULL
) == DICT_OK
) {
5101 incrRefCount(c
->argv
[2]);
5103 addReply(c
,shared
.cone
);
5105 addReply(c
,shared
.czero
);
5109 static void sremCommand(redisClient
*c
) {
5112 if ((set
= lookupKeyWriteOrReply(c
,c
->argv
[1],shared
.czero
)) == NULL
||
5113 checkType(c
,set
,REDIS_SET
)) return;
5115 if (dictDelete(set
->ptr
,c
->argv
[2]) == DICT_OK
) {
5117 if (htNeedsResize(set
->ptr
)) dictResize(set
->ptr
);
5118 if (dictSize((dict
*)set
->ptr
) == 0) deleteKey(c
->db
,c
->argv
[1]);
5119 addReply(c
,shared
.cone
);
5121 addReply(c
,shared
.czero
);
5125 static void smoveCommand(redisClient
*c
) {
5126 robj
*srcset
, *dstset
;
5128 srcset
= lookupKeyWrite(c
->db
,c
->argv
[1]);
5129 dstset
= lookupKeyWrite(c
->db
,c
->argv
[2]);
5131 /* If the source key does not exist return 0, if it's of the wrong type
5133 if (srcset
== NULL
|| srcset
->type
!= REDIS_SET
) {
5134 addReply(c
, srcset
? shared
.wrongtypeerr
: shared
.czero
);
5137 /* Error if the destination key is not a set as well */
5138 if (dstset
&& dstset
->type
!= REDIS_SET
) {
5139 addReply(c
,shared
.wrongtypeerr
);
5142 /* Remove the element from the source set */
5143 if (dictDelete(srcset
->ptr
,c
->argv
[3]) == DICT_ERR
) {
5144 /* Key not found in the src set! return zero */
5145 addReply(c
,shared
.czero
);
5148 if (dictSize((dict
*)srcset
->ptr
) == 0 && srcset
!= dstset
)
5149 deleteKey(c
->db
,c
->argv
[1]);
5151 /* Add the element to the destination set */
5153 dstset
= createSetObject();
5154 dictAdd(c
->db
->dict
,c
->argv
[2],dstset
);
5155 incrRefCount(c
->argv
[2]);
5157 if (dictAdd(dstset
->ptr
,c
->argv
[3],NULL
) == DICT_OK
)
5158 incrRefCount(c
->argv
[3]);
5159 addReply(c
,shared
.cone
);
5162 static void sismemberCommand(redisClient
*c
) {
5165 if ((set
= lookupKeyReadOrReply(c
,c
->argv
[1],shared
.czero
)) == NULL
||
5166 checkType(c
,set
,REDIS_SET
)) return;
5168 if (dictFind(set
->ptr
,c
->argv
[2]))
5169 addReply(c
,shared
.cone
);
5171 addReply(c
,shared
.czero
);
5174 static void scardCommand(redisClient
*c
) {
5178 if ((o
= lookupKeyReadOrReply(c
,c
->argv
[1],shared
.czero
)) == NULL
||
5179 checkType(c
,o
,REDIS_SET
)) return;
5182 addReplyUlong(c
,dictSize(s
));
5185 static void spopCommand(redisClient
*c
) {
5189 if ((set
= lookupKeyWriteOrReply(c
,c
->argv
[1],shared
.nullbulk
)) == NULL
||
5190 checkType(c
,set
,REDIS_SET
)) return;
5192 de
= dictGetRandomKey(set
->ptr
);
5194 addReply(c
,shared
.nullbulk
);
5196 robj
*ele
= dictGetEntryKey(de
);
5198 addReplyBulk(c
,ele
);
5199 dictDelete(set
->ptr
,ele
);
5200 if (htNeedsResize(set
->ptr
)) dictResize(set
->ptr
);
5201 if (dictSize((dict
*)set
->ptr
) == 0) deleteKey(c
->db
,c
->argv
[1]);
5206 static void srandmemberCommand(redisClient
*c
) {
5210 if ((set
= lookupKeyReadOrReply(c
,c
->argv
[1],shared
.nullbulk
)) == NULL
||
5211 checkType(c
,set
,REDIS_SET
)) return;
5213 de
= dictGetRandomKey(set
->ptr
);
5215 addReply(c
,shared
.nullbulk
);
5217 robj
*ele
= dictGetEntryKey(de
);
5219 addReplyBulk(c
,ele
);
5223 static int qsortCompareSetsByCardinality(const void *s1
, const void *s2
) {
5224 dict
**d1
= (void*) s1
, **d2
= (void*) s2
;
5226 return dictSize(*d1
)-dictSize(*d2
);
5229 static void sinterGenericCommand(redisClient
*c
, robj
**setskeys
, unsigned long setsnum
, robj
*dstkey
) {
5230 dict
**dv
= zmalloc(sizeof(dict
*)*setsnum
);
5233 robj
*lenobj
= NULL
, *dstset
= NULL
;
5234 unsigned long j
, cardinality
= 0;
5236 for (j
= 0; j
< setsnum
; j
++) {
5240 lookupKeyWrite(c
->db
,setskeys
[j
]) :
5241 lookupKeyRead(c
->db
,setskeys
[j
]);
5245 if (deleteKey(c
->db
,dstkey
))
5247 addReply(c
,shared
.czero
);
5249 addReply(c
,shared
.emptymultibulk
);
5253 if (setobj
->type
!= REDIS_SET
) {
5255 addReply(c
,shared
.wrongtypeerr
);
5258 dv
[j
] = setobj
->ptr
;
5260 /* Sort sets from the smallest to largest, this will improve our
5261 * algorithm's performace */
5262 qsort(dv
,setsnum
,sizeof(dict
*),qsortCompareSetsByCardinality
);
5264 /* The first thing we should output is the total number of elements...
5265 * since this is a multi-bulk write, but at this stage we don't know
5266 * the intersection set size, so we use a trick, append an empty object
5267 * to the output list and save the pointer to later modify it with the
5270 lenobj
= createObject(REDIS_STRING
,NULL
);
5272 decrRefCount(lenobj
);
5274 /* If we have a target key where to store the resulting set
5275 * create this key with an empty set inside */
5276 dstset
= createSetObject();
5279 /* Iterate all the elements of the first (smallest) set, and test
5280 * the element against all the other sets, if at least one set does
5281 * not include the element it is discarded */
5282 di
= dictGetIterator(dv
[0]);
5284 while((de
= dictNext(di
)) != NULL
) {
5287 for (j
= 1; j
< setsnum
; j
++)
5288 if (dictFind(dv
[j
],dictGetEntryKey(de
)) == NULL
) break;
5290 continue; /* at least one set does not contain the member */
5291 ele
= dictGetEntryKey(de
);
5293 addReplyBulk(c
,ele
);
5296 dictAdd(dstset
->ptr
,ele
,NULL
);
5300 dictReleaseIterator(di
);
5303 /* Store the resulting set into the target, if the intersection
5304 * is not an empty set. */
5305 deleteKey(c
->db
,dstkey
);
5306 if (dictSize((dict
*)dstset
->ptr
) > 0) {
5307 dictAdd(c
->db
->dict
,dstkey
,dstset
);
5308 incrRefCount(dstkey
);
5309 addReplyLongLong(c
,dictSize((dict
*)dstset
->ptr
));
5311 decrRefCount(dstset
);
5312 addReply(c
,shared
.czero
);
5316 lenobj
->ptr
= sdscatprintf(sdsempty(),"*%lu\r\n",cardinality
);
5321 static void sinterCommand(redisClient
*c
) {
5322 sinterGenericCommand(c
,c
->argv
+1,c
->argc
-1,NULL
);
5325 static void sinterstoreCommand(redisClient
*c
) {
5326 sinterGenericCommand(c
,c
->argv
+2,c
->argc
-2,c
->argv
[1]);
5329 #define REDIS_OP_UNION 0
5330 #define REDIS_OP_DIFF 1
5331 #define REDIS_OP_INTER 2
5333 static void sunionDiffGenericCommand(redisClient
*c
, robj
**setskeys
, int setsnum
, robj
*dstkey
, int op
) {
5334 dict
**dv
= zmalloc(sizeof(dict
*)*setsnum
);
5337 robj
*dstset
= NULL
;
5338 int j
, cardinality
= 0;
5340 for (j
= 0; j
< setsnum
; j
++) {
5344 lookupKeyWrite(c
->db
,setskeys
[j
]) :
5345 lookupKeyRead(c
->db
,setskeys
[j
]);
5350 if (setobj
->type
!= REDIS_SET
) {
5352 addReply(c
,shared
.wrongtypeerr
);
5355 dv
[j
] = setobj
->ptr
;
5358 /* We need a temp set object to store our union. If the dstkey
5359 * is not NULL (that is, we are inside an SUNIONSTORE operation) then
5360 * this set object will be the resulting object to set into the target key*/
5361 dstset
= createSetObject();
5363 /* Iterate all the elements of all the sets, add every element a single
5364 * time to the result set */
5365 for (j
= 0; j
< setsnum
; j
++) {
5366 if (op
== REDIS_OP_DIFF
&& j
== 0 && !dv
[j
]) break; /* result set is empty */
5367 if (!dv
[j
]) continue; /* non existing keys are like empty sets */
5369 di
= dictGetIterator(dv
[j
]);
5371 while((de
= dictNext(di
)) != NULL
) {
5374 /* dictAdd will not add the same element multiple times */
5375 ele
= dictGetEntryKey(de
);
5376 if (op
== REDIS_OP_UNION
|| j
== 0) {
5377 if (dictAdd(dstset
->ptr
,ele
,NULL
) == DICT_OK
) {
5381 } else if (op
== REDIS_OP_DIFF
) {
5382 if (dictDelete(dstset
->ptr
,ele
) == DICT_OK
) {
5387 dictReleaseIterator(di
);
5389 /* result set is empty? Exit asap. */
5390 if (op
== REDIS_OP_DIFF
&& cardinality
== 0) break;
5393 /* Output the content of the resulting set, if not in STORE mode */
5395 addReplySds(c
,sdscatprintf(sdsempty(),"*%d\r\n",cardinality
));
5396 di
= dictGetIterator(dstset
->ptr
);
5397 while((de
= dictNext(di
)) != NULL
) {
5400 ele
= dictGetEntryKey(de
);
5401 addReplyBulk(c
,ele
);
5403 dictReleaseIterator(di
);
5404 decrRefCount(dstset
);
5406 /* If we have a target key where to store the resulting set
5407 * create this key with the result set inside */
5408 deleteKey(c
->db
,dstkey
);
5409 if (dictSize((dict
*)dstset
->ptr
) > 0) {
5410 dictAdd(c
->db
->dict
,dstkey
,dstset
);
5411 incrRefCount(dstkey
);
5412 addReplyLongLong(c
,dictSize((dict
*)dstset
->ptr
));
5414 decrRefCount(dstset
);
5415 addReply(c
,shared
.czero
);
5422 static void sunionCommand(redisClient
*c
) {
5423 sunionDiffGenericCommand(c
,c
->argv
+1,c
->argc
-1,NULL
,REDIS_OP_UNION
);
5426 static void sunionstoreCommand(redisClient
*c
) {
5427 sunionDiffGenericCommand(c
,c
->argv
+2,c
->argc
-2,c
->argv
[1],REDIS_OP_UNION
);
5430 static void sdiffCommand(redisClient
*c
) {
5431 sunionDiffGenericCommand(c
,c
->argv
+1,c
->argc
-1,NULL
,REDIS_OP_DIFF
);
5434 static void sdiffstoreCommand(redisClient
*c
) {
5435 sunionDiffGenericCommand(c
,c
->argv
+2,c
->argc
-2,c
->argv
[1],REDIS_OP_DIFF
);
5438 /* ==================================== ZSets =============================== */
5440 /* ZSETs are ordered sets using two data structures to hold the same elements
5441 * in order to get O(log(N)) INSERT and REMOVE operations into a sorted
5444 * The elements are added to a hash table mapping Redis objects to scores.
5445 * At the same time the elements are added to a skip list mapping scores
5446 * to Redis objects (so objects are sorted by scores in this "view"). */
5448 /* This skiplist implementation is almost a C translation of the original
5449 * algorithm described by William Pugh in "Skip Lists: A Probabilistic
5450 * Alternative to Balanced Trees", modified in three ways:
5451 * a) this implementation allows for repeated values.
5452 * b) the comparison is not just by key (our 'score') but by satellite data.
5453 * c) there is a back pointer, so it's a doubly linked list with the back
5454 * pointers being only at "level 1". This allows to traverse the list
5455 * from tail to head, useful for ZREVRANGE. */
5457 static zskiplistNode
*zslCreateNode(int level
, double score
, robj
*obj
) {
5458 zskiplistNode
*zn
= zmalloc(sizeof(*zn
));
5460 zn
->forward
= zmalloc(sizeof(zskiplistNode
*) * level
);
5462 zn
->span
= zmalloc(sizeof(unsigned int) * (level
- 1));
5470 static zskiplist
*zslCreate(void) {
5474 zsl
= zmalloc(sizeof(*zsl
));
5477 zsl
->header
= zslCreateNode(ZSKIPLIST_MAXLEVEL
,0,NULL
);
5478 for (j
= 0; j
< ZSKIPLIST_MAXLEVEL
; j
++) {
5479 zsl
->header
->forward
[j
] = NULL
;
5481 /* span has space for ZSKIPLIST_MAXLEVEL-1 elements */
5482 if (j
< ZSKIPLIST_MAXLEVEL
-1)
5483 zsl
->header
->span
[j
] = 0;
5485 zsl
->header
->backward
= NULL
;
5490 static void zslFreeNode(zskiplistNode
*node
) {
5491 decrRefCount(node
->obj
);
5492 zfree(node
->forward
);
5497 static void zslFree(zskiplist
*zsl
) {
5498 zskiplistNode
*node
= zsl
->header
->forward
[0], *next
;
5500 zfree(zsl
->header
->forward
);
5501 zfree(zsl
->header
->span
);
5504 next
= node
->forward
[0];
5511 static int zslRandomLevel(void) {
5513 while ((random()&0xFFFF) < (ZSKIPLIST_P
* 0xFFFF))
5515 return (level
<ZSKIPLIST_MAXLEVEL
) ? level
: ZSKIPLIST_MAXLEVEL
;
5518 static void zslInsert(zskiplist
*zsl
, double score
, robj
*obj
) {
5519 zskiplistNode
*update
[ZSKIPLIST_MAXLEVEL
], *x
;
5520 unsigned int rank
[ZSKIPLIST_MAXLEVEL
];
5524 for (i
= zsl
->level
-1; i
>= 0; i
--) {
5525 /* store rank that is crossed to reach the insert position */
5526 rank
[i
] = i
== (zsl
->level
-1) ? 0 : rank
[i
+1];
5528 while (x
->forward
[i
] &&
5529 (x
->forward
[i
]->score
< score
||
5530 (x
->forward
[i
]->score
== score
&&
5531 compareStringObjects(x
->forward
[i
]->obj
,obj
) < 0))) {
5532 rank
[i
] += i
> 0 ? x
->span
[i
-1] : 1;
5537 /* we assume the key is not already inside, since we allow duplicated
5538 * scores, and the re-insertion of score and redis object should never
5539 * happen since the caller of zslInsert() should test in the hash table
5540 * if the element is already inside or not. */
5541 level
= zslRandomLevel();
5542 if (level
> zsl
->level
) {
5543 for (i
= zsl
->level
; i
< level
; i
++) {
5545 update
[i
] = zsl
->header
;
5546 update
[i
]->span
[i
-1] = zsl
->length
;
5550 x
= zslCreateNode(level
,score
,obj
);
5551 for (i
= 0; i
< level
; i
++) {
5552 x
->forward
[i
] = update
[i
]->forward
[i
];
5553 update
[i
]->forward
[i
] = x
;
5555 /* update span covered by update[i] as x is inserted here */
5557 x
->span
[i
-1] = update
[i
]->span
[i
-1] - (rank
[0] - rank
[i
]);
5558 update
[i
]->span
[i
-1] = (rank
[0] - rank
[i
]) + 1;
5562 /* increment span for untouched levels */
5563 for (i
= level
; i
< zsl
->level
; i
++) {
5564 update
[i
]->span
[i
-1]++;
5567 x
->backward
= (update
[0] == zsl
->header
) ? NULL
: update
[0];
5569 x
->forward
[0]->backward
= x
;
5575 /* Internal function used by zslDelete, zslDeleteByScore and zslDeleteByRank */
5576 void zslDeleteNode(zskiplist
*zsl
, zskiplistNode
*x
, zskiplistNode
**update
) {
5578 for (i
= 0; i
< zsl
->level
; i
++) {
5579 if (update
[i
]->forward
[i
] == x
) {
5581 update
[i
]->span
[i
-1] += x
->span
[i
-1] - 1;
5583 update
[i
]->forward
[i
] = x
->forward
[i
];
5585 /* invariant: i > 0, because update[0]->forward[0]
5586 * is always equal to x */
5587 update
[i
]->span
[i
-1] -= 1;
5590 if (x
->forward
[0]) {
5591 x
->forward
[0]->backward
= x
->backward
;
5593 zsl
->tail
= x
->backward
;
5595 while(zsl
->level
> 1 && zsl
->header
->forward
[zsl
->level
-1] == NULL
)
5600 /* Delete an element with matching score/object from the skiplist. */
5601 static int zslDelete(zskiplist
*zsl
, double score
, robj
*obj
) {
5602 zskiplistNode
*update
[ZSKIPLIST_MAXLEVEL
], *x
;
5606 for (i
= zsl
->level
-1; i
>= 0; i
--) {
5607 while (x
->forward
[i
] &&
5608 (x
->forward
[i
]->score
< score
||
5609 (x
->forward
[i
]->score
== score
&&
5610 compareStringObjects(x
->forward
[i
]->obj
,obj
) < 0)))
5614 /* We may have multiple elements with the same score, what we need
5615 * is to find the element with both the right score and object. */
5617 if (x
&& score
== x
->score
&& equalStringObjects(x
->obj
,obj
)) {
5618 zslDeleteNode(zsl
, x
, update
);
5622 return 0; /* not found */
5624 return 0; /* not found */
5627 /* Delete all the elements with score between min and max from the skiplist.
5628 * Min and max are inclusive, so every element with min <= score <= max is deleted.
5629 * Note that this function takes the reference to the hash table view of the
5630 * sorted set, in order to remove the elements from the hash table too. */
5631 static unsigned long zslDeleteRangeByScore(zskiplist
*zsl
, double min
, double max
, dict
*dict
) {
5632 zskiplistNode
*update
[ZSKIPLIST_MAXLEVEL
], *x
;
5633 unsigned long removed
= 0;
5637 for (i
= zsl
->level
-1; i
>= 0; i
--) {
5638 while (x
->forward
[i
] && x
->forward
[i
]->score
< min
)
5642 /* We may have multiple elements with the same score, what we need
5643 * is to find the element with both the right score and object. */
5645 while (x
&& x
->score
<= max
) {
5646 zskiplistNode
*next
= x
->forward
[0];
5647 zslDeleteNode(zsl
, x
, update
);
5648 dictDelete(dict
,x
->obj
);
5653 return removed
; /* not found */
5656 /* Delete all the elements with rank between start and end from the skiplist.
5657 * Start and end are inclusive. Note that start and end need to be 1-based */
5658 static unsigned long zslDeleteRangeByRank(zskiplist
*zsl
, unsigned int start
, unsigned int end
, dict
*dict
) {
5659 zskiplistNode
*update
[ZSKIPLIST_MAXLEVEL
], *x
;
5660 unsigned long traversed
= 0, removed
= 0;
5664 for (i
= zsl
->level
-1; i
>= 0; i
--) {
5665 while (x
->forward
[i
] && (traversed
+ (i
> 0 ? x
->span
[i
-1] : 1)) < start
) {
5666 traversed
+= i
> 0 ? x
->span
[i
-1] : 1;
5674 while (x
&& traversed
<= end
) {
5675 zskiplistNode
*next
= x
->forward
[0];
5676 zslDeleteNode(zsl
, x
, update
);
5677 dictDelete(dict
,x
->obj
);
5686 /* Find the first node having a score equal or greater than the specified one.
5687 * Returns NULL if there is no match. */
5688 static zskiplistNode
*zslFirstWithScore(zskiplist
*zsl
, double score
) {
5693 for (i
= zsl
->level
-1; i
>= 0; i
--) {
5694 while (x
->forward
[i
] && x
->forward
[i
]->score
< score
)
5697 /* We may have multiple elements with the same score, what we need
5698 * is to find the element with both the right score and object. */
5699 return x
->forward
[0];
5702 /* Find the rank for an element by both score and key.
5703 * Returns 0 when the element cannot be found, rank otherwise.
5704 * Note that the rank is 1-based due to the span of zsl->header to the
5706 static unsigned long zslGetRank(zskiplist
*zsl
, double score
, robj
*o
) {
5708 unsigned long rank
= 0;
5712 for (i
= zsl
->level
-1; i
>= 0; i
--) {
5713 while (x
->forward
[i
] &&
5714 (x
->forward
[i
]->score
< score
||
5715 (x
->forward
[i
]->score
== score
&&
5716 compareStringObjects(x
->forward
[i
]->obj
,o
) <= 0))) {
5717 rank
+= i
> 0 ? x
->span
[i
-1] : 1;
5721 /* x might be equal to zsl->header, so test if obj is non-NULL */
5722 if (x
->obj
&& equalStringObjects(x
->obj
,o
)) {
5729 /* Finds an element by its rank. The rank argument needs to be 1-based. */
5730 zskiplistNode
* zslGetElementByRank(zskiplist
*zsl
, unsigned long rank
) {
5732 unsigned long traversed
= 0;
5736 for (i
= zsl
->level
-1; i
>= 0; i
--) {
5737 while (x
->forward
[i
] && (traversed
+ (i
>0 ? x
->span
[i
-1] : 1)) <= rank
)
5739 traversed
+= i
> 0 ? x
->span
[i
-1] : 1;
5742 if (traversed
== rank
) {
5749 /* The actual Z-commands implementations */
5751 /* This generic command implements both ZADD and ZINCRBY.
5752 * scoreval is the score if the operation is a ZADD (doincrement == 0) or
5753 * the increment if the operation is a ZINCRBY (doincrement == 1). */
5754 static void zaddGenericCommand(redisClient
*c
, robj
*key
, robj
*ele
, double scoreval
, int doincrement
) {
5759 if (isnan(scoreval
)) {
5760 addReplySds(c
,sdsnew("-ERR provide score is Not A Number (nan)\r\n"));
5764 zsetobj
= lookupKeyWrite(c
->db
,key
);
5765 if (zsetobj
== NULL
) {
5766 zsetobj
= createZsetObject();
5767 dictAdd(c
->db
->dict
,key
,zsetobj
);
5770 if (zsetobj
->type
!= REDIS_ZSET
) {
5771 addReply(c
,shared
.wrongtypeerr
);
5777 /* Ok now since we implement both ZADD and ZINCRBY here the code
5778 * needs to handle the two different conditions. It's all about setting
5779 * '*score', that is, the new score to set, to the right value. */
5780 score
= zmalloc(sizeof(double));
5784 /* Read the old score. If the element was not present starts from 0 */
5785 de
= dictFind(zs
->dict
,ele
);
5787 double *oldscore
= dictGetEntryVal(de
);
5788 *score
= *oldscore
+ scoreval
;
5792 if (isnan(*score
)) {
5794 sdsnew("-ERR resulting score is Not A Number (nan)\r\n"));
5796 /* Note that we don't need to check if the zset may be empty and
5797 * should be removed here, as we can only obtain Nan as score if
5798 * there was already an element in the sorted set. */
5805 /* What follows is a simple remove and re-insert operation that is common
5806 * to both ZADD and ZINCRBY... */
5807 if (dictAdd(zs
->dict
,ele
,score
) == DICT_OK
) {
5808 /* case 1: New element */
5809 incrRefCount(ele
); /* added to hash */
5810 zslInsert(zs
->zsl
,*score
,ele
);
5811 incrRefCount(ele
); /* added to skiplist */
5814 addReplyDouble(c
,*score
);
5816 addReply(c
,shared
.cone
);
5821 /* case 2: Score update operation */
5822 de
= dictFind(zs
->dict
,ele
);
5823 redisAssert(de
!= NULL
);
5824 oldscore
= dictGetEntryVal(de
);
5825 if (*score
!= *oldscore
) {
5828 /* Remove and insert the element in the skip list with new score */
5829 deleted
= zslDelete(zs
->zsl
,*oldscore
,ele
);
5830 redisAssert(deleted
!= 0);
5831 zslInsert(zs
->zsl
,*score
,ele
);
5833 /* Update the score in the hash table */
5834 dictReplace(zs
->dict
,ele
,score
);
5840 addReplyDouble(c
,*score
);
5842 addReply(c
,shared
.czero
);
5846 static void zaddCommand(redisClient
*c
) {
5849 if (getDoubleFromObjectOrReply(c
, c
->argv
[2], &scoreval
, NULL
) != REDIS_OK
) return;
5850 zaddGenericCommand(c
,c
->argv
[1],c
->argv
[3],scoreval
,0);
5853 static void zincrbyCommand(redisClient
*c
) {
5856 if (getDoubleFromObjectOrReply(c
, c
->argv
[2], &scoreval
, NULL
) != REDIS_OK
) return;
5857 zaddGenericCommand(c
,c
->argv
[1],c
->argv
[3],scoreval
,1);
5860 static void zremCommand(redisClient
*c
) {
5867 if ((zsetobj
= lookupKeyWriteOrReply(c
,c
->argv
[1],shared
.czero
)) == NULL
||
5868 checkType(c
,zsetobj
,REDIS_ZSET
)) return;
5871 de
= dictFind(zs
->dict
,c
->argv
[2]);
5873 addReply(c
,shared
.czero
);
5876 /* Delete from the skiplist */
5877 oldscore
= dictGetEntryVal(de
);
5878 deleted
= zslDelete(zs
->zsl
,*oldscore
,c
->argv
[2]);
5879 redisAssert(deleted
!= 0);
5881 /* Delete from the hash table */
5882 dictDelete(zs
->dict
,c
->argv
[2]);
5883 if (htNeedsResize(zs
->dict
)) dictResize(zs
->dict
);
5884 if (dictSize(zs
->dict
) == 0) deleteKey(c
->db
,c
->argv
[1]);
5886 addReply(c
,shared
.cone
);
5889 static void zremrangebyscoreCommand(redisClient
*c
) {
5896 if ((getDoubleFromObjectOrReply(c
, c
->argv
[2], &min
, NULL
) != REDIS_OK
) ||
5897 (getDoubleFromObjectOrReply(c
, c
->argv
[3], &max
, NULL
) != REDIS_OK
)) return;
5899 if ((zsetobj
= lookupKeyWriteOrReply(c
,c
->argv
[1],shared
.czero
)) == NULL
||
5900 checkType(c
,zsetobj
,REDIS_ZSET
)) return;
5903 deleted
= zslDeleteRangeByScore(zs
->zsl
,min
,max
,zs
->dict
);
5904 if (htNeedsResize(zs
->dict
)) dictResize(zs
->dict
);
5905 if (dictSize(zs
->dict
) == 0) deleteKey(c
->db
,c
->argv
[1]);
5906 server
.dirty
+= deleted
;
5907 addReplyLongLong(c
,deleted
);
5910 static void zremrangebyrankCommand(redisClient
*c
) {
5918 if ((getLongFromObjectOrReply(c
, c
->argv
[2], &start
, NULL
) != REDIS_OK
) ||
5919 (getLongFromObjectOrReply(c
, c
->argv
[3], &end
, NULL
) != REDIS_OK
)) return;
5921 if ((zsetobj
= lookupKeyWriteOrReply(c
,c
->argv
[1],shared
.czero
)) == NULL
||
5922 checkType(c
,zsetobj
,REDIS_ZSET
)) return;
5924 llen
= zs
->zsl
->length
;
5926 /* convert negative indexes */
5927 if (start
< 0) start
= llen
+start
;
5928 if (end
< 0) end
= llen
+end
;
5929 if (start
< 0) start
= 0;
5930 if (end
< 0) end
= 0;
5932 /* indexes sanity checks */
5933 if (start
> end
|| start
>= llen
) {
5934 addReply(c
,shared
.czero
);
5937 if (end
>= llen
) end
= llen
-1;
5939 /* increment start and end because zsl*Rank functions
5940 * use 1-based rank */
5941 deleted
= zslDeleteRangeByRank(zs
->zsl
,start
+1,end
+1,zs
->dict
);
5942 if (htNeedsResize(zs
->dict
)) dictResize(zs
->dict
);
5943 if (dictSize(zs
->dict
) == 0) deleteKey(c
->db
,c
->argv
[1]);
5944 server
.dirty
+= deleted
;
5945 addReplyLongLong(c
, deleted
);
5953 static int qsortCompareZsetopsrcByCardinality(const void *s1
, const void *s2
) {
5954 zsetopsrc
*d1
= (void*) s1
, *d2
= (void*) s2
;
5955 unsigned long size1
, size2
;
5956 size1
= d1
->dict
? dictSize(d1
->dict
) : 0;
5957 size2
= d2
->dict
? dictSize(d2
->dict
) : 0;
5958 return size1
- size2
;
5961 #define REDIS_AGGR_SUM 1
5962 #define REDIS_AGGR_MIN 2
5963 #define REDIS_AGGR_MAX 3
5964 #define zunionInterDictValue(_e) (dictGetEntryVal(_e) == NULL ? 1.0 : *(double*)dictGetEntryVal(_e))
5966 inline static void zunionInterAggregate(double *target
, double val
, int aggregate
) {
5967 if (aggregate
== REDIS_AGGR_SUM
) {
5968 *target
= *target
+ val
;
5969 } else if (aggregate
== REDIS_AGGR_MIN
) {
5970 *target
= val
< *target
? val
: *target
;
5971 } else if (aggregate
== REDIS_AGGR_MAX
) {
5972 *target
= val
> *target
? val
: *target
;
5975 redisPanic("Unknown ZUNION/INTER aggregate type");
5979 static void zunionInterGenericCommand(redisClient
*c
, robj
*dstkey
, int op
) {
5981 int aggregate
= REDIS_AGGR_SUM
;
5988 /* expect setnum input keys to be given */
5989 setnum
= atoi(c
->argv
[2]->ptr
);
5991 addReplySds(c
,sdsnew("-ERR at least 1 input key is needed for ZUNIONSTORE/ZINTERSTORE\r\n"));
5995 /* test if the expected number of keys would overflow */
5996 if (3+setnum
> c
->argc
) {
5997 addReply(c
,shared
.syntaxerr
);
6001 /* read keys to be used for input */
6002 src
= zmalloc(sizeof(zsetopsrc
) * setnum
);
6003 for (i
= 0, j
= 3; i
< setnum
; i
++, j
++) {
6004 robj
*obj
= lookupKeyWrite(c
->db
,c
->argv
[j
]);
6008 if (obj
->type
== REDIS_ZSET
) {
6009 src
[i
].dict
= ((zset
*)obj
->ptr
)->dict
;
6010 } else if (obj
->type
== REDIS_SET
) {
6011 src
[i
].dict
= (obj
->ptr
);
6014 addReply(c
,shared
.wrongtypeerr
);
6019 /* default all weights to 1 */
6020 src
[i
].weight
= 1.0;
6023 /* parse optional extra arguments */
6025 int remaining
= c
->argc
- j
;
6028 if (remaining
>= (setnum
+ 1) && !strcasecmp(c
->argv
[j
]->ptr
,"weights")) {
6030 for (i
= 0; i
< setnum
; i
++, j
++, remaining
--) {
6031 if (getDoubleFromObjectOrReply(c
, c
->argv
[j
], &src
[i
].weight
, NULL
) != REDIS_OK
)
6034 } else if (remaining
>= 2 && !strcasecmp(c
->argv
[j
]->ptr
,"aggregate")) {
6036 if (!strcasecmp(c
->argv
[j
]->ptr
,"sum")) {
6037 aggregate
= REDIS_AGGR_SUM
;
6038 } else if (!strcasecmp(c
->argv
[j
]->ptr
,"min")) {
6039 aggregate
= REDIS_AGGR_MIN
;
6040 } else if (!strcasecmp(c
->argv
[j
]->ptr
,"max")) {
6041 aggregate
= REDIS_AGGR_MAX
;
6044 addReply(c
,shared
.syntaxerr
);
6050 addReply(c
,shared
.syntaxerr
);
6056 /* sort sets from the smallest to largest, this will improve our
6057 * algorithm's performance */
6058 qsort(src
,setnum
,sizeof(zsetopsrc
),qsortCompareZsetopsrcByCardinality
);
6060 dstobj
= createZsetObject();
6061 dstzset
= dstobj
->ptr
;
6063 if (op
== REDIS_OP_INTER
) {
6064 /* skip going over all entries if the smallest zset is NULL or empty */
6065 if (src
[0].dict
&& dictSize(src
[0].dict
) > 0) {
6066 /* precondition: as src[0].dict is non-empty and the zsets are ordered
6067 * from small to large, all src[i > 0].dict are non-empty too */
6068 di
= dictGetIterator(src
[0].dict
);
6069 while((de
= dictNext(di
)) != NULL
) {
6070 double *score
= zmalloc(sizeof(double)), value
;
6071 *score
= src
[0].weight
* zunionInterDictValue(de
);
6073 for (j
= 1; j
< setnum
; j
++) {
6074 dictEntry
*other
= dictFind(src
[j
].dict
,dictGetEntryKey(de
));
6076 value
= src
[j
].weight
* zunionInterDictValue(other
);
6077 zunionInterAggregate(score
, value
, aggregate
);
6083 /* skip entry when not present in every source dict */
6087 robj
*o
= dictGetEntryKey(de
);
6088 dictAdd(dstzset
->dict
,o
,score
);
6089 incrRefCount(o
); /* added to dictionary */
6090 zslInsert(dstzset
->zsl
,*score
,o
);
6091 incrRefCount(o
); /* added to skiplist */
6094 dictReleaseIterator(di
);
6096 } else if (op
== REDIS_OP_UNION
) {
6097 for (i
= 0; i
< setnum
; i
++) {
6098 if (!src
[i
].dict
) continue;
6100 di
= dictGetIterator(src
[i
].dict
);
6101 while((de
= dictNext(di
)) != NULL
) {
6102 /* skip key when already processed */
6103 if (dictFind(dstzset
->dict
,dictGetEntryKey(de
)) != NULL
) continue;
6105 double *score
= zmalloc(sizeof(double)), value
;
6106 *score
= src
[i
].weight
* zunionInterDictValue(de
);
6108 /* because the zsets are sorted by size, it's only possible
6109 * for sets at larger indices to hold this entry */
6110 for (j
= (i
+1); j
< setnum
; j
++) {
6111 dictEntry
*other
= dictFind(src
[j
].dict
,dictGetEntryKey(de
));
6113 value
= src
[j
].weight
* zunionInterDictValue(other
);
6114 zunionInterAggregate(score
, value
, aggregate
);
6118 robj
*o
= dictGetEntryKey(de
);
6119 dictAdd(dstzset
->dict
,o
,score
);
6120 incrRefCount(o
); /* added to dictionary */
6121 zslInsert(dstzset
->zsl
,*score
,o
);
6122 incrRefCount(o
); /* added to skiplist */
6124 dictReleaseIterator(di
);
6127 /* unknown operator */
6128 redisAssert(op
== REDIS_OP_INTER
|| op
== REDIS_OP_UNION
);
6131 deleteKey(c
->db
,dstkey
);
6132 if (dstzset
->zsl
->length
) {
6133 dictAdd(c
->db
->dict
,dstkey
,dstobj
);
6134 incrRefCount(dstkey
);
6135 addReplyLongLong(c
, dstzset
->zsl
->length
);
6138 decrRefCount(dstobj
);
6139 addReply(c
, shared
.czero
);
6144 static void zunionstoreCommand(redisClient
*c
) {
6145 zunionInterGenericCommand(c
,c
->argv
[1], REDIS_OP_UNION
);
6148 static void zinterstoreCommand(redisClient
*c
) {
6149 zunionInterGenericCommand(c
,c
->argv
[1], REDIS_OP_INTER
);
6152 static void zrangeGenericCommand(redisClient
*c
, int reverse
) {
6164 if ((getLongFromObjectOrReply(c
, c
->argv
[2], &start
, NULL
) != REDIS_OK
) ||
6165 (getLongFromObjectOrReply(c
, c
->argv
[3], &end
, NULL
) != REDIS_OK
)) return;
6167 if (c
->argc
== 5 && !strcasecmp(c
->argv
[4]->ptr
,"withscores")) {
6169 } else if (c
->argc
>= 5) {
6170 addReply(c
,shared
.syntaxerr
);
6174 if ((o
= lookupKeyReadOrReply(c
,c
->argv
[1],shared
.emptymultibulk
)) == NULL
6175 || checkType(c
,o
,REDIS_ZSET
)) return;
6180 /* convert negative indexes */
6181 if (start
< 0) start
= llen
+start
;
6182 if (end
< 0) end
= llen
+end
;
6183 if (start
< 0) start
= 0;
6184 if (end
< 0) end
= 0;
6186 /* indexes sanity checks */
6187 if (start
> end
|| start
>= llen
) {
6188 /* Out of range start or start > end result in empty list */
6189 addReply(c
,shared
.emptymultibulk
);
6192 if (end
>= llen
) end
= llen
-1;
6193 rangelen
= (end
-start
)+1;
6195 /* check if starting point is trivial, before searching
6196 * the element in log(N) time */
6198 ln
= start
== 0 ? zsl
->tail
: zslGetElementByRank(zsl
, llen
-start
);
6201 zsl
->header
->forward
[0] : zslGetElementByRank(zsl
, start
+1);
6204 /* Return the result in form of a multi-bulk reply */
6205 addReplySds(c
,sdscatprintf(sdsempty(),"*%d\r\n",
6206 withscores
? (rangelen
*2) : rangelen
));
6207 for (j
= 0; j
< rangelen
; j
++) {
6209 addReplyBulk(c
,ele
);
6211 addReplyDouble(c
,ln
->score
);
6212 ln
= reverse
? ln
->backward
: ln
->forward
[0];
6216 static void zrangeCommand(redisClient
*c
) {
6217 zrangeGenericCommand(c
,0);
6220 static void zrevrangeCommand(redisClient
*c
) {
6221 zrangeGenericCommand(c
,1);
6224 /* This command implements both ZRANGEBYSCORE and ZCOUNT.
6225 * If justcount is non-zero, just the count is returned. */
6226 static void genericZrangebyscoreCommand(redisClient
*c
, int justcount
) {
6229 int minex
= 0, maxex
= 0; /* are min or max exclusive? */
6230 int offset
= 0, limit
= -1;
6234 /* Parse the min-max interval. If one of the values is prefixed
6235 * by the "(" character, it's considered "open". For instance
6236 * ZRANGEBYSCORE zset (1.5 (2.5 will match min < x < max
6237 * ZRANGEBYSCORE zset 1.5 2.5 will instead match min <= x <= max */
6238 if (((char*)c
->argv
[2]->ptr
)[0] == '(') {
6239 min
= strtod((char*)c
->argv
[2]->ptr
+1,NULL
);
6242 min
= strtod(c
->argv
[2]->ptr
,NULL
);
6244 if (((char*)c
->argv
[3]->ptr
)[0] == '(') {
6245 max
= strtod((char*)c
->argv
[3]->ptr
+1,NULL
);
6248 max
= strtod(c
->argv
[3]->ptr
,NULL
);
6251 /* Parse "WITHSCORES": note that if the command was called with
6252 * the name ZCOUNT then we are sure that c->argc == 4, so we'll never
6253 * enter the following paths to parse WITHSCORES and LIMIT. */
6254 if (c
->argc
== 5 || c
->argc
== 8) {
6255 if (strcasecmp(c
->argv
[c
->argc
-1]->ptr
,"withscores") == 0)
6260 if (c
->argc
!= (4 + withscores
) && c
->argc
!= (7 + withscores
))
6264 sdsnew("-ERR wrong number of arguments for ZRANGEBYSCORE\r\n"));
6269 if (c
->argc
== (7 + withscores
) && strcasecmp(c
->argv
[4]->ptr
,"limit")) {
6270 addReply(c
,shared
.syntaxerr
);
6272 } else if (c
->argc
== (7 + withscores
)) {
6273 offset
= atoi(c
->argv
[5]->ptr
);
6274 limit
= atoi(c
->argv
[6]->ptr
);
6275 if (offset
< 0) offset
= 0;
6278 /* Ok, lookup the key and get the range */
6279 o
= lookupKeyRead(c
->db
,c
->argv
[1]);
6281 addReply(c
,justcount
? shared
.czero
: shared
.emptymultibulk
);
6283 if (o
->type
!= REDIS_ZSET
) {
6284 addReply(c
,shared
.wrongtypeerr
);
6286 zset
*zsetobj
= o
->ptr
;
6287 zskiplist
*zsl
= zsetobj
->zsl
;
6289 robj
*ele
, *lenobj
= NULL
;
6290 unsigned long rangelen
= 0;
6292 /* Get the first node with the score >= min, or with
6293 * score > min if 'minex' is true. */
6294 ln
= zslFirstWithScore(zsl
,min
);
6295 while (minex
&& ln
&& ln
->score
== min
) ln
= ln
->forward
[0];
6298 /* No element matching the specified interval */
6299 addReply(c
,justcount
? shared
.czero
: shared
.emptymultibulk
);
6303 /* We don't know in advance how many matching elements there
6304 * are in the list, so we push this object that will represent
6305 * the multi-bulk length in the output buffer, and will "fix"
6308 lenobj
= createObject(REDIS_STRING
,NULL
);
6310 decrRefCount(lenobj
);
6313 while(ln
&& (maxex
? (ln
->score
< max
) : (ln
->score
<= max
))) {
6316 ln
= ln
->forward
[0];
6319 if (limit
== 0) break;
6322 addReplyBulk(c
,ele
);
6324 addReplyDouble(c
,ln
->score
);
6326 ln
= ln
->forward
[0];
6328 if (limit
> 0) limit
--;
6331 addReplyLongLong(c
,(long)rangelen
);
6333 lenobj
->ptr
= sdscatprintf(sdsempty(),"*%lu\r\n",
6334 withscores
? (rangelen
*2) : rangelen
);
6340 static void zrangebyscoreCommand(redisClient
*c
) {
6341 genericZrangebyscoreCommand(c
,0);
6344 static void zcountCommand(redisClient
*c
) {
6345 genericZrangebyscoreCommand(c
,1);
6348 static void zcardCommand(redisClient
*c
) {
6352 if ((o
= lookupKeyReadOrReply(c
,c
->argv
[1],shared
.czero
)) == NULL
||
6353 checkType(c
,o
,REDIS_ZSET
)) return;
6356 addReplyUlong(c
,zs
->zsl
->length
);
6359 static void zscoreCommand(redisClient
*c
) {
6364 if ((o
= lookupKeyReadOrReply(c
,c
->argv
[1],shared
.nullbulk
)) == NULL
||
6365 checkType(c
,o
,REDIS_ZSET
)) return;
6368 de
= dictFind(zs
->dict
,c
->argv
[2]);
6370 addReply(c
,shared
.nullbulk
);
6372 double *score
= dictGetEntryVal(de
);
6374 addReplyDouble(c
,*score
);
6378 static void zrankGenericCommand(redisClient
*c
, int reverse
) {
6386 if ((o
= lookupKeyReadOrReply(c
,c
->argv
[1],shared
.nullbulk
)) == NULL
||
6387 checkType(c
,o
,REDIS_ZSET
)) return;
6391 de
= dictFind(zs
->dict
,c
->argv
[2]);
6393 addReply(c
,shared
.nullbulk
);
6397 score
= dictGetEntryVal(de
);
6398 rank
= zslGetRank(zsl
, *score
, c
->argv
[2]);
6401 addReplyLongLong(c
, zsl
->length
- rank
);
6403 addReplyLongLong(c
, rank
-1);
6406 addReply(c
,shared
.nullbulk
);
6410 static void zrankCommand(redisClient
*c
) {
6411 zrankGenericCommand(c
, 0);
6414 static void zrevrankCommand(redisClient
*c
) {
6415 zrankGenericCommand(c
, 1);
6418 /* ========================= Hashes utility functions ======================= */
6419 #define REDIS_HASH_KEY 1
6420 #define REDIS_HASH_VALUE 2
6422 /* Check the length of a number of objects to see if we need to convert a
6423 * zipmap to a real hash. Note that we only check string encoded objects
6424 * as their string length can be queried in constant time. */
6425 static void hashTryConversion(robj
*subject
, robj
**argv
, int start
, int end
) {
6427 if (subject
->encoding
!= REDIS_ENCODING_ZIPMAP
) return;
6429 for (i
= start
; i
<= end
; i
++) {
6430 if (argv
[i
]->encoding
== REDIS_ENCODING_RAW
&&
6431 sdslen(argv
[i
]->ptr
) > server
.hash_max_zipmap_value
)
6433 convertToRealHash(subject
);
6439 /* Encode given objects in-place when the hash uses a dict. */
6440 static void hashTryObjectEncoding(robj
*subject
, robj
**o1
, robj
**o2
) {
6441 if (subject
->encoding
== REDIS_ENCODING_HT
) {
6442 if (o1
) *o1
= tryObjectEncoding(*o1
);
6443 if (o2
) *o2
= tryObjectEncoding(*o2
);
6447 /* Get the value from a hash identified by key. Returns either a string
6448 * object or NULL if the value cannot be found. The refcount of the object
6449 * is always increased by 1 when the value was found. */
6450 static robj
*hashGet(robj
*o
, robj
*key
) {
6452 if (o
->encoding
== REDIS_ENCODING_ZIPMAP
) {
6455 key
= getDecodedObject(key
);
6456 if (zipmapGet(o
->ptr
,key
->ptr
,sdslen(key
->ptr
),&v
,&vlen
)) {
6457 value
= createStringObject((char*)v
,vlen
);
6461 dictEntry
*de
= dictFind(o
->ptr
,key
);
6463 value
= dictGetEntryVal(de
);
6464 incrRefCount(value
);
6470 /* Test if the key exists in the given hash. Returns 1 if the key
6471 * exists and 0 when it doesn't. */
6472 static int hashExists(robj
*o
, robj
*key
) {
6473 if (o
->encoding
== REDIS_ENCODING_ZIPMAP
) {
6474 key
= getDecodedObject(key
);
6475 if (zipmapExists(o
->ptr
,key
->ptr
,sdslen(key
->ptr
))) {
6481 if (dictFind(o
->ptr
,key
) != NULL
) {
6488 /* Add an element, discard the old if the key already exists.
6489 * Return 0 on insert and 1 on update. */
6490 static int hashSet(robj
*o
, robj
*key
, robj
*value
) {
6492 if (o
->encoding
== REDIS_ENCODING_ZIPMAP
) {
6493 key
= getDecodedObject(key
);
6494 value
= getDecodedObject(value
);
6495 o
->ptr
= zipmapSet(o
->ptr
,
6496 key
->ptr
,sdslen(key
->ptr
),
6497 value
->ptr
,sdslen(value
->ptr
), &update
);
6499 decrRefCount(value
);
6501 /* Check if the zipmap needs to be upgraded to a real hash table */
6502 if (zipmapLen(o
->ptr
) > server
.hash_max_zipmap_entries
)
6503 convertToRealHash(o
);
6505 if (dictReplace(o
->ptr
,key
,value
)) {
6512 incrRefCount(value
);
6517 /* Delete an element from a hash.
6518 * Return 1 on deleted and 0 on not found. */
6519 static int hashDelete(robj
*o
, robj
*key
) {
6521 if (o
->encoding
== REDIS_ENCODING_ZIPMAP
) {
6522 key
= getDecodedObject(key
);
6523 o
->ptr
= zipmapDel(o
->ptr
,key
->ptr
,sdslen(key
->ptr
), &deleted
);
6526 deleted
= dictDelete((dict
*)o
->ptr
,key
) == DICT_OK
;
6527 /* Always check if the dictionary needs a resize after a delete. */
6528 if (deleted
&& htNeedsResize(o
->ptr
)) dictResize(o
->ptr
);
6533 /* Return the number of elements in a hash. */
6534 static unsigned long hashLength(robj
*o
) {
6535 return (o
->encoding
== REDIS_ENCODING_ZIPMAP
) ?
6536 zipmapLen((unsigned char*)o
->ptr
) : dictSize((dict
*)o
->ptr
);
6539 /* Structure to hold hash iteration abstraction. Note that iteration over
6540 * hashes involves both fields and values. Because it is possible that
6541 * not both are required, store pointers in the iterator to avoid
6542 * unnecessary memory allocation for fields/values. */
6546 unsigned char *zk
, *zv
;
6547 unsigned int zklen
, zvlen
;
6553 static hashIterator
*hashInitIterator(robj
*subject
) {
6554 hashIterator
*hi
= zmalloc(sizeof(hashIterator
));
6555 hi
->encoding
= subject
->encoding
;
6556 if (hi
->encoding
== REDIS_ENCODING_ZIPMAP
) {
6557 hi
->zi
= zipmapRewind(subject
->ptr
);
6558 } else if (hi
->encoding
== REDIS_ENCODING_HT
) {
6559 hi
->di
= dictGetIterator(subject
->ptr
);
6566 static void hashReleaseIterator(hashIterator
*hi
) {
6567 if (hi
->encoding
== REDIS_ENCODING_HT
) {
6568 dictReleaseIterator(hi
->di
);
6573 /* Move to the next entry in the hash. Return REDIS_OK when the next entry
6574 * could be found and REDIS_ERR when the iterator reaches the end. */
6575 static int hashNext(hashIterator
*hi
) {
6576 if (hi
->encoding
== REDIS_ENCODING_ZIPMAP
) {
6577 if ((hi
->zi
= zipmapNext(hi
->zi
, &hi
->zk
, &hi
->zklen
,
6578 &hi
->zv
, &hi
->zvlen
)) == NULL
) return REDIS_ERR
;
6580 if ((hi
->de
= dictNext(hi
->di
)) == NULL
) return REDIS_ERR
;
6585 /* Get key or value object at current iteration position.
6586 * This increases the refcount of the field object by 1. */
6587 static robj
*hashCurrent(hashIterator
*hi
, int what
) {
6589 if (hi
->encoding
== REDIS_ENCODING_ZIPMAP
) {
6590 if (what
& REDIS_HASH_KEY
) {
6591 o
= createStringObject((char*)hi
->zk
,hi
->zklen
);
6593 o
= createStringObject((char*)hi
->zv
,hi
->zvlen
);
6596 if (what
& REDIS_HASH_KEY
) {
6597 o
= dictGetEntryKey(hi
->de
);
6599 o
= dictGetEntryVal(hi
->de
);
6606 static robj
*hashLookupWriteOrCreate(redisClient
*c
, robj
*key
) {
6607 robj
*o
= lookupKeyWrite(c
->db
,key
);
6609 o
= createHashObject();
6610 dictAdd(c
->db
->dict
,key
,o
);
6613 if (o
->type
!= REDIS_HASH
) {
6614 addReply(c
,shared
.wrongtypeerr
);
6621 /* ============================= Hash commands ============================== */
6622 static void hsetCommand(redisClient
*c
) {
6626 if ((o
= hashLookupWriteOrCreate(c
,c
->argv
[1])) == NULL
) return;
6627 hashTryConversion(o
,c
->argv
,2,3);
6628 hashTryObjectEncoding(o
,&c
->argv
[2], &c
->argv
[3]);
6629 update
= hashSet(o
,c
->argv
[2],c
->argv
[3]);
6630 addReply(c
, update
? shared
.czero
: shared
.cone
);
6634 static void hsetnxCommand(redisClient
*c
) {
6636 if ((o
= hashLookupWriteOrCreate(c
,c
->argv
[1])) == NULL
) return;
6637 hashTryConversion(o
,c
->argv
,2,3);
6639 if (hashExists(o
, c
->argv
[2])) {
6640 addReply(c
, shared
.czero
);
6642 hashTryObjectEncoding(o
,&c
->argv
[2], &c
->argv
[3]);
6643 hashSet(o
,c
->argv
[2],c
->argv
[3]);
6644 addReply(c
, shared
.cone
);
6649 static void hmsetCommand(redisClient
*c
) {
6653 if ((c
->argc
% 2) == 1) {
6654 addReplySds(c
,sdsnew("-ERR wrong number of arguments for HMSET\r\n"));
6658 if ((o
= hashLookupWriteOrCreate(c
,c
->argv
[1])) == NULL
) return;
6659 hashTryConversion(o
,c
->argv
,2,c
->argc
-1);
6660 for (i
= 2; i
< c
->argc
; i
+= 2) {
6661 hashTryObjectEncoding(o
,&c
->argv
[i
], &c
->argv
[i
+1]);
6662 hashSet(o
,c
->argv
[i
],c
->argv
[i
+1]);
6664 addReply(c
, shared
.ok
);
6668 static void hincrbyCommand(redisClient
*c
) {
6669 long long value
, incr
;
6670 robj
*o
, *current
, *new;
6672 if (getLongLongFromObjectOrReply(c
,c
->argv
[3],&incr
,NULL
) != REDIS_OK
) return;
6673 if ((o
= hashLookupWriteOrCreate(c
,c
->argv
[1])) == NULL
) return;
6674 if ((current
= hashGet(o
,c
->argv
[2])) != NULL
) {
6675 if (getLongLongFromObjectOrReply(c
,current
,&value
,
6676 "hash value is not an integer") != REDIS_OK
) {
6677 decrRefCount(current
);
6680 decrRefCount(current
);
6686 new = createStringObjectFromLongLong(value
);
6687 hashTryObjectEncoding(o
,&c
->argv
[2],NULL
);
6688 hashSet(o
,c
->argv
[2],new);
6690 addReplyLongLong(c
,value
);
6694 static void hgetCommand(redisClient
*c
) {
6696 if ((o
= lookupKeyReadOrReply(c
,c
->argv
[1],shared
.nullbulk
)) == NULL
||
6697 checkType(c
,o
,REDIS_HASH
)) return;
6699 if ((value
= hashGet(o
,c
->argv
[2])) != NULL
) {
6700 addReplyBulk(c
,value
);
6701 decrRefCount(value
);
6703 addReply(c
,shared
.nullbulk
);
6707 static void hmgetCommand(redisClient
*c
) {
6710 o
= lookupKeyRead(c
->db
,c
->argv
[1]);
6711 if (o
!= NULL
&& o
->type
!= REDIS_HASH
) {
6712 addReply(c
,shared
.wrongtypeerr
);
6715 /* Note the check for o != NULL happens inside the loop. This is
6716 * done because objects that cannot be found are considered to be
6717 * an empty hash. The reply should then be a series of NULLs. */
6718 addReplySds(c
,sdscatprintf(sdsempty(),"*%d\r\n",c
->argc
-2));
6719 for (i
= 2; i
< c
->argc
; i
++) {
6720 if (o
!= NULL
&& (value
= hashGet(o
,c
->argv
[i
])) != NULL
) {
6721 addReplyBulk(c
,value
);
6722 decrRefCount(value
);
6724 addReply(c
,shared
.nullbulk
);
6729 static void hdelCommand(redisClient
*c
) {
6731 if ((o
= lookupKeyWriteOrReply(c
,c
->argv
[1],shared
.czero
)) == NULL
||
6732 checkType(c
,o
,REDIS_HASH
)) return;
6734 if (hashDelete(o
,c
->argv
[2])) {
6735 if (hashLength(o
) == 0) deleteKey(c
->db
,c
->argv
[1]);
6736 addReply(c
,shared
.cone
);
6739 addReply(c
,shared
.czero
);
6743 static void hlenCommand(redisClient
*c
) {
6745 if ((o
= lookupKeyReadOrReply(c
,c
->argv
[1],shared
.czero
)) == NULL
||
6746 checkType(c
,o
,REDIS_HASH
)) return;
6748 addReplyUlong(c
,hashLength(o
));
6751 static void genericHgetallCommand(redisClient
*c
, int flags
) {
6752 robj
*o
, *lenobj
, *obj
;
6753 unsigned long count
= 0;
6756 if ((o
= lookupKeyReadOrReply(c
,c
->argv
[1],shared
.emptymultibulk
)) == NULL
6757 || checkType(c
,o
,REDIS_HASH
)) return;
6759 lenobj
= createObject(REDIS_STRING
,NULL
);
6761 decrRefCount(lenobj
);
6763 hi
= hashInitIterator(o
);
6764 while (hashNext(hi
) != REDIS_ERR
) {
6765 if (flags
& REDIS_HASH_KEY
) {
6766 obj
= hashCurrent(hi
,REDIS_HASH_KEY
);
6767 addReplyBulk(c
,obj
);
6771 if (flags
& REDIS_HASH_VALUE
) {
6772 obj
= hashCurrent(hi
,REDIS_HASH_VALUE
);
6773 addReplyBulk(c
,obj
);
6778 hashReleaseIterator(hi
);
6780 lenobj
->ptr
= sdscatprintf(sdsempty(),"*%lu\r\n",count
);
6783 static void hkeysCommand(redisClient
*c
) {
6784 genericHgetallCommand(c
,REDIS_HASH_KEY
);
6787 static void hvalsCommand(redisClient
*c
) {
6788 genericHgetallCommand(c
,REDIS_HASH_VALUE
);
6791 static void hgetallCommand(redisClient
*c
) {
6792 genericHgetallCommand(c
,REDIS_HASH_KEY
|REDIS_HASH_VALUE
);
6795 static void hexistsCommand(redisClient
*c
) {
6797 if ((o
= lookupKeyReadOrReply(c
,c
->argv
[1],shared
.czero
)) == NULL
||
6798 checkType(c
,o
,REDIS_HASH
)) return;
6800 addReply(c
, hashExists(o
,c
->argv
[2]) ? shared
.cone
: shared
.czero
);
6803 static void convertToRealHash(robj
*o
) {
6804 unsigned char *key
, *val
, *p
, *zm
= o
->ptr
;
6805 unsigned int klen
, vlen
;
6806 dict
*dict
= dictCreate(&hashDictType
,NULL
);
6808 assert(o
->type
== REDIS_HASH
&& o
->encoding
!= REDIS_ENCODING_HT
);
6809 p
= zipmapRewind(zm
);
6810 while((p
= zipmapNext(p
,&key
,&klen
,&val
,&vlen
)) != NULL
) {
6811 robj
*keyobj
, *valobj
;
6813 keyobj
= createStringObject((char*)key
,klen
);
6814 valobj
= createStringObject((char*)val
,vlen
);
6815 keyobj
= tryObjectEncoding(keyobj
);
6816 valobj
= tryObjectEncoding(valobj
);
6817 dictAdd(dict
,keyobj
,valobj
);
6819 o
->encoding
= REDIS_ENCODING_HT
;
6824 /* ========================= Non type-specific commands ==================== */
6826 static void flushdbCommand(redisClient
*c
) {
6827 server
.dirty
+= dictSize(c
->db
->dict
);
6828 touchWatchedKeysOnFlush(c
->db
->id
);
6829 dictEmpty(c
->db
->dict
);
6830 dictEmpty(c
->db
->expires
);
6831 addReply(c
,shared
.ok
);
6834 static void flushallCommand(redisClient
*c
) {
6835 touchWatchedKeysOnFlush(-1);
6836 server
.dirty
+= emptyDb();
6837 addReply(c
,shared
.ok
);
6838 if (server
.bgsavechildpid
!= -1) {
6839 kill(server
.bgsavechildpid
,SIGKILL
);
6840 rdbRemoveTempFile(server
.bgsavechildpid
);
6842 rdbSave(server
.dbfilename
);
6846 static redisSortOperation
*createSortOperation(int type
, robj
*pattern
) {
6847 redisSortOperation
*so
= zmalloc(sizeof(*so
));
6849 so
->pattern
= pattern
;
6853 /* Return the value associated to the key with a name obtained
6854 * substituting the first occurrence of '*' in 'pattern' with 'subst'.
6855 * The returned object will always have its refcount increased by 1
6856 * when it is non-NULL. */
6857 static robj
*lookupKeyByPattern(redisDb
*db
, robj
*pattern
, robj
*subst
) {
6860 robj keyobj
, fieldobj
, *o
;
6861 int prefixlen
, sublen
, postfixlen
, fieldlen
;
6862 /* Exploit the internal sds representation to create an sds string allocated on the stack in order to make this function faster */
6866 char buf
[REDIS_SORTKEY_MAX
+1];
6867 } keyname
, fieldname
;
6869 /* If the pattern is "#" return the substitution object itself in order
6870 * to implement the "SORT ... GET #" feature. */
6871 spat
= pattern
->ptr
;
6872 if (spat
[0] == '#' && spat
[1] == '\0') {
6873 incrRefCount(subst
);
6877 /* The substitution object may be specially encoded. If so we create
6878 * a decoded object on the fly. Otherwise getDecodedObject will just
6879 * increment the ref count, that we'll decrement later. */
6880 subst
= getDecodedObject(subst
);
6883 if (sdslen(spat
)+sdslen(ssub
)-1 > REDIS_SORTKEY_MAX
) return NULL
;
6884 p
= strchr(spat
,'*');
6886 decrRefCount(subst
);
6890 /* Find out if we're dealing with a hash dereference. */
6891 if ((f
= strstr(p
+1, "->")) != NULL
) {
6892 fieldlen
= sdslen(spat
)-(f
-spat
);
6893 /* this also copies \0 character */
6894 memcpy(fieldname
.buf
,f
+2,fieldlen
-1);
6895 fieldname
.len
= fieldlen
-2;
6901 sublen
= sdslen(ssub
);
6902 postfixlen
= sdslen(spat
)-(prefixlen
+1)-fieldlen
;
6903 memcpy(keyname
.buf
,spat
,prefixlen
);
6904 memcpy(keyname
.buf
+prefixlen
,ssub
,sublen
);
6905 memcpy(keyname
.buf
+prefixlen
+sublen
,p
+1,postfixlen
);
6906 keyname
.buf
[prefixlen
+sublen
+postfixlen
] = '\0';
6907 keyname
.len
= prefixlen
+sublen
+postfixlen
;
6908 decrRefCount(subst
);
6910 /* Lookup substituted key */
6911 initStaticStringObject(keyobj
,((char*)&keyname
)+(sizeof(long)*2));
6912 o
= lookupKeyRead(db
,&keyobj
);
6913 if (o
== NULL
) return NULL
;
6916 if (o
->type
!= REDIS_HASH
|| fieldname
.len
< 1) return NULL
;
6918 /* Retrieve value from hash by the field name. This operation
6919 * already increases the refcount of the returned object. */
6920 initStaticStringObject(fieldobj
,((char*)&fieldname
)+(sizeof(long)*2));
6921 o
= hashGet(o
, &fieldobj
);
6923 if (o
->type
!= REDIS_STRING
) return NULL
;
6925 /* Every object that this function returns needs to have its refcount
6926 * increased. sortCommand decreases it again. */
6933 /* sortCompare() is used by qsort in sortCommand(). Given that qsort_r with
6934 * the additional parameter is not standard but BSD-specific we have to
6935 * pass sorting parameters via the global 'server' structure */
6936 static int sortCompare(const void *s1
, const void *s2
) {
6937 const redisSortObject
*so1
= s1
, *so2
= s2
;
6940 if (!server
.sort_alpha
) {
6941 /* Numeric sorting. Here it's trivial as we precomputed scores */
6942 if (so1
->u
.score
> so2
->u
.score
) {
6944 } else if (so1
->u
.score
< so2
->u
.score
) {
6950 /* Alphanumeric sorting */
6951 if (server
.sort_bypattern
) {
6952 if (!so1
->u
.cmpobj
|| !so2
->u
.cmpobj
) {
6953 /* At least one compare object is NULL */
6954 if (so1
->u
.cmpobj
== so2
->u
.cmpobj
)
6956 else if (so1
->u
.cmpobj
== NULL
)
6961 /* We have both the objects, use strcoll */
6962 cmp
= strcoll(so1
->u
.cmpobj
->ptr
,so2
->u
.cmpobj
->ptr
);
6965 /* Compare elements directly. */
6966 cmp
= compareStringObjects(so1
->obj
,so2
->obj
);
6969 return server
.sort_desc
? -cmp
: cmp
;
6972 /* The SORT command is the most complex command in Redis. Warning: this code
6973 * is optimized for speed and a bit less for readability */
6974 static void sortCommand(redisClient
*c
) {
6977 int desc
= 0, alpha
= 0;
6978 int limit_start
= 0, limit_count
= -1, start
, end
;
6979 int j
, dontsort
= 0, vectorlen
;
6980 int getop
= 0; /* GET operation counter */
6981 robj
*sortval
, *sortby
= NULL
, *storekey
= NULL
;
6982 redisSortObject
*vector
; /* Resulting vector to sort */
6984 /* Lookup the key to sort. It must be of the right types */
6985 sortval
= lookupKeyRead(c
->db
,c
->argv
[1]);
6986 if (sortval
== NULL
) {
6987 addReply(c
,shared
.emptymultibulk
);
6990 if (sortval
->type
!= REDIS_SET
&& sortval
->type
!= REDIS_LIST
&&
6991 sortval
->type
!= REDIS_ZSET
)
6993 addReply(c
,shared
.wrongtypeerr
);
6997 /* Create a list of operations to perform for every sorted element.
6998 * Operations can be GET/DEL/INCR/DECR */
6999 operations
= listCreate();
7000 listSetFreeMethod(operations
,zfree
);
7003 /* Now we need to protect sortval incrementing its count, in the future
7004 * SORT may have options able to overwrite/delete keys during the sorting
7005 * and the sorted key itself may get destroyed */
7006 incrRefCount(sortval
);
7008 /* The SORT command has an SQL-alike syntax, parse it */
7009 while(j
< c
->argc
) {
7010 int leftargs
= c
->argc
-j
-1;
7011 if (!strcasecmp(c
->argv
[j
]->ptr
,"asc")) {
7013 } else if (!strcasecmp(c
->argv
[j
]->ptr
,"desc")) {
7015 } else if (!strcasecmp(c
->argv
[j
]->ptr
,"alpha")) {
7017 } else if (!strcasecmp(c
->argv
[j
]->ptr
,"limit") && leftargs
>= 2) {
7018 limit_start
= atoi(c
->argv
[j
+1]->ptr
);
7019 limit_count
= atoi(c
->argv
[j
+2]->ptr
);
7021 } else if (!strcasecmp(c
->argv
[j
]->ptr
,"store") && leftargs
>= 1) {
7022 storekey
= c
->argv
[j
+1];
7024 } else if (!strcasecmp(c
->argv
[j
]->ptr
,"by") && leftargs
>= 1) {
7025 sortby
= c
->argv
[j
+1];
7026 /* If the BY pattern does not contain '*', i.e. it is constant,
7027 * we don't need to sort nor to lookup the weight keys. */
7028 if (strchr(c
->argv
[j
+1]->ptr
,'*') == NULL
) dontsort
= 1;
7030 } else if (!strcasecmp(c
->argv
[j
]->ptr
,"get") && leftargs
>= 1) {
7031 listAddNodeTail(operations
,createSortOperation(
7032 REDIS_SORT_GET
,c
->argv
[j
+1]));
7036 decrRefCount(sortval
);
7037 listRelease(operations
);
7038 addReply(c
,shared
.syntaxerr
);
7044 /* Load the sorting vector with all the objects to sort */
7045 switch(sortval
->type
) {
7046 case REDIS_LIST
: vectorlen
= listLength((list
*)sortval
->ptr
); break;
7047 case REDIS_SET
: vectorlen
= dictSize((dict
*)sortval
->ptr
); break;
7048 case REDIS_ZSET
: vectorlen
= dictSize(((zset
*)sortval
->ptr
)->dict
); break;
7049 default: vectorlen
= 0; redisPanic("Bad SORT type"); /* Avoid GCC warning */
7051 vector
= zmalloc(sizeof(redisSortObject
)*vectorlen
);
7054 if (sortval
->type
== REDIS_LIST
) {
7055 list
*list
= sortval
->ptr
;
7059 listRewind(list
,&li
);
7060 while((ln
= listNext(&li
))) {
7061 robj
*ele
= ln
->value
;
7062 vector
[j
].obj
= ele
;
7063 vector
[j
].u
.score
= 0;
7064 vector
[j
].u
.cmpobj
= NULL
;
7072 if (sortval
->type
== REDIS_SET
) {
7075 zset
*zs
= sortval
->ptr
;
7079 di
= dictGetIterator(set
);
7080 while((setele
= dictNext(di
)) != NULL
) {
7081 vector
[j
].obj
= dictGetEntryKey(setele
);
7082 vector
[j
].u
.score
= 0;
7083 vector
[j
].u
.cmpobj
= NULL
;
7086 dictReleaseIterator(di
);
7088 redisAssert(j
== vectorlen
);
7090 /* Now it's time to load the right scores in the sorting vector */
7091 if (dontsort
== 0) {
7092 for (j
= 0; j
< vectorlen
; j
++) {
7095 /* lookup value to sort by */
7096 byval
= lookupKeyByPattern(c
->db
,sortby
,vector
[j
].obj
);
7097 if (!byval
) continue;
7099 /* use object itself to sort by */
7100 byval
= vector
[j
].obj
;
7104 if (sortby
) vector
[j
].u
.cmpobj
= getDecodedObject(byval
);
7106 if (byval
->encoding
== REDIS_ENCODING_RAW
) {
7107 vector
[j
].u
.score
= strtod(byval
->ptr
,NULL
);
7108 } else if (byval
->encoding
== REDIS_ENCODING_INT
) {
7109 /* Don't need to decode the object if it's
7110 * integer-encoded (the only encoding supported) so
7111 * far. We can just cast it */
7112 vector
[j
].u
.score
= (long)byval
->ptr
;
7114 redisAssert(1 != 1);
7118 /* when the object was retrieved using lookupKeyByPattern,
7119 * its refcount needs to be decreased. */
7121 decrRefCount(byval
);
7126 /* We are ready to sort the vector... perform a bit of sanity check
7127 * on the LIMIT option too. We'll use a partial version of quicksort. */
7128 start
= (limit_start
< 0) ? 0 : limit_start
;
7129 end
= (limit_count
< 0) ? vectorlen
-1 : start
+limit_count
-1;
7130 if (start
>= vectorlen
) {
7131 start
= vectorlen
-1;
7134 if (end
>= vectorlen
) end
= vectorlen
-1;
7136 if (dontsort
== 0) {
7137 server
.sort_desc
= desc
;
7138 server
.sort_alpha
= alpha
;
7139 server
.sort_bypattern
= sortby
? 1 : 0;
7140 if (sortby
&& (start
!= 0 || end
!= vectorlen
-1))
7141 pqsort(vector
,vectorlen
,sizeof(redisSortObject
),sortCompare
, start
,end
);
7143 qsort(vector
,vectorlen
,sizeof(redisSortObject
),sortCompare
);
7146 /* Send command output to the output buffer, performing the specified
7147 * GET/DEL/INCR/DECR operations if any. */
7148 outputlen
= getop
? getop
*(end
-start
+1) : end
-start
+1;
7149 if (storekey
== NULL
) {
7150 /* STORE option not specified, send the sorting result to client */
7151 addReplySds(c
,sdscatprintf(sdsempty(),"*%d\r\n",outputlen
));
7152 for (j
= start
; j
<= end
; j
++) {
7156 if (!getop
) addReplyBulk(c
,vector
[j
].obj
);
7157 listRewind(operations
,&li
);
7158 while((ln
= listNext(&li
))) {
7159 redisSortOperation
*sop
= ln
->value
;
7160 robj
*val
= lookupKeyByPattern(c
->db
,sop
->pattern
,
7163 if (sop
->type
== REDIS_SORT_GET
) {
7165 addReply(c
,shared
.nullbulk
);
7167 addReplyBulk(c
,val
);
7171 redisAssert(sop
->type
== REDIS_SORT_GET
); /* always fails */
7176 robj
*listObject
= createListObject();
7177 list
*listPtr
= (list
*) listObject
->ptr
;
7179 /* STORE option specified, set the sorting result as a List object */
7180 for (j
= start
; j
<= end
; j
++) {
7185 listAddNodeTail(listPtr
,vector
[j
].obj
);
7186 incrRefCount(vector
[j
].obj
);
7188 listRewind(operations
,&li
);
7189 while((ln
= listNext(&li
))) {
7190 redisSortOperation
*sop
= ln
->value
;
7191 robj
*val
= lookupKeyByPattern(c
->db
,sop
->pattern
,
7194 if (sop
->type
== REDIS_SORT_GET
) {
7196 listAddNodeTail(listPtr
,createStringObject("",0));
7198 /* We should do a incrRefCount on val because it is
7199 * added to the list, but also a decrRefCount because
7200 * it is returned by lookupKeyByPattern. This results
7201 * in doing nothing at all. */
7202 listAddNodeTail(listPtr
,val
);
7205 redisAssert(sop
->type
== REDIS_SORT_GET
); /* always fails */
7209 if (dictReplace(c
->db
->dict
,storekey
,listObject
)) {
7210 incrRefCount(storekey
);
7212 /* Note: we add 1 because the DB is dirty anyway since even if the
7213 * SORT result is empty a new key is set and maybe the old content
7215 server
.dirty
+= 1+outputlen
;
7216 addReplySds(c
,sdscatprintf(sdsempty(),":%d\r\n",outputlen
));
7220 decrRefCount(sortval
);
7221 listRelease(operations
);
7222 for (j
= 0; j
< vectorlen
; j
++) {
7223 if (alpha
&& vector
[j
].u
.cmpobj
)
7224 decrRefCount(vector
[j
].u
.cmpobj
);
7229 /* Convert an amount of bytes into a human readable string in the form
7230 * of 100B, 2G, 100M, 4K, and so forth. */
7231 static void bytesToHuman(char *s
, unsigned long long n
) {
7236 sprintf(s
,"%lluB",n
);
7238 } else if (n
< (1024*1024)) {
7239 d
= (double)n
/(1024);
7240 sprintf(s
,"%.2fK",d
);
7241 } else if (n
< (1024LL*1024*1024)) {
7242 d
= (double)n
/(1024*1024);
7243 sprintf(s
,"%.2fM",d
);
7244 } else if (n
< (1024LL*1024*1024*1024)) {
7245 d
= (double)n
/(1024LL*1024*1024);
7246 sprintf(s
,"%.2fG",d
);
7250 /* Create the string returned by the INFO command. This is decoupled
7251 * from the INFO command itself as we need to report the same information
7252 * on memory corruption problems. */
7253 static sds
genRedisInfoString(void) {
7255 time_t uptime
= time(NULL
)-server
.stat_starttime
;
7259 bytesToHuman(hmem
,zmalloc_used_memory());
7260 info
= sdscatprintf(sdsempty(),
7261 "redis_version:%s\r\n"
7262 "redis_git_sha1:%s\r\n"
7263 "redis_git_dirty:%d\r\n"
7265 "multiplexing_api:%s\r\n"
7266 "process_id:%ld\r\n"
7267 "uptime_in_seconds:%ld\r\n"
7268 "uptime_in_days:%ld\r\n"
7269 "connected_clients:%d\r\n"
7270 "connected_slaves:%d\r\n"
7271 "blocked_clients:%d\r\n"
7272 "used_memory:%zu\r\n"
7273 "used_memory_human:%s\r\n"
7274 "changes_since_last_save:%lld\r\n"
7275 "bgsave_in_progress:%d\r\n"
7276 "last_save_time:%ld\r\n"
7277 "bgrewriteaof_in_progress:%d\r\n"
7278 "total_connections_received:%lld\r\n"
7279 "total_commands_processed:%lld\r\n"
7280 "expired_keys:%lld\r\n"
7281 "hash_max_zipmap_entries:%zu\r\n"
7282 "hash_max_zipmap_value:%zu\r\n"
7283 "pubsub_channels:%ld\r\n"
7284 "pubsub_patterns:%u\r\n"
7289 strtol(REDIS_GIT_DIRTY
,NULL
,10) > 0,
7290 (sizeof(long) == 8) ? "64" : "32",
7295 listLength(server
.clients
)-listLength(server
.slaves
),
7296 listLength(server
.slaves
),
7297 server
.blpop_blocked_clients
,
7298 zmalloc_used_memory(),
7301 server
.bgsavechildpid
!= -1,
7303 server
.bgrewritechildpid
!= -1,
7304 server
.stat_numconnections
,
7305 server
.stat_numcommands
,
7306 server
.stat_expiredkeys
,
7307 server
.hash_max_zipmap_entries
,
7308 server
.hash_max_zipmap_value
,
7309 dictSize(server
.pubsub_channels
),
7310 listLength(server
.pubsub_patterns
),
7311 server
.vm_enabled
!= 0,
7312 server
.masterhost
== NULL
? "master" : "slave"
7314 if (server
.masterhost
) {
7315 info
= sdscatprintf(info
,
7316 "master_host:%s\r\n"
7317 "master_port:%d\r\n"
7318 "master_link_status:%s\r\n"
7319 "master_last_io_seconds_ago:%d\r\n"
7322 (server
.replstate
== REDIS_REPL_CONNECTED
) ?
7324 server
.master
? ((int)(time(NULL
)-server
.master
->lastinteraction
)) : -1
7327 if (server
.vm_enabled
) {
7329 info
= sdscatprintf(info
,
7330 "vm_conf_max_memory:%llu\r\n"
7331 "vm_conf_page_size:%llu\r\n"
7332 "vm_conf_pages:%llu\r\n"
7333 "vm_stats_used_pages:%llu\r\n"
7334 "vm_stats_swapped_objects:%llu\r\n"
7335 "vm_stats_swappin_count:%llu\r\n"
7336 "vm_stats_swappout_count:%llu\r\n"
7337 "vm_stats_io_newjobs_len:%lu\r\n"
7338 "vm_stats_io_processing_len:%lu\r\n"
7339 "vm_stats_io_processed_len:%lu\r\n"
7340 "vm_stats_io_active_threads:%lu\r\n"
7341 "vm_stats_blocked_clients:%lu\r\n"
7342 ,(unsigned long long) server
.vm_max_memory
,
7343 (unsigned long long) server
.vm_page_size
,
7344 (unsigned long long) server
.vm_pages
,
7345 (unsigned long long) server
.vm_stats_used_pages
,
7346 (unsigned long long) server
.vm_stats_swapped_objects
,
7347 (unsigned long long) server
.vm_stats_swapins
,
7348 (unsigned long long) server
.vm_stats_swapouts
,
7349 (unsigned long) listLength(server
.io_newjobs
),
7350 (unsigned long) listLength(server
.io_processing
),
7351 (unsigned long) listLength(server
.io_processed
),
7352 (unsigned long) server
.io_active_threads
,
7353 (unsigned long) server
.vm_blocked_clients
7357 for (j
= 0; j
< server
.dbnum
; j
++) {
7358 long long keys
, vkeys
;
7360 keys
= dictSize(server
.db
[j
].dict
);
7361 vkeys
= dictSize(server
.db
[j
].expires
);
7362 if (keys
|| vkeys
) {
7363 info
= sdscatprintf(info
, "db%d:keys=%lld,expires=%lld\r\n",
7370 static void infoCommand(redisClient
*c
) {
7371 sds info
= genRedisInfoString();
7372 addReplySds(c
,sdscatprintf(sdsempty(),"$%lu\r\n",
7373 (unsigned long)sdslen(info
)));
7374 addReplySds(c
,info
);
7375 addReply(c
,shared
.crlf
);
7378 static void monitorCommand(redisClient
*c
) {
7379 /* ignore MONITOR if already slave or in monitor mode */
7380 if (c
->flags
& REDIS_SLAVE
) return;
7382 c
->flags
|= (REDIS_SLAVE
|REDIS_MONITOR
);
7384 listAddNodeTail(server
.monitors
,c
);
7385 addReply(c
,shared
.ok
);
7388 /* ================================= Expire ================================= */
7389 static int removeExpire(redisDb
*db
, robj
*key
) {
7390 if (dictDelete(db
->expires
,key
) == DICT_OK
) {
7397 static int setExpire(redisDb
*db
, robj
*key
, time_t when
) {
7398 if (dictAdd(db
->expires
,key
,(void*)when
) == DICT_ERR
) {
7406 /* Return the expire time of the specified key, or -1 if no expire
7407 * is associated with this key (i.e. the key is non volatile) */
7408 static time_t getExpire(redisDb
*db
, robj
*key
) {
7411 /* No expire? return ASAP */
7412 if (dictSize(db
->expires
) == 0 ||
7413 (de
= dictFind(db
->expires
,key
)) == NULL
) return -1;
7415 return (time_t) dictGetEntryVal(de
);
7418 static int expireIfNeeded(redisDb
*db
, robj
*key
) {
7422 /* No expire? return ASAP */
7423 if (dictSize(db
->expires
) == 0 ||
7424 (de
= dictFind(db
->expires
,key
)) == NULL
) return 0;
7426 /* Lookup the expire */
7427 when
= (time_t) dictGetEntryVal(de
);
7428 if (time(NULL
) <= when
) return 0;
7430 /* Delete the key */
7431 dictDelete(db
->expires
,key
);
7432 server
.stat_expiredkeys
++;
7433 return dictDelete(db
->dict
,key
) == DICT_OK
;
7436 static int deleteIfVolatile(redisDb
*db
, robj
*key
) {
7439 /* No expire? return ASAP */
7440 if (dictSize(db
->expires
) == 0 ||
7441 (de
= dictFind(db
->expires
,key
)) == NULL
) return 0;
7443 /* Delete the key */
7445 server
.stat_expiredkeys
++;
7446 dictDelete(db
->expires
,key
);
7447 return dictDelete(db
->dict
,key
) == DICT_OK
;
7450 static void expireGenericCommand(redisClient
*c
, robj
*key
, robj
*param
, long offset
) {
7454 if (getLongFromObjectOrReply(c
, param
, &seconds
, NULL
) != REDIS_OK
) return;
7458 de
= dictFind(c
->db
->dict
,key
);
7460 addReply(c
,shared
.czero
);
7464 if (deleteKey(c
->db
,key
)) server
.dirty
++;
7465 addReply(c
, shared
.cone
);
7468 time_t when
= time(NULL
)+seconds
;
7469 if (setExpire(c
->db
,key
,when
)) {
7470 addReply(c
,shared
.cone
);
7473 addReply(c
,shared
.czero
);
7479 static void expireCommand(redisClient
*c
) {
7480 expireGenericCommand(c
,c
->argv
[1],c
->argv
[2],0);
7483 static void expireatCommand(redisClient
*c
) {
7484 expireGenericCommand(c
,c
->argv
[1],c
->argv
[2],time(NULL
));
7487 static void ttlCommand(redisClient
*c
) {
7491 expire
= getExpire(c
->db
,c
->argv
[1]);
7493 ttl
= (int) (expire
-time(NULL
));
7494 if (ttl
< 0) ttl
= -1;
7496 addReplySds(c
,sdscatprintf(sdsempty(),":%d\r\n",ttl
));
7499 /* ================================ MULTI/EXEC ============================== */
7501 /* Client state initialization for MULTI/EXEC */
7502 static void initClientMultiState(redisClient
*c
) {
7503 c
->mstate
.commands
= NULL
;
7504 c
->mstate
.count
= 0;
7507 /* Release all the resources associated with MULTI/EXEC state */
7508 static void freeClientMultiState(redisClient
*c
) {
7511 for (j
= 0; j
< c
->mstate
.count
; j
++) {
7513 multiCmd
*mc
= c
->mstate
.commands
+j
;
7515 for (i
= 0; i
< mc
->argc
; i
++)
7516 decrRefCount(mc
->argv
[i
]);
7519 zfree(c
->mstate
.commands
);
7522 /* Add a new command into the MULTI commands queue */
7523 static void queueMultiCommand(redisClient
*c
, struct redisCommand
*cmd
) {
7527 c
->mstate
.commands
= zrealloc(c
->mstate
.commands
,
7528 sizeof(multiCmd
)*(c
->mstate
.count
+1));
7529 mc
= c
->mstate
.commands
+c
->mstate
.count
;
7532 mc
->argv
= zmalloc(sizeof(robj
*)*c
->argc
);
7533 memcpy(mc
->argv
,c
->argv
,sizeof(robj
*)*c
->argc
);
7534 for (j
= 0; j
< c
->argc
; j
++)
7535 incrRefCount(mc
->argv
[j
]);
7539 static void multiCommand(redisClient
*c
) {
7540 if (c
->flags
& REDIS_MULTI
) {
7541 addReplySds(c
,sdsnew("-ERR MULTI calls can not be nested\r\n"));
7544 c
->flags
|= REDIS_MULTI
;
7545 addReply(c
,shared
.ok
);
7548 static void discardCommand(redisClient
*c
) {
7549 if (!(c
->flags
& REDIS_MULTI
)) {
7550 addReplySds(c
,sdsnew("-ERR DISCARD without MULTI\r\n"));
7554 freeClientMultiState(c
);
7555 initClientMultiState(c
);
7556 c
->flags
&= (~REDIS_MULTI
);
7557 addReply(c
,shared
.ok
);
7560 /* Send a MULTI command to all the slaves and AOF file. Check the execCommand
7561 * implementation for more information. */
7562 static void execCommandReplicateMulti(redisClient
*c
) {
7563 struct redisCommand
*cmd
;
7564 robj
*multistring
= createStringObject("MULTI",5);
7566 cmd
= lookupCommand("multi");
7567 if (server
.appendonly
)
7568 feedAppendOnlyFile(cmd
,c
->db
->id
,&multistring
,1);
7569 if (listLength(server
.slaves
))
7570 replicationFeedSlaves(server
.slaves
,c
->db
->id
,&multistring
,1);
7571 decrRefCount(multistring
);
7574 static void execCommand(redisClient
*c
) {
7579 if (!(c
->flags
& REDIS_MULTI
)) {
7580 addReplySds(c
,sdsnew("-ERR EXEC without MULTI\r\n"));
7584 /* Check if we need to abort the EXEC if some WATCHed key was touched.
7585 * A failed EXEC will return a multi bulk nil object. */
7586 if (c
->flags
& REDIS_DIRTY_CAS
) {
7587 freeClientMultiState(c
);
7588 initClientMultiState(c
);
7589 c
->flags
&= ~(REDIS_MULTI
|REDIS_DIRTY_CAS
);
7591 addReply(c
,shared
.nullmultibulk
);
7595 /* Replicate a MULTI request now that we are sure the block is executed.
7596 * This way we'll deliver the MULTI/..../EXEC block as a whole and
7597 * both the AOF and the replication link will have the same consistency
7598 * and atomicity guarantees. */
7599 execCommandReplicateMulti(c
);
7601 /* Exec all the queued commands */
7602 unwatchAllKeys(c
); /* Unwatch ASAP otherwise we'll waste CPU cycles */
7603 orig_argv
= c
->argv
;
7604 orig_argc
= c
->argc
;
7605 addReplySds(c
,sdscatprintf(sdsempty(),"*%d\r\n",c
->mstate
.count
));
7606 for (j
= 0; j
< c
->mstate
.count
; j
++) {
7607 c
->argc
= c
->mstate
.commands
[j
].argc
;
7608 c
->argv
= c
->mstate
.commands
[j
].argv
;
7609 call(c
,c
->mstate
.commands
[j
].cmd
);
7611 c
->argv
= orig_argv
;
7612 c
->argc
= orig_argc
;
7613 freeClientMultiState(c
);
7614 initClientMultiState(c
);
7615 c
->flags
&= ~(REDIS_MULTI
|REDIS_DIRTY_CAS
);
7616 /* Make sure the EXEC command is always replicated / AOF, since we
7617 * always send the MULTI command (we can't know beforehand if the
7618 * next operations will contain at least a modification to the DB). */
7622 /* =========================== Blocking Operations ========================= */
7624 /* Currently Redis blocking operations support is limited to list POP ops,
7625 * so the current implementation is not fully generic, but it is also not
7626 * completely specific so it will not require a rewrite to support new
7627 * kinds of blocking operations in the future.
7629 * Still it's important to note that list blocking operations can be already
7630 * used as a notification mechanism in order to implement other blocking
7631 * operations at application level, so there must be a very strong evidence
7632 * of usefulness and generality before new blocking operations are implemented.
7634 * This is how the current blocking POP works, we use BLPOP as example:
7635 * - If the user calls BLPOP and the key exists and contains a non empty list
7636 * then LPOP is called instead. So BLPOP is semantically the same as LPOP
7637 * if there is no need to block.
7638 * - If instead BLPOP is called and the key does not exist or the list is
7639 * empty we need to block. In order to do so we remove the notification for
7640 * new data to read in the client socket (so that we'll not serve new
7641 * requests if the blocking request is not served). Also we put the client
7642 * in a dictionary (db->blocking_keys) mapping keys to a list of clients
7643 * blocking for these keys.
7644 * - If a PUSH operation against a key with blocked clients waiting is
7645 * performed, we serve the first in the list: basically instead of pushing
7646 * the new element inside the list we return it to the (first / oldest)
7647 * blocking client, unblock the client, and remove it from the list.
7649 * The above comment and the source code should be enough in order to understand
7650 * the implementation and modify / fix it later.
7653 /* Set a client in blocking mode for the specified key, with the specified
7655 static void blockForKeys(redisClient
*c
, robj
**keys
, int numkeys
, time_t timeout
) {
7660 c
->blocking_keys
= zmalloc(sizeof(robj
*)*numkeys
);
7661 c
->blocking_keys_num
= numkeys
;
7662 c
->blockingto
= timeout
;
7663 for (j
= 0; j
< numkeys
; j
++) {
7664 /* Add the key in the client structure, to map clients -> keys */
7665 c
->blocking_keys
[j
] = keys
[j
];
7666 incrRefCount(keys
[j
]);
7668 /* And in the other "side", to map keys -> clients */
7669 de
= dictFind(c
->db
->blocking_keys
,keys
[j
]);
7673 /* For every key we take a list of clients blocked for it */
7675 retval
= dictAdd(c
->db
->blocking_keys
,keys
[j
],l
);
7676 incrRefCount(keys
[j
]);
7677 assert(retval
== DICT_OK
);
7679 l
= dictGetEntryVal(de
);
7681 listAddNodeTail(l
,c
);
7683 /* Mark the client as a blocked client */
7684 c
->flags
|= REDIS_BLOCKED
;
7685 server
.blpop_blocked_clients
++;
7688 /* Unblock a client that's waiting in a blocking operation such as BLPOP */
7689 static void unblockClientWaitingData(redisClient
*c
) {
7694 assert(c
->blocking_keys
!= NULL
);
7695 /* The client may wait for multiple keys, so unblock it for every key. */
7696 for (j
= 0; j
< c
->blocking_keys_num
; j
++) {
7697 /* Remove this client from the list of clients waiting for this key. */
7698 de
= dictFind(c
->db
->blocking_keys
,c
->blocking_keys
[j
]);
7700 l
= dictGetEntryVal(de
);
7701 listDelNode(l
,listSearchKey(l
,c
));
7702 /* If the list is empty we need to remove it to avoid wasting memory */
7703 if (listLength(l
) == 0)
7704 dictDelete(c
->db
->blocking_keys
,c
->blocking_keys
[j
]);
7705 decrRefCount(c
->blocking_keys
[j
]);
7707 /* Cleanup the client structure */
7708 zfree(c
->blocking_keys
);
7709 c
->blocking_keys
= NULL
;
7710 c
->flags
&= (~REDIS_BLOCKED
);
7711 server
.blpop_blocked_clients
--;
7712 /* We want to process data if there is some command waiting
7713 * in the input buffer. Note that this is safe even if
7714 * unblockClientWaitingData() gets called from freeClient() because
7715 * freeClient() will be smart enough to call this function
7716 * *after* c->querybuf was set to NULL. */
7717 if (c
->querybuf
&& sdslen(c
->querybuf
) > 0) processInputBuffer(c
);
7720 /* This should be called from any function PUSHing into lists.
7721 * 'c' is the "pushing client", 'key' is the key it is pushing data against,
7722 * 'ele' is the element pushed.
7724 * If the function returns 0 there was no client waiting for a list push
7727 * If the function returns 1 there was a client waiting for a list push
7728 * against this key, the element was passed to this client thus it's not
7729 * needed to actually add it to the list and the caller should return asap. */
7730 static int handleClientsWaitingListPush(redisClient
*c
, robj
*key
, robj
*ele
) {
7731 struct dictEntry
*de
;
7732 redisClient
*receiver
;
7736 de
= dictFind(c
->db
->blocking_keys
,key
);
7737 if (de
== NULL
) return 0;
7738 l
= dictGetEntryVal(de
);
7741 receiver
= ln
->value
;
7743 addReplySds(receiver
,sdsnew("*2\r\n"));
7744 addReplyBulk(receiver
,key
);
7745 addReplyBulk(receiver
,ele
);
7746 unblockClientWaitingData(receiver
);
7750 /* Blocking RPOP/LPOP */
7751 static void blockingPopGenericCommand(redisClient
*c
, int where
) {
7756 for (j
= 1; j
< c
->argc
-1; j
++) {
7757 o
= lookupKeyWrite(c
->db
,c
->argv
[j
]);
7759 if (o
->type
!= REDIS_LIST
) {
7760 addReply(c
,shared
.wrongtypeerr
);
7763 list
*list
= o
->ptr
;
7764 if (listLength(list
) != 0) {
7765 /* If the list contains elements fall back to the usual
7766 * non-blocking POP operation */
7767 robj
*argv
[2], **orig_argv
;
7770 /* We need to alter the command arguments before to call
7771 * popGenericCommand() as the command takes a single key. */
7772 orig_argv
= c
->argv
;
7773 orig_argc
= c
->argc
;
7774 argv
[1] = c
->argv
[j
];
7778 /* Also the return value is different, we need to output
7779 * the multi bulk reply header and the key name. The
7780 * "real" command will add the last element (the value)
7781 * for us. If this souds like an hack to you it's just
7782 * because it is... */
7783 addReplySds(c
,sdsnew("*2\r\n"));
7784 addReplyBulk(c
,argv
[1]);
7785 popGenericCommand(c
,where
);
7787 /* Fix the client structure with the original stuff */
7788 c
->argv
= orig_argv
;
7789 c
->argc
= orig_argc
;
7795 /* If the list is empty or the key does not exists we must block */
7796 timeout
= strtol(c
->argv
[c
->argc
-1]->ptr
,NULL
,10);
7797 if (timeout
> 0) timeout
+= time(NULL
);
7798 blockForKeys(c
,c
->argv
+1,c
->argc
-2,timeout
);
7801 static void blpopCommand(redisClient
*c
) {
7802 blockingPopGenericCommand(c
,REDIS_HEAD
);
7805 static void brpopCommand(redisClient
*c
) {
7806 blockingPopGenericCommand(c
,REDIS_TAIL
);
7809 /* =============================== Replication ============================= */
7811 static int syncWrite(int fd
, char *ptr
, ssize_t size
, int timeout
) {
7812 ssize_t nwritten
, ret
= size
;
7813 time_t start
= time(NULL
);
7817 if (aeWait(fd
,AE_WRITABLE
,1000) & AE_WRITABLE
) {
7818 nwritten
= write(fd
,ptr
,size
);
7819 if (nwritten
== -1) return -1;
7823 if ((time(NULL
)-start
) > timeout
) {
7831 static int syncRead(int fd
, char *ptr
, ssize_t size
, int timeout
) {
7832 ssize_t nread
, totread
= 0;
7833 time_t start
= time(NULL
);
7837 if (aeWait(fd
,AE_READABLE
,1000) & AE_READABLE
) {
7838 nread
= read(fd
,ptr
,size
);
7839 if (nread
== -1) return -1;
7844 if ((time(NULL
)-start
) > timeout
) {
/* Read a line from 'fd' one byte at a time into 'ptr', a buffer of 'size'
 * bytes. Reading stops at '\n'; a '\r' immediately before it is stripped.
 * The buffer is always NUL terminated (when size >= 1) and at most size-1
 * characters are stored, so an overlong line is truncated instead of
 * overrunning the buffer.
 *
 * Returns the number of characters stored, or -1 if syncRead() fails. */
static int syncReadLine(int fd, char *ptr, ssize_t size, int timeout) {
    ssize_t nread = 0;

    if (size < 1) return 0;
    *ptr = '\0';
    size--; /* reserve room for the terminator */
    while(size > 0) {
        char c;

        if (syncRead(fd,&c,1,timeout) == -1) return -1;
        if (c == '\n') {
            *ptr = '\0';
            if (nread && *(ptr-1) == '\r') *(ptr-1) = '\0';
            return nread;
        }
        *ptr++ = c;
        *ptr = '\0';
        nread++;
        size--; /* one less free slot in the buffer */
    }
    return nread;
}
7873 static void syncCommand(redisClient
*c
) {
7874 /* ignore SYNC if aleady slave or in monitor mode */
7875 if (c
->flags
& REDIS_SLAVE
) return;
7877 /* SYNC can't be issued when the server has pending data to send to
7878 * the client about already issued commands. We need a fresh reply
7879 * buffer registering the differences between the BGSAVE and the current
7880 * dataset, so that we can copy to other slaves if needed. */
7881 if (listLength(c
->reply
) != 0) {
7882 addReplySds(c
,sdsnew("-ERR SYNC is invalid with pending input\r\n"));
7886 redisLog(REDIS_NOTICE
,"Slave ask for synchronization");
7887 /* Here we need to check if there is a background saving operation
7888 * in progress, or if it is required to start one */
7889 if (server
.bgsavechildpid
!= -1) {
7890 /* Ok a background save is in progress. Let's check if it is a good
7891 * one for replication, i.e. if there is another slave that is
7892 * registering differences since the server forked to save */
7897 listRewind(server
.slaves
,&li
);
7898 while((ln
= listNext(&li
))) {
7900 if (slave
->replstate
== REDIS_REPL_WAIT_BGSAVE_END
) break;
7903 /* Perfect, the server is already registering differences for
7904 * another slave. Set the right state, and copy the buffer. */
7905 listRelease(c
->reply
);
7906 c
->reply
= listDup(slave
->reply
);
7907 c
->replstate
= REDIS_REPL_WAIT_BGSAVE_END
;
7908 redisLog(REDIS_NOTICE
,"Waiting for end of BGSAVE for SYNC");
7910 /* No way, we need to wait for the next BGSAVE in order to
7911 * register differences */
7912 c
->replstate
= REDIS_REPL_WAIT_BGSAVE_START
;
7913 redisLog(REDIS_NOTICE
,"Waiting for next BGSAVE for SYNC");
7916 /* Ok we don't have a BGSAVE in progress, let's start one */
7917 redisLog(REDIS_NOTICE
,"Starting BGSAVE for SYNC");
7918 if (rdbSaveBackground(server
.dbfilename
) != REDIS_OK
) {
7919 redisLog(REDIS_NOTICE
,"Replication failed, can't BGSAVE");
7920 addReplySds(c
,sdsnew("-ERR Unalbe to perform background save\r\n"));
7923 c
->replstate
= REDIS_REPL_WAIT_BGSAVE_END
;
7926 c
->flags
|= REDIS_SLAVE
;
7928 listAddNodeTail(server
.slaves
,c
);
7932 static void sendBulkToSlave(aeEventLoop
*el
, int fd
, void *privdata
, int mask
) {
7933 redisClient
*slave
= privdata
;
7935 REDIS_NOTUSED(mask
);
7936 char buf
[REDIS_IOBUF_LEN
];
7937 ssize_t nwritten
, buflen
;
7939 if (slave
->repldboff
== 0) {
7940 /* Write the bulk write count before to transfer the DB. In theory here
7941 * we don't know how much room there is in the output buffer of the
7942 * socket, but in pratice SO_SNDLOWAT (the minimum count for output
7943 * operations) will never be smaller than the few bytes we need. */
7946 bulkcount
= sdscatprintf(sdsempty(),"$%lld\r\n",(unsigned long long)
7948 if (write(fd
,bulkcount
,sdslen(bulkcount
)) != (signed)sdslen(bulkcount
))
7956 lseek(slave
->repldbfd
,slave
->repldboff
,SEEK_SET
);
7957 buflen
= read(slave
->repldbfd
,buf
,REDIS_IOBUF_LEN
);
7959 redisLog(REDIS_WARNING
,"Read error sending DB to slave: %s",
7960 (buflen
== 0) ? "premature EOF" : strerror(errno
));
7964 if ((nwritten
= write(fd
,buf
,buflen
)) == -1) {
7965 redisLog(REDIS_VERBOSE
,"Write error sending DB to slave: %s",
7970 slave
->repldboff
+= nwritten
;
7971 if (slave
->repldboff
== slave
->repldbsize
) {
7972 close(slave
->repldbfd
);
7973 slave
->repldbfd
= -1;
7974 aeDeleteFileEvent(server
.el
,slave
->fd
,AE_WRITABLE
);
7975 slave
->replstate
= REDIS_REPL_ONLINE
;
7976 if (aeCreateFileEvent(server
.el
, slave
->fd
, AE_WRITABLE
,
7977 sendReplyToClient
, slave
) == AE_ERR
) {
7981 addReplySds(slave
,sdsempty());
7982 redisLog(REDIS_NOTICE
,"Synchronization with slave succeeded");
7986 /* This function is called at the end of every backgrond saving.
7987 * The argument bgsaveerr is REDIS_OK if the background saving succeeded
7988 * otherwise REDIS_ERR is passed to the function.
7990 * The goal of this function is to handle slaves waiting for a successful
7991 * background saving in order to perform non-blocking synchronization. */
7992 static void updateSlavesWaitingBgsave(int bgsaveerr
) {
7994 int startbgsave
= 0;
7997 listRewind(server
.slaves
,&li
);
7998 while((ln
= listNext(&li
))) {
7999 redisClient
*slave
= ln
->value
;
8001 if (slave
->replstate
== REDIS_REPL_WAIT_BGSAVE_START
) {
8003 slave
->replstate
= REDIS_REPL_WAIT_BGSAVE_END
;
8004 } else if (slave
->replstate
== REDIS_REPL_WAIT_BGSAVE_END
) {
8005 struct redis_stat buf
;
8007 if (bgsaveerr
!= REDIS_OK
) {
8009 redisLog(REDIS_WARNING
,"SYNC failed. BGSAVE child returned an error");
8012 if ((slave
->repldbfd
= open(server
.dbfilename
,O_RDONLY
)) == -1 ||
8013 redis_fstat(slave
->repldbfd
,&buf
) == -1) {
8015 redisLog(REDIS_WARNING
,"SYNC failed. Can't open/stat DB after BGSAVE: %s", strerror(errno
));
8018 slave
->repldboff
= 0;
8019 slave
->repldbsize
= buf
.st_size
;
8020 slave
->replstate
= REDIS_REPL_SEND_BULK
;
8021 aeDeleteFileEvent(server
.el
,slave
->fd
,AE_WRITABLE
);
8022 if (aeCreateFileEvent(server
.el
, slave
->fd
, AE_WRITABLE
, sendBulkToSlave
, slave
) == AE_ERR
) {
8029 if (rdbSaveBackground(server
.dbfilename
) != REDIS_OK
) {
8032 listRewind(server
.slaves
,&li
);
8033 redisLog(REDIS_WARNING
,"SYNC failed. BGSAVE failed");
8034 while((ln
= listNext(&li
))) {
8035 redisClient
*slave
= ln
->value
;
8037 if (slave
->replstate
== REDIS_REPL_WAIT_BGSAVE_START
)
8044 static int syncWithMaster(void) {
8045 char buf
[1024], tmpfile
[256], authcmd
[1024];
8047 int fd
= anetTcpConnect(NULL
,server
.masterhost
,server
.masterport
);
8048 int dfd
, maxtries
= 5;
8051 redisLog(REDIS_WARNING
,"Unable to connect to MASTER: %s",
8056 /* AUTH with the master if required. */
8057 if(server
.masterauth
) {
8058 snprintf(authcmd
, 1024, "AUTH %s\r\n", server
.masterauth
);
8059 if (syncWrite(fd
, authcmd
, strlen(server
.masterauth
)+7, 5) == -1) {
8061 redisLog(REDIS_WARNING
,"Unable to AUTH to MASTER: %s",
8065 /* Read the AUTH result. */
8066 if (syncReadLine(fd
,buf
,1024,3600) == -1) {
8068 redisLog(REDIS_WARNING
,"I/O error reading auth result from MASTER: %s",
8072 if (buf
[0] != '+') {
8074 redisLog(REDIS_WARNING
,"Cannot AUTH to MASTER, is the masterauth password correct?");
8079 /* Issue the SYNC command */
8080 if (syncWrite(fd
,"SYNC \r\n",7,5) == -1) {
8082 redisLog(REDIS_WARNING
,"I/O error writing to MASTER: %s",
8086 /* Read the bulk write count */
8087 if (syncReadLine(fd
,buf
,1024,3600) == -1) {
8089 redisLog(REDIS_WARNING
,"I/O error reading bulk count from MASTER: %s",
8093 if (buf
[0] != '$') {
8095 redisLog(REDIS_WARNING
,"Bad protocol from MASTER, the first byte is not '$', are you sure the host and port are right?");
8098 dumpsize
= strtol(buf
+1,NULL
,10);
8099 redisLog(REDIS_NOTICE
,"Receiving %ld bytes data dump from MASTER",dumpsize
);
8100 /* Read the bulk write data on a temp file */
8102 snprintf(tmpfile
,256,
8103 "temp-%d.%ld.rdb",(int)time(NULL
),(long int)getpid());
8104 dfd
= open(tmpfile
,O_CREAT
|O_WRONLY
|O_EXCL
,0644);
8105 if (dfd
!= -1) break;
8110 redisLog(REDIS_WARNING
,"Opening the temp file needed for MASTER <-> SLAVE synchronization: %s",strerror(errno
));
8114 int nread
, nwritten
;
8116 nread
= read(fd
,buf
,(dumpsize
< 1024)?dumpsize
:1024);
8118 redisLog(REDIS_WARNING
,"I/O error trying to sync with MASTER: %s",
8124 nwritten
= write(dfd
,buf
,nread
);
8125 if (nwritten
== -1) {
8126 redisLog(REDIS_WARNING
,"Write error writing to the DB dump file needed for MASTER <-> SLAVE synchrnonization: %s", strerror(errno
));
8134 if (rename(tmpfile
,server
.dbfilename
) == -1) {
8135 redisLog(REDIS_WARNING
,"Failed trying to rename the temp DB into dump.rdb in MASTER <-> SLAVE synchronization: %s", strerror(errno
));
8141 if (rdbLoad(server
.dbfilename
) != REDIS_OK
) {
8142 redisLog(REDIS_WARNING
,"Failed trying to load the MASTER synchronization DB from disk");
8146 server
.master
= createClient(fd
);
8147 server
.master
->flags
|= REDIS_MASTER
;
8148 server
.master
->authenticated
= 1;
8149 server
.replstate
= REDIS_REPL_CONNECTED
;
8153 static void slaveofCommand(redisClient
*c
) {
8154 if (!strcasecmp(c
->argv
[1]->ptr
,"no") &&
8155 !strcasecmp(c
->argv
[2]->ptr
,"one")) {
8156 if (server
.masterhost
) {
8157 sdsfree(server
.masterhost
);
8158 server
.masterhost
= NULL
;
8159 if (server
.master
) freeClient(server
.master
);
8160 server
.replstate
= REDIS_REPL_NONE
;
8161 redisLog(REDIS_NOTICE
,"MASTER MODE enabled (user request)");
8164 sdsfree(server
.masterhost
);
8165 server
.masterhost
= sdsdup(c
->argv
[1]->ptr
);
8166 server
.masterport
= atoi(c
->argv
[2]->ptr
);
8167 if (server
.master
) freeClient(server
.master
);
8168 server
.replstate
= REDIS_REPL_CONNECT
;
8169 redisLog(REDIS_NOTICE
,"SLAVE OF %s:%d enabled (user request)",
8170 server
.masterhost
, server
.masterport
);
8172 addReply(c
,shared
.ok
);
8175 /* ============================ Maxmemory directive ======================== */
8177 /* Try to free one object form the pre-allocated objects free list.
8178 * This is useful under low mem conditions as by default we take 1 million
8179 * free objects allocated. On success REDIS_OK is returned, otherwise
8181 static int tryFreeOneObjectFromFreelist(void) {
8184 if (server
.vm_enabled
) pthread_mutex_lock(&server
.obj_freelist_mutex
);
8185 if (listLength(server
.objfreelist
)) {
8186 listNode
*head
= listFirst(server
.objfreelist
);
8187 o
= listNodeValue(head
);
8188 listDelNode(server
.objfreelist
,head
);
8189 if (server
.vm_enabled
) pthread_mutex_unlock(&server
.obj_freelist_mutex
);
8193 if (server
.vm_enabled
) pthread_mutex_unlock(&server
.obj_freelist_mutex
);
8198 /* This function gets called when 'maxmemory' is set on the config file to limit
8199 * the max memory used by the server, and we are out of memory.
8200 * This function will try to, in order:
8202 * - Free objects from the free list
8203 * - Try to remove keys with an EXPIRE set
8205 * It is not possible to free enough memory to reach used-memory < maxmemory
8206 * the server will start refusing commands that will enlarge even more the
8209 static void freeMemoryIfNeeded(void) {
8210 while (server
.maxmemory
&& zmalloc_used_memory() > server
.maxmemory
) {
8211 int j
, k
, freed
= 0;
8213 if (tryFreeOneObjectFromFreelist() == REDIS_OK
) continue;
8214 for (j
= 0; j
< server
.dbnum
; j
++) {
8216 robj
*minkey
= NULL
;
8217 struct dictEntry
*de
;
8219 if (dictSize(server
.db
[j
].expires
)) {
8221 /* From a sample of three keys drop the one nearest to
8222 * the natural expire */
8223 for (k
= 0; k
< 3; k
++) {
8226 de
= dictGetRandomKey(server
.db
[j
].expires
);
8227 t
= (time_t) dictGetEntryVal(de
);
8228 if (minttl
== -1 || t
< minttl
) {
8229 minkey
= dictGetEntryKey(de
);
8233 deleteKey(server
.db
+j
,minkey
);
8236 if (!freed
) return; /* nothing to free... */
8240 /* ============================== Append Only file ========================== */
8242 /* Write the append only file buffer on disk.
8244 * Since we are required to write the AOF before replying to the client,
8245 * and the only way the client socket can get a write is entering when the
8246 * the event loop, we accumulate all the AOF writes in a memory
8247 * buffer and write it on disk using this function just before entering
8248 * the event loop again. */
8249 static void flushAppendOnlyFile(void) {
8253 if (sdslen(server
.aofbuf
) == 0) return;
8255 /* We want to perform a single write. This should be guaranteed atomic
8256 * at least if the filesystem we are writing is a real physical one.
8257 * While this will save us against the server being killed I don't think
8258 * there is much to do about the whole server stopping for power problems
8260 nwritten
= write(server
.appendfd
,server
.aofbuf
,sdslen(server
.aofbuf
));
8261 if (nwritten
!= (signed)sdslen(server
.aofbuf
)) {
8262 /* Ooops, we are in troubles. The best thing to do for now is
8263 * aborting instead of giving the illusion that everything is
8264 * working as expected. */
8265 if (nwritten
== -1) {
8266 redisLog(REDIS_WARNING
,"Exiting on error writing to the append-only file: %s",strerror(errno
));
8268 redisLog(REDIS_WARNING
,"Exiting on short write while writing to the append-only file: %s",strerror(errno
));
8272 sdsfree(server
.aofbuf
);
8273 server
.aofbuf
= sdsempty();
8275 /* Fsync if needed */
8277 if (server
.appendfsync
== APPENDFSYNC_ALWAYS
||
8278 (server
.appendfsync
== APPENDFSYNC_EVERYSEC
&&
8279 now
-server
.lastfsync
> 1))
8281 /* aof_fsync is defined as fdatasync() for Linux in order to avoid
8282 * flushing metadata. */
8283 aof_fsync(server
.appendfd
); /* Let's try to get this data on the disk */
8284 server
.lastfsync
= now
;
8288 static sds
catAppendOnlyGenericCommand(sds buf
, int argc
, robj
**argv
) {
8290 buf
= sdscatprintf(buf
,"*%d\r\n",argc
);
8291 for (j
= 0; j
< argc
; j
++) {
8292 robj
*o
= getDecodedObject(argv
[j
]);
8293 buf
= sdscatprintf(buf
,"$%lu\r\n",(unsigned long)sdslen(o
->ptr
));
8294 buf
= sdscatlen(buf
,o
->ptr
,sdslen(o
->ptr
));
8295 buf
= sdscatlen(buf
,"\r\n",2);
8301 static sds
catAppendOnlyExpireAtCommand(sds buf
, robj
*key
, robj
*seconds
) {
8306 /* Make sure we can use strtol */
8307 seconds
= getDecodedObject(seconds
);
8308 when
= time(NULL
)+strtol(seconds
->ptr
,NULL
,10);
8309 decrRefCount(seconds
);
8311 argv
[0] = createStringObject("EXPIREAT",8);
8313 argv
[2] = createObject(REDIS_STRING
,
8314 sdscatprintf(sdsempty(),"%ld",when
));
8315 buf
= catAppendOnlyGenericCommand(buf
, argc
, argv
);
8316 decrRefCount(argv
[0]);
8317 decrRefCount(argv
[2]);
8321 static void feedAppendOnlyFile(struct redisCommand
*cmd
, int dictid
, robj
**argv
, int argc
) {
8322 sds buf
= sdsempty();
8325 /* The DB this command was targetting is not the same as the last command
8326 * we appendend. To issue a SELECT command is needed. */
8327 if (dictid
!= server
.appendseldb
) {
8330 snprintf(seldb
,sizeof(seldb
),"%d",dictid
);
8331 buf
= sdscatprintf(buf
,"*2\r\n$6\r\nSELECT\r\n$%lu\r\n%s\r\n",
8332 (unsigned long)strlen(seldb
),seldb
);
8333 server
.appendseldb
= dictid
;
8336 if (cmd
->proc
== expireCommand
) {
8337 /* Translate EXPIRE into EXPIREAT */
8338 buf
= catAppendOnlyExpireAtCommand(buf
,argv
[1],argv
[2]);
8339 } else if (cmd
->proc
== setexCommand
) {
8340 /* Translate SETEX to SET and EXPIREAT */
8341 tmpargv
[0] = createStringObject("SET",3);
8342 tmpargv
[1] = argv
[1];
8343 tmpargv
[2] = argv
[3];
8344 buf
= catAppendOnlyGenericCommand(buf
,3,tmpargv
);
8345 decrRefCount(tmpargv
[0]);
8346 buf
= catAppendOnlyExpireAtCommand(buf
,argv
[1],argv
[2]);
8348 buf
= catAppendOnlyGenericCommand(buf
,argc
,argv
);
8351 /* Append to the AOF buffer. This will be flushed on disk just before
8352 * of re-entering the event loop, so before the client will get a
8353 * positive reply about the operation performed. */
8354 server
.aofbuf
= sdscatlen(server
.aofbuf
,buf
,sdslen(buf
));
8356 /* If a background append only file rewriting is in progress we want to
8357 * accumulate the differences between the child DB and the current one
8358 * in a buffer, so that when the child process will do its work we
8359 * can append the differences to the new append only file. */
8360 if (server
.bgrewritechildpid
!= -1)
8361 server
.bgrewritebuf
= sdscatlen(server
.bgrewritebuf
,buf
,sdslen(buf
));
8366 /* In Redis commands are always executed in the context of a client, so in
8367 * order to load the append only file we need to create a fake client. */
8368 static struct redisClient
*createFakeClient(void) {
8369 struct redisClient
*c
= zmalloc(sizeof(*c
));
8373 c
->querybuf
= sdsempty();
8377 /* We set the fake client as a slave waiting for the synchronization
8378 * so that Redis will not try to send replies to this client. */
8379 c
->replstate
= REDIS_REPL_WAIT_BGSAVE_START
;
8380 c
->reply
= listCreate();
8381 listSetFreeMethod(c
->reply
,decrRefCount
);
8382 listSetDupMethod(c
->reply
,dupClientReplyValue
);
8383 initClientMultiState(c
);
8387 static void freeFakeClient(struct redisClient
*c
) {
8388 sdsfree(c
->querybuf
);
8389 listRelease(c
->reply
);
8390 freeClientMultiState(c
);
8394 /* Replay the append log file. On error REDIS_OK is returned. On non fatal
8395 * error (the append only file is zero-length) REDIS_ERR is returned. On
8396 * fatal error an error message is logged and the program exists. */
8397 int loadAppendOnlyFile(char *filename
) {
8398 struct redisClient
*fakeClient
;
8399 FILE *fp
= fopen(filename
,"r");
8400 struct redis_stat sb
;
8401 unsigned long long loadedkeys
= 0;
8402 int appendonly
= server
.appendonly
;
8404 if (redis_fstat(fileno(fp
),&sb
) != -1 && sb
.st_size
== 0)
8408 redisLog(REDIS_WARNING
,"Fatal error: can't open the append log file for reading: %s",strerror(errno
));
8412 /* Temporarily disable AOF, to prevent EXEC from feeding a MULTI
8413 * to the same file we're about to read. */
8414 server
.appendonly
= 0;
8416 fakeClient
= createFakeClient();
8423 struct redisCommand
*cmd
;
8425 if (fgets(buf
,sizeof(buf
),fp
) == NULL
) {
8431 if (buf
[0] != '*') goto fmterr
;
8433 argv
= zmalloc(sizeof(robj
*)*argc
);
8434 for (j
= 0; j
< argc
; j
++) {
8435 if (fgets(buf
,sizeof(buf
),fp
) == NULL
) goto readerr
;
8436 if (buf
[0] != '$') goto fmterr
;
8437 len
= strtol(buf
+1,NULL
,10);
8438 argsds
= sdsnewlen(NULL
,len
);
8439 if (len
&& fread(argsds
,len
,1,fp
) == 0) goto fmterr
;
8440 argv
[j
] = createObject(REDIS_STRING
,argsds
);
8441 if (fread(buf
,2,1,fp
) == 0) goto fmterr
; /* discard CRLF */
8444 /* Command lookup */
8445 cmd
= lookupCommand(argv
[0]->ptr
);
8447 redisLog(REDIS_WARNING
,"Unknown command '%s' reading the append only file", argv
[0]->ptr
);
8450 /* Try object encoding */
8451 if (cmd
->flags
& REDIS_CMD_BULK
)
8452 argv
[argc
-1] = tryObjectEncoding(argv
[argc
-1]);
8453 /* Run the command in the context of a fake client */
8454 fakeClient
->argc
= argc
;
8455 fakeClient
->argv
= argv
;
8456 cmd
->proc(fakeClient
);
8457 /* Discard the reply objects list from the fake client */
8458 while(listLength(fakeClient
->reply
))
8459 listDelNode(fakeClient
->reply
,listFirst(fakeClient
->reply
));
8460 /* Clean up, ready for the next command */
8461 for (j
= 0; j
< argc
; j
++) decrRefCount(argv
[j
]);
8463 /* Handle swapping while loading big datasets when VM is on */
8465 if (server
.vm_enabled
&& (loadedkeys
% 5000) == 0) {
8466 while (zmalloc_used_memory() > server
.vm_max_memory
) {
8467 if (vmSwapOneObjectBlocking() == REDIS_ERR
) break;
8472 /* This point can only be reached when EOF is reached without errors.
8473 * If the client is in the middle of a MULTI/EXEC, log error and quit. */
8474 if (fakeClient
->flags
& REDIS_MULTI
) goto readerr
;
8477 freeFakeClient(fakeClient
);
8478 server
.appendonly
= appendonly
;
8483 redisLog(REDIS_WARNING
,"Unexpected end of file reading the append only file");
8485 redisLog(REDIS_WARNING
,"Unrecoverable error reading the append only file: %s", strerror(errno
));
8489 redisLog(REDIS_WARNING
,"Bad file format reading the append only file");
8493 /* Write an object into a file in the bulk format $<count>\r\n<payload>\r\n */
8494 static int fwriteBulkObject(FILE *fp
, robj
*obj
) {
8498 /* Avoid the incr/decr ref count business if possible to help
8499 * copy-on-write (we are often in a child process when this function
8501 * Also makes sure that key objects don't get incrRefCount-ed when VM
8503 if (obj
->encoding
!= REDIS_ENCODING_RAW
) {
8504 obj
= getDecodedObject(obj
);
8507 snprintf(buf
,sizeof(buf
),"$%ld\r\n",(long)sdslen(obj
->ptr
));
8508 if (fwrite(buf
,strlen(buf
),1,fp
) == 0) goto err
;
8509 if (sdslen(obj
->ptr
) && fwrite(obj
->ptr
,sdslen(obj
->ptr
),1,fp
) == 0)
8511 if (fwrite("\r\n",2,1,fp
) == 0) goto err
;
8512 if (decrrc
) decrRefCount(obj
);
8515 if (decrrc
) decrRefCount(obj
);
/* Write the binary-safe string 's' of 'len' bytes to 'fp' in the bulk
 * format $<count>\r\n<payload>\r\n.
 * Returns 1 on success, 0 on write error. */
static int fwriteBulkString(FILE *fp, char *s, unsigned long len) {
    char buf[128];

    /* 'len' is unsigned: print it with %lu so that very large lengths are
     * never rendered as negative numbers. */
    snprintf(buf,sizeof(buf),"$%lu\r\n",len);
    if (fwrite(buf,strlen(buf),1,fp) == 0) return 0;
    if (len && fwrite(s,len,1,fp) == 0) return 0;
    if (fwrite("\r\n",2,1,fp) == 0) return 0;
    return 1;
}
/* Write the double 'd', formatted with "%.17g", to 'fp' in the bulk format
 * $<count>\r\n<payload>\r\n.
 * Returns 1 on success, 0 on write error. */
static int fwriteBulkDouble(FILE *fp, double d) {
    char header[128], payload[128];
    size_t paylen;

    /* The payload buffer already carries the trailing CRLF, which must not
     * be counted in the announced length. */
    snprintf(payload,sizeof(payload),"%.17g\r\n",d);
    paylen = strlen(payload);
    snprintf(header,sizeof(header),"$%lu\r\n",(unsigned long)paylen-2);

    if (fwrite(header,strlen(header),1,fp) == 0) return 0;
    if (fwrite(payload,paylen,1,fp) == 0) return 0;
    return 1;
}
/* Write the long 'l', formatted in decimal, to 'fp' in the bulk format
 * $<count>\r\n<payload>\r\n.
 * Returns 1 on success, 0 on write error. */
static int fwriteBulkLong(FILE *fp, long l) {
    char header[128], payload[128];
    size_t paylen;

    /* The payload buffer already carries the trailing CRLF, which must not
     * be counted in the announced length. */
    snprintf(payload,sizeof(payload),"%ld\r\n",l);
    paylen = strlen(payload);
    snprintf(header,sizeof(header),"$%lu\r\n",(unsigned long)paylen-2);

    if (fwrite(header,strlen(header),1,fp) == 0) return 0;
    if (fwrite(payload,paylen,1,fp) == 0) return 0;
    return 1;
}
8553 /* Write a sequence of commands able to fully rebuild the dataset into
8554 * "filename". Used both by REWRITEAOF and BGREWRITEAOF. */
8555 static int rewriteAppendOnlyFile(char *filename
) {
8556 dictIterator
*di
= NULL
;
8561 time_t now
= time(NULL
);
8563 /* Note that we have to use a different temp name here compared to the
8564 * one used by rewriteAppendOnlyFileBackground() function. */
8565 snprintf(tmpfile
,256,"temp-rewriteaof-%d.aof", (int) getpid());
8566 fp
= fopen(tmpfile
,"w");
8568 redisLog(REDIS_WARNING
, "Failed rewriting the append only file: %s", strerror(errno
));
8571 for (j
= 0; j
< server
.dbnum
; j
++) {
8572 char selectcmd
[] = "*2\r\n$6\r\nSELECT\r\n";
8573 redisDb
*db
= server
.db
+j
;
8575 if (dictSize(d
) == 0) continue;
8576 di
= dictGetIterator(d
);
8582 /* SELECT the new DB */
8583 if (fwrite(selectcmd
,sizeof(selectcmd
)-1,1,fp
) == 0) goto werr
;
8584 if (fwriteBulkLong(fp
,j
) == 0) goto werr
;
8586 /* Iterate this DB writing every entry */
8587 while((de
= dictNext(di
)) != NULL
) {
8592 key
= dictGetEntryKey(de
);
8593 /* If the value for this key is swapped, load a preview in memory.
8594 * We use a "swapped" flag to remember if we need to free the
8595 * value object instead to just increment the ref count anyway
8596 * in order to avoid copy-on-write of pages if we are forked() */
8597 if (!server
.vm_enabled
|| key
->storage
== REDIS_VM_MEMORY
||
8598 key
->storage
== REDIS_VM_SWAPPING
) {
8599 o
= dictGetEntryVal(de
);
8602 o
= vmPreviewObject(key
);
8605 expiretime
= getExpire(db
,key
);
8607 /* Save the key and associated value */
8608 if (o
->type
== REDIS_STRING
) {
8609 /* Emit a SET command */
8610 char cmd
[]="*3\r\n$3\r\nSET\r\n";
8611 if (fwrite(cmd
,sizeof(cmd
)-1,1,fp
) == 0) goto werr
;
8613 if (fwriteBulkObject(fp
,key
) == 0) goto werr
;
8614 if (fwriteBulkObject(fp
,o
) == 0) goto werr
;
8615 } else if (o
->type
== REDIS_LIST
) {
8616 /* Emit the RPUSHes needed to rebuild the list */
8617 list
*list
= o
->ptr
;
8621 listRewind(list
,&li
);
8622 while((ln
= listNext(&li
))) {
8623 char cmd
[]="*3\r\n$5\r\nRPUSH\r\n";
8624 robj
*eleobj
= listNodeValue(ln
);
8626 if (fwrite(cmd
,sizeof(cmd
)-1,1,fp
) == 0) goto werr
;
8627 if (fwriteBulkObject(fp
,key
) == 0) goto werr
;
8628 if (fwriteBulkObject(fp
,eleobj
) == 0) goto werr
;
8630 } else if (o
->type
== REDIS_SET
) {
8631 /* Emit the SADDs needed to rebuild the set */
8633 dictIterator
*di
= dictGetIterator(set
);
8636 while((de
= dictNext(di
)) != NULL
) {
8637 char cmd
[]="*3\r\n$4\r\nSADD\r\n";
8638 robj
*eleobj
= dictGetEntryKey(de
);
8640 if (fwrite(cmd
,sizeof(cmd
)-1,1,fp
) == 0) goto werr
;
8641 if (fwriteBulkObject(fp
,key
) == 0) goto werr
;
8642 if (fwriteBulkObject(fp
,eleobj
) == 0) goto werr
;
8644 dictReleaseIterator(di
);
8645 } else if (o
->type
== REDIS_ZSET
) {
8646 /* Emit the ZADDs needed to rebuild the sorted set */
8648 dictIterator
*di
= dictGetIterator(zs
->dict
);
8651 while((de
= dictNext(di
)) != NULL
) {
8652 char cmd
[]="*4\r\n$4\r\nZADD\r\n";
8653 robj
*eleobj
= dictGetEntryKey(de
);
8654 double *score
= dictGetEntryVal(de
);
8656 if (fwrite(cmd
,sizeof(cmd
)-1,1,fp
) == 0) goto werr
;
8657 if (fwriteBulkObject(fp
,key
) == 0) goto werr
;
8658 if (fwriteBulkDouble(fp
,*score
) == 0) goto werr
;
8659 if (fwriteBulkObject(fp
,eleobj
) == 0) goto werr
;
8661 dictReleaseIterator(di
);
8662 } else if (o
->type
== REDIS_HASH
) {
8663 char cmd
[]="*4\r\n$4\r\nHSET\r\n";
8665 /* Emit the HSETs needed to rebuild the hash */
8666 if (o
->encoding
== REDIS_ENCODING_ZIPMAP
) {
8667 unsigned char *p
= zipmapRewind(o
->ptr
);
8668 unsigned char *field
, *val
;
8669 unsigned int flen
, vlen
;
8671 while((p
= zipmapNext(p
,&field
,&flen
,&val
,&vlen
)) != NULL
) {
8672 if (fwrite(cmd
,sizeof(cmd
)-1,1,fp
) == 0) goto werr
;
8673 if (fwriteBulkObject(fp
,key
) == 0) goto werr
;
8674 if (fwriteBulkString(fp
,(char*)field
,flen
) == -1)
8676 if (fwriteBulkString(fp
,(char*)val
,vlen
) == -1)
8680 dictIterator
*di
= dictGetIterator(o
->ptr
);
8683 while((de
= dictNext(di
)) != NULL
) {
8684 robj
*field
= dictGetEntryKey(de
);
8685 robj
*val
= dictGetEntryVal(de
);
8687 if (fwrite(cmd
,sizeof(cmd
)-1,1,fp
) == 0) goto werr
;
8688 if (fwriteBulkObject(fp
,key
) == 0) goto werr
;
8689 if (fwriteBulkObject(fp
,field
) == -1) return -1;
8690 if (fwriteBulkObject(fp
,val
) == -1) return -1;
8692 dictReleaseIterator(di
);
8695 redisPanic("Unknown object type");
8697 /* Save the expire time */
8698 if (expiretime
!= -1) {
8699 char cmd
[]="*3\r\n$8\r\nEXPIREAT\r\n";
8700 /* If this key is already expired skip it */
8701 if (expiretime
< now
) continue;
8702 if (fwrite(cmd
,sizeof(cmd
)-1,1,fp
) == 0) goto werr
;
8703 if (fwriteBulkObject(fp
,key
) == 0) goto werr
;
8704 if (fwriteBulkLong(fp
,expiretime
) == 0) goto werr
;
8706 if (swapped
) decrRefCount(o
);
8708 dictReleaseIterator(di
);
8711 /* Make sure data will not remain on the OS's output buffers */
8716 /* Use RENAME to make sure the DB file is changed atomically only
8717 * if the generate DB file is ok. */
8718 if (rename(tmpfile
,filename
) == -1) {
8719 redisLog(REDIS_WARNING
,"Error moving temp append only file on the final destination: %s", strerror(errno
));
8723 redisLog(REDIS_NOTICE
,"SYNC append only file rewrite performed");
8729 redisLog(REDIS_WARNING
,"Write error writing append only file on disk: %s", strerror(errno
));
8730 if (di
) dictReleaseIterator(di
);
8734 /* This is how rewriting of the append only file in background works:
8736 * 1) The user calls BGREWRITEAOF
8737 * 2) Redis calls this function, that forks():
8738 * 2a) the child rewrite the append only file in a temp file.
8739 * 2b) the parent accumulates differences in server.bgrewritebuf.
8740 * 3) When the child finishes '2a' it exits.
8741 * 4) The parent will trap the exit code, if it's OK, will append the
8742 * data accumulated into server.bgrewritebuf into the temp file, and
8743 * finally will rename(2) the temp file in the actual file name.
8744 * Then the new file is reopened as the new append only file. Profit!
8746 static int rewriteAppendOnlyFileBackground(void) {
8749 if (server
.bgrewritechildpid
!= -1) return REDIS_ERR
;
8750 if (server
.vm_enabled
) waitEmptyIOJobsQueue();
8751 if ((childpid
= fork()) == 0) {
8755 if (server
.vm_enabled
) vmReopenSwapFile();
8757 snprintf(tmpfile
,256,"temp-rewriteaof-bg-%d.aof", (int) getpid());
8758 if (rewriteAppendOnlyFile(tmpfile
) == REDIS_OK
) {
8765 if (childpid
== -1) {
8766 redisLog(REDIS_WARNING
,
8767 "Can't rewrite append only file in background: fork: %s",
8771 redisLog(REDIS_NOTICE
,
8772 "Background append only file rewriting started by pid %d",childpid
);
8773 server
.bgrewritechildpid
= childpid
;
8774 updateDictResizePolicy();
8775 /* We set appendseldb to -1 in order to force the next call to the
8776 * feedAppendOnlyFile() to issue a SELECT command, so the differences
8777 * accumulated by the parent into server.bgrewritebuf will start
8778 * with a SELECT statement and it will be safe to merge. */
8779 server
.appendseldb
= -1;
8782 return REDIS_OK
; /* unreached */
/* BGREWRITEAOF command handler.
 *
 * If server.bgrewritechildpid is already set (!= -1) the client 'c' gets an
 * "-ERR ... already in progress" reply. Otherwise
 * rewriteAppendOnlyFileBackground() is called: when it returns REDIS_OK the
 * client gets a "+Background append only file rewriting started" status
 * reply. The shared.err reply below is presumably the failure branch of that
 * call -- NOTE(review): the lines joining these branches are not visible in
 * this fragment, confirm against the full source. */
8785 static void bgrewriteaofCommand(redisClient
*c
) {
8786 if (server
.bgrewritechildpid
!= -1) {
8787 addReplySds(c
,sdsnew("-ERR background append only file rewriting already in progress\r\n"));
8790 if (rewriteAppendOnlyFileBackground() == REDIS_OK
) {
8791 char *status
= "+Background append only file rewriting started\r\n";
8792 addReplySds(c
,sdsnew(status
));
8794 addReply(c
,shared
.err
);
/* Build the name of the temp file written by the background AOF rewrite
 * child whose pid is 'childpid'. The "temp-rewriteaof-bg-<pid>.aof" pattern
 * is the same one the child itself uses in
 * rewriteAppendOnlyFileBackground().
 *
 * NOTE(review): judging by the function name the file is then removed, but
 * the removing call is not visible in this fragment -- confirm. */
8798 static void aofRemoveTempFile(pid_t childpid
) {
8801 snprintf(tmpfile
,256,"temp-rewriteaof-bg-%d.aof", (int) childpid
);
8805 /* Virtual Memory is composed mainly of two subsystems:
8806 * - Blocking Virtual Memory
8807 * - Threaded Virtual Memory I/O
8808 * The two parts are not fully decoupled, but functions are split among two
8809 * different sections of the source code (delimited by comments) in order to
8810 * make more clear what functionality is about the blocking VM and what about
8811 * the threaded (not blocking) VM.
8815 * Redis VM is a blocking VM (one that blocks reading swapped values from
8816 * disk into memory when a value swapped out is needed in memory) that is made
8817 * unblocking by trying to examine the command argument vector in order to
8818 * load in background values that will likely be needed in order to exec
8819 * the command. The command is executed only once all the relevant keys
8820 * are loaded into memory.
8822 * This is basically almost as simple as a blocking VM, but almost as parallel
8823 * as a fully non-blocking VM.
8826 /* Called when the user switches from "appendonly yes" to "appendonly no"
8827 * at runtime using the CONFIG command. */
8828 static void stopAppendOnly(void) {
8829 flushAppendOnlyFile();
8830 fsync(server
.appendfd
);
8831 close(server
.appendfd
);
8833 server
.appendfd
= -1;
8834 server
.appendseldb
= -1;
8835 server
.appendonly
= 0;
8836 /* rewrite operation in progress? kill it, wait child exit */
8837 if (server
.bgsavechildpid
!= -1) {
8840 if (kill(server
.bgsavechildpid
,SIGKILL
) != -1)
8841 wait3(&statloc
,0,NULL
);
8842 /* reset the buffer accumulating changes while the child saves */
8843 sdsfree(server
.bgrewritebuf
);
8844 server
.bgrewritebuf
= sdsempty();
8845 server
.bgsavechildpid
= -1;
8849 /* Called when the user switches from "appendonly no" to "appendonly yes"
8850 * at runtime using the CONFIG command. */
8851 static int startAppendOnly(void) {
8852 server
.appendonly
= 1;
8853 server
.lastfsync
= time(NULL
);
8854 server
.appendfd
= open(server
.appendfilename
,O_WRONLY
|O_APPEND
|O_CREAT
,0644);
8855 if (server
.appendfd
== -1) {
8856 redisLog(REDIS_WARNING
,"Used tried to switch on AOF via CONFIG, but I can't open the AOF file: %s",strerror(errno
));
8859 if (rewriteAppendOnlyFileBackground() == REDIS_ERR
) {
8860 server
.appendonly
= 0;
8861 close(server
.appendfd
);
8862 redisLog(REDIS_WARNING
,"Used tried to switch on AOF via CONFIG, I can't trigger a background AOF rewrite operation. Check the above logs for more info about the error.",strerror(errno
));
8868 /* =================== Virtual Memory - Blocking Side ====================== */
8870 static void vmInit(void) {
8876 if (server
.vm_max_threads
!= 0)
8877 zmalloc_enable_thread_safeness(); /* we need thread safe zmalloc() */
8879 redisLog(REDIS_NOTICE
,"Using '%s' as swap file",server
.vm_swap_file
);
8880 /* Try to open the old swap file, otherwise create it */
8881 if ((server
.vm_fp
= fopen(server
.vm_swap_file
,"r+b")) == NULL
) {
8882 server
.vm_fp
= fopen(server
.vm_swap_file
,"w+b");
8884 if (server
.vm_fp
== NULL
) {
8885 redisLog(REDIS_WARNING
,
8886 "Can't open the swap file: %s. Exiting.",
8890 server
.vm_fd
= fileno(server
.vm_fp
);
8891 /* Lock the swap file for writing, this is useful in order to avoid
8892 * another instance to use the same swap file for a config error. */
8893 fl
.l_type
= F_WRLCK
;
8894 fl
.l_whence
= SEEK_SET
;
8895 fl
.l_start
= fl
.l_len
= 0;
8896 if (fcntl(server
.vm_fd
,F_SETLK
,&fl
) == -1) {
8897 redisLog(REDIS_WARNING
,
8898 "Can't lock the swap file at '%s': %s. Make sure it is not used by another Redis instance.", server
.vm_swap_file
, strerror(errno
));
8902 server
.vm_next_page
= 0;
8903 server
.vm_near_pages
= 0;
8904 server
.vm_stats_used_pages
= 0;
8905 server
.vm_stats_swapped_objects
= 0;
8906 server
.vm_stats_swapouts
= 0;
8907 server
.vm_stats_swapins
= 0;
8908 totsize
= server
.vm_pages
*server
.vm_page_size
;
8909 redisLog(REDIS_NOTICE
,"Allocating %lld bytes of swap file",totsize
);
8910 if (ftruncate(server
.vm_fd
,totsize
) == -1) {
8911 redisLog(REDIS_WARNING
,"Can't ftruncate swap file: %s. Exiting.",
8915 redisLog(REDIS_NOTICE
,"Swap file allocated with success");
8917 server
.vm_bitmap
= zmalloc((server
.vm_pages
+7)/8);
8918 redisLog(REDIS_VERBOSE
,"Allocated %lld bytes page table for %lld pages",
8919 (long long) (server
.vm_pages
+7)/8, server
.vm_pages
);
8920 memset(server
.vm_bitmap
,0,(server
.vm_pages
+7)/8);
8922 /* Initialize threaded I/O (used by Virtual Memory) */
8923 server
.io_newjobs
= listCreate();
8924 server
.io_processing
= listCreate();
8925 server
.io_processed
= listCreate();
8926 server
.io_ready_clients
= listCreate();
8927 pthread_mutex_init(&server
.io_mutex
,NULL
);
8928 pthread_mutex_init(&server
.obj_freelist_mutex
,NULL
);
8929 pthread_mutex_init(&server
.io_swapfile_mutex
,NULL
);
8930 server
.io_active_threads
= 0;
8931 if (pipe(pipefds
) == -1) {
8932 redisLog(REDIS_WARNING
,"Unable to intialized VM: pipe(2): %s. Exiting."
8936 server
.io_ready_pipe_read
= pipefds
[0];
8937 server
.io_ready_pipe_write
= pipefds
[1];
8938 redisAssert(anetNonBlock(NULL
,server
.io_ready_pipe_read
) != ANET_ERR
);
8939 /* LZF requires a lot of stack */
8940 pthread_attr_init(&server
.io_threads_attr
);
8941 pthread_attr_getstacksize(&server
.io_threads_attr
, &stacksize
);
8942 while (stacksize
< REDIS_THREAD_STACK_SIZE
) stacksize
*= 2;
8943 pthread_attr_setstacksize(&server
.io_threads_attr
, stacksize
);
8944 /* Listen for events in the threaded I/O pipe */
8945 if (aeCreateFileEvent(server
.el
, server
.io_ready_pipe_read
, AE_READABLE
,
8946 vmThreadedIOCompletedJob
, NULL
) == AE_ERR
)
8947 oom("creating file event");
8950 /* Mark the page as used */
8951 static void vmMarkPageUsed(off_t page
) {
8952 off_t byte
= page
/8;
8954 redisAssert(vmFreePage(page
) == 1);
8955 server
.vm_bitmap
[byte
] |= 1<<bit
;
8958 /* Mark N contiguous pages as used, with 'page' being the first. */
8959 static void vmMarkPagesUsed(off_t page
, off_t count
) {
8962 for (j
= 0; j
< count
; j
++)
8963 vmMarkPageUsed(page
+j
);
8964 server
.vm_stats_used_pages
+= count
;
8965 redisLog(REDIS_DEBUG
,"Mark USED pages: %lld pages at %lld\n",
8966 (long long)count
, (long long)page
);
8969 /* Mark the page as free */
8970 static void vmMarkPageFree(off_t page
) {
8971 off_t byte
= page
/8;
8973 redisAssert(vmFreePage(page
) == 0);
8974 server
.vm_bitmap
[byte
] &= ~(1<<bit
);
8977 /* Mark N contiguous pages as free, with 'page' being the first. */
8978 static void vmMarkPagesFree(off_t page
, off_t count
) {
8981 for (j
= 0; j
< count
; j
++)
8982 vmMarkPageFree(page
+j
);
8983 server
.vm_stats_used_pages
-= count
;
8984 redisLog(REDIS_DEBUG
,"Mark FREE pages: %lld pages at %lld\n",
8985 (long long)count
, (long long)page
);
8988 /* Test if the page is free */
8989 static int vmFreePage(off_t page
) {
8990 off_t byte
= page
/8;
8992 return (server
.vm_bitmap
[byte
] & (1<<bit
)) == 0;
8995 /* Find N contiguous free pages storing the first page of the cluster in *first.
8996 * Returns REDIS_OK if it was able to find N contiguous pages, otherwise
8997 * REDIS_ERR is returned.
8999 * This function uses a simple algorithm: we try to allocate
9000 * REDIS_VM_MAX_NEAR_PAGES sequentially, when we reach this limit we start
9001 * again from the start of the swap file searching for free spaces.
9003 * If it looks pretty clear that there are no free pages near our offset
9004 * we try to find less populated places doing a forward jump of
9005 * REDIS_VM_MAX_RANDOM_JUMP, then we start scanning again a few pages
9006 * without hurry, and then we jump again and so forth...
9008 * This function can be improved using a free list to avoid to guess
9009 * too much, since we could collect data about freed pages.
9011 * note: I implemented this function just after watching an episode of
9012 * Battlestar Galactica, where the hybrid was continuing to say "JUMP!"
9014 static int vmFindContiguousPages(off_t
*first
, off_t n
) {
9015 off_t base
, offset
= 0, since_jump
= 0, numfree
= 0;
9017 if (server
.vm_near_pages
== REDIS_VM_MAX_NEAR_PAGES
) {
9018 server
.vm_near_pages
= 0;
9019 server
.vm_next_page
= 0;
9021 server
.vm_near_pages
++; /* Yet another try for pages near to the old ones */
9022 base
= server
.vm_next_page
;
9024 while(offset
< server
.vm_pages
) {
9025 off_t
this = base
+offset
;
9027 /* If we overflow, restart from page zero */
9028 if (this >= server
.vm_pages
) {
9029 this -= server
.vm_pages
;
9031 /* Just overflowed, what we found on tail is no longer
9032 * interesting, as it's no longer contiguous. */
9036 if (vmFreePage(this)) {
9037 /* This is a free page */
9039 /* Already got N free pages? Return to the caller, with success */
9041 *first
= this-(n
-1);
9042 server
.vm_next_page
= this+1;
9043 redisLog(REDIS_DEBUG
, "FOUND CONTIGUOUS PAGES: %lld pages at %lld\n", (long long) n
, (long long) *first
);
9047 /* The current one is not a free page */
9051 /* Fast-forward if the current page is not free and we already
9052 * searched enough near this place. */
9054 if (!numfree
&& since_jump
>= REDIS_VM_MAX_RANDOM_JUMP
/4) {
9055 offset
+= random() % REDIS_VM_MAX_RANDOM_JUMP
;
9057 /* Note that even if we rewind after the jump, we don't need
9058 * to make sure numfree is set to zero as we only jump *if* it
9059 * is set to zero. */
9061 /* Otherwise just check the next page */
9068 /* Write the specified object at the specified page of the swap file */
9069 static int vmWriteObjectOnSwap(robj
*o
, off_t page
) {
9070 if (server
.vm_enabled
) pthread_mutex_lock(&server
.io_swapfile_mutex
);
9071 if (fseeko(server
.vm_fp
,page
*server
.vm_page_size
,SEEK_SET
) == -1) {
9072 if (server
.vm_enabled
) pthread_mutex_unlock(&server
.io_swapfile_mutex
);
9073 redisLog(REDIS_WARNING
,
9074 "Critical VM problem in vmWriteObjectOnSwap(): can't seek: %s",
9078 rdbSaveObject(server
.vm_fp
,o
);
9079 fflush(server
.vm_fp
);
9080 if (server
.vm_enabled
) pthread_mutex_unlock(&server
.io_swapfile_mutex
);
9084 /* Swap the 'val' object relative to 'key' into disk. Store all the information
9085 * needed to later retrieve the object into the key object.
9086 * If we can't find enough contiguous empty pages to swap the object on disk
9087 * REDIS_ERR is returned. */
9088 static int vmSwapObjectBlocking(robj
*key
, robj
*val
) {
9089 off_t pages
= rdbSavedObjectPages(val
,NULL
);
9092 assert(key
->storage
== REDIS_VM_MEMORY
);
9093 assert(key
->refcount
== 1);
9094 if (vmFindContiguousPages(&page
,pages
) == REDIS_ERR
) return REDIS_ERR
;
9095 if (vmWriteObjectOnSwap(val
,page
) == REDIS_ERR
) return REDIS_ERR
;
9096 key
->vm
.page
= page
;
9097 key
->vm
.usedpages
= pages
;
9098 key
->storage
= REDIS_VM_SWAPPED
;
9099 key
->vtype
= val
->type
;
9100 decrRefCount(val
); /* Deallocate the object from memory. */
9101 vmMarkPagesUsed(page
,pages
);
9102 redisLog(REDIS_DEBUG
,"VM: object %s swapped out at %lld (%lld pages)",
9103 (unsigned char*) key
->ptr
,
9104 (unsigned long long) page
, (unsigned long long) pages
);
9105 server
.vm_stats_swapped_objects
++;
9106 server
.vm_stats_swapouts
++;
/* Read from the swap file the object stored starting at page 'page'.
 * 'type' is passed as-is to rdbLoadObject() to select what to load.
 *
 * The stream server.vm_fp is positioned at byte offset
 * page*server.vm_page_size and the object is loaded from there. When VM is
 * enabled the seek+load sequence runs with server.io_swapfile_mutex held.
 * A failing seek or load is logged as an unrecoverable VM problem.
 *
 * NOTE(review): the declaration of 'o', what happens after the two warnings
 * and the final return are not visible in this fragment; presumably the
 * loaded object 'o' is returned -- confirm against the full source. */
9110 static robj
*vmReadObjectFromSwap(off_t page
, int type
) {
9113 if (server
.vm_enabled
) pthread_mutex_lock(&server
.io_swapfile_mutex
);
9114 if (fseeko(server
.vm_fp
,page
*server
.vm_page_size
,SEEK_SET
) == -1) {
9115 redisLog(REDIS_WARNING
,
9116 "Unrecoverable VM problem in vmReadObjectFromSwap(): can't seek: %s",
9120 o
= rdbLoadObject(type
,server
.vm_fp
);
9122 redisLog(REDIS_WARNING
, "Unrecoverable VM problem in vmReadObjectFromSwap(): can't load object from swap file: %s", strerror(errno
));
9125 if (server
.vm_enabled
) pthread_mutex_unlock(&server
.io_swapfile_mutex
);
9129 /* Load the value object relative to the 'key' object from swap to memory.
9130 * The newly allocated object is returned.
9132 * If preview is true the unserialized object is returned to the caller but
9133 * no changes are made to the key object, nor the pages are marked as freed */
9134 static robj
*vmGenericLoadObject(robj
*key
, int preview
) {
9137 redisAssert(key
->storage
== REDIS_VM_SWAPPED
|| key
->storage
== REDIS_VM_LOADING
);
9138 val
= vmReadObjectFromSwap(key
->vm
.page
,key
->vtype
);
9140 key
->storage
= REDIS_VM_MEMORY
;
9141 key
->vm
.atime
= server
.unixtime
;
9142 vmMarkPagesFree(key
->vm
.page
,key
->vm
.usedpages
);
9143 redisLog(REDIS_DEBUG
, "VM: object %s loaded from disk",
9144 (unsigned char*) key
->ptr
);
9145 server
.vm_stats_swapped_objects
--;
9147 redisLog(REDIS_DEBUG
, "VM: object %s previewed from disk",
9148 (unsigned char*) key
->ptr
);
9150 server
.vm_stats_swapins
++;
9154 /* Plain object loading, from swap to memory */
9155 static robj
*vmLoadObject(robj
*key
) {
9156 /* If we are loading the object in background, stop it, we
9157 * need to load this object synchronously ASAP. */
9158 if (key
->storage
== REDIS_VM_LOADING
)
9159 vmCancelThreadedIOJob(key
);
9160 return vmGenericLoadObject(key
,0);
9163 /* Just load the value from disk, without modifying the key.
9164 * This is useful when we want to perform some operation on the value
9165 * without really bringing it from swap to memory, like while saving the
9166 * dataset or rewriting the append only log. */
9167 static robj
*vmPreviewObject(robj
*key
) {
9168 return vmGenericLoadObject(key
,1);
9171 /* How good a candidate is this object for swapping?
9172 * The better candidate it is, the greater the returned value.
9174 * Currently we try to perform a fast estimation of the object size in
9175 * memory, and combine it with aging information.
9177 * Basically swappability = idle-time * log(estimated size)
9179 * Bigger objects are preferred over smaller objects, but not
9180 * proportionally, this is why we use the logarithm. This algorithm is
9181 * just a first try and will probably be tuned later. */
9182 static double computeObjectSwappability(robj
*o
) {
9183 time_t age
= server
.unixtime
- o
->vm
.atime
;
9187 struct dictEntry
*de
;
9190 if (age
<= 0) return 0;
9193 if (o
->encoding
!= REDIS_ENCODING_RAW
) {
9196 asize
= sdslen(o
->ptr
)+sizeof(*o
)+sizeof(long)*2;
9201 listNode
*ln
= listFirst(l
);
9203 asize
= sizeof(list
);
9205 robj
*ele
= ln
->value
;
9208 elesize
= (ele
->encoding
== REDIS_ENCODING_RAW
) ?
9209 (sizeof(*o
)+sdslen(ele
->ptr
)) :
9211 asize
+= (sizeof(listNode
)+elesize
)*listLength(l
);
9216 z
= (o
->type
== REDIS_ZSET
);
9217 d
= z
? ((zset
*)o
->ptr
)->dict
: o
->ptr
;
9219 asize
= sizeof(dict
)+(sizeof(struct dictEntry
*)*dictSlots(d
));
9220 if (z
) asize
+= sizeof(zset
)-sizeof(dict
);
9225 de
= dictGetRandomKey(d
);
9226 ele
= dictGetEntryKey(de
);
9227 elesize
= (ele
->encoding
== REDIS_ENCODING_RAW
) ?
9228 (sizeof(*o
)+sdslen(ele
->ptr
)) :
9230 asize
+= (sizeof(struct dictEntry
)+elesize
)*dictSize(d
);
9231 if (z
) asize
+= sizeof(zskiplistNode
)*dictSize(d
);
9235 if (o
->encoding
== REDIS_ENCODING_ZIPMAP
) {
9236 unsigned char *p
= zipmapRewind((unsigned char*)o
->ptr
);
9237 unsigned int len
= zipmapLen((unsigned char*)o
->ptr
);
9238 unsigned int klen
, vlen
;
9239 unsigned char *key
, *val
;
9241 if ((p
= zipmapNext(p
,&key
,&klen
,&val
,&vlen
)) == NULL
) {
9245 asize
= len
*(klen
+vlen
+3);
9246 } else if (o
->encoding
== REDIS_ENCODING_HT
) {
9248 asize
= sizeof(dict
)+(sizeof(struct dictEntry
*)*dictSlots(d
));
9253 de
= dictGetRandomKey(d
);
9254 ele
= dictGetEntryKey(de
);
9255 elesize
= (ele
->encoding
== REDIS_ENCODING_RAW
) ?
9256 (sizeof(*o
)+sdslen(ele
->ptr
)) :
9258 ele
= dictGetEntryVal(de
);
9259 elesize
= (ele
->encoding
== REDIS_ENCODING_RAW
) ?
9260 (sizeof(*o
)+sdslen(ele
->ptr
)) :
9262 asize
+= (sizeof(struct dictEntry
)+elesize
)*dictSize(d
);
9267 return (double)age
*log(1+asize
);
9270 /* Try to swap an object that's a good candidate for swapping.
9271 * Returns REDIS_OK if the object was swapped, REDIS_ERR if it's not possible
9272 * to swap any object at all.
9274 * If 'usethreads' is true, Redis will try to swap the object in background
9275 * using I/O threads. */
9276 static int vmSwapOneObject(int usethreads
) {
9278 struct dictEntry
*best
= NULL
;
9279 double best_swappability
= 0;
9280 redisDb
*best_db
= NULL
;
9283 for (j
= 0; j
< server
.dbnum
; j
++) {
9284 redisDb
*db
= server
.db
+j
;
9285 /* Why maxtries is set to 100?
9286 * Because this way (usually) we'll find 1 object even if just 1% - 2%
9287 * are swappable objects */
9290 if (dictSize(db
->dict
) == 0) continue;
9291 for (i
= 0; i
< 5; i
++) {
9293 double swappability
;
9295 if (maxtries
) maxtries
--;
9296 de
= dictGetRandomKey(db
->dict
);
9297 key
= dictGetEntryKey(de
);
9298 val
= dictGetEntryVal(de
);
9299 /* Only swap objects that are currently in memory.
9301 * Also don't swap shared objects if threaded VM is on, as we
9302 * try to ensure that the main thread does not touch the
9303 * object while the I/O thread is using it, but we can't
9304 * control other keys without adding additional mutex. */
9305 if (key
->storage
!= REDIS_VM_MEMORY
||
9306 (server
.vm_max_threads
!= 0 && val
->refcount
!= 1)) {
9307 if (maxtries
) i
--; /* don't count this try */
9310 swappability
= computeObjectSwappability(val
);
9311 if (!best
|| swappability
> best_swappability
) {
9313 best_swappability
= swappability
;
9318 if (best
== NULL
) return REDIS_ERR
;
9319 key
= dictGetEntryKey(best
);
9320 val
= dictGetEntryVal(best
);
9322 redisLog(REDIS_DEBUG
,"Key with best swappability: %s, %f",
9323 key
->ptr
, best_swappability
);
9325 /* Unshare the key if needed */
9326 if (key
->refcount
> 1) {
9327 robj
*newkey
= dupStringObject(key
);
9329 key
= dictGetEntryKey(best
) = newkey
;
9333 vmSwapObjectThreaded(key
,val
,best_db
);
9336 if (vmSwapObjectBlocking(key
,val
) == REDIS_OK
) {
9337 dictGetEntryVal(best
) = NULL
;
/* Convenience wrapper: vmSwapOneObject() with usethreads = 0, returning
 * its result unchanged. */
9345 static int vmSwapOneObjectBlocking() {
9346 return vmSwapOneObject(0);
/* Convenience wrapper: vmSwapOneObject() with usethreads = 1, returning
 * its result unchanged. */
9349 static int vmSwapOneObjectThreaded() {
9350 return vmSwapOneObject(1);
9353 /* Return true if it's safe to swap out objects in a given moment.
9354 * Basically we don't want to swap objects out while there is a BGSAVE
9355 * or a BGREWRITEAOF running in background. */
9356 static int vmCanSwapOut(void) {
9357 return (server
.bgsavechildpid
== -1 && server
.bgrewritechildpid
== -1);
9360 /* Delete a key if swapped. Returns 1 if the key was found, was swapped
9361 * and was deleted. Otherwise 0 is returned. */
9362 static int deleteIfSwapped(redisDb
*db
, robj
*key
) {
9366 if ((de
= dictFind(db
->dict
,key
)) == NULL
) return 0;
9367 foundkey
= dictGetEntryKey(de
);
9368 if (foundkey
->storage
== REDIS_VM_MEMORY
) return 0;
9373 /* =================== Virtual Memory - Threaded I/O ======================= */
/* Release what the I/O job 'j' holds.
 *
 * For PREPARE_SWAP, DO_SWAP and LOAD jobs with a non-NULL j->val, the
 * job's reference to the value object is dropped. j->key is deliberately
 * left alone (see the comment below).
 *
 * NOTE(review): the statement releasing the job structure itself is not
 * visible in this fragment -- confirm against the full source. */
9375 static void freeIOJob(iojob
*j
) {
9376 if ((j
->type
== REDIS_IOJOB_PREPARE_SWAP
||
9377 j
->type
== REDIS_IOJOB_DO_SWAP
||
9378 j
->type
== REDIS_IOJOB_LOAD
) && j
->val
!= NULL
)
9379 decrRefCount(j
->val
);
9380 /* We don't decrRefCount the j->key field as we didn't increment
9381 * the count creating IO Jobs. This is because the key field here is
9382 * just used as an identifier and if a key is removed the Job should
9383 * never be touched again. */
9387 /* Every time a thread finished a Job, it writes a byte into the write side
9388 * of an unix pipe in order to "awake" the main thread, and this function
9390 static void vmThreadedIOCompletedJob(aeEventLoop
*el
, int fd
, void *privdata
,
9394 int retval
, processed
= 0, toprocess
= -1, trytoswap
= 1;
9396 REDIS_NOTUSED(mask
);
9397 REDIS_NOTUSED(privdata
);
9399 /* For every byte we read in the read side of the pipe, there is one
9400 * I/O job completed to process. */
9401 while((retval
= read(fd
,buf
,1)) == 1) {
9405 struct dictEntry
*de
;
9407 redisLog(REDIS_DEBUG
,"Processing I/O completed job");
9409 /* Get the processed element (the oldest one) */
9411 assert(listLength(server
.io_processed
) != 0);
9412 if (toprocess
== -1) {
9413 toprocess
= (listLength(server
.io_processed
)*REDIS_MAX_COMPLETED_JOBS_PROCESSED
)/100;
9414 if (toprocess
<= 0) toprocess
= 1;
9416 ln
= listFirst(server
.io_processed
);
9418 listDelNode(server
.io_processed
,ln
);
9420 /* If this job is marked as canceled, just ignore it */
9425 /* Post process it in the main thread, as there are things we
9426 * can do just here to avoid race conditions and/or invasive locks */
9427 redisLog(REDIS_DEBUG
,"Job %p type: %d, key at %p (%s) refcount: %d\n", (void*) j
, j
->type
, (void*)j
->key
, (char*)j
->key
->ptr
, j
->key
->refcount
);
9428 de
= dictFind(j
->db
->dict
,j
->key
);
9430 key
= dictGetEntryKey(de
);
9431 if (j
->type
== REDIS_IOJOB_LOAD
) {
9434 /* Key loaded, bring it at home */
9435 key
->storage
= REDIS_VM_MEMORY
;
9436 key
->vm
.atime
= server
.unixtime
;
9437 vmMarkPagesFree(key
->vm
.page
,key
->vm
.usedpages
);
9438 redisLog(REDIS_DEBUG
, "VM: object %s loaded from disk (threaded)",
9439 (unsigned char*) key
->ptr
);
9440 server
.vm_stats_swapped_objects
--;
9441 server
.vm_stats_swapins
++;
9442 dictGetEntryVal(de
) = j
->val
;
9443 incrRefCount(j
->val
);
9446 /* Handle clients waiting for this key to be loaded. */
9447 handleClientsBlockedOnSwappedKey(db
,key
);
9448 } else if (j
->type
== REDIS_IOJOB_PREPARE_SWAP
) {
9449 /* Now we know the amount of pages required to swap this object.
9450 * Let's find some space for it, and queue this task again
9451 * rebranded as REDIS_IOJOB_DO_SWAP. */
9452 if (!vmCanSwapOut() ||
9453 vmFindContiguousPages(&j
->page
,j
->pages
) == REDIS_ERR
)
9455 /* Ooops... no space or we can't swap as there is
9456 * a fork()ed Redis trying to save stuff on disk. */
9458 key
->storage
= REDIS_VM_MEMORY
; /* undo operation */
9460 /* Note that we need to mark this pages as used now,
9461 * if the job will be canceled, we'll mark them as freed
9463 vmMarkPagesUsed(j
->page
,j
->pages
);
9464 j
->type
= REDIS_IOJOB_DO_SWAP
;
9469 } else if (j
->type
== REDIS_IOJOB_DO_SWAP
) {
9472 /* Key swapped. We can finally free some memory. */
9473 if (key
->storage
!= REDIS_VM_SWAPPING
) {
9474 printf("key->storage: %d\n",key
->storage
);
9475 printf("key->name: %s\n",(char*)key
->ptr
);
9476 printf("key->refcount: %d\n",key
->refcount
);
9477 printf("val: %p\n",(void*)j
->val
);
9478 printf("val->type: %d\n",j
->val
->type
);
9479 printf("val->ptr: %s\n",(char*)j
->val
->ptr
);
9481 redisAssert(key
->storage
== REDIS_VM_SWAPPING
);
9482 val
= dictGetEntryVal(de
);
9483 key
->vm
.page
= j
->page
;
9484 key
->vm
.usedpages
= j
->pages
;
9485 key
->storage
= REDIS_VM_SWAPPED
;
9486 key
->vtype
= j
->val
->type
;
9487 decrRefCount(val
); /* Deallocate the object from memory. */
9488 dictGetEntryVal(de
) = NULL
;
9489 redisLog(REDIS_DEBUG
,
9490 "VM: object %s swapped out at %lld (%lld pages) (threaded)",
9491 (unsigned char*) key
->ptr
,
9492 (unsigned long long) j
->page
, (unsigned long long) j
->pages
);
9493 server
.vm_stats_swapped_objects
++;
9494 server
.vm_stats_swapouts
++;
9496 /* Put a few more swap requests in queue if we are still
9498 if (trytoswap
&& vmCanSwapOut() &&
9499 zmalloc_used_memory() > server
.vm_max_memory
)
9504 more
= listLength(server
.io_newjobs
) <
9505 (unsigned) server
.vm_max_threads
;
9507 /* Don't waste CPU time if swappable objects are rare. */
9508 if (vmSwapOneObjectThreaded() == REDIS_ERR
) {
9516 if (processed
== toprocess
) return;
9518 if (retval
< 0 && errno
!= EAGAIN
) {
9519 redisLog(REDIS_WARNING
,
9520 "WARNING: read(2) error in vmThreadedIOCompletedJob() %s",
/* Acquire server.io_mutex (blocks until the mutex is available).
 * Pair every call with unlockThreadedIO(). */
9525 static void lockThreadedIO(void) {
9526 pthread_mutex_lock(&server
.io_mutex
);
/* Release server.io_mutex, previously acquired with lockThreadedIO(). */
9529 static void unlockThreadedIO(void) {
9530 pthread_mutex_unlock(&server
.io_mutex
);
9533 /* Remove the specified object from the threaded I/O queue if still not
9534 * processed, otherwise make sure to flag it as canceled. */
9535 static void vmCancelThreadedIOJob(robj
*o
) {
9537 server
.io_newjobs
, /* 0 */
9538 server
.io_processing
, /* 1 */
9539 server
.io_processed
/* 2 */
9543 assert(o
->storage
== REDIS_VM_LOADING
|| o
->storage
== REDIS_VM_SWAPPING
);
9546 /* Search for a matching key in one of the queues */
9547 for (i
= 0; i
< 3; i
++) {
9551 listRewind(lists
[i
],&li
);
9552 while ((ln
= listNext(&li
)) != NULL
) {
9553 iojob
*job
= ln
->value
;
9555 if (job
->canceled
) continue; /* Skip this, already canceled. */
9556 if (job
->key
== o
) {
9557 redisLog(REDIS_DEBUG
,"*** CANCELED %p (%s) (type %d) (LIST ID %d)\n",
9558 (void*)job
, (char*)o
->ptr
, job
->type
, i
);
9559 /* Mark the pages as free since the swap didn't happen
9560 * or happened but is now discarded. */
9561 if (i
!= 1 && job
->type
== REDIS_IOJOB_DO_SWAP
)
9562 vmMarkPagesFree(job
->page
,job
->pages
);
9563 /* Cancel the job. It depends on the list the job is
9566 case 0: /* io_newjobs */
9567 /* If the job was yet not processed the best thing to do
9568 * is to remove it from the queue at all */
9570 listDelNode(lists
[i
],ln
);
9572 case 1: /* io_processing */
9573 /* Oh Shi- the thread is messing with the Job:
9575 * Probably it's accessing the object if this is a
9576 * PREPARE_SWAP or DO_SWAP job.
9577 * If it's a LOAD job it may be reading from disk and
9578 * if we don't wait for the job to terminate before to
9579 * cancel it, maybe in a few microseconds data can be
9580 * corrupted in this pages. So the short story is:
9582 * Better to wait for the job to move into the
9583 * next queue (processed)... */
9585 /* We try again and again until the job is completed. */
9587 /* But let's wait some time for the I/O thread
9588 * to finish with this job. After all this condition
9589 * should be very rare. */
9592 case 2: /* io_processed */
9593 /* The job was already processed, that's easy...
9594 * just mark it as canceled so that we'll ignore it
9595 * when processing completed jobs. */
9599 /* Finally we have to adjust the storage type of the object
9600 * in order to "UNDO" the operation. */
9601 if (o
->storage
== REDIS_VM_LOADING
)
9602 o
->storage
= REDIS_VM_SWAPPED
;
9603 else if (o
->storage
== REDIS_VM_SWAPPING
)
9604 o
->storage
= REDIS_VM_MEMORY
;
9611 assert(1 != 1); /* We should never reach this */
9614 static void *IOThreadEntryPoint(void *arg
) {
9619 pthread_detach(pthread_self());
9621 /* Get a new job to process */
9623 if (listLength(server
.io_newjobs
) == 0) {
9624 /* No new jobs in queue, exit. */
9625 redisLog(REDIS_DEBUG
,"Thread %ld exiting, nothing to do",
9626 (long) pthread_self());
9627 server
.io_active_threads
--;
9631 ln
= listFirst(server
.io_newjobs
);
9633 listDelNode(server
.io_newjobs
,ln
);
9634 /* Add the job in the processing queue */
9635 j
->thread
= pthread_self();
9636 listAddNodeTail(server
.io_processing
,j
);
9637 ln
= listLast(server
.io_processing
); /* We use ln later to remove it */
9639 redisLog(REDIS_DEBUG
,"Thread %ld got a new job (type %d): %p about key '%s'",
9640 (long) pthread_self(), j
->type
, (void*)j
, (char*)j
->key
->ptr
);
9642 /* Process the Job */
9643 if (j
->type
== REDIS_IOJOB_LOAD
) {
9644 j
->val
= vmReadObjectFromSwap(j
->page
,j
->key
->vtype
);
9645 } else if (j
->type
== REDIS_IOJOB_PREPARE_SWAP
) {
9646 FILE *fp
= fopen("/dev/null","w+");
9647 j
->pages
= rdbSavedObjectPages(j
->val
,fp
);
9649 } else if (j
->type
== REDIS_IOJOB_DO_SWAP
) {
9650 if (vmWriteObjectOnSwap(j
->val
,j
->page
) == REDIS_ERR
)
9654 /* Done: insert the job into the processed queue */
9655 redisLog(REDIS_DEBUG
,"Thread %ld completed the job: %p (key %s)",
9656 (long) pthread_self(), (void*)j
, (char*)j
->key
->ptr
);
9658 listDelNode(server
.io_processing
,ln
);
9659 listAddNodeTail(server
.io_processed
,j
);
9662 /* Signal the main thread there is new stuff to process */
9663 assert(write(server
.io_ready_pipe_write
,"x",1) == 1);
9665 return NULL
; /* never reached */
/* Start one more I/O thread running IOThreadEntryPoint(), created with the
 * attributes in server.io_threads_attr, and account for it in
 * server.io_active_threads.
 *
 * SIGCHLD, SIGHUP and SIGPIPE are added to the signal mask that is
 * installed right before pthread_create(): a new thread inherits the mask
 * of its creator, so the I/O thread starts with these signals blocked. The
 * caller's previous mask (saved in 'omask') is restored afterwards.
 *
 * pthread_create() is retried in a loop until it succeeds; every failure
 * is logged as a warning.
 *
 * NOTE(review): the declarations of 'thread' and 'err', the initialization
 * of 'mask' and the rest of the retry loop body are not visible in this
 * fragment. */
9668 static void spawnIOThread(void) {
9670 sigset_t mask
, omask
;
9674 sigaddset(&mask
,SIGCHLD
);
9675 sigaddset(&mask
,SIGHUP
);
9676 sigaddset(&mask
,SIGPIPE
);
9677 pthread_sigmask(SIG_SETMASK
, &mask
, &omask
);
9678 while ((err
= pthread_create(&thread
,&server
.io_threads_attr
,IOThreadEntryPoint
,NULL
)) != 0) {
9679 redisLog(REDIS_WARNING
,"Unable to spawn an I/O thread: %s",
9683 pthread_sigmask(SIG_SETMASK
, &omask
, NULL
);
9684 server
.io_active_threads
++;
9687 /* We need to wait for the last thread to exit before we are able to
9688 * fork() in order to BGSAVE or BGREWRITEAOF. */
9689 static void waitEmptyIOJobsQueue(void) {
9691 int io_processed_len
;
9694 if (listLength(server
.io_newjobs
) == 0 &&
9695 listLength(server
.io_processing
) == 0 &&
9696 server
.io_active_threads
== 0)
9701 /* While waiting for empty jobs queue condition we post-process some
9702 * finished jobs, as I/O threads may be hanging trying to write against
9703 * the io_ready_pipe_write FD but there are so much pending jobs that
9705 io_processed_len
= listLength(server
.io_processed
);
9707 if (io_processed_len
) {
9708 vmThreadedIOCompletedJob(NULL
,server
.io_ready_pipe_read
,NULL
,0);
9709 usleep(1000); /* 1 millisecond */
9711 usleep(10000); /* 10 milliseconds */
/* Open the swap file server.vm_swap_file again ("r+b") and make
 * server.vm_fp / server.vm_fd refer to the new stream and descriptor.
 *
 * It is called by the forked child in rewriteAppendOnlyFileBackground()
 * when VM is enabled. If the file can't be opened a warning is logged; the
 * message announces that the process exits -- NOTE(review): the exit call
 * itself is not visible in this fragment. */
9716 static void vmReopenSwapFile(void) {
9717 /* Note: we don't close the old one as we are in the child process
9718 * and don't want to mess at all with the original file object. */
9719 server
.vm_fp
= fopen(server
.vm_swap_file
,"r+b");
9720 if (server
.vm_fp
== NULL
) {
9721 redisLog(REDIS_WARNING
,"Can't re-open the VM swap file: %s. Exiting.",
9722 server
.vm_swap_file
);
9725 server
.vm_fd
= fileno(server
.vm_fp
);
9728 /* This function must be called while with threaded IO locked */
9729 static void queueIOJob(iojob
*j
) {
9730 redisLog(REDIS_DEBUG
,"Queued IO Job %p type %d about key '%s'\n",
9731 (void*)j
, j
->type
, (char*)j
->key
->ptr
);
9732 listAddNodeTail(server
.io_newjobs
,j
);
9733 if (server
.io_active_threads
< server
.vm_max_threads
)
9737 static int vmSwapObjectThreaded(robj
*key
, robj
*val
, redisDb
*db
) {
9740 assert(key
->storage
== REDIS_VM_MEMORY
);
9741 assert(key
->refcount
== 1);
9743 j
= zmalloc(sizeof(*j
));
9744 j
->type
= REDIS_IOJOB_PREPARE_SWAP
;
9750 j
->thread
= (pthread_t
) -1;
9751 key
->storage
= REDIS_VM_SWAPPING
;
9759 /* ============ Virtual Memory - Blocking clients on missing keys =========== */
9761 /* This function makes the client 'c' waiting for the key 'key' to be loaded.
9762 * If there is not already a job loading the key, it is created.
9763 * The key is added to the io_keys list in the client structure, and also
9764 * in the hash table mapping swapped keys to waiting clients, that is,
9765 * server.io_waited_keys. */
/* Returns 0 when the key is not found in c->db->dict (see below).
 * NOTE(review): the header above names server.io_waited_keys, but the code
 * visible here uses the per-DB table c->db->io_keys. The other return
 * statements are not visible in this excerpt. */
9766 static int waitForSwappedKey(redisClient
*c
, robj
*key
) {
9767 struct dictEntry
*de
;
9771 /* If the key does not exist or is already in RAM we don't need to
9772 * block the client at all. */
9773 de
= dictFind(c
->db
->dict
,key
);
9774 if (de
== NULL
) return 0;
9775 o
= dictGetEntryKey(de
);
9776 if (o
->storage
== REDIS_VM_MEMORY
) {
9778 } else if (o
->storage
== REDIS_VM_SWAPPING
) {
9779 /* We were swapping the key, undo it! */
9780 vmCancelThreadedIOJob(o
);
9784 /* OK: the key is either swapped, or being loaded just now. */
9786 /* Add the key to the list of keys this client is waiting for.
9787 * This maps clients to keys they are waiting for. */
9788 listAddNodeTail(c
->io_keys
,key
);
9791 /* Add the client to the swapped keys => clients waiting map. */
9792 de
= dictFind(c
->db
->io_keys
,key
);
9796 /* For every key we take a list of clients blocked for it */
9798 retval
= dictAdd(c
->db
->io_keys
,key
,l
);
9800 assert(retval
== DICT_OK
);
9802 l
= dictGetEntryVal(de
);
9804 listAddNodeTail(l
,c
);
9806 /* Are we already loading the key from disk? If not create a job */
9807 if (o
->storage
== REDIS_VM_SWAPPED
) {
9810 o
->storage
= REDIS_VM_LOADING
;
9811 j
= zmalloc(sizeof(*j
));
9812 j
->type
= REDIS_IOJOB_LOAD
;
9815 j
->key
->vtype
= o
->vtype
;
9816 j
->page
= o
->vm
.page
;
9819 j
->thread
= (pthread_t
) -1;
9827 /* Preload keys for any command with first, last and step values for
9828 * the command keys prototype, as defined in the command table. */
/* A vm_firstkey of 0 makes the function return without doing anything.
 * A negative vm_lastkey is taken relative to argc (last = argc + last).
 * Every argv[j] from vm_firstkey to 'last', stepping by vm_keystep, is
 * passed to waitForSwappedKey() after asserting that j < argc. */
9829 static void waitForMultipleSwappedKeys(redisClient
*c
, struct redisCommand
*cmd
, int argc
, robj
**argv
) {
9831 if (cmd
->vm_firstkey
== 0) return;
9832 last
= cmd
->vm_lastkey
;
9833 if (last
< 0) last
= argc
+last
;
9834 for (j
= cmd
->vm_firstkey
; j
<= last
; j
+= cmd
->vm_keystep
) {
9835 redisAssert(j
< argc
);
9836 waitForSwappedKey(c
,argv
[j
]);
9840 /* Preload keys needed for the ZUNIONSTORE and ZINTERSTORE commands.
9841 * Note that the number of keys to preload is user-defined, so we need to
9842 * apply a sanity check against argc. */
/* The key count is parsed from argv[2] with atoi(). When it is larger than
 * the number of arguments that follow it (argc-3) nothing is preloaded;
 * otherwise argv[3] .. argv[3+num-1] are passed to waitForSwappedKey(). */
9843 static void zunionInterBlockClientOnSwappedKeys(redisClient
*c
, struct redisCommand
*cmd
, int argc
, robj
**argv
) {
9847 num
= atoi(argv
[2]->ptr
);
9848 if (num
> (argc
-3)) return;
9849 for (i
= 0; i
< num
; i
++) {
9850 waitForSwappedKey(c
,argv
[3+i
]);
9854 /* Preload keys needed to execute the entire MULTI/EXEC block.
9856 * This function is called by blockClientOnSwappedKeys when EXEC is issued,
9857 * and will block the client when any command requires a swapped out value. */
/* Does nothing unless the client has the REDIS_MULTI flag set. For each of
 * the c->mstate.count queued commands, the command's own vm_preload_proc is
 * called when it is not NULL; waitForMultipleSwappedKeys() is presumably the
 * fallback (the else branch is not visible in this excerpt). The 'argc' and
 * 'argv' parameters are marked unused; each queued command supplies its own. */
9858 static void execBlockClientOnSwappedKeys(redisClient
*c
, struct redisCommand
*cmd
, int argc
, robj
**argv
) {
9860 struct redisCommand
*mcmd
;
9863 REDIS_NOTUSED(argc
);
9864 REDIS_NOTUSED(argv
);
9866 if (!(c
->flags
& REDIS_MULTI
)) return;
9867 for (i
= 0; i
< c
->mstate
.count
; i
++) {
9868 mcmd
= c
->mstate
.commands
[i
].cmd
;
9869 margc
= c
->mstate
.commands
[i
].argc
;
9870 margv
= c
->mstate
.commands
[i
].argv
;
9872 if (mcmd
->vm_preload_proc
!= NULL
) {
9873 mcmd
->vm_preload_proc(c
,mcmd
,margc
,margv
);
9875 waitForMultipleSwappedKeys(c
,mcmd
,margc
,margv
);
9880 /* Is this client attempting to run a command against swapped keys?
9881 * If so, block it ASAP, load the keys in background, then resume it.
9883 * The important idea about this function is that it can fail! If keys will
9884 * still be swapped when the client is resumed, the key lookups will
9885 * just block loading keys from disk. In practical terms this should only
9886 * happen with SORT BY command or if there is a bug in this function.
9888 * Return 1 if the client is marked as blocked, 0 if the client can
9889 * continue as the keys it is going to access appear to be in memory. */
/* Blocking a client, as visible below, means: setting REDIS_IO_WAIT in its
 * flags, removing the AE_READABLE file event for c->fd, and incrementing
 * server.vm_blocked_clients. This happens when c->io_keys is not empty
 * after the preload step. */
9890 static int blockClientOnSwappedKeys(redisClient
*c
, struct redisCommand
*cmd
) {
9891 if (cmd
->vm_preload_proc
!= NULL
) {
9892 cmd
->vm_preload_proc(c
,cmd
,c
->argc
,c
->argv
);
9894 waitForMultipleSwappedKeys(c
,cmd
,c
->argc
,c
->argv
);
9897 /* If the client was blocked for at least one key, mark it as blocked. */
9898 if (listLength(c
->io_keys
)) {
9899 c
->flags
|= REDIS_IO_WAIT
;
9900 aeDeleteFileEvent(server
.el
,c
->fd
,AE_READABLE
);
9901 server
.vm_blocked_clients
++;
9908 /* Remove the 'key' from the list of blocked keys for a given client.
9910 * The function returns 1 when there are no longer blocking keys after
9911 * the current one was removed (and the client can be unblocked). */
/* Two structures are updated: the client's own c->io_keys list (entries are
 * compared with equalStringObjects) and the list of waiting clients stored
 * under 'key' in c->db->io_keys, whose dict entry is deleted once that list
 * becomes empty. */
9912 static int dontWaitForSwappedKey(redisClient
*c
, robj
*key
) {
9916 struct dictEntry
*de
;
9918 /* Remove the key from the list of keys this client is waiting for. */
9919 listRewind(c
->io_keys
,&li
);
9920 while ((ln
= listNext(&li
)) != NULL
) {
9921 if (equalStringObjects(ln
->value
,key
)) {
9922 listDelNode(c
->io_keys
,ln
);
9928 /* Remove the client from the key => waiting clients map. */
9929 de
= dictFind(c
->db
->io_keys
,key
);
9931 l
= dictGetEntryVal(de
);
9932 ln
= listSearchKey(l
,c
);
9935 if (listLength(l
) == 0)
9936 dictDelete(c
->db
->io_keys
,key
);
9938 return listLength(c
->io_keys
) == 0;
/* For every client found in the list stored under 'key' in db->io_keys,
 * call dontWaitForSwappedKey(); clients for which it returns true (no more
 * keys to wait for) are appended to server.io_ready_clients.
 * The list length is sampled once up front, for the reason given in the
 * comment inside the function.
 * NOTE(review): the loop header and the handling of a missing dict entry
 * are not visible in this excerpt. */
9941 static void handleClientsBlockedOnSwappedKey(redisDb
*db
, robj
*key
) {
9942 struct dictEntry
*de
;
9947 de
= dictFind(db
->io_keys
,key
);
9950 l
= dictGetEntryVal(de
);
9951 len
= listLength(l
);
9952 /* Note: we can't use something like while(listLength(l)) as the list
9953 * can be freed by the calling function when we remove the last element. */
9956 redisClient
*c
= ln
->value
;
9958 if (dontWaitForSwappedKey(c
,key
)) {
9959 /* Put the client in the list of clients ready to go as we
9960 * loaded all the keys about it. */
9961 listAddNodeTail(server
.io_ready_clients
,c
);
9966 /* =========================== Remote Configuration ========================= */
/* CONFIG SET <parameter> <value>.
 *
 * argv[2] is the parameter name (compared case-insensitively) and argv[3]
 * the new value, accessed through a decoded copy 'o'. Parameters handled
 * in the visible code: dbfilename, requirepass, masterauth, maxmemory,
 * timeout, appendfsync, appendonly, save.
 *
 * Replies with shared.ok on success, with "-ERR not supported CONFIG
 * parameter" for an unknown name, and with the 'badfmt' error for a value
 * that fails validation.
 * NOTE(review): the release of 'o' and several branch bodies are not
 * visible in this excerpt. */
9968 static void configSetCommand(redisClient
*c
) {
9969 robj
*o
= getDecodedObject(c
->argv
[3]);
9972 if (!strcasecmp(c
->argv
[2]->ptr
,"dbfilename")) {
9973 zfree(server
.dbfilename
);
9974 server
.dbfilename
= zstrdup(o
->ptr
);
9975 } else if (!strcasecmp(c
->argv
[2]->ptr
,"requirepass")) {
9976 zfree(server
.requirepass
);
9977 server
.requirepass
= zstrdup(o
->ptr
);
9978 } else if (!strcasecmp(c
->argv
[2]->ptr
,"masterauth")) {
9979 zfree(server
.masterauth
);
9980 server
.masterauth
= zstrdup(o
->ptr
);
9981 } else if (!strcasecmp(c
->argv
[2]->ptr
,"maxmemory")) {
9982 if (getLongLongFromObject(o
,&ll
) == REDIS_ERR
||
9983 ll
< 0) goto badfmt
;
9984 server
.maxmemory
= ll
;
9985 } else if (!strcasecmp(c
->argv
[2]->ptr
,"timeout")) {
9986 if (getLongLongFromObject(o
,&ll
) == REDIS_ERR
||
9987 ll
< 0 || ll
> LONG_MAX
) goto badfmt
;
9988 server
.maxidletime
= ll
;
9989 } else if (!strcasecmp(c
->argv
[2]->ptr
,"appendfsync")) {
9990 if (!strcasecmp(o
->ptr
,"no")) {
9991 server
.appendfsync
= APPENDFSYNC_NO
;
9992 } else if (!strcasecmp(o
->ptr
,"everysec")) {
9993 server
.appendfsync
= APPENDFSYNC_EVERYSEC
;
9994 } else if (!strcasecmp(o
->ptr
,"always")) {
9995 server
.appendfsync
= APPENDFSYNC_ALWAYS
;
9999 } else if (!strcasecmp(c
->argv
[2]->ptr
,"appendonly")) {
10000 int old
= server
.appendonly
;
10001 int new = yesnotoi(o
->ptr
);
10003 if (new == -1) goto badfmt
;
10008 if (startAppendOnly() == REDIS_ERR
) {
10009 addReplySds(c
,sdscatprintf(sdsempty(),
10010 "-ERR Unable to turn on AOF. Check server logs.\r\n"));
10016 } else if (!strcasecmp(c
->argv
[2]->ptr
,"save")) {
10018 sds
*v
= sdssplitlen(o
->ptr
,sdslen(o
->ptr
)," ",1,&vlen
);
10020 /* Perform sanity check before setting the new config:
10021 * - Even number of args
10022 * - Seconds >= 1, changes >= 0 */
10024 sdsfreesplitres(v
,vlen
);
10027 for (j
= 0; j
< vlen
; j
++) {
10031 val
= strtoll(v
[j
], &eptr
, 10);
10032 if (eptr
[0] != '\0' ||
10033 ((j
& 1) == 0 && val
< 1) ||
10034 ((j
& 1) == 1 && val
< 0)) {
10035 sdsfreesplitres(v
,vlen
);
10039 /* Finally set the new config */
10040 resetServerSaveParams();
10041 for (j
= 0; j
< vlen
; j
+= 2) {
10045 seconds
= strtoll(v
[j
],NULL
,10);
10046 changes
= strtoll(v
[j
+1],NULL
,10);
10047 appendServerSaveParams(seconds
, changes
);
10049 sdsfreesplitres(v
,vlen
);
10051 addReplySds(c
,sdscatprintf(sdsempty(),
10052 "-ERR not supported CONFIG parameter %s\r\n",
10053 (char*)c
->argv
[2]->ptr
));
10058 addReply(c
,shared
.ok
);
10061 badfmt
: /* Bad format errors */
10062 addReplySds(c
,sdscatprintf(sdsempty(),
10063 "-ERR invalid argument '%s' for CONFIG SET '%s'\r\n",
10065 (char*)c
->argv
[2]->ptr
));
/* CONFIG GET <pattern>.
 *
 * argv[2] (decoded) is matched with stringmatch() against each supported
 * parameter name; every match contributes two bulk replies, the name and
 * the current value. The multi bulk header cannot be known in advance, so
 * an empty string object 'lenobj' is queued first and its ptr is filled in
 * at the end with "*<matches*2>\r\n".
 * NOTE(review): the increments of 'matches' and the release of 'o' are not
 * visible in this excerpt. */
10069 static void configGetCommand(redisClient
*c
) {
10070 robj
*o
= getDecodedObject(c
->argv
[2]);
10071 robj
*lenobj
= createObject(REDIS_STRING
,NULL
);
10072 char *pattern
= o
->ptr
;
10075 addReply(c
,lenobj
);
10076 decrRefCount(lenobj
);
10078 if (stringmatch(pattern
,"dbfilename",0)) {
10079 addReplyBulkCString(c
,"dbfilename");
10080 addReplyBulkCString(c
,server
.dbfilename
);
10083 if (stringmatch(pattern
,"requirepass",0)) {
10084 addReplyBulkCString(c
,"requirepass");
10085 addReplyBulkCString(c
,server
.requirepass
);
10088 if (stringmatch(pattern
,"masterauth",0)) {
10089 addReplyBulkCString(c
,"masterauth");
10090 addReplyBulkCString(c
,server
.masterauth
);
10093 if (stringmatch(pattern
,"maxmemory",0)) {
10096 ll2string(buf
,128,server
.maxmemory
);
10097 addReplyBulkCString(c
,"maxmemory");
10098 addReplyBulkCString(c
,buf
);
10101 if (stringmatch(pattern
,"timeout",0)) {
10104 ll2string(buf
,128,server
.maxidletime
);
10105 addReplyBulkCString(c
,"timeout");
10106 addReplyBulkCString(c
,buf
);
10109 if (stringmatch(pattern
,"appendonly",0)) {
10110 addReplyBulkCString(c
,"appendonly");
10111 addReplyBulkCString(c
,server
.appendonly
? "yes" : "no");
10114 if (stringmatch(pattern
,"appendfsync",0)) {
10117 switch(server
.appendfsync
) {
10118 case APPENDFSYNC_NO
: policy
= "no"; break;
10119 case APPENDFSYNC_EVERYSEC
: policy
= "everysec"; break;
10120 case APPENDFSYNC_ALWAYS
: policy
= "always"; break;
10121 default: policy
= "unknown"; break; /* too harmless to panic */
10123 addReplyBulkCString(c
,"appendfsync");
10124 addReplyBulkCString(c
,policy
);
10127 if (stringmatch(pattern
,"save",0)) {
10128 sds buf
= sdsempty();
10131 for (j
= 0; j
< server
.saveparamslen
; j
++) {
10132 buf
= sdscatprintf(buf
,"%ld %d",
10133 server
.saveparams
[j
].seconds
,
10134 server
.saveparams
[j
].changes
);
10135 if (j
!= server
.saveparamslen
-1)
10136 buf
= sdscatlen(buf
," ",1);
10138 addReplyBulkCString(c
,"save");
10139 addReplyBulkCString(c
,buf
);
10144 lenobj
->ptr
= sdscatprintf(sdsempty(),"*%d\r\n",matches
*2);
/* CONFIG command dispatcher. argv[1] selects the subcommand
 * (case-insensitive) and the arity is checked before dispatching:
 *   SET       - argc must be 4, handled by configSetCommand()
 *   GET       - argc must be 3, handled by configGetCommand()
 *   RESETSTAT - argc must be 2; zeroes the command, connection and
 *               expired-keys counters, resets stat_starttime to now and
 *               replies with shared.ok
 * Any other subcommand gets the "must be one of GET, SET, RESETSTAT" error;
 * a wrong argument count jumps to 'badarity' (label not visible here). */
10147 static void configCommand(redisClient
*c
) {
10148 if (!strcasecmp(c
->argv
[1]->ptr
,"set")) {
10149 if (c
->argc
!= 4) goto badarity
;
10150 configSetCommand(c
);
10151 } else if (!strcasecmp(c
->argv
[1]->ptr
,"get")) {
10152 if (c
->argc
!= 3) goto badarity
;
10153 configGetCommand(c
);
10154 } else if (!strcasecmp(c
->argv
[1]->ptr
,"resetstat")) {
10155 if (c
->argc
!= 2) goto badarity
;
10156 server
.stat_numcommands
= 0;
10157 server
.stat_numconnections
= 0;
10158 server
.stat_expiredkeys
= 0;
10159 server
.stat_starttime
= time(NULL
);
10160 addReply(c
,shared
.ok
);
10162 addReplySds(c
,sdscatprintf(sdsempty(),
10163 "-ERR CONFIG subcommand must be one of GET, SET, RESETSTAT\r\n"));
10168 addReplySds(c
,sdscatprintf(sdsempty(),
10169 "-ERR Wrong number of arguments for CONFIG %s\r\n",
10170 (char*) c
->argv
[1]->ptr
));
10173 /* =========================== Pubsub implementation ======================== */
/* Release a pubsubPattern: drops the reference held on pat->pattern.
 * NOTE(review): presumably installed as the free method of the pattern
 * list; freeing of the struct itself is not visible in this excerpt. */
10175 static void freePubsubPattern(void *p
) {
10176 pubsubPattern
*pat
= p
;
10178 decrRefCount(pat
->pattern
);
/* Equality test for two pubsubPattern items: they match when both refer to
 * the same client pointer and their pattern objects compare equal with
 * equalStringObjects(). Returns non-zero on match, 0 otherwise. */
10182 static int listMatchPubsubPattern(void *a
, void *b
) {
10183 pubsubPattern
*pa
= a
, *pb
= b
;
10185 return (pa
->client
== pb
->client
) &&
10186 (equalStringObjects(pa
->pattern
,pb
->pattern
));
10189 /* Subscribe a client to a channel. Returns 1 if the operation succeeded, or
10190 * 0 if the client was already subscribed to that channel. */
/* Two tables are kept in sync: c->pubsub_channels (channels of this client)
 * and server.pubsub_channels (channel -> list of clients). The channel
 * object gets an extra reference for each table it is stored in as a key.
 * The reply is a 3-element multi bulk: "subscribe", the channel name, and
 * the client's total number of channel plus pattern subscriptions. */
10191 static int pubsubSubscribeChannel(redisClient
*c
, robj
*channel
) {
10192 struct dictEntry
*de
;
10193 list
*clients
= NULL
;
10196 /* Add the channel to the client -> channels hash table */
10197 if (dictAdd(c
->pubsub_channels
,channel
,NULL
) == DICT_OK
) {
10199 incrRefCount(channel
);
10200 /* Add the client to the channel -> list of clients hash table */
10201 de
= dictFind(server
.pubsub_channels
,channel
);
10203 clients
= listCreate();
10204 dictAdd(server
.pubsub_channels
,channel
,clients
);
10205 incrRefCount(channel
);
10207 clients
= dictGetEntryVal(de
);
10209 listAddNodeTail(clients
,c
);
10211 /* Notify the client */
10212 addReply(c
,shared
.mbulk3
);
10213 addReply(c
,shared
.subscribebulk
);
10214 addReplyBulk(c
,channel
);
10215 addReplyLongLong(c
,dictSize(c
->pubsub_channels
)+listLength(c
->pubsub_patterns
));
10219 /* Unsubscribe a client from a channel. Returns 1 if the operation succeeded, or
10220 * 0 if the client was not subscribed to the specified channel. */
/* 'notify' is presumably the switch for sending the 3-element "unsubscribe"
 * reply (the test on it is not visible in this excerpt). The channel object
 * is retained for the whole call because it may be the very object stored
 * as key in the tables being modified. */
10221 static int pubsubUnsubscribeChannel(redisClient
*c
, robj
*channel
, int notify
) {
10222 struct dictEntry
*de
;
10227 /* Remove the channel from the client -> channels hash table */
10228 incrRefCount(channel
); /* channel may be just a pointer to the same object
10229 we have in the hash tables. Protect it... */
10230 if (dictDelete(c
->pubsub_channels
,channel
) == DICT_OK
) {
10232 /* Remove the client from the channel -> clients list hash table */
10233 de
= dictFind(server
.pubsub_channels
,channel
);
10234 assert(de
!= NULL
);
10235 clients
= dictGetEntryVal(de
);
10236 ln
= listSearchKey(clients
,c
);
10237 assert(ln
!= NULL
);
10238 listDelNode(clients
,ln
);
10239 if (listLength(clients
) == 0) {
10240 /* Free the list and associated hash entry at all if this was
10241 * the latest client, so that it will not be possible to abuse
10242 * Redis PUBSUB creating millions of channels. */
10243 dictDelete(server
.pubsub_channels
,channel
);
10246 /* Notify the client */
10248 addReply(c
,shared
.mbulk3
);
10249 addReply(c
,shared
.unsubscribebulk
);
10250 addReplyBulk(c
,channel
);
10251 addReplyLongLong(c
,dictSize(c
->pubsub_channels
)+
10252 listLength(c
->pubsub_patterns
));
10255 decrRefCount(channel
); /* it is finally safe to release it */
10259 /* Subscribe a client to a pattern. Returns 1 if the operation succeeded, or
 * 0 if the client was already subscribed to that pattern. */
/* The pattern is stored twice: as a plain object in c->pubsub_patterns, and
 * wrapped in a newly allocated pubsubPattern (holding a decoded copy of the
 * pattern) in server.pubsub_patterns. The reply is a 3-element multi bulk:
 * "psubscribe", the pattern, and the client's total subscription count.
 * NOTE(review): the assignment of pat->client is not visible here. */
10260 static int pubsubSubscribePattern(redisClient
*c
, robj
*pattern
) {
10263 if (listSearchKey(c
->pubsub_patterns
,pattern
) == NULL
) {
10265 pubsubPattern
*pat
;
10266 listAddNodeTail(c
->pubsub_patterns
,pattern
);
10267 incrRefCount(pattern
);
10268 pat
= zmalloc(sizeof(*pat
));
10269 pat
->pattern
= getDecodedObject(pattern
);
10271 listAddNodeTail(server
.pubsub_patterns
,pat
);
10273 /* Notify the client */
10274 addReply(c
,shared
.mbulk3
);
10275 addReply(c
,shared
.psubscribebulk
);
10276 addReplyBulk(c
,pattern
);
10277 addReplyLongLong(c
,dictSize(c
->pubsub_channels
)+listLength(c
->pubsub_patterns
));
10281 /* Unsubscribe a client from a pattern. Returns 1 if the operation succeeded, or
10282 * 0 if the client was not subscribed to the specified pattern. */
/* The pattern is removed from c->pubsub_patterns and the matching
 * pubsubPattern is looked up in server.pubsub_patterns through a temporary
 * 'pat' used as search key. The pattern object is retained for the whole
 * call since it may be the same object being removed from the list.
 * NOTE(review): the test on 'notify' and the setup of pat.client are not
 * visible in this excerpt. */
10283 static int pubsubUnsubscribePattern(redisClient
*c
, robj
*pattern
, int notify
) {
10288 incrRefCount(pattern
); /* Protect the object. May be the same we remove */
10289 if ((ln
= listSearchKey(c
->pubsub_patterns
,pattern
)) != NULL
) {
10291 listDelNode(c
->pubsub_patterns
,ln
);
10293 pat
.pattern
= pattern
;
10294 ln
= listSearchKey(server
.pubsub_patterns
,&pat
);
10295 listDelNode(server
.pubsub_patterns
,ln
);
10297 /* Notify the client */
10299 addReply(c
,shared
.mbulk3
);
10300 addReply(c
,shared
.punsubscribebulk
);
10301 addReplyBulk(c
,pattern
);
10302 addReplyLongLong(c
,dictSize(c
->pubsub_channels
)+
10303 listLength(c
->pubsub_patterns
));
10305 decrRefCount(pattern
);
10309 /* Unsubscribe from all the channels. Return the number of channels the
10310 * client was subscribed to. */
/* Iterates c->pubsub_channels and calls pubsubUnsubscribeChannel() on each
 * key, forwarding 'notify' and summing the return values into 'count'. */
10311 static int pubsubUnsubscribeAllChannels(redisClient
*c
, int notify
) {
10312 dictIterator
*di
= dictGetIterator(c
->pubsub_channels
);
10316 while((de
= dictNext(di
)) != NULL
) {
10317 robj
*channel
= dictGetEntryKey(de
);
10319 count
+= pubsubUnsubscribeChannel(c
,channel
,notify
);
10321 dictReleaseIterator(di
);
10325 /* Unsubscribe from all the patterns. Return the number of patterns the
10326 * client was subscribed to. */
/* Walks c->pubsub_patterns and calls pubsubUnsubscribePattern() on each
 * node value, forwarding 'notify' and summing the results into 'count'. */
10327 static int pubsubUnsubscribeAllPatterns(redisClient
*c
, int notify
) {
10332 listRewind(c
->pubsub_patterns
,&li
);
10333 while ((ln
= listNext(&li
)) != NULL
) {
10334 robj
*pattern
= ln
->value
;
10336 count
+= pubsubUnsubscribePattern(c
,pattern
,notify
);
10341 /* Publish a message */
/* Delivery happens in two passes:
 *  1. clients in the list stored under 'channel' in server.pubsub_channels
 *     receive a 3-element multi bulk: "message", channel, message;
 *  2. every entry of server.pubsub_patterns whose pattern matches the
 *     (decoded) channel name via stringmatchlen() receives a 4-element
 *     multi bulk: "pmessage", pattern, channel, message.
 * The decoded channel obtained in pass 2 is released once the loop is done.
 * NOTE(review): the receivers counter and the return statement are not
 * visible here; publishCommand() uses the result as the receivers count. */
10342 static int pubsubPublishMessage(robj
*channel
, robj
*message
) {
10344 struct dictEntry
*de
;
10348 /* Send to clients listening for that channel */
10349 de
= dictFind(server
.pubsub_channels
,channel
);
10351 list
*list
= dictGetEntryVal(de
);
10355 listRewind(list
,&li
);
10356 while ((ln
= listNext(&li
)) != NULL
) {
10357 redisClient
*c
= ln
->value
;
10359 addReply(c
,shared
.mbulk3
);
10360 addReply(c
,shared
.messagebulk
);
10361 addReplyBulk(c
,channel
);
10362 addReplyBulk(c
,message
);
10366 /* Send to clients listening to matching channels */
10367 if (listLength(server
.pubsub_patterns
)) {
10368 listRewind(server
.pubsub_patterns
,&li
);
10369 channel
= getDecodedObject(channel
);
10370 while ((ln
= listNext(&li
)) != NULL
) {
10371 pubsubPattern
*pat
= ln
->value
;
10373 if (stringmatchlen((char*)pat
->pattern
->ptr
,
10374 sdslen(pat
->pattern
->ptr
),
10375 (char*)channel
->ptr
,
10376 sdslen(channel
->ptr
),0)) {
10377 addReply(pat
->client
,shared
.mbulk4
);
10378 addReply(pat
->client
,shared
.pmessagebulk
);
10379 addReplyBulk(pat
->client
,pat
->pattern
);
10380 addReplyBulk(pat
->client
,channel
);
10381 addReplyBulk(pat
->client
,message
);
10385 decrRefCount(channel
);
/* SUBSCRIBE channel [channel ...]: subscribe the client to every channel
 * named in argv[1..argc-1]; the per-channel reply is produced by
 * pubsubSubscribeChannel(). */
10390 static void subscribeCommand(redisClient
*c
) {
10393 for (j
= 1; j
< c
->argc
; j
++)
10394 pubsubSubscribeChannel(c
,c
->argv
[j
]);
/* UNSUBSCRIBE [channel ...]: with no arguments (argc == 1) the client is
 * unsubscribed from all its channels; otherwise from each channel listed
 * in argv[1..argc-1]. Notifications are requested in both cases
 * (notify = 1). */
10397 static void unsubscribeCommand(redisClient
*c
) {
10398 if (c
->argc
== 1) {
10399 pubsubUnsubscribeAllChannels(c
,1);
10404 for (j
= 1; j
< c
->argc
; j
++)
10405 pubsubUnsubscribeChannel(c
,c
->argv
[j
],1);
/* PSUBSCRIBE pattern [pattern ...]: subscribe the client to every pattern
 * named in argv[1..argc-1]; the per-pattern reply is produced by
 * pubsubSubscribePattern(). */
10409 static void psubscribeCommand(redisClient
*c
) {
10412 for (j
= 1; j
< c
->argc
; j
++)
10413 pubsubSubscribePattern(c
,c
->argv
[j
]);
/* PUNSUBSCRIBE [pattern ...]: with no arguments (argc == 1) the client is
 * unsubscribed from all its patterns; otherwise from each pattern listed
 * in argv[1..argc-1]. Notifications are requested in both cases
 * (notify = 1). */
10416 static void punsubscribeCommand(redisClient
*c
) {
10417 if (c
->argc
== 1) {
10418 pubsubUnsubscribeAllPatterns(c
,1);
10423 for (j
= 1; j
< c
->argc
; j
++)
10424 pubsubUnsubscribePattern(c
,c
->argv
[j
],1);
/* PUBLISH channel message: argv[1] is the channel, argv[2] the message.
 * The integer returned by pubsubPublishMessage() is sent back to the
 * caller as the reply. */
10428 static void publishCommand(redisClient
*c
) {
10429 int receivers
= pubsubPublishMessage(c
->argv
[1],c
->argv
[2]);
10430 addReplyLongLong(c
,receivers
);
10433 /* ===================== WATCH (CAS alike for MULTI/EXEC) ===================
10435 * The implementation uses a per-DB hash table mapping keys to list of clients
10436 * WATCHing those keys, so that given a key that is going to be modified
10437 * we can mark all the associated clients as dirty.
10439 * Also every client contains a list of WATCHed keys so that's possible to
10440 * un-watch such keys when the client is freed or when UNWATCH is called. */
/* NOTE(review): the members of watchedKey and the end of the comment below
 * are not visible in this excerpt. Code further down reads wk->key and
 * wk->db, so the truncated sentence presumably ends with "DB". */
10442 /* In the client->watched_keys list we need to use watchedKey structures
10443 * as in order to identify a key in Redis we need both the key name and the
10445 typedef struct watchedKey
{
10450 /* Watch for the specified key */
/* Does nothing if the client already watches 'key' in its current DB
 * (same wk->db and equal key string). Otherwise the client is appended to
 * the list stored under 'key' in c->db->watched_keys (a new list is created
 * and added to the dict when needed) and a freshly allocated watchedKey is
 * appended to c->watched_keys.
 * NOTE(review): the initialisation of the new watchedKey's fields and the
 * related reference counting are not visible in this excerpt. */
10451 static void watchForKey(redisClient
*c
, robj
*key
) {
10452 list
*clients
= NULL
;
10457 /* Check if we are already watching for this key */
10458 listRewind(c
->watched_keys
,&li
);
10459 while((ln
= listNext(&li
))) {
10460 wk
= listNodeValue(ln
);
10461 if (wk
->db
== c
->db
&& equalStringObjects(key
,wk
->key
))
10462 return; /* Key already watched */
10464 /* This key is not already watched in this DB. Let's add it */
10465 clients
= dictFetchValue(c
->db
->watched_keys
,key
);
10467 clients
= listCreate();
10468 dictAdd(c
->db
->watched_keys
,key
,clients
);
10471 listAddNodeTail(clients
,c
);
10472 /* Add the new key to the list of keys watched by this client */
10473 wk
= zmalloc(sizeof(*wk
));
10477 listAddNodeTail(c
->watched_keys
,wk
);
10480 /* Unwatch all the keys watched by this client. To clean the EXEC dirty
10481 * flag is up to the caller. */
/* Returns immediately when the client watches nothing. For each watchedKey
 * the client is removed from the per-DB key -> clients list (the dict entry
 * is deleted when that list becomes empty), the node is removed from
 * c->watched_keys and the reference on wk->key is dropped. */
10482 static void unwatchAllKeys(redisClient
*c
) {
10486 if (listLength(c
->watched_keys
) == 0) return;
10487 listRewind(c
->watched_keys
,&li
);
10488 while((ln
= listNext(&li
))) {
10492 /* Lookup the watched key -> clients list and remove the client
10494 wk
= listNodeValue(ln
);
10495 clients
= dictFetchValue(wk
->db
->watched_keys
, wk
->key
);
10496 assert(clients
!= NULL
);
10497 listDelNode(clients
,listSearchKey(clients
,c
));
10498 /* Kill the entry at all if this was the only client */
10499 if (listLength(clients
) == 0)
10500 dictDelete(wk
->db
->watched_keys
, wk
->key
);
10501 /* Remove this watched key from the client->watched list */
10502 listDelNode(c
->watched_keys
,ln
);
10503 decrRefCount(wk
->key
);
10508 /* "Touch" a key, so that if this key is being WATCHed by some client the
10509 * next EXEC will fail. */
/* Fast paths: returns at once when db->watched_keys is empty or when no
 * client list is stored for 'key'. */
10510 static void touchWatchedKey(redisDb
*db
, robj
*key
) {
10515 if (dictSize(db
->watched_keys
) == 0) return;
10516 clients
= dictFetchValue(db
->watched_keys
, key
);
10517 if (!clients
) return;
10519 /* Mark all the clients watching this key as REDIS_DIRTY_CAS */
10520 /* (walk the list of clients fetched above) */
10521 listRewind(clients
,&li
);
10522 while((ln
= listNext(&li
))) {
10523 redisClient
*c
= listNodeValue(ln
);
10525 c
->flags
|= REDIS_DIRTY_CAS
;
10529 /* On FLUSHDB or FLUSHALL all the watched keys that are present before the
10530 * flush but will be deleted as effect of the flushing operation should
10531 * be touched. "dbid" is the DB that's getting the flush. -1 if it is
10532 * a FLUSHALL operation (all the DBs flushed). */
/* Implementation: nested walk over server.clients and each client's
 * watched_keys; a client gets REDIS_DIRTY_CAS when a watched key belongs to
 * a flushed DB and is currently present in that DB's dict. */
10533 static void touchWatchedKeysOnFlush(int dbid
) {
10537 /* For every client, check all the watched keys */
10538 listRewind(server
.clients
,&li1
);
10539 while((ln
= listNext(&li1
))) {
10540 redisClient
*c
= listNodeValue(ln
);
10541 listRewind(c
->watched_keys
,&li2
);
10542 while((ln
= listNext(&li2
))) {
10543 watchedKey
*wk
= listNodeValue(ln
);
10545 /* For every watched key matching the specified DB, if the
10546 * key exists, mark the client as dirty, as the key will be
10548 if (dbid
== -1 || wk
->db
->id
== dbid
) {
10549 if (dictFind(wk
->db
->dict
, wk
->key
) != NULL
)
10550 c
->flags
|= REDIS_DIRTY_CAS
;
// WATCH key [key ...]: refused with an error while the client is inside
// MULTI (REDIS_MULTI flag set). Otherwise watchForKey() is called for every
// argv[1..argc-1] and the reply is shared.ok.
// NOTE(review): the return after the error reply is not visible here.
10556 static void watchCommand(redisClient
*c
) {
10559 if (c
->flags
& REDIS_MULTI
) {
10560 addReplySds(c
,sdsnew("-ERR WATCH inside MULTI is not allowed\r\n"));
10563 for (j
= 1; j
< c
->argc
; j
++)
10564 watchForKey(c
,c
->argv
[j
]);
10565 addReply(c
,shared
.ok
);
// UNWATCH: clears the client's REDIS_DIRTY_CAS flag and replies shared.ok.
// NOTE(review): the call that actually forgets the watched keys (presumably
// unwatchAllKeys) is not visible in this excerpt.
10568 static void unwatchCommand(redisClient
*c
) {
10570 c
->flags
&= (~REDIS_DIRTY_CAS
);
10571 addReply(c
,shared
.ok
);
10574 /* ================================= Debugging ============================== */
10576 /* Compute the sha1 of string at 's' with 'len' bytes long.
10577 * The SHA1 is then xored against the string pointed by digest.
10578 * Since xor is commutative, this operation is used in order to
10579 * "add" digests relative to unordered elements.
10581 * So digest(a,b,c,d) will be the same of digest(b,a,c,d) */
/* 'digest' must point to a 20-byte buffer: exactly 20 bytes are xored in
 * place with the 20-byte SHA1 of the input. */
10582 static void xorDigest(unsigned char *digest
, void *ptr
, size_t len
) {
10584 unsigned char hash
[20], *s
= ptr
;
10588 SHA1Update(&ctx
,s
,len
);
10589 SHA1Final(hash
,&ctx
);
10591 for (j
= 0; j
< 20; j
++)
10592 digest
[j
] ^= hash
[j
];
/* xorDigest() applied to a Redis string object: the object is first turned
 * into its decoded form, then its sds payload (ptr, sdslen) is xored into
 * 'digest'.
 * NOTE(review): getDecodedObject() results are released with decrRefCount()
 * elsewhere in this file; the release here is not visible in this excerpt. */
10595 static void xorObjectDigest(unsigned char *digest
, robj
*o
) {
10596 o
= getDecodedObject(o
);
10597 xorDigest(digest
,o
->ptr
,sdslen(o
->ptr
));
/* Order-sensitive digest update; 'digest' is a 20-byte buffer that is
 * updated in place (xorDigest() followed by a SHA1 of the buffer itself). */
10601 /* This function instead of just computing the SHA1 and xoring it
10602 * against digest, also performs the digest of "digest" itself and
10603 * replaces the old value with the new one.
10605 * So the final digest will be:
10607 * digest = SHA1(digest xor SHA1(data))
10609 * This function is used every time we want to preserve the order so
10610 * that digest(a,b,c,d) will be different than digest(b,c,d,a)
10612 * Also note that mixdigest("foo") followed by mixdigest("bar")
10613 * will lead to a different digest compared to "fo", "obar".
10615 static void mixDigest(unsigned char *digest
, void *ptr
, size_t len
) {
10619 xorDigest(digest
,s
,len
);
10621 SHA1Update(&ctx
,digest
,20);
10622 SHA1Final(digest
,&ctx
);
// mixDigest() applied to a Redis string object: the object is first turned
// into its decoded form, then its sds payload (ptr, sdslen) is mixed into
// 'digest'.
// NOTE(review): the release of the decoded object is not visible here.
10625 static void mixObjectDigest(unsigned char *digest
, robj
*o
) {
10626 o
= getDecodedObject(o
);
10627 mixDigest(digest
,o
->ptr
,sdslen(o
->ptr
));
10631 /* Compute the dataset digest. Since keys, sets elements, hashes elements
10632 * are not ordered, we use a trick: every aggregate digest is the xor
10633 * of the digests of their elements. This way the order will not change
10634 * the result. For list instead we use a feedback entering the output digest
10635 * as input in order to ensure that a different ordered list will result in
10636 * a different digest. */
/* 'final' must point to a 20-byte buffer; it is zeroed first and then
 * receives the result. Empty databases are skipped. For every key a private
 * 20-byte 'digest' is built from: the key name, the value type (htonl'ed),
 * the value content (per-type handling below) and, if the key has an
 * expire, the "!!expire!!" marker. That per-key digest is then xored into
 * 'final', so key order does not matter.
 * Per-type mixing as visible below:
 *   string - mixObjectDigest of the value
 *   list   - mixObjectDigest of each element, in list order
 *   set    - xorObjectDigest of each member
 *   zset   - per element: mix(member), mix(score printed with "%.17g"),
 *            then xor of that element digest
 *   hash   - per field: mix(field), mix(value), then xor
 * Any other type triggers redisPanic(). */
10637 static void computeDatasetDigest(unsigned char *final
) {
10638 unsigned char digest
[20];
10640 dictIterator
*di
= NULL
;
10645 memset(final
,0,20); /* Start with a clean result */
10647 for (j
= 0; j
< server
.dbnum
; j
++) {
10648 redisDb
*db
= server
.db
+j
;
10650 if (dictSize(db
->dict
) == 0) continue;
10651 di
= dictGetIterator(db
->dict
);
10653 /* hash the DB id, so the same dataset moved in a different
10654 * DB will lead to a different digest */
10656 mixDigest(final
,&aux
,sizeof(aux
));
10658 /* Iterate this DB writing every entry */
10659 while((de
= dictNext(di
)) != NULL
) {
10660 robj
*key
, *o
, *kcopy
;
10663 memset(digest
,0,20); /* This key-val digest */
10664 key
= dictGetEntryKey(de
);
10666 if (!server
.vm_enabled
) {
10667 mixObjectDigest(digest
,key
);
10668 o
= dictGetEntryVal(de
);
10670 /* Don't work with the key directly as when VM is active
10671 * this is unsafe: TODO: fix decrRefCount to check if the
10672 * count really reached 0 to avoid this mess */
10673 kcopy
= dupStringObject(key
);
10674 mixObjectDigest(digest
,kcopy
);
10675 o
= lookupKeyRead(db
,kcopy
);
10676 decrRefCount(kcopy
);
10678 aux
= htonl(o
->type
);
10679 mixDigest(digest
,&aux
,sizeof(aux
));
10680 expiretime
= getExpire(db
,key
);
10682 /* Save the key and associated value */
10683 if (o
->type
== REDIS_STRING
) {
10684 mixObjectDigest(digest
,o
);
10685 } else if (o
->type
== REDIS_LIST
) {
10686 list
*list
= o
->ptr
;
10690 listRewind(list
,&li
);
10691 while((ln
= listNext(&li
))) {
10692 robj
*eleobj
= listNodeValue(ln
);
10694 mixObjectDigest(digest
,eleobj
);
10696 } else if (o
->type
== REDIS_SET
) {
10697 dict
*set
= o
->ptr
;
10698 dictIterator
*di
= dictGetIterator(set
);
10701 while((de
= dictNext(di
)) != NULL
) {
10702 robj
*eleobj
= dictGetEntryKey(de
);
10704 xorObjectDigest(digest
,eleobj
);
10706 dictReleaseIterator(di
);
10707 } else if (o
->type
== REDIS_ZSET
) {
10709 dictIterator
*di
= dictGetIterator(zs
->dict
);
10712 while((de
= dictNext(di
)) != NULL
) {
10713 robj
*eleobj
= dictGetEntryKey(de
);
10714 double *score
= dictGetEntryVal(de
);
10715 unsigned char eledigest
[20];
10717 snprintf(buf
,sizeof(buf
),"%.17g",*score
);
10718 memset(eledigest
,0,20);
10719 mixObjectDigest(eledigest
,eleobj
);
10720 mixDigest(eledigest
,buf
,strlen(buf
));
10721 xorDigest(digest
,eledigest
,20);
10723 dictReleaseIterator(di
);
10724 } else if (o
->type
== REDIS_HASH
) {
10728 hi
= hashInitIterator(o
);
10729 while (hashNext(hi
) != REDIS_ERR
) {
10730 unsigned char eledigest
[20];
10732 memset(eledigest
,0,20);
10733 obj
= hashCurrent(hi
,REDIS_HASH_KEY
);
10734 mixObjectDigest(eledigest
,obj
);
10736 obj
= hashCurrent(hi
,REDIS_HASH_VALUE
);
10737 mixObjectDigest(eledigest
,obj
);
10739 xorDigest(digest
,eledigest
,20);
10741 hashReleaseIterator(hi
);
10743 redisPanic("Unknown object type");
10745 /* If the key has an expire, add it to the mix */
10746 if (expiretime
!= -1) xorDigest(digest
,"!!expire!!",10);
10747 /* We can finally xor the key-val digest to the final digest */
10748 xorDigest(final
,digest
,20);
10750 dictReleaseIterator(di
);
/* DEBUG <subcommand> [...]. argv[1] selects the subcommand
 * (case-insensitive). Subcommands handled in the visible code:
 *   SEGFAULT        - deliberately writes to an invalid address
 *   RELOAD          - rdbSave() then rdbLoad() of server.dbfilename
 *   LOADAOF         - loadAppendOnlyFile() of server.appendfilename
 *   OBJECT <key>    - reports pointers, refcounts and encoding, or the swap
 *                     page information when the value is not in memory
 *   SWAPIN <key>    - looks the key up with lookupKeyRead()
 *   SWAPOUT <key>   - swaps the value out with vmSwapObjectBlocking();
 *                     requires VM to be enabled and the key in memory
 *   POPULATE <n>    - adds "key:<j>" -> "value:<j>" for j in [0, n),
 *                     skipping keys that already exist
 *   DIGEST          - replies with the hex form of computeDatasetDigest()
 * NOTE(review): the syntax error text at the bottom lists only SEGFAULT,
 * OBJECT, SWAPIN, SWAPOUT and RELOAD, so LOADAOF, POPULATE and DIGEST are
 * missing from the help message. */
10754 static void debugCommand(redisClient
*c
) {
10755 if (!strcasecmp(c
->argv
[1]->ptr
,"segfault")) {
10756 *((char*)-1) = 'x';
10757 } else if (!strcasecmp(c
->argv
[1]->ptr
,"reload")) {
10758 if (rdbSave(server
.dbfilename
) != REDIS_OK
) {
10759 addReply(c
,shared
.err
);
10763 if (rdbLoad(server
.dbfilename
) != REDIS_OK
) {
10764 addReply(c
,shared
.err
);
10767 redisLog(REDIS_WARNING
,"DB reloaded by DEBUG RELOAD");
10768 addReply(c
,shared
.ok
);
10769 } else if (!strcasecmp(c
->argv
[1]->ptr
,"loadaof")) {
10771 if (loadAppendOnlyFile(server
.appendfilename
) != REDIS_OK
) {
10772 addReply(c
,shared
.err
);
10775 redisLog(REDIS_WARNING
,"Append Only File loaded by DEBUG LOADAOF");
10776 addReply(c
,shared
.ok
);
10777 } else if (!strcasecmp(c
->argv
[1]->ptr
,"object") && c
->argc
== 3) {
10778 dictEntry
*de
= dictFind(c
->db
->dict
,c
->argv
[2]);
10782 addReply(c
,shared
.nokeyerr
);
10785 key
= dictGetEntryKey(de
);
10786 val
= dictGetEntryVal(de
);
10787 if (!server
.vm_enabled
|| (key
->storage
== REDIS_VM_MEMORY
||
10788 key
->storage
== REDIS_VM_SWAPPING
)) {
10792 if (val
->encoding
< (sizeof(strencoding
)/sizeof(char*))) {
10793 strenc
= strencoding
[val
->encoding
];
10795 snprintf(buf
,64,"unknown encoding %d\n", val
->encoding
);
10798 addReplySds(c
,sdscatprintf(sdsempty(),
10799 "+Key at:%p refcount:%d, value at:%p refcount:%d "
10800 "encoding:%s serializedlength:%lld\r\n",
10801 (void*)key
, key
->refcount
, (void*)val
, val
->refcount
,
10802 strenc
, (long long) rdbSavedObjectLen(val
,NULL
)));
10804 addReplySds(c
,sdscatprintf(sdsempty(),
10805 "+Key at:%p refcount:%d, value swapped at: page %llu "
10806 "using %llu pages\r\n",
10807 (void*)key
, key
->refcount
, (unsigned long long) key
->vm
.page
,
10808 (unsigned long long) key
->vm
.usedpages
));
10810 } else if (!strcasecmp(c
->argv
[1]->ptr
,"swapin") && c
->argc
== 3) {
10811 lookupKeyRead(c
->db
,c
->argv
[2]);
10812 addReply(c
,shared
.ok
);
10813 } else if (!strcasecmp(c
->argv
[1]->ptr
,"swapout") && c
->argc
== 3) {
10814 dictEntry
*de
= dictFind(c
->db
->dict
,c
->argv
[2]);
10817 if (!server
.vm_enabled
) {
10818 addReplySds(c
,sdsnew("-ERR Virtual Memory is disabled\r\n"));
10822 addReply(c
,shared
.nokeyerr
);
10825 key
= dictGetEntryKey(de
);
10826 val
= dictGetEntryVal(de
);
10827 /* If the key is shared we want to create a copy */
10828 if (key
->refcount
> 1) {
10829 robj
*newkey
= dupStringObject(key
);
10831 key
= dictGetEntryKey(de
) = newkey
;
10834 if (key
->storage
!= REDIS_VM_MEMORY
) {
10835 addReplySds(c
,sdsnew("-ERR This key is not in memory\r\n"));
10836 } else if (vmSwapObjectBlocking(key
,val
) == REDIS_OK
) {
10837 dictGetEntryVal(de
) = NULL
;
10838 addReply(c
,shared
.ok
);
10840 addReply(c
,shared
.err
);
10842 } else if (!strcasecmp(c
->argv
[1]->ptr
,"populate") && c
->argc
== 3) {
10847 if (getLongFromObjectOrReply(c
, c
->argv
[2], &keys
, NULL
) != REDIS_OK
)
10849 for (j
= 0; j
< keys
; j
++) {
10850 snprintf(buf
,sizeof(buf
),"key:%lu",j
);
10851 key
= createStringObject(buf
,strlen(buf
));
10852 if (lookupKeyRead(c
->db
,key
) != NULL
) {
10856 snprintf(buf
,sizeof(buf
),"value:%lu",j
);
10857 val
= createStringObject(buf
,strlen(buf
));
10858 dictAdd(c
->db
->dict
,key
,val
);
10860 addReply(c
,shared
.ok
);
10861 } else if (!strcasecmp(c
->argv
[1]->ptr
,"digest") && c
->argc
== 2) {
10862 unsigned char digest
[20];
10863 sds d
= sdsnew("+");
10866 computeDatasetDigest(digest
);
10867 for (j
= 0; j
< 20; j
++)
10868 d
= sdscatprintf(d
, "%02x",digest
[j
]);
10870 d
= sdscatlen(d
,"\r\n",2);
10873 addReplySds(c
,sdsnew(
10874 "-ERR Syntax error, try DEBUG [SEGFAULT|OBJECT <key>|SWAPIN <key>|SWAPOUT <key>|RELOAD]\r\n"));
10878 static void _redisAssert(char *estr
, char *file
, int line
) {
10879 redisLog(REDIS_WARNING
,"=== ASSERTION FAILED ===");
10880 redisLog(REDIS_WARNING
,"==> %s:%d '%s' is not true",file
,line
,estr
);
10881 #ifdef HAVE_BACKTRACE
10882 redisLog(REDIS_WARNING
,"(forcing SIGSEGV in order to print the stack trace)");
10883 *((char*)-1) = 'x';
10887 static void _redisPanic(char *msg
, char *file
, int line
) {
10888 redisLog(REDIS_WARNING
,"!!! Software Failure. Press left mouse button to continue");
10889 redisLog(REDIS_WARNING
,"Guru Meditation: %s #%s:%d",msg
,file
,line
);
10890 #ifdef HAVE_BACKTRACE
10891 redisLog(REDIS_WARNING
,"(forcing SIGSEGV in order to print the stack trace)");
10892 *((char*)-1) = 'x';
10896 /* =================================== Main! ================================ */
/* Read the Linux overcommit policy from /proc/sys/vm/overcommit_memory.
 *
 * Returns the non-negative integer found at the start of the file, or -1
 * when the file cannot be opened, cannot be read, or does not start with a
 * non-negative number. The stream is closed on every path. */
int linuxOvercommitMemoryValue(void) {
    FILE *fp = fopen("/proc/sys/vm/overcommit_memory","r");
    char buf[64];
    char *end;
    long value;

    if (!fp) return -1;
    if (fgets(buf,sizeof(buf),fp) == NULL) {
        fclose(fp);
        return -1;
    }
    fclose(fp);

    /* strtol() reports where parsing stopped, so unparsable content can be
     * told apart from a real "0" instead of being mistaken for it. */
    value = strtol(buf,&end,10);
    if (end == buf || value < 0) return -1;
    return (int)value;
}
10913 void linuxOvercommitMemoryWarning(void) {
10914 if (linuxOvercommitMemoryValue() == 0) {
10915 redisLog(REDIS_WARNING
,"WARNING overcommit_memory is set to 0! Background save may fail under low memory condition. To fix this issue add 'vm.overcommit_memory = 1' to /etc/sysctl.conf and then reboot or run the command 'sysctl vm.overcommit_memory=1' for this to take effect.");
10918 #endif /* __linux__ */
10920 static void daemonize(void) {
10924 if (fork() != 0) exit(0); /* parent exits */
10925 setsid(); /* create a new session */
10927 /* Every output goes to /dev/null. If Redis is daemonized but
10928 * the 'logfile' is set to 'stdout' in the configuration file
10929 * it will not log at all. */
10930 if ((fd
= open("/dev/null", O_RDWR
, 0)) != -1) {
10931 dup2(fd
, STDIN_FILENO
);
10932 dup2(fd
, STDOUT_FILENO
);
10933 dup2(fd
, STDERR_FILENO
);
10934 if (fd
> STDERR_FILENO
) close(fd
);
10936 /* Try to write the pid file */
10937 fp
= fopen(server
.pidfile
,"w");
10939 fprintf(fp
,"%d\n",getpid());
10944 static void version() {
10945 printf("Redis server version %s (%s:%d)\n", REDIS_VERSION
,
10946 REDIS_GIT_SHA1
, atoi(REDIS_GIT_DIRTY
) > 0);
/* Print the command line synopsis on stderr and terminate the process
 * with status 1. */
static void usage() {
    /* No conversion specifiers are involved, so the lines are written
     * verbatim. */
    fputs("Usage: ./redis-server [/path/to/redis.conf]\n", stderr);
    fputs(" ./redis-server - (read config from stdin)\n", stderr);
    exit(1);
}
10956 int main(int argc
, char **argv
) {
10959 initServerConfig();
10960 sortCommandTable();
10962 if (strcmp(argv
[1], "-v") == 0 ||
10963 strcmp(argv
[1], "--version") == 0) version();
10964 if (strcmp(argv
[1], "--help") == 0) usage();
10965 resetServerSaveParams();
10966 loadServerConfig(argv
[1]);
10967 } else if ((argc
> 2)) {
10970 redisLog(REDIS_WARNING
,"Warning: no config file specified, using the default config. In order to specify a config file use 'redis-server /path/to/redis.conf'");
10972 if (server
.daemonize
) daemonize();
10974 redisLog(REDIS_NOTICE
,"Server started, Redis version " REDIS_VERSION
);
10976 linuxOvercommitMemoryWarning();
10978 start
= time(NULL
);
10979 if (server
.appendonly
) {
10980 if (loadAppendOnlyFile(server
.appendfilename
) == REDIS_OK
)
10981 redisLog(REDIS_NOTICE
,"DB loaded from append only file: %ld seconds",time(NULL
)-start
);
10983 if (rdbLoad(server
.dbfilename
) == REDIS_OK
)
10984 redisLog(REDIS_NOTICE
,"DB loaded from disk: %ld seconds",time(NULL
)-start
);
10986 redisLog(REDIS_NOTICE
,"The server is now ready to accept connections on port %d", server
.port
);
10987 aeSetBeforeSleepProc(server
.el
,beforeSleep
);
10989 aeDeleteEventLoop(server
.el
);
10993 /* ============================= Backtrace support ========================= */
10995 #ifdef HAVE_BACKTRACE
10996 static char *findFuncName(void *pointer
, unsigned long *offset
);
10998 static void *getMcontextEip(ucontext_t
*uc
) {
10999 #if defined(__FreeBSD__)
11000 return (void*) uc
->uc_mcontext
.mc_eip
;
11001 #elif defined(__dietlibc__)
11002 return (void*) uc
->uc_mcontext
.eip
;
11003 #elif defined(__APPLE__) && !defined(MAC_OS_X_VERSION_10_6)
11005 return (void*) uc
->uc_mcontext
->__ss
.__rip
;
11007 return (void*) uc
->uc_mcontext
->__ss
.__eip
;
11009 #elif defined(__APPLE__) && defined(MAC_OS_X_VERSION_10_6)
11010 #if defined(_STRUCT_X86_THREAD_STATE64) && !defined(__i386__)
11011 return (void*) uc
->uc_mcontext
->__ss
.__rip
;
11013 return (void*) uc
->uc_mcontext
->__ss
.__eip
;
11015 #elif defined(__i386__) || defined(__X86_64__) || defined(__x86_64__)
11016 return (void*) uc
->uc_mcontext
.gregs
[REG_EIP
]; /* Linux 32/64 bit */
11017 #elif defined(__ia64__) /* Linux IA64 */
11018 return (void*) uc
->uc_mcontext
.sc_ip
;
11024 static void segvHandler(int sig
, siginfo_t
*info
, void *secret
) {
11026 char **messages
= NULL
;
11027 int i
, trace_size
= 0;
11028 unsigned long offset
=0;
11029 ucontext_t
*uc
= (ucontext_t
*) secret
;
11031 REDIS_NOTUSED(info
);
11033 redisLog(REDIS_WARNING
,
11034 "======= Ooops! Redis %s got signal: -%d- =======", REDIS_VERSION
, sig
);
11035 infostring
= genRedisInfoString();
11036 redisLog(REDIS_WARNING
, "%s",infostring
);
11037 /* It's not safe to sdsfree() the returned string under memory
11038 * corruption conditions. Let it leak as we are going to abort */
11040 trace_size
= backtrace(trace
, 100);
11041 /* overwrite sigaction with caller's address */
11042 if (getMcontextEip(uc
) != NULL
) {
11043 trace
[1] = getMcontextEip(uc
);
11045 messages
= backtrace_symbols(trace
, trace_size
);
11047 for (i
=1; i
<trace_size
; ++i
) {
11048 char *fn
= findFuncName(trace
[i
], &offset
), *p
;
11050 p
= strchr(messages
[i
],'+');
11051 if (!fn
|| (p
&& ((unsigned long)strtol(p
+1,NULL
,10)) < offset
)) {
11052 redisLog(REDIS_WARNING
,"%s", messages
[i
]);
11054 redisLog(REDIS_WARNING
,"%d redis-server %p %s + %d", i
, trace
[i
], fn
, (unsigned int)offset
);
11057 /* free(messages); Don't call free() with possibly corrupted memory. */
11061 static void sigtermHandler(int sig
) {
11062 REDIS_NOTUSED(sig
);
11064 redisLog(REDIS_WARNING
,"SIGTERM received, scheduling shutting down...");
11065 server
.shutdown_asap
= 1;
11068 static void setupSigSegvAction(void) {
11069 struct sigaction act
;
11071 sigemptyset (&act
.sa_mask
);
11072 /* When the SA_SIGINFO flag is set in sa_flags then sa_sigaction
11073 * is used. Otherwise, sa_handler is used */
11074 act
.sa_flags
= SA_NODEFER
| SA_ONSTACK
| SA_RESETHAND
| SA_SIGINFO
;
11075 act
.sa_sigaction
= segvHandler
;
11076 sigaction (SIGSEGV
, &act
, NULL
);
11077 sigaction (SIGBUS
, &act
, NULL
);
11078 sigaction (SIGFPE
, &act
, NULL
);
11079 sigaction (SIGILL
, &act
, NULL
);
11080 sigaction (SIGBUS
, &act
, NULL
);
11082 act
.sa_flags
= SA_NODEFER
| SA_ONSTACK
| SA_RESETHAND
;
11083 act
.sa_handler
= sigtermHandler
;
11084 sigaction (SIGTERM
, &act
, NULL
);
11088 #include "staticsymbols.h"
11089 /* This function try to convert a pointer into a function name. It's used in
11090 * oreder to provide a backtrace under segmentation fault that's able to
11091 * display functions declared as static (otherwise the backtrace is useless). */
11092 static char *findFuncName(void *pointer
, unsigned long *offset
){
11094 unsigned long off
, minoff
= 0;
11096 /* Try to match against the Symbol with the smallest offset */
11097 for (i
=0; symsTable
[i
].pointer
; i
++) {
11098 unsigned long lp
= (unsigned long) pointer
;
11100 if (lp
!= (unsigned long)-1 && lp
>= symsTable
[i
].pointer
) {
11101 off
=lp
-symsTable
[i
].pointer
;
11102 if (ret
< 0 || off
< minoff
) {
11108 if (ret
== -1) return NULL
;
11110 return symsTable
[ret
].name
;
11112 #else /* HAVE_BACKTRACE */
/* Variant compiled when HAVE_BACKTRACE is not defined: it installs no
 * signal handler at all. */
static void setupSigSegvAction(void) {
}
11115 #endif /* HAVE_BACKTRACE */