2 * Copyright (c) 2009-2010, Salvatore Sanfilippo <antirez at gmail dot com>
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions are met:
8 * * Redistributions of source code must retain the above copyright notice,
9 * this list of conditions and the following disclaimer.
10 * * Redistributions in binary form must reproduce the above copyright
11 * notice, this list of conditions and the following disclaimer in the
12 * documentation and/or other materials provided with the distribution.
13 * * Neither the name of Redis nor the names of its contributors may be used
14 * to endorse or promote products derived from this software without
15 * specific prior written permission.
17 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
18 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
21 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
22 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
23 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
24 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
25 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
26 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
27 * POSSIBILITY OF SUCH DAMAGE.
30 #define REDIS_VERSION "2.1.1"
45 #endif /* HAVE_BACKTRACE */
53 #include <arpa/inet.h>
57 #include <sys/resource.h>
65 #include "solarisfixes.h"
69 #include "ae.h" /* Event driven programming library */
70 #include "sds.h" /* Dynamic safe strings */
71 #include "anet.h" /* Networking the easy way */
72 #include "dict.h" /* Hash tables */
73 #include "adlist.h" /* Linked lists */
74 #include "zmalloc.h" /* total memory usage aware version of malloc/free */
75 #include "lzf.h" /* LZF compression library */
76 #include "pqsort.h" /* Partial qsort for SORT+LIMIT */
77 #include "zipmap.h" /* Compact dictionary-alike data structure */
78 #include "sha1.h" /* SHA1 is used for DEBUG DIGEST */
79 #include "release.h" /* Release and/or git repository information */
85 /* Static server configuration */
86 #define REDIS_SERVERPORT 6379 /* TCP port */
87 #define REDIS_MAXIDLETIME (60*5) /* default client timeout */
88 #define REDIS_IOBUF_LEN 1024
89 #define REDIS_LOADBUF_LEN 1024
90 #define REDIS_STATIC_ARGS 8
91 #define REDIS_DEFAULT_DBNUM 16
92 #define REDIS_CONFIGLINE_MAX 1024
93 #define REDIS_OBJFREELIST_MAX 1000000 /* Max number of objects to cache */
94 #define REDIS_MAX_SYNC_TIME 60 /* Slave can't take more to sync */
95 #define REDIS_EXPIRELOOKUPS_PER_CRON 10 /* lookup 10 expires per loop */
96 #define REDIS_MAX_WRITE_PER_EVENT (1024*64)
97 #define REDIS_REQUEST_MAX_SIZE (1024*1024*256) /* max bytes in inline command */
99 /* If more than REDIS_WRITEV_THRESHOLD write packets are pending use writev */
100 #define REDIS_WRITEV_THRESHOLD 3
101 /* Max number of iovecs used for each writev call */
102 #define REDIS_WRITEV_IOVEC_COUNT 256
104 /* Hash table parameters */
105 #define REDIS_HT_MINFILL 10 /* Minimal hash table fill 10% */
108 #define REDIS_CMD_BULK 1 /* Bulk write command */
109 #define REDIS_CMD_INLINE 2 /* Inline command */
110 /* REDIS_CMD_DENYOOM deserves a longer comment: all the commands marked with
111 this flag will return an error when the 'maxmemory' option is set in the
112 config file and the server is using more than maxmemory bytes of memory.
113 In short these commands are denied on low memory conditions. */
114 #define REDIS_CMD_DENYOOM 4
115 #define REDIS_CMD_FORCE_REPLICATION 8 /* Force replication even if dirty is 0 */
118 #define REDIS_STRING 0
124 /* Objects encoding. Some kinds of objects like Strings and Hashes can be
125 * internally represented in multiple ways. The 'encoding' field of the object
126 * is set to one of these values for this object. */
127 #define REDIS_ENCODING_RAW 0 /* Raw representation */
128 #define REDIS_ENCODING_INT 1 /* Encoded as integer */
129 #define REDIS_ENCODING_ZIPMAP 2 /* Encoded as zipmap */
130 #define REDIS_ENCODING_HT 3 /* Encoded as an hash table */
132 static char* strencoding
[] = {
133 "raw", "int", "zipmap", "hashtable"
136 /* Object types only used for dumping to disk */
137 #define REDIS_EXPIRETIME 253
138 #define REDIS_SELECTDB 254
139 #define REDIS_EOF 255
141 /* Defines related to the dump file format. To store 32 bits lengths for short
142 * keys requires a lot of space, so we check the most significant 2 bits of
143 * the first byte to interpret the length:
145 * 00|000000 => if the two MSB are 00 the len is the 6 bits of this byte
146 * 01|000000 00000000 => 01, the len is 14 bits, 6 bits + 8 bits of next byte
147 * 10|000000 [32 bit integer] => if it's 10, a full 32 bit len will follow
148 * 11|000000 this means: specially encoded object will follow. The six bits
149 * number specify the kind of object that follows.
150 * See the REDIS_RDB_ENC_* defines.
152 * Lengths up to 63 are stored using a single byte, most DB keys, and many
153 * values, will fit inside. */
154 #define REDIS_RDB_6BITLEN 0
155 #define REDIS_RDB_14BITLEN 1
156 #define REDIS_RDB_32BITLEN 2
157 #define REDIS_RDB_ENCVAL 3
158 #define REDIS_RDB_LENERR UINT_MAX
160 /* When a length of a string object stored on disk has the first two bits
161 * set, the remaining six bits specify a special encoding for the object
162 * according to the following defines: */
163 #define REDIS_RDB_ENC_INT8 0 /* 8 bit signed integer */
164 #define REDIS_RDB_ENC_INT16 1 /* 16 bit signed integer */
165 #define REDIS_RDB_ENC_INT32 2 /* 32 bit signed integer */
166 #define REDIS_RDB_ENC_LZF 3 /* string compressed with LZF */
168 /* Virtual memory object->where field. */
169 #define REDIS_VM_MEMORY 0 /* The object is on memory */
170 #define REDIS_VM_SWAPPED 1 /* The object is on disk */
171 #define REDIS_VM_SWAPPING 2 /* Redis is swapping this object on disk */
172 #define REDIS_VM_LOADING 3 /* Redis is loading this object from disk */
174 /* Virtual memory static configuration stuff.
175 * Check vmFindContiguousPages() to know more about these magic numbers. */
176 #define REDIS_VM_MAX_NEAR_PAGES 65536
177 #define REDIS_VM_MAX_RANDOM_JUMP 4096
178 #define REDIS_VM_MAX_THREADS 32
179 #define REDIS_THREAD_STACK_SIZE (1024*1024*4)
180 /* The following is the *percentage* of completed I/O jobs to process when the
181 * handler is called. While Virtual Memory I/O operations are performed by
182 * threads, these operations must be processed by the main thread when completed
183 * in order to take effect. */
184 #define REDIS_MAX_COMPLETED_JOBS_PROCESSED 1
187 #define REDIS_SLAVE 1 /* This client is a slave server */
188 #define REDIS_MASTER 2 /* This client is a master server */
189 #define REDIS_MONITOR 4 /* This client is a slave monitor, see MONITOR */
190 #define REDIS_MULTI 8 /* This client is in a MULTI context */
191 #define REDIS_BLOCKED 16 /* The client is waiting in a blocking operation */
192 #define REDIS_IO_WAIT 32 /* The client is waiting for Virtual Memory I/O */
193 #define REDIS_DIRTY_CAS 64 /* Watched keys modified. EXEC will fail. */
195 /* Slave replication state - slave side */
196 #define REDIS_REPL_NONE 0 /* No active replication */
197 #define REDIS_REPL_CONNECT 1 /* Must connect to master */
198 #define REDIS_REPL_CONNECTED 2 /* Connected to master */
200 /* Slave replication state - from the point of view of master
201 * Note that in SEND_BULK and ONLINE state the slave receives new updates
202 * in its output queue. In the WAIT_BGSAVE state instead the server is waiting
203 * to start the next background saving in order to send updates to it. */
204 #define REDIS_REPL_WAIT_BGSAVE_START 3 /* master waits bgsave to start feeding it */
205 #define REDIS_REPL_WAIT_BGSAVE_END 4 /* master waits bgsave to start bulk DB transmission */
206 #define REDIS_REPL_SEND_BULK 5 /* master is sending the bulk DB */
207 #define REDIS_REPL_ONLINE 6 /* bulk DB already transmitted, receive updates */
209 /* List related stuff */
213 /* Sort operations */
214 #define REDIS_SORT_GET 0
215 #define REDIS_SORT_ASC 1
216 #define REDIS_SORT_DESC 2
217 #define REDIS_SORTKEY_MAX 1024
220 #define REDIS_DEBUG 0
221 #define REDIS_VERBOSE 1
222 #define REDIS_NOTICE 2
223 #define REDIS_WARNING 3
/* Anti-warning macro: evaluates V and discards the result by casting it to
 * void. The argument is wrapped in its own parentheses so that the cast
 * applies to the whole expression passed in, not only to its first operand. */
#define REDIS_NOTUSED(V) ((void)(V))
228 #define ZSKIPLIST_MAXLEVEL 32 /* Should be enough for 2^32 elements */
229 #define ZSKIPLIST_P 0.25 /* Skiplist P = 1/4 */
231 /* Append only defines */
232 #define APPENDFSYNC_NO 0
233 #define APPENDFSYNC_ALWAYS 1
234 #define APPENDFSYNC_EVERYSEC 2
236 /* Hashes related defaults */
237 #define REDIS_HASH_MAX_ZIPMAP_ENTRIES 64
238 #define REDIS_HASH_MAX_ZIPMAP_VALUE 512
/* We can print the stacktrace, so our assert is defined this way.
 *
 * redisAssert(_e): when _e evaluates to false, passes the stringified
 * expression together with the current file and line to _redisAssert() and
 * then calls _exit(1). When _e is true the macro evaluates to (void)0.
 *
 * redisPanic(_e): unconditionally passes the stringified argument together
 * with the current file and line to _redisPanic() and then calls _exit(1).
 *
 * Both expansions are fully parenthesized so that each macro behaves as a
 * single expression regardless of the surrounding operators. */
#define redisAssert(_e) ((_e)?(void)0 : (_redisAssert(#_e,__FILE__,__LINE__),_exit(1)))
#define redisPanic(_e) (_redisPanic(#_e,__FILE__,__LINE__),_exit(1))
static void _redisAssert(char *estr, char *file, int line);
static void _redisPanic(char *msg, char *file, int line);
246 /*================================= Data types ============================== */
248 /* A redis object, that is a type able to hold a string / list / set */
250 /* The VM object structure */
251 struct redisObjectVM
{
252 off_t page
; /* the page at which the object is stored on disk */
253 off_t usedpages
; /* number of pages used on disk */
254 time_t atime
; /* Last access time */
257 /* The actual Redis Object */
258 typedef struct redisObject
{
261 unsigned char encoding
;
262 unsigned char storage
; /* If this object is a key, where is the value?
263 * REDIS_VM_MEMORY, REDIS_VM_SWAPPED, ... */
264 unsigned char vtype
; /* If this object is a key, and value is swapped out,
265 * this is the type of the swapped out object. */
267 /* VM fields, these are only allocated if VM is active, otherwise the
268 * object allocation function will just allocate
269 * sizeof(redisObject) minus sizeof(redisObjectVM), so using
270 * Redis without VM active will not have any overhead. */
271 struct redisObjectVM vm
;
274 /* Macro used to initialize a Redis object allocated on the stack.
275 * Note that this macro is kept near the structure definition to make sure
276 * we'll update it when the structure is changed, to avoid bugs like
277 * bug #85 introduced exactly in this way. */
278 #define initStaticStringObject(_var,_ptr) do { \
280 _var.type = REDIS_STRING; \
281 _var.encoding = REDIS_ENCODING_RAW; \
283 if (server.vm_enabled) _var.storage = REDIS_VM_MEMORY; \
286 typedef struct redisDb
{
287 dict
*dict
; /* The keyspace for this DB */
288 dict
*expires
; /* Timeout of keys with a timeout set */
289 dict
*blocking_keys
; /* Keys with clients waiting for data (BLPOP) */
290 dict
*io_keys
; /* Keys with clients waiting for VM I/O */
291 dict
*watched_keys
; /* WATCHED keys for MULTI/EXEC CAS */
295 /* Client MULTI/EXEC state */
296 typedef struct multiCmd
{
299 struct redisCommand
*cmd
;
302 typedef struct multiState
{
303 multiCmd
*commands
; /* Array of MULTI commands */
304 int count
; /* Total number of MULTI commands */
307 /* With multiplexing we need to keep per-client state.
308 * Clients are kept in a linked list. */
309 typedef struct redisClient
{
314 robj
**argv
, **mbargv
;
316 int bulklen
; /* bulk read len. -1 if not in bulk read mode */
317 int multibulk
; /* multi bulk command format active */
320 time_t lastinteraction
; /* time of the last interaction, used for timeout */
321 int flags
; /* REDIS_SLAVE | REDIS_MONITOR | REDIS_MULTI ... */
322 int slaveseldb
; /* slave selected db, if this client is a slave */
323 int authenticated
; /* when requirepass is non-NULL */
324 int replstate
; /* replication state if this is a slave */
325 int repldbfd
; /* replication DB file descriptor */
326 long repldboff
; /* replication DB file offset */
327 off_t repldbsize
; /* replication DB file size */
328 multiState mstate
; /* MULTI/EXEC state */
329 robj
**blocking_keys
; /* The key we are waiting to terminate a blocking
330 * operation such as BLPOP. Otherwise NULL. */
331 int blocking_keys_num
; /* Number of blocking keys */
332 time_t blockingto
; /* Blocking operation timeout. If UNIX current time
333 * is >= blockingto then the operation timed out. */
334 list
*io_keys
; /* Keys this client is waiting to be loaded from the
335 * swap file in order to continue. */
336 list
*watched_keys
; /* Keys WATCHED for MULTI/EXEC CAS */
337 dict
*pubsub_channels
; /* channels a client is interested in (SUBSCRIBE) */
338 list
*pubsub_patterns
; /* patterns a client is interested in (SUBSCRIBE) */
346 /* Global server state structure */
351 long long dirty
; /* changes to DB from the last save */
353 list
*slaves
, *monitors
;
354 char neterr
[ANET_ERR_LEN
];
356 int cronloops
; /* number of times the cron function run */
357 list
*objfreelist
; /* A list of freed objects to avoid malloc() */
358 time_t lastsave
; /* Unix time of the last successful save */
359 /* Fields used only for stats */
360 time_t stat_starttime
; /* server start time */
361 long long stat_numcommands
; /* number of processed commands */
362 long long stat_numconnections
; /* number of connections received */
363 long long stat_expiredkeys
; /* number of expired keys */
372 int no_appendfsync_on_rewrite
;
378 pid_t bgsavechildpid
;
379 pid_t bgrewritechildpid
;
380 sds bgrewritebuf
; /* buffer taken by parent during append only rewrite */
381 sds aofbuf
; /* AOF buffer, written before entering the event loop */
382 struct saveparam
*saveparams
;
387 char *appendfilename
;
391 /* Replication related */
396 redisClient
*master
; /* client that is master for this slave */
398 unsigned int maxclients
;
399 unsigned long long maxmemory
;
400 unsigned int blpop_blocked_clients
;
401 unsigned int vm_blocked_clients
;
402 /* Sort parameters - qsort_r() is only available under BSD so we
403 * have to take this state global, in order to pass it to sortCompare() */
407 /* Virtual memory configuration */
412 unsigned long long vm_max_memory
;
414 size_t hash_max_zipmap_entries
;
415 size_t hash_max_zipmap_value
;
416 /* Virtual memory state */
419 off_t vm_next_page
; /* Next probably empty page */
420 off_t vm_near_pages
; /* Number of pages allocated sequentially */
421 unsigned char *vm_bitmap
; /* Bitmap of free/used pages */
422 time_t unixtime
; /* Unix time sampled every second. */
423 /* Virtual memory I/O threads stuff */
424 /* An I/O thread processes an element taken from the io_newjobs queue and
425 * puts the result of the operation in the io_processed list. While the
426 * job is being processed, it's put on the io_processing queue. */
427 list
*io_newjobs
; /* List of VM I/O jobs yet to be processed */
428 list
*io_processing
; /* List of VM I/O jobs being processed */
429 list
*io_processed
; /* List of VM I/O jobs already processed */
430 list
*io_ready_clients
; /* Clients ready to be unblocked. All keys loaded */
431 pthread_mutex_t io_mutex
; /* lock to access io_jobs/io_done/io_thread_job */
432 pthread_mutex_t obj_freelist_mutex
; /* safe redis objects creation/free */
433 pthread_mutex_t io_swapfile_mutex
; /* So we can lseek + write */
434 pthread_attr_t io_threads_attr
; /* attributes for threads creation */
435 int io_active_threads
; /* Number of running I/O threads */
436 int vm_max_threads
; /* Max number of I/O threads running at the same time */
437 /* Our main thread is blocked on the event loop, looking for sockets ready
438 * to be read or written, so when a threaded I/O operation is ready to be
439 * processed by the main thread, the I/O thread will use a unix pipe to
440 * awake the main thread. The following are the two pipe FDs. */
441 int io_ready_pipe_read
;
442 int io_ready_pipe_write
;
443 /* Virtual memory stats */
444 unsigned long long vm_stats_used_pages
;
445 unsigned long long vm_stats_swapped_objects
;
446 unsigned long long vm_stats_swapouts
;
447 unsigned long long vm_stats_swapins
;
449 dict
*pubsub_channels
; /* Map channels to list of subscribed clients */
450 list
*pubsub_patterns
; /* A list of pubsub_patterns */
455 typedef struct pubsubPattern
{
460 typedef void redisCommandProc(redisClient
*c
);
461 typedef void redisVmPreloadProc(redisClient
*c
, struct redisCommand
*cmd
, int argc
, robj
**argv
);
462 struct redisCommand
{
464 redisCommandProc
*proc
;
467 /* Use a function to determine which keys need to be loaded
468 * in the background prior to executing this command. Takes precedence
469 * over vm_firstkey and others, ignored when NULL */
470 redisVmPreloadProc
*vm_preload_proc
;
471 /* What keys should be loaded in background when calling this command? */
472 int vm_firstkey
; /* The first argument that's a key (0 = no keys) */
473 int vm_lastkey
; /* The last argument that's a key */
474 int vm_keystep
; /* The step between first and last key */
477 struct redisFunctionSym
{
479 unsigned long pointer
;
482 typedef struct _redisSortObject
{
490 typedef struct _redisSortOperation
{
493 } redisSortOperation
;
495 /* ZSETs use a specialized version of Skiplists */
497 typedef struct zskiplistNode
{
498 struct zskiplistNode
**forward
;
499 struct zskiplistNode
*backward
;
505 typedef struct zskiplist
{
506 struct zskiplistNode
*header
, *tail
;
507 unsigned long length
;
511 typedef struct zset
{
516 /* Our shared "common" objects */
518 #define REDIS_SHARED_INTEGERS 10000
519 struct sharedObjectsStruct
{
520 robj
*crlf
, *ok
, *err
, *emptybulk
, *czero
, *cone
, *pong
, *space
,
521 *colon
, *nullbulk
, *nullmultibulk
, *queued
,
522 *emptymultibulk
, *wrongtypeerr
, *nokeyerr
, *syntaxerr
, *sameobjecterr
,
523 *outofrangeerr
, *plus
,
524 *select0
, *select1
, *select2
, *select3
, *select4
,
525 *select5
, *select6
, *select7
, *select8
, *select9
,
526 *messagebulk
, *pmessagebulk
, *subscribebulk
, *unsubscribebulk
, *mbulk3
,
527 *mbulk4
, *psubscribebulk
, *punsubscribebulk
,
528 *integers
[REDIS_SHARED_INTEGERS
];
/* Global variables that are actually used as constants. These doubles are
 * used for the on-disk serialization of double values, and they are
 * initialized at runtime to avoid strange compiler optimizations. */
static double R_Zero;
static double R_PosInf;
static double R_NegInf;
static double R_Nan;
537 /* VM threaded I/O request message */
538 #define REDIS_IOJOB_LOAD 0 /* Load from disk to memory */
539 #define REDIS_IOJOB_PREPARE_SWAP 1 /* Compute needed pages */
540 #define REDIS_IOJOB_DO_SWAP 2 /* Swap from memory to disk */
541 typedef struct iojob
{
542 int type
; /* Request type, REDIS_IOJOB_* */
543 redisDb
*db
;/* Redis database */
544 robj
*key
; /* This I/O request is about swapping this key */
545 robj
*val
; /* the value to swap for REDIS_IOJOB_*_SWAP, otherwise this
546 * field is populated by the I/O thread for REDIS_IOJOB_LOAD. */
547 off_t page
; /* Swap page where to read/write the object */
548 off_t pages
; /* Swap pages needed to save object. PREPARE_SWAP return val */
549 int canceled
; /* True if this command was canceled by blocking side of VM */
550 pthread_t thread
; /* ID of the thread processing this entry */
553 /*================================ Prototypes =============================== */
555 static void freeStringObject(robj
*o
);
556 static void freeListObject(robj
*o
);
557 static void freeSetObject(robj
*o
);
558 static void decrRefCount(void *o
);
559 static robj
*createObject(int type
, void *ptr
);
560 static void freeClient(redisClient
*c
);
561 static int rdbLoad(char *filename
);
562 static void addReply(redisClient
*c
, robj
*obj
);
563 static void addReplySds(redisClient
*c
, sds s
);
564 static void incrRefCount(robj
*o
);
565 static int rdbSaveBackground(char *filename
);
566 static robj
*createStringObject(char *ptr
, size_t len
);
567 static robj
*dupStringObject(robj
*o
);
568 static void replicationFeedSlaves(list
*slaves
, int dictid
, robj
**argv
, int argc
);
569 static void replicationFeedMonitors(list
*monitors
, int dictid
, robj
**argv
, int argc
);
570 static void flushAppendOnlyFile(void);
571 static void feedAppendOnlyFile(struct redisCommand
*cmd
, int dictid
, robj
**argv
, int argc
);
572 static int syncWithMaster(void);
573 static robj
*tryObjectEncoding(robj
*o
);
574 static robj
*getDecodedObject(robj
*o
);
575 static int removeExpire(redisDb
*db
, robj
*key
);
576 static int expireIfNeeded(redisDb
*db
, robj
*key
);
577 static int deleteIfVolatile(redisDb
*db
, robj
*key
);
578 static int deleteIfSwapped(redisDb
*db
, robj
*key
);
579 static int deleteKey(redisDb
*db
, robj
*key
);
580 static time_t getExpire(redisDb
*db
, robj
*key
);
581 static int setExpire(redisDb
*db
, robj
*key
, time_t when
);
582 static void updateSlavesWaitingBgsave(int bgsaveerr
);
583 static void freeMemoryIfNeeded(void);
584 static int processCommand(redisClient
*c
);
585 static void setupSigSegvAction(void);
586 static void rdbRemoveTempFile(pid_t childpid
);
587 static void aofRemoveTempFile(pid_t childpid
);
588 static size_t stringObjectLen(robj
*o
);
589 static void processInputBuffer(redisClient
*c
);
590 static zskiplist
*zslCreate(void);
591 static void zslFree(zskiplist
*zsl
);
592 static void zslInsert(zskiplist
*zsl
, double score
, robj
*obj
);
593 static void sendReplyToClientWritev(aeEventLoop
*el
, int fd
, void *privdata
, int mask
);
594 static void initClientMultiState(redisClient
*c
);
595 static void freeClientMultiState(redisClient
*c
);
596 static void queueMultiCommand(redisClient
*c
, struct redisCommand
*cmd
);
597 static void unblockClientWaitingData(redisClient
*c
);
598 static int handleClientsWaitingListPush(redisClient
*c
, robj
*key
, robj
*ele
);
599 static void vmInit(void);
600 static void vmMarkPagesFree(off_t page
, off_t count
);
601 static robj
*vmLoadObject(robj
*key
);
602 static robj
*vmPreviewObject(robj
*key
);
603 static int vmSwapOneObjectBlocking(void);
604 static int vmSwapOneObjectThreaded(void);
605 static int vmCanSwapOut(void);
606 static int tryFreeOneObjectFromFreelist(void);
607 static void acceptHandler(aeEventLoop
*el
, int fd
, void *privdata
, int mask
);
608 static void vmThreadedIOCompletedJob(aeEventLoop
*el
, int fd
, void *privdata
, int mask
);
609 static void vmCancelThreadedIOJob(robj
*o
);
610 static void lockThreadedIO(void);
611 static void unlockThreadedIO(void);
612 static int vmSwapObjectThreaded(robj
*key
, robj
*val
, redisDb
*db
);
613 static void freeIOJob(iojob
*j
);
614 static void queueIOJob(iojob
*j
);
615 static int vmWriteObjectOnSwap(robj
*o
, off_t page
);
616 static robj
*vmReadObjectFromSwap(off_t page
, int type
);
617 static void waitEmptyIOJobsQueue(void);
618 static void vmReopenSwapFile(void);
619 static int vmFreePage(off_t page
);
620 static void zunionInterBlockClientOnSwappedKeys(redisClient
*c
, struct redisCommand
*cmd
, int argc
, robj
**argv
);
621 static void execBlockClientOnSwappedKeys(redisClient
*c
, struct redisCommand
*cmd
, int argc
, robj
**argv
);
622 static int blockClientOnSwappedKeys(redisClient
*c
, struct redisCommand
*cmd
);
623 static int dontWaitForSwappedKey(redisClient
*c
, robj
*key
);
624 static void handleClientsBlockedOnSwappedKey(redisDb
*db
, robj
*key
);
625 static void readQueryFromClient(aeEventLoop
*el
, int fd
, void *privdata
, int mask
);
626 static struct redisCommand
*lookupCommand(char *name
);
627 static void call(redisClient
*c
, struct redisCommand
*cmd
);
628 static void resetClient(redisClient
*c
);
629 static void convertToRealHash(robj
*o
);
630 static int pubsubUnsubscribeAllChannels(redisClient
*c
, int notify
);
631 static int pubsubUnsubscribeAllPatterns(redisClient
*c
, int notify
);
632 static void freePubsubPattern(void *p
);
633 static int listMatchPubsubPattern(void *a
, void *b
);
634 static int compareStringObjects(robj
*a
, robj
*b
);
635 static int equalStringObjects(robj
*a
, robj
*b
);
637 static int rewriteAppendOnlyFileBackground(void);
638 static int vmSwapObjectBlocking(robj
*key
, robj
*val
);
/* Declared with an explicit (void) parameter list so that this is a real
 * prototype and calls passing arguments are rejected by the compiler. */
static int prepareForShutdown(void);
640 static void touchWatchedKey(redisDb
*db
, robj
*key
);
641 static void touchWatchedKeysOnFlush(int dbid
);
642 static void unwatchAllKeys(redisClient
*c
);
644 static void authCommand(redisClient
*c
);
645 static void pingCommand(redisClient
*c
);
646 static void echoCommand(redisClient
*c
);
647 static void setCommand(redisClient
*c
);
648 static void setnxCommand(redisClient
*c
);
649 static void setexCommand(redisClient
*c
);
650 static void getCommand(redisClient
*c
);
651 static void delCommand(redisClient
*c
);
652 static void existsCommand(redisClient
*c
);
653 static void incrCommand(redisClient
*c
);
654 static void decrCommand(redisClient
*c
);
655 static void incrbyCommand(redisClient
*c
);
656 static void decrbyCommand(redisClient
*c
);
657 static void selectCommand(redisClient
*c
);
658 static void randomkeyCommand(redisClient
*c
);
659 static void keysCommand(redisClient
*c
);
660 static void dbsizeCommand(redisClient
*c
);
661 static void lastsaveCommand(redisClient
*c
);
662 static void saveCommand(redisClient
*c
);
663 static void bgsaveCommand(redisClient
*c
);
664 static void bgrewriteaofCommand(redisClient
*c
);
665 static void shutdownCommand(redisClient
*c
);
666 static void moveCommand(redisClient
*c
);
667 static void renameCommand(redisClient
*c
);
668 static void renamenxCommand(redisClient
*c
);
669 static void lpushCommand(redisClient
*c
);
670 static void rpushCommand(redisClient
*c
);
671 static void lpopCommand(redisClient
*c
);
672 static void rpopCommand(redisClient
*c
);
673 static void llenCommand(redisClient
*c
);
674 static void lindexCommand(redisClient
*c
);
675 static void lrangeCommand(redisClient
*c
);
676 static void ltrimCommand(redisClient
*c
);
677 static void typeCommand(redisClient
*c
);
678 static void lsetCommand(redisClient
*c
);
679 static void saddCommand(redisClient
*c
);
680 static void sremCommand(redisClient
*c
);
681 static void smoveCommand(redisClient
*c
);
682 static void sismemberCommand(redisClient
*c
);
683 static void scardCommand(redisClient
*c
);
684 static void spopCommand(redisClient
*c
);
685 static void srandmemberCommand(redisClient
*c
);
686 static void sinterCommand(redisClient
*c
);
687 static void sinterstoreCommand(redisClient
*c
);
688 static void sunionCommand(redisClient
*c
);
689 static void sunionstoreCommand(redisClient
*c
);
690 static void sdiffCommand(redisClient
*c
);
691 static void sdiffstoreCommand(redisClient
*c
);
692 static void syncCommand(redisClient
*c
);
693 static void flushdbCommand(redisClient
*c
);
694 static void flushallCommand(redisClient
*c
);
695 static void sortCommand(redisClient
*c
);
696 static void lremCommand(redisClient
*c
);
697 static void rpoplpushcommand(redisClient
*c
);
698 static void infoCommand(redisClient
*c
);
699 static void mgetCommand(redisClient
*c
);
700 static void monitorCommand(redisClient
*c
);
701 static void expireCommand(redisClient
*c
);
702 static void expireatCommand(redisClient
*c
);
703 static void getsetCommand(redisClient
*c
);
704 static void ttlCommand(redisClient
*c
);
705 static void slaveofCommand(redisClient
*c
);
706 static void debugCommand(redisClient
*c
);
707 static void msetCommand(redisClient
*c
);
708 static void msetnxCommand(redisClient
*c
);
709 static void zaddCommand(redisClient
*c
);
710 static void zincrbyCommand(redisClient
*c
);
711 static void zrangeCommand(redisClient
*c
);
712 static void zrangebyscoreCommand(redisClient
*c
);
713 static void zcountCommand(redisClient
*c
);
714 static void zrevrangeCommand(redisClient
*c
);
715 static void zcardCommand(redisClient
*c
);
716 static void zremCommand(redisClient
*c
);
717 static void zscoreCommand(redisClient
*c
);
718 static void zremrangebyscoreCommand(redisClient
*c
);
719 static void multiCommand(redisClient
*c
);
720 static void execCommand(redisClient
*c
);
721 static void discardCommand(redisClient
*c
);
722 static void blpopCommand(redisClient
*c
);
723 static void brpopCommand(redisClient
*c
);
724 static void appendCommand(redisClient
*c
);
725 static void substrCommand(redisClient
*c
);
726 static void zrankCommand(redisClient
*c
);
727 static void zrevrankCommand(redisClient
*c
);
728 static void hsetCommand(redisClient
*c
);
729 static void hsetnxCommand(redisClient
*c
);
730 static void hgetCommand(redisClient
*c
);
731 static void hmsetCommand(redisClient
*c
);
732 static void hmgetCommand(redisClient
*c
);
733 static void hdelCommand(redisClient
*c
);
734 static void hlenCommand(redisClient
*c
);
735 static void zremrangebyrankCommand(redisClient
*c
);
736 static void zunionstoreCommand(redisClient
*c
);
737 static void zinterstoreCommand(redisClient
*c
);
738 static void hkeysCommand(redisClient
*c
);
739 static void hvalsCommand(redisClient
*c
);
740 static void hgetallCommand(redisClient
*c
);
741 static void hexistsCommand(redisClient
*c
);
742 static void configCommand(redisClient
*c
);
743 static void hincrbyCommand(redisClient
*c
);
744 static void subscribeCommand(redisClient
*c
);
745 static void unsubscribeCommand(redisClient
*c
);
746 static void psubscribeCommand(redisClient
*c
);
747 static void punsubscribeCommand(redisClient
*c
);
748 static void publishCommand(redisClient
*c
);
749 static void watchCommand(redisClient
*c
);
750 static void unwatchCommand(redisClient
*c
);
752 /*================================= Globals ================================= */
755 static struct redisServer server
; /* server global state */
756 static struct redisCommand
*commandTable
;
757 static struct redisCommand readonlyCommandTable
[] = {
758 {"get",getCommand
,2,REDIS_CMD_INLINE
,NULL
,1,1,1},
759 {"set",setCommand
,3,REDIS_CMD_BULK
|REDIS_CMD_DENYOOM
,NULL
,0,0,0},
760 {"setnx",setnxCommand
,3,REDIS_CMD_BULK
|REDIS_CMD_DENYOOM
,NULL
,0,0,0},
761 {"setex",setexCommand
,4,REDIS_CMD_BULK
|REDIS_CMD_DENYOOM
,NULL
,0,0,0},
762 {"append",appendCommand
,3,REDIS_CMD_BULK
|REDIS_CMD_DENYOOM
,NULL
,1,1,1},
763 {"substr",substrCommand
,4,REDIS_CMD_INLINE
,NULL
,1,1,1},
764 {"del",delCommand
,-2,REDIS_CMD_INLINE
,NULL
,0,0,0},
765 {"exists",existsCommand
,2,REDIS_CMD_INLINE
,NULL
,1,1,1},
766 {"incr",incrCommand
,2,REDIS_CMD_INLINE
|REDIS_CMD_DENYOOM
,NULL
,1,1,1},
767 {"decr",decrCommand
,2,REDIS_CMD_INLINE
|REDIS_CMD_DENYOOM
,NULL
,1,1,1},
768 {"mget",mgetCommand
,-2,REDIS_CMD_INLINE
,NULL
,1,-1,1},
769 {"rpush",rpushCommand
,3,REDIS_CMD_BULK
|REDIS_CMD_DENYOOM
,NULL
,1,1,1},
770 {"lpush",lpushCommand
,3,REDIS_CMD_BULK
|REDIS_CMD_DENYOOM
,NULL
,1,1,1},
771 {"rpop",rpopCommand
,2,REDIS_CMD_INLINE
,NULL
,1,1,1},
772 {"lpop",lpopCommand
,2,REDIS_CMD_INLINE
,NULL
,1,1,1},
773 {"brpop",brpopCommand
,-3,REDIS_CMD_INLINE
,NULL
,1,1,1},
774 {"blpop",blpopCommand
,-3,REDIS_CMD_INLINE
,NULL
,1,1,1},
775 {"llen",llenCommand
,2,REDIS_CMD_INLINE
,NULL
,1,1,1},
776 {"lindex",lindexCommand
,3,REDIS_CMD_INLINE
,NULL
,1,1,1},
777 {"lset",lsetCommand
,4,REDIS_CMD_BULK
|REDIS_CMD_DENYOOM
,NULL
,1,1,1},
778 {"lrange",lrangeCommand
,4,REDIS_CMD_INLINE
,NULL
,1,1,1},
779 {"ltrim",ltrimCommand
,4,REDIS_CMD_INLINE
,NULL
,1,1,1},
780 {"lrem",lremCommand
,4,REDIS_CMD_BULK
,NULL
,1,1,1},
781 {"rpoplpush",rpoplpushcommand
,3,REDIS_CMD_INLINE
|REDIS_CMD_DENYOOM
,NULL
,1,2,1},
782 {"sadd",saddCommand
,3,REDIS_CMD_BULK
|REDIS_CMD_DENYOOM
,NULL
,1,1,1},
783 {"srem",sremCommand
,3,REDIS_CMD_BULK
,NULL
,1,1,1},
784 {"smove",smoveCommand
,4,REDIS_CMD_BULK
,NULL
,1,2,1},
785 {"sismember",sismemberCommand
,3,REDIS_CMD_BULK
,NULL
,1,1,1},
786 {"scard",scardCommand
,2,REDIS_CMD_INLINE
,NULL
,1,1,1},
787 {"spop",spopCommand
,2,REDIS_CMD_INLINE
,NULL
,1,1,1},
788 {"srandmember",srandmemberCommand
,2,REDIS_CMD_INLINE
,NULL
,1,1,1},
789 {"sinter",sinterCommand
,-2,REDIS_CMD_INLINE
|REDIS_CMD_DENYOOM
,NULL
,1,-1,1},
790 {"sinterstore",sinterstoreCommand
,-3,REDIS_CMD_INLINE
|REDIS_CMD_DENYOOM
,NULL
,2,-1,1},
791 {"sunion",sunionCommand
,-2,REDIS_CMD_INLINE
|REDIS_CMD_DENYOOM
,NULL
,1,-1,1},
792 {"sunionstore",sunionstoreCommand
,-3,REDIS_CMD_INLINE
|REDIS_CMD_DENYOOM
,NULL
,2,-1,1},
793 {"sdiff",sdiffCommand
,-2,REDIS_CMD_INLINE
|REDIS_CMD_DENYOOM
,NULL
,1,-1,1},
794 {"sdiffstore",sdiffstoreCommand
,-3,REDIS_CMD_INLINE
|REDIS_CMD_DENYOOM
,NULL
,2,-1,1},
795 {"smembers",sinterCommand
,2,REDIS_CMD_INLINE
,NULL
,1,1,1},
796 {"zadd",zaddCommand
,4,REDIS_CMD_BULK
|REDIS_CMD_DENYOOM
,NULL
,1,1,1},
797 {"zincrby",zincrbyCommand
,4,REDIS_CMD_BULK
|REDIS_CMD_DENYOOM
,NULL
,1,1,1},
798 {"zrem",zremCommand
,3,REDIS_CMD_BULK
,NULL
,1,1,1},
799 {"zremrangebyscore",zremrangebyscoreCommand
,4,REDIS_CMD_INLINE
,NULL
,1,1,1},
800 {"zremrangebyrank",zremrangebyrankCommand
,4,REDIS_CMD_INLINE
,NULL
,1,1,1},
801 {"zunionstore",zunionstoreCommand
,-4,REDIS_CMD_INLINE
|REDIS_CMD_DENYOOM
,zunionInterBlockClientOnSwappedKeys
,0,0,0},
802 {"zinterstore",zinterstoreCommand
,-4,REDIS_CMD_INLINE
|REDIS_CMD_DENYOOM
,zunionInterBlockClientOnSwappedKeys
,0,0,0},
803 {"zrange",zrangeCommand
,-4,REDIS_CMD_INLINE
,NULL
,1,1,1},
804 {"zrangebyscore",zrangebyscoreCommand
,-4,REDIS_CMD_INLINE
,NULL
,1,1,1},
805 {"zcount",zcountCommand
,4,REDIS_CMD_INLINE
,NULL
,1,1,1},
806 {"zrevrange",zrevrangeCommand
,-4,REDIS_CMD_INLINE
,NULL
,1,1,1},
807 {"zcard",zcardCommand
,2,REDIS_CMD_INLINE
,NULL
,1,1,1},
808 {"zscore",zscoreCommand
,3,REDIS_CMD_BULK
|REDIS_CMD_DENYOOM
,NULL
,1,1,1},
809 {"zrank",zrankCommand
,3,REDIS_CMD_BULK
,NULL
,1,1,1},
810 {"zrevrank",zrevrankCommand
,3,REDIS_CMD_BULK
,NULL
,1,1,1},
811 {"hset",hsetCommand
,4,REDIS_CMD_BULK
|REDIS_CMD_DENYOOM
,NULL
,1,1,1},
812 {"hsetnx",hsetnxCommand
,4,REDIS_CMD_BULK
|REDIS_CMD_DENYOOM
,NULL
,1,1,1},
813 {"hget",hgetCommand
,3,REDIS_CMD_BULK
,NULL
,1,1,1},
814 {"hmset",hmsetCommand
,-4,REDIS_CMD_BULK
|REDIS_CMD_DENYOOM
,NULL
,1,1,1},
815 {"hmget",hmgetCommand
,-3,REDIS_CMD_BULK
,NULL
,1,1,1},
816 {"hincrby",hincrbyCommand
,4,REDIS_CMD_INLINE
|REDIS_CMD_DENYOOM
,NULL
,1,1,1},
817 {"hdel",hdelCommand
,3,REDIS_CMD_BULK
,NULL
,1,1,1},
818 {"hlen",hlenCommand
,2,REDIS_CMD_INLINE
,NULL
,1,1,1},
819 {"hkeys",hkeysCommand
,2,REDIS_CMD_INLINE
,NULL
,1,1,1},
820 {"hvals",hvalsCommand
,2,REDIS_CMD_INLINE
,NULL
,1,1,1},
821 {"hgetall",hgetallCommand
,2,REDIS_CMD_INLINE
,NULL
,1,1,1},
822 {"hexists",hexistsCommand
,3,REDIS_CMD_BULK
,NULL
,1,1,1},
823 {"incrby",incrbyCommand
,3,REDIS_CMD_INLINE
|REDIS_CMD_DENYOOM
,NULL
,1,1,1},
824 {"decrby",decrbyCommand
,3,REDIS_CMD_INLINE
|REDIS_CMD_DENYOOM
,NULL
,1,1,1},
825 {"getset",getsetCommand
,3,REDIS_CMD_BULK
|REDIS_CMD_DENYOOM
,NULL
,1,1,1},
826 {"mset",msetCommand
,-3,REDIS_CMD_BULK
|REDIS_CMD_DENYOOM
,NULL
,1,-1,2},
827 {"msetnx",msetnxCommand
,-3,REDIS_CMD_BULK
|REDIS_CMD_DENYOOM
,NULL
,1,-1,2},
828 {"randomkey",randomkeyCommand
,1,REDIS_CMD_INLINE
,NULL
,0,0,0},
829 {"select",selectCommand
,2,REDIS_CMD_INLINE
,NULL
,0,0,0},
830 {"move",moveCommand
,3,REDIS_CMD_INLINE
,NULL
,1,1,1},
831 {"rename",renameCommand
,3,REDIS_CMD_INLINE
,NULL
,1,1,1},
832 {"renamenx",renamenxCommand
,3,REDIS_CMD_INLINE
,NULL
,1,1,1},
833 {"expire",expireCommand
,3,REDIS_CMD_INLINE
,NULL
,0,0,0},
834 {"expireat",expireatCommand
,3,REDIS_CMD_INLINE
,NULL
,0,0,0},
835 {"keys",keysCommand
,2,REDIS_CMD_INLINE
,NULL
,0,0,0},
836 {"dbsize",dbsizeCommand
,1,REDIS_CMD_INLINE
,NULL
,0,0,0},
837 {"auth",authCommand
,2,REDIS_CMD_INLINE
,NULL
,0,0,0},
838 {"ping",pingCommand
,1,REDIS_CMD_INLINE
,NULL
,0,0,0},
839 {"echo",echoCommand
,2,REDIS_CMD_BULK
,NULL
,0,0,0},
840 {"save",saveCommand
,1,REDIS_CMD_INLINE
,NULL
,0,0,0},
841 {"bgsave",bgsaveCommand
,1,REDIS_CMD_INLINE
,NULL
,0,0,0},
842 {"bgrewriteaof",bgrewriteaofCommand
,1,REDIS_CMD_INLINE
,NULL
,0,0,0},
843 {"shutdown",shutdownCommand
,1,REDIS_CMD_INLINE
,NULL
,0,0,0},
844 {"lastsave",lastsaveCommand
,1,REDIS_CMD_INLINE
,NULL
,0,0,0},
845 {"type",typeCommand
,2,REDIS_CMD_INLINE
,NULL
,1,1,1},
846 {"multi",multiCommand
,1,REDIS_CMD_INLINE
,NULL
,0,0,0},
847 {"exec",execCommand
,1,REDIS_CMD_INLINE
|REDIS_CMD_DENYOOM
,execBlockClientOnSwappedKeys
,0,0,0},
848 {"discard",discardCommand
,1,REDIS_CMD_INLINE
,NULL
,0,0,0},
849 {"sync",syncCommand
,1,REDIS_CMD_INLINE
,NULL
,0,0,0},
850 {"flushdb",flushdbCommand
,1,REDIS_CMD_INLINE
,NULL
,0,0,0},
851 {"flushall",flushallCommand
,1,REDIS_CMD_INLINE
,NULL
,0,0,0},
852 {"sort",sortCommand
,-2,REDIS_CMD_INLINE
|REDIS_CMD_DENYOOM
,NULL
,1,1,1},
853 {"info",infoCommand
,1,REDIS_CMD_INLINE
,NULL
,0,0,0},
854 {"monitor",monitorCommand
,1,REDIS_CMD_INLINE
,NULL
,0,0,0},
855 {"ttl",ttlCommand
,2,REDIS_CMD_INLINE
,NULL
,1,1,1},
856 {"slaveof",slaveofCommand
,3,REDIS_CMD_INLINE
,NULL
,0,0,0},
857 {"debug",debugCommand
,-2,REDIS_CMD_INLINE
,NULL
,0,0,0},
858 {"config",configCommand
,-2,REDIS_CMD_BULK
,NULL
,0,0,0},
859 {"subscribe",subscribeCommand
,-2,REDIS_CMD_INLINE
,NULL
,0,0,0},
860 {"unsubscribe",unsubscribeCommand
,-1,REDIS_CMD_INLINE
,NULL
,0,0,0},
861 {"psubscribe",psubscribeCommand
,-2,REDIS_CMD_INLINE
,NULL
,0,0,0},
862 {"punsubscribe",punsubscribeCommand
,-1,REDIS_CMD_INLINE
,NULL
,0,0,0},
863 {"publish",publishCommand
,3,REDIS_CMD_BULK
|REDIS_CMD_FORCE_REPLICATION
,NULL
,0,0,0},
864 {"watch",watchCommand
,-2,REDIS_CMD_INLINE
,NULL
,0,0,0},
865 {"unwatch",unwatchCommand
,1,REDIS_CMD_INLINE
,NULL
,0,0,0}
868 /*============================ Utility functions ============================ */
870 /* Glob-style pattern matching. */
871 static int stringmatchlen(const char *pattern
, int patternLen
,
872 const char *string
, int stringLen
, int nocase
)
877 while (pattern
[1] == '*') {
882 return 1; /* match */
884 if (stringmatchlen(pattern
+1, patternLen
-1,
885 string
, stringLen
, nocase
))
886 return 1; /* match */
890 return 0; /* no match */
894 return 0; /* no match */
904 not = pattern
[0] == '^';
911 if (pattern
[0] == '\\') {
914 if (pattern
[0] == string
[0])
916 } else if (pattern
[0] == ']') {
918 } else if (patternLen
== 0) {
922 } else if (pattern
[1] == '-' && patternLen
>= 3) {
923 int start
= pattern
[0];
924 int end
= pattern
[2];
932 start
= tolower(start
);
938 if (c
>= start
&& c
<= end
)
942 if (pattern
[0] == string
[0])
945 if (tolower((int)pattern
[0]) == tolower((int)string
[0]))
955 return 0; /* no match */
961 if (patternLen
>= 2) {
968 if (pattern
[0] != string
[0])
969 return 0; /* no match */
971 if (tolower((int)pattern
[0]) != tolower((int)string
[0]))
972 return 0; /* no match */
980 if (stringLen
== 0) {
981 while(*pattern
== '*') {
988 if (patternLen
== 0 && stringLen
== 0)
/* Glob-style match for NUL-terminated inputs: a thin wrapper that measures
 * both strings with strlen() and hands everything to stringmatchlen().
 * The return value is the one produced by stringmatchlen(); 'nocase' is
 * passed through unchanged. */
static int stringmatch(const char *pattern, const char *string, int nocase) {
    int plen = strlen(pattern);
    int slen = strlen(string);

    return stringmatchlen(pattern, plen, string, slen, nocase);
}
/* Convert a string representing an amount of memory into the number of
 * bytes, so for instance memtoll("1gb") will return 1073741824 that is
 * (1024*1024*1024).
 *
 * The accepted format is an optional '-' sign, a run of decimal digits and
 * an optional, case insensitive, unit:
 *
 *   (none) or "b" -> 1
 *   "k"  -> 1000           "kb" -> 1024
 *   "m"  -> 1000*1000      "mb" -> 1024*1024
 *   "g"  -> 1000^3         "gb" -> 1024^3
 *
 * If err is not NULL, *err is set to 1 on parsing error and to 0 otherwise.
 * Two errors are detected: an unknown unit (the numeric prefix is still
 * returned, multiplied by 1) and a numeric prefix too long for the internal
 * buffer (LLONG_MAX is returned). */
static long long memtoll(const char *p, int *err) {
    const char *u;
    char buf[128];
    long mul; /* unit multiplier */
    long long val;
    unsigned int digits;

    if (err) *err = 0;
    /* Search the first non digit character. */
    u = p;
    if (*u == '-') u++;
    /* The cast matters: passing a negative plain char to isdigit() is
     * undefined behavior. */
    while(*u && isdigit((unsigned char)*u)) u++;
    if (*u == '\0' || !strcasecmp(u,"b")) {
        mul = 1;
    } else if (!strcasecmp(u,"k")) {
        mul = 1000;
    } else if (!strcasecmp(u,"kb")) {
        mul = 1024;
    } else if (!strcasecmp(u,"m")) {
        mul = 1000*1000;
    } else if (!strcasecmp(u,"mb")) {
        mul = 1024*1024;
    } else if (!strcasecmp(u,"g")) {
        mul = 1000L*1000*1000;
    } else if (!strcasecmp(u,"gb")) {
        mul = 1024L*1024*1024;
    } else {
        if (err) *err = 1;
        mul = 1;
    }
    /* Copy the sign+digits prefix into a NUL terminated buffer so that
     * strtoll() never sees the unit suffix. */
    digits = u-p;
    if (digits >= sizeof(buf)) {
        if (err) *err = 1;
        return LLONG_MAX;
    }
    memcpy(buf,p,digits);
    buf[digits] = '\0';
    val = strtoll(buf,NULL,10);
    return val*mul;
}
/* Convert a long long into a string.
 *
 * 's' is the destination buffer and 'len' its size in bytes. The output is
 * always NUL terminated when len > 0. Returns the number of characters
 * actually stored (terminator excluded), which is shorter than the full
 * representation when the buffer is too small: in that case the leftmost
 * len-1 characters are kept. When len is 0 nothing is written and 0 is
 * returned. */
static int ll2string(char *s, size_t len, long long value) {
    char buf[32], *p;
    unsigned long long v;
    size_t l;

    if (len == 0) return 0;
    /* Take the magnitude in the unsigned domain: "-value" overflows (that
     * is undefined behavior) when value is LLONG_MIN. */
    v = (value < 0) ? 0ULL - (unsigned long long)value
                    : (unsigned long long)value;
    p = buf+31; /* point to the last character */
    /* Emit digits right to left; do/while so that 0 yields "0". */
    do {
        *p-- = '0'+(v%10);
        v /= 10;
    } while(v);
    if (value < 0) *p-- = '-';
    p++;
    l = 32-(p-buf);
    if (l+1 > len) l = len-1; /* Make sure it fits, including the nul term */
    memcpy(s,p,l);
    s[l] = '\0';
    return l;
}
1068 static void redisLog(int level
, const char *fmt
, ...) {
1072 fp
= (server
.logfile
== NULL
) ? stdout
: fopen(server
.logfile
,"a");
1076 if (level
>= server
.verbosity
) {
1082 strftime(buf
,64,"%d %b %H:%M:%S",localtime(&now
));
1083 fprintf(fp
,"[%d] %s %c ",(int)getpid(),buf
,c
[level
]);
1084 vfprintf(fp
, fmt
, ap
);
1090 if (server
.logfile
) fclose(fp
);
1093 /*====================== Hash table type implementation ==================== */
1095 /* This is a hash table type that uses the SDS dynamic strings library as
1096 * keys and redis objects as values (objects can hold SDS strings,
/* Value destructor for entries whose value is a plain allocation: the
 * value pointer is released with zfree(). 'privdata' is ignored. */
static void dictVanillaFree(void *privdata, void *val)
{
    DICT_NOTUSED(privdata);

    zfree(val);
}
1105 static void dictListDestructor(void *privdata
, void *val
)
1107 DICT_NOTUSED(privdata
);
1108 listRelease((list
*)val
);
1111 static int sdsDictKeyCompare(void *privdata
, const void *key1
,
1115 DICT_NOTUSED(privdata
);
1117 l1
= sdslen((sds
)key1
);
1118 l2
= sdslen((sds
)key2
);
1119 if (l1
!= l2
) return 0;
1120 return memcmp(key1
, key2
, l1
) == 0;
/* Key/value destructor for redis objects: drops one reference from the
 * object. A NULL value is accepted and ignored, since values of swapped
 * out keys are set to NULL. 'privdata' is ignored. */
static void dictRedisObjectDestructor(void *privdata, void *val)
{
    DICT_NOTUSED(privdata);

    if (val != NULL)
        decrRefCount(val);
}
1131 static int dictObjKeyCompare(void *privdata
, const void *key1
,
1134 const robj
*o1
= key1
, *o2
= key2
;
1135 return sdsDictKeyCompare(privdata
,o1
->ptr
,o2
->ptr
);
1138 static unsigned int dictObjHash(const void *key
) {
1139 const robj
*o
= key
;
1140 return dictGenHashFunction(o
->ptr
, sdslen((sds
)o
->ptr
));
1143 static int dictEncObjKeyCompare(void *privdata
, const void *key1
,
1146 robj
*o1
= (robj
*) key1
, *o2
= (robj
*) key2
;
1149 if (o1
->encoding
== REDIS_ENCODING_INT
&&
1150 o2
->encoding
== REDIS_ENCODING_INT
)
1151 return o1
->ptr
== o2
->ptr
;
1153 o1
= getDecodedObject(o1
);
1154 o2
= getDecodedObject(o2
);
1155 cmp
= sdsDictKeyCompare(privdata
,o1
->ptr
,o2
->ptr
);
1161 static unsigned int dictEncObjHash(const void *key
) {
1162 robj
*o
= (robj
*) key
;
1164 if (o
->encoding
== REDIS_ENCODING_RAW
) {
1165 return dictGenHashFunction(o
->ptr
, sdslen((sds
)o
->ptr
));
1167 if (o
->encoding
== REDIS_ENCODING_INT
) {
1171 len
= ll2string(buf
,32,(long)o
->ptr
);
1172 return dictGenHashFunction((unsigned char*)buf
, len
);
1176 o
= getDecodedObject(o
);
1177 hash
= dictGenHashFunction(o
->ptr
, sdslen((sds
)o
->ptr
));
1184 /* Sets type and expires */
1185 static dictType setDictType
= {
1186 dictEncObjHash
, /* hash function */
1189 dictEncObjKeyCompare
, /* key compare */
1190 dictRedisObjectDestructor
, /* key destructor */
1191 NULL
/* val destructor */
1194 /* Sorted sets hash (note: a skiplist is used in addition to the hash table) */
1195 static dictType zsetDictType
= {
1196 dictEncObjHash
, /* hash function */
1199 dictEncObjKeyCompare
, /* key compare */
1200 dictRedisObjectDestructor
, /* key destructor */
1201 dictVanillaFree
/* val destructor of malloc(sizeof(double)) */
1205 static dictType dbDictType
= {
1206 dictObjHash
, /* hash function */
1209 dictObjKeyCompare
, /* key compare */
1210 dictRedisObjectDestructor
, /* key destructor */
1211 dictRedisObjectDestructor
/* val destructor */
1215 static dictType keyptrDictType
= {
1216 dictObjHash
, /* hash function */
1219 dictObjKeyCompare
, /* key compare */
1220 dictRedisObjectDestructor
, /* key destructor */
1221 NULL
/* val destructor */
1224 /* Hash type hash table (note that small hashes are represented with zipmaps) */
1225 static dictType hashDictType
= {
1226 dictEncObjHash
, /* hash function */
1229 dictEncObjKeyCompare
, /* key compare */
1230 dictRedisObjectDestructor
, /* key destructor */
1231 dictRedisObjectDestructor
/* val destructor */
1234 /* Keylist hash table type has unencoded redis objects as keys and
1235 * lists as values. It's used for blocking operations (BLPOP) and to
1236 * map swapped keys to a list of clients waiting for these keys to be loaded. */
1237 static dictType keylistDictType
= {
1238 dictObjHash
, /* hash function */
1241 dictObjKeyCompare
, /* key compare */
1242 dictRedisObjectDestructor
, /* key destructor */
1243 dictListDestructor
/* val destructor */
1246 static void version();
1248 /* ========================= Random utility functions ======================= */
1250 /* Redis generally does not try to recover from out of memory conditions
1251 * when allocating objects or strings, it is not clear if it will be possible
1252 * to report this condition to the client since the networking layer itself
1253 * is based on heap allocation for send buffers, so we simply abort.
1254 * At least the code will be simpler to read... */
1255 static void oom(const char *msg
) {
1256 redisLog(REDIS_WARNING
, "%s: Out of memory\n",msg
);
1261 /* ====================== Redis server networking stuff ===================== */
1262 static void closeTimedoutClients(void) {
1265 time_t now
= time(NULL
);
1268 listRewind(server
.clients
,&li
);
1269 while ((ln
= listNext(&li
)) != NULL
) {
1270 c
= listNodeValue(ln
);
1271 if (server
.maxidletime
&&
1272 !(c
->flags
& REDIS_SLAVE
) && /* no timeout for slaves */
1273 !(c
->flags
& REDIS_MASTER
) && /* no timeout for masters */
1274 dictSize(c
->pubsub_channels
) == 0 && /* no timeout for pubsub */
1275 listLength(c
->pubsub_patterns
) == 0 &&
1276 (now
- c
->lastinteraction
> server
.maxidletime
))
1278 redisLog(REDIS_VERBOSE
,"Closing idle client");
1280 } else if (c
->flags
& REDIS_BLOCKED
) {
1281 if (c
->blockingto
!= 0 && c
->blockingto
< now
) {
1282 addReply(c
,shared
.nullmultibulk
);
1283 unblockClientWaitingData(c
);
1289 static int htNeedsResize(dict
*dict
) {
1290 long long size
, used
;
1292 size
= dictSlots(dict
);
1293 used
= dictSize(dict
);
1294 return (size
&& used
&& size
> DICT_HT_INITIAL_SIZE
&&
1295 (used
*100/size
< REDIS_HT_MINFILL
));
1298 /* If the percentage of used slots in the HT reaches REDIS_HT_MINFILL
1299 * we resize the hash table to save memory */
1300 static void tryResizeHashTables(void) {
1303 for (j
= 0; j
< server
.dbnum
; j
++) {
1304 if (htNeedsResize(server
.db
[j
].dict
))
1305 dictResize(server
.db
[j
].dict
);
1306 if (htNeedsResize(server
.db
[j
].expires
))
1307 dictResize(server
.db
[j
].expires
);
1311 /* Our hash table implementation performs rehashing incrementally while
1312 * we write/read from the hash table. Still if the server is idle, the hash
1313 * table will use two tables for a long time. So we try to use 1 millisecond
1314 * of CPU time at every serverCron() loop in order to rehash some key. */
1315 static void incrementallyRehash(void) {
1318 for (j
= 0; j
< server
.dbnum
; j
++) {
1319 if (dictIsRehashing(server
.db
[j
].dict
)) {
1320 dictRehashMilliseconds(server
.db
[j
].dict
,1);
1321 break; /* already used our millisecond for this loop... */
1326 /* A background saving child (BGSAVE) terminated its work. Handle this. */
1327 void backgroundSaveDoneHandler(int statloc
) {
1328 int exitcode
= WEXITSTATUS(statloc
);
1329 int bysignal
= WIFSIGNALED(statloc
);
1331 if (!bysignal
&& exitcode
== 0) {
1332 redisLog(REDIS_NOTICE
,
1333 "Background saving terminated with success");
1335 server
.lastsave
= time(NULL
);
1336 } else if (!bysignal
&& exitcode
!= 0) {
1337 redisLog(REDIS_WARNING
, "Background saving error");
1339 redisLog(REDIS_WARNING
,
1340 "Background saving terminated by signal %d", WTERMSIG(statloc
));
1341 rdbRemoveTempFile(server
.bgsavechildpid
);
1343 server
.bgsavechildpid
= -1;
1344 /* Possibly there are slaves waiting for a BGSAVE in order to be served
1345 * (the first stage of SYNC is a bulk transfer of dump.rdb) */
1346 updateSlavesWaitingBgsave(exitcode
== 0 ? REDIS_OK
: REDIS_ERR
);
1349 /* A background append only file rewriting (BGREWRITEAOF) terminated its work.
1351 void backgroundRewriteDoneHandler(int statloc
) {
1352 int exitcode
= WEXITSTATUS(statloc
);
1353 int bysignal
= WIFSIGNALED(statloc
);
1355 if (!bysignal
&& exitcode
== 0) {
1359 redisLog(REDIS_NOTICE
,
1360 "Background append only file rewriting terminated with success");
1361 /* Now it's time to flush the differences accumulated by the parent */
1362 snprintf(tmpfile
,256,"temp-rewriteaof-bg-%d.aof", (int) server
.bgrewritechildpid
);
1363 fd
= open(tmpfile
,O_WRONLY
|O_APPEND
);
1365 redisLog(REDIS_WARNING
, "Not able to open the temp append only file produced by the child: %s", strerror(errno
));
1368 /* Flush our data... */
1369 if (write(fd
,server
.bgrewritebuf
,sdslen(server
.bgrewritebuf
)) !=
1370 (signed) sdslen(server
.bgrewritebuf
)) {
1371 redisLog(REDIS_WARNING
, "Error or short write trying to flush the parent diff of the append log file in the child temp file: %s", strerror(errno
));
1375 redisLog(REDIS_NOTICE
,"Parent diff flushed into the new append log file with success (%lu bytes)",sdslen(server
.bgrewritebuf
));
1376 /* Now our work is to rename the temp file into the stable file. And
1377 * switch the file descriptor used by the server for append only. */
1378 if (rename(tmpfile
,server
.appendfilename
) == -1) {
1379 redisLog(REDIS_WARNING
,"Can't rename the temp append only file into the stable one: %s", strerror(errno
));
1383 /* Mission completed... almost */
1384 redisLog(REDIS_NOTICE
,"Append only file successfully rewritten.");
1385 if (server
.appendfd
!= -1) {
1386 /* If append only is actually enabled... */
1387 close(server
.appendfd
);
1388 server
.appendfd
= fd
;
1389 if (server
.appendfsync
!= APPENDFSYNC_NO
) aof_fsync(fd
);
1390 server
.appendseldb
= -1; /* Make sure it will issue SELECT */
1391 redisLog(REDIS_NOTICE
,"The new append only file was selected for future appends.");
1393 /* If append only is disabled we just generate a dump in this
1394 * format. Why not? */
1397 } else if (!bysignal
&& exitcode
!= 0) {
1398 redisLog(REDIS_WARNING
, "Background append only file rewriting error");
1400 redisLog(REDIS_WARNING
,
1401 "Background append only file rewriting terminated by signal %d",
1405 sdsfree(server
.bgrewritebuf
);
1406 server
.bgrewritebuf
= sdsempty();
1407 aofRemoveTempFile(server
.bgrewritechildpid
);
1408 server
.bgrewritechildpid
= -1;
1411 /* This function is called once a background process of some kind terminates,
1412 * as we want to avoid resizing the hash tables when there is a child in order
1413 * to play well with copy-on-write (otherwise when a resize happens lots of
1414 * memory pages are copied). The goal of this function is to update the ability
1415 * for dict.c to resize the hash tables accordingly to the fact we have o not
1416 * running childs. */
1417 static void updateDictResizePolicy(void) {
1418 if (server
.bgsavechildpid
== -1 && server
.bgrewritechildpid
== -1)
1421 dictDisableResize();
1424 static int serverCron(struct aeEventLoop
*eventLoop
, long long id
, void *clientData
) {
1425 int j
, loops
= server
.cronloops
++;
1426 REDIS_NOTUSED(eventLoop
);
1428 REDIS_NOTUSED(clientData
);
1430 /* We take a cached value of the unix time in the global state because
1431 * with virtual memory and aging there is to store the current time
1432 * in objects at every object access, and accuracy is not needed.
1433 * To access a global var is faster than calling time(NULL) */
1434 server
.unixtime
= time(NULL
);
1436 /* We received a SIGTERM, shutting down here in a safe way, as it is
1437 * not ok doing so inside the signal handler. */
1438 if (server
.shutdown_asap
) {
1439 if (prepareForShutdown() == REDIS_OK
) exit(0);
1440 redisLog(REDIS_WARNING
,"SIGTERM received but errors trying to shut down the server, check the logs for more information");
1443 /* Show some info about non-empty databases */
1444 for (j
= 0; j
< server
.dbnum
; j
++) {
1445 long long size
, used
, vkeys
;
1447 size
= dictSlots(server
.db
[j
].dict
);
1448 used
= dictSize(server
.db
[j
].dict
);
1449 vkeys
= dictSize(server
.db
[j
].expires
);
1450 if (!(loops
% 50) && (used
|| vkeys
)) {
1451 redisLog(REDIS_VERBOSE
,"DB %d: %lld keys (%lld volatile) in %lld slots HT.",j
,used
,vkeys
,size
);
1452 /* dictPrintStats(server.dict); */
1456 /* We don't want to resize the hash tables while a background saving
1457 * is in progress: the saving child is created using fork() that is
1458 * implemented with a copy-on-write semantic in most modern systems, so
1459 * if we resize the HT while there is the saving child at work actually
1460 * a lot of memory movements in the parent will cause a lot of pages
1462 if (server
.bgsavechildpid
== -1 && server
.bgrewritechildpid
== -1) {
1463 if (!(loops
% 10)) tryResizeHashTables();
1464 if (server
.activerehashing
) incrementallyRehash();
1467 /* Show information about connected clients */
1468 if (!(loops
% 50)) {
1469 redisLog(REDIS_VERBOSE
,"%d clients connected (%d slaves), %zu bytes in use",
1470 listLength(server
.clients
)-listLength(server
.slaves
),
1471 listLength(server
.slaves
),
1472 zmalloc_used_memory());
1475 /* Close connections of timedout clients */
1476 if ((server
.maxidletime
&& !(loops
% 100)) || server
.blpop_blocked_clients
)
1477 closeTimedoutClients();
1479 /* Check if a background saving or AOF rewrite in progress terminated */
1480 if (server
.bgsavechildpid
!= -1 || server
.bgrewritechildpid
!= -1) {
1484 if ((pid
= wait3(&statloc
,WNOHANG
,NULL
)) != 0) {
1485 if (pid
== server
.bgsavechildpid
) {
1486 backgroundSaveDoneHandler(statloc
);
1488 backgroundRewriteDoneHandler(statloc
);
1490 updateDictResizePolicy();
1493 /* If there is not a background saving in progress check if
1494 * we have to save now */
1495 time_t now
= time(NULL
);
1496 for (j
= 0; j
< server
.saveparamslen
; j
++) {
1497 struct saveparam
*sp
= server
.saveparams
+j
;
1499 if (server
.dirty
>= sp
->changes
&&
1500 now
-server
.lastsave
> sp
->seconds
) {
1501 redisLog(REDIS_NOTICE
,"%d changes in %d seconds. Saving...",
1502 sp
->changes
, sp
->seconds
);
1503 rdbSaveBackground(server
.dbfilename
);
1509 /* Try to expire a few timed out keys. The algorithm used is adaptive and
1510 * will use few CPU cycles if there are few expiring keys, otherwise
1511 * it will get more aggressive to avoid that too much memory is used by
1512 * keys that can be removed from the keyspace. */
1513 for (j
= 0; j
< server
.dbnum
; j
++) {
1515 redisDb
*db
= server
.db
+j
;
1517 /* Continue to expire if at the end of the cycle more than 25%
1518 * of the keys were expired. */
1520 long num
= dictSize(db
->expires
);
1521 time_t now
= time(NULL
);
1524 if (num
> REDIS_EXPIRELOOKUPS_PER_CRON
)
1525 num
= REDIS_EXPIRELOOKUPS_PER_CRON
;
1530 if ((de
= dictGetRandomKey(db
->expires
)) == NULL
) break;
1531 t
= (time_t) dictGetEntryVal(de
);
1533 deleteKey(db
,dictGetEntryKey(de
));
1535 server
.stat_expiredkeys
++;
1538 } while (expired
> REDIS_EXPIRELOOKUPS_PER_CRON
/4);
1541 /* Swap a few keys on disk if we are over the memory limit and VM
1542 * is enabled. Try to free objects from the free list first. */
1543 if (vmCanSwapOut()) {
1544 while (server
.vm_enabled
&& zmalloc_used_memory() >
1545 server
.vm_max_memory
)
1549 if (tryFreeOneObjectFromFreelist() == REDIS_OK
) continue;
1550 retval
= (server
.vm_max_threads
== 0) ?
1551 vmSwapOneObjectBlocking() :
1552 vmSwapOneObjectThreaded();
1553 if (retval
== REDIS_ERR
&& !(loops
% 300) &&
1554 zmalloc_used_memory() >
1555 (server
.vm_max_memory
+server
.vm_max_memory
/10))
1557 redisLog(REDIS_WARNING
,"WARNING: vm-max-memory limit exceeded by more than 10%% but unable to swap more objects out!");
1559 /* Note that when using threaded I/O we free just one object,
1560 * because anyway when the I/O thread in charge to swap this
1561 * object out will finish, the handler of completed jobs
1562 * will try to swap more objects if we are still out of memory. */
1563 if (retval
== REDIS_ERR
|| server
.vm_max_threads
> 0) break;
1567 /* Check if we should connect to a MASTER */
1568 if (server
.replstate
== REDIS_REPL_CONNECT
&& !(loops
% 10)) {
1569 redisLog(REDIS_NOTICE
,"Connecting to MASTER...");
1570 if (syncWithMaster() == REDIS_OK
) {
1571 redisLog(REDIS_NOTICE
,"MASTER <-> SLAVE sync succeeded");
1572 if (server
.appendonly
) rewriteAppendOnlyFileBackground();
1578 /* This function gets called every time Redis is entering the
1579 * main loop of the event driven library, that is, before to sleep
1580 * for ready file descriptors. */
1581 static void beforeSleep(struct aeEventLoop
*eventLoop
) {
1582 REDIS_NOTUSED(eventLoop
);
1584 /* Awake clients that got all the swapped keys they requested */
1585 if (server
.vm_enabled
&& listLength(server
.io_ready_clients
)) {
1589 listRewind(server
.io_ready_clients
,&li
);
1590 while((ln
= listNext(&li
))) {
1591 redisClient
*c
= ln
->value
;
1592 struct redisCommand
*cmd
;
1594 /* Resume the client. */
1595 listDelNode(server
.io_ready_clients
,ln
);
1596 c
->flags
&= (~REDIS_IO_WAIT
);
1597 server
.vm_blocked_clients
--;
1598 aeCreateFileEvent(server
.el
, c
->fd
, AE_READABLE
,
1599 readQueryFromClient
, c
);
1600 cmd
= lookupCommand(c
->argv
[0]->ptr
);
1601 assert(cmd
!= NULL
);
1604 /* There may be more data to process in the input buffer. */
1605 if (c
->querybuf
&& sdslen(c
->querybuf
) > 0)
1606 processInputBuffer(c
);
1609 /* Write the AOF buffer on disk */
1610 flushAppendOnlyFile();
1613 static void createSharedObjects(void) {
1616 shared
.crlf
= createObject(REDIS_STRING
,sdsnew("\r\n"));
1617 shared
.ok
= createObject(REDIS_STRING
,sdsnew("+OK\r\n"));
1618 shared
.err
= createObject(REDIS_STRING
,sdsnew("-ERR\r\n"));
1619 shared
.emptybulk
= createObject(REDIS_STRING
,sdsnew("$0\r\n\r\n"));
1620 shared
.czero
= createObject(REDIS_STRING
,sdsnew(":0\r\n"));
1621 shared
.cone
= createObject(REDIS_STRING
,sdsnew(":1\r\n"));
1622 shared
.nullbulk
= createObject(REDIS_STRING
,sdsnew("$-1\r\n"));
1623 shared
.nullmultibulk
= createObject(REDIS_STRING
,sdsnew("*-1\r\n"));
1624 shared
.emptymultibulk
= createObject(REDIS_STRING
,sdsnew("*0\r\n"));
1625 shared
.pong
= createObject(REDIS_STRING
,sdsnew("+PONG\r\n"));
1626 shared
.queued
= createObject(REDIS_STRING
,sdsnew("+QUEUED\r\n"));
1627 shared
.wrongtypeerr
= createObject(REDIS_STRING
,sdsnew(
1628 "-ERR Operation against a key holding the wrong kind of value\r\n"));
1629 shared
.nokeyerr
= createObject(REDIS_STRING
,sdsnew(
1630 "-ERR no such key\r\n"));
1631 shared
.syntaxerr
= createObject(REDIS_STRING
,sdsnew(
1632 "-ERR syntax error\r\n"));
1633 shared
.sameobjecterr
= createObject(REDIS_STRING
,sdsnew(
1634 "-ERR source and destination objects are the same\r\n"));
1635 shared
.outofrangeerr
= createObject(REDIS_STRING
,sdsnew(
1636 "-ERR index out of range\r\n"));
1637 shared
.space
= createObject(REDIS_STRING
,sdsnew(" "));
1638 shared
.colon
= createObject(REDIS_STRING
,sdsnew(":"));
1639 shared
.plus
= createObject(REDIS_STRING
,sdsnew("+"));
1640 shared
.select0
= createStringObject("select 0\r\n",10);
1641 shared
.select1
= createStringObject("select 1\r\n",10);
1642 shared
.select2
= createStringObject("select 2\r\n",10);
1643 shared
.select3
= createStringObject("select 3\r\n",10);
1644 shared
.select4
= createStringObject("select 4\r\n",10);
1645 shared
.select5
= createStringObject("select 5\r\n",10);
1646 shared
.select6
= createStringObject("select 6\r\n",10);
1647 shared
.select7
= createStringObject("select 7\r\n",10);
1648 shared
.select8
= createStringObject("select 8\r\n",10);
1649 shared
.select9
= createStringObject("select 9\r\n",10);
1650 shared
.messagebulk
= createStringObject("$7\r\nmessage\r\n",13);
1651 shared
.pmessagebulk
= createStringObject("$8\r\npmessage\r\n",14);
1652 shared
.subscribebulk
= createStringObject("$9\r\nsubscribe\r\n",15);
1653 shared
.unsubscribebulk
= createStringObject("$11\r\nunsubscribe\r\n",18);
1654 shared
.psubscribebulk
= createStringObject("$10\r\npsubscribe\r\n",17);
1655 shared
.punsubscribebulk
= createStringObject("$12\r\npunsubscribe\r\n",19);
1656 shared
.mbulk3
= createStringObject("*3\r\n",4);
1657 shared
.mbulk4
= createStringObject("*4\r\n",4);
1658 for (j
= 0; j
< REDIS_SHARED_INTEGERS
; j
++) {
1659 shared
.integers
[j
] = createObject(REDIS_STRING
,(void*)(long)j
);
1660 shared
.integers
[j
]->encoding
= REDIS_ENCODING_INT
;
1664 static void appendServerSaveParams(time_t seconds
, int changes
) {
1665 server
.saveparams
= zrealloc(server
.saveparams
,sizeof(struct saveparam
)*(server
.saveparamslen
+1));
1666 server
.saveparams
[server
.saveparamslen
].seconds
= seconds
;
1667 server
.saveparams
[server
.saveparamslen
].changes
= changes
;
1668 server
.saveparamslen
++;
1671 static void resetServerSaveParams() {
1672 zfree(server
.saveparams
);
1673 server
.saveparams
= NULL
;
1674 server
.saveparamslen
= 0;
1677 static void initServerConfig() {
1678 server
.dbnum
= REDIS_DEFAULT_DBNUM
;
1679 server
.port
= REDIS_SERVERPORT
;
1680 server
.verbosity
= REDIS_VERBOSE
;
1681 server
.maxidletime
= REDIS_MAXIDLETIME
;
1682 server
.saveparams
= NULL
;
1683 server
.logfile
= NULL
; /* NULL = log on standard output */
1684 server
.bindaddr
= NULL
;
1685 server
.glueoutputbuf
= 1;
1686 server
.daemonize
= 0;
1687 server
.appendonly
= 0;
1688 server
.appendfsync
= APPENDFSYNC_EVERYSEC
;
1689 server
.no_appendfsync_on_rewrite
= 0;
1690 server
.lastfsync
= time(NULL
);
1691 server
.appendfd
= -1;
1692 server
.appendseldb
= -1; /* Make sure the first time will not match */
1693 server
.pidfile
= zstrdup("/var/run/redis.pid");
1694 server
.dbfilename
= zstrdup("dump.rdb");
1695 server
.appendfilename
= zstrdup("appendonly.aof");
1696 server
.requirepass
= NULL
;
1697 server
.rdbcompression
= 1;
1698 server
.activerehashing
= 1;
1699 server
.maxclients
= 0;
1700 server
.blpop_blocked_clients
= 0;
1701 server
.maxmemory
= 0;
1702 server
.vm_enabled
= 0;
1703 server
.vm_swap_file
= zstrdup("/tmp/redis-%p.vm");
1704 server
.vm_page_size
= 256; /* 256 bytes per page */
1705 server
.vm_pages
= 1024*1024*100; /* 104 millions of pages */
1706 server
.vm_max_memory
= 1024LL*1024*1024*1; /* 1 GB of RAM */
1707 server
.vm_max_threads
= 4;
1708 server
.vm_blocked_clients
= 0;
1709 server
.hash_max_zipmap_entries
= REDIS_HASH_MAX_ZIPMAP_ENTRIES
;
1710 server
.hash_max_zipmap_value
= REDIS_HASH_MAX_ZIPMAP_VALUE
;
1711 server
.shutdown_asap
= 0;
1713 resetServerSaveParams();
1715 appendServerSaveParams(60*60,1); /* save after 1 hour and 1 change */
1716 appendServerSaveParams(300,100); /* save after 5 minutes and 100 changes */
1717 appendServerSaveParams(60,10000); /* save after 1 minute and 10000 changes */
1718 /* Replication related */
1720 server
.masterauth
= NULL
;
1721 server
.masterhost
= NULL
;
1722 server
.masterport
= 6379;
1723 server
.master
= NULL
;
1724 server
.replstate
= REDIS_REPL_NONE
;
1726 /* Double constants initialization */
1728 R_PosInf
= 1.0/R_Zero
;
1729 R_NegInf
= -1.0/R_Zero
;
1730 R_Nan
= R_Zero
/R_Zero
;
1733 static void initServer() {
1736 signal(SIGHUP
, SIG_IGN
);
1737 signal(SIGPIPE
, SIG_IGN
);
1738 setupSigSegvAction();
1740 server
.devnull
= fopen("/dev/null","w");
1741 if (server
.devnull
== NULL
) {
1742 redisLog(REDIS_WARNING
, "Can't open /dev/null: %s", server
.neterr
);
1745 server
.clients
= listCreate();
1746 server
.slaves
= listCreate();
1747 server
.monitors
= listCreate();
1748 server
.objfreelist
= listCreate();
1749 createSharedObjects();
1750 server
.el
= aeCreateEventLoop();
1751 server
.db
= zmalloc(sizeof(redisDb
)*server
.dbnum
);
1752 server
.fd
= anetTcpServer(server
.neterr
, server
.port
, server
.bindaddr
);
1753 if (server
.fd
== -1) {
1754 redisLog(REDIS_WARNING
, "Opening TCP port: %s", server
.neterr
);
1757 for (j
= 0; j
< server
.dbnum
; j
++) {
1758 server
.db
[j
].dict
= dictCreate(&dbDictType
,NULL
);
1759 server
.db
[j
].expires
= dictCreate(&keyptrDictType
,NULL
);
1760 server
.db
[j
].blocking_keys
= dictCreate(&keylistDictType
,NULL
);
1761 server
.db
[j
].watched_keys
= dictCreate(&keylistDictType
,NULL
);
1762 if (server
.vm_enabled
)
1763 server
.db
[j
].io_keys
= dictCreate(&keylistDictType
,NULL
);
1764 server
.db
[j
].id
= j
;
1766 server
.pubsub_channels
= dictCreate(&keylistDictType
,NULL
);
1767 server
.pubsub_patterns
= listCreate();
1768 listSetFreeMethod(server
.pubsub_patterns
,freePubsubPattern
);
1769 listSetMatchMethod(server
.pubsub_patterns
,listMatchPubsubPattern
);
1770 server
.cronloops
= 0;
1771 server
.bgsavechildpid
= -1;
1772 server
.bgrewritechildpid
= -1;
1773 server
.bgrewritebuf
= sdsempty();
1774 server
.aofbuf
= sdsempty();
1775 server
.lastsave
= time(NULL
);
1777 server
.stat_numcommands
= 0;
1778 server
.stat_numconnections
= 0;
1779 server
.stat_expiredkeys
= 0;
1780 server
.stat_starttime
= time(NULL
);
1781 server
.unixtime
= time(NULL
);
1782 aeCreateTimeEvent(server
.el
, 1, serverCron
, NULL
, NULL
);
1783 if (aeCreateFileEvent(server
.el
, server
.fd
, AE_READABLE
,
1784 acceptHandler
, NULL
) == AE_ERR
) oom("creating file event");
1786 if (server
.appendonly
) {
1787 server
.appendfd
= open(server
.appendfilename
,O_WRONLY
|O_APPEND
|O_CREAT
,0644);
1788 if (server
.appendfd
== -1) {
1789 redisLog(REDIS_WARNING
, "Can't open the append-only file: %s",
1795 if (server
.vm_enabled
) vmInit();
1798 /* Empty the whole database */
1799 static long long emptyDb() {
1801 long long removed
= 0;
1803 for (j
= 0; j
< server
.dbnum
; j
++) {
1804 removed
+= dictSize(server
.db
[j
].dict
);
1805 dictEmpty(server
.db
[j
].dict
);
1806 dictEmpty(server
.db
[j
].expires
);
/* Convert a configuration boolean argument to an integer.
 *
 * Returns 1 when 's' is "yes" and 0 when 's' is "no"; both comparisons are
 * case-insensitive. Any other input, including NULL, yields -1, which is
 * the value the configuration loader tests for in order to report
 * "argument must be 'yes' or 'no'". */
static int yesnotoi(char *s) {
    if (s == NULL) return -1; /* nothing to compare: treat as invalid */
    if (!strcasecmp(s,"yes")) return 1;
    if (!strcasecmp(s,"no")) return 0;
    return -1; /* neither "yes" nor "no" */
}
1817 /* I agree, this is a very rudimentary way to load a configuration...
1818 it will be improved later if the config gets more complex */
1819 static void loadServerConfig(char *filename
) {
1821 char buf
[REDIS_CONFIGLINE_MAX
+1], *err
= NULL
;
1825 if (filename
[0] == '-' && filename
[1] == '\0')
1828 if ((fp
= fopen(filename
,"r")) == NULL
) {
1829 redisLog(REDIS_WARNING
, "Fatal error, can't open config file '%s'", filename
);
1834 while(fgets(buf
,REDIS_CONFIGLINE_MAX
+1,fp
) != NULL
) {
1840 line
= sdstrim(line
," \t\r\n");
1842 /* Skip comments and blank lines*/
1843 if (line
[0] == '#' || line
[0] == '\0') {
1848 /* Split into arguments */
1849 argv
= sdssplitlen(line
,sdslen(line
)," ",1,&argc
);
1850 sdstolower(argv
[0]);
1852 /* Execute config directives */
1853 if (!strcasecmp(argv
[0],"timeout") && argc
== 2) {
1854 server
.maxidletime
= atoi(argv
[1]);
1855 if (server
.maxidletime
< 0) {
1856 err
= "Invalid timeout value"; goto loaderr
;
1858 } else if (!strcasecmp(argv
[0],"port") && argc
== 2) {
1859 server
.port
= atoi(argv
[1]);
1860 if (server
.port
< 1 || server
.port
> 65535) {
1861 err
= "Invalid port"; goto loaderr
;
1863 } else if (!strcasecmp(argv
[0],"bind") && argc
== 2) {
1864 server
.bindaddr
= zstrdup(argv
[1]);
1865 } else if (!strcasecmp(argv
[0],"save") && argc
== 3) {
1866 int seconds
= atoi(argv
[1]);
1867 int changes
= atoi(argv
[2]);
1868 if (seconds
< 1 || changes
< 0) {
1869 err
= "Invalid save parameters"; goto loaderr
;
1871 appendServerSaveParams(seconds
,changes
);
1872 } else if (!strcasecmp(argv
[0],"dir") && argc
== 2) {
1873 if (chdir(argv
[1]) == -1) {
1874 redisLog(REDIS_WARNING
,"Can't chdir to '%s': %s",
1875 argv
[1], strerror(errno
));
1878 } else if (!strcasecmp(argv
[0],"loglevel") && argc
== 2) {
1879 if (!strcasecmp(argv
[1],"debug")) server
.verbosity
= REDIS_DEBUG
;
1880 else if (!strcasecmp(argv
[1],"verbose")) server
.verbosity
= REDIS_VERBOSE
;
1881 else if (!strcasecmp(argv
[1],"notice")) server
.verbosity
= REDIS_NOTICE
;
1882 else if (!strcasecmp(argv
[1],"warning")) server
.verbosity
= REDIS_WARNING
;
1884 err
= "Invalid log level. Must be one of debug, notice, warning";
1887 } else if (!strcasecmp(argv
[0],"logfile") && argc
== 2) {
1890 server
.logfile
= zstrdup(argv
[1]);
1891 if (!strcasecmp(server
.logfile
,"stdout")) {
1892 zfree(server
.logfile
);
1893 server
.logfile
= NULL
;
1895 if (server
.logfile
) {
1896 /* Test if we are able to open the file. The server will not
1897 * be able to abort just for this problem later... */
1898 logfp
= fopen(server
.logfile
,"a");
1899 if (logfp
== NULL
) {
1900 err
= sdscatprintf(sdsempty(),
1901 "Can't open the log file: %s", strerror(errno
));
1906 } else if (!strcasecmp(argv
[0],"databases") && argc
== 2) {
1907 server
.dbnum
= atoi(argv
[1]);
1908 if (server
.dbnum
< 1) {
1909 err
= "Invalid number of databases"; goto loaderr
;
1911 } else if (!strcasecmp(argv
[0],"include") && argc
== 2) {
1912 loadServerConfig(argv
[1]);
1913 } else if (!strcasecmp(argv
[0],"maxclients") && argc
== 2) {
1914 server
.maxclients
= atoi(argv
[1]);
1915 } else if (!strcasecmp(argv
[0],"maxmemory") && argc
== 2) {
1916 server
.maxmemory
= memtoll(argv
[1],NULL
);
1917 } else if (!strcasecmp(argv
[0],"slaveof") && argc
== 3) {
1918 server
.masterhost
= sdsnew(argv
[1]);
1919 server
.masterport
= atoi(argv
[2]);
1920 server
.replstate
= REDIS_REPL_CONNECT
;
1921 } else if (!strcasecmp(argv
[0],"masterauth") && argc
== 2) {
1922 server
.masterauth
= zstrdup(argv
[1]);
1923 } else if (!strcasecmp(argv
[0],"glueoutputbuf") && argc
== 2) {
1924 if ((server
.glueoutputbuf
= yesnotoi(argv
[1])) == -1) {
1925 err
= "argument must be 'yes' or 'no'"; goto loaderr
;
1927 } else if (!strcasecmp(argv
[0],"rdbcompression") && argc
== 2) {
1928 if ((server
.rdbcompression
= yesnotoi(argv
[1])) == -1) {
1929 err
= "argument must be 'yes' or 'no'"; goto loaderr
;
1931 } else if (!strcasecmp(argv
[0],"activerehashing") && argc
== 2) {
1932 if ((server
.activerehashing
= yesnotoi(argv
[1])) == -1) {
1933 err
= "argument must be 'yes' or 'no'"; goto loaderr
;
1935 } else if (!strcasecmp(argv
[0],"daemonize") && argc
== 2) {
1936 if ((server
.daemonize
= yesnotoi(argv
[1])) == -1) {
1937 err
= "argument must be 'yes' or 'no'"; goto loaderr
;
1939 } else if (!strcasecmp(argv
[0],"appendonly") && argc
== 2) {
1940 if ((server
.appendonly
= yesnotoi(argv
[1])) == -1) {
1941 err
= "argument must be 'yes' or 'no'"; goto loaderr
;
1943 } else if (!strcasecmp(argv
[0],"appendfilename") && argc
== 2) {
1944 zfree(server
.appendfilename
);
1945 server
.appendfilename
= zstrdup(argv
[1]);
1946 } else if (!strcasecmp(argv
[0],"no-appendfsync-on-rewrite")
1948 if ((server
.no_appendfsync_on_rewrite
= yesnotoi(argv
[1])) == -1) {
1949 err
= "argument must be 'yes' or 'no'"; goto loaderr
;
1951 } else if (!strcasecmp(argv
[0],"appendfsync") && argc
== 2) {
1952 if (!strcasecmp(argv
[1],"no")) {
1953 server
.appendfsync
= APPENDFSYNC_NO
;
1954 } else if (!strcasecmp(argv
[1],"always")) {
1955 server
.appendfsync
= APPENDFSYNC_ALWAYS
;
1956 } else if (!strcasecmp(argv
[1],"everysec")) {
1957 server
.appendfsync
= APPENDFSYNC_EVERYSEC
;
1959 err
= "argument must be 'no', 'always' or 'everysec'";
1962 } else if (!strcasecmp(argv
[0],"requirepass") && argc
== 2) {
1963 server
.requirepass
= zstrdup(argv
[1]);
1964 } else if (!strcasecmp(argv
[0],"pidfile") && argc
== 2) {
1965 zfree(server
.pidfile
);
1966 server
.pidfile
= zstrdup(argv
[1]);
1967 } else if (!strcasecmp(argv
[0],"dbfilename") && argc
== 2) {
1968 zfree(server
.dbfilename
);
1969 server
.dbfilename
= zstrdup(argv
[1]);
1970 } else if (!strcasecmp(argv
[0],"vm-enabled") && argc
== 2) {
1971 if ((server
.vm_enabled
= yesnotoi(argv
[1])) == -1) {
1972 err
= "argument must be 'yes' or 'no'"; goto loaderr
;
1974 } else if (!strcasecmp(argv
[0],"vm-swap-file") && argc
== 2) {
1975 zfree(server
.vm_swap_file
);
1976 server
.vm_swap_file
= zstrdup(argv
[1]);
1977 } else if (!strcasecmp(argv
[0],"vm-max-memory") && argc
== 2) {
1978 server
.vm_max_memory
= memtoll(argv
[1],NULL
);
1979 } else if (!strcasecmp(argv
[0],"vm-page-size") && argc
== 2) {
1980 server
.vm_page_size
= memtoll(argv
[1], NULL
);
1981 } else if (!strcasecmp(argv
[0],"vm-pages") && argc
== 2) {
1982 server
.vm_pages
= memtoll(argv
[1], NULL
);
1983 } else if (!strcasecmp(argv
[0],"vm-max-threads") && argc
== 2) {
1984 server
.vm_max_threads
= strtoll(argv
[1], NULL
, 10);
1985 } else if (!strcasecmp(argv
[0],"hash-max-zipmap-entries") && argc
== 2){
1986 server
.hash_max_zipmap_entries
= memtoll(argv
[1], NULL
);
1987 } else if (!strcasecmp(argv
[0],"hash-max-zipmap-value") && argc
== 2){
1988 server
.hash_max_zipmap_value
= memtoll(argv
[1], NULL
);
1990 err
= "Bad directive or wrong number of arguments"; goto loaderr
;
1992 for (j
= 0; j
< argc
; j
++)
1997 if (fp
!= stdin
) fclose(fp
);
2001 fprintf(stderr
, "\n*** FATAL CONFIG FILE ERROR ***\n");
2002 fprintf(stderr
, "Reading the configuration file, at line %d\n", linenum
);
2003 fprintf(stderr
, ">>> '%s'\n", line
);
2004 fprintf(stderr
, "%s\n", err
);
2008 static void freeClientArgv(redisClient
*c
) {
2011 for (j
= 0; j
< c
->argc
; j
++)
2012 decrRefCount(c
->argv
[j
]);
2013 for (j
= 0; j
< c
->mbargc
; j
++)
2014 decrRefCount(c
->mbargv
[j
]);
2019 static void freeClient(redisClient
*c
) {
2022 /* Note that if the client we are freeing is blocked in a blocking
2023 * call, we have to set querybuf to NULL *before* calling
2024 * unblockClientWaitingData(), so that processInputBuffer() does not
2025 * get called. It is also important to remove the file events after
2026 * this, because this call adds the READABLE event. */
2027 sdsfree(c
->querybuf
);
2029 if (c
->flags
& REDIS_BLOCKED
)
2030 unblockClientWaitingData(c
);
2032 /* UNWATCH all the keys */
2034 listRelease(c
->watched_keys
);
2035 /* Unsubscribe from all the pubsub channels */
2036 pubsubUnsubscribeAllChannels(c
,0);
2037 pubsubUnsubscribeAllPatterns(c
,0);
2038 dictRelease(c
->pubsub_channels
);
2039 listRelease(c
->pubsub_patterns
);
2040 /* Obvious cleanup */
2041 aeDeleteFileEvent(server
.el
,c
->fd
,AE_READABLE
);
2042 aeDeleteFileEvent(server
.el
,c
->fd
,AE_WRITABLE
);
2043 listRelease(c
->reply
);
2046 /* Remove from the list of clients */
2047 ln
= listSearchKey(server
.clients
,c
);
2048 redisAssert(ln
!= NULL
);
2049 listDelNode(server
.clients
,ln
);
2050 /* Remove from the list of clients that are now ready to be restarted
2051 * after waiting for swapped keys */
2052 if (c
->flags
& REDIS_IO_WAIT
&& listLength(c
->io_keys
) == 0) {
2053 ln
= listSearchKey(server
.io_ready_clients
,c
);
2055 listDelNode(server
.io_ready_clients
,ln
);
2056 server
.vm_blocked_clients
--;
2059 /* Remove from the list of clients waiting for swapped keys */
2060 while (server
.vm_enabled
&& listLength(c
->io_keys
)) {
2061 ln
= listFirst(c
->io_keys
);
2062 dontWaitForSwappedKey(c
,ln
->value
);
2064 listRelease(c
->io_keys
);
2065 /* Master/slave cleanup */
2066 if (c
->flags
& REDIS_SLAVE
) {
2067 if (c
->replstate
== REDIS_REPL_SEND_BULK
&& c
->repldbfd
!= -1)
2069 list
*l
= (c
->flags
& REDIS_MONITOR
) ? server
.monitors
: server
.slaves
;
2070 ln
= listSearchKey(l
,c
);
2071 redisAssert(ln
!= NULL
);
2074 if (c
->flags
& REDIS_MASTER
) {
2075 server
.master
= NULL
;
2076 server
.replstate
= REDIS_REPL_CONNECT
;
2078 /* Release memory */
2081 freeClientMultiState(c
);
2085 #define GLUEREPLY_UP_TO (1024)
2086 static void glueReplyBuffersIfNeeded(redisClient
*c
) {
2088 char buf
[GLUEREPLY_UP_TO
];
2093 listRewind(c
->reply
,&li
);
2094 while((ln
= listNext(&li
))) {
2098 objlen
= sdslen(o
->ptr
);
2099 if (copylen
+ objlen
<= GLUEREPLY_UP_TO
) {
2100 memcpy(buf
+copylen
,o
->ptr
,objlen
);
2102 listDelNode(c
->reply
,ln
);
2104 if (copylen
== 0) return;
2108 /* Now the output buffer is empty, add the new single element */
2109 o
= createObject(REDIS_STRING
,sdsnewlen(buf
,copylen
));
2110 listAddNodeHead(c
->reply
,o
);
2113 static void sendReplyToClient(aeEventLoop
*el
, int fd
, void *privdata
, int mask
) {
2114 redisClient
*c
= privdata
;
2115 int nwritten
= 0, totwritten
= 0, objlen
;
2118 REDIS_NOTUSED(mask
);
2120 /* Use writev() if we have enough buffers to send */
2121 if (!server
.glueoutputbuf
&&
2122 listLength(c
->reply
) > REDIS_WRITEV_THRESHOLD
&&
2123 !(c
->flags
& REDIS_MASTER
))
2125 sendReplyToClientWritev(el
, fd
, privdata
, mask
);
2129 while(listLength(c
->reply
)) {
2130 if (server
.glueoutputbuf
&& listLength(c
->reply
) > 1)
2131 glueReplyBuffersIfNeeded(c
);
2133 o
= listNodeValue(listFirst(c
->reply
));
2134 objlen
= sdslen(o
->ptr
);
2137 listDelNode(c
->reply
,listFirst(c
->reply
));
2141 if (c
->flags
& REDIS_MASTER
) {
2142 /* Don't reply to a master */
2143 nwritten
= objlen
- c
->sentlen
;
2145 nwritten
= write(fd
, ((char*)o
->ptr
)+c
->sentlen
, objlen
- c
->sentlen
);
2146 if (nwritten
<= 0) break;
2148 c
->sentlen
+= nwritten
;
2149 totwritten
+= nwritten
;
2150 /* If we fully sent the object on head go to the next one */
2151 if (c
->sentlen
== objlen
) {
2152 listDelNode(c
->reply
,listFirst(c
->reply
));
2155 /* Note that we avoid sending more than REDIS_MAX_WRITE_PER_EVENT
2156 * bytes: in a single-threaded server it's a good idea to serve
2157 * other clients as well, even if a very large request comes from a
2158 * super fast link that is always able to accept data (as a real-world
2159 * scenario, think about 'KEYS *' against the loopback interface) */
2160 if (totwritten
> REDIS_MAX_WRITE_PER_EVENT
) break;
2162 if (nwritten
== -1) {
2163 if (errno
== EAGAIN
) {
2166 redisLog(REDIS_VERBOSE
,
2167 "Error writing to client: %s", strerror(errno
));
2172 if (totwritten
> 0) c
->lastinteraction
= time(NULL
);
2173 if (listLength(c
->reply
) == 0) {
2175 aeDeleteFileEvent(server
.el
,c
->fd
,AE_WRITABLE
);
2179 static void sendReplyToClientWritev(aeEventLoop
*el
, int fd
, void *privdata
, int mask
)
2181 redisClient
*c
= privdata
;
2182 int nwritten
= 0, totwritten
= 0, objlen
, willwrite
;
2184 struct iovec iov
[REDIS_WRITEV_IOVEC_COUNT
];
2185 int offset
, ion
= 0;
2187 REDIS_NOTUSED(mask
);
2190 while (listLength(c
->reply
)) {
2191 offset
= c
->sentlen
;
2195 /* fill-in the iov[] array */
2196 for(node
= listFirst(c
->reply
); node
; node
= listNextNode(node
)) {
2197 o
= listNodeValue(node
);
2198 objlen
= sdslen(o
->ptr
);
2200 if (totwritten
+ objlen
- offset
> REDIS_MAX_WRITE_PER_EVENT
)
2203 if(ion
== REDIS_WRITEV_IOVEC_COUNT
)
2204 break; /* no more iovecs */
2206 iov
[ion
].iov_base
= ((char*)o
->ptr
) + offset
;
2207 iov
[ion
].iov_len
= objlen
- offset
;
2208 willwrite
+= objlen
- offset
;
2209 offset
= 0; /* just for the first item */
2216 /* write all collected blocks at once */
2217 if((nwritten
= writev(fd
, iov
, ion
)) < 0) {
2218 if (errno
!= EAGAIN
) {
2219 redisLog(REDIS_VERBOSE
,
2220 "Error writing to client: %s", strerror(errno
));
2227 totwritten
+= nwritten
;
2228 offset
= c
->sentlen
;
2230 /* remove written robjs from c->reply */
2231 while (nwritten
&& listLength(c
->reply
)) {
2232 o
= listNodeValue(listFirst(c
->reply
));
2233 objlen
= sdslen(o
->ptr
);
2235 if(nwritten
>= objlen
- offset
) {
2236 listDelNode(c
->reply
, listFirst(c
->reply
));
2237 nwritten
-= objlen
- offset
;
2241 c
->sentlen
+= nwritten
;
2249 c
->lastinteraction
= time(NULL
);
2251 if (listLength(c
->reply
) == 0) {
2253 aeDeleteFileEvent(server
.el
,c
->fd
,AE_WRITABLE
);
2257 static int qsortRedisCommands(const void *r1
, const void *r2
) {
2259 ((struct redisCommand
*)r1
)->name
,
2260 ((struct redisCommand
*)r2
)->name
);
2263 static void sortCommandTable() {
2264 /* Copy and sort the read-only version of the command table */
2265 commandTable
= (struct redisCommand
*)malloc(sizeof(readonlyCommandTable
));
2266 memcpy(commandTable
,readonlyCommandTable
,sizeof(readonlyCommandTable
));
2268 sizeof(readonlyCommandTable
)/sizeof(struct redisCommand
),
2269 sizeof(struct redisCommand
),qsortRedisCommands
);
2272 static struct redisCommand
*lookupCommand(char *name
) {
2273 struct redisCommand tmp
= {name
,NULL
,0,0,NULL
,0,0,0};
2277 sizeof(readonlyCommandTable
)/sizeof(struct redisCommand
),
2278 sizeof(struct redisCommand
),
2279 qsortRedisCommands
);
2282 /* resetClient prepare the client to process the next command */
2283 static void resetClient(redisClient
*c
) {
2289 /* Call() is the core of Redis execution of a command */
2290 static void call(redisClient
*c
, struct redisCommand
*cmd
) {
2293 dirty
= server
.dirty
;
2295 dirty
= server
.dirty
-dirty
;
2297 if (server
.appendonly
&& dirty
)
2298 feedAppendOnlyFile(cmd
,c
->db
->id
,c
->argv
,c
->argc
);
2299 if ((dirty
|| cmd
->flags
& REDIS_CMD_FORCE_REPLICATION
) &&
2300 listLength(server
.slaves
))
2301 replicationFeedSlaves(server
.slaves
,c
->db
->id
,c
->argv
,c
->argc
);
2302 if (listLength(server
.monitors
))
2303 replicationFeedMonitors(server
.monitors
,c
->db
->id
,c
->argv
,c
->argc
);
2304 server
.stat_numcommands
++;
2307 /* If this function gets called we have already read a whole
2308 * command; its arguments are in the client argv/argc fields.
2309 * processCommand() executes the command or prepares the
2310 * server for a bulk read from the client.
2312 * If 1 is returned the client is still alive and valid,
2313 * and other operations can be performed by the caller. Otherwise,
2314 * if 0 is returned, the client was destroyed (i.e. after QUIT). */
2315 static int processCommand(redisClient
*c
) {
2316 struct redisCommand
*cmd
;
2318 /* Free some memory if needed (maxmemory setting) */
2319 if (server
.maxmemory
) freeMemoryIfNeeded();
2321 /* Handle the multi bulk command type. This is an alternative protocol
2322 * supported by Redis in order to receive commands that are composed of
2323 * multiple binary-safe "bulk" arguments. The latency of processing is
2324 * a bit higher but this allows things like multi-sets, so if this
2325 * protocol is used only for MSET and similar commands this is a big win. */
2326 if (c
->multibulk
== 0 && c
->argc
== 1 && ((char*)(c
->argv
[0]->ptr
))[0] == '*') {
2327 c
->multibulk
= atoi(((char*)c
->argv
[0]->ptr
)+1);
2328 if (c
->multibulk
<= 0) {
2332 decrRefCount(c
->argv
[c
->argc
-1]);
2336 } else if (c
->multibulk
) {
2337 if (c
->bulklen
== -1) {
2338 if (((char*)c
->argv
[0]->ptr
)[0] != '$') {
2339 addReplySds(c
,sdsnew("-ERR multi bulk protocol error\r\n"));
2343 int bulklen
= atoi(((char*)c
->argv
[0]->ptr
)+1);
2344 decrRefCount(c
->argv
[0]);
2345 if (bulklen
< 0 || bulklen
> 1024*1024*1024) {
2347 addReplySds(c
,sdsnew("-ERR invalid bulk write count\r\n"));
2352 c
->bulklen
= bulklen
+2; /* add two bytes for CR+LF */
2356 c
->mbargv
= zrealloc(c
->mbargv
,(sizeof(robj
*))*(c
->mbargc
+1));
2357 c
->mbargv
[c
->mbargc
] = c
->argv
[0];
2361 if (c
->multibulk
== 0) {
2365 /* Here we need to swap the multi-bulk argc/argv with the
2366 * normal argc/argv of the client structure. */
2368 c
->argv
= c
->mbargv
;
2369 c
->mbargv
= auxargv
;
2372 c
->argc
= c
->mbargc
;
2373 c
->mbargc
= auxargc
;
2375 /* We need to set bulklen to something different from -1
2376 * in order for the code below to process the command without
2377 * trying to read the last argument of a bulk command as
2378 * a special argument. */
2380 /* continue below and process the command */
2387 /* -- end of multi bulk commands processing -- */
2389 /* The QUIT command is handled as a special case. Normal command
2390 * procs are unable to close the client connection safely */
2391 if (!strcasecmp(c
->argv
[0]->ptr
,"quit")) {
2396 /* Now lookup the command and check ASAP about trivial error conditions
2397 * such wrong arity, bad command name and so forth. */
2398 cmd
= lookupCommand(c
->argv
[0]->ptr
);
2401 sdscatprintf(sdsempty(), "-ERR unknown command '%s'\r\n",
2402 (char*)c
->argv
[0]->ptr
));
2405 } else if ((cmd
->arity
> 0 && cmd
->arity
!= c
->argc
) ||
2406 (c
->argc
< -cmd
->arity
)) {
2408 sdscatprintf(sdsempty(),
2409 "-ERR wrong number of arguments for '%s' command\r\n",
2413 } else if (cmd
->flags
& REDIS_CMD_BULK
&& c
->bulklen
== -1) {
2414 /* This is a bulk command, we have to read the last argument yet. */
2415 int bulklen
= atoi(c
->argv
[c
->argc
-1]->ptr
);
2417 decrRefCount(c
->argv
[c
->argc
-1]);
2418 if (bulklen
< 0 || bulklen
> 1024*1024*1024) {
2420 addReplySds(c
,sdsnew("-ERR invalid bulk write count\r\n"));
2425 c
->bulklen
= bulklen
+2; /* add two bytes for CR+LF */
2426 /* It is possible that the bulk read is already in the
2427 * buffer. Check this condition and handle it accordingly.
2428 * This is just a fast path, an alternative to calling processInputBuffer().
2429 * It's a good idea since the code is small and this condition
2430 * happens most of the time. */
2431 if ((signed)sdslen(c
->querybuf
) >= c
->bulklen
) {
2432 c
->argv
[c
->argc
] = createStringObject(c
->querybuf
,c
->bulklen
-2);
2434 c
->querybuf
= sdsrange(c
->querybuf
,c
->bulklen
,-1);
2436 /* Otherwise return... the last argument still has to be read
2437 * from the socket. */
2441 /* Let's try to encode the bulk object to save space. */
2442 if (cmd
->flags
& REDIS_CMD_BULK
)
2443 c
->argv
[c
->argc
-1] = tryObjectEncoding(c
->argv
[c
->argc
-1]);
2445 /* Check if the user is authenticated */
2446 if (server
.requirepass
&& !c
->authenticated
&& cmd
->proc
!= authCommand
) {
2447 addReplySds(c
,sdsnew("-ERR operation not permitted\r\n"));
2452 /* Handle the maxmemory directive */
2453 if (server
.maxmemory
&& (cmd
->flags
& REDIS_CMD_DENYOOM
) &&
2454 zmalloc_used_memory() > server
.maxmemory
)
2456 addReplySds(c
,sdsnew("-ERR command not allowed when used memory > 'maxmemory'\r\n"));
2461 /* Only allow SUBSCRIBE and UNSUBSCRIBE in the context of Pub/Sub */
2462 if ((dictSize(c
->pubsub_channels
) > 0 || listLength(c
->pubsub_patterns
) > 0)
2464 cmd
->proc
!= subscribeCommand
&& cmd
->proc
!= unsubscribeCommand
&&
2465 cmd
->proc
!= psubscribeCommand
&& cmd
->proc
!= punsubscribeCommand
) {
2466 addReplySds(c
,sdsnew("-ERR only (P)SUBSCRIBE / (P)UNSUBSCRIBE / QUIT allowed in this context\r\n"));
2471 /* Exec the command */
2472 if (c
->flags
& REDIS_MULTI
&&
2473 cmd
->proc
!= execCommand
&& cmd
->proc
!= discardCommand
&&
2474 cmd
->proc
!= multiCommand
&& cmd
->proc
!= watchCommand
)
2476 queueMultiCommand(c
,cmd
);
2477 addReply(c
,shared
.queued
);
2479 if (server
.vm_enabled
&& server
.vm_max_threads
> 0 &&
2480 blockClientOnSwappedKeys(c
,cmd
)) return 1;
2484 /* Prepare the client for the next command */
2489 static void replicationFeedSlaves(list
*slaves
, int dictid
, robj
**argv
, int argc
) {
2494 /* We need 1+(ARGS*3) objects since commands are using the new protocol:
2495 * we need 1 object for the first "*<count>\r\n" multibulk count, then
2496 * for every argument we have "$<count>\r\n" + object + "\r\n". */
2497 robj
*static_outv
[REDIS_STATIC_ARGS
*3+1];
2500 if (argc
<= REDIS_STATIC_ARGS
) {
2503 outv
= zmalloc(sizeof(robj
*)*(argc
*3+1));
2506 lenobj
= createObject(REDIS_STRING
,
2507 sdscatprintf(sdsempty(), "*%d\r\n", argc
));
2508 lenobj
->refcount
= 0;
2509 outv
[outc
++] = lenobj
;
2510 for (j
= 0; j
< argc
; j
++) {
2511 lenobj
= createObject(REDIS_STRING
,
2512 sdscatprintf(sdsempty(),"$%lu\r\n",
2513 (unsigned long) stringObjectLen(argv
[j
])));
2514 lenobj
->refcount
= 0;
2515 outv
[outc
++] = lenobj
;
2516 outv
[outc
++] = argv
[j
];
2517 outv
[outc
++] = shared
.crlf
;
2520 /* Increment all the refcounts at start and decrement at end in order to
2521 * be sure to free objects if there is no slave in a replication state
2522 * able to be fed with commands */
2523 for (j
= 0; j
< outc
; j
++) incrRefCount(outv
[j
]);
2524 listRewind(slaves
,&li
);
2525 while((ln
= listNext(&li
))) {
2526 redisClient
*slave
= ln
->value
;
2528 /* Don't feed slaves that are still waiting for BGSAVE to start */
2529 if (slave
->replstate
== REDIS_REPL_WAIT_BGSAVE_START
) continue;
2531 /* Feed all the other slaves, MONITORs and so on */
2532 if (slave
->slaveseldb
!= dictid
) {
2536 case 0: selectcmd
= shared
.select0
; break;
2537 case 1: selectcmd
= shared
.select1
; break;
2538 case 2: selectcmd
= shared
.select2
; break;
2539 case 3: selectcmd
= shared
.select3
; break;
2540 case 4: selectcmd
= shared
.select4
; break;
2541 case 5: selectcmd
= shared
.select5
; break;
2542 case 6: selectcmd
= shared
.select6
; break;
2543 case 7: selectcmd
= shared
.select7
; break;
2544 case 8: selectcmd
= shared
.select8
; break;
2545 case 9: selectcmd
= shared
.select9
; break;
2547 selectcmd
= createObject(REDIS_STRING
,
2548 sdscatprintf(sdsempty(),"select %d\r\n",dictid
));
2549 selectcmd
->refcount
= 0;
2552 addReply(slave
,selectcmd
);
2553 slave
->slaveseldb
= dictid
;
2555 for (j
= 0; j
< outc
; j
++) addReply(slave
,outv
[j
]);
2557 for (j
= 0; j
< outc
; j
++) decrRefCount(outv
[j
]);
2558 if (outv
!= static_outv
) zfree(outv
);
2561 static sds
sdscatrepr(sds s
, char *p
, size_t len
) {
2562 s
= sdscatlen(s
,"\"",1);
2567 s
= sdscatprintf(s
,"\\%c",*p
);
2569 case '\n': s
= sdscatlen(s
,"\\n",1); break;
2570 case '\r': s
= sdscatlen(s
,"\\r",1); break;
2571 case '\t': s
= sdscatlen(s
,"\\t",1); break;
2572 case '\a': s
= sdscatlen(s
,"\\a",1); break;
2573 case '\b': s
= sdscatlen(s
,"\\b",1); break;
2576 s
= sdscatprintf(s
,"%c",*p
);
2578 s
= sdscatprintf(s
,"\\x%02x",(unsigned char)*p
);
2583 return sdscatlen(s
,"\"",1);
2586 static void replicationFeedMonitors(list
*monitors
, int dictid
, robj
**argv
, int argc
) {
2590 sds cmdrepr
= sdsnew("+");
2594 gettimeofday(&tv
,NULL
);
2595 cmdrepr
= sdscatprintf(cmdrepr
,"%ld.%ld ",(long)tv
.tv_sec
,(long)tv
.tv_usec
);
2596 if (dictid
!= 0) cmdrepr
= sdscatprintf(cmdrepr
,"(db %d) ", dictid
);
2598 for (j
= 0; j
< argc
; j
++) {
2599 if (argv
[j
]->encoding
== REDIS_ENCODING_INT
) {
2600 cmdrepr
= sdscatprintf(cmdrepr
, "%ld", (long)argv
[j
]->ptr
);
2602 cmdrepr
= sdscatrepr(cmdrepr
,(char*)argv
[j
]->ptr
,
2603 sdslen(argv
[j
]->ptr
));
2606 cmdrepr
= sdscatlen(cmdrepr
," ",1);
2608 cmdrepr
= sdscatlen(cmdrepr
,"\r\n",2);
2609 cmdobj
= createObject(REDIS_STRING
,cmdrepr
);
2611 listRewind(monitors
,&li
);
2612 while((ln
= listNext(&li
))) {
2613 redisClient
*monitor
= ln
->value
;
2614 addReply(monitor
,cmdobj
);
2616 decrRefCount(cmdobj
);
2619 static void processInputBuffer(redisClient
*c
) {
2621 /* Before processing the input buffer, make sure the client is not
2622 * waiting for a blocking operation such as BLPOP. Note that on the first
2623 * iteration the client is never blocked, otherwise processInputBuffer()
2624 * would not be called at all, but after the execution of the first commands
2625 * in the input buffer the client may be blocked, and the "goto again"
2626 * will try to reiterate. The following line will make it return asap. */
2627 if (c
->flags
& REDIS_BLOCKED
|| c
->flags
& REDIS_IO_WAIT
) return;
2628 if (c
->bulklen
== -1) {
2629 /* Read the first line of the query */
2630 char *p
= strchr(c
->querybuf
,'\n');
2637 query
= c
->querybuf
;
2638 c
->querybuf
= sdsempty();
2639 querylen
= 1+(p
-(query
));
2640 if (sdslen(query
) > querylen
) {
2641 /* leave data after the first line of the query in the buffer */
2642 c
->querybuf
= sdscatlen(c
->querybuf
,query
+querylen
,sdslen(query
)-querylen
);
2644 *p
= '\0'; /* remove "\n" */
2645 if (*(p
-1) == '\r') *(p
-1) = '\0'; /* and "\r" if any */
2646 sdsupdatelen(query
);
2648 /* Now we can split the query in arguments */
2649 argv
= sdssplitlen(query
,sdslen(query
)," ",1,&argc
);
2652 if (c
->argv
) zfree(c
->argv
);
2653 c
->argv
= zmalloc(sizeof(robj
*)*argc
);
2655 for (j
= 0; j
< argc
; j
++) {
2656 if (sdslen(argv
[j
])) {
2657 c
->argv
[c
->argc
] = createObject(REDIS_STRING
,argv
[j
]);
2665 /* Execute the command. If the client is still valid
2666 * after processCommand() returns and there is something
2667 * in the query buffer, try to process the next command. */
2668 if (processCommand(c
) && sdslen(c
->querybuf
)) goto again
;
2670 /* Nothing to process, argc == 0. Just process the query
2671 * buffer if it's not empty or return to the caller */
2672 if (sdslen(c
->querybuf
)) goto again
;
2675 } else if (sdslen(c
->querybuf
) >= REDIS_REQUEST_MAX_SIZE
) {
2676 redisLog(REDIS_VERBOSE
, "Client protocol error");
2681 /* Bulk read handling. Note that if we are at this point
2682 the client already sent a command terminated with a newline,
2683 we are reading the bulk data that is actually the last
2684 argument of the command. */
2685 int qbl
= sdslen(c
->querybuf
);
2687 if (c
->bulklen
<= qbl
) {
2688 /* Copy everything but the final CRLF as final argument */
2689 c
->argv
[c
->argc
] = createStringObject(c
->querybuf
,c
->bulklen
-2);
2691 c
->querybuf
= sdsrange(c
->querybuf
,c
->bulklen
,-1);
2692 /* Process the command. If the client is still valid after
2693 * the processing and there is more data in the buffer
2694 * try to parse it. */
2695 if (processCommand(c
) && sdslen(c
->querybuf
)) goto again
;
2701 static void readQueryFromClient(aeEventLoop
*el
, int fd
, void *privdata
, int mask
) {
2702 redisClient
*c
= (redisClient
*) privdata
;
2703 char buf
[REDIS_IOBUF_LEN
];
2706 REDIS_NOTUSED(mask
);
2708 nread
= read(fd
, buf
, REDIS_IOBUF_LEN
);
2710 if (errno
== EAGAIN
) {
2713 redisLog(REDIS_VERBOSE
, "Reading from client: %s",strerror(errno
));
2717 } else if (nread
== 0) {
2718 redisLog(REDIS_VERBOSE
, "Client closed connection");
2723 c
->querybuf
= sdscatlen(c
->querybuf
, buf
, nread
);
2724 c
->lastinteraction
= time(NULL
);
2728 processInputBuffer(c
);
2731 static int selectDb(redisClient
*c
, int id
) {
2732 if (id
< 0 || id
>= server
.dbnum
)
2734 c
->db
= &server
.db
[id
];
2738 static void *dupClientReplyValue(void *o
) {
2739 incrRefCount((robj
*)o
);
/* List match callback: reports whether the two list values 'a' and 'b'
 * match, by delegating the comparison to equalStringObjects() and passing
 * its result back unchanged. */
static int listMatchObjects(void *a, void *b) {
    int matched = equalStringObjects(a, b);

    return matched;
}
2747 static redisClient
*createClient(int fd
) {
2748 redisClient
*c
= zmalloc(sizeof(*c
));
2750 anetNonBlock(NULL
,fd
);
2751 anetTcpNoDelay(NULL
,fd
);
2752 if (!c
) return NULL
;
2755 c
->querybuf
= sdsempty();
2764 c
->lastinteraction
= time(NULL
);
2765 c
->authenticated
= 0;
2766 c
->replstate
= REDIS_REPL_NONE
;
2767 c
->reply
= listCreate();
2768 listSetFreeMethod(c
->reply
,decrRefCount
);
2769 listSetDupMethod(c
->reply
,dupClientReplyValue
);
2770 c
->blocking_keys
= NULL
;
2771 c
->blocking_keys_num
= 0;
2772 c
->io_keys
= listCreate();
2773 c
->watched_keys
= listCreate();
2774 listSetFreeMethod(c
->io_keys
,decrRefCount
);
2775 c
->pubsub_channels
= dictCreate(&setDictType
,NULL
);
2776 c
->pubsub_patterns
= listCreate();
2777 listSetFreeMethod(c
->pubsub_patterns
,decrRefCount
);
2778 listSetMatchMethod(c
->pubsub_patterns
,listMatchObjects
);
2779 if (aeCreateFileEvent(server
.el
, c
->fd
, AE_READABLE
,
2780 readQueryFromClient
, c
) == AE_ERR
) {
2784 listAddNodeTail(server
.clients
,c
);
2785 initClientMultiState(c
);
2789 static void addReply(redisClient
*c
, robj
*obj
) {
2790 if (listLength(c
->reply
) == 0 &&
2791 (c
->replstate
== REDIS_REPL_NONE
||
2792 c
->replstate
== REDIS_REPL_ONLINE
) &&
2793 aeCreateFileEvent(server
.el
, c
->fd
, AE_WRITABLE
,
2794 sendReplyToClient
, c
) == AE_ERR
) return;
2796 if (server
.vm_enabled
&& obj
->storage
!= REDIS_VM_MEMORY
) {
2797 obj
= dupStringObject(obj
);
2798 obj
->refcount
= 0; /* getDecodedObject() will increment the refcount */
2800 listAddNodeTail(c
->reply
,getDecodedObject(obj
));
2803 static void addReplySds(redisClient
*c
, sds s
) {
2804 robj
*o
= createObject(REDIS_STRING
,s
);
2809 static void addReplyDouble(redisClient
*c
, double d
) {
2812 snprintf(buf
,sizeof(buf
),"%.17g",d
);
2813 addReplySds(c
,sdscatprintf(sdsempty(),"$%lu\r\n%s\r\n",
2814 (unsigned long) strlen(buf
),buf
));
2817 static void addReplyLongLong(redisClient
*c
, long long ll
) {
2822 addReply(c
,shared
.czero
);
2824 } else if (ll
== 1) {
2825 addReply(c
,shared
.cone
);
2829 len
= ll2string(buf
+1,sizeof(buf
)-1,ll
);
2832 addReplySds(c
,sdsnewlen(buf
,len
+3));
2835 static void addReplyUlong(redisClient
*c
, unsigned long ul
) {
2840 addReply(c
,shared
.czero
);
2842 } else if (ul
== 1) {
2843 addReply(c
,shared
.cone
);
2846 len
= snprintf(buf
,sizeof(buf
),":%lu\r\n",ul
);
2847 addReplySds(c
,sdsnewlen(buf
,len
));
2850 static void addReplyBulkLen(redisClient
*c
, robj
*obj
) {
2854 if (obj
->encoding
== REDIS_ENCODING_RAW
) {
2855 len
= sdslen(obj
->ptr
);
2857 long n
= (long)obj
->ptr
;
2859 /* Compute how many bytes will take this integer as a radix 10 string */
2865 while((n
= n
/10) != 0) {
2870 intlen
= ll2string(buf
+1,sizeof(buf
)-1,(long long)len
);
2871 buf
[intlen
+1] = '\r';
2872 buf
[intlen
+2] = '\n';
2873 addReplySds(c
,sdsnewlen(buf
,intlen
+3));
2876 static void addReplyBulk(redisClient
*c
, robj
*obj
) {
2877 addReplyBulkLen(c
,obj
);
2879 addReply(c
,shared
.crlf
);
2882 /* In the CONFIG command we need to add vanilla C strings as bulk replies */
2883 static void addReplyBulkCString(redisClient
*c
, char *s
) {
2885 addReply(c
,shared
.nullbulk
);
2887 robj
*o
= createStringObject(s
,strlen(s
));
2893 static void acceptHandler(aeEventLoop
*el
, int fd
, void *privdata
, int mask
) {
2898 REDIS_NOTUSED(mask
);
2899 REDIS_NOTUSED(privdata
);
2901 cfd
= anetAccept(server
.neterr
, fd
, cip
, &cport
);
2902 if (cfd
== AE_ERR
) {
2903 redisLog(REDIS_VERBOSE
,"Accepting client connection: %s", server
.neterr
);
2906 redisLog(REDIS_VERBOSE
,"Accepted %s:%d", cip
, cport
);
2907 if ((c
= createClient(cfd
)) == NULL
) {
2908 redisLog(REDIS_WARNING
,"Error allocating resoures for the client");
2909 close(cfd
); /* May be already closed, just ingore errors */
2912 /* If maxclient directive is set and this is one client more... close the
2913 * connection. Note that we create the client instead to check before
2914 * for this condition, since now the socket is already set in nonblocking
2915 * mode and we can send an error for free using the Kernel I/O */
2916 if (server
.maxclients
&& listLength(server
.clients
) > server
.maxclients
) {
2917 char *err
= "-ERR max number of clients reached\r\n";
2919 /* That's a best effort error message, don't check write errors */
2920 if (write(c
->fd
,err
,strlen(err
)) == -1) {
2921 /* Nothing to do, Just to avoid the warning... */
2926 server
.stat_numconnections
++;
2929 /* ======================= Redis objects implementation ===================== */
2931 static robj
*createObject(int type
, void *ptr
) {
2934 if (server
.vm_enabled
) pthread_mutex_lock(&server
.obj_freelist_mutex
);
2935 if (listLength(server
.objfreelist
)) {
2936 listNode
*head
= listFirst(server
.objfreelist
);
2937 o
= listNodeValue(head
);
2938 listDelNode(server
.objfreelist
,head
);
2939 if (server
.vm_enabled
) pthread_mutex_unlock(&server
.obj_freelist_mutex
);
2941 if (server
.vm_enabled
) {
2942 pthread_mutex_unlock(&server
.obj_freelist_mutex
);
2943 o
= zmalloc(sizeof(*o
));
2945 o
= zmalloc(sizeof(*o
)-sizeof(struct redisObjectVM
));
2949 o
->encoding
= REDIS_ENCODING_RAW
;
2952 if (server
.vm_enabled
) {
2953 /* Note that this code may run in the context of an I/O thread
2954 * and accessing to server.unixtime in theory is an error
2955 * (no locks). But in practice this is safe, and even if we read
2956 * garbage Redis will not fail, as it's just a statistical info */
2957 o
->vm
.atime
= server
.unixtime
;
2958 o
->storage
= REDIS_VM_MEMORY
;
2963 static robj
*createStringObject(char *ptr
, size_t len
) {
2964 return createObject(REDIS_STRING
,sdsnewlen(ptr
,len
));
2967 static robj
*createStringObjectFromLongLong(long long value
) {
2969 if (value
>= 0 && value
< REDIS_SHARED_INTEGERS
) {
2970 incrRefCount(shared
.integers
[value
]);
2971 o
= shared
.integers
[value
];
2973 if (value
>= LONG_MIN
&& value
<= LONG_MAX
) {
2974 o
= createObject(REDIS_STRING
, NULL
);
2975 o
->encoding
= REDIS_ENCODING_INT
;
2976 o
->ptr
= (void*)((long)value
);
2978 o
= createObject(REDIS_STRING
,sdsfromlonglong(value
));
2984 static robj
*dupStringObject(robj
*o
) {
2985 assert(o
->encoding
== REDIS_ENCODING_RAW
);
2986 return createStringObject(o
->ptr
,sdslen(o
->ptr
));
2989 static robj
*createListObject(void) {
2990 list
*l
= listCreate();
2992 listSetFreeMethod(l
,decrRefCount
);
2993 return createObject(REDIS_LIST
,l
);
2996 static robj
*createSetObject(void) {
2997 dict
*d
= dictCreate(&setDictType
,NULL
);
2998 return createObject(REDIS_SET
,d
);
3001 static robj
*createHashObject(void) {
3002 /* All the Hashes start as zipmaps. Will be automatically converted
3003 * into hash tables if there are enough elements or big elements
3005 unsigned char *zm
= zipmapNew();
3006 robj
*o
= createObject(REDIS_HASH
,zm
);
3007 o
->encoding
= REDIS_ENCODING_ZIPMAP
;
3011 static robj
*createZsetObject(void) {
3012 zset
*zs
= zmalloc(sizeof(*zs
));
3014 zs
->dict
= dictCreate(&zsetDictType
,NULL
);
3015 zs
->zsl
= zslCreate();
3016 return createObject(REDIS_ZSET
,zs
);
3019 static void freeStringObject(robj
*o
) {
3020 if (o
->encoding
== REDIS_ENCODING_RAW
) {
3025 static void freeListObject(robj
*o
) {
3026 listRelease((list
*) o
->ptr
);
3029 static void freeSetObject(robj
*o
) {
3030 dictRelease((dict
*) o
->ptr
);
3033 static void freeZsetObject(robj
*o
) {
3036 dictRelease(zs
->dict
);
3041 static void freeHashObject(robj
*o
) {
3042 switch (o
->encoding
) {
3043 case REDIS_ENCODING_HT
:
3044 dictRelease((dict
*) o
->ptr
);
3046 case REDIS_ENCODING_ZIPMAP
:
3050 redisPanic("Unknown hash encoding type");
3055 static void incrRefCount(robj
*o
) {
3059 static void decrRefCount(void *obj
) {
3062 if (o
->refcount
<= 0) redisPanic("decrRefCount against refcount <= 0");
3063 /* Object is a key of a swapped out value, or in the process of being
3065 if (server
.vm_enabled
&&
3066 (o
->storage
== REDIS_VM_SWAPPED
|| o
->storage
== REDIS_VM_LOADING
))
3068 if (o
->storage
== REDIS_VM_LOADING
) vmCancelThreadedIOJob(obj
);
3069 redisAssert(o
->type
== REDIS_STRING
);
3070 freeStringObject(o
);
3071 vmMarkPagesFree(o
->vm
.page
,o
->vm
.usedpages
);
3072 pthread_mutex_lock(&server
.obj_freelist_mutex
);
3073 if (listLength(server
.objfreelist
) > REDIS_OBJFREELIST_MAX
||
3074 !listAddNodeHead(server
.objfreelist
,o
))
3076 pthread_mutex_unlock(&server
.obj_freelist_mutex
);
3077 server
.vm_stats_swapped_objects
--;
3080 /* Object is in memory, or in the process of being swapped out. */
3081 if (--(o
->refcount
) == 0) {
3082 if (server
.vm_enabled
&& o
->storage
== REDIS_VM_SWAPPING
)
3083 vmCancelThreadedIOJob(obj
);
3085 case REDIS_STRING
: freeStringObject(o
); break;
3086 case REDIS_LIST
: freeListObject(o
); break;
3087 case REDIS_SET
: freeSetObject(o
); break;
3088 case REDIS_ZSET
: freeZsetObject(o
); break;
3089 case REDIS_HASH
: freeHashObject(o
); break;
3090 default: redisPanic("Unknown object type"); break;
3092 if (server
.vm_enabled
) pthread_mutex_lock(&server
.obj_freelist_mutex
);
3093 if (listLength(server
.objfreelist
) > REDIS_OBJFREELIST_MAX
||
3094 !listAddNodeHead(server
.objfreelist
,o
))
3096 if (server
.vm_enabled
) pthread_mutex_unlock(&server
.obj_freelist_mutex
);
3100 static robj
*lookupKey(redisDb
*db
, robj
*key
) {
3101 dictEntry
*de
= dictFind(db
->dict
,key
);
3103 robj
*key
= dictGetEntryKey(de
);
3104 robj
*val
= dictGetEntryVal(de
);
3106 if (server
.vm_enabled
) {
3107 if (key
->storage
== REDIS_VM_MEMORY
||
3108 key
->storage
== REDIS_VM_SWAPPING
)
3110 /* If we were swapping the object out, stop it, this key
3112 if (key
->storage
== REDIS_VM_SWAPPING
)
3113 vmCancelThreadedIOJob(key
);
3114 /* Update the access time of the key for the aging algorithm. */
3115 key
->vm
.atime
= server
.unixtime
;
3117 int notify
= (key
->storage
== REDIS_VM_LOADING
);
3119 /* Our value was swapped on disk. Bring it at home. */
3120 redisAssert(val
== NULL
);
3121 val
= vmLoadObject(key
);
3122 dictGetEntryVal(de
) = val
;
3124 /* Clients blocked by the VM subsystem may be waiting for
3126 if (notify
) handleClientsBlockedOnSwappedKey(db
,key
);
3135 static robj
*lookupKeyRead(redisDb
*db
, robj
*key
) {
3136 expireIfNeeded(db
,key
);
3137 return lookupKey(db
,key
);
3140 static robj
*lookupKeyWrite(redisDb
*db
, robj
*key
) {
3141 deleteIfVolatile(db
,key
);
3142 touchWatchedKey(db
,key
);
3143 return lookupKey(db
,key
);
3146 static robj
*lookupKeyReadOrReply(redisClient
*c
, robj
*key
, robj
*reply
) {
3147 robj
*o
= lookupKeyRead(c
->db
, key
);
3148 if (!o
) addReply(c
,reply
);
3152 static robj
*lookupKeyWriteOrReply(redisClient
*c
, robj
*key
, robj
*reply
) {
3153 robj
*o
= lookupKeyWrite(c
->db
, key
);
3154 if (!o
) addReply(c
,reply
);
3158 static int checkType(redisClient
*c
, robj
*o
, int type
) {
3159 if (o
->type
!= type
) {
3160 addReply(c
,shared
.wrongtypeerr
);
3166 static int deleteKey(redisDb
*db
, robj
*key
) {
3169 /* We need to protect key from destruction: after the first dictDelete()
3170 * it may happen that 'key' is no longer valid if we don't increment
3171 * it's count. This may happen when we get the object reference directly
3172 * from the hash table with dictRandomKey() or dict iterators */
3174 if (dictSize(db
->expires
)) dictDelete(db
->expires
,key
);
3175 retval
= dictDelete(db
->dict
,key
);
3178 return retval
== DICT_OK
;
3181 /* Check if the nul-terminated string 's' can be represented by a long
3182 * (that is, is a number that fits into long without any other space or
3183 * character before or after the digits).
3185 * If so, the function returns REDIS_OK and *longval is set to the value
3186 * of the number. Otherwise REDIS_ERR is returned */
3187 static int isStringRepresentableAsLong(sds s
, long *longval
) {
3188 char buf
[32], *endptr
;
3192 value
= strtol(s
, &endptr
, 10);
3193 if (endptr
[0] != '\0') return REDIS_ERR
;
3194 slen
= ll2string(buf
,32,value
);
3196 /* If the number converted back into a string is not identical
3197 * then it's not possible to encode the string as integer */
3198 if (sdslen(s
) != (unsigned)slen
|| memcmp(buf
,s
,slen
)) return REDIS_ERR
;
3199 if (longval
) *longval
= value
;
3203 /* Try to encode a string object in order to save space */
3204 static robj
*tryObjectEncoding(robj
*o
) {
3208 if (o
->encoding
!= REDIS_ENCODING_RAW
)
3209 return o
; /* Already encoded */
3211 /* It's not safe to encode shared objects: shared objects can be shared
3212 * everywhere in the "object space" of Redis. Encoded objects can only
3213 * appear as "values" (and not, for instance, as keys) */
3214 if (o
->refcount
> 1) return o
;
3216 /* Currently we try to encode only strings */
3217 redisAssert(o
->type
== REDIS_STRING
);
3219 /* Check if we can represent this string as a long integer */
3220 if (isStringRepresentableAsLong(s
,&value
) == REDIS_ERR
) return o
;
3222 /* Ok, this object can be encoded */
3223 if (value
>= 0 && value
< REDIS_SHARED_INTEGERS
) {
3225 incrRefCount(shared
.integers
[value
]);
3226 return shared
.integers
[value
];
3228 o
->encoding
= REDIS_ENCODING_INT
;
3230 o
->ptr
= (void*) value
;
3235 /* Get a decoded version of an encoded object (returned as a new object).
3236 * If the object is already raw-encoded just increment the ref count. */
3237 static robj
*getDecodedObject(robj
*o
) {
3240 if (o
->encoding
== REDIS_ENCODING_RAW
) {
3244 if (o
->type
== REDIS_STRING
&& o
->encoding
== REDIS_ENCODING_INT
) {
3247 ll2string(buf
,32,(long)o
->ptr
);
3248 dec
= createStringObject(buf
,strlen(buf
));
3251 redisPanic("Unknown encoding type");
3255 /* Compare two string objects via strcmp() or alike.
3256 * Note that the objects may be integer-encoded. In such a case we
3257 * use ll2string() to get a string representation of the numbers on the stack
3258 * and compare the strings, it's much faster than calling getDecodedObject().
3260 * Important note: if objects are not integer encoded, but binary-safe strings,
3261 * sdscmp() from sds.c will apply memcmp() so this function can be considered
3263 static int compareStringObjects(robj
*a
, robj
*b
) {
3264 redisAssert(a
->type
== REDIS_STRING
&& b
->type
== REDIS_STRING
);
3265 char bufa
[128], bufb
[128], *astr
, *bstr
;
3268 if (a
== b
) return 0;
3269 if (a
->encoding
!= REDIS_ENCODING_RAW
) {
3270 ll2string(bufa
,sizeof(bufa
),(long) a
->ptr
);
3276 if (b
->encoding
!= REDIS_ENCODING_RAW
) {
3277 ll2string(bufb
,sizeof(bufb
),(long) b
->ptr
);
3283 return bothsds
? sdscmp(astr
,bstr
) : strcmp(astr
,bstr
);
3286 /* Equal string objects return 1 if the two objects are the same from the
3287 * point of view of a string comparison, otherwise 0 is returned. Note that
3288 * this function is faster then checking for (compareStringObject(a,b) == 0)
3289 * because it can perform some more optimization. */
3290 static int equalStringObjects(robj
*a
, robj
*b
) {
3291 if (a
->encoding
!= REDIS_ENCODING_RAW
&& b
->encoding
!= REDIS_ENCODING_RAW
){
3292 return a
->ptr
== b
->ptr
;
3294 return compareStringObjects(a
,b
) == 0;
3298 static size_t stringObjectLen(robj
*o
) {
3299 redisAssert(o
->type
== REDIS_STRING
);
3300 if (o
->encoding
== REDIS_ENCODING_RAW
) {
3301 return sdslen(o
->ptr
);
3305 return ll2string(buf
,32,(long)o
->ptr
);
3309 static int getDoubleFromObject(robj
*o
, double *target
) {
3316 redisAssert(o
->type
== REDIS_STRING
);
3317 if (o
->encoding
== REDIS_ENCODING_RAW
) {
3318 value
= strtod(o
->ptr
, &eptr
);
3319 if (eptr
[0] != '\0') return REDIS_ERR
;
3320 } else if (o
->encoding
== REDIS_ENCODING_INT
) {
3321 value
= (long)o
->ptr
;
3323 redisPanic("Unknown string encoding");
3331 static int getDoubleFromObjectOrReply(redisClient
*c
, robj
*o
, double *target
, const char *msg
) {
3333 if (getDoubleFromObject(o
, &value
) != REDIS_OK
) {
3335 addReplySds(c
, sdscatprintf(sdsempty(), "-ERR %s\r\n", msg
));
3337 addReplySds(c
, sdsnew("-ERR value is not a double\r\n"));
3346 static int getLongLongFromObject(robj
*o
, long long *target
) {
3353 redisAssert(o
->type
== REDIS_STRING
);
3354 if (o
->encoding
== REDIS_ENCODING_RAW
) {
3355 value
= strtoll(o
->ptr
, &eptr
, 10);
3356 if (eptr
[0] != '\0') return REDIS_ERR
;
3357 } else if (o
->encoding
== REDIS_ENCODING_INT
) {
3358 value
= (long)o
->ptr
;
3360 redisPanic("Unknown string encoding");
3368 static int getLongLongFromObjectOrReply(redisClient
*c
, robj
*o
, long long *target
, const char *msg
) {
3370 if (getLongLongFromObject(o
, &value
) != REDIS_OK
) {
3372 addReplySds(c
, sdscatprintf(sdsempty(), "-ERR %s\r\n", msg
));
3374 addReplySds(c
, sdsnew("-ERR value is not an integer\r\n"));
3383 static int getLongFromObjectOrReply(redisClient
*c
, robj
*o
, long *target
, const char *msg
) {
3386 if (getLongLongFromObjectOrReply(c
, o
, &value
, msg
) != REDIS_OK
) return REDIS_ERR
;
3387 if (value
< LONG_MIN
|| value
> LONG_MAX
) {
3389 addReplySds(c
, sdscatprintf(sdsempty(), "-ERR %s\r\n", msg
));
3391 addReplySds(c
, sdsnew("-ERR value is out of range\r\n"));
3400 /*============================ RDB saving/loading =========================== */
3402 static int rdbSaveType(FILE *fp
, unsigned char type
) {
3403 if (fwrite(&type
,1,1,fp
) == 0) return -1;
3407 static int rdbSaveTime(FILE *fp
, time_t t
) {
3408 int32_t t32
= (int32_t) t
;
3409 if (fwrite(&t32
,4,1,fp
) == 0) return -1;
3413 /* check rdbLoadLen() comments for more info */
3414 static int rdbSaveLen(FILE *fp
, uint32_t len
) {
3415 unsigned char buf
[2];
3418 /* Save a 6 bit len */
3419 buf
[0] = (len
&0xFF)|(REDIS_RDB_6BITLEN
<<6);
3420 if (fwrite(buf
,1,1,fp
) == 0) return -1;
3421 } else if (len
< (1<<14)) {
3422 /* Save a 14 bit len */
3423 buf
[0] = ((len
>>8)&0xFF)|(REDIS_RDB_14BITLEN
<<6);
3425 if (fwrite(buf
,2,1,fp
) == 0) return -1;
3427 /* Save a 32 bit len */
3428 buf
[0] = (REDIS_RDB_32BITLEN
<<6);
3429 if (fwrite(buf
,1,1,fp
) == 0) return -1;
3431 if (fwrite(&len
,4,1,fp
) == 0) return -1;
3436 /* Encode 'value' as an integer if possible (if integer will fit the
3437 * supported range). If the function successfully encoded the integer
3438 * then the (up to 5 bytes) encoded representation is written in the
3439 * string pointed by 'enc' and the length is returned. Otherwise
3441 static int rdbEncodeInteger(long long value
, unsigned char *enc
) {
3442 /* Finally check if it fits in our ranges */
3443 if (value
>= -(1<<7) && value
<= (1<<7)-1) {
3444 enc
[0] = (REDIS_RDB_ENCVAL
<<6)|REDIS_RDB_ENC_INT8
;
3445 enc
[1] = value
&0xFF;
3447 } else if (value
>= -(1<<15) && value
<= (1<<15)-1) {
3448 enc
[0] = (REDIS_RDB_ENCVAL
<<6)|REDIS_RDB_ENC_INT16
;
3449 enc
[1] = value
&0xFF;
3450 enc
[2] = (value
>>8)&0xFF;
3452 } else if (value
>= -((long long)1<<31) && value
<= ((long long)1<<31)-1) {
3453 enc
[0] = (REDIS_RDB_ENCVAL
<<6)|REDIS_RDB_ENC_INT32
;
3454 enc
[1] = value
&0xFF;
3455 enc
[2] = (value
>>8)&0xFF;
3456 enc
[3] = (value
>>16)&0xFF;
3457 enc
[4] = (value
>>24)&0xFF;
3464 /* String objects in the form "2391" "-100" without any space and with a
3465 * range of values that can fit in an 8, 16 or 32 bit signed value can be
3466 * encoded as integers to save space */
3467 static int rdbTryIntegerEncoding(char *s
, size_t len
, unsigned char *enc
) {
3469 char *endptr
, buf
[32];
3471 /* Check if it's possible to encode this value as a number */
3472 value
= strtoll(s
, &endptr
, 10);
3473 if (endptr
[0] != '\0') return 0;
3474 ll2string(buf
,32,value
);
3476 /* If the number converted back into a string is not identical
3477 * then it's not possible to encode the string as integer */
3478 if (strlen(buf
) != len
|| memcmp(buf
,s
,len
)) return 0;
3480 return rdbEncodeInteger(value
,enc
);
3483 static int rdbSaveLzfStringObject(FILE *fp
, unsigned char *s
, size_t len
) {
3484 size_t comprlen
, outlen
;
3488 /* We require at least four bytes compression for this to be worth it */
3489 if (len
<= 4) return 0;
3491 if ((out
= zmalloc(outlen
+1)) == NULL
) return 0;
3492 comprlen
= lzf_compress(s
, len
, out
, outlen
);
3493 if (comprlen
== 0) {
3497 /* Data compressed! Let's save it on disk */
3498 byte
= (REDIS_RDB_ENCVAL
<<6)|REDIS_RDB_ENC_LZF
;
3499 if (fwrite(&byte
,1,1,fp
) == 0) goto writeerr
;
3500 if (rdbSaveLen(fp
,comprlen
) == -1) goto writeerr
;
3501 if (rdbSaveLen(fp
,len
) == -1) goto writeerr
;
3502 if (fwrite(out
,comprlen
,1,fp
) == 0) goto writeerr
;
3511 /* Save a string object as [len][data] on disk. If the object is a string
3512 * representation of an integer value we try to save it in a special form */
3513 static int rdbSaveRawString(FILE *fp
, unsigned char *s
, size_t len
) {
3516 /* Try integer encoding */
3518 unsigned char buf
[5];
3519 if ((enclen
= rdbTryIntegerEncoding((char*)s
,len
,buf
)) > 0) {
3520 if (fwrite(buf
,enclen
,1,fp
) == 0) return -1;
3525 /* Try LZF compression - under 20 bytes it's unable to compress even
3526 * aaaaaaaaaaaaaaaaaa so skip it */
3527 if (server
.rdbcompression
&& len
> 20) {
3530 retval
= rdbSaveLzfStringObject(fp
,s
,len
);
3531 if (retval
== -1) return -1;
3532 if (retval
> 0) return 0;
3533 /* retval == 0 means data can't be compressed, save the old way */
3536 /* Store verbatim */
3537 if (rdbSaveLen(fp
,len
) == -1) return -1;
3538 if (len
&& fwrite(s
,len
,1,fp
) == 0) return -1;
3542 /* Like rdbSaveRawString() but handles encoded objects */
3543 static int rdbSaveStringObject(FILE *fp
, robj
*obj
) {
3546 /* Avoid to decode the object, then encode it again, if the
3547 * object is alrady integer encoded. */
3548 if (obj
->encoding
== REDIS_ENCODING_INT
) {
3549 long val
= (long) obj
->ptr
;
3550 unsigned char buf
[5];
3553 if ((enclen
= rdbEncodeInteger(val
,buf
)) > 0) {
3554 if (fwrite(buf
,enclen
,1,fp
) == 0) return -1;
3557 /* otherwise... fall throught and continue with the usual
3561 /* Avoid incr/decr ref count business when possible.
3562 * This plays well with copy-on-write given that we are probably
3563 * in a child process (BGSAVE). Also this makes sure key objects
3564 * of swapped objects are not incRefCount-ed (an assert does not allow
3565 * this in order to avoid bugs) */
3566 if (obj
->encoding
!= REDIS_ENCODING_RAW
) {
3567 obj
= getDecodedObject(obj
);
3568 retval
= rdbSaveRawString(fp
,obj
->ptr
,sdslen(obj
->ptr
));
3571 retval
= rdbSaveRawString(fp
,obj
->ptr
,sdslen(obj
->ptr
));
3576 /* Save a double value. Doubles are saved as strings prefixed by an unsigned
3577 * 8 bit integer specifying the length of the representation.
3578 * This 8 bit integer has special values in order to specify the following
3584 static int rdbSaveDoubleValue(FILE *fp
, double val
) {
3585 unsigned char buf
[128];
3591 } else if (!isfinite(val
)) {
3593 buf
[0] = (val
< 0) ? 255 : 254;
3595 #if (DBL_MANT_DIG >= 52) && (LLONG_MAX == 0x7fffffffffffffffLL)
3596 /* Check if the float is in a safe range to be casted into a
3597 * long long. We are assuming that long long is 64 bit here.
3598 * Also we are assuming that there are no implementations around where
3599 * double has precision < 52 bit.
3601 * Under this assumptions we test if a double is inside an interval
3602 * where casting to long long is safe. Then using two castings we
3603 * make sure the decimal part is zero. If all this is true we use
3604 * integer printing function that is much faster. */
3605 double min
= -4503599627370495; /* (2^52)-1 */
3606 double max
= 4503599627370496; /* -(2^52) */
3607 if (val
> min
&& val
< max
&& val
== ((double)((long long)val
)))
3608 ll2string((char*)buf
+1,sizeof(buf
),(long long)val
);
3611 snprintf((char*)buf
+1,sizeof(buf
)-1,"%.17g",val
);
3612 buf
[0] = strlen((char*)buf
+1);
3615 if (fwrite(buf
,len
,1,fp
) == 0) return -1;
3619 /* Save a Redis object. */
3620 static int rdbSaveObject(FILE *fp
, robj
*o
) {
3621 if (o
->type
== REDIS_STRING
) {
3622 /* Save a string value */
3623 if (rdbSaveStringObject(fp
,o
) == -1) return -1;
3624 } else if (o
->type
== REDIS_LIST
) {
3625 /* Save a list value */
3626 list
*list
= o
->ptr
;
3630 if (rdbSaveLen(fp
,listLength(list
)) == -1) return -1;
3631 listRewind(list
,&li
);
3632 while((ln
= listNext(&li
))) {
3633 robj
*eleobj
= listNodeValue(ln
);
3635 if (rdbSaveStringObject(fp
,eleobj
) == -1) return -1;
3637 } else if (o
->type
== REDIS_SET
) {
3638 /* Save a set value */
3640 dictIterator
*di
= dictGetIterator(set
);
3643 if (rdbSaveLen(fp
,dictSize(set
)) == -1) return -1;
3644 while((de
= dictNext(di
)) != NULL
) {
3645 robj
*eleobj
= dictGetEntryKey(de
);
3647 if (rdbSaveStringObject(fp
,eleobj
) == -1) return -1;
3649 dictReleaseIterator(di
);
3650 } else if (o
->type
== REDIS_ZSET
) {
3651 /* Save a set value */
3653 dictIterator
*di
= dictGetIterator(zs
->dict
);
3656 if (rdbSaveLen(fp
,dictSize(zs
->dict
)) == -1) return -1;
3657 while((de
= dictNext(di
)) != NULL
) {
3658 robj
*eleobj
= dictGetEntryKey(de
);
3659 double *score
= dictGetEntryVal(de
);
3661 if (rdbSaveStringObject(fp
,eleobj
) == -1) return -1;
3662 if (rdbSaveDoubleValue(fp
,*score
) == -1) return -1;
3664 dictReleaseIterator(di
);
3665 } else if (o
->type
== REDIS_HASH
) {
3666 /* Save a hash value */
3667 if (o
->encoding
== REDIS_ENCODING_ZIPMAP
) {
3668 unsigned char *p
= zipmapRewind(o
->ptr
);
3669 unsigned int count
= zipmapLen(o
->ptr
);
3670 unsigned char *key
, *val
;
3671 unsigned int klen
, vlen
;
3673 if (rdbSaveLen(fp
,count
) == -1) return -1;
3674 while((p
= zipmapNext(p
,&key
,&klen
,&val
,&vlen
)) != NULL
) {
3675 if (rdbSaveRawString(fp
,key
,klen
) == -1) return -1;
3676 if (rdbSaveRawString(fp
,val
,vlen
) == -1) return -1;
3679 dictIterator
*di
= dictGetIterator(o
->ptr
);
3682 if (rdbSaveLen(fp
,dictSize((dict
*)o
->ptr
)) == -1) return -1;
3683 while((de
= dictNext(di
)) != NULL
) {
3684 robj
*key
= dictGetEntryKey(de
);
3685 robj
*val
= dictGetEntryVal(de
);
3687 if (rdbSaveStringObject(fp
,key
) == -1) return -1;
3688 if (rdbSaveStringObject(fp
,val
) == -1) return -1;
3690 dictReleaseIterator(di
);
3693 redisPanic("Unknown object type");
3698 /* Return the length the object will have on disk if saved with
3699 * the rdbSaveObject() function. Currently we use a trick to get
3700 * this length with very little changes to the code. In the future
3701 * we could switch to a faster solution. */
3702 static off_t
rdbSavedObjectLen(robj
*o
, FILE *fp
) {
3703 if (fp
== NULL
) fp
= server
.devnull
;
3705 assert(rdbSaveObject(fp
,o
) != 1);
3709 /* Return the number of pages required to save this object in the swap file */
3710 static off_t
rdbSavedObjectPages(robj
*o
, FILE *fp
) {
3711 off_t bytes
= rdbSavedObjectLen(o
,fp
);
3713 return (bytes
+(server
.vm_page_size
-1))/server
.vm_page_size
;
3716 /* Save the DB on disk. Return REDIS_ERR on error, REDIS_OK on success */
3717 static int rdbSave(char *filename
) {
3718 dictIterator
*di
= NULL
;
3723 time_t now
= time(NULL
);
3725 /* Wait for I/O therads to terminate, just in case this is a
3726 * foreground-saving, to avoid seeking the swap file descriptor at the
3728 if (server
.vm_enabled
)
3729 waitEmptyIOJobsQueue();
3731 snprintf(tmpfile
,256,"temp-%d.rdb", (int) getpid());
3732 fp
= fopen(tmpfile
,"w");
3734 redisLog(REDIS_WARNING
, "Failed saving the DB: %s", strerror(errno
));
3737 if (fwrite("REDIS0001",9,1,fp
) == 0) goto werr
;
3738 for (j
= 0; j
< server
.dbnum
; j
++) {
3739 redisDb
*db
= server
.db
+j
;
3741 if (dictSize(d
) == 0) continue;
3742 di
= dictGetIterator(d
);
3748 /* Write the SELECT DB opcode */
3749 if (rdbSaveType(fp
,REDIS_SELECTDB
) == -1) goto werr
;
3750 if (rdbSaveLen(fp
,j
) == -1) goto werr
;
3752 /* Iterate this DB writing every entry */
3753 while((de
= dictNext(di
)) != NULL
) {
3754 robj
*key
= dictGetEntryKey(de
);
3755 robj
*o
= dictGetEntryVal(de
);
3756 time_t expiretime
= getExpire(db
,key
);
3758 /* Save the expire time */
3759 if (expiretime
!= -1) {
3760 /* If this key is already expired skip it */
3761 if (expiretime
< now
) continue;
3762 if (rdbSaveType(fp
,REDIS_EXPIRETIME
) == -1) goto werr
;
3763 if (rdbSaveTime(fp
,expiretime
) == -1) goto werr
;
3765 /* Save the key and associated value. This requires special
3766 * handling if the value is swapped out. */
3767 if (!server
.vm_enabled
|| key
->storage
== REDIS_VM_MEMORY
||
3768 key
->storage
== REDIS_VM_SWAPPING
) {
3769 /* Save type, key, value */
3770 if (rdbSaveType(fp
,o
->type
) == -1) goto werr
;
3771 if (rdbSaveStringObject(fp
,key
) == -1) goto werr
;
3772 if (rdbSaveObject(fp
,o
) == -1) goto werr
;
3774 /* REDIS_VM_SWAPPED or REDIS_VM_LOADING */
3776 /* Get a preview of the object in memory */
3777 po
= vmPreviewObject(key
);
3778 /* Save type, key, value */
3779 if (rdbSaveType(fp
,key
->vtype
) == -1) goto werr
;
3780 if (rdbSaveStringObject(fp
,key
) == -1) goto werr
;
3781 if (rdbSaveObject(fp
,po
) == -1) goto werr
;
3782 /* Remove the loaded object from memory */
3786 dictReleaseIterator(di
);
3789 if (rdbSaveType(fp
,REDIS_EOF
) == -1) goto werr
;
3791 /* Make sure data will not remain on the OS's output buffers */
3796 /* Use RENAME to make sure the DB file is changed atomically only
3797 * if the generate DB file is ok. */
3798 if (rename(tmpfile
,filename
) == -1) {
3799 redisLog(REDIS_WARNING
,"Error moving temp DB file on the final destination: %s", strerror(errno
));
3803 redisLog(REDIS_NOTICE
,"DB saved on disk");
3805 server
.lastsave
= time(NULL
);
3811 redisLog(REDIS_WARNING
,"Write error saving DB on disk: %s", strerror(errno
));
3812 if (di
) dictReleaseIterator(di
);
3816 static int rdbSaveBackground(char *filename
) {
3819 if (server
.bgsavechildpid
!= -1) return REDIS_ERR
;
3820 if (server
.vm_enabled
) waitEmptyIOJobsQueue();
3821 if ((childpid
= fork()) == 0) {
3823 if (server
.vm_enabled
) vmReopenSwapFile();
3825 if (rdbSave(filename
) == REDIS_OK
) {
3832 if (childpid
== -1) {
3833 redisLog(REDIS_WARNING
,"Can't save in background: fork: %s",
3837 redisLog(REDIS_NOTICE
,"Background saving started by pid %d",childpid
);
3838 server
.bgsavechildpid
= childpid
;
3839 updateDictResizePolicy();
3842 return REDIS_OK
; /* unreached */
3845 static void rdbRemoveTempFile(pid_t childpid
) {
3848 snprintf(tmpfile
,256,"temp-%d.rdb", (int) childpid
);
3852 static int rdbLoadType(FILE *fp
) {
3854 if (fread(&type
,1,1,fp
) == 0) return -1;
3858 static time_t rdbLoadTime(FILE *fp
) {
3860 if (fread(&t32
,4,1,fp
) == 0) return -1;
3861 return (time_t) t32
;
3864 /* Load an encoded length from the DB, see the REDIS_RDB_* defines on the top
3865 * of this file for a description of how these are stored on disk.
3867 * isencoded is set to 1 if the value read is not actually a length but
3868 * an "encoding type", check the above comments for more info */
3869 static uint32_t rdbLoadLen(FILE *fp
, int *isencoded
) {
3870 unsigned char buf
[2];
3874 if (isencoded
) *isencoded
= 0;
3875 if (fread(buf
,1,1,fp
) == 0) return REDIS_RDB_LENERR
;
3876 type
= (buf
[0]&0xC0)>>6;
3877 if (type
== REDIS_RDB_6BITLEN
) {
3878 /* Read a 6 bit len */
3880 } else if (type
== REDIS_RDB_ENCVAL
) {
3881 /* Read a 6 bit len encoding type */
3882 if (isencoded
) *isencoded
= 1;
3884 } else if (type
== REDIS_RDB_14BITLEN
) {
3885 /* Read a 14 bit len */
3886 if (fread(buf
+1,1,1,fp
) == 0) return REDIS_RDB_LENERR
;
3887 return ((buf
[0]&0x3F)<<8)|buf
[1];
3889 /* Read a 32 bit len */
3890 if (fread(&len
,4,1,fp
) == 0) return REDIS_RDB_LENERR
;
3895 /* Load an integer-encoded object from file 'fp', with the specified
3896 * encoding type 'enctype'. If encode is true the function may return
3897 * an integer-encoded object as reply, otherwise the returned object
3898 * will always be encoded as a raw string. */
3899 static robj
*rdbLoadIntegerObject(FILE *fp
, int enctype
, int encode
) {
3900 unsigned char enc
[4];
3903 if (enctype
== REDIS_RDB_ENC_INT8
) {
3904 if (fread(enc
,1,1,fp
) == 0) return NULL
;
3905 val
= (signed char)enc
[0];
3906 } else if (enctype
== REDIS_RDB_ENC_INT16
) {
3908 if (fread(enc
,2,1,fp
) == 0) return NULL
;
3909 v
= enc
[0]|(enc
[1]<<8);
3911 } else if (enctype
== REDIS_RDB_ENC_INT32
) {
3913 if (fread(enc
,4,1,fp
) == 0) return NULL
;
3914 v
= enc
[0]|(enc
[1]<<8)|(enc
[2]<<16)|(enc
[3]<<24);
3917 val
= 0; /* anti-warning */
3918 redisPanic("Unknown RDB integer encoding type");
3921 return createStringObjectFromLongLong(val
);
3923 return createObject(REDIS_STRING
,sdsfromlonglong(val
));
3926 static robj
*rdbLoadLzfStringObject(FILE*fp
) {
3927 unsigned int len
, clen
;
3928 unsigned char *c
= NULL
;
3931 if ((clen
= rdbLoadLen(fp
,NULL
)) == REDIS_RDB_LENERR
) return NULL
;
3932 if ((len
= rdbLoadLen(fp
,NULL
)) == REDIS_RDB_LENERR
) return NULL
;
3933 if ((c
= zmalloc(clen
)) == NULL
) goto err
;
3934 if ((val
= sdsnewlen(NULL
,len
)) == NULL
) goto err
;
3935 if (fread(c
,clen
,1,fp
) == 0) goto err
;
3936 if (lzf_decompress(c
,clen
,val
,len
) == 0) goto err
;
3938 return createObject(REDIS_STRING
,val
);
3945 static robj
*rdbGenericLoadStringObject(FILE*fp
, int encode
) {
3950 len
= rdbLoadLen(fp
,&isencoded
);
3953 case REDIS_RDB_ENC_INT8
:
3954 case REDIS_RDB_ENC_INT16
:
3955 case REDIS_RDB_ENC_INT32
:
3956 return rdbLoadIntegerObject(fp
,len
,encode
);
3957 case REDIS_RDB_ENC_LZF
:
3958 return rdbLoadLzfStringObject(fp
);
3960 redisPanic("Unknown RDB encoding type");
3964 if (len
== REDIS_RDB_LENERR
) return NULL
;
3965 val
= sdsnewlen(NULL
,len
);
3966 if (len
&& fread(val
,len
,1,fp
) == 0) {
3970 return createObject(REDIS_STRING
,val
);
3973 static robj
*rdbLoadStringObject(FILE *fp
) {
3974 return rdbGenericLoadStringObject(fp
,0);
3977 static robj
*rdbLoadEncodedStringObject(FILE *fp
) {
3978 return rdbGenericLoadStringObject(fp
,1);
3981 /* For information about double serialization check rdbSaveDoubleValue() */
3982 static int rdbLoadDoubleValue(FILE *fp
, double *val
) {
3986 if (fread(&len
,1,1,fp
) == 0) return -1;
3988 case 255: *val
= R_NegInf
; return 0;
3989 case 254: *val
= R_PosInf
; return 0;
3990 case 253: *val
= R_Nan
; return 0;
3992 if (fread(buf
,len
,1,fp
) == 0) return -1;
3994 sscanf(buf
, "%lg", val
);
3999 /* Load a Redis object of the specified type from the specified file.
4000 * On success a newly allocated object is returned, otherwise NULL. */
4001 static robj
*rdbLoadObject(int type
, FILE *fp
) {
4004 redisLog(REDIS_DEBUG
,"LOADING OBJECT %d (at %d)\n",type
,ftell(fp
));
4005 if (type
== REDIS_STRING
) {
4006 /* Read string value */
4007 if ((o
= rdbLoadEncodedStringObject(fp
)) == NULL
) return NULL
;
4008 o
= tryObjectEncoding(o
);
4009 } else if (type
== REDIS_LIST
|| type
== REDIS_SET
) {
4010 /* Read list/set value */
4013 if ((listlen
= rdbLoadLen(fp
,NULL
)) == REDIS_RDB_LENERR
) return NULL
;
4014 o
= (type
== REDIS_LIST
) ? createListObject() : createSetObject();
4015 /* It's faster to expand the dict to the right size asap in order
4016 * to avoid rehashing */
4017 if (type
== REDIS_SET
&& listlen
> DICT_HT_INITIAL_SIZE
)
4018 dictExpand(o
->ptr
,listlen
);
4019 /* Load every single element of the list/set */
4023 if ((ele
= rdbLoadEncodedStringObject(fp
)) == NULL
) return NULL
;
4024 ele
= tryObjectEncoding(ele
);
4025 if (type
== REDIS_LIST
) {
4026 listAddNodeTail((list
*)o
->ptr
,ele
);
4028 dictAdd((dict
*)o
->ptr
,ele
,NULL
);
4031 } else if (type
== REDIS_ZSET
) {
4032 /* Read list/set value */
4036 if ((zsetlen
= rdbLoadLen(fp
,NULL
)) == REDIS_RDB_LENERR
) return NULL
;
4037 o
= createZsetObject();
4039 /* Load every single element of the list/set */
4042 double *score
= zmalloc(sizeof(double));
4044 if ((ele
= rdbLoadEncodedStringObject(fp
)) == NULL
) return NULL
;
4045 ele
= tryObjectEncoding(ele
);
4046 if (rdbLoadDoubleValue(fp
,score
) == -1) return NULL
;
4047 dictAdd(zs
->dict
,ele
,score
);
4048 zslInsert(zs
->zsl
,*score
,ele
);
4049 incrRefCount(ele
); /* added to skiplist */
4051 } else if (type
== REDIS_HASH
) {
4054 if ((hashlen
= rdbLoadLen(fp
,NULL
)) == REDIS_RDB_LENERR
) return NULL
;
4055 o
= createHashObject();
4056 /* Too many entries? Use an hash table. */
4057 if (hashlen
> server
.hash_max_zipmap_entries
)
4058 convertToRealHash(o
);
4059 /* Load every key/value, then set it into the zipmap or hash
4060 * table, as needed. */
4064 if ((key
= rdbLoadStringObject(fp
)) == NULL
) return NULL
;
4065 if ((val
= rdbLoadStringObject(fp
)) == NULL
) return NULL
;
4066 /* If we are using a zipmap and there are too big values
4067 * the object is converted to real hash table encoding. */
4068 if (o
->encoding
!= REDIS_ENCODING_HT
&&
4069 (sdslen(key
->ptr
) > server
.hash_max_zipmap_value
||
4070 sdslen(val
->ptr
) > server
.hash_max_zipmap_value
))
4072 convertToRealHash(o
);
4075 if (o
->encoding
== REDIS_ENCODING_ZIPMAP
) {
4076 unsigned char *zm
= o
->ptr
;
4078 zm
= zipmapSet(zm
,key
->ptr
,sdslen(key
->ptr
),
4079 val
->ptr
,sdslen(val
->ptr
),NULL
);
4084 key
= tryObjectEncoding(key
);
4085 val
= tryObjectEncoding(val
);
4086 dictAdd((dict
*)o
->ptr
,key
,val
);
4090 redisPanic("Unknown object type");
4095 static int rdbLoad(char *filename
) {
4098 int type
, retval
, rdbver
;
4099 int swap_all_values
= 0;
4100 dict
*d
= server
.db
[0].dict
;
4101 redisDb
*db
= server
.db
+0;
4103 time_t expiretime
, now
= time(NULL
);
4104 long long loadedkeys
= 0;
4106 fp
= fopen(filename
,"r");
4107 if (!fp
) return REDIS_ERR
;
4108 if (fread(buf
,9,1,fp
) == 0) goto eoferr
;
4110 if (memcmp(buf
,"REDIS",5) != 0) {
4112 redisLog(REDIS_WARNING
,"Wrong signature trying to load DB from file");
4115 rdbver
= atoi(buf
+5);
4118 redisLog(REDIS_WARNING
,"Can't handle RDB format version %d",rdbver
);
4126 if ((type
= rdbLoadType(fp
)) == -1) goto eoferr
;
4127 if (type
== REDIS_EXPIRETIME
) {
4128 if ((expiretime
= rdbLoadTime(fp
)) == -1) goto eoferr
;
4129 /* We read the time so we need to read the object type again */
4130 if ((type
= rdbLoadType(fp
)) == -1) goto eoferr
;
4132 if (type
== REDIS_EOF
) break;
4133 /* Handle SELECT DB opcode as a special case */
4134 if (type
== REDIS_SELECTDB
) {
4135 if ((dbid
= rdbLoadLen(fp
,NULL
)) == REDIS_RDB_LENERR
)
4137 if (dbid
>= (unsigned)server
.dbnum
) {
4138 redisLog(REDIS_WARNING
,"FATAL: Data file was created with a Redis server configured to handle more than %d databases. Exiting\n", server
.dbnum
);
4141 db
= server
.db
+dbid
;
4146 if ((key
= rdbLoadStringObject(fp
)) == NULL
) goto eoferr
;
4148 if ((val
= rdbLoadObject(type
,fp
)) == NULL
) goto eoferr
;
4149 /* Check if the key already expired */
4150 if (expiretime
!= -1 && expiretime
< now
) {
4155 /* Add the new object in the hash table */
4156 retval
= dictAdd(d
,key
,val
);
4157 if (retval
== DICT_ERR
) {
4158 redisLog(REDIS_WARNING
,"Loading DB, duplicated key (%s) found! Unrecoverable error, exiting now.", key
->ptr
);
4162 /* Set the expire time if needed */
4163 if (expiretime
!= -1) setExpire(db
,key
,expiretime
);
4165 /* Handle swapping while loading big datasets when VM is on */
4167 /* If we detecter we are hopeless about fitting something in memory
4168 * we just swap every new key on disk. Directly...
4169 * Note that's important to check for this condition before resorting
4170 * to random sampling, otherwise we may try to swap already
4172 if (swap_all_values
) {
4173 dictEntry
*de
= dictFind(d
,key
);
4175 /* de may be NULL since the key already expired */
4177 key
= dictGetEntryKey(de
);
4178 val
= dictGetEntryVal(de
);
4180 if (vmSwapObjectBlocking(key
,val
) == REDIS_OK
) {
4181 dictGetEntryVal(de
) = NULL
;
4187 /* If we have still some hope of having some value fitting memory
4188 * then we try random sampling. */
4189 if (!swap_all_values
&& server
.vm_enabled
&& (loadedkeys
% 5000) == 0) {
4190 while (zmalloc_used_memory() > server
.vm_max_memory
) {
4191 if (vmSwapOneObjectBlocking() == REDIS_ERR
) break;
4193 if (zmalloc_used_memory() > server
.vm_max_memory
)
4194 swap_all_values
= 1; /* We are already using too much mem */
4200 eoferr
: /* unexpected end of file is handled here with a fatal exit */
4201 redisLog(REDIS_WARNING
,"Short read or OOM loading DB. Unrecoverable error, aborting now.");
4203 return REDIS_ERR
; /* Just to avoid warning */
4206 /*================================== Shutdown =============================== */
4207 static int prepareForShutdown() {
4208 redisLog(REDIS_WARNING
,"User requested shutdown, saving DB...");
4209 /* Kill the saving child if there is a background saving in progress.
4210 We want to avoid race conditions, for instance our saving child may
4211 overwrite the synchronous saving did by SHUTDOWN. */
4212 if (server
.bgsavechildpid
!= -1) {
4213 redisLog(REDIS_WARNING
,"There is a live saving child. Killing it!");
4214 kill(server
.bgsavechildpid
,SIGKILL
);
4215 rdbRemoveTempFile(server
.bgsavechildpid
);
4217 if (server
.appendonly
) {
4218 /* Append only file: fsync() the AOF and exit */
4219 aof_fsync(server
.appendfd
);
4220 if (server
.vm_enabled
) unlink(server
.vm_swap_file
);
4222 /* Snapshotting. Perform a SYNC SAVE and exit */
4223 if (rdbSave(server
.dbfilename
) == REDIS_OK
) {
4224 if (server
.daemonize
)
4225 unlink(server
.pidfile
);
4226 redisLog(REDIS_WARNING
,"%zu bytes used at exit",zmalloc_used_memory());
4228 /* Ooops.. error saving! The best we can do is to continue
4229 * operating. Note that if there was a background saving process,
4230 * in the next cron() Redis will be notified that the background
4231 * saving aborted, handling special stuff like slaves pending for
4232 * synchronization... */
4233 redisLog(REDIS_WARNING
,"Error trying to save the DB, can't exit");
4237 redisLog(REDIS_WARNING
,"Server exit now, bye bye...");
4241 /*================================== Commands =============================== */
4243 static void authCommand(redisClient
*c
) {
4244 if (!server
.requirepass
|| !strcmp(c
->argv
[1]->ptr
, server
.requirepass
)) {
4245 c
->authenticated
= 1;
4246 addReply(c
,shared
.ok
);
4248 c
->authenticated
= 0;
4249 addReplySds(c
,sdscatprintf(sdsempty(),"-ERR invalid password\r\n"));
4253 static void pingCommand(redisClient
*c
) {
4254 addReply(c
,shared
.pong
);
4257 static void echoCommand(redisClient
*c
) {
4258 addReplyBulk(c
,c
->argv
[1]);
4261 /*=================================== Strings =============================== */
4263 static void setGenericCommand(redisClient
*c
, int nx
, robj
*key
, robj
*val
, robj
*expire
) {
4265 long seconds
= 0; /* initialized to avoid an harmness warning */
4268 if (getLongFromObjectOrReply(c
, expire
, &seconds
, NULL
) != REDIS_OK
)
4271 addReplySds(c
,sdsnew("-ERR invalid expire time in SETEX\r\n"));
4276 touchWatchedKey(c
->db
,key
);
4277 if (nx
) deleteIfVolatile(c
->db
,key
);
4278 retval
= dictAdd(c
->db
->dict
,key
,val
);
4279 if (retval
== DICT_ERR
) {
4281 /* If the key is about a swapped value, we want a new key object
4282 * to overwrite the old. So we delete the old key in the database.
4283 * This will also make sure that swap pages about the old object
4284 * will be marked as free. */
4285 if (server
.vm_enabled
&& deleteIfSwapped(c
->db
,key
))
4287 dictReplace(c
->db
->dict
,key
,val
);
4290 addReply(c
,shared
.czero
);
4298 removeExpire(c
->db
,key
);
4299 if (expire
) setExpire(c
->db
,key
,time(NULL
)+seconds
);
4300 addReply(c
, nx
? shared
.cone
: shared
.ok
);
4303 static void setCommand(redisClient
*c
) {
4304 setGenericCommand(c
,0,c
->argv
[1],c
->argv
[2],NULL
);
4307 static void setnxCommand(redisClient
*c
) {
4308 setGenericCommand(c
,1,c
->argv
[1],c
->argv
[2],NULL
);
4311 static void setexCommand(redisClient
*c
) {
4312 setGenericCommand(c
,0,c
->argv
[1],c
->argv
[3],c
->argv
[2]);
4315 static int getGenericCommand(redisClient
*c
) {
4318 if ((o
= lookupKeyReadOrReply(c
,c
->argv
[1],shared
.nullbulk
)) == NULL
)
4321 if (o
->type
!= REDIS_STRING
) {
4322 addReply(c
,shared
.wrongtypeerr
);
4330 static void getCommand(redisClient
*c
) {
4331 getGenericCommand(c
);
4334 static void getsetCommand(redisClient
*c
) {
4335 if (getGenericCommand(c
) == REDIS_ERR
) return;
4336 if (dictAdd(c
->db
->dict
,c
->argv
[1],c
->argv
[2]) == DICT_ERR
) {
4337 dictReplace(c
->db
->dict
,c
->argv
[1],c
->argv
[2]);
4339 incrRefCount(c
->argv
[1]);
4341 incrRefCount(c
->argv
[2]);
4343 removeExpire(c
->db
,c
->argv
[1]);
4346 static void mgetCommand(redisClient
*c
) {
4349 addReplySds(c
,sdscatprintf(sdsempty(),"*%d\r\n",c
->argc
-1));
4350 for (j
= 1; j
< c
->argc
; j
++) {
4351 robj
*o
= lookupKeyRead(c
->db
,c
->argv
[j
]);
4353 addReply(c
,shared
.nullbulk
);
4355 if (o
->type
!= REDIS_STRING
) {
4356 addReply(c
,shared
.nullbulk
);
4364 static void msetGenericCommand(redisClient
*c
, int nx
) {
4365 int j
, busykeys
= 0;
4367 if ((c
->argc
% 2) == 0) {
4368 addReplySds(c
,sdsnew("-ERR wrong number of arguments for MSET\r\n"));
4371 /* Handle the NX flag. The MSETNX semantic is to return zero and don't
4372 * set nothing at all if at least one already key exists. */
4374 for (j
= 1; j
< c
->argc
; j
+= 2) {
4375 if (lookupKeyWrite(c
->db
,c
->argv
[j
]) != NULL
) {
4381 addReply(c
, shared
.czero
);
4385 for (j
= 1; j
< c
->argc
; j
+= 2) {
4388 c
->argv
[j
+1] = tryObjectEncoding(c
->argv
[j
+1]);
4389 retval
= dictAdd(c
->db
->dict
,c
->argv
[j
],c
->argv
[j
+1]);
4390 if (retval
== DICT_ERR
) {
4391 dictReplace(c
->db
->dict
,c
->argv
[j
],c
->argv
[j
+1]);
4392 incrRefCount(c
->argv
[j
+1]);
4394 incrRefCount(c
->argv
[j
]);
4395 incrRefCount(c
->argv
[j
+1]);
4397 removeExpire(c
->db
,c
->argv
[j
]);
4399 server
.dirty
+= (c
->argc
-1)/2;
4400 addReply(c
, nx
? shared
.cone
: shared
.ok
);
4403 static void msetCommand(redisClient
*c
) {
4404 msetGenericCommand(c
,0);
4407 static void msetnxCommand(redisClient
*c
) {
4408 msetGenericCommand(c
,1);
4411 static void incrDecrCommand(redisClient
*c
, long long incr
) {
4416 o
= lookupKeyWrite(c
->db
,c
->argv
[1]);
4417 if (o
!= NULL
&& checkType(c
,o
,REDIS_STRING
)) return;
4418 if (getLongLongFromObjectOrReply(c
,o
,&value
,NULL
) != REDIS_OK
) return;
4421 o
= createStringObjectFromLongLong(value
);
4422 retval
= dictAdd(c
->db
->dict
,c
->argv
[1],o
);
4423 if (retval
== DICT_ERR
) {
4424 dictReplace(c
->db
->dict
,c
->argv
[1],o
);
4425 removeExpire(c
->db
,c
->argv
[1]);
4427 incrRefCount(c
->argv
[1]);
4430 addReply(c
,shared
.colon
);
4432 addReply(c
,shared
.crlf
);
4435 static void incrCommand(redisClient
*c
) {
4436 incrDecrCommand(c
,1);
4439 static void decrCommand(redisClient
*c
) {
4440 incrDecrCommand(c
,-1);
4443 static void incrbyCommand(redisClient
*c
) {
4446 if (getLongLongFromObjectOrReply(c
, c
->argv
[2], &incr
, NULL
) != REDIS_OK
) return;
4447 incrDecrCommand(c
,incr
);
4450 static void decrbyCommand(redisClient
*c
) {
4453 if (getLongLongFromObjectOrReply(c
, c
->argv
[2], &incr
, NULL
) != REDIS_OK
) return;
4454 incrDecrCommand(c
,-incr
);
4457 static void appendCommand(redisClient
*c
) {
4462 o
= lookupKeyWrite(c
->db
,c
->argv
[1]);
4464 /* Create the key */
4465 retval
= dictAdd(c
->db
->dict
,c
->argv
[1],c
->argv
[2]);
4466 incrRefCount(c
->argv
[1]);
4467 incrRefCount(c
->argv
[2]);
4468 totlen
= stringObjectLen(c
->argv
[2]);
4472 de
= dictFind(c
->db
->dict
,c
->argv
[1]);
4475 o
= dictGetEntryVal(de
);
4476 if (o
->type
!= REDIS_STRING
) {
4477 addReply(c
,shared
.wrongtypeerr
);
4480 /* If the object is specially encoded or shared we have to make
4482 if (o
->refcount
!= 1 || o
->encoding
!= REDIS_ENCODING_RAW
) {
4483 robj
*decoded
= getDecodedObject(o
);
4485 o
= createStringObject(decoded
->ptr
, sdslen(decoded
->ptr
));
4486 decrRefCount(decoded
);
4487 dictReplace(c
->db
->dict
,c
->argv
[1],o
);
4490 if (c
->argv
[2]->encoding
== REDIS_ENCODING_RAW
) {
4491 o
->ptr
= sdscatlen(o
->ptr
,
4492 c
->argv
[2]->ptr
, sdslen(c
->argv
[2]->ptr
));
4494 o
->ptr
= sdscatprintf(o
->ptr
, "%ld",
4495 (unsigned long) c
->argv
[2]->ptr
);
4497 totlen
= sdslen(o
->ptr
);
4500 addReplySds(c
,sdscatprintf(sdsempty(),":%lu\r\n",(unsigned long)totlen
));
4503 static void substrCommand(redisClient
*c
) {
4505 long start
= atoi(c
->argv
[2]->ptr
);
4506 long end
= atoi(c
->argv
[3]->ptr
);
4507 size_t rangelen
, strlen
;
4510 if ((o
= lookupKeyReadOrReply(c
,c
->argv
[1],shared
.nullbulk
)) == NULL
||
4511 checkType(c
,o
,REDIS_STRING
)) return;
4513 o
= getDecodedObject(o
);
4514 strlen
= sdslen(o
->ptr
);
4516 /* convert negative indexes */
4517 if (start
< 0) start
= strlen
+start
;
4518 if (end
< 0) end
= strlen
+end
;
4519 if (start
< 0) start
= 0;
4520 if (end
< 0) end
= 0;
4522 /* indexes sanity checks */
4523 if (start
> end
|| (size_t)start
>= strlen
) {
4524 /* Out of range start or start > end result in null reply */
4525 addReply(c
,shared
.nullbulk
);
4529 if ((size_t)end
>= strlen
) end
= strlen
-1;
4530 rangelen
= (end
-start
)+1;
4532 /* Return the result */
4533 addReplySds(c
,sdscatprintf(sdsempty(),"$%zu\r\n",rangelen
));
4534 range
= sdsnewlen((char*)o
->ptr
+start
,rangelen
);
4535 addReplySds(c
,range
);
4536 addReply(c
,shared
.crlf
);
4540 /* ========================= Type agnostic commands ========================= */
4542 static void delCommand(redisClient
*c
) {
4545 for (j
= 1; j
< c
->argc
; j
++) {
4546 if (deleteKey(c
->db
,c
->argv
[j
])) {
4547 touchWatchedKey(c
->db
,c
->argv
[j
]);
4552 addReplyLongLong(c
,deleted
);
4555 static void existsCommand(redisClient
*c
) {
4556 expireIfNeeded(c
->db
,c
->argv
[1]);
4557 if (dictFind(c
->db
->dict
,c
->argv
[1])) {
4558 addReply(c
, shared
.cone
);
4560 addReply(c
, shared
.czero
);
4564 static void selectCommand(redisClient
*c
) {
4565 int id
= atoi(c
->argv
[1]->ptr
);
4567 if (selectDb(c
,id
) == REDIS_ERR
) {
4568 addReplySds(c
,sdsnew("-ERR invalid DB index\r\n"));
4570 addReply(c
,shared
.ok
);
4574 static void randomkeyCommand(redisClient
*c
) {
4579 de
= dictGetRandomKey(c
->db
->dict
);
4580 if (!de
|| expireIfNeeded(c
->db
,dictGetEntryKey(de
)) == 0) break;
4584 addReply(c
,shared
.nullbulk
);
4588 key
= dictGetEntryKey(de
);
4589 if (server
.vm_enabled
) {
4590 key
= dupStringObject(key
);
4591 addReplyBulk(c
,key
);
4594 addReplyBulk(c
,key
);
4598 static void keysCommand(redisClient
*c
) {
4601 sds pattern
= c
->argv
[1]->ptr
;
4602 int plen
= sdslen(pattern
);
4603 unsigned long numkeys
= 0;
4604 robj
*lenobj
= createObject(REDIS_STRING
,NULL
);
4606 di
= dictGetIterator(c
->db
->dict
);
4608 decrRefCount(lenobj
);
4609 while((de
= dictNext(di
)) != NULL
) {
4610 robj
*keyobj
= dictGetEntryKey(de
);
4612 sds key
= keyobj
->ptr
;
4613 if ((pattern
[0] == '*' && pattern
[1] == '\0') ||
4614 stringmatchlen(pattern
,plen
,key
,sdslen(key
),0)) {
4615 if (expireIfNeeded(c
->db
,keyobj
) == 0) {
4616 addReplyBulk(c
,keyobj
);
4621 dictReleaseIterator(di
);
4622 lenobj
->ptr
= sdscatprintf(sdsempty(),"*%lu\r\n",numkeys
);
4625 static void dbsizeCommand(redisClient
*c
) {
4627 sdscatprintf(sdsempty(),":%lu\r\n",dictSize(c
->db
->dict
)));
4630 static void lastsaveCommand(redisClient
*c
) {
4632 sdscatprintf(sdsempty(),":%lu\r\n",server
.lastsave
));
4635 static void typeCommand(redisClient
*c
) {
4639 o
= lookupKeyRead(c
->db
,c
->argv
[1]);
4644 case REDIS_STRING
: type
= "+string"; break;
4645 case REDIS_LIST
: type
= "+list"; break;
4646 case REDIS_SET
: type
= "+set"; break;
4647 case REDIS_ZSET
: type
= "+zset"; break;
4648 case REDIS_HASH
: type
= "+hash"; break;
4649 default: type
= "+unknown"; break;
4652 addReplySds(c
,sdsnew(type
));
4653 addReply(c
,shared
.crlf
);
4656 static void saveCommand(redisClient
*c
) {
4657 if (server
.bgsavechildpid
!= -1) {
4658 addReplySds(c
,sdsnew("-ERR background save in progress\r\n"));
4661 if (rdbSave(server
.dbfilename
) == REDIS_OK
) {
4662 addReply(c
,shared
.ok
);
4664 addReply(c
,shared
.err
);
4668 static void bgsaveCommand(redisClient
*c
) {
4669 if (server
.bgsavechildpid
!= -1) {
4670 addReplySds(c
,sdsnew("-ERR background save already in progress\r\n"));
4673 if (rdbSaveBackground(server
.dbfilename
) == REDIS_OK
) {
4674 char *status
= "+Background saving started\r\n";
4675 addReplySds(c
,sdsnew(status
));
4677 addReply(c
,shared
.err
);
4681 static void shutdownCommand(redisClient
*c
) {
4682 if (prepareForShutdown() == REDIS_OK
)
4684 addReplySds(c
, sdsnew("-ERR Errors trying to SHUTDOWN. Check logs.\r\n"));
4687 static void renameGenericCommand(redisClient
*c
, int nx
) {
4690 /* To use the same key as src and dst is probably an error */
4691 if (sdscmp(c
->argv
[1]->ptr
,c
->argv
[2]->ptr
) == 0) {
4692 addReply(c
,shared
.sameobjecterr
);
4696 if ((o
= lookupKeyWriteOrReply(c
,c
->argv
[1],shared
.nokeyerr
)) == NULL
)
4700 deleteIfVolatile(c
->db
,c
->argv
[2]);
4701 if (dictAdd(c
->db
->dict
,c
->argv
[2],o
) == DICT_ERR
) {
4704 addReply(c
,shared
.czero
);
4707 dictReplace(c
->db
->dict
,c
->argv
[2],o
);
4709 incrRefCount(c
->argv
[2]);
4711 deleteKey(c
->db
,c
->argv
[1]);
4712 touchWatchedKey(c
->db
,c
->argv
[2]);
4714 addReply(c
,nx
? shared
.cone
: shared
.ok
);
4717 static void renameCommand(redisClient
*c
) {
4718 renameGenericCommand(c
,0);
4721 static void renamenxCommand(redisClient
*c
) {
4722 renameGenericCommand(c
,1);
4725 static void moveCommand(redisClient
*c
) {
4730 /* Obtain source and target DB pointers */
4733 if (selectDb(c
,atoi(c
->argv
[2]->ptr
)) == REDIS_ERR
) {
4734 addReply(c
,shared
.outofrangeerr
);
4738 selectDb(c
,srcid
); /* Back to the source DB */
4740 /* If the user is moving using as target the same
4741 * DB as the source DB it is probably an error. */
4743 addReply(c
,shared
.sameobjecterr
);
4747 /* Check if the element exists and get a reference */
4748 o
= lookupKeyWrite(c
->db
,c
->argv
[1]);
4750 addReply(c
,shared
.czero
);
4754 /* Try to add the element to the target DB */
4755 deleteIfVolatile(dst
,c
->argv
[1]);
4756 if (dictAdd(dst
->dict
,c
->argv
[1],o
) == DICT_ERR
) {
4757 addReply(c
,shared
.czero
);
4760 incrRefCount(c
->argv
[1]);
4763 /* OK! key moved, free the entry in the source DB */
4764 deleteKey(src
,c
->argv
[1]);
4766 addReply(c
,shared
.cone
);
4769 /* =================================== Lists ================================ */
4770 static void pushGenericCommand(redisClient
*c
, int where
) {
4774 lobj
= lookupKeyWrite(c
->db
,c
->argv
[1]);
4776 if (handleClientsWaitingListPush(c
,c
->argv
[1],c
->argv
[2])) {
4777 addReply(c
,shared
.cone
);
4780 lobj
= createListObject();
4782 if (where
== REDIS_HEAD
) {
4783 listAddNodeHead(list
,c
->argv
[2]);
4785 listAddNodeTail(list
,c
->argv
[2]);
4787 dictAdd(c
->db
->dict
,c
->argv
[1],lobj
);
4788 incrRefCount(c
->argv
[1]);
4789 incrRefCount(c
->argv
[2]);
4791 if (lobj
->type
!= REDIS_LIST
) {
4792 addReply(c
,shared
.wrongtypeerr
);
4795 if (handleClientsWaitingListPush(c
,c
->argv
[1],c
->argv
[2])) {
4796 addReply(c
,shared
.cone
);
4800 if (where
== REDIS_HEAD
) {
4801 listAddNodeHead(list
,c
->argv
[2]);
4803 listAddNodeTail(list
,c
->argv
[2]);
4805 incrRefCount(c
->argv
[2]);
4808 addReplyLongLong(c
,listLength(list
));
4811 static void lpushCommand(redisClient
*c
) {
4812 pushGenericCommand(c
,REDIS_HEAD
);
4815 static void rpushCommand(redisClient
*c
) {
4816 pushGenericCommand(c
,REDIS_TAIL
);
4819 static void llenCommand(redisClient
*c
) {
4823 if ((o
= lookupKeyReadOrReply(c
,c
->argv
[1],shared
.czero
)) == NULL
||
4824 checkType(c
,o
,REDIS_LIST
)) return;
4827 addReplyUlong(c
,listLength(l
));
4830 static void lindexCommand(redisClient
*c
) {
4832 int index
= atoi(c
->argv
[2]->ptr
);
4836 if ((o
= lookupKeyReadOrReply(c
,c
->argv
[1],shared
.nullbulk
)) == NULL
||
4837 checkType(c
,o
,REDIS_LIST
)) return;
4840 ln
= listIndex(list
, index
);
4842 addReply(c
,shared
.nullbulk
);
4844 robj
*ele
= listNodeValue(ln
);
4845 addReplyBulk(c
,ele
);
4849 static void lsetCommand(redisClient
*c
) {
4851 int index
= atoi(c
->argv
[2]->ptr
);
4855 if ((o
= lookupKeyWriteOrReply(c
,c
->argv
[1],shared
.nokeyerr
)) == NULL
||
4856 checkType(c
,o
,REDIS_LIST
)) return;
4859 ln
= listIndex(list
, index
);
4861 addReply(c
,shared
.outofrangeerr
);
4863 robj
*ele
= listNodeValue(ln
);
4866 listNodeValue(ln
) = c
->argv
[3];
4867 incrRefCount(c
->argv
[3]);
4868 addReply(c
,shared
.ok
);
4873 static void popGenericCommand(redisClient
*c
, int where
) {
4878 if ((o
= lookupKeyWriteOrReply(c
,c
->argv
[1],shared
.nullbulk
)) == NULL
||
4879 checkType(c
,o
,REDIS_LIST
)) return;
4882 if (where
== REDIS_HEAD
)
4883 ln
= listFirst(list
);
4885 ln
= listLast(list
);
4888 addReply(c
,shared
.nullbulk
);
4890 robj
*ele
= listNodeValue(ln
);
4891 addReplyBulk(c
,ele
);
4892 listDelNode(list
,ln
);
4893 if (listLength(list
) == 0) deleteKey(c
->db
,c
->argv
[1]);
4898 static void lpopCommand(redisClient
*c
) {
4899 popGenericCommand(c
,REDIS_HEAD
);
4902 static void rpopCommand(redisClient
*c
) {
4903 popGenericCommand(c
,REDIS_TAIL
);
4906 static void lrangeCommand(redisClient
*c
) {
4908 int start
= atoi(c
->argv
[2]->ptr
);
4909 int end
= atoi(c
->argv
[3]->ptr
);
4916 if ((o
= lookupKeyReadOrReply(c
,c
->argv
[1],shared
.emptymultibulk
)) == NULL
4917 || checkType(c
,o
,REDIS_LIST
)) return;
4919 llen
= listLength(list
);
4921 /* convert negative indexes */
4922 if (start
< 0) start
= llen
+start
;
4923 if (end
< 0) end
= llen
+end
;
4924 if (start
< 0) start
= 0;
4925 if (end
< 0) end
= 0;
4927 /* indexes sanity checks */
4928 if (start
> end
|| start
>= llen
) {
4929 /* Out of range start or start > end result in empty list */
4930 addReply(c
,shared
.emptymultibulk
);
4933 if (end
>= llen
) end
= llen
-1;
4934 rangelen
= (end
-start
)+1;
4936 /* Return the result in form of a multi-bulk reply */
4937 ln
= listIndex(list
, start
);
4938 addReplySds(c
,sdscatprintf(sdsempty(),"*%d\r\n",rangelen
));
4939 for (j
= 0; j
< rangelen
; j
++) {
4940 ele
= listNodeValue(ln
);
4941 addReplyBulk(c
,ele
);
4946 static void ltrimCommand(redisClient
*c
) {
4948 int start
= atoi(c
->argv
[2]->ptr
);
4949 int end
= atoi(c
->argv
[3]->ptr
);
4951 int j
, ltrim
, rtrim
;
4955 if ((o
= lookupKeyWriteOrReply(c
,c
->argv
[1],shared
.ok
)) == NULL
||
4956 checkType(c
,o
,REDIS_LIST
)) return;
4958 llen
= listLength(list
);
4960 /* convert negative indexes */
4961 if (start
< 0) start
= llen
+start
;
4962 if (end
< 0) end
= llen
+end
;
4963 if (start
< 0) start
= 0;
4964 if (end
< 0) end
= 0;
4966 /* indexes sanity checks */
4967 if (start
> end
|| start
>= llen
) {
4968 /* Out of range start or start > end result in empty list */
4972 if (end
>= llen
) end
= llen
-1;
4977 /* Remove list elements to perform the trim */
4978 for (j
= 0; j
< ltrim
; j
++) {
4979 ln
= listFirst(list
);
4980 listDelNode(list
,ln
);
4982 for (j
= 0; j
< rtrim
; j
++) {
4983 ln
= listLast(list
);
4984 listDelNode(list
,ln
);
4986 if (listLength(list
) == 0) deleteKey(c
->db
,c
->argv
[1]);
4988 addReply(c
,shared
.ok
);
4991 static void lremCommand(redisClient
*c
) {
4994 listNode
*ln
, *next
;
4995 int toremove
= atoi(c
->argv
[2]->ptr
);
4999 if ((o
= lookupKeyWriteOrReply(c
,c
->argv
[1],shared
.czero
)) == NULL
||
5000 checkType(c
,o
,REDIS_LIST
)) return;
5004 toremove
= -toremove
;
5007 ln
= fromtail
? list
->tail
: list
->head
;
5009 robj
*ele
= listNodeValue(ln
);
5011 next
= fromtail
? ln
->prev
: ln
->next
;
5012 if (equalStringObjects(ele
,c
->argv
[3])) {
5013 listDelNode(list
,ln
);
5016 if (toremove
&& removed
== toremove
) break;
5020 if (listLength(list
) == 0) deleteKey(c
->db
,c
->argv
[1]);
5021 addReplySds(c
,sdscatprintf(sdsempty(),":%d\r\n",removed
));
5024 /* This is the semantic of this command:
5025 * RPOPLPUSH srclist dstlist:
5026 * IF LLEN(srclist) > 0
5027 * element = RPOP srclist
5028 * LPUSH dstlist element
5035 * The idea is to be able to get an element from a list in a reliable way
5036 * since the element is not just returned but pushed against another list
5037 * as well. This command was originally proposed by Ezra Zygmuntowicz.
5039 static void rpoplpushcommand(redisClient
*c
) {
5044 if ((sobj
= lookupKeyWriteOrReply(c
,c
->argv
[1],shared
.nullbulk
)) == NULL
||
5045 checkType(c
,sobj
,REDIS_LIST
)) return;
5046 srclist
= sobj
->ptr
;
5047 ln
= listLast(srclist
);
5050 addReply(c
,shared
.nullbulk
);
5052 robj
*dobj
= lookupKeyWrite(c
->db
,c
->argv
[2]);
5053 robj
*ele
= listNodeValue(ln
);
5056 if (dobj
&& dobj
->type
!= REDIS_LIST
) {
5057 addReply(c
,shared
.wrongtypeerr
);
5061 /* Add the element to the target list (unless it's directly
5062 * passed to some BLPOP-ing client */
5063 if (!handleClientsWaitingListPush(c
,c
->argv
[2],ele
)) {
5065 /* Create the list if the key does not exist */
5066 dobj
= createListObject();
5067 dictAdd(c
->db
->dict
,c
->argv
[2],dobj
);
5068 incrRefCount(c
->argv
[2]);
5070 dstlist
= dobj
->ptr
;
5071 listAddNodeHead(dstlist
,ele
);
5075 /* Send the element to the client as reply as well */
5076 addReplyBulk(c
,ele
);
5078 /* Finally remove the element from the source list */
5079 listDelNode(srclist
,ln
);
5080 if (listLength(srclist
) == 0) deleteKey(c
->db
,c
->argv
[1]);
5085 /* ==================================== Sets ================================ */
5087 static void saddCommand(redisClient
*c
) {
5090 set
= lookupKeyWrite(c
->db
,c
->argv
[1]);
5092 set
= createSetObject();
5093 dictAdd(c
->db
->dict
,c
->argv
[1],set
);
5094 incrRefCount(c
->argv
[1]);
5096 if (set
->type
!= REDIS_SET
) {
5097 addReply(c
,shared
.wrongtypeerr
);
5101 if (dictAdd(set
->ptr
,c
->argv
[2],NULL
) == DICT_OK
) {
5102 incrRefCount(c
->argv
[2]);
5104 addReply(c
,shared
.cone
);
5106 addReply(c
,shared
.czero
);
5110 static void sremCommand(redisClient
*c
) {
5113 if ((set
= lookupKeyWriteOrReply(c
,c
->argv
[1],shared
.czero
)) == NULL
||
5114 checkType(c
,set
,REDIS_SET
)) return;
5116 if (dictDelete(set
->ptr
,c
->argv
[2]) == DICT_OK
) {
5118 if (htNeedsResize(set
->ptr
)) dictResize(set
->ptr
);
5119 if (dictSize((dict
*)set
->ptr
) == 0) deleteKey(c
->db
,c
->argv
[1]);
5120 addReply(c
,shared
.cone
);
5122 addReply(c
,shared
.czero
);
5126 static void smoveCommand(redisClient
*c
) {
5127 robj
*srcset
, *dstset
;
5129 srcset
= lookupKeyWrite(c
->db
,c
->argv
[1]);
5130 dstset
= lookupKeyWrite(c
->db
,c
->argv
[2]);
5132 /* If the source key does not exist return 0, if it's of the wrong type
5134 if (srcset
== NULL
|| srcset
->type
!= REDIS_SET
) {
5135 addReply(c
, srcset
? shared
.wrongtypeerr
: shared
.czero
);
5138 /* Error if the destination key is not a set as well */
5139 if (dstset
&& dstset
->type
!= REDIS_SET
) {
5140 addReply(c
,shared
.wrongtypeerr
);
5143 /* Remove the element from the source set */
5144 if (dictDelete(srcset
->ptr
,c
->argv
[3]) == DICT_ERR
) {
5145 /* Key not found in the src set! return zero */
5146 addReply(c
,shared
.czero
);
5149 if (dictSize((dict
*)srcset
->ptr
) == 0 && srcset
!= dstset
)
5150 deleteKey(c
->db
,c
->argv
[1]);
5152 /* Add the element to the destination set */
5154 dstset
= createSetObject();
5155 dictAdd(c
->db
->dict
,c
->argv
[2],dstset
);
5156 incrRefCount(c
->argv
[2]);
5158 if (dictAdd(dstset
->ptr
,c
->argv
[3],NULL
) == DICT_OK
)
5159 incrRefCount(c
->argv
[3]);
5160 addReply(c
,shared
.cone
);
5163 static void sismemberCommand(redisClient
*c
) {
5166 if ((set
= lookupKeyReadOrReply(c
,c
->argv
[1],shared
.czero
)) == NULL
||
5167 checkType(c
,set
,REDIS_SET
)) return;
5169 if (dictFind(set
->ptr
,c
->argv
[2]))
5170 addReply(c
,shared
.cone
);
5172 addReply(c
,shared
.czero
);
5175 static void scardCommand(redisClient
*c
) {
5179 if ((o
= lookupKeyReadOrReply(c
,c
->argv
[1],shared
.czero
)) == NULL
||
5180 checkType(c
,o
,REDIS_SET
)) return;
5183 addReplyUlong(c
,dictSize(s
));
5186 static void spopCommand(redisClient
*c
) {
5190 if ((set
= lookupKeyWriteOrReply(c
,c
->argv
[1],shared
.nullbulk
)) == NULL
||
5191 checkType(c
,set
,REDIS_SET
)) return;
5193 de
= dictGetRandomKey(set
->ptr
);
5195 addReply(c
,shared
.nullbulk
);
5197 robj
*ele
= dictGetEntryKey(de
);
5199 addReplyBulk(c
,ele
);
5200 dictDelete(set
->ptr
,ele
);
5201 if (htNeedsResize(set
->ptr
)) dictResize(set
->ptr
);
5202 if (dictSize((dict
*)set
->ptr
) == 0) deleteKey(c
->db
,c
->argv
[1]);
5207 static void srandmemberCommand(redisClient
*c
) {
5211 if ((set
= lookupKeyReadOrReply(c
,c
->argv
[1],shared
.nullbulk
)) == NULL
||
5212 checkType(c
,set
,REDIS_SET
)) return;
5214 de
= dictGetRandomKey(set
->ptr
);
5216 addReply(c
,shared
.nullbulk
);
5218 robj
*ele
= dictGetEntryKey(de
);
5220 addReplyBulk(c
,ele
);
5224 static int qsortCompareSetsByCardinality(const void *s1
, const void *s2
) {
5225 dict
**d1
= (void*) s1
, **d2
= (void*) s2
;
5227 return dictSize(*d1
)-dictSize(*d2
);
5230 static void sinterGenericCommand(redisClient
*c
, robj
**setskeys
, unsigned long setsnum
, robj
*dstkey
) {
5231 dict
**dv
= zmalloc(sizeof(dict
*)*setsnum
);
5234 robj
*lenobj
= NULL
, *dstset
= NULL
;
5235 unsigned long j
, cardinality
= 0;
5237 for (j
= 0; j
< setsnum
; j
++) {
5241 lookupKeyWrite(c
->db
,setskeys
[j
]) :
5242 lookupKeyRead(c
->db
,setskeys
[j
]);
5246 if (deleteKey(c
->db
,dstkey
))
5248 addReply(c
,shared
.czero
);
5250 addReply(c
,shared
.emptymultibulk
);
5254 if (setobj
->type
!= REDIS_SET
) {
5256 addReply(c
,shared
.wrongtypeerr
);
5259 dv
[j
] = setobj
->ptr
;
5261 /* Sort sets from the smallest to largest, this will improve our
5262 * algorithm's performace */
5263 qsort(dv
,setsnum
,sizeof(dict
*),qsortCompareSetsByCardinality
);
5265 /* The first thing we should output is the total number of elements...
5266 * since this is a multi-bulk write, but at this stage we don't know
5267 * the intersection set size, so we use a trick, append an empty object
5268 * to the output list and save the pointer to later modify it with the
5271 lenobj
= createObject(REDIS_STRING
,NULL
);
5273 decrRefCount(lenobj
);
5275 /* If we have a target key where to store the resulting set
5276 * create this key with an empty set inside */
5277 dstset
= createSetObject();
5280 /* Iterate all the elements of the first (smallest) set, and test
5281 * the element against all the other sets, if at least one set does
5282 * not include the element it is discarded */
5283 di
= dictGetIterator(dv
[0]);
5285 while((de
= dictNext(di
)) != NULL
) {
5288 for (j
= 1; j
< setsnum
; j
++)
5289 if (dictFind(dv
[j
],dictGetEntryKey(de
)) == NULL
) break;
5291 continue; /* at least one set does not contain the member */
5292 ele
= dictGetEntryKey(de
);
5294 addReplyBulk(c
,ele
);
5297 dictAdd(dstset
->ptr
,ele
,NULL
);
5301 dictReleaseIterator(di
);
5304 /* Store the resulting set into the target, if the intersection
5305 * is not an empty set. */
5306 deleteKey(c
->db
,dstkey
);
5307 if (dictSize((dict
*)dstset
->ptr
) > 0) {
5308 dictAdd(c
->db
->dict
,dstkey
,dstset
);
5309 incrRefCount(dstkey
);
5310 addReplyLongLong(c
,dictSize((dict
*)dstset
->ptr
));
5312 decrRefCount(dstset
);
5313 addReply(c
,shared
.czero
);
5317 lenobj
->ptr
= sdscatprintf(sdsempty(),"*%lu\r\n",cardinality
);
/* Command handler for the non-storing intersection: forwards every argument
 * after the command name (c->argv+1, c->argc-1 of them) to
 * sinterGenericCommand() with a NULL destination key. */
5322 static void sinterCommand(redisClient
*c
) {
5323 sinterGenericCommand(c
,c
->argv
+1,c
->argc
-1,NULL
);
/* Command handler for the storing intersection: c->argv[1] is passed as the
 * destination key and the remaining arguments (c->argv+2, c->argc-2 of them)
 * as the source keys of sinterGenericCommand(). */
5326 static void sinterstoreCommand(redisClient
*c
) {
5327 sinterGenericCommand(c
,c
->argv
+2,c
->argc
-2,c
->argv
[1]);
/* Operation selectors passed as the 'op' argument of the generic set and
 * sorted set commands (sunionDiffGenericCommand, zunionInterGenericCommand). */
5330 #define REDIS_OP_UNION 0
5331 #define REDIS_OP_DIFF 1
5332 #define REDIS_OP_INTER 2
/* Generic implementation behind the union and difference set commands.
 *
 * c        - client issuing the command
 * setskeys - array of 'setsnum' key objects naming the source sets
 * dstkey   - destination key, or NULL when the result is only replied
 * op       - REDIS_OP_UNION or REDIS_OP_DIFF
 *
 * The result is accumulated in a temporary set object: for a union every
 * element of every source set is added; for a difference the elements of the
 * first set are added and the elements found in the following sets are
 * deleted again. Without a destination key the members are sent as a
 * multi-bulk reply; with one, the key is replaced by the result set (or just
 * deleted when the result is empty) and the resulting size is replied. */
5334 static void sunionDiffGenericCommand(redisClient
*c
, robj
**setskeys
, int setsnum
, robj
*dstkey
, int op
) {
/* dv[j] will hold the dict of the j-th source set (NULL for a missing key,
 * see the "!dv[j]" tests below). */
5335 dict
**dv
= zmalloc(sizeof(dict
*)*setsnum
);
5338 robj
*dstset
= NULL
;
5339 int j
, cardinality
= 0;
/* Resolve every source key. A value that is not a set produces a wrong-type
 * error reply. NOTE(review): confirm that the error path releases dv. */
5341 for (j
= 0; j
< setsnum
; j
++) {
5345 lookupKeyWrite(c
->db
,setskeys
[j
]) :
5346 lookupKeyRead(c
->db
,setskeys
[j
]);
5351 if (setobj
->type
!= REDIS_SET
) {
5353 addReply(c
,shared
.wrongtypeerr
);
5356 dv
[j
] = setobj
->ptr
;
5359 /* We need a temp set object to store our union. If the dstkey
5360 * is not NULL (that is, we are inside an SUNIONSTORE operation) then
5361 * this set object will be the resulting object to set into the target key*/
5362 dstset
= createSetObject();
5364 /* Iterate all the elements of all the sets, add every element a single
5365 * time to the result set */
5366 for (j
= 0; j
< setsnum
; j
++) {
5367 if (op
== REDIS_OP_DIFF
&& j
== 0 && !dv
[j
]) break; /* result set is empty */
5368 if (!dv
[j
]) continue; /* non existing keys are like empty sets */
5370 di
= dictGetIterator(dv
[j
]);
5372 while((de
= dictNext(di
)) != NULL
) {
5375 /* dictAdd will not add the same element multiple times */
5376 ele
= dictGetEntryKey(de
);
5377 if (op
== REDIS_OP_UNION
|| j
== 0) {
5378 if (dictAdd(dstset
->ptr
,ele
,NULL
) == DICT_OK
) {
5382 } else if (op
== REDIS_OP_DIFF
) {
5383 if (dictDelete(dstset
->ptr
,ele
) == DICT_OK
) {
5388 dictReleaseIterator(di
);
5390 /* result set is empty? Exit asap. */
5391 if (op
== REDIS_OP_DIFF
&& cardinality
== 0) break;
5394 /* Output the content of the resulting set, if not in STORE mode */
5396 addReplySds(c
,sdscatprintf(sdsempty(),"*%d\r\n",cardinality
));
5397 di
= dictGetIterator(dstset
->ptr
);
5398 while((de
= dictNext(di
)) != NULL
) {
5401 ele
= dictGetEntryKey(de
);
5402 addReplyBulk(c
,ele
);
5404 dictReleaseIterator(di
);
5405 decrRefCount(dstset
);
5407 /* If we have a target key where to store the resulting set
5408 * create this key with the result set inside */
5409 deleteKey(c
->db
,dstkey
);
5410 if (dictSize((dict
*)dstset
->ptr
) > 0) {
5411 dictAdd(c
->db
->dict
,dstkey
,dstset
);
5412 incrRefCount(dstkey
);
5413 addReplyLongLong(c
,dictSize((dict
*)dstset
->ptr
));
5415 decrRefCount(dstset
);
5416 addReply(c
,shared
.czero
);
/* Non-storing union: all arguments after the command name are source keys,
 * the destination key is NULL and the operation is REDIS_OP_UNION. */
5423 static void sunionCommand(redisClient
*c
) {
5424 sunionDiffGenericCommand(c
,c
->argv
+1,c
->argc
-1,NULL
,REDIS_OP_UNION
);
/* Storing union: c->argv[1] is the destination key, the arguments from
 * c->argv[2] on are the source keys; the operation is REDIS_OP_UNION. */
5427 static void sunionstoreCommand(redisClient
*c
) {
5428 sunionDiffGenericCommand(c
,c
->argv
+2,c
->argc
-2,c
->argv
[1],REDIS_OP_UNION
);
/* Non-storing difference: all arguments after the command name are source
 * keys, the destination key is NULL and the operation is REDIS_OP_DIFF. */
5431 static void sdiffCommand(redisClient
*c
) {
5432 sunionDiffGenericCommand(c
,c
->argv
+1,c
->argc
-1,NULL
,REDIS_OP_DIFF
);
/* Storing difference: c->argv[1] is the destination key, the arguments from
 * c->argv[2] on are the source keys; the operation is REDIS_OP_DIFF. */
5435 static void sdiffstoreCommand(redisClient
*c
) {
5436 sunionDiffGenericCommand(c
,c
->argv
+2,c
->argc
-2,c
->argv
[1],REDIS_OP_DIFF
);
5439 /* ==================================== ZSets =============================== */
5441 /* ZSETs are ordered sets using two data structures to hold the same elements
5442 * in order to get O(log(N)) INSERT and REMOVE operations into a sorted
5445 * The elements are added to a hash table mapping Redis objects to scores.
5446 * At the same time the elements are added to a skip list mapping scores
5447 * to Redis objects (so objects are sorted by scores in this "view"). */
5449 /* This skiplist implementation is almost a C translation of the original
5450 * algorithm described by William Pugh in "Skip Lists: A Probabilistic
5451 * Alternative to Balanced Trees", modified in three ways:
5452 * a) this implementation allows for repeated values.
5453 * b) the comparison is not just by key (our 'score') but by satellite data.
5454 * c) there is a back pointer, so it's a doubly linked list with the back
5455 * pointers being only at "level 1". This allows to traverse the list
5456 * from tail to head, useful for ZREVRANGE. */
/* Allocate a skiplist node able to take part in 'level' levels.
 *
 * The node and its array of 'level' forward pointers are allocated with
 * zmalloc(). Presumably 'score' and 'obj' are stored in the node and the node
 * is returned -- TODO confirm, those statements are not part of what is
 * documented here. */
5458 static zskiplistNode
*zslCreateNode(int level
, double score
, robj
*obj
) {
5459 zskiplistNode
*zn
= zmalloc(sizeof(*zn
));
5461 zn
->forward
= zmalloc(sizeof(zskiplistNode
*) * level
);
/* span has one entry fewer than forward: users of the list index it as
 * span[i-1] and treat level 0 as having an implicit span of 1. */
5463 zn
->span
= zmalloc(sizeof(unsigned int) * (level
- 1));
/* Allocate a new, empty skiplist.
 *
 * The header node is created with ZSKIPLIST_MAXLEVEL levels, score 0 and no
 * object; all its forward pointers are set to NULL, its spans to 0 and its
 * backward pointer to NULL. */
5471 static zskiplist
*zslCreate(void) {
5475 zsl
= zmalloc(sizeof(*zsl
));
5478 zsl
->header
= zslCreateNode(ZSKIPLIST_MAXLEVEL
,0,NULL
);
5479 for (j
= 0; j
< ZSKIPLIST_MAXLEVEL
; j
++) {
5480 zsl
->header
->forward
[j
] = NULL
;
5482 /* span has space for ZSKIPLIST_MAXLEVEL-1 elements */
5483 if (j
< ZSKIPLIST_MAXLEVEL
-1)
5484 zsl
->header
->span
[j
] = 0;
5486 zsl
->header
->backward
= NULL
;
/* Release a skiplist node: drops the reference held on the stored object and
 * frees the forward pointer array. Presumably the span array and the node
 * itself are freed as well -- TODO confirm. */
5491 static void zslFreeNode(zskiplistNode
*node
) {
5492 decrRefCount(node
->obj
);
5493 zfree(node
->forward
);
/* Free a whole skiplist. The first real node is fetched from the header
 * before the header's forward and span arrays are freed; the remaining nodes
 * are then visited through their level-0 forward pointer, saving 'next'
 * before the current node goes away. */
5498 static void zslFree(zskiplist
*zsl
) {
5499 zskiplistNode
*node
= zsl
->header
->forward
[0], *next
;
5501 zfree(zsl
->header
->forward
);
5502 zfree(zsl
->header
->span
);
5505 next
= node
->forward
[0];
/* Pick the number of levels for a new node. The loop continues while the low
 * 16 bits of random() are below ZSKIPLIST_P * 0xFFFF (presumably incrementing
 * 'level' on every iteration -- TODO confirm); the result is capped at
 * ZSKIPLIST_MAXLEVEL. */
5512 static int zslRandomLevel(void) {
5514 while ((random()&0xFFFF) < (ZSKIPLIST_P
* 0xFFFF))
5516 return (level
<ZSKIPLIST_MAXLEVEL
) ? level
: ZSKIPLIST_MAXLEVEL
;
/* Insert 'obj' with the given 'score' into the skiplist.
 *
 * The search runs from the highest level in use down to level 0; nodes are
 * ordered by score and, for equal scores, by compareStringObjects() on the
 * stored objects. rank[i] accumulates the number of elements crossed at
 * level i so that the spans of the new node and of its predecessors can be
 * computed. The caller must make sure the element is not already present. */
5519 static void zslInsert(zskiplist
*zsl
, double score
, robj
*obj
) {
5520 zskiplistNode
*update
[ZSKIPLIST_MAXLEVEL
], *x
;
5521 unsigned int rank
[ZSKIPLIST_MAXLEVEL
];
5525 for (i
= zsl
->level
-1; i
>= 0; i
--) {
5526 /* store rank that is crossed to reach the insert position */
5527 rank
[i
] = i
== (zsl
->level
-1) ? 0 : rank
[i
+1];
5529 while (x
->forward
[i
] &&
5530 (x
->forward
[i
]->score
< score
||
5531 (x
->forward
[i
]->score
== score
&&
5532 compareStringObjects(x
->forward
[i
]->obj
,obj
) < 0))) {
5533 rank
[i
] += i
> 0 ? x
->span
[i
-1] : 1;
5538 /* we assume the key is not already inside, since we allow duplicated
5539 * scores, and the re-insertion of score and redis object should never
5540 * happen since the caller of zslInsert() should test in the hash table
5541 * if the element is already inside or not. */
5542 level
= zslRandomLevel();
/* Levels that were not in use so far start from the header, whose span at
 * those levels is set to the current list length. */
5543 if (level
> zsl
->level
) {
5544 for (i
= zsl
->level
; i
< level
; i
++) {
5546 update
[i
] = zsl
->header
;
5547 update
[i
]->span
[i
-1] = zsl
->length
;
5551 x
= zslCreateNode(level
,score
,obj
);
5552 for (i
= 0; i
< level
; i
++) {
5553 x
->forward
[i
] = update
[i
]->forward
[i
];
5554 update
[i
]->forward
[i
] = x
;
5556 /* update span covered by update[i] as x is inserted here */
5558 x
->span
[i
-1] = update
[i
]->span
[i
-1] - (rank
[0] - rank
[i
]);
5559 update
[i
]->span
[i
-1] = (rank
[0] - rank
[i
]) + 1;
5563 /* increment span for untouched levels */
5564 for (i
= level
; i
< zsl
->level
; i
++) {
5565 update
[i
]->span
[i
-1]++;
/* The header is never exposed as a backward neighbour. */
5568 x
->backward
= (update
[0] == zsl
->header
) ? NULL
: update
[0];
5570 x
->forward
[0]->backward
= x
;
5576 /* Internal function used by zslDelete, zslDeleteRangeByScore and zslDeleteRangeByRank */
/* Unlinks node 'x' from the list. 'update' must hold, for every level in use,
 * the last node visited at that level by the caller's search. Where update[i]
 * points directly to x the link is bypassed and x's span is merged into the
 * predecessor's; otherwise only the predecessor's span shrinks by one. The
 * backward pointer of x's successor (or the list tail when x was last) is
 * fixed up. The final loop inspects the highest levels that became empty,
 * presumably lowering zsl->level -- TODO confirm. The node is not freed by
 * the statements documented here. */
5577 void zslDeleteNode(zskiplist
*zsl
, zskiplistNode
*x
, zskiplistNode
**update
) {
5579 for (i
= 0; i
< zsl
->level
; i
++) {
5580 if (update
[i
]->forward
[i
] == x
) {
5582 update
[i
]->span
[i
-1] += x
->span
[i
-1] - 1;
5584 update
[i
]->forward
[i
] = x
->forward
[i
];
5586 /* invariant: i > 0, because update[0]->forward[0]
5587 * is always equal to x */
5588 update
[i
]->span
[i
-1] -= 1;
5591 if (x
->forward
[0]) {
5592 x
->forward
[0]->backward
= x
->backward
;
5594 zsl
->tail
= x
->backward
;
5596 while(zsl
->level
> 1 && zsl
->header
->forward
[zsl
->level
-1] == NULL
)
5601 /* Delete an element with matching score/object from the skiplist. */
/* The search uses the same ordering as zslInsert() (score first, then
 * compareStringObjects() on the object). The candidate is removed through
 * zslDeleteNode() only when both its score and its object match.
 * Returns 0 when the element is not found; callers assert a non-zero result
 * on success. */
5602 static int zslDelete(zskiplist
*zsl
, double score
, robj
*obj
) {
5603 zskiplistNode
*update
[ZSKIPLIST_MAXLEVEL
], *x
;
5607 for (i
= zsl
->level
-1; i
>= 0; i
--) {
5608 while (x
->forward
[i
] &&
5609 (x
->forward
[i
]->score
< score
||
5610 (x
->forward
[i
]->score
== score
&&
5611 compareStringObjects(x
->forward
[i
]->obj
,obj
) < 0)))
5615 /* We may have multiple elements with the same score, what we need
5616 * is to find the element with both the right score and object. */
5618 if (x
&& score
== x
->score
&& equalStringObjects(x
->obj
,obj
)) {
5619 zslDeleteNode(zsl
, x
, update
);
5623 return 0; /* not found */
5625 return 0; /* not found */
5628 /* Delete all the elements with score between min and max from the skiplist.
5629 * Min and max are inclusive, so an element with score >= min && score <= max
     * is deleted.
5630 * Note that this function takes the reference to the hash table view of the
5631 * sorted set, in order to remove the elements from the hash table too.
     * Returns the number of elements removed. */
5632 static unsigned long zslDeleteRangeByScore(zskiplist
*zsl
, double min
, double max
, dict
*dict
) {
5633 zskiplistNode
*update
[ZSKIPLIST_MAXLEVEL
], *x
;
5634 unsigned long removed
= 0;
/* Skip, level by level, every node whose score is below min. */
5638 for (i
= zsl
->level
-1; i
>= 0; i
--) {
5639 while (x
->forward
[i
] && x
->forward
[i
]->score
< min
)
5643 /* We may have multiple elements with the same score, what we need
5644 * is to find the element with both the right score and object. */
/* Remove nodes while their score does not exceed max; the successor is saved
 * first because the current node is unlinked. */
5646 while (x
&& x
->score
<= max
) {
5647 zskiplistNode
*next
= x
->forward
[0];
5648 zslDeleteNode(zsl
, x
, update
);
5649 dictDelete(dict
,x
->obj
);
5654 return removed
; /* number of deleted elements */
5657 /* Delete all the elements with rank between start and end from the skiplist.
5658 * Start and end are inclusive. Note that start and end need to be 1-based */
/* 'dict' is the hash table view of the same sorted set: every element removed
 * from the skiplist is deleted from it too. 'traversed' counts the elements
 * crossed so far, using span[i-1] at levels above 0 and 1 at level 0. */
5659 static unsigned long zslDeleteRangeByRank(zskiplist
*zsl
, unsigned int start
, unsigned int end
, dict
*dict
) {
5660 zskiplistNode
*update
[ZSKIPLIST_MAXLEVEL
], *x
;
5661 unsigned long traversed
= 0, removed
= 0;
5665 for (i
= zsl
->level
-1; i
>= 0; i
--) {
5666 while (x
->forward
[i
] && (traversed
+ (i
> 0 ? x
->span
[i
-1] : 1)) < start
) {
5667 traversed
+= i
> 0 ? x
->span
[i
-1] : 1;
5675 while (x
&& traversed
<= end
) {
5676 zskiplistNode
*next
= x
->forward
[0];
5677 zslDeleteNode(zsl
, x
, update
);
5678 dictDelete(dict
,x
->obj
);
5687 /* Find the first node having a score equal or greater than the specified one.
5688 * Returns NULL if there is no match. */
/* The search skips, at every level, the nodes whose score is strictly lower
 * than 'score'; the level-0 successor of the last skipped node is the result. */
5689 static zskiplistNode
*zslFirstWithScore(zskiplist
*zsl
, double score
) {
5694 for (i
= zsl
->level
-1; i
>= 0; i
--) {
5695 while (x
->forward
[i
] && x
->forward
[i
]->score
< score
)
5698 /* We may have multiple elements with the same score, what we need
5699 * is to find the element with both the right score and object. */
5700 return x
->forward
[0];
/* zslGetRank(zsl, score, o): position of element 'o' with the given score.
 * The walk advances while the next node sorts before or at (score, o), adding
 * span[i-1] (or 1 at level 0) to the rank for every step, and then checks
 * whether the node reached holds an object equal to 'o'. */
5703 /* Find the rank for an element by both score and key.
5704 * Returns 0 when the element cannot be found, rank otherwise.
5705 * Note that the rank is 1-based due to the span of zsl->header to the
5707 static unsigned long zslGetRank(zskiplist
*zsl
, double score
, robj
*o
) {
5709 unsigned long rank
= 0;
5713 for (i
= zsl
->level
-1; i
>= 0; i
--) {
5714 while (x
->forward
[i
] &&
5715 (x
->forward
[i
]->score
< score
||
5716 (x
->forward
[i
]->score
== score
&&
5717 compareStringObjects(x
->forward
[i
]->obj
,o
) <= 0))) {
5718 rank
+= i
> 0 ? x
->span
[i
-1] : 1;
5722 /* x might be equal to zsl->header, so test if obj is non-NULL */
5723 if (x
->obj
&& equalStringObjects(x
->obj
,o
)) {
5730 /* Finds an element by its rank. The rank argument needs to be 1-based. */
/* At every level the walk advances as long as the elements crossed so far
 * plus the next span do not exceed 'rank'; the search is successful when
 * 'traversed' equals 'rank'. Presumably NULL is returned when no element has
 * that rank -- TODO confirm. */
5731 zskiplistNode
* zslGetElementByRank(zskiplist
*zsl
, unsigned long rank
) {
5733 unsigned long traversed
= 0;
5737 for (i
= zsl
->level
-1; i
>= 0; i
--) {
5738 while (x
->forward
[i
] && (traversed
+ (i
>0 ? x
->span
[i
-1] : 1)) <= rank
)
5740 traversed
+= i
> 0 ? x
->span
[i
-1] : 1;
5743 if (traversed
== rank
) {
5750 /* The actual Z-commands implementations */
5752 /* This generic command implements both ZADD and ZINCRBY.
5753 * scoreval is the score if the operation is a ZADD (doincrement == 0) or
5754 * the increment if the operation is a ZINCRBY (doincrement == 1). */
/* c   - client to reply to
 * key - key of the sorted set; a missing key gets a new sorted set object
 * ele - member to add or update
 *
 * A NaN score (given, or resulting from the increment) is rejected with an
 * error reply. A new member is added to both the hash table and the skiplist,
 * taking one reference on 'ele' for each. For an existing member whose score
 * changes, the skiplist entry is deleted and re-inserted and the hash table
 * value replaced. */
5755 static void zaddGenericCommand(redisClient
*c
, robj
*key
, robj
*ele
, double scoreval
, int doincrement
) {
5760 if (isnan(scoreval
)) {
5761 addReplySds(c
,sdsnew("-ERR provide score is Not A Number (nan)\r\n"));
5765 zsetobj
= lookupKeyWrite(c
->db
,key
);
5766 if (zsetobj
== NULL
) {
5767 zsetobj
= createZsetObject();
5768 dictAdd(c
->db
->dict
,key
,zsetobj
);
5771 if (zsetobj
->type
!= REDIS_ZSET
) {
5772 addReply(c
,shared
.wrongtypeerr
);
5778 /* Ok now since we implement both ZADD and ZINCRBY here the code
5779 * needs to handle the two different conditions. It's all about setting
5780 * '*score', that is, the new score to set, to the right value. */
/* The score lives on the heap because the hash table stores a pointer to it
 * as the entry value. */
5781 score
= zmalloc(sizeof(double));
5785 /* Read the old score. If the element was not present starts from 0 */
5786 de
= dictFind(zs
->dict
,ele
);
5788 double *oldscore
= dictGetEntryVal(de
);
5789 *score
= *oldscore
+ scoreval
;
5793 if (isnan(*score
)) {
5795 sdsnew("-ERR resulting score is Not A Number (nan)\r\n"));
5797 /* Note that we don't need to check if the zset may be empty and
5798 * should be removed here, as we can only obtain Nan as score if
5799 * there was already an element in the sorted set. */
5806 /* What follows is a simple remove and re-insert operation that is common
5807 * to both ZADD and ZINCRBY... */
5808 if (dictAdd(zs
->dict
,ele
,score
) == DICT_OK
) {
5809 /* case 1: New element */
5810 incrRefCount(ele
); /* added to hash */
5811 zslInsert(zs
->zsl
,*score
,ele
);
5812 incrRefCount(ele
); /* added to skiplist */
5815 addReplyDouble(c
,*score
);
5817 addReply(c
,shared
.cone
);
5822 /* case 2: Score update operation */
5823 de
= dictFind(zs
->dict
,ele
);
5824 redisAssert(de
!= NULL
);
5825 oldscore
= dictGetEntryVal(de
);
5826 if (*score
!= *oldscore
) {
5829 /* Remove and insert the element in the skip list with new score */
5830 deleted
= zslDelete(zs
->zsl
,*oldscore
,ele
);
5831 redisAssert(deleted
!= 0);
5832 zslInsert(zs
->zsl
,*score
,ele
);
5834 /* Update the score in the hash table */
5835 dictReplace(zs
->dict
,ele
,score
);
5841 addReplyDouble(c
,*score
);
5843 addReply(c
,shared
.czero
);
/* Parses c->argv[2] as the score (replying with an error and returning when
 * it is not a valid double) and calls zaddGenericCommand() with key
 * c->argv[1], member c->argv[3] and doincrement == 0. */
5847 static void zaddCommand(redisClient
*c
) {
5850 if (getDoubleFromObjectOrReply(c
, c
->argv
[2], &scoreval
, NULL
) != REDIS_OK
) return;
5851 zaddGenericCommand(c
,c
->argv
[1],c
->argv
[3],scoreval
,0);
/* Parses c->argv[2] as the increment (replying with an error and returning
 * when it is not a valid double) and calls zaddGenericCommand() with key
 * c->argv[1], member c->argv[3] and doincrement == 1. */
5854 static void zincrbyCommand(redisClient
*c
) {
5857 if (getDoubleFromObjectOrReply(c
, c
->argv
[2], &scoreval
, NULL
) != REDIS_OK
) return;
5858 zaddGenericCommand(c
,c
->argv
[1],c
->argv
[3],scoreval
,1);
/* Removes member c->argv[2] from the sorted set at key c->argv[1].
 * Replies with shared.czero when the key does not exist, and with
 * shared.cone after a successful removal. The member is deleted from the
 * skiplist (using its stored score) and from the hash table, which is resized
 * if needed; the key itself is deleted once the set becomes empty. */
5861 static void zremCommand(redisClient
*c
) {
5868 if ((zsetobj
= lookupKeyWriteOrReply(c
,c
->argv
[1],shared
.czero
)) == NULL
||
5869 checkType(c
,zsetobj
,REDIS_ZSET
)) return;
5872 de
= dictFind(zs
->dict
,c
->argv
[2]);
5874 addReply(c
,shared
.czero
);
5877 /* Delete from the skiplist */
5878 oldscore
= dictGetEntryVal(de
);
5879 deleted
= zslDelete(zs
->zsl
,*oldscore
,c
->argv
[2]);
5880 redisAssert(deleted
!= 0);
5882 /* Delete from the hash table */
5883 dictDelete(zs
->dict
,c
->argv
[2]);
5884 if (htNeedsResize(zs
->dict
)) dictResize(zs
->dict
);
5885 if (dictSize(zs
->dict
) == 0) deleteKey(c
->db
,c
->argv
[1]);
5887 addReply(c
,shared
.cone
);
/* Removes from the sorted set at c->argv[1] every member whose score lies in
 * the inclusive range given by c->argv[2] (min) and c->argv[3] (max).
 * Replies with the number of removed members, which is also added to
 * server.dirty; an empty set is removed from the database. */
5890 static void zremrangebyscoreCommand(redisClient
*c
) {
5897 if ((getDoubleFromObjectOrReply(c
, c
->argv
[2], &min
, NULL
) != REDIS_OK
) ||
5898 (getDoubleFromObjectOrReply(c
, c
->argv
[3], &max
, NULL
) != REDIS_OK
)) return;
5900 if ((zsetobj
= lookupKeyWriteOrReply(c
,c
->argv
[1],shared
.czero
)) == NULL
||
5901 checkType(c
,zsetobj
,REDIS_ZSET
)) return;
5904 deleted
= zslDeleteRangeByScore(zs
->zsl
,min
,max
,zs
->dict
);
5905 if (htNeedsResize(zs
->dict
)) dictResize(zs
->dict
);
5906 if (dictSize(zs
->dict
) == 0) deleteKey(c
->db
,c
->argv
[1]);
5907 server
.dirty
+= deleted
;
5908 addReplyLongLong(c
,deleted
);
/* Removes from the sorted set at c->argv[1] the members whose 0-based rank
 * lies between c->argv[2] (start) and c->argv[3] (end), both inclusive.
 * Negative indexes count from the end of the set; out-of-range values are
 * clamped and an empty range replies shared.czero. Replies with the number
 * of removed members, which is also added to server.dirty. */
5911 static void zremrangebyrankCommand(redisClient
*c
) {
5919 if ((getLongFromObjectOrReply(c
, c
->argv
[2], &start
, NULL
) != REDIS_OK
) ||
5920 (getLongFromObjectOrReply(c
, c
->argv
[3], &end
, NULL
) != REDIS_OK
)) return;
5922 if ((zsetobj
= lookupKeyWriteOrReply(c
,c
->argv
[1],shared
.czero
)) == NULL
||
5923 checkType(c
,zsetobj
,REDIS_ZSET
)) return;
5925 llen
= zs
->zsl
->length
;
5927 /* convert negative indexes */
5928 if (start
< 0) start
= llen
+start
;
5929 if (end
< 0) end
= llen
+end
;
5930 if (start
< 0) start
= 0;
5931 if (end
< 0) end
= 0;
5933 /* indexes sanity checks */
5934 if (start
> end
|| start
>= llen
) {
5935 addReply(c
,shared
.czero
);
5938 if (end
>= llen
) end
= llen
-1;
5940 /* increment start and end because zsl*Rank functions
5941 * use 1-based rank */
5942 deleted
= zslDeleteRangeByRank(zs
->zsl
,start
+1,end
+1,zs
->dict
);
5943 if (htNeedsResize(zs
->dict
)) dictResize(zs
->dict
);
5944 if (dictSize(zs
->dict
) == 0) deleteKey(c
->db
,c
->argv
[1]);
5945 server
.dirty
+= deleted
;
5946 addReplyLongLong(c
, deleted
);
5954 static int qsortCompareZsetopsrcByCardinality(const void *s1
, const void *s2
) {
5955 zsetopsrc
*d1
= (void*) s1
, *d2
= (void*) s2
;
5956 unsigned long size1
, size2
;
5957 size1
= d1
->dict
? dictSize(d1
->dict
) : 0;
5958 size2
= d2
->dict
? dictSize(d2
->dict
) : 0;
5959 return size1
- size2
;
/* Aggregation modes understood by zunionInterAggregate(). */
5962 #define REDIS_AGGR_SUM 1
5963 #define REDIS_AGGR_MIN 2
5964 #define REDIS_AGGR_MAX 3
/* Score stored in a dict entry, or 1.0 when the entry has a NULL value.
 * Note: the argument is evaluated twice. */
5965 #define zunionInterDictValue(_e) (dictGetEntryVal(_e) == NULL ? 1.0 : *(double*)dictGetEntryVal(_e))
/* Folds 'val' into '*target' according to 'aggregate':
 * REDIS_AGGR_SUM adds it, REDIS_AGGR_MIN keeps the smaller of the two values,
 * REDIS_AGGR_MAX keeps the larger one. redisPanic() is called for an unknown
 * aggregate type. */
5967 inline static void zunionInterAggregate(double *target
, double val
, int aggregate
) {
5968 if (aggregate
== REDIS_AGGR_SUM
) {
5969 *target
= *target
+ val
;
5970 } else if (aggregate
== REDIS_AGGR_MIN
) {
5971 *target
= val
< *target
? val
: *target
;
5972 } else if (aggregate
== REDIS_AGGR_MAX
) {
5973 *target
= val
> *target
? val
: *target
;
5976 redisPanic("Unknown ZUNION/INTER aggregate type");
/* Generic implementation of the sorted set union / intersection store
 * commands.
 *
 * c      - client; c->argv[2] holds the number of input keys, the keys start
 *          at c->argv[3] and may be followed by "weights" and "aggregate"
 *          options
 * dstkey - key receiving the result
 * op     - REDIS_OP_UNION or REDIS_OP_INTER (anything else fails an assert)
 *
 * Inputs may be sorted sets or plain sets; every input has a weight that
 * defaults to 1.0 and multiplies its scores before they are combined with
 * zunionInterAggregate(). The destination key is deleted first and re-created
 * only when the result is not empty; the reply is the size of the result, or
 * shared.czero.
 *
 * NOTE(review): setnum comes from atoi(), so "3+setnum" can overflow for very
 * large values; comparing setnum against c->argc-3 would avoid that.
 * NOTE(review): confirm that every error exit taken after 'src' has been
 * allocated frees it. */
5980 static void zunionInterGenericCommand(redisClient
*c
, robj
*dstkey
, int op
) {
5982 int aggregate
= REDIS_AGGR_SUM
;
5989 /* expect setnum input keys to be given */
5990 setnum
= atoi(c
->argv
[2]->ptr
);
5992 addReplySds(c
,sdsnew("-ERR at least 1 input key is needed for ZUNIONSTORE/ZINTERSTORE\r\n"));
5996 /* test if the expected number of keys would overflow */
5997 if (3+setnum
> c
->argc
) {
5998 addReply(c
,shared
.syntaxerr
);
6002 /* read keys to be used for input */
6003 src
= zmalloc(sizeof(zsetopsrc
) * setnum
);
6004 for (i
= 0, j
= 3; i
< setnum
; i
++, j
++) {
6005 robj
*obj
= lookupKeyWrite(c
->db
,c
->argv
[j
]);
6009 if (obj
->type
== REDIS_ZSET
) {
6010 src
[i
].dict
= ((zset
*)obj
->ptr
)->dict
;
6011 } else if (obj
->type
== REDIS_SET
) {
6012 src
[i
].dict
= (obj
->ptr
);
6015 addReply(c
,shared
.wrongtypeerr
);
6020 /* default all weights to 1 */
6021 src
[i
].weight
= 1.0;
6024 /* parse optional extra arguments */
6026 int remaining
= c
->argc
- j
;
6029 if (remaining
>= (setnum
+ 1) && !strcasecmp(c
->argv
[j
]->ptr
,"weights")) {
6031 for (i
= 0; i
< setnum
; i
++, j
++, remaining
--) {
6032 if (getDoubleFromObjectOrReply(c
, c
->argv
[j
], &src
[i
].weight
, NULL
) != REDIS_OK
)
6035 } else if (remaining
>= 2 && !strcasecmp(c
->argv
[j
]->ptr
,"aggregate")) {
6037 if (!strcasecmp(c
->argv
[j
]->ptr
,"sum")) {
6038 aggregate
= REDIS_AGGR_SUM
;
6039 } else if (!strcasecmp(c
->argv
[j
]->ptr
,"min")) {
6040 aggregate
= REDIS_AGGR_MIN
;
6041 } else if (!strcasecmp(c
->argv
[j
]->ptr
,"max")) {
6042 aggregate
= REDIS_AGGR_MAX
;
6045 addReply(c
,shared
.syntaxerr
);
6051 addReply(c
,shared
.syntaxerr
);
6057 /* sort sets from the smallest to largest, this will improve our
6058 * algorithm's performance */
6059 qsort(src
,setnum
,sizeof(zsetopsrc
),qsortCompareZsetopsrcByCardinality
);
6061 dstobj
= createZsetObject();
6062 dstzset
= dstobj
->ptr
;
6064 if (op
== REDIS_OP_INTER
) {
6065 /* skip going over all entries if the smallest zset is NULL or empty */
6066 if (src
[0].dict
&& dictSize(src
[0].dict
) > 0) {
6067 /* precondition: as src[0].dict is non-empty and the zsets are ordered
6068 * from small to large, all src[i > 0].dict are non-empty too */
6069 di
= dictGetIterator(src
[0].dict
);
6070 while((de
= dictNext(di
)) != NULL
) {
6071 double *score
= zmalloc(sizeof(double)), value
;
6072 *score
= src
[0].weight
* zunionInterDictValue(de
);
6074 for (j
= 1; j
< setnum
; j
++) {
6075 dictEntry
*other
= dictFind(src
[j
].dict
,dictGetEntryKey(de
));
6077 value
= src
[j
].weight
* zunionInterDictValue(other
);
6078 zunionInterAggregate(score
, value
, aggregate
);
6084 /* skip entry when not present in every source dict */
6088 robj
*o
= dictGetEntryKey(de
);
6089 dictAdd(dstzset
->dict
,o
,score
);
6090 incrRefCount(o
); /* added to dictionary */
6091 zslInsert(dstzset
->zsl
,*score
,o
);
6092 incrRefCount(o
); /* added to skiplist */
6095 dictReleaseIterator(di
);
6097 } else if (op
== REDIS_OP_UNION
) {
6098 for (i
= 0; i
< setnum
; i
++) {
6099 if (!src
[i
].dict
) continue;
6101 di
= dictGetIterator(src
[i
].dict
);
6102 while((de
= dictNext(di
)) != NULL
) {
6103 /* skip key when already processed */
6104 if (dictFind(dstzset
->dict
,dictGetEntryKey(de
)) != NULL
) continue;
6106 double *score
= zmalloc(sizeof(double)), value
;
6107 *score
= src
[i
].weight
* zunionInterDictValue(de
);
6109 /* because the zsets are sorted by size, its only possible
6110 * for sets at larger indices to hold this entry */
6111 for (j
= (i
+1); j
< setnum
; j
++) {
6112 dictEntry
*other
= dictFind(src
[j
].dict
,dictGetEntryKey(de
));
6114 value
= src
[j
].weight
* zunionInterDictValue(other
);
6115 zunionInterAggregate(score
, value
, aggregate
);
6119 robj
*o
= dictGetEntryKey(de
);
6120 dictAdd(dstzset
->dict
,o
,score
);
6121 incrRefCount(o
); /* added to dictionary */
6122 zslInsert(dstzset
->zsl
,*score
,o
);
6123 incrRefCount(o
); /* added to skiplist */
6125 dictReleaseIterator(di
);
6128 /* unknown operator */
6129 redisAssert(op
== REDIS_OP_INTER
|| op
== REDIS_OP_UNION
);
6132 deleteKey(c
->db
,dstkey
);
6133 if (dstzset
->zsl
->length
) {
6134 dictAdd(c
->db
->dict
,dstkey
,dstobj
);
6135 incrRefCount(dstkey
);
6136 addReplyLongLong(c
, dstzset
->zsl
->length
);
6139 decrRefCount(dstobj
);
6140 addReply(c
, shared
.czero
);
/* Union store entry point: c->argv[1] is the destination key and the
 * operation is REDIS_OP_UNION. */
6145 static void zunionstoreCommand(redisClient
*c
) {
6146 zunionInterGenericCommand(c
,c
->argv
[1], REDIS_OP_UNION
);
/* Intersection store entry point: c->argv[1] is the destination key and the
 * operation is REDIS_OP_INTER. */
6149 static void zinterstoreCommand(redisClient
*c
) {
6150 zunionInterGenericCommand(c
,c
->argv
[1], REDIS_OP_INTER
);
/* Generic implementation of the range-by-index commands on sorted sets.
 *
 * c       - client; c->argv[1] is the key, c->argv[2] and c->argv[3] the
 *           start and end indexes, an optional fifth argument must be
 *           "withscores" (anything else is a syntax error)
 * reverse - when non-zero the set is walked from the tail through the
 *           'backward' pointers, otherwise from the head through forward[0]
 *
 * Negative indexes count from the end of the set; an empty or out-of-range
 * interval yields shared.emptymultibulk. The reply is a multi-bulk whose
 * length is doubled when scores are requested. */
6153 static void zrangeGenericCommand(redisClient
*c
, int reverse
) {
6165 if ((getLongFromObjectOrReply(c
, c
->argv
[2], &start
, NULL
) != REDIS_OK
) ||
6166 (getLongFromObjectOrReply(c
, c
->argv
[3], &end
, NULL
) != REDIS_OK
)) return;
6168 if (c
->argc
== 5 && !strcasecmp(c
->argv
[4]->ptr
,"withscores")) {
6170 } else if (c
->argc
>= 5) {
6171 addReply(c
,shared
.syntaxerr
);
6175 if ((o
= lookupKeyReadOrReply(c
,c
->argv
[1],shared
.emptymultibulk
)) == NULL
6176 || checkType(c
,o
,REDIS_ZSET
)) return;
6181 /* convert negative indexes */
6182 if (start
< 0) start
= llen
+start
;
6183 if (end
< 0) end
= llen
+end
;
6184 if (start
< 0) start
= 0;
6185 if (end
< 0) end
= 0;
6187 /* indexes sanity checks */
6188 if (start
> end
|| start
>= llen
) {
6189 /* Out of range start or start > end result in empty list */
6190 addReply(c
,shared
.emptymultibulk
);
6193 if (end
>= llen
) end
= llen
-1;
6194 rangelen
= (end
-start
)+1;
6196 /* check if starting point is trivial, before searching
6197 * the element in log(N) time */
6199 ln
= start
== 0 ? zsl
->tail
: zslGetElementByRank(zsl
, llen
-start
);
6202 zsl
->header
->forward
[0] : zslGetElementByRank(zsl
, start
+1);
6205 /* Return the result in form of a multi-bulk reply */
6206 addReplySds(c
,sdscatprintf(sdsempty(),"*%d\r\n",
6207 withscores
? (rangelen
*2) : rangelen
));
6208 for (j
= 0; j
< rangelen
; j
++) {
6210 addReplyBulk(c
,ele
);
6212 addReplyDouble(c
,ln
->score
);
6213 ln
= reverse
? ln
->backward
: ln
->forward
[0];
/* Range by index in forward order (reverse == 0). */
6217 static void zrangeCommand(redisClient
*c
) {
6218 zrangeGenericCommand(c
,0);
/* Range by index in reverse order (reverse == 1). */
6221 static void zrevrangeCommand(redisClient
*c
) {
6222 zrangeGenericCommand(c
,1);
6225 /* This command implements both ZRANGEBYSCORE and ZCOUNT.
6226 * If justcount is non-zero, just the count is returned. */
/* c->argv[1] is the key, c->argv[2] and c->argv[3] the min and max scores
 * (a leading "(" makes the bound exclusive). Optional arguments are
 * "LIMIT offset count" and a trailing "withscores". A missing key, or a set
 * without matching elements, replies shared.czero (count mode) or
 * shared.emptymultibulk.
 *
 * NOTE(review): min/max are parsed with strtod() and offset/limit with
 * atoi() without any validation, so non-numeric input silently becomes 0. */
6227 static void genericZrangebyscoreCommand(redisClient
*c
, int justcount
) {
6230 int minex
= 0, maxex
= 0; /* are min or max exclusive? */
6231 int offset
= 0, limit
= -1;
6235 /* Parse the min-max interval. If one of the values is prefixed
6236 * by the "(" character, it's considered "open". For instance
6237 * ZRANGEBYSCORE zset (1.5 (2.5 will match min < x < max
6238 * ZRANGEBYSCORE zset 1.5 2.5 will instead match min <= x <= max */
6239 if (((char*)c
->argv
[2]->ptr
)[0] == '(') {
6240 min
= strtod((char*)c
->argv
[2]->ptr
+1,NULL
);
6243 min
= strtod(c
->argv
[2]->ptr
,NULL
);
6245 if (((char*)c
->argv
[3]->ptr
)[0] == '(') {
6246 max
= strtod((char*)c
->argv
[3]->ptr
+1,NULL
);
6249 max
= strtod(c
->argv
[3]->ptr
,NULL
);
6252 /* Parse "WITHSCORES": note that if the command was called with
6253 * the name ZCOUNT then we are sure that c->argc == 4, so we'll never
6254 * enter the following paths to parse WITHSCORES and LIMIT. */
6255 if (c
->argc
== 5 || c
->argc
== 8) {
6256 if (strcasecmp(c
->argv
[c
->argc
-1]->ptr
,"withscores") == 0)
6261 if (c
->argc
!= (4 + withscores
) && c
->argc
!= (7 + withscores
))
6265 sdsnew("-ERR wrong number of arguments for ZRANGEBYSCORE\r\n"));
6270 if (c
->argc
== (7 + withscores
) && strcasecmp(c
->argv
[4]->ptr
,"limit")) {
6271 addReply(c
,shared
.syntaxerr
);
6273 } else if (c
->argc
== (7 + withscores
)) {
6274 offset
= atoi(c
->argv
[5]->ptr
);
6275 limit
= atoi(c
->argv
[6]->ptr
);
6276 if (offset
< 0) offset
= 0;
6279 /* Ok, lookup the key and get the range */
6280 o
= lookupKeyRead(c
->db
,c
->argv
[1]);
6282 addReply(c
,justcount
? shared
.czero
: shared
.emptymultibulk
);
6284 if (o
->type
!= REDIS_ZSET
) {
6285 addReply(c
,shared
.wrongtypeerr
);
6287 zset
*zsetobj
= o
->ptr
;
6288 zskiplist
*zsl
= zsetobj
->zsl
;
6290 robj
*ele
, *lenobj
= NULL
;
6291 unsigned long rangelen
= 0;
6293 /* Get the first node with the score >= min, or with
6294 * score > min if 'minex' is true. */
6295 ln
= zslFirstWithScore(zsl
,min
);
6296 while (minex
&& ln
&& ln
->score
== min
) ln
= ln
->forward
[0];
6299 /* No element matching the specified interval */
6300 addReply(c
,justcount
? shared
.czero
: shared
.emptymultibulk
);
6304 /* We don't know in advance how many matching elements there
6305 * are in the list, so we push this object that will represent
6306 * the multi-bulk length in the output buffer, and will "fix"
6309 lenobj
= createObject(REDIS_STRING
,NULL
);
6311 decrRefCount(lenobj
);
6314 while(ln
&& (maxex
? (ln
->score
< max
) : (ln
->score
<= max
))) {
6317 ln
= ln
->forward
[0];
6320 if (limit
== 0) break;
6323 addReplyBulk(c
,ele
);
6325 addReplyDouble(c
,ln
->score
);
6327 ln
= ln
->forward
[0];
6329 if (limit
> 0) limit
--;
6332 addReplyLongLong(c
,(long)rangelen
);
6334 lenobj
->ptr
= sdscatprintf(sdsempty(),"*%lu\r\n",
6335 withscores
? (rangelen
*2) : rangelen
);
/* Range by score returning the elements (justcount == 0). */
6341 static void zrangebyscoreCommand(redisClient
*c
) {
6342 genericZrangebyscoreCommand(c
,0);
/* Range by score returning only the number of elements (justcount == 1). */
6345 static void zcountCommand(redisClient
*c
) {
6346 genericZrangebyscoreCommand(c
,1);
/* Replies with the number of elements of the sorted set at c->argv[1], read
 * from the length of its skiplist. A missing key replies shared.czero; a key
 * holding another type is rejected by checkType(). */
6349 static void zcardCommand(redisClient
*c
) {
6353 if ((o
= lookupKeyReadOrReply(c
,c
->argv
[1],shared
.czero
)) == NULL
||
6354 checkType(c
,o
,REDIS_ZSET
)) return;
6357 addReplyUlong(c
,zs
->zsl
->length
);
/* Replies with the score of member c->argv[2] in the sorted set at
 * c->argv[1], looked up in the hash table view of the set. A missing key
 * replies shared.nullbulk, which is also the reply used when the member is
 * not found. */
6360 static void zscoreCommand(redisClient
*c
) {
6365 if ((o
= lookupKeyReadOrReply(c
,c
->argv
[1],shared
.nullbulk
)) == NULL
||
6366 checkType(c
,o
,REDIS_ZSET
)) return;
6369 de
= dictFind(zs
->dict
,c
->argv
[2]);
6371 addReply(c
,shared
.nullbulk
);
6373 double *score
= dictGetEntryVal(de
);
6375 addReplyDouble(c
,*score
);
/* Generic implementation of the rank commands: replies with the 0-based
 * position of member c->argv[2] in the sorted set at c->argv[1].
 *
 * The member's score is read from the hash table and turned into a 1-based
 * rank by zslGetRank(). The forward rank is replied as rank-1 and the
 * reversed one as length-rank (presumably selected by 'reverse' -- TODO
 * confirm). A missing key or member replies shared.nullbulk. */
6379 static void zrankGenericCommand(redisClient
*c
, int reverse
) {
6387 if ((o
= lookupKeyReadOrReply(c
,c
->argv
[1],shared
.nullbulk
)) == NULL
||
6388 checkType(c
,o
,REDIS_ZSET
)) return;
6392 de
= dictFind(zs
->dict
,c
->argv
[2]);
6394 addReply(c
,shared
.nullbulk
);
6398 score
= dictGetEntryVal(de
);
6399 rank
= zslGetRank(zsl
, *score
, c
->argv
[2]);
6402 addReplyLongLong(c
, zsl
->length
- rank
);
6404 addReplyLongLong(c
, rank
-1);
6407 addReply(c
,shared
.nullbulk
);
/* Rank of a member in forward order (reverse == 0). */
6411 static void zrankCommand(redisClient
*c
) {
6412 zrankGenericCommand(c
, 0);
/* Rank of a member in reverse order (reverse == 1). */
6415 static void zrevrankCommand(redisClient
*c
) {
6416 zrankGenericCommand(c
, 1);
6419 /* ========================= Hashes utility functions ======================= */
/* Bit flags for the 'what' argument of hashCurrent(): select the field (key)
 * or the value of the current hash entry. */
6420 #define REDIS_HASH_KEY 1
6421 #define REDIS_HASH_VALUE 2
6423 /* Check the length of a number of objects to see if we need to convert a
6424 * zipmap to a real hash. Note that we only check string encoded objects
6425 * as their string length can be queried in constant time. */
/* subject    - the hash object; nothing is done unless it is zipmap encoded
 * argv       - array of objects to inspect
 * start, end - inclusive index range of argv to inspect
 *
 * A raw-encoded argument longer than server.hash_max_zipmap_value triggers
 * convertToRealHash() on the subject. */
6426 static void hashTryConversion(robj
*subject
, robj
**argv
, int start
, int end
) {
6428 if (subject
->encoding
!= REDIS_ENCODING_ZIPMAP
) return;
6430 for (i
= start
; i
<= end
; i
++) {
6431 if (argv
[i
]->encoding
== REDIS_ENCODING_RAW
&&
6432 sdslen(argv
[i
]->ptr
) > server
.hash_max_zipmap_value
)
6434 convertToRealHash(subject
);
6440 /* Encode given objects in-place when the hash uses a dict. */
/* o1 and o2 are optional (either may be NULL); each non-NULL pointer is
 * overwritten with the result of tryObjectEncoding() on the object it points
 * to. Nothing happens for hashes that are not REDIS_ENCODING_HT. */
6441 static void hashTryObjectEncoding(robj
*subject
, robj
**o1
, robj
**o2
) {
6442 if (subject
->encoding
== REDIS_ENCODING_HT
) {
6443 if (o1
) *o1
= tryObjectEncoding(*o1
);
6444 if (o2
) *o2
= tryObjectEncoding(*o2
);
6448 /* Get the value from a hash identified by key. Returns either a string
6449 * object or NULL if the value cannot be found. The refcount of the object
6450 * is always increased by 1 when the value was found. */
/* For a zipmap-encoded hash the key is decoded first and a new string object
 * is created from the bytes found in the zipmap; otherwise the value stored
 * in the dict entry is returned after incrementing its reference count. */
6451 static robj
*hashGet(robj
*o
, robj
*key
) {
6453 if (o
->encoding
== REDIS_ENCODING_ZIPMAP
) {
6456 key
= getDecodedObject(key
);
6457 if (zipmapGet(o
->ptr
,key
->ptr
,sdslen(key
->ptr
),&v
,&vlen
)) {
6458 value
= createStringObject((char*)v
,vlen
);
6462 dictEntry
*de
= dictFind(o
->ptr
,key
);
6464 value
= dictGetEntryVal(de
);
6465 incrRefCount(value
);
6471 /* Test if the key exists in the given hash. Returns 1 if the key
6472 * exists and 0 when it doesn't. */
/* Zipmap-encoded hashes are queried with zipmapExists() on the decoded key,
 * dict-encoded ones with dictFind(). */
6473 static int hashExists(robj
*o
, robj
*key
) {
6474 if (o
->encoding
== REDIS_ENCODING_ZIPMAP
) {
6475 key
= getDecodedObject(key
);
6476 if (zipmapExists(o
->ptr
,key
->ptr
,sdslen(key
->ptr
))) {
6482 if (dictFind(o
->ptr
,key
) != NULL
) {
6489 /* Add an element, discard the old if the key already exists.
6490 * Return 0 on insert and 1 on update. */
/* For a zipmap-encoded hash both key and value are decoded and copied into
 * the zipmap (which may be reallocated, hence the assignment to o->ptr); the
 * hash is converted to a real hash table once it holds more than
 * server.hash_max_zipmap_entries entries. For a dict-encoded hash the value
 * is stored with dictReplace() and its reference count is incremented. */
6491 static int hashSet(robj
*o
, robj
*key
, robj
*value
) {
6493 if (o
->encoding
== REDIS_ENCODING_ZIPMAP
) {
6494 key
= getDecodedObject(key
);
6495 value
= getDecodedObject(value
);
6496 o
->ptr
= zipmapSet(o
->ptr
,
6497 key
->ptr
,sdslen(key
->ptr
),
6498 value
->ptr
,sdslen(value
->ptr
), &update
);
6500 decrRefCount(value
);
6502 /* Check if the zipmap needs to be upgraded to a real hash table */
6503 if (zipmapLen(o
->ptr
) > server
.hash_max_zipmap_entries
)
6504 convertToRealHash(o
);
6506 if (dictReplace(o
->ptr
,key
,value
)) {
6513 incrRefCount(value
);
6518 /* Delete an element from a hash.
6519 * Return 1 on deleted and 0 on not found. */
/* Zipmap-encoded hashes delegate to zipmapDel() on the decoded key, which
 * reports the outcome through 'deleted'; dict-encoded hashes use
 * dictDelete() and shrink the table afterwards when htNeedsResize() says so. */
6520 static int hashDelete(robj
*o
, robj
*key
) {
6522 if (o
->encoding
== REDIS_ENCODING_ZIPMAP
) {
6523 key
= getDecodedObject(key
);
6524 o
->ptr
= zipmapDel(o
->ptr
,key
->ptr
,sdslen(key
->ptr
), &deleted
);
6527 deleted
= dictDelete((dict
*)o
->ptr
,key
) == DICT_OK
;
6528 /* Always check if the dictionary needs a resize after a delete. */
6529 if (deleted
&& htNeedsResize(o
->ptr
)) dictResize(o
->ptr
);
6534 /* Return the number of elements in a hash. */
/* Uses zipmapLen() for zipmap-encoded hashes and dictSize() otherwise. */
6535 static unsigned long hashLength(robj
*o
) {
6536 return (o
->encoding
== REDIS_ENCODING_ZIPMAP
) ?
6537 zipmapLen((unsigned char*)o
->ptr
) : dictSize((dict
*)o
->ptr
);
6540 /* Structure to hold hash iteration abstraction. Note that iteration over
6541 * hashes involves both fields and values. Because it is possible that
6542 * not both are required, store pointers in the iterator to avoid
6543 * unnecessary memory allocation for fields/values. */
6547 unsigned char *zk
, *zv
;
6548 unsigned int zklen
, zvlen
;
6554 static hashIterator
*hashInitIterator(robj
*subject
) {
6555 hashIterator
*hi
= zmalloc(sizeof(hashIterator
));
6556 hi
->encoding
= subject
->encoding
;
6557 if (hi
->encoding
== REDIS_ENCODING_ZIPMAP
) {
6558 hi
->zi
= zipmapRewind(subject
->ptr
);
6559 } else if (hi
->encoding
== REDIS_ENCODING_HT
) {
6560 hi
->di
= dictGetIterator(subject
->ptr
);
6567 static void hashReleaseIterator(hashIterator
*hi
) {
6568 if (hi
->encoding
== REDIS_ENCODING_HT
) {
6569 dictReleaseIterator(hi
->di
);
6574 /* Move to the next entry in the hash. Return REDIS_OK when the next entry
6575 * could be found and REDIS_ERR when the iterator reaches the end. */
6576 static int hashNext(hashIterator
*hi
) {
6577 if (hi
->encoding
== REDIS_ENCODING_ZIPMAP
) {
6578 if ((hi
->zi
= zipmapNext(hi
->zi
, &hi
->zk
, &hi
->zklen
,
6579 &hi
->zv
, &hi
->zvlen
)) == NULL
) return REDIS_ERR
;
6581 if ((hi
->de
= dictNext(hi
->di
)) == NULL
) return REDIS_ERR
;
6586 /* Get key or value object at current iteration position.
6587 * This increases the refcount of the field object by 1. */
6588 static robj
*hashCurrent(hashIterator
*hi
, int what
) {
6590 if (hi
->encoding
== REDIS_ENCODING_ZIPMAP
) {
6591 if (what
& REDIS_HASH_KEY
) {
6592 o
= createStringObject((char*)hi
->zk
,hi
->zklen
);
6594 o
= createStringObject((char*)hi
->zv
,hi
->zvlen
);
6597 if (what
& REDIS_HASH_KEY
) {
6598 o
= dictGetEntryKey(hi
->de
);
6600 o
= dictGetEntryVal(hi
->de
);
6607 static robj
*hashLookupWriteOrCreate(redisClient
*c
, robj
*key
) {
6608 robj
*o
= lookupKeyWrite(c
->db
,key
);
6610 o
= createHashObject();
6611 dictAdd(c
->db
->dict
,key
,o
);
6614 if (o
->type
!= REDIS_HASH
) {
6615 addReply(c
,shared
.wrongtypeerr
);
6622 /* ============================= Hash commands ============================== */
6623 static void hsetCommand(redisClient
*c
) {
6627 if ((o
= hashLookupWriteOrCreate(c
,c
->argv
[1])) == NULL
) return;
6628 hashTryConversion(o
,c
->argv
,2,3);
6629 hashTryObjectEncoding(o
,&c
->argv
[2], &c
->argv
[3]);
6630 update
= hashSet(o
,c
->argv
[2],c
->argv
[3]);
6631 addReply(c
, update
? shared
.czero
: shared
.cone
);
6635 static void hsetnxCommand(redisClient
*c
) {
6637 if ((o
= hashLookupWriteOrCreate(c
,c
->argv
[1])) == NULL
) return;
6638 hashTryConversion(o
,c
->argv
,2,3);
6640 if (hashExists(o
, c
->argv
[2])) {
6641 addReply(c
, shared
.czero
);
6643 hashTryObjectEncoding(o
,&c
->argv
[2], &c
->argv
[3]);
6644 hashSet(o
,c
->argv
[2],c
->argv
[3]);
6645 addReply(c
, shared
.cone
);
6650 static void hmsetCommand(redisClient
*c
) {
6654 if ((c
->argc
% 2) == 1) {
6655 addReplySds(c
,sdsnew("-ERR wrong number of arguments for HMSET\r\n"));
6659 if ((o
= hashLookupWriteOrCreate(c
,c
->argv
[1])) == NULL
) return;
6660 hashTryConversion(o
,c
->argv
,2,c
->argc
-1);
6661 for (i
= 2; i
< c
->argc
; i
+= 2) {
6662 hashTryObjectEncoding(o
,&c
->argv
[i
], &c
->argv
[i
+1]);
6663 hashSet(o
,c
->argv
[i
],c
->argv
[i
+1]);
6665 addReply(c
, shared
.ok
);
6669 static void hincrbyCommand(redisClient
*c
) {
6670 long long value
, incr
;
6671 robj
*o
, *current
, *new;
6673 if (getLongLongFromObjectOrReply(c
,c
->argv
[3],&incr
,NULL
) != REDIS_OK
) return;
6674 if ((o
= hashLookupWriteOrCreate(c
,c
->argv
[1])) == NULL
) return;
6675 if ((current
= hashGet(o
,c
->argv
[2])) != NULL
) {
6676 if (getLongLongFromObjectOrReply(c
,current
,&value
,
6677 "hash value is not an integer") != REDIS_OK
) {
6678 decrRefCount(current
);
6681 decrRefCount(current
);
6687 new = createStringObjectFromLongLong(value
);
6688 hashTryObjectEncoding(o
,&c
->argv
[2],NULL
);
6689 hashSet(o
,c
->argv
[2],new);
6691 addReplyLongLong(c
,value
);
6695 static void hgetCommand(redisClient
*c
) {
6697 if ((o
= lookupKeyReadOrReply(c
,c
->argv
[1],shared
.nullbulk
)) == NULL
||
6698 checkType(c
,o
,REDIS_HASH
)) return;
6700 if ((value
= hashGet(o
,c
->argv
[2])) != NULL
) {
6701 addReplyBulk(c
,value
);
6702 decrRefCount(value
);
6704 addReply(c
,shared
.nullbulk
);
6708 static void hmgetCommand(redisClient
*c
) {
6711 o
= lookupKeyRead(c
->db
,c
->argv
[1]);
6712 if (o
!= NULL
&& o
->type
!= REDIS_HASH
) {
6713 addReply(c
,shared
.wrongtypeerr
);
6716 /* Note the check for o != NULL happens inside the loop. This is
6717 * done because objects that cannot be found are considered to be
6718 * an empty hash. The reply should then be a series of NULLs. */
6719 addReplySds(c
,sdscatprintf(sdsempty(),"*%d\r\n",c
->argc
-2));
6720 for (i
= 2; i
< c
->argc
; i
++) {
6721 if (o
!= NULL
&& (value
= hashGet(o
,c
->argv
[i
])) != NULL
) {
6722 addReplyBulk(c
,value
);
6723 decrRefCount(value
);
6725 addReply(c
,shared
.nullbulk
);
6730 static void hdelCommand(redisClient
*c
) {
6732 if ((o
= lookupKeyWriteOrReply(c
,c
->argv
[1],shared
.czero
)) == NULL
||
6733 checkType(c
,o
,REDIS_HASH
)) return;
6735 if (hashDelete(o
,c
->argv
[2])) {
6736 if (hashLength(o
) == 0) deleteKey(c
->db
,c
->argv
[1]);
6737 addReply(c
,shared
.cone
);
6740 addReply(c
,shared
.czero
);
6744 static void hlenCommand(redisClient
*c
) {
6746 if ((o
= lookupKeyReadOrReply(c
,c
->argv
[1],shared
.czero
)) == NULL
||
6747 checkType(c
,o
,REDIS_HASH
)) return;
6749 addReplyUlong(c
,hashLength(o
));
6752 static void genericHgetallCommand(redisClient
*c
, int flags
) {
6753 robj
*o
, *lenobj
, *obj
;
6754 unsigned long count
= 0;
6757 if ((o
= lookupKeyReadOrReply(c
,c
->argv
[1],shared
.emptymultibulk
)) == NULL
6758 || checkType(c
,o
,REDIS_HASH
)) return;
6760 lenobj
= createObject(REDIS_STRING
,NULL
);
6762 decrRefCount(lenobj
);
6764 hi
= hashInitIterator(o
);
6765 while (hashNext(hi
) != REDIS_ERR
) {
6766 if (flags
& REDIS_HASH_KEY
) {
6767 obj
= hashCurrent(hi
,REDIS_HASH_KEY
);
6768 addReplyBulk(c
,obj
);
6772 if (flags
& REDIS_HASH_VALUE
) {
6773 obj
= hashCurrent(hi
,REDIS_HASH_VALUE
);
6774 addReplyBulk(c
,obj
);
6779 hashReleaseIterator(hi
);
6781 lenobj
->ptr
= sdscatprintf(sdsempty(),"*%lu\r\n",count
);
6784 static void hkeysCommand(redisClient
*c
) {
6785 genericHgetallCommand(c
,REDIS_HASH_KEY
);
6788 static void hvalsCommand(redisClient
*c
) {
6789 genericHgetallCommand(c
,REDIS_HASH_VALUE
);
6792 static void hgetallCommand(redisClient
*c
) {
6793 genericHgetallCommand(c
,REDIS_HASH_KEY
|REDIS_HASH_VALUE
);
6796 static void hexistsCommand(redisClient
*c
) {
6798 if ((o
= lookupKeyReadOrReply(c
,c
->argv
[1],shared
.czero
)) == NULL
||
6799 checkType(c
,o
,REDIS_HASH
)) return;
6801 addReply(c
, hashExists(o
,c
->argv
[2]) ? shared
.cone
: shared
.czero
);
6804 static void convertToRealHash(robj
*o
) {
6805 unsigned char *key
, *val
, *p
, *zm
= o
->ptr
;
6806 unsigned int klen
, vlen
;
6807 dict
*dict
= dictCreate(&hashDictType
,NULL
);
6809 assert(o
->type
== REDIS_HASH
&& o
->encoding
!= REDIS_ENCODING_HT
);
6810 p
= zipmapRewind(zm
);
6811 while((p
= zipmapNext(p
,&key
,&klen
,&val
,&vlen
)) != NULL
) {
6812 robj
*keyobj
, *valobj
;
6814 keyobj
= createStringObject((char*)key
,klen
);
6815 valobj
= createStringObject((char*)val
,vlen
);
6816 keyobj
= tryObjectEncoding(keyobj
);
6817 valobj
= tryObjectEncoding(valobj
);
6818 dictAdd(dict
,keyobj
,valobj
);
6820 o
->encoding
= REDIS_ENCODING_HT
;
6825 /* ========================= Non type-specific commands ==================== */
6827 static void flushdbCommand(redisClient
*c
) {
6828 server
.dirty
+= dictSize(c
->db
->dict
);
6829 touchWatchedKeysOnFlush(c
->db
->id
);
6830 dictEmpty(c
->db
->dict
);
6831 dictEmpty(c
->db
->expires
);
6832 addReply(c
,shared
.ok
);
6835 static void flushallCommand(redisClient
*c
) {
6836 touchWatchedKeysOnFlush(-1);
6837 server
.dirty
+= emptyDb();
6838 addReply(c
,shared
.ok
);
6839 if (server
.bgsavechildpid
!= -1) {
6840 kill(server
.bgsavechildpid
,SIGKILL
);
6841 rdbRemoveTempFile(server
.bgsavechildpid
);
6843 rdbSave(server
.dbfilename
);
6847 static redisSortOperation
*createSortOperation(int type
, robj
*pattern
) {
6848 redisSortOperation
*so
= zmalloc(sizeof(*so
));
6850 so
->pattern
= pattern
;
6854 /* Return the value associated to the key with a name obtained
6855 * substituting the first occurence of '*' in 'pattern' with 'subst'.
6856 * The returned object will always have its refcount increased by 1
6857 * when it is non-NULL. */
6858 static robj
*lookupKeyByPattern(redisDb
*db
, robj
*pattern
, robj
*subst
) {
6861 robj keyobj
, fieldobj
, *o
;
6862 int prefixlen
, sublen
, postfixlen
, fieldlen
;
6863 /* Expoit the internal sds representation to create a sds string allocated on the stack in order to make this function faster */
6867 char buf
[REDIS_SORTKEY_MAX
+1];
6868 } keyname
, fieldname
;
6870 /* If the pattern is "#" return the substitution object itself in order
6871 * to implement the "SORT ... GET #" feature. */
6872 spat
= pattern
->ptr
;
6873 if (spat
[0] == '#' && spat
[1] == '\0') {
6874 incrRefCount(subst
);
6878 /* The substitution object may be specially encoded. If so we create
6879 * a decoded object on the fly. Otherwise getDecodedObject will just
6880 * increment the ref count, that we'll decrement later. */
6881 subst
= getDecodedObject(subst
);
6884 if (sdslen(spat
)+sdslen(ssub
)-1 > REDIS_SORTKEY_MAX
) return NULL
;
6885 p
= strchr(spat
,'*');
6887 decrRefCount(subst
);
6891 /* Find out if we're dealing with a hash dereference. */
6892 if ((f
= strstr(p
+1, "->")) != NULL
) {
6893 fieldlen
= sdslen(spat
)-(f
-spat
);
6894 /* this also copies \0 character */
6895 memcpy(fieldname
.buf
,f
+2,fieldlen
-1);
6896 fieldname
.len
= fieldlen
-2;
6902 sublen
= sdslen(ssub
);
6903 postfixlen
= sdslen(spat
)-(prefixlen
+1)-fieldlen
;
6904 memcpy(keyname
.buf
,spat
,prefixlen
);
6905 memcpy(keyname
.buf
+prefixlen
,ssub
,sublen
);
6906 memcpy(keyname
.buf
+prefixlen
+sublen
,p
+1,postfixlen
);
6907 keyname
.buf
[prefixlen
+sublen
+postfixlen
] = '\0';
6908 keyname
.len
= prefixlen
+sublen
+postfixlen
;
6909 decrRefCount(subst
);
6911 /* Lookup substituted key */
6912 initStaticStringObject(keyobj
,((char*)&keyname
)+(sizeof(long)*2));
6913 o
= lookupKeyRead(db
,&keyobj
);
6914 if (o
== NULL
) return NULL
;
6917 if (o
->type
!= REDIS_HASH
|| fieldname
.len
< 1) return NULL
;
6919 /* Retrieve value from hash by the field name. This operation
6920 * already increases the refcount of the returned object. */
6921 initStaticStringObject(fieldobj
,((char*)&fieldname
)+(sizeof(long)*2));
6922 o
= hashGet(o
, &fieldobj
);
6924 if (o
->type
!= REDIS_STRING
) return NULL
;
6926 /* Every object that this function returns needs to have its refcount
6927 * increased. sortCommand decreases it again. */
6934 /* sortCompare() is used by qsort in sortCommand(). Given that qsort_r with
6935 * the additional parameter is not standard but a BSD-specific we have to
6936 * pass sorting parameters via the global 'server' structure */
6937 static int sortCompare(const void *s1
, const void *s2
) {
6938 const redisSortObject
*so1
= s1
, *so2
= s2
;
6941 if (!server
.sort_alpha
) {
6942 /* Numeric sorting. Here it's trivial as we precomputed scores */
6943 if (so1
->u
.score
> so2
->u
.score
) {
6945 } else if (so1
->u
.score
< so2
->u
.score
) {
6951 /* Alphanumeric sorting */
6952 if (server
.sort_bypattern
) {
6953 if (!so1
->u
.cmpobj
|| !so2
->u
.cmpobj
) {
6954 /* At least one compare object is NULL */
6955 if (so1
->u
.cmpobj
== so2
->u
.cmpobj
)
6957 else if (so1
->u
.cmpobj
== NULL
)
6962 /* We have both the objects, use strcoll */
6963 cmp
= strcoll(so1
->u
.cmpobj
->ptr
,so2
->u
.cmpobj
->ptr
);
6966 /* Compare elements directly. */
6967 cmp
= compareStringObjects(so1
->obj
,so2
->obj
);
6970 return server
.sort_desc
? -cmp
: cmp
;
6973 /* The SORT command is the most complex command in Redis. Warning: this code
6974 * is optimized for speed and a bit less for readability */
6975 static void sortCommand(redisClient
*c
) {
6978 int desc
= 0, alpha
= 0;
6979 int limit_start
= 0, limit_count
= -1, start
, end
;
6980 int j
, dontsort
= 0, vectorlen
;
6981 int getop
= 0; /* GET operation counter */
6982 robj
*sortval
, *sortby
= NULL
, *storekey
= NULL
;
6983 redisSortObject
*vector
; /* Resulting vector to sort */
6985 /* Lookup the key to sort. It must be of the right types */
6986 sortval
= lookupKeyRead(c
->db
,c
->argv
[1]);
6987 if (sortval
== NULL
) {
6988 addReply(c
,shared
.emptymultibulk
);
6991 if (sortval
->type
!= REDIS_SET
&& sortval
->type
!= REDIS_LIST
&&
6992 sortval
->type
!= REDIS_ZSET
)
6994 addReply(c
,shared
.wrongtypeerr
);
6998 /* Create a list of operations to perform for every sorted element.
6999 * Operations can be GET/DEL/INCR/DECR */
7000 operations
= listCreate();
7001 listSetFreeMethod(operations
,zfree
);
7004 /* Now we need to protect sortval incrementing its count, in the future
7005 * SORT may have options able to overwrite/delete keys during the sorting
7006 * and the sorted key itself may get destroied */
7007 incrRefCount(sortval
);
7009 /* The SORT command has an SQL-alike syntax, parse it */
7010 while(j
< c
->argc
) {
7011 int leftargs
= c
->argc
-j
-1;
7012 if (!strcasecmp(c
->argv
[j
]->ptr
,"asc")) {
7014 } else if (!strcasecmp(c
->argv
[j
]->ptr
,"desc")) {
7016 } else if (!strcasecmp(c
->argv
[j
]->ptr
,"alpha")) {
7018 } else if (!strcasecmp(c
->argv
[j
]->ptr
,"limit") && leftargs
>= 2) {
7019 limit_start
= atoi(c
->argv
[j
+1]->ptr
);
7020 limit_count
= atoi(c
->argv
[j
+2]->ptr
);
7022 } else if (!strcasecmp(c
->argv
[j
]->ptr
,"store") && leftargs
>= 1) {
7023 storekey
= c
->argv
[j
+1];
7025 } else if (!strcasecmp(c
->argv
[j
]->ptr
,"by") && leftargs
>= 1) {
7026 sortby
= c
->argv
[j
+1];
7027 /* If the BY pattern does not contain '*', i.e. it is constant,
7028 * we don't need to sort nor to lookup the weight keys. */
7029 if (strchr(c
->argv
[j
+1]->ptr
,'*') == NULL
) dontsort
= 1;
7031 } else if (!strcasecmp(c
->argv
[j
]->ptr
,"get") && leftargs
>= 1) {
7032 listAddNodeTail(operations
,createSortOperation(
7033 REDIS_SORT_GET
,c
->argv
[j
+1]));
7037 decrRefCount(sortval
);
7038 listRelease(operations
);
7039 addReply(c
,shared
.syntaxerr
);
7045 /* Load the sorting vector with all the objects to sort */
7046 switch(sortval
->type
) {
7047 case REDIS_LIST
: vectorlen
= listLength((list
*)sortval
->ptr
); break;
7048 case REDIS_SET
: vectorlen
= dictSize((dict
*)sortval
->ptr
); break;
7049 case REDIS_ZSET
: vectorlen
= dictSize(((zset
*)sortval
->ptr
)->dict
); break;
7050 default: vectorlen
= 0; redisPanic("Bad SORT type"); /* Avoid GCC warning */
7052 vector
= zmalloc(sizeof(redisSortObject
)*vectorlen
);
7055 if (sortval
->type
== REDIS_LIST
) {
7056 list
*list
= sortval
->ptr
;
7060 listRewind(list
,&li
);
7061 while((ln
= listNext(&li
))) {
7062 robj
*ele
= ln
->value
;
7063 vector
[j
].obj
= ele
;
7064 vector
[j
].u
.score
= 0;
7065 vector
[j
].u
.cmpobj
= NULL
;
7073 if (sortval
->type
== REDIS_SET
) {
7076 zset
*zs
= sortval
->ptr
;
7080 di
= dictGetIterator(set
);
7081 while((setele
= dictNext(di
)) != NULL
) {
7082 vector
[j
].obj
= dictGetEntryKey(setele
);
7083 vector
[j
].u
.score
= 0;
7084 vector
[j
].u
.cmpobj
= NULL
;
7087 dictReleaseIterator(di
);
7089 redisAssert(j
== vectorlen
);
7091 /* Now it's time to load the right scores in the sorting vector */
7092 if (dontsort
== 0) {
7093 for (j
= 0; j
< vectorlen
; j
++) {
7096 /* lookup value to sort by */
7097 byval
= lookupKeyByPattern(c
->db
,sortby
,vector
[j
].obj
);
7098 if (!byval
) continue;
7100 /* use object itself to sort by */
7101 byval
= vector
[j
].obj
;
7105 if (sortby
) vector
[j
].u
.cmpobj
= getDecodedObject(byval
);
7107 if (byval
->encoding
== REDIS_ENCODING_RAW
) {
7108 vector
[j
].u
.score
= strtod(byval
->ptr
,NULL
);
7109 } else if (byval
->encoding
== REDIS_ENCODING_INT
) {
7110 /* Don't need to decode the object if it's
7111 * integer-encoded (the only encoding supported) so
7112 * far. We can just cast it */
7113 vector
[j
].u
.score
= (long)byval
->ptr
;
7115 redisAssert(1 != 1);
7119 /* when the object was retrieved using lookupKeyByPattern,
7120 * its refcount needs to be decreased. */
7122 decrRefCount(byval
);
7127 /* We are ready to sort the vector... perform a bit of sanity check
7128 * on the LIMIT option too. We'll use a partial version of quicksort. */
7129 start
= (limit_start
< 0) ? 0 : limit_start
;
7130 end
= (limit_count
< 0) ? vectorlen
-1 : start
+limit_count
-1;
7131 if (start
>= vectorlen
) {
7132 start
= vectorlen
-1;
7135 if (end
>= vectorlen
) end
= vectorlen
-1;
7137 if (dontsort
== 0) {
7138 server
.sort_desc
= desc
;
7139 server
.sort_alpha
= alpha
;
7140 server
.sort_bypattern
= sortby
? 1 : 0;
7141 if (sortby
&& (start
!= 0 || end
!= vectorlen
-1))
7142 pqsort(vector
,vectorlen
,sizeof(redisSortObject
),sortCompare
, start
,end
);
7144 qsort(vector
,vectorlen
,sizeof(redisSortObject
),sortCompare
);
7147 /* Send command output to the output buffer, performing the specified
7148 * GET/DEL/INCR/DECR operations if any. */
7149 outputlen
= getop
? getop
*(end
-start
+1) : end
-start
+1;
7150 if (storekey
== NULL
) {
7151 /* STORE option not specified, sent the sorting result to client */
7152 addReplySds(c
,sdscatprintf(sdsempty(),"*%d\r\n",outputlen
));
7153 for (j
= start
; j
<= end
; j
++) {
7157 if (!getop
) addReplyBulk(c
,vector
[j
].obj
);
7158 listRewind(operations
,&li
);
7159 while((ln
= listNext(&li
))) {
7160 redisSortOperation
*sop
= ln
->value
;
7161 robj
*val
= lookupKeyByPattern(c
->db
,sop
->pattern
,
7164 if (sop
->type
== REDIS_SORT_GET
) {
7166 addReply(c
,shared
.nullbulk
);
7168 addReplyBulk(c
,val
);
7172 redisAssert(sop
->type
== REDIS_SORT_GET
); /* always fails */
7177 robj
*listObject
= createListObject();
7178 list
*listPtr
= (list
*) listObject
->ptr
;
7180 /* STORE option specified, set the sorting result as a List object */
7181 for (j
= start
; j
<= end
; j
++) {
7186 listAddNodeTail(listPtr
,vector
[j
].obj
);
7187 incrRefCount(vector
[j
].obj
);
7189 listRewind(operations
,&li
);
7190 while((ln
= listNext(&li
))) {
7191 redisSortOperation
*sop
= ln
->value
;
7192 robj
*val
= lookupKeyByPattern(c
->db
,sop
->pattern
,
7195 if (sop
->type
== REDIS_SORT_GET
) {
7197 listAddNodeTail(listPtr
,createStringObject("",0));
7199 /* We should do a incrRefCount on val because it is
7200 * added to the list, but also a decrRefCount because
7201 * it is returned by lookupKeyByPattern. This results
7202 * in doing nothing at all. */
7203 listAddNodeTail(listPtr
,val
);
7206 redisAssert(sop
->type
== REDIS_SORT_GET
); /* always fails */
7210 if (dictReplace(c
->db
->dict
,storekey
,listObject
)) {
7211 incrRefCount(storekey
);
7213 /* Note: we add 1 because the DB is dirty anyway since even if the
7214 * SORT result is empty a new key is set and maybe the old content
7216 server
.dirty
+= 1+outputlen
;
7217 addReplySds(c
,sdscatprintf(sdsempty(),":%d\r\n",outputlen
));
7221 decrRefCount(sortval
);
7222 listRelease(operations
);
7223 for (j
= 0; j
< vectorlen
; j
++) {
7224 if (alpha
&& vector
[j
].u
.cmpobj
)
7225 decrRefCount(vector
[j
].u
.cmpobj
);
/* Convert an amount of bytes into a human readable string in the form
 * of 100B, 2G, 100M, 4K, and so forth.
 *
 * 's' is the output buffer, 'n' the amount of bytes. Values below 1024 are
 * printed as an integer followed by 'B'; larger values are printed with
 * two decimals and a K/M/G/T/P suffix (powers of 1024). Anything too big
 * for the largest unit falls back to the plain byte count, so 's' is
 * always written. The caller must provide room for at least 22 bytes. */
static void bytesToHuman(char *s, unsigned long long n) {
    static const char units[] = "KMGTP";
    unsigned long long unit = 1024ULL;
    int i;

    if (n < unit) {
        /* Bytes */
        sprintf(s,"%lluB",n);
        return;
    }
    for (i = 0; units[i] != '\0'; i++) {
        /* 'unit' is 1024^(i+1); this unit applies while n < 1024^(i+2) */
        if (n < unit*1024ULL) {
            sprintf(s,"%.2f%c",(double)n/unit,units[i]);
            return;
        }
        unit *= 1024ULL;
    }
    /* n >= 1024^6: no larger unit, report the raw byte count */
    sprintf(s,"%lluB",n);
}
7251 /* Create the string returned by the INFO command. This is decoupled
7252 * by the INFO command itself as we need to report the same information
7253 * on memory corruption problems. */
7254 static sds
genRedisInfoString(void) {
7256 time_t uptime
= time(NULL
)-server
.stat_starttime
;
7260 bytesToHuman(hmem
,zmalloc_used_memory());
7261 info
= sdscatprintf(sdsempty(),
7262 "redis_version:%s\r\n"
7263 "redis_git_sha1:%s\r\n"
7264 "redis_git_dirty:%d\r\n"
7266 "multiplexing_api:%s\r\n"
7267 "process_id:%ld\r\n"
7268 "uptime_in_seconds:%ld\r\n"
7269 "uptime_in_days:%ld\r\n"
7270 "connected_clients:%d\r\n"
7271 "connected_slaves:%d\r\n"
7272 "blocked_clients:%d\r\n"
7273 "used_memory:%zu\r\n"
7274 "used_memory_human:%s\r\n"
7275 "changes_since_last_save:%lld\r\n"
7276 "bgsave_in_progress:%d\r\n"
7277 "last_save_time:%ld\r\n"
7278 "bgrewriteaof_in_progress:%d\r\n"
7279 "total_connections_received:%lld\r\n"
7280 "total_commands_processed:%lld\r\n"
7281 "expired_keys:%lld\r\n"
7282 "hash_max_zipmap_entries:%zu\r\n"
7283 "hash_max_zipmap_value:%zu\r\n"
7284 "pubsub_channels:%ld\r\n"
7285 "pubsub_patterns:%u\r\n"
7290 strtol(REDIS_GIT_DIRTY
,NULL
,10) > 0,
7291 (sizeof(long) == 8) ? "64" : "32",
7296 listLength(server
.clients
)-listLength(server
.slaves
),
7297 listLength(server
.slaves
),
7298 server
.blpop_blocked_clients
,
7299 zmalloc_used_memory(),
7302 server
.bgsavechildpid
!= -1,
7304 server
.bgrewritechildpid
!= -1,
7305 server
.stat_numconnections
,
7306 server
.stat_numcommands
,
7307 server
.stat_expiredkeys
,
7308 server
.hash_max_zipmap_entries
,
7309 server
.hash_max_zipmap_value
,
7310 dictSize(server
.pubsub_channels
),
7311 listLength(server
.pubsub_patterns
),
7312 server
.vm_enabled
!= 0,
7313 server
.masterhost
== NULL
? "master" : "slave"
7315 if (server
.masterhost
) {
7316 info
= sdscatprintf(info
,
7317 "master_host:%s\r\n"
7318 "master_port:%d\r\n"
7319 "master_link_status:%s\r\n"
7320 "master_last_io_seconds_ago:%d\r\n"
7323 (server
.replstate
== REDIS_REPL_CONNECTED
) ?
7325 server
.master
? ((int)(time(NULL
)-server
.master
->lastinteraction
)) : -1
7328 if (server
.vm_enabled
) {
7330 info
= sdscatprintf(info
,
7331 "vm_conf_max_memory:%llu\r\n"
7332 "vm_conf_page_size:%llu\r\n"
7333 "vm_conf_pages:%llu\r\n"
7334 "vm_stats_used_pages:%llu\r\n"
7335 "vm_stats_swapped_objects:%llu\r\n"
7336 "vm_stats_swappin_count:%llu\r\n"
7337 "vm_stats_swappout_count:%llu\r\n"
7338 "vm_stats_io_newjobs_len:%lu\r\n"
7339 "vm_stats_io_processing_len:%lu\r\n"
7340 "vm_stats_io_processed_len:%lu\r\n"
7341 "vm_stats_io_active_threads:%lu\r\n"
7342 "vm_stats_blocked_clients:%lu\r\n"
7343 ,(unsigned long long) server
.vm_max_memory
,
7344 (unsigned long long) server
.vm_page_size
,
7345 (unsigned long long) server
.vm_pages
,
7346 (unsigned long long) server
.vm_stats_used_pages
,
7347 (unsigned long long) server
.vm_stats_swapped_objects
,
7348 (unsigned long long) server
.vm_stats_swapins
,
7349 (unsigned long long) server
.vm_stats_swapouts
,
7350 (unsigned long) listLength(server
.io_newjobs
),
7351 (unsigned long) listLength(server
.io_processing
),
7352 (unsigned long) listLength(server
.io_processed
),
7353 (unsigned long) server
.io_active_threads
,
7354 (unsigned long) server
.vm_blocked_clients
7358 for (j
= 0; j
< server
.dbnum
; j
++) {
7359 long long keys
, vkeys
;
7361 keys
= dictSize(server
.db
[j
].dict
);
7362 vkeys
= dictSize(server
.db
[j
].expires
);
7363 if (keys
|| vkeys
) {
7364 info
= sdscatprintf(info
, "db%d:keys=%lld,expires=%lld\r\n",
7371 static void infoCommand(redisClient
*c
) {
7372 sds info
= genRedisInfoString();
7373 addReplySds(c
,sdscatprintf(sdsempty(),"$%lu\r\n",
7374 (unsigned long)sdslen(info
)));
7375 addReplySds(c
,info
);
7376 addReply(c
,shared
.crlf
);
7379 static void monitorCommand(redisClient
*c
) {
7380 /* ignore MONITOR if aleady slave or in monitor mode */
7381 if (c
->flags
& REDIS_SLAVE
) return;
7383 c
->flags
|= (REDIS_SLAVE
|REDIS_MONITOR
);
7385 listAddNodeTail(server
.monitors
,c
);
7386 addReply(c
,shared
.ok
);
7389 /* ================================= Expire ================================= */
7390 static int removeExpire(redisDb
*db
, robj
*key
) {
7391 if (dictDelete(db
->expires
,key
) == DICT_OK
) {
7398 static int setExpire(redisDb
*db
, robj
*key
, time_t when
) {
7399 if (dictAdd(db
->expires
,key
,(void*)when
) == DICT_ERR
) {
7407 /* Return the expire time of the specified key, or -1 if no expire
7408 * is associated with this key (i.e. the key is non volatile) */
7409 static time_t getExpire(redisDb
*db
, robj
*key
) {
7412 /* No expire? return ASAP */
7413 if (dictSize(db
->expires
) == 0 ||
7414 (de
= dictFind(db
->expires
,key
)) == NULL
) return -1;
7416 return (time_t) dictGetEntryVal(de
);
7419 static int expireIfNeeded(redisDb
*db
, robj
*key
) {
7423 /* No expire? return ASAP */
7424 if (dictSize(db
->expires
) == 0 ||
7425 (de
= dictFind(db
->expires
,key
)) == NULL
) return 0;
7427 /* Lookup the expire */
7428 when
= (time_t) dictGetEntryVal(de
);
7429 if (time(NULL
) <= when
) return 0;
7431 /* Delete the key */
7432 dictDelete(db
->expires
,key
);
7433 server
.stat_expiredkeys
++;
7434 return dictDelete(db
->dict
,key
) == DICT_OK
;
7437 static int deleteIfVolatile(redisDb
*db
, robj
*key
) {
7440 /* No expire? return ASAP */
7441 if (dictSize(db
->expires
) == 0 ||
7442 (de
= dictFind(db
->expires
,key
)) == NULL
) return 0;
7444 /* Delete the key */
7446 server
.stat_expiredkeys
++;
7447 dictDelete(db
->expires
,key
);
7448 return dictDelete(db
->dict
,key
) == DICT_OK
;
7451 static void expireGenericCommand(redisClient
*c
, robj
*key
, robj
*param
, long offset
) {
7455 if (getLongFromObjectOrReply(c
, param
, &seconds
, NULL
) != REDIS_OK
) return;
7459 de
= dictFind(c
->db
->dict
,key
);
7461 addReply(c
,shared
.czero
);
7465 if (deleteKey(c
->db
,key
)) server
.dirty
++;
7466 addReply(c
, shared
.cone
);
7469 time_t when
= time(NULL
)+seconds
;
7470 if (setExpire(c
->db
,key
,when
)) {
7471 addReply(c
,shared
.cone
);
7474 addReply(c
,shared
.czero
);
7480 static void expireCommand(redisClient
*c
) {
7481 expireGenericCommand(c
,c
->argv
[1],c
->argv
[2],0);
7484 static void expireatCommand(redisClient
*c
) {
7485 expireGenericCommand(c
,c
->argv
[1],c
->argv
[2],time(NULL
));
7488 static void ttlCommand(redisClient
*c
) {
7492 expire
= getExpire(c
->db
,c
->argv
[1]);
7494 ttl
= (int) (expire
-time(NULL
));
7495 if (ttl
< 0) ttl
= -1;
7497 addReplySds(c
,sdscatprintf(sdsempty(),":%d\r\n",ttl
));
7500 /* ================================ MULTI/EXEC ============================== */
7502 /* Client state initialization for MULTI/EXEC */
7503 static void initClientMultiState(redisClient
*c
) {
7504 c
->mstate
.commands
= NULL
;
7505 c
->mstate
.count
= 0;
7508 /* Release all the resources associated with MULTI/EXEC state */
7509 static void freeClientMultiState(redisClient
*c
) {
7512 for (j
= 0; j
< c
->mstate
.count
; j
++) {
7514 multiCmd
*mc
= c
->mstate
.commands
+j
;
7516 for (i
= 0; i
< mc
->argc
; i
++)
7517 decrRefCount(mc
->argv
[i
]);
7520 zfree(c
->mstate
.commands
);
7523 /* Add a new command into the MULTI commands queue */
7524 static void queueMultiCommand(redisClient
*c
, struct redisCommand
*cmd
) {
7528 c
->mstate
.commands
= zrealloc(c
->mstate
.commands
,
7529 sizeof(multiCmd
)*(c
->mstate
.count
+1));
7530 mc
= c
->mstate
.commands
+c
->mstate
.count
;
7533 mc
->argv
= zmalloc(sizeof(robj
*)*c
->argc
);
7534 memcpy(mc
->argv
,c
->argv
,sizeof(robj
*)*c
->argc
);
7535 for (j
= 0; j
< c
->argc
; j
++)
7536 incrRefCount(mc
->argv
[j
]);
7540 static void multiCommand(redisClient
*c
) {
7541 if (c
->flags
& REDIS_MULTI
) {
7542 addReplySds(c
,sdsnew("-ERR MULTI calls can not be nested\r\n"));
7545 c
->flags
|= REDIS_MULTI
;
7546 addReply(c
,shared
.ok
);
7549 static void discardCommand(redisClient
*c
) {
7550 if (!(c
->flags
& REDIS_MULTI
)) {
7551 addReplySds(c
,sdsnew("-ERR DISCARD without MULTI\r\n"));
7555 freeClientMultiState(c
);
7556 initClientMultiState(c
);
7557 c
->flags
&= (~REDIS_MULTI
);
7558 addReply(c
,shared
.ok
);
7561 /* Send a MULTI command to all the slaves and AOF file. Check the execCommand
7562 * implememntation for more information. */
7563 static void execCommandReplicateMulti(redisClient
*c
) {
7564 struct redisCommand
*cmd
;
7565 robj
*multistring
= createStringObject("MULTI",5);
7567 cmd
= lookupCommand("multi");
7568 if (server
.appendonly
)
7569 feedAppendOnlyFile(cmd
,c
->db
->id
,&multistring
,1);
7570 if (listLength(server
.slaves
))
7571 replicationFeedSlaves(server
.slaves
,c
->db
->id
,&multistring
,1);
7572 decrRefCount(multistring
);
7575 static void execCommand(redisClient
*c
) {
7580 if (!(c
->flags
& REDIS_MULTI
)) {
7581 addReplySds(c
,sdsnew("-ERR EXEC without MULTI\r\n"));
7585 /* Check if we need to abort the EXEC if some WATCHed key was touched.
7586 * A failed EXEC will return a multi bulk nil object. */
7587 if (c
->flags
& REDIS_DIRTY_CAS
) {
7588 freeClientMultiState(c
);
7589 initClientMultiState(c
);
7590 c
->flags
&= ~(REDIS_MULTI
|REDIS_DIRTY_CAS
);
7592 addReply(c
,shared
.nullmultibulk
);
7596 /* Replicate a MULTI request now that we are sure the block is executed.
7597 * This way we'll deliver the MULTI/..../EXEC block as a whole and
7598 * both the AOF and the replication link will have the same consistency
7599 * and atomicity guarantees. */
7600 execCommandReplicateMulti(c
);
7602 /* Exec all the queued commands */
7603 unwatchAllKeys(c
); /* Unwatch ASAP otherwise we'll waste CPU cycles */
7604 orig_argv
= c
->argv
;
7605 orig_argc
= c
->argc
;
7606 addReplySds(c
,sdscatprintf(sdsempty(),"*%d\r\n",c
->mstate
.count
));
7607 for (j
= 0; j
< c
->mstate
.count
; j
++) {
7608 c
->argc
= c
->mstate
.commands
[j
].argc
;
7609 c
->argv
= c
->mstate
.commands
[j
].argv
;
7610 call(c
,c
->mstate
.commands
[j
].cmd
);
7612 c
->argv
= orig_argv
;
7613 c
->argc
= orig_argc
;
7614 freeClientMultiState(c
);
7615 initClientMultiState(c
);
7616 c
->flags
&= ~(REDIS_MULTI
|REDIS_DIRTY_CAS
);
7617 /* Make sure the EXEC command is always replicated / AOF, since we
7618 * always send the MULTI command (we can't know beforehand if the
7619 * next operations will contain at least a modification to the DB). */
7623 /* =========================== Blocking Operations ========================= */
7625 /* Currently Redis blocking operations support is limited to list POP ops,
7626 * so the current implementation is not fully generic, but it is also not
7627 * completely specific so it will not require a rewrite to support new
7628 * kind of blocking operations in the future.
7630 * Still it's important to note that list blocking operations can be already
7631 * used as a notification mechanism in order to implement other blocking
7632 * operations at application level, so there must be a very strong evidence
7633 * of usefulness and generality before new blocking operations are implemented.
7635 * This is how the current blocking POP works, we use BLPOP as example:
7636 * - If the user calls BLPOP and the key exists and contains a non empty list
7637 * then LPOP is called instead. So BLPOP is semantically the same as LPOP
7638 * if there is not to block.
7639 * - If instead BLPOP is called and the key does not exist or the list is
7640 * empty we need to block. In order to do so we remove the notification for
7641 * new data to read in the client socket (so that we'll not serve new
7642 * requests if the blocking request is not served). Also we put the client
7643 * in a dictionary (db->blocking_keys) mapping keys to a list of clients
7644 * blocked on these keys.
7645 * - If a PUSH operation against a key with blocked clients waiting is
7646 * performed, we serve the first in the list: basically instead of pushing
7647 * the new element inside the list we return it to the (first / oldest)
7648 * blocking client, unblock the client, and remove it from the list.
7650 * The above comment and the source code should be enough in order to understand
7651 * the implementation and modify / fix it later.
7654 /* Set a client in blocking mode for the specified key, with the specified
7656 static void blockForKeys(redisClient
*c
, robj
**keys
, int numkeys
, time_t timeout
) {
7661 c
->blocking_keys
= zmalloc(sizeof(robj
*)*numkeys
);
7662 c
->blocking_keys_num
= numkeys
;
7663 c
->blockingto
= timeout
;
7664 for (j
= 0; j
< numkeys
; j
++) {
7665 /* Add the key in the client structure, to map clients -> keys */
7666 c
->blocking_keys
[j
] = keys
[j
];
7667 incrRefCount(keys
[j
]);
7669 /* And in the other "side", to map keys -> clients */
7670 de
= dictFind(c
->db
->blocking_keys
,keys
[j
]);
7674 /* For every key we take a list of clients blocked for it */
7676 retval
= dictAdd(c
->db
->blocking_keys
,keys
[j
],l
);
7677 incrRefCount(keys
[j
]);
7678 assert(retval
== DICT_OK
);
7680 l
= dictGetEntryVal(de
);
7682 listAddNodeTail(l
,c
);
7684 /* Mark the client as a blocked client */
7685 c
->flags
|= REDIS_BLOCKED
;
7686 server
.blpop_blocked_clients
++;
7689 /* Unblock a client that's waiting in a blocking operation such as BLPOP */
7690 static void unblockClientWaitingData(redisClient
*c
) {
7695 assert(c
->blocking_keys
!= NULL
);
7696 /* The client may wait for multiple keys, so unblock it for every key. */
7697 for (j
= 0; j
< c
->blocking_keys_num
; j
++) {
7698 /* Remove this client from the list of clients waiting for this key. */
7699 de
= dictFind(c
->db
->blocking_keys
,c
->blocking_keys
[j
]);
7701 l
= dictGetEntryVal(de
);
7702 listDelNode(l
,listSearchKey(l
,c
));
7703 /* If the list is empty we need to remove it to avoid wasting memory */
7704 if (listLength(l
) == 0)
7705 dictDelete(c
->db
->blocking_keys
,c
->blocking_keys
[j
]);
7706 decrRefCount(c
->blocking_keys
[j
]);
7708 /* Cleanup the client structure */
7709 zfree(c
->blocking_keys
);
7710 c
->blocking_keys
= NULL
;
7711 c
->flags
&= (~REDIS_BLOCKED
);
7712 server
.blpop_blocked_clients
--;
7713 /* We want to process data if there is some command waiting
7714 * in the input buffer. Note that this is safe even if
7715 * unblockClientWaitingData() gets called from freeClient() because
7716 * freeClient() will be smart enough to call this function
7717 * *after* c->querybuf was set to NULL. */
7718 if (c
->querybuf
&& sdslen(c
->querybuf
) > 0) processInputBuffer(c
);
7721 /* This should be called from any function PUSHing into lists.
7722 * 'c' is the "pushing client", 'key' is the key it is pushing data against,
7723 * 'ele' is the element pushed.
7725 * If the function returns 0 there was no client waiting for a list push
7728 * If the function returns 1 there was a client waiting for a list push
7729 * against this key, the element was passed to this client thus it's not
7730 * needed to actually add it to the list and the caller should return asap. */
7731 static int handleClientsWaitingListPush(redisClient
*c
, robj
*key
, robj
*ele
) {
7732 struct dictEntry
*de
;
7733 redisClient
*receiver
;
7737 de
= dictFind(c
->db
->blocking_keys
,key
);
7738 if (de
== NULL
) return 0;
7739 l
= dictGetEntryVal(de
);
7742 receiver
= ln
->value
;
7744 addReplySds(receiver
,sdsnew("*2\r\n"));
7745 addReplyBulk(receiver
,key
);
7746 addReplyBulk(receiver
,ele
);
7747 unblockClientWaitingData(receiver
);
7751 /* Blocking RPOP/LPOP */
7752 static void blockingPopGenericCommand(redisClient
*c
, int where
) {
7757 for (j
= 1; j
< c
->argc
-1; j
++) {
7758 o
= lookupKeyWrite(c
->db
,c
->argv
[j
]);
7760 if (o
->type
!= REDIS_LIST
) {
7761 addReply(c
,shared
.wrongtypeerr
);
7764 list
*list
= o
->ptr
;
7765 if (listLength(list
) != 0) {
7766 /* If the list contains elements fall back to the usual
7767 * non-blocking POP operation */
7768 robj
*argv
[2], **orig_argv
;
7771 /* We need to alter the command arguments before to call
7772 * popGenericCommand() as the command takes a single key. */
7773 orig_argv
= c
->argv
;
7774 orig_argc
= c
->argc
;
7775 argv
[1] = c
->argv
[j
];
7779 /* Also the return value is different, we need to output
7780 * the multi bulk reply header and the key name. The
7781 * "real" command will add the last element (the value)
7782 * for us. If this souds like an hack to you it's just
7783 * because it is... */
7784 addReplySds(c
,sdsnew("*2\r\n"));
7785 addReplyBulk(c
,argv
[1]);
7786 popGenericCommand(c
,where
);
7788 /* Fix the client structure with the original stuff */
7789 c
->argv
= orig_argv
;
7790 c
->argc
= orig_argc
;
7796 /* If the list is empty or the key does not exists we must block */
7797 timeout
= strtol(c
->argv
[c
->argc
-1]->ptr
,NULL
,10);
7798 if (timeout
> 0) timeout
+= time(NULL
);
7799 blockForKeys(c
,c
->argv
+1,c
->argc
-2,timeout
);
7802 static void blpopCommand(redisClient
*c
) {
7803 blockingPopGenericCommand(c
,REDIS_HEAD
);
7806 static void brpopCommand(redisClient
*c
) {
7807 blockingPopGenericCommand(c
,REDIS_TAIL
);
7810 /* =============================== Replication ============================= */
/* Synchronous write of 'size' bytes from 'ptr' to the descriptor 'fd'.
 *
 * The descriptor is polled with aeWait() for AE_WRITABLE (third argument
 * 1000, presumably milliseconds -- confirm against aeWait()) and written
 * with write(2). A write(2) error makes the function return -1 at once.
 * The time elapsed since the call started, measured with time(), is checked
 * against 'timeout'.
 *
 * NOTE(review): the lines holding the loop, the handling of an expired
 * timeout and the final return value are not visible in this extract;
 * 'ret' is initialised to 'size' -- confirm it is the success value. */
7812 static int syncWrite(int fd
, char *ptr
, ssize_t size
, int timeout
) {
7813 ssize_t nwritten
, ret
= size
;
7814 time_t start
= time(NULL
);
/* Write only when the descriptor is reported as writable */
7818 if (aeWait(fd
,AE_WRITABLE
,1000) & AE_WRITABLE
) {
7819 nwritten
= write(fd
,ptr
,size
);
7820 if (nwritten
== -1) return -1;
/* Timeout check against the wall clock time elapsed since 'start' */
7824 if ((time(NULL
)-start
) > timeout
) {
/* Synchronous read of up to 'size' bytes from the descriptor 'fd' into
 * 'ptr'.
 *
 * The descriptor is polled with aeWait() for AE_READABLE (third argument
 * 1000, presumably milliseconds -- confirm against aeWait()) and read with
 * read(2). A read(2) error makes the function return -1 at once. The time
 * elapsed since the call started, measured with time(), is checked against
 * 'timeout'.
 *
 * NOTE(review): the lines holding the loop, the handling of an expired
 * timeout and the final return value are not visible in this extract;
 * 'totread' starts at 0 and presumably accumulates the bytes read. */
7832 static int syncRead(int fd
, char *ptr
, ssize_t size
, int timeout
) {
7833 ssize_t nread
, totread
= 0;
7834 time_t start
= time(NULL
);
/* Read only when the descriptor is reported as readable */
7838 if (aeWait(fd
,AE_READABLE
,1000) & AE_READABLE
) {
7839 nread
= read(fd
,ptr
,size
);
7840 if (nread
== -1) return -1;
/* Timeout check against the wall clock time elapsed since 'start' */
7845 if ((time(NULL
)-start
) > timeout
) {
/* Synchronous read of a line from 'fd' into 'ptr'.
 *
 * Bytes are fetched one at a time through syncRead() using the given
 * 'timeout'; a syncRead() failure makes the function return -1. When at
 * least one byte was stored and the last stored byte is a '\r', that byte
 * is replaced by the string terminator.
 *
 * NOTE(review): the loop, the use of 'size' and the return value on
 * success are not visible in this extract. */
7853 static int syncReadLine(int fd
, char *ptr
, ssize_t size
, int timeout
) {
/* One byte per call: 'c' receives the next character of the line */
7860 if (syncRead(fd
,&c
,1,timeout
) == -1) return -1;
/* Strip the '\r' of a "\r\n" line terminator */
7863 if (nread
&& *(ptr
-1) == '\r') *(ptr
-1) = '\0';
7874 static void syncCommand(redisClient
*c
) {
7875 /* ignore SYNC if aleady slave or in monitor mode */
7876 if (c
->flags
& REDIS_SLAVE
) return;
7878 /* SYNC can't be issued when the server has pending data to send to
7879 * the client about already issued commands. We need a fresh reply
7880 * buffer registering the differences between the BGSAVE and the current
7881 * dataset, so that we can copy to other slaves if needed. */
7882 if (listLength(c
->reply
) != 0) {
7883 addReplySds(c
,sdsnew("-ERR SYNC is invalid with pending input\r\n"));
7887 redisLog(REDIS_NOTICE
,"Slave ask for synchronization");
7888 /* Here we need to check if there is a background saving operation
7889 * in progress, or if it is required to start one */
7890 if (server
.bgsavechildpid
!= -1) {
7891 /* Ok a background save is in progress. Let's check if it is a good
7892 * one for replication, i.e. if there is another slave that is
7893 * registering differences since the server forked to save */
7898 listRewind(server
.slaves
,&li
);
7899 while((ln
= listNext(&li
))) {
7901 if (slave
->replstate
== REDIS_REPL_WAIT_BGSAVE_END
) break;
7904 /* Perfect, the server is already registering differences for
7905 * another slave. Set the right state, and copy the buffer. */
7906 listRelease(c
->reply
);
7907 c
->reply
= listDup(slave
->reply
);
7908 c
->replstate
= REDIS_REPL_WAIT_BGSAVE_END
;
7909 redisLog(REDIS_NOTICE
,"Waiting for end of BGSAVE for SYNC");
7911 /* No way, we need to wait for the next BGSAVE in order to
7912 * register differences */
7913 c
->replstate
= REDIS_REPL_WAIT_BGSAVE_START
;
7914 redisLog(REDIS_NOTICE
,"Waiting for next BGSAVE for SYNC");
7917 /* Ok we don't have a BGSAVE in progress, let's start one */
7918 redisLog(REDIS_NOTICE
,"Starting BGSAVE for SYNC");
7919 if (rdbSaveBackground(server
.dbfilename
) != REDIS_OK
) {
7920 redisLog(REDIS_NOTICE
,"Replication failed, can't BGSAVE");
7921 addReplySds(c
,sdsnew("-ERR Unalbe to perform background save\r\n"));
7924 c
->replstate
= REDIS_REPL_WAIT_BGSAVE_END
;
7927 c
->flags
|= REDIS_SLAVE
;
7929 listAddNodeTail(server
.slaves
,c
);
7933 static void sendBulkToSlave(aeEventLoop
*el
, int fd
, void *privdata
, int mask
) {
7934 redisClient
*slave
= privdata
;
7936 REDIS_NOTUSED(mask
);
7937 char buf
[REDIS_IOBUF_LEN
];
7938 ssize_t nwritten
, buflen
;
7940 if (slave
->repldboff
== 0) {
7941 /* Write the bulk write count before to transfer the DB. In theory here
7942 * we don't know how much room there is in the output buffer of the
7943 * socket, but in pratice SO_SNDLOWAT (the minimum count for output
7944 * operations) will never be smaller than the few bytes we need. */
7947 bulkcount
= sdscatprintf(sdsempty(),"$%lld\r\n",(unsigned long long)
7949 if (write(fd
,bulkcount
,sdslen(bulkcount
)) != (signed)sdslen(bulkcount
))
7957 lseek(slave
->repldbfd
,slave
->repldboff
,SEEK_SET
);
7958 buflen
= read(slave
->repldbfd
,buf
,REDIS_IOBUF_LEN
);
7960 redisLog(REDIS_WARNING
,"Read error sending DB to slave: %s",
7961 (buflen
== 0) ? "premature EOF" : strerror(errno
));
7965 if ((nwritten
= write(fd
,buf
,buflen
)) == -1) {
7966 redisLog(REDIS_VERBOSE
,"Write error sending DB to slave: %s",
7971 slave
->repldboff
+= nwritten
;
7972 if (slave
->repldboff
== slave
->repldbsize
) {
7973 close(slave
->repldbfd
);
7974 slave
->repldbfd
= -1;
7975 aeDeleteFileEvent(server
.el
,slave
->fd
,AE_WRITABLE
);
7976 slave
->replstate
= REDIS_REPL_ONLINE
;
7977 if (aeCreateFileEvent(server
.el
, slave
->fd
, AE_WRITABLE
,
7978 sendReplyToClient
, slave
) == AE_ERR
) {
7982 addReplySds(slave
,sdsempty());
7983 redisLog(REDIS_NOTICE
,"Synchronization with slave succeeded");
7987 /* This function is called at the end of every background saving.
7988 * The argument bgsaveerr is REDIS_OK if the background saving succeeded
7989 * otherwise REDIS_ERR is passed to the function.
7991 * The goal of this function is to handle slaves waiting for a successful
7992 * background saving in order to perform non-blocking synchronization. */
7993 static void updateSlavesWaitingBgsave(int bgsaveerr
) {
7995 int startbgsave
= 0;
7998 listRewind(server
.slaves
,&li
);
7999 while((ln
= listNext(&li
))) {
8000 redisClient
*slave
= ln
->value
;
8002 if (slave
->replstate
== REDIS_REPL_WAIT_BGSAVE_START
) {
8004 slave
->replstate
= REDIS_REPL_WAIT_BGSAVE_END
;
8005 } else if (slave
->replstate
== REDIS_REPL_WAIT_BGSAVE_END
) {
8006 struct redis_stat buf
;
8008 if (bgsaveerr
!= REDIS_OK
) {
8010 redisLog(REDIS_WARNING
,"SYNC failed. BGSAVE child returned an error");
8013 if ((slave
->repldbfd
= open(server
.dbfilename
,O_RDONLY
)) == -1 ||
8014 redis_fstat(slave
->repldbfd
,&buf
) == -1) {
8016 redisLog(REDIS_WARNING
,"SYNC failed. Can't open/stat DB after BGSAVE: %s", strerror(errno
));
8019 slave
->repldboff
= 0;
8020 slave
->repldbsize
= buf
.st_size
;
8021 slave
->replstate
= REDIS_REPL_SEND_BULK
;
8022 aeDeleteFileEvent(server
.el
,slave
->fd
,AE_WRITABLE
);
8023 if (aeCreateFileEvent(server
.el
, slave
->fd
, AE_WRITABLE
, sendBulkToSlave
, slave
) == AE_ERR
) {
8030 if (rdbSaveBackground(server
.dbfilename
) != REDIS_OK
) {
8033 listRewind(server
.slaves
,&li
);
8034 redisLog(REDIS_WARNING
,"SYNC failed. BGSAVE failed");
8035 while((ln
= listNext(&li
))) {
8036 redisClient
*slave
= ln
->value
;
8038 if (slave
->replstate
== REDIS_REPL_WAIT_BGSAVE_START
)
8045 static int syncWithMaster(void) {
8046 char buf
[1024], tmpfile
[256], authcmd
[1024];
8048 int fd
= anetTcpConnect(NULL
,server
.masterhost
,server
.masterport
);
8049 int dfd
, maxtries
= 5;
8052 redisLog(REDIS_WARNING
,"Unable to connect to MASTER: %s",
8057 /* AUTH with the master if required. */
8058 if(server
.masterauth
) {
8059 snprintf(authcmd
, 1024, "AUTH %s\r\n", server
.masterauth
);
8060 if (syncWrite(fd
, authcmd
, strlen(server
.masterauth
)+7, 5) == -1) {
8062 redisLog(REDIS_WARNING
,"Unable to AUTH to MASTER: %s",
8066 /* Read the AUTH result. */
8067 if (syncReadLine(fd
,buf
,1024,3600) == -1) {
8069 redisLog(REDIS_WARNING
,"I/O error reading auth result from MASTER: %s",
8073 if (buf
[0] != '+') {
8075 redisLog(REDIS_WARNING
,"Cannot AUTH to MASTER, is the masterauth password correct?");
8080 /* Issue the SYNC command */
8081 if (syncWrite(fd
,"SYNC \r\n",7,5) == -1) {
8083 redisLog(REDIS_WARNING
,"I/O error writing to MASTER: %s",
8087 /* Read the bulk write count */
8088 if (syncReadLine(fd
,buf
,1024,3600) == -1) {
8090 redisLog(REDIS_WARNING
,"I/O error reading bulk count from MASTER: %s",
8094 if (buf
[0] != '$') {
8096 redisLog(REDIS_WARNING
,"Bad protocol from MASTER, the first byte is not '$', are you sure the host and port are right?");
8099 dumpsize
= strtol(buf
+1,NULL
,10);
8100 redisLog(REDIS_NOTICE
,"Receiving %ld bytes data dump from MASTER",dumpsize
);
8101 /* Read the bulk write data on a temp file */
8103 snprintf(tmpfile
,256,
8104 "temp-%d.%ld.rdb",(int)time(NULL
),(long int)getpid());
8105 dfd
= open(tmpfile
,O_CREAT
|O_WRONLY
|O_EXCL
,0644);
8106 if (dfd
!= -1) break;
8111 redisLog(REDIS_WARNING
,"Opening the temp file needed for MASTER <-> SLAVE synchronization: %s",strerror(errno
));
8115 int nread
, nwritten
;
8117 nread
= read(fd
,buf
,(dumpsize
< 1024)?dumpsize
:1024);
8119 redisLog(REDIS_WARNING
,"I/O error trying to sync with MASTER: %s",
8125 nwritten
= write(dfd
,buf
,nread
);
8126 if (nwritten
== -1) {
8127 redisLog(REDIS_WARNING
,"Write error writing to the DB dump file needed for MASTER <-> SLAVE synchrnonization: %s", strerror(errno
));
8135 if (rename(tmpfile
,server
.dbfilename
) == -1) {
8136 redisLog(REDIS_WARNING
,"Failed trying to rename the temp DB into dump.rdb in MASTER <-> SLAVE synchronization: %s", strerror(errno
));
8142 if (rdbLoad(server
.dbfilename
) != REDIS_OK
) {
8143 redisLog(REDIS_WARNING
,"Failed trying to load the MASTER synchronization DB from disk");
8147 server
.master
= createClient(fd
);
8148 server
.master
->flags
|= REDIS_MASTER
;
8149 server
.master
->authenticated
= 1;
8150 server
.replstate
= REDIS_REPL_CONNECTED
;
8154 static void slaveofCommand(redisClient
*c
) {
8155 if (!strcasecmp(c
->argv
[1]->ptr
,"no") &&
8156 !strcasecmp(c
->argv
[2]->ptr
,"one")) {
8157 if (server
.masterhost
) {
8158 sdsfree(server
.masterhost
);
8159 server
.masterhost
= NULL
;
8160 if (server
.master
) freeClient(server
.master
);
8161 server
.replstate
= REDIS_REPL_NONE
;
8162 redisLog(REDIS_NOTICE
,"MASTER MODE enabled (user request)");
8165 sdsfree(server
.masterhost
);
8166 server
.masterhost
= sdsdup(c
->argv
[1]->ptr
);
8167 server
.masterport
= atoi(c
->argv
[2]->ptr
);
8168 if (server
.master
) freeClient(server
.master
);
8169 server
.replstate
= REDIS_REPL_CONNECT
;
8170 redisLog(REDIS_NOTICE
,"SLAVE OF %s:%d enabled (user request)",
8171 server
.masterhost
, server
.masterport
);
8173 addReply(c
,shared
.ok
);
8176 /* ============================ Maxmemory directive ======================== */
8178 /* Try to free one object form the pre-allocated objects free list.
8179 * This is useful under low mem conditions as by default we take 1 million
8180 * free objects allocated. On success REDIS_OK is returned, otherwise
8182 static int tryFreeOneObjectFromFreelist(void) {
8185 if (server
.vm_enabled
) pthread_mutex_lock(&server
.obj_freelist_mutex
);
8186 if (listLength(server
.objfreelist
)) {
8187 listNode
*head
= listFirst(server
.objfreelist
);
8188 o
= listNodeValue(head
);
8189 listDelNode(server
.objfreelist
,head
);
8190 if (server
.vm_enabled
) pthread_mutex_unlock(&server
.obj_freelist_mutex
);
8194 if (server
.vm_enabled
) pthread_mutex_unlock(&server
.obj_freelist_mutex
);
8199 /* This function gets called when 'maxmemory' is set on the config file to limit
8200 * the max memory used by the server, and we are out of memory.
8201 * This function will try to, in order:
8203 * - Free objects from the free list
8204 * - Try to remove keys with an EXPIRE set
8206 * It is not possible to free enough memory to reach used-memory < maxmemory
8207 * the server will start refusing commands that will enlarge even more the
8210 static void freeMemoryIfNeeded(void) {
8211 while (server
.maxmemory
&& zmalloc_used_memory() > server
.maxmemory
) {
8212 int j
, k
, freed
= 0;
8214 if (tryFreeOneObjectFromFreelist() == REDIS_OK
) continue;
8215 for (j
= 0; j
< server
.dbnum
; j
++) {
8217 robj
*minkey
= NULL
;
8218 struct dictEntry
*de
;
8220 if (dictSize(server
.db
[j
].expires
)) {
8222 /* From a sample of three keys drop the one nearest to
8223 * the natural expire */
8224 for (k
= 0; k
< 3; k
++) {
8227 de
= dictGetRandomKey(server
.db
[j
].expires
);
8228 t
= (time_t) dictGetEntryVal(de
);
8229 if (minttl
== -1 || t
< minttl
) {
8230 minkey
= dictGetEntryKey(de
);
8234 deleteKey(server
.db
+j
,minkey
);
8237 if (!freed
) return; /* nothing to free... */
8241 /* ============================== Append Only file ========================== */
8243 /* Write the append only file buffer on disk.
8245 * Since we are required to write the AOF before replying to the client,
8246 * and the only way the client socket can get a write is entering when the
8247 * the event loop, we accumulate all the AOF writes in a memory
8248 * buffer and write it on disk using this function just before entering
8249 * the event loop again. */
8250 static void flushAppendOnlyFile(void) {
8254 if (sdslen(server
.aofbuf
) == 0) return;
8256 /* We want to perform a single write. This should be guaranteed atomic
8257 * at least if the filesystem we are writing is a real physical one.
8258 * While this will save us against the server being killed I don't think
8259 * there is much to do about the whole server stopping for power problems
8261 nwritten
= write(server
.appendfd
,server
.aofbuf
,sdslen(server
.aofbuf
));
8262 if (nwritten
!= (signed)sdslen(server
.aofbuf
)) {
8263 /* Ooops, we are in troubles. The best thing to do for now is
8264 * aborting instead of giving the illusion that everything is
8265 * working as expected. */
8266 if (nwritten
== -1) {
8267 redisLog(REDIS_WARNING
,"Exiting on error writing to the append-only file: %s",strerror(errno
));
8269 redisLog(REDIS_WARNING
,"Exiting on short write while writing to the append-only file: %s",strerror(errno
));
8273 sdsfree(server
.aofbuf
);
8274 server
.aofbuf
= sdsempty();
8276 /* Don't Fsync if no-appendfsync-on-rewrite is set to yes and we have
8277 * childs performing heavy I/O on disk. */
8278 if (server
.no_appendfsync_on_rewrite
&&
8279 (server
.bgrewritechildpid
!= -1 || server
.bgsavechildpid
!= -1))
8281 /* Fsync if needed */
8283 if (server
.appendfsync
== APPENDFSYNC_ALWAYS
||
8284 (server
.appendfsync
== APPENDFSYNC_EVERYSEC
&&
8285 now
-server
.lastfsync
> 1))
8287 /* aof_fsync is defined as fdatasync() for Linux in order to avoid
8288 * flushing metadata. */
8289 aof_fsync(server
.appendfd
); /* Let's try to get this data on the disk */
8290 server
.lastfsync
= now
;
8294 static sds
catAppendOnlyGenericCommand(sds buf
, int argc
, robj
**argv
) {
8296 buf
= sdscatprintf(buf
,"*%d\r\n",argc
);
8297 for (j
= 0; j
< argc
; j
++) {
8298 robj
*o
= getDecodedObject(argv
[j
]);
8299 buf
= sdscatprintf(buf
,"$%lu\r\n",(unsigned long)sdslen(o
->ptr
));
8300 buf
= sdscatlen(buf
,o
->ptr
,sdslen(o
->ptr
));
8301 buf
= sdscatlen(buf
,"\r\n",2);
8307 static sds
catAppendOnlyExpireAtCommand(sds buf
, robj
*key
, robj
*seconds
) {
8312 /* Make sure we can use strtol */
8313 seconds
= getDecodedObject(seconds
);
8314 when
= time(NULL
)+strtol(seconds
->ptr
,NULL
,10);
8315 decrRefCount(seconds
);
8317 argv
[0] = createStringObject("EXPIREAT",8);
8319 argv
[2] = createObject(REDIS_STRING
,
8320 sdscatprintf(sdsempty(),"%ld",when
));
8321 buf
= catAppendOnlyGenericCommand(buf
, argc
, argv
);
8322 decrRefCount(argv
[0]);
8323 decrRefCount(argv
[2]);
8327 static void feedAppendOnlyFile(struct redisCommand
*cmd
, int dictid
, robj
**argv
, int argc
) {
8328 sds buf
= sdsempty();
8331 /* The DB this command was targetting is not the same as the last command
8332 * we appendend. To issue a SELECT command is needed. */
8333 if (dictid
!= server
.appendseldb
) {
8336 snprintf(seldb
,sizeof(seldb
),"%d",dictid
);
8337 buf
= sdscatprintf(buf
,"*2\r\n$6\r\nSELECT\r\n$%lu\r\n%s\r\n",
8338 (unsigned long)strlen(seldb
),seldb
);
8339 server
.appendseldb
= dictid
;
8342 if (cmd
->proc
== expireCommand
) {
8343 /* Translate EXPIRE into EXPIREAT */
8344 buf
= catAppendOnlyExpireAtCommand(buf
,argv
[1],argv
[2]);
8345 } else if (cmd
->proc
== setexCommand
) {
8346 /* Translate SETEX to SET and EXPIREAT */
8347 tmpargv
[0] = createStringObject("SET",3);
8348 tmpargv
[1] = argv
[1];
8349 tmpargv
[2] = argv
[3];
8350 buf
= catAppendOnlyGenericCommand(buf
,3,tmpargv
);
8351 decrRefCount(tmpargv
[0]);
8352 buf
= catAppendOnlyExpireAtCommand(buf
,argv
[1],argv
[2]);
8354 buf
= catAppendOnlyGenericCommand(buf
,argc
,argv
);
8357 /* Append to the AOF buffer. This will be flushed on disk just before
8358 * of re-entering the event loop, so before the client will get a
8359 * positive reply about the operation performed. */
8360 server
.aofbuf
= sdscatlen(server
.aofbuf
,buf
,sdslen(buf
));
8362 /* If a background append only file rewriting is in progress we want to
8363 * accumulate the differences between the child DB and the current one
8364 * in a buffer, so that when the child process will do its work we
8365 * can append the differences to the new append only file. */
8366 if (server
.bgrewritechildpid
!= -1)
8367 server
.bgrewritebuf
= sdscatlen(server
.bgrewritebuf
,buf
,sdslen(buf
));
8372 /* In Redis commands are always executed in the context of a client, so in
8373 * order to load the append only file we need to create a fake client. */
/* The client structure is allocated with zmalloc(). The visible lines give
 * it an empty query buffer, an empty reply list whose free and dup methods
 * are decrRefCount and dupClientReplyValue, and an initialised MULTI state.
 *
 * NOTE(review): the remaining field initialisations and the return
 * statement are not visible in this extract. */
8374 static struct redisClient
*createFakeClient(void) {
8375 struct redisClient
*c
= zmalloc(sizeof(*c
));
8379 c
->querybuf
= sdsempty();
8383 /* We set the fake client as a slave waiting for the synchronization
8384 * so that Redis will not try to send replies to this client. */
8385 c
->replstate
= REDIS_REPL_WAIT_BGSAVE_START
;
8386 c
->reply
= listCreate();
8387 listSetFreeMethod(c
->reply
,decrRefCount
);
8388 listSetDupMethod(c
->reply
,dupClientReplyValue
);
8389 initClientMultiState(c
);
8393 static void freeFakeClient(struct redisClient
*c
) {
8394 sdsfree(c
->querybuf
);
8395 listRelease(c
->reply
);
8396 freeClientMultiState(c
);
8400 /* Replay the append log file. On error REDIS_OK is returned. On non fatal
8401 * error (the append only file is zero-length) REDIS_ERR is returned. On
8402 * fatal error an error message is logged and the program exists. */
8403 int loadAppendOnlyFile(char *filename
) {
8404 struct redisClient
*fakeClient
;
8405 FILE *fp
= fopen(filename
,"r");
8406 struct redis_stat sb
;
8407 unsigned long long loadedkeys
= 0;
8408 int appendonly
= server
.appendonly
;
8410 if (redis_fstat(fileno(fp
),&sb
) != -1 && sb
.st_size
== 0)
8414 redisLog(REDIS_WARNING
,"Fatal error: can't open the append log file for reading: %s",strerror(errno
));
8418 /* Temporarily disable AOF, to prevent EXEC from feeding a MULTI
8419 * to the same file we're about to read. */
8420 server
.appendonly
= 0;
8422 fakeClient
= createFakeClient();
8429 struct redisCommand
*cmd
;
8431 if (fgets(buf
,sizeof(buf
),fp
) == NULL
) {
8437 if (buf
[0] != '*') goto fmterr
;
8439 argv
= zmalloc(sizeof(robj
*)*argc
);
8440 for (j
= 0; j
< argc
; j
++) {
8441 if (fgets(buf
,sizeof(buf
),fp
) == NULL
) goto readerr
;
8442 if (buf
[0] != '$') goto fmterr
;
8443 len
= strtol(buf
+1,NULL
,10);
8444 argsds
= sdsnewlen(NULL
,len
);
8445 if (len
&& fread(argsds
,len
,1,fp
) == 0) goto fmterr
;
8446 argv
[j
] = createObject(REDIS_STRING
,argsds
);
8447 if (fread(buf
,2,1,fp
) == 0) goto fmterr
; /* discard CRLF */
8450 /* Command lookup */
8451 cmd
= lookupCommand(argv
[0]->ptr
);
8453 redisLog(REDIS_WARNING
,"Unknown command '%s' reading the append only file", argv
[0]->ptr
);
8456 /* Try object encoding */
8457 if (cmd
->flags
& REDIS_CMD_BULK
)
8458 argv
[argc
-1] = tryObjectEncoding(argv
[argc
-1]);
8459 /* Run the command in the context of a fake client */
8460 fakeClient
->argc
= argc
;
8461 fakeClient
->argv
= argv
;
8462 cmd
->proc(fakeClient
);
8463 /* Discard the reply objects list from the fake client */
8464 while(listLength(fakeClient
->reply
))
8465 listDelNode(fakeClient
->reply
,listFirst(fakeClient
->reply
));
8466 /* Clean up, ready for the next command */
8467 for (j
= 0; j
< argc
; j
++) decrRefCount(argv
[j
]);
8469 /* Handle swapping while loading big datasets when VM is on */
8471 if (server
.vm_enabled
&& (loadedkeys
% 5000) == 0) {
8472 while (zmalloc_used_memory() > server
.vm_max_memory
) {
8473 if (vmSwapOneObjectBlocking() == REDIS_ERR
) break;
8478 /* This point can only be reached when EOF is reached without errors.
8479 * If the client is in the middle of a MULTI/EXEC, log error and quit. */
8480 if (fakeClient
->flags
& REDIS_MULTI
) goto readerr
;
8483 freeFakeClient(fakeClient
);
8484 server
.appendonly
= appendonly
;
8489 redisLog(REDIS_WARNING
,"Unexpected end of file reading the append only file");
8491 redisLog(REDIS_WARNING
,"Unrecoverable error reading the append only file: %s", strerror(errno
));
8495 redisLog(REDIS_WARNING
,"Bad file format reading the append only file");
8499 /* Write an object into a file in the bulk format $<count>\r\n<payload>\r\n */
8500 static int fwriteBulkObject(FILE *fp
, robj
*obj
) {
8504 /* Avoid the incr/decr ref count business if possible to help
8505 * copy-on-write (we are often in a child process when this function
8507 * Also makes sure that key objects don't get incrRefCount-ed when VM
8509 if (obj
->encoding
!= REDIS_ENCODING_RAW
) {
8510 obj
= getDecodedObject(obj
);
8513 snprintf(buf
,sizeof(buf
),"$%ld\r\n",(long)sdslen(obj
->ptr
));
8514 if (fwrite(buf
,strlen(buf
),1,fp
) == 0) goto err
;
8515 if (sdslen(obj
->ptr
) && fwrite(obj
->ptr
,sdslen(obj
->ptr
),1,fp
) == 0)
8517 if (fwrite("\r\n",2,1,fp
) == 0) goto err
;
8518 if (decrrc
) decrRefCount(obj
);
8521 if (decrrc
) decrRefCount(obj
);
/* Write a binary-safe string into a file in the bulk format
 * $<count>\r\n<payload>\r\n
 *
 * 's' points to 'len' bytes of payload, that may contain any byte value.
 * With 'len' equal to zero only the header and the final CRLF are written.
 *
 * Returns 1 on success, 0 on write error (never -1). */
static int fwriteBulkString(FILE *fp, char *s, unsigned long len) {
    char buf[128];

    /* 'len' is unsigned: it is printed with %lu */
    snprintf(buf,sizeof(buf),"$%lu\r\n",len);
    if (fwrite(buf,strlen(buf),1,fp) == 0) return 0;
    if (len && fwrite(s,len,1,fp) == 0) return 0;
    if (fwrite("\r\n",2,1,fp) == 0) return 0;
    return 1;
}
/* Write a double value in bulk format $<count>\r\n<payload>\r\n
 *
 * The payload is the value formatted with "%.17g".
 * Returns 1 on success, 0 on write error. */
static int fwriteBulkDouble(FILE *fp, double d) {
    char payload[128], header[128];
    size_t paylen;

    /* The payload carries its own trailing CRLF, not counted in <count> */
    snprintf(payload,sizeof(payload),"%.17g\r\n",d);
    paylen = strlen(payload);
    snprintf(header,sizeof(header),"$%lu\r\n",(unsigned long)paylen-2);

    if (fwrite(header,strlen(header),1,fp) == 0 ||
        fwrite(payload,paylen,1,fp) == 0)
    {
        return 0;
    }
    return 1;
}
/* Write a long value in bulk format $<count>\r\n<payload>\r\n
 *
 * The payload is the decimal representation of 'l'.
 * Returns 1 on success, 0 on write error. */
static int fwriteBulkLong(FILE *fp, long l) {
    char payload[128], header[128];
    size_t paylen;

    /* The payload carries its own trailing CRLF, not counted in <count> */
    snprintf(payload,sizeof(payload),"%ld\r\n",l);
    paylen = strlen(payload);
    snprintf(header,sizeof(header),"$%lu\r\n",(unsigned long)paylen-2);

    if (fwrite(header,strlen(header),1,fp) == 0 ||
        fwrite(payload,paylen,1,fp) == 0)
    {
        return 0;
    }
    return 1;
}
8559 /* Write a sequence of commands able to fully rebuild the dataset into
8560 * "filename". Used both by REWRITEAOF and BGREWRITEAOF. */
8561 static int rewriteAppendOnlyFile(char *filename
) {
8562 dictIterator
*di
= NULL
;
8567 time_t now
= time(NULL
);
8569 /* Note that we have to use a different temp name here compared to the
8570 * one used by rewriteAppendOnlyFileBackground() function. */
8571 snprintf(tmpfile
,256,"temp-rewriteaof-%d.aof", (int) getpid());
8572 fp
= fopen(tmpfile
,"w");
8574 redisLog(REDIS_WARNING
, "Failed rewriting the append only file: %s", strerror(errno
));
8577 for (j
= 0; j
< server
.dbnum
; j
++) {
8578 char selectcmd
[] = "*2\r\n$6\r\nSELECT\r\n";
8579 redisDb
*db
= server
.db
+j
;
8581 if (dictSize(d
) == 0) continue;
8582 di
= dictGetIterator(d
);
8588 /* SELECT the new DB */
8589 if (fwrite(selectcmd
,sizeof(selectcmd
)-1,1,fp
) == 0) goto werr
;
8590 if (fwriteBulkLong(fp
,j
) == 0) goto werr
;
8592 /* Iterate this DB writing every entry */
8593 while((de
= dictNext(di
)) != NULL
) {
8598 key
= dictGetEntryKey(de
);
8599 /* If the value for this key is swapped, load a preview in memory.
8600 * We use a "swapped" flag to remember if we need to free the
8601 * value object instead to just increment the ref count anyway
8602 * in order to avoid copy-on-write of pages if we are forked() */
8603 if (!server
.vm_enabled
|| key
->storage
== REDIS_VM_MEMORY
||
8604 key
->storage
== REDIS_VM_SWAPPING
) {
8605 o
= dictGetEntryVal(de
);
8608 o
= vmPreviewObject(key
);
8611 expiretime
= getExpire(db
,key
);
8613 /* Save the key and associated value */
8614 if (o
->type
== REDIS_STRING
) {
8615 /* Emit a SET command */
8616 char cmd
[]="*3\r\n$3\r\nSET\r\n";
8617 if (fwrite(cmd
,sizeof(cmd
)-1,1,fp
) == 0) goto werr
;
8619 if (fwriteBulkObject(fp
,key
) == 0) goto werr
;
8620 if (fwriteBulkObject(fp
,o
) == 0) goto werr
;
8621 } else if (o
->type
== REDIS_LIST
) {
8622 /* Emit the RPUSHes needed to rebuild the list */
8623 list
*list
= o
->ptr
;
8627 listRewind(list
,&li
);
8628 while((ln
= listNext(&li
))) {
8629 char cmd
[]="*3\r\n$5\r\nRPUSH\r\n";
8630 robj
*eleobj
= listNodeValue(ln
);
8632 if (fwrite(cmd
,sizeof(cmd
)-1,1,fp
) == 0) goto werr
;
8633 if (fwriteBulkObject(fp
,key
) == 0) goto werr
;
8634 if (fwriteBulkObject(fp
,eleobj
) == 0) goto werr
;
8636 } else if (o
->type
== REDIS_SET
) {
8637 /* Emit the SADDs needed to rebuild the set */
8639 dictIterator
*di
= dictGetIterator(set
);
8642 while((de
= dictNext(di
)) != NULL
) {
8643 char cmd
[]="*3\r\n$4\r\nSADD\r\n";
8644 robj
*eleobj
= dictGetEntryKey(de
);
8646 if (fwrite(cmd
,sizeof(cmd
)-1,1,fp
) == 0) goto werr
;
8647 if (fwriteBulkObject(fp
,key
) == 0) goto werr
;
8648 if (fwriteBulkObject(fp
,eleobj
) == 0) goto werr
;
8650 dictReleaseIterator(di
);
8651 } else if (o
->type
== REDIS_ZSET
) {
8652 /* Emit the ZADDs needed to rebuild the sorted set */
8654 dictIterator
*di
= dictGetIterator(zs
->dict
);
8657 while((de
= dictNext(di
)) != NULL
) {
8658 char cmd
[]="*4\r\n$4\r\nZADD\r\n";
8659 robj
*eleobj
= dictGetEntryKey(de
);
8660 double *score
= dictGetEntryVal(de
);
8662 if (fwrite(cmd
,sizeof(cmd
)-1,1,fp
) == 0) goto werr
;
8663 if (fwriteBulkObject(fp
,key
) == 0) goto werr
;
8664 if (fwriteBulkDouble(fp
,*score
) == 0) goto werr
;
8665 if (fwriteBulkObject(fp
,eleobj
) == 0) goto werr
;
8667 dictReleaseIterator(di
);
8668 } else if (o
->type
== REDIS_HASH
) {
8669 char cmd
[]="*4\r\n$4\r\nHSET\r\n";
8671 /* Emit the HSETs needed to rebuild the hash */
8672 if (o
->encoding
== REDIS_ENCODING_ZIPMAP
) {
8673 unsigned char *p
= zipmapRewind(o
->ptr
);
8674 unsigned char *field
, *val
;
8675 unsigned int flen
, vlen
;
8677 while((p
= zipmapNext(p
,&field
,&flen
,&val
,&vlen
)) != NULL
) {
8678 if (fwrite(cmd
,sizeof(cmd
)-1,1,fp
) == 0) goto werr
;
8679 if (fwriteBulkObject(fp
,key
) == 0) goto werr
;
8680 if (fwriteBulkString(fp
,(char*)field
,flen
) == -1)
8682 if (fwriteBulkString(fp
,(char*)val
,vlen
) == -1)
8686 dictIterator
*di
= dictGetIterator(o
->ptr
);
8689 while((de
= dictNext(di
)) != NULL
) {
8690 robj
*field
= dictGetEntryKey(de
);
8691 robj
*val
= dictGetEntryVal(de
);
8693 if (fwrite(cmd
,sizeof(cmd
)-1,1,fp
) == 0) goto werr
;
8694 if (fwriteBulkObject(fp
,key
) == 0) goto werr
;
8695 if (fwriteBulkObject(fp
,field
) == -1) return -1;
8696 if (fwriteBulkObject(fp
,val
) == -1) return -1;
8698 dictReleaseIterator(di
);
8701 redisPanic("Unknown object type");
8703 /* Save the expire time */
8704 if (expiretime
!= -1) {
8705 char cmd
[]="*3\r\n$8\r\nEXPIREAT\r\n";
8706 /* If this key is already expired skip it */
8707 if (expiretime
< now
) continue;
8708 if (fwrite(cmd
,sizeof(cmd
)-1,1,fp
) == 0) goto werr
;
8709 if (fwriteBulkObject(fp
,key
) == 0) goto werr
;
8710 if (fwriteBulkLong(fp
,expiretime
) == 0) goto werr
;
8712 if (swapped
) decrRefCount(o
);
8714 dictReleaseIterator(di
);
8717 /* Make sure data will not remain on the OS's output buffers */
8719 aof_fsync(fileno(fp
));
8722 /* Use RENAME to make sure the DB file is changed atomically only
8723 * if the generate DB file is ok. */
8724 if (rename(tmpfile
,filename
) == -1) {
8725 redisLog(REDIS_WARNING
,"Error moving temp append only file on the final destination: %s", strerror(errno
));
8729 redisLog(REDIS_NOTICE
,"SYNC append only file rewrite performed");
8735 redisLog(REDIS_WARNING
,"Write error writing append only file on disk: %s", strerror(errno
));
8736 if (di
) dictReleaseIterator(di
);
8740 /* This is how rewriting of the append only file in background works:
8742 * 1) The user calls BGREWRITEAOF
8743 * 2) Redis calls this function, that forks():
8744 * 2a) the child rewrite the append only file in a temp file.
8745 * 2b) the parent accumulates differences in server.bgrewritebuf.
8746 * 3) When the child finished '2a' exists.
8747 * 4) The parent will trap the exit code, if it's OK, will append the
8748 * data accumulated into server.bgrewritebuf into the temp file, and
8749 * finally will rename(2) the temp file in the actual file name.
8750 * The the new file is reopened as the new append only file. Profit!
8752 static int rewriteAppendOnlyFileBackground(void) {
8755 if (server
.bgrewritechildpid
!= -1) return REDIS_ERR
;
8756 if (server
.vm_enabled
) waitEmptyIOJobsQueue();
8757 if ((childpid
= fork()) == 0) {
8761 if (server
.vm_enabled
) vmReopenSwapFile();
8763 snprintf(tmpfile
,256,"temp-rewriteaof-bg-%d.aof", (int) getpid());
8764 if (rewriteAppendOnlyFile(tmpfile
) == REDIS_OK
) {
8771 if (childpid
== -1) {
8772 redisLog(REDIS_WARNING
,
8773 "Can't rewrite append only file in background: fork: %s",
8777 redisLog(REDIS_NOTICE
,
8778 "Background append only file rewriting started by pid %d",childpid
);
8779 server
.bgrewritechildpid
= childpid
;
8780 updateDictResizePolicy();
8781 /* We set appendseldb to -1 in order to force the next call to the
8782 * feedAppendOnlyFile() to issue a SELECT command, so the differences
8783 * accumulated by the parent into server.bgrewritebuf will start
8784 * with a SELECT statement and it will be safe to merge. */
8785 server
.appendseldb
= -1;
8788 return REDIS_OK
; /* unreached */
8791 static void bgrewriteaofCommand(redisClient
*c
) {
8792 if (server
.bgrewritechildpid
!= -1) {
8793 addReplySds(c
,sdsnew("-ERR background append only file rewriting already in progress\r\n"));
8796 if (rewriteAppendOnlyFileBackground() == REDIS_OK
) {
8797 char *status
= "+Background append only file rewriting started\r\n";
8798 addReplySds(c
,sdsnew(status
));
8800 addReply(c
,shared
.err
);
/* Unlink the temp file used by the background AOF rewrite child having
 * pid 'childpid' (temp-rewriteaof-bg-<pid>.aof). */
static void aofRemoveTempFile(pid_t childpid) {
    char path[256];

    snprintf(path,sizeof(path),"temp-rewriteaof-bg-%d.aof", (int) childpid);
    unlink(path);
}
8811 /* Virtual Memory is composed mainly of two subsystems:
8812 * - Blocking Virtual Memory
8813 * - Threaded Virtual Memory I/O
8814 * The two parts are not fully decoupled, but functions are split among two
8815 * different sections of the source code (delimited by comments) in order to
8816 * make more clear what functionality is about the blocking VM and what about
8817 * the threaded (not blocking) VM.
8821 * Redis VM is a blocking VM (one that blocks reading swapped values from
8822 * disk into memory when a value swapped out is needed in memory) that is made
8823 * unblocking by trying to examine the command argument vector in order to
8824 * load in background values that will likely be needed in order to exec
8825 * the command. The command is executed only once all the relevant keys
8826 * are loaded into memory.
8828 * This basically is almost as simple as a blocking VM, but almost as parallel
8829 * as a fully non-blocking VM.
8832 /* Called when the user switches from "appendonly yes" to "appendonly no"
8833 * at runtime using the CONFIG command. */
8834 static void stopAppendOnly(void) {
8835 flushAppendOnlyFile();
8836 aof_fsync(server
.appendfd
);
8837 close(server
.appendfd
);
8839 server
.appendfd
= -1;
8840 server
.appendseldb
= -1;
8841 server
.appendonly
= 0;
8842 /* rewrite operation in progress? kill it, wait child exit */
8843 if (server
.bgsavechildpid
!= -1) {
8846 if (kill(server
.bgsavechildpid
,SIGKILL
) != -1)
8847 wait3(&statloc
,0,NULL
);
8848 /* reset the buffer accumulating changes while the child saves */
8849 sdsfree(server
.bgrewritebuf
);
8850 server
.bgrewritebuf
= sdsempty();
8851 server
.bgsavechildpid
= -1;
8855 /* Called when the user switches from "appendonly no" to "appendonly yes"
8856 * at runtime using the CONFIG command. */
8857 static int startAppendOnly(void) {
8858 server
.appendonly
= 1;
8859 server
.lastfsync
= time(NULL
);
8860 server
.appendfd
= open(server
.appendfilename
,O_WRONLY
|O_APPEND
|O_CREAT
,0644);
8861 if (server
.appendfd
== -1) {
8862 redisLog(REDIS_WARNING
,"Used tried to switch on AOF via CONFIG, but I can't open the AOF file: %s",strerror(errno
));
8865 if (rewriteAppendOnlyFileBackground() == REDIS_ERR
) {
8866 server
.appendonly
= 0;
8867 close(server
.appendfd
);
8868 redisLog(REDIS_WARNING
,"Used tried to switch on AOF via CONFIG, I can't trigger a background AOF rewrite operation. Check the above logs for more info about the error.",strerror(errno
));
8874 /* =================== Virtual Memory - Blocking Side ====================== */
8876 static void vmInit(void) {
8882 if (server
.vm_max_threads
!= 0)
8883 zmalloc_enable_thread_safeness(); /* we need thread safe zmalloc() */
8885 redisLog(REDIS_NOTICE
,"Using '%s' as swap file",server
.vm_swap_file
);
8886 /* Try to open the old swap file, otherwise create it */
8887 if ((server
.vm_fp
= fopen(server
.vm_swap_file
,"r+b")) == NULL
) {
8888 server
.vm_fp
= fopen(server
.vm_swap_file
,"w+b");
8890 if (server
.vm_fp
== NULL
) {
8891 redisLog(REDIS_WARNING
,
8892 "Can't open the swap file: %s. Exiting.",
8896 server
.vm_fd
= fileno(server
.vm_fp
);
8897 /* Lock the swap file for writing, this is useful in order to avoid
8898 * another instance to use the same swap file for a config error. */
8899 fl
.l_type
= F_WRLCK
;
8900 fl
.l_whence
= SEEK_SET
;
8901 fl
.l_start
= fl
.l_len
= 0;
8902 if (fcntl(server
.vm_fd
,F_SETLK
,&fl
) == -1) {
8903 redisLog(REDIS_WARNING
,
8904 "Can't lock the swap file at '%s': %s. Make sure it is not used by another Redis instance.", server
.vm_swap_file
, strerror(errno
));
8908 server
.vm_next_page
= 0;
8909 server
.vm_near_pages
= 0;
8910 server
.vm_stats_used_pages
= 0;
8911 server
.vm_stats_swapped_objects
= 0;
8912 server
.vm_stats_swapouts
= 0;
8913 server
.vm_stats_swapins
= 0;
8914 totsize
= server
.vm_pages
*server
.vm_page_size
;
8915 redisLog(REDIS_NOTICE
,"Allocating %lld bytes of swap file",totsize
);
8916 if (ftruncate(server
.vm_fd
,totsize
) == -1) {
8917 redisLog(REDIS_WARNING
,"Can't ftruncate swap file: %s. Exiting.",
8921 redisLog(REDIS_NOTICE
,"Swap file allocated with success");
8923 server
.vm_bitmap
= zmalloc((server
.vm_pages
+7)/8);
8924 redisLog(REDIS_VERBOSE
,"Allocated %lld bytes page table for %lld pages",
8925 (long long) (server
.vm_pages
+7)/8, server
.vm_pages
);
8926 memset(server
.vm_bitmap
,0,(server
.vm_pages
+7)/8);
8928 /* Initialize threaded I/O (used by Virtual Memory) */
8929 server
.io_newjobs
= listCreate();
8930 server
.io_processing
= listCreate();
8931 server
.io_processed
= listCreate();
8932 server
.io_ready_clients
= listCreate();
8933 pthread_mutex_init(&server
.io_mutex
,NULL
);
8934 pthread_mutex_init(&server
.obj_freelist_mutex
,NULL
);
8935 pthread_mutex_init(&server
.io_swapfile_mutex
,NULL
);
8936 server
.io_active_threads
= 0;
8937 if (pipe(pipefds
) == -1) {
8938 redisLog(REDIS_WARNING
,"Unable to intialized VM: pipe(2): %s. Exiting."
8942 server
.io_ready_pipe_read
= pipefds
[0];
8943 server
.io_ready_pipe_write
= pipefds
[1];
8944 redisAssert(anetNonBlock(NULL
,server
.io_ready_pipe_read
) != ANET_ERR
);
8945 /* LZF requires a lot of stack */
8946 pthread_attr_init(&server
.io_threads_attr
);
8947 pthread_attr_getstacksize(&server
.io_threads_attr
, &stacksize
);
8948 while (stacksize
< REDIS_THREAD_STACK_SIZE
) stacksize
*= 2;
8949 pthread_attr_setstacksize(&server
.io_threads_attr
, stacksize
);
8950 /* Listen for events in the threaded I/O pipe */
8951 if (aeCreateFileEvent(server
.el
, server
.io_ready_pipe_read
, AE_READABLE
,
8952 vmThreadedIOCompletedJob
, NULL
) == AE_ERR
)
8953 oom("creating file event");
8956 /* Mark the page as used */
8957 static void vmMarkPageUsed(off_t page
) {
8958 off_t byte
= page
/8;
8960 redisAssert(vmFreePage(page
) == 1);
8961 server
.vm_bitmap
[byte
] |= 1<<bit
;
8964 /* Mark N contiguous pages as used, with 'page' being the first. */
8965 static void vmMarkPagesUsed(off_t page
, off_t count
) {
8968 for (j
= 0; j
< count
; j
++)
8969 vmMarkPageUsed(page
+j
);
8970 server
.vm_stats_used_pages
+= count
;
8971 redisLog(REDIS_DEBUG
,"Mark USED pages: %lld pages at %lld\n",
8972 (long long)count
, (long long)page
);
8975 /* Mark the page as free */
8976 static void vmMarkPageFree(off_t page
) {
8977 off_t byte
= page
/8;
8979 redisAssert(vmFreePage(page
) == 0);
8980 server
.vm_bitmap
[byte
] &= ~(1<<bit
);
8983 /* Mark N contiguous pages as free, with 'page' being the first. */
8984 static void vmMarkPagesFree(off_t page
, off_t count
) {
8987 for (j
= 0; j
< count
; j
++)
8988 vmMarkPageFree(page
+j
);
8989 server
.vm_stats_used_pages
-= count
;
8990 redisLog(REDIS_DEBUG
,"Mark FREE pages: %lld pages at %lld\n",
8991 (long long)count
, (long long)page
);
8994 /* Test if the page is free */
8995 static int vmFreePage(off_t page
) {
8996 off_t byte
= page
/8;
8998 return (server
.vm_bitmap
[byte
] & (1<<bit
)) == 0;
9001 /* Find N contiguous free pages storing the first page of the cluster in *first.
9002 * Returns REDIS_OK if it was able to find N contiguous pages, otherwise
9003 * REDIS_ERR is returned.
9005 * This function uses a simple algorithm: we try to allocate
9006 * REDIS_VM_MAX_NEAR_PAGES sequentially, when we reach this limit we start
9007 * again from the start of the swap file searching for free spaces.
9009 * If it looks pretty clear that there are no free pages near our offset
9010 * we try to find less populated places doing a forward jump of
9011 * REDIS_VM_MAX_RANDOM_JUMP, then we start scanning again a few pages
9012 * without hurry, and then we jump again and so forth...
9014 * This function can be improved using a free list to avoid to guess
9015 * too much, since we could collect data about freed pages.
9017 * note: I implemented this function just after watching an episode of
9018 * Battlestar Galactica, where the hybrid was continuing to say "JUMP!"
9020 static int vmFindContiguousPages(off_t
*first
, off_t n
) {
9021 off_t base
, offset
= 0, since_jump
= 0, numfree
= 0;
9023 if (server
.vm_near_pages
== REDIS_VM_MAX_NEAR_PAGES
) {
9024 server
.vm_near_pages
= 0;
9025 server
.vm_next_page
= 0;
9027 server
.vm_near_pages
++; /* Yet another try for pages near to the old ones */
9028 base
= server
.vm_next_page
;
9030 while(offset
< server
.vm_pages
) {
9031 off_t
this = base
+offset
;
9033 /* If we overflow, restart from page zero */
9034 if (this >= server
.vm_pages
) {
9035 this -= server
.vm_pages
;
9037 /* Just overflowed, what we found on tail is no longer
9038 * interesting, as it's no longer contiguous. */
9042 if (vmFreePage(this)) {
9043 /* This is a free page */
9045 /* Already got N free pages? Return to the caller, with success */
9047 *first
= this-(n
-1);
9048 server
.vm_next_page
= this+1;
9049 redisLog(REDIS_DEBUG
, "FOUND CONTIGUOUS PAGES: %lld pages at %lld\n", (long long) n
, (long long) *first
);
9053 /* The current one is not a free page */
9057 /* Fast-forward if the current page is not free and we already
9058 * searched enough near this place. */
9060 if (!numfree
&& since_jump
>= REDIS_VM_MAX_RANDOM_JUMP
/4) {
9061 offset
+= random() % REDIS_VM_MAX_RANDOM_JUMP
;
9063 /* Note that even if we rewind after the jump, we are don't need
9064 * to make sure numfree is set to zero as we only jump *if* it
9065 * is set to zero. */
9067 /* Otherwise just check the next page */
9074 /* Write the specified object at the specified page of the swap file */
9075 static int vmWriteObjectOnSwap(robj
*o
, off_t page
) {
9076 if (server
.vm_enabled
) pthread_mutex_lock(&server
.io_swapfile_mutex
);
9077 if (fseeko(server
.vm_fp
,page
*server
.vm_page_size
,SEEK_SET
) == -1) {
9078 if (server
.vm_enabled
) pthread_mutex_unlock(&server
.io_swapfile_mutex
);
9079 redisLog(REDIS_WARNING
,
9080 "Critical VM problem in vmWriteObjectOnSwap(): can't seek: %s",
9084 rdbSaveObject(server
.vm_fp
,o
);
9085 fflush(server
.vm_fp
);
9086 if (server
.vm_enabled
) pthread_mutex_unlock(&server
.io_swapfile_mutex
);
9090 /* Swap the 'val' object relative to 'key' into disk. Store all the information
9091 * needed to later retrieve the object into the key object.
9092 * If we can't find enough contiguous empty pages to swap the object on disk
9093 * REDIS_ERR is returned. */
9094 static int vmSwapObjectBlocking(robj
*key
, robj
*val
) {
9095 off_t pages
= rdbSavedObjectPages(val
,NULL
);
9098 assert(key
->storage
== REDIS_VM_MEMORY
);
9099 assert(key
->refcount
== 1);
9100 if (vmFindContiguousPages(&page
,pages
) == REDIS_ERR
) return REDIS_ERR
;
9101 if (vmWriteObjectOnSwap(val
,page
) == REDIS_ERR
) return REDIS_ERR
;
9102 key
->vm
.page
= page
;
9103 key
->vm
.usedpages
= pages
;
9104 key
->storage
= REDIS_VM_SWAPPED
;
9105 key
->vtype
= val
->type
;
9106 decrRefCount(val
); /* Deallocate the object from memory. */
9107 vmMarkPagesUsed(page
,pages
);
9108 redisLog(REDIS_DEBUG
,"VM: object %s swapped out at %lld (%lld pages)",
9109 (unsigned char*) key
->ptr
,
9110 (unsigned long long) page
, (unsigned long long) pages
);
9111 server
.vm_stats_swapped_objects
++;
9112 server
.vm_stats_swapouts
++;
9116 static robj
*vmReadObjectFromSwap(off_t page
, int type
) {
9119 if (server
.vm_enabled
) pthread_mutex_lock(&server
.io_swapfile_mutex
);
9120 if (fseeko(server
.vm_fp
,page
*server
.vm_page_size
,SEEK_SET
) == -1) {
9121 redisLog(REDIS_WARNING
,
9122 "Unrecoverable VM problem in vmReadObjectFromSwap(): can't seek: %s",
9126 o
= rdbLoadObject(type
,server
.vm_fp
);
9128 redisLog(REDIS_WARNING
, "Unrecoverable VM problem in vmReadObjectFromSwap(): can't load object from swap file: %s", strerror(errno
));
9131 if (server
.vm_enabled
) pthread_mutex_unlock(&server
.io_swapfile_mutex
);
9135 /* Load the value object relative to the 'key' object from swap to memory.
9136 * The newly allocated object is returned.
9138 * If preview is true the unserialized object is returned to the caller but
9139 * no changes are made to the key object, nor the pages are marked as freed */
9140 static robj
*vmGenericLoadObject(robj
*key
, int preview
) {
9143 redisAssert(key
->storage
== REDIS_VM_SWAPPED
|| key
->storage
== REDIS_VM_LOADING
);
9144 val
= vmReadObjectFromSwap(key
->vm
.page
,key
->vtype
);
9146 key
->storage
= REDIS_VM_MEMORY
;
9147 key
->vm
.atime
= server
.unixtime
;
9148 vmMarkPagesFree(key
->vm
.page
,key
->vm
.usedpages
);
9149 redisLog(REDIS_DEBUG
, "VM: object %s loaded from disk",
9150 (unsigned char*) key
->ptr
);
9151 server
.vm_stats_swapped_objects
--;
9153 redisLog(REDIS_DEBUG
, "VM: object %s previewed from disk",
9154 (unsigned char*) key
->ptr
);
9156 server
.vm_stats_swapins
++;
9160 /* Plain object loading, from swap to memory */
9161 static robj
*vmLoadObject(robj
*key
) {
9162 /* If we are loading the object in background, stop it, we
9163 * need to load this object synchronously ASAP. */
9164 if (key
->storage
== REDIS_VM_LOADING
)
9165 vmCancelThreadedIOJob(key
);
9166 return vmGenericLoadObject(key
,0);
9169 /* Just load the value on disk, without to modify the key.
9170 * This is useful when we want to perform some operation on the value
9171 * without to really bring it from swap to memory, like while saving the
9172 * dataset or rewriting the append only log. */
9173 static robj
*vmPreviewObject(robj
*key
) {
9174 return vmGenericLoadObject(key
,1);
9177 /* How a good candidate is this object for swapping?
9178 * The better candidate it is, the greater the returned value.
9180 * Currently we try to perform a fast estimation of the object size in
9181 * memory, and combine it with aging informations.
9183 * Basically swappability = idle-time * log(estimated size)
9185 * Bigger objects are preferred over smaller objects, but not
9186 * proportionally, this is why we use the logarithm. This algorithm is
9187 * just a first try and will probably be tuned later. */
9188 static double computeObjectSwappability(robj
*o
) {
9189 time_t age
= server
.unixtime
- o
->vm
.atime
;
9193 struct dictEntry
*de
;
9196 if (age
<= 0) return 0;
9199 if (o
->encoding
!= REDIS_ENCODING_RAW
) {
9202 asize
= sdslen(o
->ptr
)+sizeof(*o
)+sizeof(long)*2;
9207 listNode
*ln
= listFirst(l
);
9209 asize
= sizeof(list
);
9211 robj
*ele
= ln
->value
;
9214 elesize
= (ele
->encoding
== REDIS_ENCODING_RAW
) ?
9215 (sizeof(*o
)+sdslen(ele
->ptr
)) :
9217 asize
+= (sizeof(listNode
)+elesize
)*listLength(l
);
9222 z
= (o
->type
== REDIS_ZSET
);
9223 d
= z
? ((zset
*)o
->ptr
)->dict
: o
->ptr
;
9225 asize
= sizeof(dict
)+(sizeof(struct dictEntry
*)*dictSlots(d
));
9226 if (z
) asize
+= sizeof(zset
)-sizeof(dict
);
9231 de
= dictGetRandomKey(d
);
9232 ele
= dictGetEntryKey(de
);
9233 elesize
= (ele
->encoding
== REDIS_ENCODING_RAW
) ?
9234 (sizeof(*o
)+sdslen(ele
->ptr
)) :
9236 asize
+= (sizeof(struct dictEntry
)+elesize
)*dictSize(d
);
9237 if (z
) asize
+= sizeof(zskiplistNode
)*dictSize(d
);
9241 if (o
->encoding
== REDIS_ENCODING_ZIPMAP
) {
9242 unsigned char *p
= zipmapRewind((unsigned char*)o
->ptr
);
9243 unsigned int len
= zipmapLen((unsigned char*)o
->ptr
);
9244 unsigned int klen
, vlen
;
9245 unsigned char *key
, *val
;
9247 if ((p
= zipmapNext(p
,&key
,&klen
,&val
,&vlen
)) == NULL
) {
9251 asize
= len
*(klen
+vlen
+3);
9252 } else if (o
->encoding
== REDIS_ENCODING_HT
) {
9254 asize
= sizeof(dict
)+(sizeof(struct dictEntry
*)*dictSlots(d
));
9259 de
= dictGetRandomKey(d
);
9260 ele
= dictGetEntryKey(de
);
9261 elesize
= (ele
->encoding
== REDIS_ENCODING_RAW
) ?
9262 (sizeof(*o
)+sdslen(ele
->ptr
)) :
9264 ele
= dictGetEntryVal(de
);
9265 elesize
= (ele
->encoding
== REDIS_ENCODING_RAW
) ?
9266 (sizeof(*o
)+sdslen(ele
->ptr
)) :
9268 asize
+= (sizeof(struct dictEntry
)+elesize
)*dictSize(d
);
9273 return (double)age
*log(1+asize
);
9276 /* Try to swap an object that's a good candidate for swapping.
9277 * Returns REDIS_OK if the object was swapped, REDIS_ERR if it's not possible
9278 * to swap any object at all.
9280 * If 'usethreaded' is true, Redis will try to swap the object in background
9281 * using I/O threads. */
9282 static int vmSwapOneObject(int usethreads
) {
9284 struct dictEntry
*best
= NULL
;
9285 double best_swappability
= 0;
9286 redisDb
*best_db
= NULL
;
9289 for (j
= 0; j
< server
.dbnum
; j
++) {
9290 redisDb
*db
= server
.db
+j
;
9291 /* Why maxtries is set to 100?
9292 * Because this way (usually) we'll find 1 object even if just 1% - 2%
9293 * are swappable objects */
9296 if (dictSize(db
->dict
) == 0) continue;
9297 for (i
= 0; i
< 5; i
++) {
9299 double swappability
;
9301 if (maxtries
) maxtries
--;
9302 de
= dictGetRandomKey(db
->dict
);
9303 key
= dictGetEntryKey(de
);
9304 val
= dictGetEntryVal(de
);
9305 /* Only swap objects that are currently in memory.
9307 * Also don't swap shared objects if threaded VM is on, as we
9308 * try to ensure that the main thread does not touch the
9309 * object while the I/O thread is using it, but we can't
9310 * control other keys without adding additional mutex. */
9311 if (key
->storage
!= REDIS_VM_MEMORY
||
9312 (server
.vm_max_threads
!= 0 && val
->refcount
!= 1)) {
9313 if (maxtries
) i
--; /* don't count this try */
9316 swappability
= computeObjectSwappability(val
);
9317 if (!best
|| swappability
> best_swappability
) {
9319 best_swappability
= swappability
;
9324 if (best
== NULL
) return REDIS_ERR
;
9325 key
= dictGetEntryKey(best
);
9326 val
= dictGetEntryVal(best
);
9328 redisLog(REDIS_DEBUG
,"Key with best swappability: %s, %f",
9329 key
->ptr
, best_swappability
);
9331 /* Unshare the key if needed */
9332 if (key
->refcount
> 1) {
9333 robj
*newkey
= dupStringObject(key
);
9335 key
= dictGetEntryKey(best
) = newkey
;
9339 vmSwapObjectThreaded(key
,val
,best_db
);
9342 if (vmSwapObjectBlocking(key
,val
) == REDIS_OK
) {
9343 dictGetEntryVal(best
) = NULL
;
/* Swap one object out without using the I/O threads.
 * Returns what vmSwapOneObject(0) returns. */
static int vmSwapOneObjectBlocking(void) {
    return vmSwapOneObject(0);
}
/* Swap one object out asking vmSwapOneObject() to use the I/O threads.
 * Returns what vmSwapOneObject(1) returns. */
static int vmSwapOneObjectThreaded(void) {
    return vmSwapOneObject(1);
}
9359 /* Return true if it's safe to swap out objects in a given moment.
9360 * Basically we don't want to swap objects out while there is a BGSAVE
9361 * or a BGAEOREWRITE running in backgroud. */
9362 static int vmCanSwapOut(void) {
9363 return (server
.bgsavechildpid
== -1 && server
.bgrewritechildpid
== -1);
9366 /* Delete a key if swapped. Returns 1 if the key was found, was swapped
9367 * and was deleted. Otherwise 0 is returned. */
9368 static int deleteIfSwapped(redisDb
*db
, robj
*key
) {
9372 if ((de
= dictFind(db
->dict
,key
)) == NULL
) return 0;
9373 foundkey
= dictGetEntryKey(de
);
9374 if (foundkey
->storage
== REDIS_VM_MEMORY
) return 0;
9379 /* =================== Virtual Memory - Threaded I/O ======================= */
9381 static void freeIOJob(iojob
*j
) {
9382 if ((j
->type
== REDIS_IOJOB_PREPARE_SWAP
||
9383 j
->type
== REDIS_IOJOB_DO_SWAP
||
9384 j
->type
== REDIS_IOJOB_LOAD
) && j
->val
!= NULL
)
9385 decrRefCount(j
->val
);
9386 /* We don't decrRefCount the j->key field as we did't incremented
9387 * the count creating IO Jobs. This is because the key field here is
9388 * just used as an indentifier and if a key is removed the Job should
9389 * never be touched again. */
9393 /* Every time a thread finished a Job, it writes a byte into the write side
9394 * of an unix pipe in order to "awake" the main thread, and this function
9396 static void vmThreadedIOCompletedJob(aeEventLoop
*el
, int fd
, void *privdata
,
9400 int retval
, processed
= 0, toprocess
= -1, trytoswap
= 1;
9402 REDIS_NOTUSED(mask
);
9403 REDIS_NOTUSED(privdata
);
9405 /* For every byte we read in the read side of the pipe, there is one
9406 * I/O job completed to process. */
9407 while((retval
= read(fd
,buf
,1)) == 1) {
9411 struct dictEntry
*de
;
9413 redisLog(REDIS_DEBUG
,"Processing I/O completed job");
9415 /* Get the processed element (the oldest one) */
9417 assert(listLength(server
.io_processed
) != 0);
9418 if (toprocess
== -1) {
9419 toprocess
= (listLength(server
.io_processed
)*REDIS_MAX_COMPLETED_JOBS_PROCESSED
)/100;
9420 if (toprocess
<= 0) toprocess
= 1;
9422 ln
= listFirst(server
.io_processed
);
9424 listDelNode(server
.io_processed
,ln
);
9426 /* If this job is marked as canceled, just ignore it */
9431 /* Post process it in the main thread, as there are things we
9432 * can do just here to avoid race conditions and/or invasive locks */
9433 redisLog(REDIS_DEBUG
,"Job %p type: %d, key at %p (%s) refcount: %d\n", (void*) j
, j
->type
, (void*)j
->key
, (char*)j
->key
->ptr
, j
->key
->refcount
);
9434 de
= dictFind(j
->db
->dict
,j
->key
);
9436 key
= dictGetEntryKey(de
);
9437 if (j
->type
== REDIS_IOJOB_LOAD
) {
9440 /* Key loaded, bring it at home */
9441 key
->storage
= REDIS_VM_MEMORY
;
9442 key
->vm
.atime
= server
.unixtime
;
9443 vmMarkPagesFree(key
->vm
.page
,key
->vm
.usedpages
);
9444 redisLog(REDIS_DEBUG
, "VM: object %s loaded from disk (threaded)",
9445 (unsigned char*) key
->ptr
);
9446 server
.vm_stats_swapped_objects
--;
9447 server
.vm_stats_swapins
++;
9448 dictGetEntryVal(de
) = j
->val
;
9449 incrRefCount(j
->val
);
9452 /* Handle clients waiting for this key to be loaded. */
9453 handleClientsBlockedOnSwappedKey(db
,key
);
9454 } else if (j
->type
== REDIS_IOJOB_PREPARE_SWAP
) {
9455 /* Now we know the amount of pages required to swap this object.
9456 * Let's find some space for it, and queue this task again
9457 * rebranded as REDIS_IOJOB_DO_SWAP. */
9458 if (!vmCanSwapOut() ||
9459 vmFindContiguousPages(&j
->page
,j
->pages
) == REDIS_ERR
)
9461 /* Ooops... no space or we can't swap as there is
9462 * a fork()ed Redis trying to save stuff on disk. */
9464 key
->storage
= REDIS_VM_MEMORY
; /* undo operation */
9466 /* Note that we need to mark this pages as used now,
9467 * if the job will be canceled, we'll mark them as freed
9469 vmMarkPagesUsed(j
->page
,j
->pages
);
9470 j
->type
= REDIS_IOJOB_DO_SWAP
;
9475 } else if (j
->type
== REDIS_IOJOB_DO_SWAP
) {
9478 /* Key swapped. We can finally free some memory. */
9479 if (key
->storage
!= REDIS_VM_SWAPPING
) {
9480 printf("key->storage: %d\n",key
->storage
);
9481 printf("key->name: %s\n",(char*)key
->ptr
);
9482 printf("key->refcount: %d\n",key
->refcount
);
9483 printf("val: %p\n",(void*)j
->val
);
9484 printf("val->type: %d\n",j
->val
->type
);
9485 printf("val->ptr: %s\n",(char*)j
->val
->ptr
);
9487 redisAssert(key
->storage
== REDIS_VM_SWAPPING
);
9488 val
= dictGetEntryVal(de
);
9489 key
->vm
.page
= j
->page
;
9490 key
->vm
.usedpages
= j
->pages
;
9491 key
->storage
= REDIS_VM_SWAPPED
;
9492 key
->vtype
= j
->val
->type
;
9493 decrRefCount(val
); /* Deallocate the object from memory. */
9494 dictGetEntryVal(de
) = NULL
;
9495 redisLog(REDIS_DEBUG
,
9496 "VM: object %s swapped out at %lld (%lld pages) (threaded)",
9497 (unsigned char*) key
->ptr
,
9498 (unsigned long long) j
->page
, (unsigned long long) j
->pages
);
9499 server
.vm_stats_swapped_objects
++;
9500 server
.vm_stats_swapouts
++;
9502 /* Put a few more swap requests in queue if we are still
9504 if (trytoswap
&& vmCanSwapOut() &&
9505 zmalloc_used_memory() > server
.vm_max_memory
)
9510 more
= listLength(server
.io_newjobs
) <
9511 (unsigned) server
.vm_max_threads
;
9513 /* Don't waste CPU time if swappable objects are rare. */
9514 if (vmSwapOneObjectThreaded() == REDIS_ERR
) {
9522 if (processed
== toprocess
) return;
9524 if (retval
< 0 && errno
!= EAGAIN
) {
9525 redisLog(REDIS_WARNING
,
9526 "WARNING: read(2) error in vmThreadedIOCompletedJob() %s",
9531 static void lockThreadedIO(void) {
9532 pthread_mutex_lock(&server
.io_mutex
);
9535 static void unlockThreadedIO(void) {
9536 pthread_mutex_unlock(&server
.io_mutex
);
9539 /* Remove the specified object from the threaded I/O queue if still not
9540 * processed, otherwise make sure to flag it as canceled. */
9541 static void vmCancelThreadedIOJob(robj
*o
) {
9543 server
.io_newjobs
, /* 0 */
9544 server
.io_processing
, /* 1 */
9545 server
.io_processed
/* 2 */
9549 assert(o
->storage
== REDIS_VM_LOADING
|| o
->storage
== REDIS_VM_SWAPPING
);
9552 /* Search for a matching key in one of the queues */
9553 for (i
= 0; i
< 3; i
++) {
9557 listRewind(lists
[i
],&li
);
9558 while ((ln
= listNext(&li
)) != NULL
) {
9559 iojob
*job
= ln
->value
;
9561 if (job
->canceled
) continue; /* Skip this, already canceled. */
9562 if (job
->key
== o
) {
9563 redisLog(REDIS_DEBUG
,"*** CANCELED %p (%s) (type %d) (LIST ID %d)\n",
9564 (void*)job
, (char*)o
->ptr
, job
->type
, i
);
9565 /* Mark the pages as free since the swap didn't happened
9566 * or happened but is now discarded. */
9567 if (i
!= 1 && job
->type
== REDIS_IOJOB_DO_SWAP
)
9568 vmMarkPagesFree(job
->page
,job
->pages
);
9569 /* Cancel the job. It depends on the list the job is
9572 case 0: /* io_newjobs */
9573 /* If the job was yet not processed the best thing to do
9574 * is to remove it from the queue at all */
9576 listDelNode(lists
[i
],ln
);
9578 case 1: /* io_processing */
9579 /* Oh Shi- the thread is messing with the Job:
9581 * Probably it's accessing the object if this is a
9582 * PREPARE_SWAP or DO_SWAP job.
9583 * If it's a LOAD job it may be reading from disk and
9584 * if we don't wait for the job to terminate before to
9585 * cancel it, maybe in a few microseconds data can be
9586 * corrupted in this pages. So the short story is:
9588 * Better to wait for the job to move into the
9589 * next queue (processed)... */
9591 /* We try again and again until the job is completed. */
9593 /* But let's wait some time for the I/O thread
9594 * to finish with this job. After all this condition
9595 * should be very rare. */
9598 case 2: /* io_processed */
9599 /* The job was already processed, that's easy...
9600 * just mark it as canceled so that we'll ignore it
9601 * when processing completed jobs. */
9605 /* Finally we have to adjust the storage type of the object
9606 * in order to "UNDO" the operaiton. */
9607 if (o
->storage
== REDIS_VM_LOADING
)
9608 o
->storage
= REDIS_VM_SWAPPED
;
9609 else if (o
->storage
== REDIS_VM_SWAPPING
)
9610 o
->storage
= REDIS_VM_MEMORY
;
9617 assert(1 != 1); /* We should never reach this */
9620 static void *IOThreadEntryPoint(void *arg
) {
9625 pthread_detach(pthread_self());
9627 /* Get a new job to process */
9629 if (listLength(server
.io_newjobs
) == 0) {
9630 /* No new jobs in queue, exit. */
9631 redisLog(REDIS_DEBUG
,"Thread %ld exiting, nothing to do",
9632 (long) pthread_self());
9633 server
.io_active_threads
--;
9637 ln
= listFirst(server
.io_newjobs
);
9639 listDelNode(server
.io_newjobs
,ln
);
9640 /* Add the job in the processing queue */
9641 j
->thread
= pthread_self();
9642 listAddNodeTail(server
.io_processing
,j
);
9643 ln
= listLast(server
.io_processing
); /* We use ln later to remove it */
9645 redisLog(REDIS_DEBUG
,"Thread %ld got a new job (type %d): %p about key '%s'",
9646 (long) pthread_self(), j
->type
, (void*)j
, (char*)j
->key
->ptr
);
9648 /* Process the Job */
9649 if (j
->type
== REDIS_IOJOB_LOAD
) {
9650 j
->val
= vmReadObjectFromSwap(j
->page
,j
->key
->vtype
);
9651 } else if (j
->type
== REDIS_IOJOB_PREPARE_SWAP
) {
9652 FILE *fp
= fopen("/dev/null","w+");
9653 j
->pages
= rdbSavedObjectPages(j
->val
,fp
);
9655 } else if (j
->type
== REDIS_IOJOB_DO_SWAP
) {
9656 if (vmWriteObjectOnSwap(j
->val
,j
->page
) == REDIS_ERR
)
9660 /* Done: insert the job into the processed queue */
9661 redisLog(REDIS_DEBUG
,"Thread %ld completed the job: %p (key %s)",
9662 (long) pthread_self(), (void*)j
, (char*)j
->key
->ptr
);
9664 listDelNode(server
.io_processing
,ln
);
9665 listAddNodeTail(server
.io_processed
,j
);
9668 /* Signal the main thread there is new stuff to process */
9669 assert(write(server
.io_ready_pipe_write
,"x",1) == 1);
9671 return NULL
; /* never reached */
9674 static void spawnIOThread(void) {
9676 sigset_t mask
, omask
;
9680 sigaddset(&mask
,SIGCHLD
);
9681 sigaddset(&mask
,SIGHUP
);
9682 sigaddset(&mask
,SIGPIPE
);
9683 pthread_sigmask(SIG_SETMASK
, &mask
, &omask
);
9684 while ((err
= pthread_create(&thread
,&server
.io_threads_attr
,IOThreadEntryPoint
,NULL
)) != 0) {
9685 redisLog(REDIS_WARNING
,"Unable to spawn an I/O thread: %s",
9689 pthread_sigmask(SIG_SETMASK
, &omask
, NULL
);
9690 server
.io_active_threads
++;
9693 /* We need to wait for the last thread to exit before we are able to
9694 * fork() in order to BGSAVE or BGREWRITEAOF. */
9695 static void waitEmptyIOJobsQueue(void) {
9697 int io_processed_len
;
9700 if (listLength(server
.io_newjobs
) == 0 &&
9701 listLength(server
.io_processing
) == 0 &&
9702 server
.io_active_threads
== 0)
9707 /* While waiting for empty jobs queue condition we post-process some
9708 * finshed job, as I/O threads may be hanging trying to write against
9709 * the io_ready_pipe_write FD but there are so much pending jobs that
9711 io_processed_len
= listLength(server
.io_processed
);
9713 if (io_processed_len
) {
9714 vmThreadedIOCompletedJob(NULL
,server
.io_ready_pipe_read
,NULL
,0);
9715 usleep(1000); /* 1 millisecond */
9717 usleep(10000); /* 10 milliseconds */
9722 static void vmReopenSwapFile(void) {
9723 /* Note: we don't close the old one as we are in the child process
9724 * and don't want to mess at all with the original file object. */
9725 server
.vm_fp
= fopen(server
.vm_swap_file
,"r+b");
9726 if (server
.vm_fp
== NULL
) {
9727 redisLog(REDIS_WARNING
,"Can't re-open the VM swap file: %s. Exiting.",
9728 server
.vm_swap_file
);
9731 server
.vm_fd
= fileno(server
.vm_fp
);
9734 /* This function must be called while with threaded IO locked */
9735 static void queueIOJob(iojob
*j
) {
9736 redisLog(REDIS_DEBUG
,"Queued IO Job %p type %d about key '%s'\n",
9737 (void*)j
, j
->type
, (char*)j
->key
->ptr
);
9738 listAddNodeTail(server
.io_newjobs
,j
);
9739 if (server
.io_active_threads
< server
.vm_max_threads
)
9743 static int vmSwapObjectThreaded(robj
*key
, robj
*val
, redisDb
*db
) {
9746 assert(key
->storage
== REDIS_VM_MEMORY
);
9747 assert(key
->refcount
== 1);
9749 j
= zmalloc(sizeof(*j
));
9750 j
->type
= REDIS_IOJOB_PREPARE_SWAP
;
9756 j
->thread
= (pthread_t
) -1;
9757 key
->storage
= REDIS_VM_SWAPPING
;
9765 /* ============ Virtual Memory - Blocking clients on missing keys =========== */
9767 /* This function makes the client 'c' wait for the key 'key' to be loaded.
9768 * If there is not already a job loading the key, it is created.
9769 * The key is added to the io_keys list in the client structure, and also
9770 * in the hash table mapping swapped keys to waiting clients, that is,
9771 * server.io_waited_keys. */
9772 static int waitForSwappedKey(redisClient
*c
, robj
*key
) {
9773 struct dictEntry
*de
;
9777 /* If the key does not exist or is already in RAM we don't need to
9778 * block the client at all. */
9779 de
= dictFind(c
->db
->dict
,key
);
9780 if (de
== NULL
) return 0;
9781 o
= dictGetEntryKey(de
);
9782 if (o
->storage
== REDIS_VM_MEMORY
) {
9784 } else if (o
->storage
== REDIS_VM_SWAPPING
) {
9785 /* We were swapping the key, undo it! */
9786 vmCancelThreadedIOJob(o
);
9790 /* OK: the key is either swapped, or being loaded just now. */
9792 /* Add the key to the list of keys this client is waiting for.
9793 * This maps clients to keys they are waiting for. */
9794 listAddNodeTail(c
->io_keys
,key
);
9797 /* Add the client to the swapped keys => clients waiting map. */
9798 de
= dictFind(c
->db
->io_keys
,key
);
9802 /* For every key we take a list of clients blocked for it */
9804 retval
= dictAdd(c
->db
->io_keys
,key
,l
);
9806 assert(retval
== DICT_OK
);
9808 l
= dictGetEntryVal(de
);
9810 listAddNodeTail(l
,c
);
9812 /* Are we already loading the key from disk? If not create a job */
9813 if (o
->storage
== REDIS_VM_SWAPPED
) {
9816 o
->storage
= REDIS_VM_LOADING
;
9817 j
= zmalloc(sizeof(*j
));
9818 j
->type
= REDIS_IOJOB_LOAD
;
9821 j
->key
->vtype
= o
->vtype
;
9822 j
->page
= o
->vm
.page
;
9825 j
->thread
= (pthread_t
) -1;
9833 /* Preload keys for any command with first, last and step values for
9834 * the command keys prototype, as defined in the command table. */
9835 static void waitForMultipleSwappedKeys(redisClient
*c
, struct redisCommand
*cmd
, int argc
, robj
**argv
) {
9837 if (cmd
->vm_firstkey
== 0) return;
9838 last
= cmd
->vm_lastkey
;
9839 if (last
< 0) last
= argc
+last
;
9840 for (j
= cmd
->vm_firstkey
; j
<= last
; j
+= cmd
->vm_keystep
) {
9841 redisAssert(j
< argc
);
9842 waitForSwappedKey(c
,argv
[j
]);
9846 /* Preload keys needed for the ZUNIONSTORE and ZINTERSTORE commands.
9847 * Note that the number of keys to preload is user-defined, so we need to
9848 * apply a sanity check against argc. */
9849 static void zunionInterBlockClientOnSwappedKeys(redisClient
*c
, struct redisCommand
*cmd
, int argc
, robj
**argv
) {
9853 num
= atoi(argv
[2]->ptr
);
9854 if (num
> (argc
-3)) return;
9855 for (i
= 0; i
< num
; i
++) {
9856 waitForSwappedKey(c
,argv
[3+i
]);
9860 /* Preload keys needed to execute the entire MULTI/EXEC block.
9862 * This function is called by blockClientOnSwappedKeys when EXEC is issued,
9863 * and will block the client when any command requires a swapped out value. */
9864 static void execBlockClientOnSwappedKeys(redisClient
*c
, struct redisCommand
*cmd
, int argc
, robj
**argv
) {
9866 struct redisCommand
*mcmd
;
9869 REDIS_NOTUSED(argc
);
9870 REDIS_NOTUSED(argv
);
9872 if (!(c
->flags
& REDIS_MULTI
)) return;
9873 for (i
= 0; i
< c
->mstate
.count
; i
++) {
9874 mcmd
= c
->mstate
.commands
[i
].cmd
;
9875 margc
= c
->mstate
.commands
[i
].argc
;
9876 margv
= c
->mstate
.commands
[i
].argv
;
9878 if (mcmd
->vm_preload_proc
!= NULL
) {
9879 mcmd
->vm_preload_proc(c
,mcmd
,margc
,margv
);
9881 waitForMultipleSwappedKeys(c
,mcmd
,margc
,margv
);
9886 /* Is this client attempting to run a command against swapped keys?
9887 * If so, block it ASAP, load the keys in background, then resume it.
9889 * The important idea about this function is that it can fail! If keys will
9890 * still be swapped when the client is resumed, this key lookups will
9891 * just block loading keys from disk. In practical terms this should only
9892 * happen with SORT BY command or if there is a bug in this function.
9894 * Return 1 if the client is marked as blocked, 0 if the client can
9895 * continue as the keys it is going to access appear to be in memory. */
9896 static int blockClientOnSwappedKeys(redisClient
*c
, struct redisCommand
*cmd
) {
9897 if (cmd
->vm_preload_proc
!= NULL
) {
9898 cmd
->vm_preload_proc(c
,cmd
,c
->argc
,c
->argv
);
9900 waitForMultipleSwappedKeys(c
,cmd
,c
->argc
,c
->argv
);
9903 /* If the client was blocked for at least one key, mark it as blocked. */
9904 if (listLength(c
->io_keys
)) {
9905 c
->flags
|= REDIS_IO_WAIT
;
9906 aeDeleteFileEvent(server
.el
,c
->fd
,AE_READABLE
);
9907 server
.vm_blocked_clients
++;
9914 /* Remove the 'key' from the list of blocked keys for a given client.
9916 * The function returns 1 when there are no longer blocking keys after
9917 * the current one was removed (and the client can be unblocked). */
9918 static int dontWaitForSwappedKey(redisClient
*c
, robj
*key
) {
9922 struct dictEntry
*de
;
9924 /* Remove the key from the list of keys this client is waiting for. */
9925 listRewind(c
->io_keys
,&li
);
9926 while ((ln
= listNext(&li
)) != NULL
) {
9927 if (equalStringObjects(ln
->value
,key
)) {
9928 listDelNode(c
->io_keys
,ln
);
9934 /* Remove the client from the key => waiting clients map. */
9935 de
= dictFind(c
->db
->io_keys
,key
);
9937 l
= dictGetEntryVal(de
);
9938 ln
= listSearchKey(l
,c
);
9941 if (listLength(l
) == 0)
9942 dictDelete(c
->db
->io_keys
,key
);
9944 return listLength(c
->io_keys
) == 0;
9947 static void handleClientsBlockedOnSwappedKey(redisDb
*db
, robj
*key
) {
9948 struct dictEntry
*de
;
9953 de
= dictFind(db
->io_keys
,key
);
9956 l
= dictGetEntryVal(de
);
9957 len
= listLength(l
);
9958 /* Note: we can't use something like while(listLength(l)) as the list
9959 * can be freed by the calling function when we remove the last element. */
9962 redisClient
*c
= ln
->value
;
9964 if (dontWaitForSwappedKey(c
,key
)) {
9965 /* Put the client in the list of clients ready to go as we
9966 * loaded all the keys about it. */
9967 listAddNodeTail(server
.io_ready_clients
,c
);
9972 /* =========================== Remote Configuration ========================= */
9974 static void configSetCommand(redisClient
*c
) {
9975 robj
*o
= getDecodedObject(c
->argv
[3]);
9978 if (!strcasecmp(c
->argv
[2]->ptr
,"dbfilename")) {
9979 zfree(server
.dbfilename
);
9980 server
.dbfilename
= zstrdup(o
->ptr
);
9981 } else if (!strcasecmp(c
->argv
[2]->ptr
,"requirepass")) {
9982 zfree(server
.requirepass
);
9983 server
.requirepass
= zstrdup(o
->ptr
);
9984 } else if (!strcasecmp(c
->argv
[2]->ptr
,"masterauth")) {
9985 zfree(server
.masterauth
);
9986 server
.masterauth
= zstrdup(o
->ptr
);
9987 } else if (!strcasecmp(c
->argv
[2]->ptr
,"maxmemory")) {
9988 if (getLongLongFromObject(o
,&ll
) == REDIS_ERR
||
9989 ll
< 0) goto badfmt
;
9990 server
.maxmemory
= ll
;
9991 } else if (!strcasecmp(c
->argv
[2]->ptr
,"timeout")) {
9992 if (getLongLongFromObject(o
,&ll
) == REDIS_ERR
||
9993 ll
< 0 || ll
> LONG_MAX
) goto badfmt
;
9994 server
.maxidletime
= ll
;
9995 } else if (!strcasecmp(c
->argv
[2]->ptr
,"appendfsync")) {
9996 if (!strcasecmp(o
->ptr
,"no")) {
9997 server
.appendfsync
= APPENDFSYNC_NO
;
9998 } else if (!strcasecmp(o
->ptr
,"everysec")) {
9999 server
.appendfsync
= APPENDFSYNC_EVERYSEC
;
10000 } else if (!strcasecmp(o
->ptr
,"always")) {
10001 server
.appendfsync
= APPENDFSYNC_ALWAYS
;
10005 } else if (!strcasecmp(c
->argv
[2]->ptr
,"no-appendfsync-on-rewrite")) {
10006 int yn
= yesnotoi(o
->ptr
);
10008 if (yn
== -1) goto badfmt
;
10009 server
.no_appendfsync_on_rewrite
= yn
;
10010 } else if (!strcasecmp(c
->argv
[2]->ptr
,"appendonly")) {
10011 int old
= server
.appendonly
;
10012 int new = yesnotoi(o
->ptr
);
10014 if (new == -1) goto badfmt
;
10019 if (startAppendOnly() == REDIS_ERR
) {
10020 addReplySds(c
,sdscatprintf(sdsempty(),
10021 "-ERR Unable to turn on AOF. Check server logs.\r\n"));
10027 } else if (!strcasecmp(c
->argv
[2]->ptr
,"save")) {
10029 sds
*v
= sdssplitlen(o
->ptr
,sdslen(o
->ptr
)," ",1,&vlen
);
10031 /* Perform sanity check before setting the new config:
10032 * - Even number of args
10033 * - Seconds >= 1, changes >= 0 */
10035 sdsfreesplitres(v
,vlen
);
10038 for (j
= 0; j
< vlen
; j
++) {
10042 val
= strtoll(v
[j
], &eptr
, 10);
10043 if (eptr
[0] != '\0' ||
10044 ((j
& 1) == 0 && val
< 1) ||
10045 ((j
& 1) == 1 && val
< 0)) {
10046 sdsfreesplitres(v
,vlen
);
10050 /* Finally set the new config */
10051 resetServerSaveParams();
10052 for (j
= 0; j
< vlen
; j
+= 2) {
10056 seconds
= strtoll(v
[j
],NULL
,10);
10057 changes
= strtoll(v
[j
+1],NULL
,10);
10058 appendServerSaveParams(seconds
, changes
);
10060 sdsfreesplitres(v
,vlen
);
10062 addReplySds(c
,sdscatprintf(sdsempty(),
10063 "-ERR not supported CONFIG parameter %s\r\n",
10064 (char*)c
->argv
[2]->ptr
));
10069 addReply(c
,shared
.ok
);
10072 badfmt
: /* Bad format errors */
10073 addReplySds(c
,sdscatprintf(sdsempty(),
10074 "-ERR invalid argument '%s' for CONFIG SET '%s'\r\n",
10076 (char*)c
->argv
[2]->ptr
));
10080 static void configGetCommand(redisClient
*c
) {
10081 robj
*o
= getDecodedObject(c
->argv
[2]);
10082 robj
*lenobj
= createObject(REDIS_STRING
,NULL
);
10083 char *pattern
= o
->ptr
;
10086 addReply(c
,lenobj
);
10087 decrRefCount(lenobj
);
10089 if (stringmatch(pattern
,"dbfilename",0)) {
10090 addReplyBulkCString(c
,"dbfilename");
10091 addReplyBulkCString(c
,server
.dbfilename
);
10094 if (stringmatch(pattern
,"requirepass",0)) {
10095 addReplyBulkCString(c
,"requirepass");
10096 addReplyBulkCString(c
,server
.requirepass
);
10099 if (stringmatch(pattern
,"masterauth",0)) {
10100 addReplyBulkCString(c
,"masterauth");
10101 addReplyBulkCString(c
,server
.masterauth
);
10104 if (stringmatch(pattern
,"maxmemory",0)) {
10107 ll2string(buf
,128,server
.maxmemory
);
10108 addReplyBulkCString(c
,"maxmemory");
10109 addReplyBulkCString(c
,buf
);
10112 if (stringmatch(pattern
,"timeout",0)) {
10115 ll2string(buf
,128,server
.maxidletime
);
10116 addReplyBulkCString(c
,"timeout");
10117 addReplyBulkCString(c
,buf
);
10120 if (stringmatch(pattern
,"appendonly",0)) {
10121 addReplyBulkCString(c
,"appendonly");
10122 addReplyBulkCString(c
,server
.appendonly
? "yes" : "no");
10125 if (stringmatch(pattern
,"no-appendfsync-on-rewrite",0)) {
10126 addReplyBulkCString(c
,"no-appendfsync-on-rewrite");
10127 addReplyBulkCString(c
,server
.no_appendfsync_on_rewrite
? "yes" : "no");
10130 if (stringmatch(pattern
,"appendfsync",0)) {
10133 switch(server
.appendfsync
) {
10134 case APPENDFSYNC_NO
: policy
= "no"; break;
10135 case APPENDFSYNC_EVERYSEC
: policy
= "everysec"; break;
10136 case APPENDFSYNC_ALWAYS
: policy
= "always"; break;
10137 default: policy
= "unknown"; break; /* too harmless to panic */
10139 addReplyBulkCString(c
,"appendfsync");
10140 addReplyBulkCString(c
,policy
);
10143 if (stringmatch(pattern
,"save",0)) {
10144 sds buf
= sdsempty();
10147 for (j
= 0; j
< server
.saveparamslen
; j
++) {
10148 buf
= sdscatprintf(buf
,"%ld %d",
10149 server
.saveparams
[j
].seconds
,
10150 server
.saveparams
[j
].changes
);
10151 if (j
!= server
.saveparamslen
-1)
10152 buf
= sdscatlen(buf
," ",1);
10154 addReplyBulkCString(c
,"save");
10155 addReplyBulkCString(c
,buf
);
10160 lenobj
->ptr
= sdscatprintf(sdsempty(),"*%d\r\n",matches
*2);
10163 static void configCommand(redisClient
*c
) {
10164 if (!strcasecmp(c
->argv
[1]->ptr
,"set")) {
10165 if (c
->argc
!= 4) goto badarity
;
10166 configSetCommand(c
);
10167 } else if (!strcasecmp(c
->argv
[1]->ptr
,"get")) {
10168 if (c
->argc
!= 3) goto badarity
;
10169 configGetCommand(c
);
10170 } else if (!strcasecmp(c
->argv
[1]->ptr
,"resetstat")) {
10171 if (c
->argc
!= 2) goto badarity
;
10172 server
.stat_numcommands
= 0;
10173 server
.stat_numconnections
= 0;
10174 server
.stat_expiredkeys
= 0;
10175 server
.stat_starttime
= time(NULL
);
10176 addReply(c
,shared
.ok
);
10178 addReplySds(c
,sdscatprintf(sdsempty(),
10179 "-ERR CONFIG subcommand must be one of GET, SET, RESETSTAT\r\n"));
10184 addReplySds(c
,sdscatprintf(sdsempty(),
10185 "-ERR Wrong number of arguments for CONFIG %s\r\n",
10186 (char*) c
->argv
[1]->ptr
));
10189 /* =========================== Pubsub implementation ======================== */
10191 static void freePubsubPattern(void *p
) {
10192 pubsubPattern
*pat
= p
;
10194 decrRefCount(pat
->pattern
);
10198 static int listMatchPubsubPattern(void *a
, void *b
) {
10199 pubsubPattern
*pa
= a
, *pb
= b
;
10201 return (pa
->client
== pb
->client
) &&
10202 (equalStringObjects(pa
->pattern
,pb
->pattern
));
10205 /* Subscribe a client to a channel. Returns 1 if the operation succeeded, or
10206 * 0 if the client was already subscribed to that channel. */
10207 static int pubsubSubscribeChannel(redisClient
*c
, robj
*channel
) {
10208 struct dictEntry
*de
;
10209 list
*clients
= NULL
;
10212 /* Add the channel to the client -> channels hash table */
10213 if (dictAdd(c
->pubsub_channels
,channel
,NULL
) == DICT_OK
) {
10215 incrRefCount(channel
);
10216 /* Add the client to the channel -> list of clients hash table */
10217 de
= dictFind(server
.pubsub_channels
,channel
);
10219 clients
= listCreate();
10220 dictAdd(server
.pubsub_channels
,channel
,clients
);
10221 incrRefCount(channel
);
10223 clients
= dictGetEntryVal(de
);
10225 listAddNodeTail(clients
,c
);
10227 /* Notify the client */
10228 addReply(c
,shared
.mbulk3
);
10229 addReply(c
,shared
.subscribebulk
);
10230 addReplyBulk(c
,channel
);
10231 addReplyLongLong(c
,dictSize(c
->pubsub_channels
)+listLength(c
->pubsub_patterns
));
10235 /* Unsubscribe a client from a channel. Returns 1 if the operation succeeded, or
10236 * 0 if the client was not subscribed to the specified channel. */
10237 static int pubsubUnsubscribeChannel(redisClient
*c
, robj
*channel
, int notify
) {
10238 struct dictEntry
*de
;
10243 /* Remove the channel from the client -> channels hash table */
10244 incrRefCount(channel
); /* channel may be just a pointer to the same object
10245 we have in the hash tables. Protect it... */
10246 if (dictDelete(c
->pubsub_channels
,channel
) == DICT_OK
) {
10248 /* Remove the client from the channel -> clients list hash table */
10249 de
= dictFind(server
.pubsub_channels
,channel
);
10250 assert(de
!= NULL
);
10251 clients
= dictGetEntryVal(de
);
10252 ln
= listSearchKey(clients
,c
);
10253 assert(ln
!= NULL
);
10254 listDelNode(clients
,ln
);
10255 if (listLength(clients
) == 0) {
10256 /* Free the list and associated hash entry at all if this was
10257 * the latest client, so that it will be possible to abuse
10258 * Redis PUBSUB creating millions of channels. */
10259 dictDelete(server
.pubsub_channels
,channel
);
10262 /* Notify the client */
10264 addReply(c
,shared
.mbulk3
);
10265 addReply(c
,shared
.unsubscribebulk
);
10266 addReplyBulk(c
,channel
);
10267 addReplyLongLong(c
,dictSize(c
->pubsub_channels
)+
10268 listLength(c
->pubsub_patterns
));
10271 decrRefCount(channel
); /* it is finally safe to release it */
10275 /* Subscribe a client to a pattern. Returns 1 if the operation succeeded, or 0 if the clinet was already subscribed to that pattern. */
10276 static int pubsubSubscribePattern(redisClient
*c
, robj
*pattern
) {
10279 if (listSearchKey(c
->pubsub_patterns
,pattern
) == NULL
) {
10281 pubsubPattern
*pat
;
10282 listAddNodeTail(c
->pubsub_patterns
,pattern
);
10283 incrRefCount(pattern
);
10284 pat
= zmalloc(sizeof(*pat
));
10285 pat
->pattern
= getDecodedObject(pattern
);
10287 listAddNodeTail(server
.pubsub_patterns
,pat
);
10289 /* Notify the client */
10290 addReply(c
,shared
.mbulk3
);
10291 addReply(c
,shared
.psubscribebulk
);
10292 addReplyBulk(c
,pattern
);
10293 addReplyLongLong(c
,dictSize(c
->pubsub_channels
)+listLength(c
->pubsub_patterns
));
10297 /* Unsubscribe a client from a channel. Returns 1 if the operation succeeded, or
10298 * 0 if the client was not subscribed to the specified channel. */
10299 static int pubsubUnsubscribePattern(redisClient
*c
, robj
*pattern
, int notify
) {
10304 incrRefCount(pattern
); /* Protect the object. May be the same we remove */
10305 if ((ln
= listSearchKey(c
->pubsub_patterns
,pattern
)) != NULL
) {
10307 listDelNode(c
->pubsub_patterns
,ln
);
10309 pat
.pattern
= pattern
;
10310 ln
= listSearchKey(server
.pubsub_patterns
,&pat
);
10311 listDelNode(server
.pubsub_patterns
,ln
);
10313 /* Notify the client */
10315 addReply(c
,shared
.mbulk3
);
10316 addReply(c
,shared
.punsubscribebulk
);
10317 addReplyBulk(c
,pattern
);
10318 addReplyLongLong(c
,dictSize(c
->pubsub_channels
)+
10319 listLength(c
->pubsub_patterns
));
10321 decrRefCount(pattern
);
10325 /* Unsubscribe from all the channels. Return the number of channels the
10326 * client was subscribed from. */
10327 static int pubsubUnsubscribeAllChannels(redisClient
*c
, int notify
) {
10328 dictIterator
*di
= dictGetIterator(c
->pubsub_channels
);
10332 while((de
= dictNext(di
)) != NULL
) {
10333 robj
*channel
= dictGetEntryKey(de
);
10335 count
+= pubsubUnsubscribeChannel(c
,channel
,notify
);
10337 dictReleaseIterator(di
);
10341 /* Unsubscribe from all the patterns. Return the number of patterns the
10342 * client was subscribed from. */
10343 static int pubsubUnsubscribeAllPatterns(redisClient
*c
, int notify
) {
10348 listRewind(c
->pubsub_patterns
,&li
);
10349 while ((ln
= listNext(&li
)) != NULL
) {
10350 robj
*pattern
= ln
->value
;
10352 count
+= pubsubUnsubscribePattern(c
,pattern
,notify
);
10357 /* Publish a message */
10358 static int pubsubPublishMessage(robj
*channel
, robj
*message
) {
10360 struct dictEntry
*de
;
10364 /* Send to clients listening for that channel */
10365 de
= dictFind(server
.pubsub_channels
,channel
);
10367 list
*list
= dictGetEntryVal(de
);
10371 listRewind(list
,&li
);
10372 while ((ln
= listNext(&li
)) != NULL
) {
10373 redisClient
*c
= ln
->value
;
10375 addReply(c
,shared
.mbulk3
);
10376 addReply(c
,shared
.messagebulk
);
10377 addReplyBulk(c
,channel
);
10378 addReplyBulk(c
,message
);
10382 /* Send to clients listening to matching channels */
10383 if (listLength(server
.pubsub_patterns
)) {
10384 listRewind(server
.pubsub_patterns
,&li
);
10385 channel
= getDecodedObject(channel
);
10386 while ((ln
= listNext(&li
)) != NULL
) {
10387 pubsubPattern
*pat
= ln
->value
;
10389 if (stringmatchlen((char*)pat
->pattern
->ptr
,
10390 sdslen(pat
->pattern
->ptr
),
10391 (char*)channel
->ptr
,
10392 sdslen(channel
->ptr
),0)) {
10393 addReply(pat
->client
,shared
.mbulk4
);
10394 addReply(pat
->client
,shared
.pmessagebulk
);
10395 addReplyBulk(pat
->client
,pat
->pattern
);
10396 addReplyBulk(pat
->client
,channel
);
10397 addReplyBulk(pat
->client
,message
);
10401 decrRefCount(channel
);
10406 static void subscribeCommand(redisClient
*c
) {
10409 for (j
= 1; j
< c
->argc
; j
++)
10410 pubsubSubscribeChannel(c
,c
->argv
[j
]);
10413 static void unsubscribeCommand(redisClient
*c
) {
10414 if (c
->argc
== 1) {
10415 pubsubUnsubscribeAllChannels(c
,1);
10420 for (j
= 1; j
< c
->argc
; j
++)
10421 pubsubUnsubscribeChannel(c
,c
->argv
[j
],1);
10425 static void psubscribeCommand(redisClient
*c
) {
10428 for (j
= 1; j
< c
->argc
; j
++)
10429 pubsubSubscribePattern(c
,c
->argv
[j
]);
10432 static void punsubscribeCommand(redisClient
*c
) {
10433 if (c
->argc
== 1) {
10434 pubsubUnsubscribeAllPatterns(c
,1);
10439 for (j
= 1; j
< c
->argc
; j
++)
10440 pubsubUnsubscribePattern(c
,c
->argv
[j
],1);
10444 static void publishCommand(redisClient
*c
) {
10445 int receivers
= pubsubPublishMessage(c
->argv
[1],c
->argv
[2]);
10446 addReplyLongLong(c
,receivers
);
10449 /* ===================== WATCH (CAS alike for MULTI/EXEC) ===================
10451 * The implementation uses a per-DB hash table mapping keys to list of clients
10452 * WATCHing those keys, so that given a key that is going to be modified
10453 * we can mark all the associated clients as dirty.
10455 * Also every client contains a list of WATCHed keys so that's possible to
10456 * un-watch such keys when the client is freed or when UNWATCH is called. */
10458 /* In the client->watched_keys list we need to use watchedKey structures
10459 * as in order to identify a key in Redis we need both the key name and the
10461 typedef struct watchedKey
{
10466 /* Watch for the specified key */
10467 static void watchForKey(redisClient
*c
, robj
*key
) {
10468 list
*clients
= NULL
;
10473 /* Check if we are already watching for this key */
10474 listRewind(c
->watched_keys
,&li
);
10475 while((ln
= listNext(&li
))) {
10476 wk
= listNodeValue(ln
);
10477 if (wk
->db
== c
->db
&& equalStringObjects(key
,wk
->key
))
10478 return; /* Key already watched */
10480 /* This key is not already watched in this DB. Let's add it */
10481 clients
= dictFetchValue(c
->db
->watched_keys
,key
);
10483 clients
= listCreate();
10484 dictAdd(c
->db
->watched_keys
,key
,clients
);
10487 listAddNodeTail(clients
,c
);
10488 /* Add the new key to the list of keys watched by this client */
10489 wk
= zmalloc(sizeof(*wk
));
10493 listAddNodeTail(c
->watched_keys
,wk
);
10496 /* Unwatch all the keys watched by this client. Clearing the EXEC dirty
10497 * flag is up to the caller. */
10498 static void unwatchAllKeys(redisClient
*c
) {
10502 if (listLength(c
->watched_keys
) == 0) return;
10503 listRewind(c
->watched_keys
,&li
);
10504 while((ln
= listNext(&li
))) {
10508 /* Lookup the watched key -> clients list and remove the client
10510 wk
= listNodeValue(ln
);
10511 clients
= dictFetchValue(wk
->db
->watched_keys
, wk
->key
);
10512 assert(clients
!= NULL
);
10513 listDelNode(clients
,listSearchKey(clients
,c
));
10514 /* Kill the entry at all if this was the only client */
10515 if (listLength(clients
) == 0)
10516 dictDelete(wk
->db
->watched_keys
, wk
->key
);
10517 /* Remove this watched key from the client->watched list */
10518 listDelNode(c
->watched_keys
,ln
);
10519 decrRefCount(wk
->key
);
10524 /* "Touch" a key, so that if this key is being WATCHed by some client the
10525 * next EXEC will fail. */
10526 static void touchWatchedKey(redisDb
*db
, robj
*key
) {
10531 if (dictSize(db
->watched_keys
) == 0) return;
10532 clients
= dictFetchValue(db
->watched_keys
, key
);
10533 if (!clients
) return;
10535 /* Mark all the clients watching this key as REDIS_DIRTY_CAS */
10536 /* Check if we are already watching for this key */
10537 listRewind(clients
,&li
);
10538 while((ln
= listNext(&li
))) {
10539 redisClient
*c
= listNodeValue(ln
);
10541 c
->flags
|= REDIS_DIRTY_CAS
;
10545 /* On FLUSHDB or FLUSHALL all the watched keys that are present before the
10546 * flush but will be deleted as effect of the flushing operation should
10547 * be touched. "dbid" is the DB that's getting the flush. -1 if it is
10548 * a FLUSHALL operation (all the DBs flushed). */
10549 static void touchWatchedKeysOnFlush(int dbid
) {
10553 /* For every client, check all the waited keys */
10554 listRewind(server
.clients
,&li1
);
10555 while((ln
= listNext(&li1
))) {
10556 redisClient
*c
= listNodeValue(ln
);
10557 listRewind(c
->watched_keys
,&li2
);
10558 while((ln
= listNext(&li2
))) {
10559 watchedKey
*wk
= listNodeValue(ln
);
10561 /* For every watched key matching the specified DB, if the
10562 * key exists, mark the client as dirty, as the key will be
10564 if (dbid
== -1 || wk
->db
->id
== dbid
) {
10565 if (dictFind(wk
->db
->dict
, wk
->key
) != NULL
)
10566 c
->flags
|= REDIS_DIRTY_CAS
;
10572 static void watchCommand(redisClient
*c
) {
10575 if (c
->flags
& REDIS_MULTI
) {
10576 addReplySds(c
,sdsnew("-ERR WATCH inside MULTI is not allowed\r\n"));
10579 for (j
= 1; j
< c
->argc
; j
++)
10580 watchForKey(c
,c
->argv
[j
]);
10581 addReply(c
,shared
.ok
);
10584 static void unwatchCommand(redisClient
*c
) {
10586 c
->flags
&= (~REDIS_DIRTY_CAS
);
10587 addReply(c
,shared
.ok
);
10590 /* ================================= Debugging ============================== */
10592 /* Compute the sha1 of string at 's' with 'len' bytes long.
10593 * The SHA1 is then xored againt the string pointed by digest.
10594 * Since xor is commutative, this operation is used in order to
10595 * "add" digests relative to unordered elements.
10597 * So digest(a,b,c,d) will be the same of digest(b,a,c,d) */
10598 static void xorDigest(unsigned char *digest
, void *ptr
, size_t len
) {
10600 unsigned char hash
[20], *s
= ptr
;
10604 SHA1Update(&ctx
,s
,len
);
10605 SHA1Final(hash
,&ctx
);
10607 for (j
= 0; j
< 20; j
++)
10608 digest
[j
] ^= hash
[j
];
10611 static void xorObjectDigest(unsigned char *digest
, robj
*o
) {
10612 o
= getDecodedObject(o
);
10613 xorDigest(digest
,o
->ptr
,sdslen(o
->ptr
));
10617 /* This function instead of just computing the SHA1 and xoring it
10618 * against diget, also perform the digest of "digest" itself and
10619 * replace the old value with the new one.
10621 * So the final digest will be:
10623 * digest = SHA1(digest xor SHA1(data))
10625 * This function is used every time we want to preserve the order so
10626 * that digest(a,b,c,d) will be different than digest(b,c,d,a)
10628 * Also note that mixdigest("foo") followed by mixdigest("bar")
10629 * will lead to a different digest compared to "fo", "obar".
10631 static void mixDigest(unsigned char *digest
, void *ptr
, size_t len
) {
10635 xorDigest(digest
,s
,len
);
10637 SHA1Update(&ctx
,digest
,20);
10638 SHA1Final(digest
,&ctx
);
10641 static void mixObjectDigest(unsigned char *digest
, robj
*o
) {
10642 o
= getDecodedObject(o
);
10643 mixDigest(digest
,o
->ptr
,sdslen(o
->ptr
));
10647 /* Compute the dataset digest. Since keys, sets elements, hashes elements
10648 * are not ordered, we use a trick: every aggregate digest is the xor
10649 * of the digests of their elements. This way the order will not change
10650 * the result. For list instead we use a feedback entering the output digest
10651 * as input in order to ensure that a different ordered list will result in
10652 * a different digest. */
10653 static void computeDatasetDigest(unsigned char *final
) {
10654 unsigned char digest
[20];
10656 dictIterator
*di
= NULL
;
10661 memset(final
,0,20); /* Start with a clean result */
10663 for (j
= 0; j
< server
.dbnum
; j
++) {
10664 redisDb
*db
= server
.db
+j
;
10666 if (dictSize(db
->dict
) == 0) continue;
10667 di
= dictGetIterator(db
->dict
);
10669 /* hash the DB id, so the same dataset moved in a different
10670 * DB will lead to a different digest */
10672 mixDigest(final
,&aux
,sizeof(aux
));
10674 /* Iterate this DB writing every entry */
10675 while((de
= dictNext(di
)) != NULL
) {
10676 robj
*key
, *o
, *kcopy
;
10679 memset(digest
,0,20); /* This key-val digest */
10680 key
= dictGetEntryKey(de
);
10682 if (!server
.vm_enabled
) {
10683 mixObjectDigest(digest
,key
);
10684 o
= dictGetEntryVal(de
);
10686 /* Don't work with the key directly as when VM is active
10687 * this is unsafe: TODO: fix decrRefCount to check if the
10688 * count really reached 0 to avoid this mess */
10689 kcopy
= dupStringObject(key
);
10690 mixObjectDigest(digest
,kcopy
);
10691 o
= lookupKeyRead(db
,kcopy
);
10692 decrRefCount(kcopy
);
10694 aux
= htonl(o
->type
);
10695 mixDigest(digest
,&aux
,sizeof(aux
));
10696 expiretime
= getExpire(db
,key
);
10698 /* Save the key and associated value */
10699 if (o
->type
== REDIS_STRING
) {
10700 mixObjectDigest(digest
,o
);
10701 } else if (o
->type
== REDIS_LIST
) {
10702 list
*list
= o
->ptr
;
10706 listRewind(list
,&li
);
10707 while((ln
= listNext(&li
))) {
10708 robj
*eleobj
= listNodeValue(ln
);
10710 mixObjectDigest(digest
,eleobj
);
10712 } else if (o
->type
== REDIS_SET
) {
10713 dict
*set
= o
->ptr
;
10714 dictIterator
*di
= dictGetIterator(set
);
10717 while((de
= dictNext(di
)) != NULL
) {
10718 robj
*eleobj
= dictGetEntryKey(de
);
10720 xorObjectDigest(digest
,eleobj
);
10722 dictReleaseIterator(di
);
10723 } else if (o
->type
== REDIS_ZSET
) {
10725 dictIterator
*di
= dictGetIterator(zs
->dict
);
10728 while((de
= dictNext(di
)) != NULL
) {
10729 robj
*eleobj
= dictGetEntryKey(de
);
10730 double *score
= dictGetEntryVal(de
);
10731 unsigned char eledigest
[20];
10733 snprintf(buf
,sizeof(buf
),"%.17g",*score
);
10734 memset(eledigest
,0,20);
10735 mixObjectDigest(eledigest
,eleobj
);
10736 mixDigest(eledigest
,buf
,strlen(buf
));
10737 xorDigest(digest
,eledigest
,20);
10739 dictReleaseIterator(di
);
10740 } else if (o
->type
== REDIS_HASH
) {
10744 hi
= hashInitIterator(o
);
10745 while (hashNext(hi
) != REDIS_ERR
) {
10746 unsigned char eledigest
[20];
10748 memset(eledigest
,0,20);
10749 obj
= hashCurrent(hi
,REDIS_HASH_KEY
);
10750 mixObjectDigest(eledigest
,obj
);
10752 obj
= hashCurrent(hi
,REDIS_HASH_VALUE
);
10753 mixObjectDigest(eledigest
,obj
);
10755 xorDigest(digest
,eledigest
,20);
10757 hashReleaseIterator(hi
);
10759 redisPanic("Unknown object type");
10761 /* If the key has an expire, add it to the mix */
10762 if (expiretime
!= -1) xorDigest(digest
,"!!expire!!",10);
10763 /* We can finally xor the key-val digest to the final digest */
10764 xorDigest(final
,digest
,20);
10766 dictReleaseIterator(di
);
10770 static void debugCommand(redisClient
*c
) {
10771 if (!strcasecmp(c
->argv
[1]->ptr
,"segfault")) {
10772 *((char*)-1) = 'x';
10773 } else if (!strcasecmp(c
->argv
[1]->ptr
,"reload")) {
10774 if (rdbSave(server
.dbfilename
) != REDIS_OK
) {
10775 addReply(c
,shared
.err
);
10779 if (rdbLoad(server
.dbfilename
) != REDIS_OK
) {
10780 addReply(c
,shared
.err
);
10783 redisLog(REDIS_WARNING
,"DB reloaded by DEBUG RELOAD");
10784 addReply(c
,shared
.ok
);
10785 } else if (!strcasecmp(c
->argv
[1]->ptr
,"loadaof")) {
10787 if (loadAppendOnlyFile(server
.appendfilename
) != REDIS_OK
) {
10788 addReply(c
,shared
.err
);
10791 redisLog(REDIS_WARNING
,"Append Only File loaded by DEBUG LOADAOF");
10792 addReply(c
,shared
.ok
);
10793 } else if (!strcasecmp(c
->argv
[1]->ptr
,"object") && c
->argc
== 3) {
10794 dictEntry
*de
= dictFind(c
->db
->dict
,c
->argv
[2]);
10798 addReply(c
,shared
.nokeyerr
);
10801 key
= dictGetEntryKey(de
);
10802 val
= dictGetEntryVal(de
);
10803 if (!server
.vm_enabled
|| (key
->storage
== REDIS_VM_MEMORY
||
10804 key
->storage
== REDIS_VM_SWAPPING
)) {
10808 if (val
->encoding
< (sizeof(strencoding
)/sizeof(char*))) {
10809 strenc
= strencoding
[val
->encoding
];
10811 snprintf(buf
,64,"unknown encoding %d\n", val
->encoding
);
10814 addReplySds(c
,sdscatprintf(sdsempty(),
10815 "+Key at:%p refcount:%d, value at:%p refcount:%d "
10816 "encoding:%s serializedlength:%lld\r\n",
10817 (void*)key
, key
->refcount
, (void*)val
, val
->refcount
,
10818 strenc
, (long long) rdbSavedObjectLen(val
,NULL
)));
10820 addReplySds(c
,sdscatprintf(sdsempty(),
10821 "+Key at:%p refcount:%d, value swapped at: page %llu "
10822 "using %llu pages\r\n",
10823 (void*)key
, key
->refcount
, (unsigned long long) key
->vm
.page
,
10824 (unsigned long long) key
->vm
.usedpages
));
10826 } else if (!strcasecmp(c
->argv
[1]->ptr
,"swapin") && c
->argc
== 3) {
10827 lookupKeyRead(c
->db
,c
->argv
[2]);
10828 addReply(c
,shared
.ok
);
10829 } else if (!strcasecmp(c
->argv
[1]->ptr
,"swapout") && c
->argc
== 3) {
10830 dictEntry
*de
= dictFind(c
->db
->dict
,c
->argv
[2]);
10833 if (!server
.vm_enabled
) {
10834 addReplySds(c
,sdsnew("-ERR Virtual Memory is disabled\r\n"));
10838 addReply(c
,shared
.nokeyerr
);
10841 key
= dictGetEntryKey(de
);
10842 val
= dictGetEntryVal(de
);
10843 /* If the key is shared we want to create a copy */
10844 if (key
->refcount
> 1) {
10845 robj
*newkey
= dupStringObject(key
);
10847 key
= dictGetEntryKey(de
) = newkey
;
10850 if (key
->storage
!= REDIS_VM_MEMORY
) {
10851 addReplySds(c
,sdsnew("-ERR This key is not in memory\r\n"));
10852 } else if (vmSwapObjectBlocking(key
,val
) == REDIS_OK
) {
10853 dictGetEntryVal(de
) = NULL
;
10854 addReply(c
,shared
.ok
);
10856 addReply(c
,shared
.err
);
10858 } else if (!strcasecmp(c
->argv
[1]->ptr
,"populate") && c
->argc
== 3) {
10863 if (getLongFromObjectOrReply(c
, c
->argv
[2], &keys
, NULL
) != REDIS_OK
)
10865 for (j
= 0; j
< keys
; j
++) {
10866 snprintf(buf
,sizeof(buf
),"key:%lu",j
);
10867 key
= createStringObject(buf
,strlen(buf
));
10868 if (lookupKeyRead(c
->db
,key
) != NULL
) {
10872 snprintf(buf
,sizeof(buf
),"value:%lu",j
);
10873 val
= createStringObject(buf
,strlen(buf
));
10874 dictAdd(c
->db
->dict
,key
,val
);
10876 addReply(c
,shared
.ok
);
10877 } else if (!strcasecmp(c
->argv
[1]->ptr
,"digest") && c
->argc
== 2) {
10878 unsigned char digest
[20];
10879 sds d
= sdsnew("+");
10882 computeDatasetDigest(digest
);
10883 for (j
= 0; j
< 20; j
++)
10884 d
= sdscatprintf(d
, "%02x",digest
[j
]);
10886 d
= sdscatlen(d
,"\r\n",2);
10889 addReplySds(c
,sdsnew(
10890 "-ERR Syntax error, try DEBUG [SEGFAULT|OBJECT <key>|SWAPIN <key>|SWAPOUT <key>|RELOAD]\r\n"));
10894 static void _redisAssert(char *estr
, char *file
, int line
) {
10895 redisLog(REDIS_WARNING
,"=== ASSERTION FAILED ===");
10896 redisLog(REDIS_WARNING
,"==> %s:%d '%s' is not true",file
,line
,estr
);
10897 #ifdef HAVE_BACKTRACE
10898 redisLog(REDIS_WARNING
,"(forcing SIGSEGV in order to print the stack trace)");
10899 *((char*)-1) = 'x';
10903 static void _redisPanic(char *msg
, char *file
, int line
) {
10904 redisLog(REDIS_WARNING
,"!!! Software Failure. Press left mouse button to continue");
10905 redisLog(REDIS_WARNING
,"Guru Meditation: %s #%s:%d",msg
,file
,line
);
10906 #ifdef HAVE_BACKTRACE
10907 redisLog(REDIS_WARNING
,"(forcing SIGSEGV in order to print the stack trace)");
10908 *((char*)-1) = 'x';
10912 /* =================================== Main! ================================ */
/* Read the current value of /proc/sys/vm/overcommit_memory.
 *
 * Returns the integer found in the file, or -1 when the file cannot be
 * opened, cannot be read, or does not start with a number. Reporting -1
 * for unparsable content (instead of the 0 that atoi() would yield) keeps
 * the caller from emitting the "overcommit_memory is set to 0" warning
 * when the value is simply unknown. */
int linuxOvercommitMemoryValue(void) {
    FILE *fp = fopen("/proc/sys/vm/overcommit_memory","r");
    char buf[64];
    char *end;
    long value;

    if (!fp) return -1;
    if (fgets(buf,sizeof(buf),fp) == NULL) {
        fclose(fp);
        return -1;
    }
    fclose(fp);

    value = strtol(buf,&end,10);
    if (end == buf) return -1; /* no digits at all */
    return (int)value;
}
10929 void linuxOvercommitMemoryWarning(void) {
10930 if (linuxOvercommitMemoryValue() == 0) {
10931 redisLog(REDIS_WARNING
,"WARNING overcommit_memory is set to 0! Background save may fail under low memory condition. To fix this issue add 'vm.overcommit_memory = 1' to /etc/sysctl.conf and then reboot or run the command 'sysctl vm.overcommit_memory=1' for this to take effect.");
10934 #endif /* __linux__ */
10936 static void daemonize(void) {
10940 if (fork() != 0) exit(0); /* parent exits */
10941 setsid(); /* create a new session */
10943 /* Every output goes to /dev/null. If Redis is daemonized but
10944 * the 'logfile' is set to 'stdout' in the configuration file
10945 * it will not log at all. */
10946 if ((fd
= open("/dev/null", O_RDWR
, 0)) != -1) {
10947 dup2(fd
, STDIN_FILENO
);
10948 dup2(fd
, STDOUT_FILENO
);
10949 dup2(fd
, STDERR_FILENO
);
10950 if (fd
> STDERR_FILENO
) close(fd
);
10952 /* Try to write the pid file */
10953 fp
= fopen(server
.pidfile
,"w");
10955 fprintf(fp
,"%d\n",getpid());
10960 static void version() {
10961 printf("Redis server version %s (%s:%d)\n", REDIS_VERSION
,
10962 REDIS_GIT_SHA1
, atoi(REDIS_GIT_DIRTY
) > 0);
/* Print the command line synopsis on standard error and terminate the
 * process with a failure status. */
static void usage() {
    fputs("Usage: ./redis-server [/path/to/redis.conf]\n",stderr);
    fputs(" ./redis-server - (read config from stdin)\n",stderr);
    exit(1);
}
10972 int main(int argc
, char **argv
) {
10975 initServerConfig();
10976 sortCommandTable();
10978 if (strcmp(argv
[1], "-v") == 0 ||
10979 strcmp(argv
[1], "--version") == 0) version();
10980 if (strcmp(argv
[1], "--help") == 0) usage();
10981 resetServerSaveParams();
10982 loadServerConfig(argv
[1]);
10983 } else if ((argc
> 2)) {
10986 redisLog(REDIS_WARNING
,"Warning: no config file specified, using the default config. In order to specify a config file use 'redis-server /path/to/redis.conf'");
10988 if (server
.daemonize
) daemonize();
10990 redisLog(REDIS_NOTICE
,"Server started, Redis version " REDIS_VERSION
);
10992 linuxOvercommitMemoryWarning();
10994 start
= time(NULL
);
10995 if (server
.appendonly
) {
10996 if (loadAppendOnlyFile(server
.appendfilename
) == REDIS_OK
)
10997 redisLog(REDIS_NOTICE
,"DB loaded from append only file: %ld seconds",time(NULL
)-start
);
10999 if (rdbLoad(server
.dbfilename
) == REDIS_OK
)
11000 redisLog(REDIS_NOTICE
,"DB loaded from disk: %ld seconds",time(NULL
)-start
);
11002 redisLog(REDIS_NOTICE
,"The server is now ready to accept connections on port %d", server
.port
);
11003 aeSetBeforeSleepProc(server
.el
,beforeSleep
);
11005 aeDeleteEventLoop(server
.el
);
11009 /* ============================= Backtrace support ========================= */
11011 #ifdef HAVE_BACKTRACE
11012 static char *findFuncName(void *pointer
, unsigned long *offset
);
11014 static void *getMcontextEip(ucontext_t
*uc
) {
11015 #if defined(__FreeBSD__)
11016 return (void*) uc
->uc_mcontext
.mc_eip
;
11017 #elif defined(__dietlibc__)
11018 return (void*) uc
->uc_mcontext
.eip
;
11019 #elif defined(__APPLE__) && !defined(MAC_OS_X_VERSION_10_6)
11021 return (void*) uc
->uc_mcontext
->__ss
.__rip
;
11023 return (void*) uc
->uc_mcontext
->__ss
.__eip
;
11025 #elif defined(__APPLE__) && defined(MAC_OS_X_VERSION_10_6)
11026 #if defined(_STRUCT_X86_THREAD_STATE64) && !defined(__i386__)
11027 return (void*) uc
->uc_mcontext
->__ss
.__rip
;
11029 return (void*) uc
->uc_mcontext
->__ss
.__eip
;
11031 #elif defined(__i386__) || defined(__X86_64__) || defined(__x86_64__)
11032 return (void*) uc
->uc_mcontext
.gregs
[REG_EIP
]; /* Linux 32/64 bit */
11033 #elif defined(__ia64__) /* Linux IA64 */
11034 return (void*) uc
->uc_mcontext
.sc_ip
;
11040 static void segvHandler(int sig
, siginfo_t
*info
, void *secret
) {
11042 char **messages
= NULL
;
11043 int i
, trace_size
= 0;
11044 unsigned long offset
=0;
11045 ucontext_t
*uc
= (ucontext_t
*) secret
;
11047 REDIS_NOTUSED(info
);
11049 redisLog(REDIS_WARNING
,
11050 "======= Ooops! Redis %s got signal: -%d- =======", REDIS_VERSION
, sig
);
11051 infostring
= genRedisInfoString();
11052 redisLog(REDIS_WARNING
, "%s",infostring
);
11053 /* It's not safe to sdsfree() the returned string under memory
11054 * corruption conditions. Let it leak as we are going to abort */
11056 trace_size
= backtrace(trace
, 100);
11057 /* overwrite sigaction with caller's address */
11058 if (getMcontextEip(uc
) != NULL
) {
11059 trace
[1] = getMcontextEip(uc
);
11061 messages
= backtrace_symbols(trace
, trace_size
);
11063 for (i
=1; i
<trace_size
; ++i
) {
11064 char *fn
= findFuncName(trace
[i
], &offset
), *p
;
11066 p
= strchr(messages
[i
],'+');
11067 if (!fn
|| (p
&& ((unsigned long)strtol(p
+1,NULL
,10)) < offset
)) {
11068 redisLog(REDIS_WARNING
,"%s", messages
[i
]);
11070 redisLog(REDIS_WARNING
,"%d redis-server %p %s + %d", i
, trace
[i
], fn
, (unsigned int)offset
);
11073 /* free(messages); Don't call free() with possibly corrupted memory. */
11077 static void sigtermHandler(int sig
) {
11078 REDIS_NOTUSED(sig
);
11080 redisLog(REDIS_WARNING
,"SIGTERM received, scheduling shutting down...");
11081 server
.shutdown_asap
= 1;
11084 static void setupSigSegvAction(void) {
11085 struct sigaction act
;
11087 sigemptyset (&act
.sa_mask
);
11088 /* When the SA_SIGINFO flag is set in sa_flags then sa_sigaction
11089 * is used. Otherwise, sa_handler is used */
11090 act
.sa_flags
= SA_NODEFER
| SA_ONSTACK
| SA_RESETHAND
| SA_SIGINFO
;
11091 act
.sa_sigaction
= segvHandler
;
11092 sigaction (SIGSEGV
, &act
, NULL
);
11093 sigaction (SIGBUS
, &act
, NULL
);
11094 sigaction (SIGFPE
, &act
, NULL
);
11095 sigaction (SIGILL
, &act
, NULL
);
11096 sigaction (SIGBUS
, &act
, NULL
);
11098 act
.sa_flags
= SA_NODEFER
| SA_ONSTACK
| SA_RESETHAND
;
11099 act
.sa_handler
= sigtermHandler
;
11100 sigaction (SIGTERM
, &act
, NULL
);
11104 #include "staticsymbols.h"
11105 /* This function try to convert a pointer into a function name. It's used in
11106 * oreder to provide a backtrace under segmentation fault that's able to
11107 * display functions declared as static (otherwise the backtrace is useless). */
11108 static char *findFuncName(void *pointer
, unsigned long *offset
){
11110 unsigned long off
, minoff
= 0;
11112 /* Try to match against the Symbol with the smallest offset */
11113 for (i
=0; symsTable
[i
].pointer
; i
++) {
11114 unsigned long lp
= (unsigned long) pointer
;
11116 if (lp
!= (unsigned long)-1 && lp
>= symsTable
[i
].pointer
) {
11117 off
=lp
-symsTable
[i
].pointer
;
11118 if (ret
< 0 || off
< minoff
) {
11124 if (ret
== -1) return NULL
;
11126 return symsTable
[ret
].name
;
11128 #else /* HAVE_BACKTRACE */
/* Variant compiled when HAVE_BACKTRACE is not defined: does nothing. */
static void setupSigSegvAction(void) {
}
11131 #endif /* HAVE_BACKTRACE */