2 * Copyright (c) 2009-2010, Salvatore Sanfilippo <antirez at gmail dot com>
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions are met:
8 * * Redistributions of source code must retain the above copyright notice,
9 * this list of conditions and the following disclaimer.
10 * * Redistributions in binary form must reproduce the above copyright
11 * notice, this list of conditions and the following disclaimer in the
12 * documentation and/or other materials provided with the distribution.
13 * * Neither the name of Redis nor the names of its contributors may be used
14 * to endorse or promote products derived from this software without
15 * specific prior written permission.
17 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
18 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
21 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
22 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
23 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
24 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
25 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
26 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
27 * POSSIBILITY OF SUCH DAMAGE.
30 #define REDIS_VERSION "1.3.8"
40 #define __USE_POSIX199309
47 #endif /* HAVE_BACKTRACE */
55 #include <arpa/inet.h>
59 #include <sys/resource.h>
66 #include "solarisfixes.h"
70 #include "ae.h" /* Event driven programming library */
71 #include "sds.h" /* Dynamic safe strings */
72 #include "anet.h" /* Networking the easy way */
73 #include "dict.h" /* Hash tables */
74 #include "adlist.h" /* Linked lists */
75 #include "zmalloc.h" /* total memory usage aware version of malloc/free */
76 #include "lzf.h" /* LZF compression library */
77 #include "pqsort.h" /* Partial qsort for SORT+LIMIT */
/* Static server configuration.
 * Compile-time constants only. The stray numeric tokens that preceded each
 * directive in this section have been removed: a '#' must be the first
 * non-blank token on its line to start a preprocessing directive. */
#define REDIS_SERVERPORT 6379 /* TCP port */
#define REDIS_MAXIDLETIME (60*5) /* default client timeout (presumably seconds) */
#define REDIS_IOBUF_LEN 1024
#define REDIS_LOADBUF_LEN 1024
#define REDIS_STATIC_ARGS 8
#define REDIS_DEFAULT_DBNUM 16
#define REDIS_CONFIGLINE_MAX 1024
#define REDIS_OBJFREELIST_MAX 1000000 /* Max number of objects to cache */
#define REDIS_MAX_SYNC_TIME 60 /* Slave can't take more to sync */
#define REDIS_EXPIRELOOKUPS_PER_CRON 10 /* try to expire 10 keys/loop */
#define REDIS_MAX_WRITE_PER_EVENT (1024*64)
#define REDIS_REQUEST_MAX_SIZE (1024*1024*256) /* max bytes in inline command */

/* If more than REDIS_WRITEV_THRESHOLD write packets are pending use writev */
#define REDIS_WRITEV_THRESHOLD 3
/* Max number of iovecs used for each writev call */
#define REDIS_WRITEV_IOVEC_COUNT 256

/* Hash table parameters */
#define REDIS_HT_MINFILL 10 /* Minimal hash table fill 10% */
/* Per-command flags. They are distinct bits and are OR-ed together in the
 * cmdTable entries (e.g. REDIS_CMD_BULK|REDIS_CMD_DENYOOM). */
#define REDIS_CMD_BULK 1 /* Bulk write command */
#define REDIS_CMD_INLINE 2 /* Inline command */
/* REDIS_CMD_DENYOOM deserves a longer comment: all the commands marked with
   this flag will return an error when the 'maxmemory' option is set in the
   config file and the server is using more than maxmemory bytes of memory.
   In short these commands are denied on low memory conditions. */
#define REDIS_CMD_DENYOOM 4
#define REDIS_CMD_FORCE_REPLICATION 8 /* Force replication even if dirty is 0 */
/* Object type code for strings (the value stored in the 'type' field by
 * initStaticStringObject). */
#define REDIS_STRING 0

/* Object encodings. Some kinds of objects, like Strings and Hashes, can be
 * internally represented in multiple ways. The 'encoding' field of the object
 * is set to one of these values. */
#define REDIS_ENCODING_RAW 0 /* Raw representation */
#define REDIS_ENCODING_INT 1 /* Encoded as integer */
#define REDIS_ENCODING_ZIPMAP 2 /* Encoded as zipmap */
#define REDIS_ENCODING_HT 3 /* Encoded as a hash table */
131 static char* strencoding
[] = {
132 "raw", "int", "zipmap", "hashtable"
/* Object types only used for dumping to disk */
#define REDIS_EXPIRETIME 253
#define REDIS_SELECTDB 254
#define REDIS_EOF 255

/* Defines related to the dump file format. Storing 32 bit lengths for short
 * keys would require a lot of space, so we check the two most significant
 * bits of the first byte to interpret the length:
 *
 * 00|000000 => if the two MSB are 00 the len is the 6 bits of this byte
 * 01|000000 00000000 => 01, the len is 14 bits, 6 bits + 8 bits of next byte
 * 10|000000 [32 bit integer] => if it's 10, a full 32 bit len will follow
 * 11|000000 this means: specially encoded object will follow. The six bits
 *           number specify the kind of object that follows.
 *           See the REDIS_RDB_ENC_* defines.
 *
 * Lengths up to 63 are stored using a single byte; most DB keys, and many
 * values, will fit inside. */
#define REDIS_RDB_6BITLEN 0
#define REDIS_RDB_14BITLEN 1
#define REDIS_RDB_32BITLEN 2
#define REDIS_RDB_ENCVAL 3
/* Sentinel length value; requires <limits.h> for UINT_MAX. */
#define REDIS_RDB_LENERR UINT_MAX
/* When the length of a string object stored on disk has the first two bits
 * set, the remaining six bits specify a special encoding for the object
 * according to the following defines: */
#define REDIS_RDB_ENC_INT8 0 /* 8 bit signed integer */
#define REDIS_RDB_ENC_INT16 1 /* 16 bit signed integer */
#define REDIS_RDB_ENC_INT32 2 /* 32 bit signed integer */
#define REDIS_RDB_ENC_LZF 3 /* string compressed with LZF */
/* Virtual memory object->where field. */
#define REDIS_VM_MEMORY 0 /* The object is on memory */
#define REDIS_VM_SWAPPED 1 /* The object is on disk */
#define REDIS_VM_SWAPPING 2 /* Redis is swapping this object on disk */
#define REDIS_VM_LOADING 3 /* Redis is loading this object from disk */

/* Virtual memory static configuration stuff.
 * Check vmFindContiguousPages() to know more about these magic numbers. */
#define REDIS_VM_MAX_NEAR_PAGES 65536
#define REDIS_VM_MAX_RANDOM_JUMP 4096
#define REDIS_VM_MAX_THREADS 32
#define REDIS_THREAD_STACK_SIZE (1024*1024*4)
/* The following is the *percentage* of completed I/O jobs to process when the
 * handler is called. While Virtual Memory I/O operations are performed by
 * threads, these operations must be processed by the main thread when
 * completed in order to take effect. */
#define REDIS_MAX_COMPLETED_JOBS_PROCESSED 1
/* Client flags: distinct bits, OR-ed together in the redisClient 'flags'
 * field (REDIS_SLAVE | REDIS_MONITOR | REDIS_MULTI ...). */
#define REDIS_SLAVE 1 /* This client is a slave server */
#define REDIS_MASTER 2 /* This client is a master server */
#define REDIS_MONITOR 4 /* This client is a slave monitor, see MONITOR */
#define REDIS_MULTI 8 /* This client is in a MULTI context */
#define REDIS_BLOCKED 16 /* The client is waiting in a blocking operation */
#define REDIS_IO_WAIT 32 /* The client is waiting for Virtual Memory I/O */

/* Slave replication state - slave side */
#define REDIS_REPL_NONE 0 /* No active replication */
#define REDIS_REPL_CONNECT 1 /* Must connect to master */
#define REDIS_REPL_CONNECTED 2 /* Connected to master */

/* Slave replication state - from the point of view of master.
 * Note that in SEND_BULK and ONLINE state the slave receives new updates
 * in its output queue. In the WAIT_BGSAVE state instead the server is waiting
 * to start the next background saving in order to send updates to it. */
#define REDIS_REPL_WAIT_BGSAVE_START 3 /* master waits bgsave to start feeding it */
#define REDIS_REPL_WAIT_BGSAVE_END 4 /* master waits bgsave to end to start bulk DB transmission */
#define REDIS_REPL_SEND_BULK 5 /* master is sending the bulk DB */
#define REDIS_REPL_ONLINE 6 /* bulk DB already transmitted, receive updates */
207 /* List related stuff */
/* Sort operations */
#define REDIS_SORT_GET 0
#define REDIS_SORT_ASC 1
#define REDIS_SORT_DESC 2
#define REDIS_SORTKEY_MAX 1024

/* Severity levels in increasing order (presumably log levels - confirm
 * against the logging function, which is not visible here). */
#define REDIS_DEBUG 0
#define REDIS_VERBOSE 1
#define REDIS_NOTICE 2
#define REDIS_WARNING 3

/* Anti-warning macro: evaluates V and discards the result. The argument is
 * parenthesized so that compound expressions such as REDIS_NOTUSED(a + b)
 * are cast as a whole. */
#define REDIS_NOTUSED(V) ((void) (V))

#define ZSKIPLIST_MAXLEVEL 32 /* Should be enough for 2^32 elements */
#define ZSKIPLIST_P 0.25 /* Skiplist P = 1/4 */

/* Append only defines */
#define APPENDFSYNC_NO 0
#define APPENDFSYNC_ALWAYS 1
#define APPENDFSYNC_EVERYSEC 2

/* Hashes related defaults */
#define REDIS_HASH_MAX_ZIPMAP_ENTRIES 64
#define REDIS_HASH_MAX_ZIPMAP_VALUE 512
/* We can print the stacktrace, so our assert is defined this way:
 * when the expression is false, _redisAssert() is called with the
 * stringified expression, the file name and the line number, and then the
 * process is terminated with _exit(1). When it is true the macro is a no-op
 * of type void. */
#define redisAssert(_e) ((_e)?(void)0 : (_redisAssert(#_e,__FILE__,__LINE__),_exit(1)))
static void _redisAssert(char *estr, char *file, int line);
242 /*================================= Data types ============================== */
244 /* A redis object, that is a type able to hold a string / list / set */
246 /* The VM object structure */
247 struct redisObjectVM
{
248 off_t page
; /* the page at which the object is stored on disk */
249 off_t usedpages
; /* number of pages used on disk */
250 time_t atime
; /* Last access time */
253 /* The actual Redis Object */
254 typedef struct redisObject
{
257 unsigned char encoding
;
258 unsigned char storage
; /* If this object is a key, where is the value?
259 * REDIS_VM_MEMORY, REDIS_VM_SWAPPED, ... */
260 unsigned char vtype
; /* If this object is a key, and value is swapped out,
261 * this is the type of the swapped out object. */
/* VM fields: these are only allocated if VM is active, otherwise the
 * object allocation function will just allocate
 * sizeof(redisObject) minus sizeof(redisObjectVM), so using
 * Redis without VM active will not have any overhead. */
267 struct redisObjectVM vm
;
/* Macro used to initialize a Redis object allocated on the stack.
 * Note that this macro is kept near the structure definition to make sure
 * we'll update it when the structure is changed, to avoid bugs like
 * bug #85, which was introduced in exactly this way. */
274 #define initStaticStringObject(_var,_ptr) do { \
276 _var.type = REDIS_STRING; \
277 _var.encoding = REDIS_ENCODING_RAW; \
279 if (server.vm_enabled) _var.storage = REDIS_VM_MEMORY; \
282 typedef struct redisDb
{
283 dict
*dict
; /* The keyspace for this DB */
284 dict
*expires
; /* Timeout of keys with a timeout set */
285 dict
*blockingkeys
; /* Keys with clients waiting for data (BLPOP) */
286 dict
*io_keys
; /* Keys with clients waiting for VM I/O */
290 /* Client MULTI/EXEC state */
291 typedef struct multiCmd
{
294 struct redisCommand
*cmd
;
297 typedef struct multiState
{
298 multiCmd
*commands
; /* Array of MULTI commands */
299 int count
; /* Total number of MULTI commands */
/* With multiplexing we need to keep per-client state.
 * Clients are kept in a linked list. */
304 typedef struct redisClient
{
309 robj
**argv
, **mbargv
;
311 int bulklen
; /* bulk read len. -1 if not in bulk read mode */
312 int multibulk
; /* multi bulk command format active */
315 time_t lastinteraction
; /* time of the last interaction, used for timeout */
316 int flags
; /* REDIS_SLAVE | REDIS_MONITOR | REDIS_MULTI ... */
317 int slaveseldb
; /* slave selected db, if this client is a slave */
318 int authenticated
; /* when requirepass is non-NULL */
319 int replstate
; /* replication state if this is a slave */
320 int repldbfd
; /* replication DB file descriptor */
321 long repldboff
; /* replication DB file offset */
322 off_t repldbsize
; /* replication DB file size */
323 multiState mstate
; /* MULTI/EXEC state */
324 robj
**blockingkeys
; /* The key we are waiting to terminate a blocking
325 * operation such as BLPOP. Otherwise NULL. */
326 int blockingkeysnum
; /* Number of blocking keys */
327 time_t blockingto
; /* Blocking operation timeout. If UNIX current time
328 * is >= blockingto then the operation timed out. */
329 list
*io_keys
; /* Keys this client is waiting to be loaded from the
330 * swap file in order to continue. */
331 dict
*pubsub_channels
; /* channels a client is interested in (SUBSCRIBE) */
332 list
*pubsub_patterns
; /* patterns a client is interested in (SUBSCRIBE) */
340 /* Global server state structure */
345 long long dirty
; /* changes to DB from the last save */
347 list
*slaves
, *monitors
;
348 char neterr
[ANET_ERR_LEN
];
350 int cronloops
; /* number of times the cron function run */
351 list
*objfreelist
; /* A list of freed objects to avoid malloc() */
352 time_t lastsave
; /* Unix time of last successful save */
353 /* Fields used only for stats */
354 time_t stat_starttime
; /* server start time */
355 long long stat_numcommands
; /* number of processed commands */
356 long long stat_numconnections
; /* number of connections received */
357 long long stat_expiredkeys
; /* number of expired keys */
370 pid_t bgsavechildpid
;
371 pid_t bgrewritechildpid
;
372 sds bgrewritebuf
; /* buffer taken by parent during append only rewrite */
373 struct saveparam
*saveparams
;
378 char *appendfilename
;
382 /* Replication related */
387 redisClient
*master
; /* client that is master for this slave */
389 unsigned int maxclients
;
390 unsigned long long maxmemory
;
391 unsigned int blpop_blocked_clients
;
392 unsigned int vm_blocked_clients
;
393 /* Sort parameters - qsort_r() is only available under BSD so we
394 * have to take this state global, in order to pass it to sortCompare() */
398 /* Virtual memory configuration */
403 unsigned long long vm_max_memory
;
405 size_t hash_max_zipmap_entries
;
406 size_t hash_max_zipmap_value
;
407 /* Virtual memory state */
410 off_t vm_next_page
; /* Next probably empty page */
411 off_t vm_near_pages
; /* Number of pages allocated sequentially */
412 unsigned char *vm_bitmap
; /* Bitmap of free/used pages */
413 time_t unixtime
; /* Unix time sampled every second. */
414 /* Virtual memory I/O threads stuff */
/* An I/O thread processes an element taken from the io_newjobs queue and
 * puts the result of the operation in the io_processed list. While the
 * job is being processed, it's put on the io_processing queue. */
418 list
*io_newjobs
; /* List of VM I/O jobs yet to be processed */
419 list
*io_processing
; /* List of VM I/O jobs being processed */
420 list
*io_processed
; /* List of VM I/O jobs already processed */
421 list
*io_ready_clients
; /* Clients ready to be unblocked. All keys loaded */
422 pthread_mutex_t io_mutex
; /* lock to access io_jobs/io_done/io_thread_job */
423 pthread_mutex_t obj_freelist_mutex
; /* safe redis objects creation/free */
424 pthread_mutex_t io_swapfile_mutex
; /* So we can lseek + write */
425 pthread_attr_t io_threads_attr
; /* attributes for threads creation */
426 int io_active_threads
; /* Number of running I/O threads */
427 int vm_max_threads
; /* Max number of I/O threads running at the same time */
/* Our main thread is blocked on the event loop, looking for sockets ready
 * to be read or written, so when a threaded I/O operation is ready to be
 * processed by the main thread, the I/O thread will use a unix pipe to
 * awake the main thread. The following are the two pipe FDs. */
432 int io_ready_pipe_read
;
433 int io_ready_pipe_write
;
434 /* Virtual memory stats */
435 unsigned long long vm_stats_used_pages
;
436 unsigned long long vm_stats_swapped_objects
;
437 unsigned long long vm_stats_swapouts
;
438 unsigned long long vm_stats_swapins
;
440 dict
*pubsub_channels
; /* Map channels to list of subscribed clients */
441 list
*pubsub_patterns
; /* A list of pubsub_patterns */
446 typedef struct pubsubPattern
{
/* Function type for command handlers: takes a redisClient pointer and
 * returns nothing. It is the type of the 'proc' and 'vm_preload_proc'
 * members of struct redisCommand. */
451 typedef void redisCommandProc(redisClient
*c
);
452 struct redisCommand
{
454 redisCommandProc
*proc
;
457 /* Use a function to determine which keys need to be loaded
458 * in the background prior to executing this command. Takes precedence
459 * over vm_firstkey and others, ignored when NULL */
460 redisCommandProc
*vm_preload_proc
;
461 /* What keys should be loaded in background when calling this command? */
462 int vm_firstkey
; /* The first argument that's a key (0 = no keys) */
463 int vm_lastkey
; /* The last argument that's a key */
464 int vm_keystep
; /* The step between first and last key */
467 struct redisFunctionSym
{
469 unsigned long pointer
;
472 typedef struct _redisSortObject
{
480 typedef struct _redisSortOperation
{
483 } redisSortOperation
;
485 /* ZSETs use a specialized version of Skiplists */
487 typedef struct zskiplistNode
{
488 struct zskiplistNode
**forward
;
489 struct zskiplistNode
*backward
;
495 typedef struct zskiplist
{
496 struct zskiplistNode
*header
, *tail
;
497 unsigned long length
;
501 typedef struct zset
{
506 /* Our shared "common" objects */
508 struct sharedObjectsStruct
{
509 robj
*crlf
, *ok
, *err
, *emptybulk
, *czero
, *cone
, *pong
, *space
,
510 *colon
, *nullbulk
, *nullmultibulk
, *queued
,
511 *emptymultibulk
, *wrongtypeerr
, *nokeyerr
, *syntaxerr
, *sameobjecterr
,
512 *outofrangeerr
, *plus
,
513 *select0
, *select1
, *select2
, *select3
, *select4
,
514 *select5
, *select6
, *select7
, *select8
, *select9
,
515 *messagebulk
, *subscribebulk
, *unsubscribebulk
, *mbulk3
,
516 *psubscribebulk
, *punsubscribebulk
;
/* Global vars that are actually used as constants. The following double
 * values are used for double on-disk serialization, and are initialized
 * at runtime to avoid strange compiler optimizations. Until that runtime
 * initialization happens they hold the zero value every static object
 * starts with. */
static double R_Zero, R_PosInf, R_NegInf, R_Nan;
/* VM threaded I/O request message: values for the 'type' field of an iojob
 * (Request type, REDIS_IOJOB_*). */
#define REDIS_IOJOB_LOAD 0 /* Load from disk to memory */
#define REDIS_IOJOB_PREPARE_SWAP 1 /* Compute needed pages */
#define REDIS_IOJOB_DO_SWAP 2 /* Swap from memory to disk */
529 typedef struct iojob
{
530 int type
; /* Request type, REDIS_IOJOB_* */
531 redisDb
*db
;/* Redis database */
532 robj
*key
; /* This I/O request is about swapping this key */
533 robj
*val
; /* the value to swap for REDIS_IOJOB_*_SWAP, otherwise this
   * field is populated by the I/O thread for REDIS_IOJOB_LOAD. */
535 off_t page
; /* Swap page where to read/write the object */
536 off_t pages
; /* Swap pages needed to save object. PREPARE_SWAP return val */
537 int canceled
; /* True if this command was canceled by blocking side of VM */
538 pthread_t thread
; /* ID of the thread processing this entry */
541 /*================================ Prototypes =============================== */
/* Forward declarations of file-local (static) functions, so that they can be
 * referenced before their definitions. None of the function bodies are part
 * of this section. */
543 static void freeStringObject(robj
*o
);
544 static void freeListObject(robj
*o
);
545 static void freeSetObject(robj
*o
);
546 static void decrRefCount(void *o
);
547 static robj
*createObject(int type
, void *ptr
);
548 static void freeClient(redisClient
*c
);
549 static int rdbLoad(char *filename
);
550 static void addReply(redisClient
*c
, robj
*obj
);
551 static void addReplySds(redisClient
*c
, sds s
);
552 static void incrRefCount(robj
*o
);
553 static int rdbSaveBackground(char *filename
);
554 static robj
*createStringObject(char *ptr
, size_t len
);
555 static robj
*dupStringObject(robj
*o
);
556 static void replicationFeedSlaves(list
*slaves
, int dictid
, robj
**argv
, int argc
);
557 static void feedAppendOnlyFile(struct redisCommand
*cmd
, int dictid
, robj
**argv
, int argc
);
558 static int syncWithMaster(void);
559 static int tryObjectEncoding(robj
*o
);
560 static robj
*getDecodedObject(robj
*o
);
561 static int removeExpire(redisDb
*db
, robj
*key
);
562 static int expireIfNeeded(redisDb
*db
, robj
*key
);
563 static int deleteIfVolatile(redisDb
*db
, robj
*key
);
564 static int deleteIfSwapped(redisDb
*db
, robj
*key
);
565 static int deleteKey(redisDb
*db
, robj
*key
);
566 static time_t getExpire(redisDb
*db
, robj
*key
);
567 static int setExpire(redisDb
*db
, robj
*key
, time_t when
);
568 static void updateSlavesWaitingBgsave(int bgsaveerr
);
569 static void freeMemoryIfNeeded(void);
570 static int processCommand(redisClient
*c
);
571 static void setupSigSegvAction(void);
572 static void rdbRemoveTempFile(pid_t childpid
);
573 static void aofRemoveTempFile(pid_t childpid
);
574 static size_t stringObjectLen(robj
*o
);
575 static void processInputBuffer(redisClient
*c
);
576 static zskiplist
*zslCreate(void);
577 static void zslFree(zskiplist
*zsl
);
578 static void zslInsert(zskiplist
*zsl
, double score
, robj
*obj
);
579 static void sendReplyToClientWritev(aeEventLoop
*el
, int fd
, void *privdata
, int mask
);
580 static void initClientMultiState(redisClient
*c
);
581 static void freeClientMultiState(redisClient
*c
);
582 static void queueMultiCommand(redisClient
*c
, struct redisCommand
*cmd
);
583 static void unblockClientWaitingData(redisClient
*c
);
584 static int handleClientsWaitingListPush(redisClient
*c
, robj
*key
, robj
*ele
);
585 static void vmInit(void);
586 static void vmMarkPagesFree(off_t page
, off_t count
);
587 static robj
*vmLoadObject(robj
*key
);
588 static robj
*vmPreviewObject(robj
*key
);
589 static int vmSwapOneObjectBlocking(void);
590 static int vmSwapOneObjectThreaded(void);
591 static int vmCanSwapOut(void);
592 static int tryFreeOneObjectFromFreelist(void);
593 static void acceptHandler(aeEventLoop
*el
, int fd
, void *privdata
, int mask
);
594 static void vmThreadedIOCompletedJob(aeEventLoop
*el
, int fd
, void *privdata
, int mask
);
595 static void vmCancelThreadedIOJob(robj
*o
);
596 static void lockThreadedIO(void);
597 static void unlockThreadedIO(void);
598 static int vmSwapObjectThreaded(robj
*key
, robj
*val
, redisDb
*db
);
599 static void freeIOJob(iojob
*j
);
600 static void queueIOJob(iojob
*j
);
601 static int vmWriteObjectOnSwap(robj
*o
, off_t page
);
602 static robj
*vmReadObjectFromSwap(off_t page
, int type
);
603 static void waitEmptyIOJobsQueue(void);
604 static void vmReopenSwapFile(void);
605 static int vmFreePage(off_t page
);
/* zunionInterBlockClientOnSwappedKeys has the redisCommandProc signature and
 * is used as the vm_preload_proc of the "zunion" and "zinter" cmdTable
 * entries. */
606 static void zunionInterBlockClientOnSwappedKeys(redisClient
*c
);
607 static int blockClientOnSwappedKeys(struct redisCommand
*cmd
, redisClient
*c
);
608 static int dontWaitForSwappedKey(redisClient
*c
, robj
*key
);
609 static void handleClientsBlockedOnSwappedKey(redisDb
*db
, robj
*key
);
610 static void readQueryFromClient(aeEventLoop
*el
, int fd
, void *privdata
, int mask
);
611 static struct redisCommand
*lookupCommand(char *name
);
612 static void call(redisClient
*c
, struct redisCommand
*cmd
);
613 static void resetClient(redisClient
*c
);
614 static void convertToRealHash(robj
*o
);
615 static int pubsubUnsubscribeAllChannels(redisClient
*c
, int notify
);
616 static int pubsubUnsubscribeAllPatterns(redisClient
*c
, int notify
);
617 static void freePubsubPattern(void *p
);
618 static int listMatchPubsubPattern(void *a
, void *b
);
619 static int compareStringObjects(robj
*a
, robj
*b
);
/* Command implementations. Every prototype below has the form
 * void f(redisClient *c), i.e. the redisCommandProc signature, and the
 * names are referenced from the cmdTable entries. */
622 static void authCommand(redisClient
*c
);
623 static void pingCommand(redisClient
*c
);
624 static void echoCommand(redisClient
*c
);
625 static void setCommand(redisClient
*c
);
626 static void setnxCommand(redisClient
*c
);
627 static void getCommand(redisClient
*c
);
628 static void delCommand(redisClient
*c
);
629 static void existsCommand(redisClient
*c
);
630 static void incrCommand(redisClient
*c
);
631 static void decrCommand(redisClient
*c
);
632 static void incrbyCommand(redisClient
*c
);
633 static void decrbyCommand(redisClient
*c
);
634 static void selectCommand(redisClient
*c
);
635 static void randomkeyCommand(redisClient
*c
);
636 static void keysCommand(redisClient
*c
);
637 static void dbsizeCommand(redisClient
*c
);
638 static void lastsaveCommand(redisClient
*c
);
639 static void saveCommand(redisClient
*c
);
640 static void bgsaveCommand(redisClient
*c
);
641 static void bgrewriteaofCommand(redisClient
*c
);
642 static void shutdownCommand(redisClient
*c
);
643 static void moveCommand(redisClient
*c
);
644 static void renameCommand(redisClient
*c
);
645 static void renamenxCommand(redisClient
*c
);
646 static void lpushCommand(redisClient
*c
);
647 static void rpushCommand(redisClient
*c
);
648 static void lpopCommand(redisClient
*c
);
649 static void rpopCommand(redisClient
*c
);
650 static void llenCommand(redisClient
*c
);
651 static void lindexCommand(redisClient
*c
);
652 static void lrangeCommand(redisClient
*c
);
653 static void ltrimCommand(redisClient
*c
);
654 static void typeCommand(redisClient
*c
);
655 static void lsetCommand(redisClient
*c
);
656 static void saddCommand(redisClient
*c
);
657 static void sremCommand(redisClient
*c
);
658 static void smoveCommand(redisClient
*c
);
659 static void sismemberCommand(redisClient
*c
);
660 static void scardCommand(redisClient
*c
);
661 static void spopCommand(redisClient
*c
);
662 static void srandmemberCommand(redisClient
*c
);
/* sinterCommand is also the handler of the "smembers" cmdTable entry. */
663 static void sinterCommand(redisClient
*c
);
664 static void sinterstoreCommand(redisClient
*c
);
665 static void sunionCommand(redisClient
*c
);
666 static void sunionstoreCommand(redisClient
*c
);
667 static void sdiffCommand(redisClient
*c
);
668 static void sdiffstoreCommand(redisClient
*c
);
669 static void syncCommand(redisClient
*c
);
670 static void flushdbCommand(redisClient
*c
);
671 static void flushallCommand(redisClient
*c
);
672 static void sortCommand(redisClient
*c
);
673 static void lremCommand(redisClient
*c
);
/* NOTE: unlike its siblings this name ends in a lowercase "command"; the
 * cmdTable entry for "rpoplpush" uses the same spelling, so it must not be
 * renamed here alone. */
674 static void rpoplpushcommand(redisClient
*c
);
675 static void infoCommand(redisClient
*c
);
676 static void mgetCommand(redisClient
*c
);
677 static void monitorCommand(redisClient
*c
);
678 static void expireCommand(redisClient
*c
);
679 static void expireatCommand(redisClient
*c
);
680 static void getsetCommand(redisClient
*c
);
681 static void ttlCommand(redisClient
*c
);
682 static void slaveofCommand(redisClient
*c
);
683 static void debugCommand(redisClient
*c
);
684 static void msetCommand(redisClient
*c
);
685 static void msetnxCommand(redisClient
*c
);
686 static void zaddCommand(redisClient
*c
);
687 static void zincrbyCommand(redisClient
*c
);
688 static void zrangeCommand(redisClient
*c
);
689 static void zrangebyscoreCommand(redisClient
*c
);
690 static void zcountCommand(redisClient
*c
);
691 static void zrevrangeCommand(redisClient
*c
);
692 static void zcardCommand(redisClient
*c
);
693 static void zremCommand(redisClient
*c
);
694 static void zscoreCommand(redisClient
*c
);
695 static void zremrangebyscoreCommand(redisClient
*c
);
696 static void multiCommand(redisClient
*c
);
697 static void execCommand(redisClient
*c
);
698 static void discardCommand(redisClient
*c
);
699 static void blpopCommand(redisClient
*c
);
700 static void brpopCommand(redisClient
*c
);
701 static void appendCommand(redisClient
*c
);
702 static void substrCommand(redisClient
*c
);
703 static void zrankCommand(redisClient
*c
);
704 static void zrevrankCommand(redisClient
*c
);
705 static void hsetCommand(redisClient
*c
);
706 static void hgetCommand(redisClient
*c
);
707 static void hdelCommand(redisClient
*c
);
708 static void hlenCommand(redisClient
*c
);
709 static void zremrangebyrankCommand(redisClient
*c
);
710 static void zunionCommand(redisClient
*c
);
711 static void zinterCommand(redisClient
*c
);
712 static void hkeysCommand(redisClient
*c
);
713 static void hvalsCommand(redisClient
*c
);
714 static void hgetallCommand(redisClient
*c
);
715 static void hexistsCommand(redisClient
*c
);
716 static void configCommand(redisClient
*c
);
717 static void hincrbyCommand(redisClient
*c
);
718 static void subscribeCommand(redisClient
*c
);
719 static void unsubscribeCommand(redisClient
*c
);
720 static void psubscribeCommand(redisClient
*c
);
721 static void punsubscribeCommand(redisClient
*c
);
722 static void publishCommand(redisClient
*c
);
724 /*================================= Globals ================================= */
/* File-scope redisServer instance. Other code in this file reads it
 * directly, e.g. the initStaticStringObject macro tests server.vm_enabled. */
727 static struct redisServer server
; /* server global state */
728 static struct redisCommand cmdTable
[] = {
729 {"get",getCommand
,2,REDIS_CMD_INLINE
,NULL
,1,1,1},
730 {"set",setCommand
,3,REDIS_CMD_BULK
|REDIS_CMD_DENYOOM
,NULL
,0,0,0},
731 {"setnx",setnxCommand
,3,REDIS_CMD_BULK
|REDIS_CMD_DENYOOM
,NULL
,0,0,0},
732 {"append",appendCommand
,3,REDIS_CMD_BULK
|REDIS_CMD_DENYOOM
,NULL
,1,1,1},
733 {"substr",substrCommand
,4,REDIS_CMD_INLINE
,NULL
,1,1,1},
734 {"del",delCommand
,-2,REDIS_CMD_INLINE
,NULL
,0,0,0},
735 {"exists",existsCommand
,2,REDIS_CMD_INLINE
,NULL
,1,1,1},
736 {"incr",incrCommand
,2,REDIS_CMD_INLINE
|REDIS_CMD_DENYOOM
,NULL
,1,1,1},
737 {"decr",decrCommand
,2,REDIS_CMD_INLINE
|REDIS_CMD_DENYOOM
,NULL
,1,1,1},
738 {"mget",mgetCommand
,-2,REDIS_CMD_INLINE
,NULL
,1,-1,1},
739 {"rpush",rpushCommand
,3,REDIS_CMD_BULK
|REDIS_CMD_DENYOOM
,NULL
,1,1,1},
740 {"lpush",lpushCommand
,3,REDIS_CMD_BULK
|REDIS_CMD_DENYOOM
,NULL
,1,1,1},
741 {"rpop",rpopCommand
,2,REDIS_CMD_INLINE
,NULL
,1,1,1},
742 {"lpop",lpopCommand
,2,REDIS_CMD_INLINE
,NULL
,1,1,1},
743 {"brpop",brpopCommand
,-3,REDIS_CMD_INLINE
,NULL
,1,1,1},
744 {"blpop",blpopCommand
,-3,REDIS_CMD_INLINE
,NULL
,1,1,1},
745 {"llen",llenCommand
,2,REDIS_CMD_INLINE
,NULL
,1,1,1},
746 {"lindex",lindexCommand
,3,REDIS_CMD_INLINE
,NULL
,1,1,1},
747 {"lset",lsetCommand
,4,REDIS_CMD_BULK
|REDIS_CMD_DENYOOM
,NULL
,1,1,1},
748 {"lrange",lrangeCommand
,4,REDIS_CMD_INLINE
,NULL
,1,1,1},
749 {"ltrim",ltrimCommand
,4,REDIS_CMD_INLINE
,NULL
,1,1,1},
750 {"lrem",lremCommand
,4,REDIS_CMD_BULK
,NULL
,1,1,1},
751 {"rpoplpush",rpoplpushcommand
,3,REDIS_CMD_INLINE
|REDIS_CMD_DENYOOM
,NULL
,1,2,1},
752 {"sadd",saddCommand
,3,REDIS_CMD_BULK
|REDIS_CMD_DENYOOM
,NULL
,1,1,1},
753 {"srem",sremCommand
,3,REDIS_CMD_BULK
,NULL
,1,1,1},
754 {"smove",smoveCommand
,4,REDIS_CMD_BULK
,NULL
,1,2,1},
755 {"sismember",sismemberCommand
,3,REDIS_CMD_BULK
,NULL
,1,1,1},
756 {"scard",scardCommand
,2,REDIS_CMD_INLINE
,NULL
,1,1,1},
757 {"spop",spopCommand
,2,REDIS_CMD_INLINE
,NULL
,1,1,1},
758 {"srandmember",srandmemberCommand
,2,REDIS_CMD_INLINE
,NULL
,1,1,1},
759 {"sinter",sinterCommand
,-2,REDIS_CMD_INLINE
|REDIS_CMD_DENYOOM
,NULL
,1,-1,1},
760 {"sinterstore",sinterstoreCommand
,-3,REDIS_CMD_INLINE
|REDIS_CMD_DENYOOM
,NULL
,2,-1,1},
761 {"sunion",sunionCommand
,-2,REDIS_CMD_INLINE
|REDIS_CMD_DENYOOM
,NULL
,1,-1,1},
762 {"sunionstore",sunionstoreCommand
,-3,REDIS_CMD_INLINE
|REDIS_CMD_DENYOOM
,NULL
,2,-1,1},
763 {"sdiff",sdiffCommand
,-2,REDIS_CMD_INLINE
|REDIS_CMD_DENYOOM
,NULL
,1,-1,1},
764 {"sdiffstore",sdiffstoreCommand
,-3,REDIS_CMD_INLINE
|REDIS_CMD_DENYOOM
,NULL
,2,-1,1},
765 {"smembers",sinterCommand
,2,REDIS_CMD_INLINE
,NULL
,1,1,1},
766 {"zadd",zaddCommand
,4,REDIS_CMD_BULK
|REDIS_CMD_DENYOOM
,NULL
,1,1,1},
767 {"zincrby",zincrbyCommand
,4,REDIS_CMD_BULK
|REDIS_CMD_DENYOOM
,NULL
,1,1,1},
768 {"zrem",zremCommand
,3,REDIS_CMD_BULK
,NULL
,1,1,1},
769 {"zremrangebyscore",zremrangebyscoreCommand
,4,REDIS_CMD_INLINE
,NULL
,1,1,1},
770 {"zremrangebyrank",zremrangebyrankCommand
,4,REDIS_CMD_INLINE
,NULL
,1,1,1},
771 {"zunion",zunionCommand
,-4,REDIS_CMD_INLINE
|REDIS_CMD_DENYOOM
,zunionInterBlockClientOnSwappedKeys
,0,0,0},
772 {"zinter",zinterCommand
,-4,REDIS_CMD_INLINE
|REDIS_CMD_DENYOOM
,zunionInterBlockClientOnSwappedKeys
,0,0,0},
773 {"zrange",zrangeCommand
,-4,REDIS_CMD_INLINE
,NULL
,1,1,1},
774 {"zrangebyscore",zrangebyscoreCommand
,-4,REDIS_CMD_INLINE
,NULL
,1,1,1},
775 {"zcount",zcountCommand
,4,REDIS_CMD_INLINE
,NULL
,1,1,1},
776 {"zrevrange",zrevrangeCommand
,-4,REDIS_CMD_INLINE
,NULL
,1,1,1},
777 {"zcard",zcardCommand
,2,REDIS_CMD_INLINE
,NULL
,1,1,1},
778 {"zscore",zscoreCommand
,3,REDIS_CMD_BULK
|REDIS_CMD_DENYOOM
,NULL
,1,1,1},
779 {"zrank",zrankCommand
,3,REDIS_CMD_BULK
,NULL
,1,1,1},
780 {"zrevrank",zrevrankCommand
,3,REDIS_CMD_BULK
,NULL
,1,1,1},
781 {"hset",hsetCommand
,4,REDIS_CMD_BULK
|REDIS_CMD_DENYOOM
,NULL
,1,1,1},
782 {"hincrby",hincrbyCommand
,4,REDIS_CMD_INLINE
|REDIS_CMD_DENYOOM
,NULL
,1,1,1},
783 {"hget",hgetCommand
,3,REDIS_CMD_BULK
,NULL
,1,1,1},
784 {"hdel",hdelCommand
,3,REDIS_CMD_BULK
,NULL
,1,1,1},
785 {"hlen",hlenCommand
,2,REDIS_CMD_INLINE
,NULL
,1,1,1},
786 {"hkeys",hkeysCommand
,2,REDIS_CMD_INLINE
,NULL
,1,1,1},
787 {"hvals",hvalsCommand
,2,REDIS_CMD_INLINE
,NULL
,1,1,1},
788 {"hgetall",hgetallCommand
,2,REDIS_CMD_INLINE
,NULL
,1,1,1},
789 {"hexists",hexistsCommand
,3,REDIS_CMD_BULK
,NULL
,1,1,1},
790 {"incrby",incrbyCommand
,3,REDIS_CMD_INLINE
|REDIS_CMD_DENYOOM
,NULL
,1,1,1},
791 {"decrby",decrbyCommand
,3,REDIS_CMD_INLINE
|REDIS_CMD_DENYOOM
,NULL
,1,1,1},
792 {"getset",getsetCommand
,3,REDIS_CMD_BULK
|REDIS_CMD_DENYOOM
,NULL
,1,1,1},
793 {"mset",msetCommand
,-3,REDIS_CMD_BULK
|REDIS_CMD_DENYOOM
,NULL
,1,-1,2},
794 {"msetnx",msetnxCommand
,-3,REDIS_CMD_BULK
|REDIS_CMD_DENYOOM
,NULL
,1,-1,2},
795 {"randomkey",randomkeyCommand
,1,REDIS_CMD_INLINE
,NULL
,0,0,0},
796 {"select",selectCommand
,2,REDIS_CMD_INLINE
,NULL
,0,0,0},
797 {"move",moveCommand
,3,REDIS_CMD_INLINE
,NULL
,1,1,1},
798 {"rename",renameCommand
,3,REDIS_CMD_INLINE
,NULL
,1,1,1},
799 {"renamenx",renamenxCommand
,3,REDIS_CMD_INLINE
,NULL
,1,1,1},
800 {"expire",expireCommand
,3,REDIS_CMD_INLINE
,NULL
,0,0,0},
801 {"expireat",expireatCommand
,3,REDIS_CMD_INLINE
,NULL
,0,0,0},
802 {"keys",keysCommand
,2,REDIS_CMD_INLINE
,NULL
,0,0,0},
803 {"dbsize",dbsizeCommand
,1,REDIS_CMD_INLINE
,NULL
,0,0,0},
804 {"auth",authCommand
,2,REDIS_CMD_INLINE
,NULL
,0,0,0},
805 {"ping",pingCommand
,1,REDIS_CMD_INLINE
,NULL
,0,0,0},
806 {"echo",echoCommand
,2,REDIS_CMD_BULK
,NULL
,0,0,0},
807 {"save",saveCommand
,1,REDIS_CMD_INLINE
,NULL
,0,0,0},
808 {"bgsave",bgsaveCommand
,1,REDIS_CMD_INLINE
,NULL
,0,0,0},
809 {"bgrewriteaof",bgrewriteaofCommand
,1,REDIS_CMD_INLINE
,NULL
,0,0,0},
810 {"shutdown",shutdownCommand
,1,REDIS_CMD_INLINE
,NULL
,0,0,0},
811 {"lastsave",lastsaveCommand
,1,REDIS_CMD_INLINE
,NULL
,0,0,0},
812 {"type",typeCommand
,2,REDIS_CMD_INLINE
,NULL
,1,1,1},
813 {"multi",multiCommand
,1,REDIS_CMD_INLINE
,NULL
,0,0,0},
814 {"exec",execCommand
,1,REDIS_CMD_INLINE
|REDIS_CMD_DENYOOM
,NULL
,0,0,0},
815 {"discard",discardCommand
,1,REDIS_CMD_INLINE
,NULL
,0,0,0},
816 {"sync",syncCommand
,1,REDIS_CMD_INLINE
,NULL
,0,0,0},
817 {"flushdb",flushdbCommand
,1,REDIS_CMD_INLINE
,NULL
,0,0,0},
818 {"flushall",flushallCommand
,1,REDIS_CMD_INLINE
,NULL
,0,0,0},
819 {"sort",sortCommand
,-2,REDIS_CMD_INLINE
|REDIS_CMD_DENYOOM
,NULL
,1,1,1},
820 {"info",infoCommand
,1,REDIS_CMD_INLINE
,NULL
,0,0,0},
821 {"monitor",monitorCommand
,1,REDIS_CMD_INLINE
,NULL
,0,0,0},
822 {"ttl",ttlCommand
,2,REDIS_CMD_INLINE
,NULL
,1,1,1},
823 {"slaveof",slaveofCommand
,3,REDIS_CMD_INLINE
,NULL
,0,0,0},
824 {"debug",debugCommand
,-2,REDIS_CMD_INLINE
,NULL
,0,0,0},
825 {"config",configCommand
,-2,REDIS_CMD_BULK
,NULL
,0,0,0},
826 {"subscribe",subscribeCommand
,-2,REDIS_CMD_INLINE
,NULL
,0,0,0},
827 {"unsubscribe",unsubscribeCommand
,-1,REDIS_CMD_INLINE
,NULL
,0,0,0},
828 {"psubscribe",psubscribeCommand
,-2,REDIS_CMD_INLINE
,NULL
,0,0,0},
829 {"punsubscribe",punsubscribeCommand
,-1,REDIS_CMD_INLINE
,NULL
,0,0,0},
830 {"publish",publishCommand
,3,REDIS_CMD_BULK
|REDIS_CMD_FORCE_REPLICATION
,NULL
,0,0,0},
831 {NULL
,NULL
,0,0,NULL
,0,0,0}
834 /*============================ Utility functions ============================ */
836 /* Glob-style pattern matching. */
/* Matches 'string' (stringLen bytes) against 'pattern' (patternLen bytes).
 * Per the inline notes, a return of 1 means "match" and 0 means "no match".
 * The visible code handles '*' (by recursing on the rest of the pattern),
 * a backslash escape, ']' and an "a-z" style range, and it compares
 * characters either directly or through tolower() -- presumably the latter
 * when 'nocase' is non-zero.
 * NOTE(review): many lines of this function are absent from this listing;
 * the remarks here cover only what is visible. */
837 static int stringmatchlen(const char *pattern
, int patternLen
,
838 const char *string
, int stringLen
, int nocase
)
/* Collapse a run of consecutive '*' characters. */
843 while (pattern
[1] == '*') {
848 return 1; /* match */
/* Try to match the remainder of the pattern at the current position. */
850 if (stringmatchlen(pattern
+1, patternLen
-1,
851 string
, stringLen
, nocase
))
852 return 1; /* match */
856 return 0; /* no match */
860 return 0; /* no match */
/* 'not' records whether the next pattern character is '^'. */
870 not = pattern
[0] == '^';
877 if (pattern
[0] == '\\') {
880 if (pattern
[0] == string
[0])
882 } else if (pattern
[0] == ']') {
884 } else if (patternLen
== 0) {
/* Range form "x-y": needs at least three pattern bytes. */
888 } else if (pattern
[1] == '-' && patternLen
>= 3) {
889 int start
= pattern
[0];
890 int end
= pattern
[2];
898 start
= tolower(start
);
904 if (c
>= start
&& c
<= end
)
908 if (pattern
[0] == string
[0])
911 if (tolower((int)pattern
[0]) == tolower((int)string
[0]))
921 return 0; /* no match */
927 if (patternLen
>= 2) {
934 if (pattern
[0] != string
[0])
935 return 0; /* no match */
937 if (tolower((int)pattern
[0]) != tolower((int)string
[0]))
938 return 0; /* no match */
/* String exhausted: only trailing '*' characters may remain in the pattern. */
946 if (stringLen
== 0) {
947 while(*pattern
== '*') {
954 if (patternLen
== 0 && stringLen
== 0)
/* Convenience wrapper around stringmatchlen() for NUL-terminated inputs:
 * both lengths are taken with strlen() and 'nocase' is forwarded unchanged.
 * Returns whatever stringmatchlen() returns. */
959 static int stringmatch(const char *pattern
, const char *string
, int nocase
) {
960 return stringmatchlen(pattern
,strlen(pattern
),string
,strlen(string
),nocase
);
/* printf-style logger.
 *
 * level: severity of the message; it is written only when
 *        level >= server.verbosity.
 * fmt:   printf format string, followed by its arguments.
 *
 * Output goes to stdout when server.logfile is NULL; otherwise the log file
 * is opened in append mode for this call and closed again at the end.
 * NOTE(review): whether a failed fopen() is handled is not visible in this
 * listing. */
963 static void redisLog(int level
, const char *fmt
, ...) {
967 fp
= (server
.logfile
== NULL
) ? stdout
: fopen(server
.logfile
,"a");
971 if (level
>= server
.verbosity
) {
/* Prefix: "[pid] day month time <level marker> ". */
977 strftime(buf
,64,"%d %b %H:%M:%S",localtime(&now
));
978 fprintf(fp
,"[%d] %s %c ",(int)getpid(),buf
,c
[level
]);
979 vfprintf(fp
, fmt
, ap
);
/* Only a stream we opened ourselves is closed; stdout is left alone. */
985 if (server
.logfile
) fclose(fp
);
988 /*====================== Hash table type implementation ==================== */
990 /* This is a hash table type that uses the SDS dynamic strings library as
991 * keys and redis objects as values (objects can hold SDS strings,
/* Dict value destructor; zsetDictType installs it for its malloc'ed double
 * values. 'privdata' is unused.
 * NOTE(review): the statement that releases 'val' is not visible in this
 * listing. */
994 static void dictVanillaFree(void *privdata
, void *val
)
996 DICT_NOTUSED(privdata
);
/* Dict value destructor for values that are lists: 'val' is cast to list*
 * and handed to listRelease(). 'privdata' is unused. */
1000 static void dictListDestructor(void *privdata
, void *val
)
1002 DICT_NOTUSED(privdata
);
1003 listRelease((list
*)val
);
/* Key comparison callback for sds keys.
 * Returns 1 when both strings have the same sdslen() and identical bytes,
 * 0 otherwise. The bytes are compared with memcmp() over the sds length, so
 * the comparison does not stop at embedded NUL bytes. 'privdata' is unused. */
1006 static int sdsDictKeyCompare(void *privdata
, const void *key1
,
1010 DICT_NOTUSED(privdata
);
1012 l1
= sdslen((sds
)key1
);
1013 l2
= sdslen((sds
)key2
);
/* Different lengths can never be equal: skip the memcmp(). */
1014 if (l1
!= l2
) return 0;
1015 return memcmp(key1
, key2
, l1
) == 0;
/* Destructor callback installed for robj keys and values by the dictType
 * tables of this file. A NULL 'val' is ignored. 'privdata' is unused.
 * NOTE(review): the statement that releases a non-NULL 'val' is not visible
 * in this listing. */
1018 static void dictRedisObjectDestructor(void *privdata
, void *val
)
1020 DICT_NOTUSED(privdata
);
1022 if (val
== NULL
) return; /* Values of swapped out keys are set to NULL */
/* Key comparison callback for keys that are robj pointers: the 'ptr' fields
 * of the two objects are compared with sdsDictKeyCompare(), whose result is
 * returned as is. */
1026 static int dictObjKeyCompare(void *privdata
, const void *key1
,
1029 const robj
*o1
= key1
, *o2
= key2
;
1030 return sdsDictKeyCompare(privdata
,o1
->ptr
,o2
->ptr
);
/* Hash callback for robj keys: o->ptr is treated as an sds string and its
 * sdslen() bytes are hashed with dictGenHashFunction(). */
1033 static unsigned int dictObjHash(const void *key
) {
1034 const robj
*o
= key
;
1035 return dictGenHashFunction(o
->ptr
, sdslen((sds
)o
->ptr
));
/* Key comparison callback for robj keys that may be stored in an encoded
 * form. Two REDIS_ENCODING_INT objects with the same 'ptr' compare equal
 * right away; otherwise both objects go through getDecodedObject() and their
 * 'ptr' fields are compared with sdsDictKeyCompare().
 * NOTE(review): what happens to the decoded objects afterwards, and the
 * final return of 'cmp', are not visible in this listing. */
1038 static int dictEncObjKeyCompare(void *privdata
, const void *key1
,
1041 robj
*o1
= (robj
*) key1
, *o2
= (robj
*) key2
;
/* Fast path: both integer encoded and holding the same value. */
1044 if (o1
->encoding
== REDIS_ENCODING_INT
&&
1045 o2
->encoding
== REDIS_ENCODING_INT
&&
1046 o1
->ptr
== o2
->ptr
) return 1;
1048 o1
= getDecodedObject(o1
);
1049 o2
= getDecodedObject(o2
);
1050 cmp
= sdsDictKeyCompare(privdata
,o1
->ptr
,o2
->ptr
);
/* Hash callback for robj keys that may be stored in an encoded form.
 *  - REDIS_ENCODING_RAW: hash the sds bytes in o->ptr.
 *  - REDIS_ENCODING_INT: format the integer kept in o->ptr with "%ld" and
 *    hash the resulting text -- presumably so that an integer-encoded key
 *    hashes like its string form (TODO confirm).
 *  - otherwise: hash the string of the object returned by
 *    getDecodedObject().
 * NOTE(review): declarations and the tail of the last branch are not
 * visible in this listing. */
1056 static unsigned int dictEncObjHash(const void *key
) {
1057 robj
*o
= (robj
*) key
;
1059 if (o
->encoding
== REDIS_ENCODING_RAW
) {
1060 return dictGenHashFunction(o
->ptr
, sdslen((sds
)o
->ptr
));
1062 if (o
->encoding
== REDIS_ENCODING_INT
) {
1066 len
= snprintf(buf
,32,"%ld",(long)o
->ptr
);
1067 return dictGenHashFunction((unsigned char*)buf
, len
);
1071 o
= getDecodedObject(o
);
1072 hash
= dictGenHashFunction(o
->ptr
, sdslen((sds
)o
->ptr
));
1079 /* Sets type and expires */
/* Keys are hashed and compared with the encoding-aware callbacks and are
 * released through dictRedisObjectDestructor; no value destructor is
 * installed. */
1080 static dictType setDictType
= {
1081 dictEncObjHash
, /* hash function */
1084 dictEncObjKeyCompare
, /* key compare */
1085 dictRedisObjectDestructor
, /* key destructor */
1086 NULL
/* val destructor */
1089 /* Sorted sets hash (note: a skiplist is used in addition to the hash table) */
/* Same key callbacks as setDictType; values are released with
 * dictVanillaFree because each one is a separately allocated double. */
1090 static dictType zsetDictType
= {
1091 dictEncObjHash
, /* hash function */
1094 dictEncObjKeyCompare
, /* key compare */
1095 dictRedisObjectDestructor
, /* key destructor */
1096 dictVanillaFree
/* val destructor of malloc(sizeof(double)) */
/* Dict type passed to dictCreate() for each database's main 'dict' in
 * initServer(). Keys are robj hashed/compared through their sds string;
 * both keys and values are released with dictRedisObjectDestructor. */
1100 static dictType dbDictType
= {
1101 dictObjHash
, /* hash function */
1104 dictObjKeyCompare
, /* key compare */
1105 dictRedisObjectDestructor
, /* key destructor */
1106 dictRedisObjectDestructor
/* val destructor */
/* Dict type passed to dictCreate() for each database's 'expires' table in
 * initServer(). Same key callbacks as dbDictType, but no value destructor is
 * installed. */
1110 static dictType keyptrDictType
= {
1111 dictObjHash
, /* hash function */
1114 dictObjKeyCompare
, /* key compare */
1115 dictRedisObjectDestructor
, /* key destructor */
1116 NULL
/* val destructor */
1119 /* Hash type hash table (note that small hashes are represented with zipmaps) */
/* Keys use the encoding-aware hash/compare callbacks; keys and values are
 * both released with dictRedisObjectDestructor. */
1120 static dictType hashDictType
= {
1121 dictEncObjHash
, /* hash function */
1124 dictEncObjKeyCompare
, /* key compare */
1125 dictRedisObjectDestructor
, /* key destructor */
1126 dictRedisObjectDestructor
/* val destructor */
1129 /* Keylist hash table type has unencoded redis objects as keys and
1130 * lists as values. It's used for blocking operations (BLPOP) and to
1131 * map swapped keys to a list of clients waiting for these keys to be loaded. */
/* initServer() also uses this type for server.pubsub_channels. Values are
 * released with dictListDestructor, i.e. listRelease(). */
1132 static dictType keylistDictType
= {
1133 dictObjHash
, /* hash function */
1136 dictObjKeyCompare
, /* key compare */
1137 dictRedisObjectDestructor
, /* key destructor */
1138 dictListDestructor
/* val destructor */
1141 static void version();
1143 /* ========================= Random utility functions ======================= */
1145 /* Redis generally does not try to recover from out of memory conditions
1146 * when allocating objects or strings, it is not clear if it will be possible
1147 * to report this condition to the client since the networking layer itself
1148 * is based on heap allocation for send buffers, so we simply abort.
1149 * At least the code will be simpler to read... */
/* 'msg' names the operation that failed; it is logged as
 * "<msg>: Out of memory" at REDIS_WARNING level.
 * NOTE(review): the abort described above is not visible in this listing. */
1150 static void oom(const char *msg
) {
1151 redisLog(REDIS_WARNING
, "%s: Out of memory\n",msg
);
1156 /* ====================== Redis server networking stuff ===================== */
/* Walks server.clients and handles two kinds of timeout:
 *  - idle clients: when server.maxidletime is non-zero, a client that is
 *    neither a slave nor a master, has no pub/sub subscriptions and whose
 *    last interaction is older than maxidletime seconds is logged as idle
 *    (the call that closes it is presumably on a line missing from this
 *    listing);
 *  - blocked clients: a REDIS_BLOCKED client whose 'blockingto' deadline is
 *    set and already passed receives a null multi bulk reply and is
 *    unblocked. */
1157 static void closeTimedoutClients(void) {
1160 time_t now
= time(NULL
);
1163 listRewind(server
.clients
,&li
);
1164 while ((ln
= listNext(&li
)) != NULL
) {
1165 c
= listNodeValue(ln
);
1166 if (server
.maxidletime
&&
1167 !(c
->flags
& REDIS_SLAVE
) && /* no timeout for slaves */
1168 !(c
->flags
& REDIS_MASTER
) && /* no timeout for masters */
1169 dictSize(c
->pubsub_channels
) == 0 && /* no timeout for pubsub */
1170 listLength(c
->pubsub_patterns
) == 0 &&
1171 (now
- c
->lastinteraction
> server
.maxidletime
))
1173 redisLog(REDIS_VERBOSE
,"Closing idle client");
1175 } else if (c
->flags
& REDIS_BLOCKED
) {
/* blockingto == 0 means no deadline was set for this blocked client. */
1176 if (c
->blockingto
!= 0 && c
->blockingto
< now
) {
1177 addReply(c
,shared
.nullmultibulk
);
1178 unblockClientWaitingData(c
);
/* Tells whether a hash table is sparse enough to be worth shrinking.
 * Returns non-zero only when the table has slots and elements, has more than
 * DICT_HT_INITIAL_SIZE slots, and its fill percentage (used*100/size) is
 * below REDIS_HT_MINFILL. */
1184 static int htNeedsResize(dict
*dict
) {
1185 long long size
, used
;
1187 size
= dictSlots(dict
);
1188 used
= dictSize(dict
);
1189 return (size
&& used
&& size
> DICT_HT_INITIAL_SIZE
&&
1190 (used
*100/size
< REDIS_HT_MINFILL
));
1193 /* If the percentage of used slots in the HT drops below REDIS_HT_MINFILL
1194 * we resize the hash table to save memory */
/* Applies htNeedsResize() to the main dict and to the expires dict of every
 * database and calls dictResize() on the ones that qualify. Only the main
 * dict resize is logged (at REDIS_VERBOSE level). */
1195 static void tryResizeHashTables(void) {
1198 for (j
= 0; j
< server
.dbnum
; j
++) {
1199 if (htNeedsResize(server
.db
[j
].dict
)) {
1200 redisLog(REDIS_VERBOSE
,"The hash table %d is too sparse, resize it...",j
);
1201 dictResize(server
.db
[j
].dict
);
1202 redisLog(REDIS_VERBOSE
,"Hash table %d resized.",j
);
1204 if (htNeedsResize(server
.db
[j
].expires
))
1205 dictResize(server
.db
[j
].expires
);
1209 /* A background saving child (BGSAVE) terminated its work. Handle this. */
/* statloc: the wait status of the terminated child.
 *
 * A normal exit with status 0 is logged as success and refreshes
 * server.lastsave; a normal exit with another status is logged as an error;
 * termination by signal is logged with the signal number and the child's
 * temp file is removed. In every case server.bgsavechildpid is reset to -1
 * and the slaves waiting for this BGSAVE are notified of the outcome. */
1210 void backgroundSaveDoneHandler(int statloc
) {
1211 int exitcode
= WEXITSTATUS(statloc
);
1212 int bysignal
= WIFSIGNALED(statloc
);
1214 if (!bysignal
&& exitcode
== 0) {
1215 redisLog(REDIS_NOTICE
,
1216 "Background saving terminated with success");
1218 server
.lastsave
= time(NULL
);
1219 } else if (!bysignal
&& exitcode
!= 0) {
1220 redisLog(REDIS_WARNING
, "Background saving error");
1222 redisLog(REDIS_WARNING
,
1223 "Background saving terminated by signal %d", WTERMSIG(statloc
));
1224 rdbRemoveTempFile(server
.bgsavechildpid
);
1226 server
.bgsavechildpid
= -1;
1227 /* Possibly there are slaves waiting for a BGSAVE in order to be served
1228 * (the first stage of SYNC is a bulk transfer of dump.rdb) */
/* WEXITSTATUS() is only meaningful for a child that exited normally: for a
 * child killed by a signal it typically reads 0, so 'exitcode' alone would
 * report success to the slaves. Require a normal exit as well. */
1229 updateSlavesWaitingBgsave((!bysignal
&& exitcode
== 0) ? REDIS_OK
: REDIS_ERR
);
1232 /* A background append only file rewriting (BGREWRITEAOF) terminated its work.
/* statloc: the wait status of the terminated BGREWRITEAOF child.
 *
 * On a normal exit with status 0 the parent opens the child's temp file
 * "temp-rewriteaof-bg-<pid>.aof" for appending, writes to it the buffer
 * accumulated in server.bgrewritebuf, renames it over
 * server.appendfilename and, when an append only fd is already open,
 * switches server.appendfd to the new file. The other outcomes are only
 * logged. The visible tail resets bgrewritebuf, removes the child's temp
 * file and sets server.bgrewritechildpid to -1.
 * NOTE(review): the error-path statements following each warning (and the
 * branch structure around them) are not visible in this listing. */
1234 void backgroundRewriteDoneHandler(int statloc
) {
1235 int exitcode
= WEXITSTATUS(statloc
);
1236 int bysignal
= WIFSIGNALED(statloc
);
1238 if (!bysignal
&& exitcode
== 0) {
1242 redisLog(REDIS_NOTICE
,
1243 "Background append only file rewriting terminated with success");
1244 /* Now it's time to flush the differences accumulated by the parent */
1245 snprintf(tmpfile
,256,"temp-rewriteaof-bg-%d.aof", (int) server
.bgrewritechildpid
);
1246 fd
= open(tmpfile
,O_WRONLY
|O_APPEND
);
1248 redisLog(REDIS_WARNING
, "Not able to open the temp append only file produced by the child: %s", strerror(errno
));
1251 /* Flush our data... */
/* A single write() is attempted; anything other than a full write is
 * reported as an error. */
1252 if (write(fd
,server
.bgrewritebuf
,sdslen(server
.bgrewritebuf
)) !=
1253 (signed) sdslen(server
.bgrewritebuf
)) {
1254 redisLog(REDIS_WARNING
, "Error or short write trying to flush the parent diff of the append log file in the child temp file: %s", strerror(errno
));
/* NOTE(review): sdslen() is printed with %lu below -- confirm that this
 * matches its return type on all supported platforms. */
1258 redisLog(REDIS_NOTICE
,"Parent diff flushed into the new append log file with success (%lu bytes)",sdslen(server
.bgrewritebuf
));
1259 /* Now our work is to rename the temp file into the stable file. And
1260 * switch the file descriptor used by the server for append only. */
1261 if (rename(tmpfile
,server
.appendfilename
) == -1) {
1262 redisLog(REDIS_WARNING
,"Can't rename the temp append only file into the stable one: %s", strerror(errno
));
1266 /* Mission completed... almost */
1267 redisLog(REDIS_NOTICE
,"Append only file successfully rewritten.");
1268 if (server
.appendfd
!= -1) {
1269 /* If append only is actually enabled... */
1270 close(server
.appendfd
);
1271 server
.appendfd
= fd
;
1273 server
.appendseldb
= -1; /* Make sure it will issue SELECT */
1274 redisLog(REDIS_NOTICE
,"The new append only file was selected for future appends.");
1276 /* If append only is disabled we just generate a dump in this
1277 * format. Why not? */
1280 } else if (!bysignal
&& exitcode
!= 0) {
1281 redisLog(REDIS_WARNING
, "Background append only file rewriting error");
1283 redisLog(REDIS_WARNING
,
1284 "Background append only file rewriting terminated by signal %d",
/* Start again from an empty diff buffer and forget the child. */
1288 sdsfree(server
.bgrewritebuf
);
1289 server
.bgrewritebuf
= sdsempty();
1290 aofRemoveTempFile(server
.bgrewritechildpid
);
1291 server
.bgrewritechildpid
= -1;
1294 /* This function is called once a background process of some kind terminates,
1295 * as we want to avoid resizing the hash tables when there is a child in order
1296 * to play well with copy-on-write (otherwise when a resize happens lots of
1297 * memory pages are copied). The goal of this function is to update the ability
1298 * for dict.c to resize the hash tables according to whether or not we have
1299 * running children. */
/* NOTE(review): the statement guarded by the test below, and the 'else'
 * that presumably leads to dictDisableResize(), are not visible in this
 * listing; as printed here the two lines read as if resizing were disabled
 * when NO child is running, which contradicts the comment above. */
1300 static void updateDictResizePolicy(void) {
1301 if (server
.bgsavechildpid
== -1 && server
.bgrewritechildpid
== -1)
1304 dictDisableResize();
1307 static int serverCron(struct aeEventLoop
*eventLoop
, long long id
, void *clientData
) {
1308 int j
, loops
= server
.cronloops
++;
1309 REDIS_NOTUSED(eventLoop
);
1311 REDIS_NOTUSED(clientData
);
1313 /* We take a cached value of the unix time in the global state because
1314 * with virtual memory and aging there is the need to store the current time
1315 * in objects at every object access, and accuracy is not needed.
1316 * To access a global var is faster than calling time(NULL) */
1317 server
.unixtime
= time(NULL
);
1319 /* Show some info about non-empty databases */
1320 for (j
= 0; j
< server
.dbnum
; j
++) {
1321 long long size
, used
, vkeys
;
1323 size
= dictSlots(server
.db
[j
].dict
);
1324 used
= dictSize(server
.db
[j
].dict
);
1325 vkeys
= dictSize(server
.db
[j
].expires
);
1326 if (!(loops
% 50) && (used
|| vkeys
)) {
1327 redisLog(REDIS_VERBOSE
,"DB %d: %lld keys (%lld volatile) in %lld slots HT.",j
,used
,vkeys
,size
);
1328 /* dictPrintStats(server.dict); */
1332 /* We don't want to resize the hash tables while a background saving
1333 * is in progress: the saving child is created using fork() that is
1334 * implemented with a copy-on-write semantic in most modern systems, so
1335 * if we resize the HT while there is the saving child at work actually
1336 * a lot of memory movements in the parent will cause a lot of pages
1338 if (server
.bgsavechildpid
== -1 && server
.bgrewritechildpid
== -1 &&
1341 tryResizeHashTables();
1344 /* Show information about connected clients */
1345 if (!(loops
% 50)) {
1346 redisLog(REDIS_VERBOSE
,"%d clients connected (%d slaves), %zu bytes in use",
1347 listLength(server
.clients
)-listLength(server
.slaves
),
1348 listLength(server
.slaves
),
1349 zmalloc_used_memory());
1352 /* Close connections of timedout clients */
1353 if ((server
.maxidletime
&& !(loops
% 100)) || server
.blpop_blocked_clients
)
1354 closeTimedoutClients();
1356 /* Check if a background saving or AOF rewrite in progress terminated */
1357 if (server
.bgsavechildpid
!= -1 || server
.bgrewritechildpid
!= -1) {
1361 if ((pid
= wait3(&statloc
,WNOHANG
,NULL
)) != 0) {
1362 if (pid
== server
.bgsavechildpid
) {
1363 backgroundSaveDoneHandler(statloc
);
1365 backgroundRewriteDoneHandler(statloc
);
1367 updateDictResizePolicy();
1370 /* If there is not a background saving in progress check if
1371 * we have to save now */
1372 time_t now
= time(NULL
);
1373 for (j
= 0; j
< server
.saveparamslen
; j
++) {
1374 struct saveparam
*sp
= server
.saveparams
+j
;
1376 if (server
.dirty
>= sp
->changes
&&
1377 now
-server
.lastsave
> sp
->seconds
) {
1378 redisLog(REDIS_NOTICE
,"%d changes in %d seconds. Saving...",
1379 sp
->changes
, sp
->seconds
);
1380 rdbSaveBackground(server
.dbfilename
);
1386 /* Try to expire a few timed out keys. The algorithm used is adaptive and
1387 * will use few CPU cycles if there are few expiring keys, otherwise
1388 * it will get more aggressive to avoid that too much memory is used by
1389 * keys that can be removed from the keyspace. */
1390 for (j
= 0; j
< server
.dbnum
; j
++) {
1392 redisDb
*db
= server
.db
+j
;
1394 /* Continue to expire if at the end of the cycle more than 25%
1395 * of the keys were expired. */
1397 long num
= dictSize(db
->expires
);
1398 time_t now
= time(NULL
);
1401 if (num
> REDIS_EXPIRELOOKUPS_PER_CRON
)
1402 num
= REDIS_EXPIRELOOKUPS_PER_CRON
;
1407 if ((de
= dictGetRandomKey(db
->expires
)) == NULL
) break;
1408 t
= (time_t) dictGetEntryVal(de
);
1410 deleteKey(db
,dictGetEntryKey(de
));
1412 server
.stat_expiredkeys
++;
1415 } while (expired
> REDIS_EXPIRELOOKUPS_PER_CRON
/4);
1418 /* Swap a few keys on disk if we are over the memory limit and VM
1419 * is enabled. Try to free objects from the free list first. */
1420 if (vmCanSwapOut()) {
1421 while (server
.vm_enabled
&& zmalloc_used_memory() >
1422 server
.vm_max_memory
)
1426 if (tryFreeOneObjectFromFreelist() == REDIS_OK
) continue;
1427 retval
= (server
.vm_max_threads
== 0) ?
1428 vmSwapOneObjectBlocking() :
1429 vmSwapOneObjectThreaded();
1430 if (retval
== REDIS_ERR
&& !(loops
% 300) &&
1431 zmalloc_used_memory() >
1432 (server
.vm_max_memory
+server
.vm_max_memory
/10))
1434 redisLog(REDIS_WARNING
,"WARNING: vm-max-memory limit exceeded by more than 10%% but unable to swap more objects out!");
1436 /* Note that when using threaded I/O we free just one object,
1437 * because anyway when the I/O thread in charge to swap this
1438 * object out will finish, the handler of completed jobs
1439 * will try to swap more objects if we are still out of memory. */
1440 if (retval
== REDIS_ERR
|| server
.vm_max_threads
> 0) break;
1444 /* Check if we should connect to a MASTER */
1445 if (server
.replstate
== REDIS_REPL_CONNECT
&& !(loops
% 10)) {
1446 redisLog(REDIS_NOTICE
,"Connecting to MASTER...");
1447 if (syncWithMaster() == REDIS_OK
) {
1448 redisLog(REDIS_NOTICE
,"MASTER <-> SLAVE sync succeeded");
1454 /* This function gets called every time Redis is entering the
1455 * main loop of the event driven library, that is, before sleeping
1456 * waiting for ready file descriptors. */
/* When VM is enabled and server.io_ready_clients is not empty, every client
 * on that list is resumed: it is removed from the list, its REDIS_IO_WAIT
 * flag is cleared, server.vm_blocked_clients is decremented and its
 * readable event handler is installed again. The command named by argv[0]
 * is then looked up, and any data still pending in the query buffer is
 * processed. 'eventLoop' is unused.
 * NOTE(review): the lines that actually run the looked-up command are not
 * visible in this listing. */
1457 static void beforeSleep(struct aeEventLoop
*eventLoop
) {
1458 REDIS_NOTUSED(eventLoop
);
1460 if (server
.vm_enabled
&& listLength(server
.io_ready_clients
)) {
1464 listRewind(server
.io_ready_clients
,&li
);
1465 while((ln
= listNext(&li
))) {
1466 redisClient
*c
= ln
->value
;
1467 struct redisCommand
*cmd
;
1469 /* Resume the client. */
1470 listDelNode(server
.io_ready_clients
,ln
);
1471 c
->flags
&= (~REDIS_IO_WAIT
);
1472 server
.vm_blocked_clients
--;
1473 aeCreateFileEvent(server
.el
, c
->fd
, AE_READABLE
,
1474 readQueryFromClient
, c
);
1475 cmd
= lookupCommand(c
->argv
[0]->ptr
);
1476 assert(cmd
!= NULL
);
1479 /* There may be more data to process in the input buffer. */
1480 if (c
->querybuf
&& sdslen(c
->querybuf
) > 0)
1481 processInputBuffer(c
);
/* Fills the global 'shared' structure with pre-built string objects:
 * protocol replies (status, error, null/empty bulk and multi bulk, the
 * integers 0 and 1), a few separators, the "select N" commands for
 * databases 0-9 and the bulk headers used by the pub/sub messages.
 * For the objects built with createStringObject() the explicit length is
 * the byte count of the literal, "\r\n" included. */
1486 static void createSharedObjects(void) {
1487 shared
.crlf
= createObject(REDIS_STRING
,sdsnew("\r\n"));
1488 shared
.ok
= createObject(REDIS_STRING
,sdsnew("+OK\r\n"));
1489 shared
.err
= createObject(REDIS_STRING
,sdsnew("-ERR\r\n"));
1490 shared
.emptybulk
= createObject(REDIS_STRING
,sdsnew("$0\r\n\r\n"));
1491 shared
.czero
= createObject(REDIS_STRING
,sdsnew(":0\r\n"));
1492 shared
.cone
= createObject(REDIS_STRING
,sdsnew(":1\r\n"));
1493 shared
.nullbulk
= createObject(REDIS_STRING
,sdsnew("$-1\r\n"));
1494 shared
.nullmultibulk
= createObject(REDIS_STRING
,sdsnew("*-1\r\n"));
1495 shared
.emptymultibulk
= createObject(REDIS_STRING
,sdsnew("*0\r\n"));
1496 shared
.pong
= createObject(REDIS_STRING
,sdsnew("+PONG\r\n"));
1497 shared
.queued
= createObject(REDIS_STRING
,sdsnew("+QUEUED\r\n"));
/* Shared error replies. */
1498 shared
.wrongtypeerr
= createObject(REDIS_STRING
,sdsnew(
1499 "-ERR Operation against a key holding the wrong kind of value\r\n"));
1500 shared
.nokeyerr
= createObject(REDIS_STRING
,sdsnew(
1501 "-ERR no such key\r\n"));
1502 shared
.syntaxerr
= createObject(REDIS_STRING
,sdsnew(
1503 "-ERR syntax error\r\n"));
1504 shared
.sameobjecterr
= createObject(REDIS_STRING
,sdsnew(
1505 "-ERR source and destination objects are the same\r\n"));
1506 shared
.outofrangeerr
= createObject(REDIS_STRING
,sdsnew(
1507 "-ERR index out of range\r\n"));
1508 shared
.space
= createObject(REDIS_STRING
,sdsnew(" "));
1509 shared
.colon
= createObject(REDIS_STRING
,sdsnew(":"));
1510 shared
.plus
= createObject(REDIS_STRING
,sdsnew("+"));
/* "select N" commands, one per database index 0-9. */
1511 shared
.select0
= createStringObject("select 0\r\n",10);
1512 shared
.select1
= createStringObject("select 1\r\n",10);
1513 shared
.select2
= createStringObject("select 2\r\n",10);
1514 shared
.select3
= createStringObject("select 3\r\n",10);
1515 shared
.select4
= createStringObject("select 4\r\n",10);
1516 shared
.select5
= createStringObject("select 5\r\n",10);
1517 shared
.select6
= createStringObject("select 6\r\n",10);
1518 shared
.select7
= createStringObject("select 7\r\n",10);
1519 shared
.select8
= createStringObject("select 8\r\n",10);
1520 shared
.select9
= createStringObject("select 9\r\n",10);
/* Bulk-encoded message type names for pub/sub, plus the "*3" header. */
1521 shared
.messagebulk
= createStringObject("$7\r\nmessage\r\n",13);
1522 shared
.subscribebulk
= createStringObject("$9\r\nsubscribe\r\n",15);
1523 shared
.unsubscribebulk
= createStringObject("$11\r\nunsubscribe\r\n",18);
1524 shared
.psubscribebulk
= createStringObject("$10\r\npsubscribe\r\n",17);
1525 shared
.punsubscribebulk
= createStringObject("$12\r\npunsubscribe\r\n",19);
1526 shared
.mbulk3
= createStringObject("*3\r\n",4);
/* Appends one save point to server.saveparams: the array is grown by one
 * element with zrealloc(), the new slot receives 'seconds' and 'changes',
 * and server.saveparamslen is incremented. serverCron() starts a background
 * save when at least 'changes' changes are pending and more than 'seconds'
 * seconds have passed since the last save. */
1529 static void appendServerSaveParams(time_t seconds
, int changes
) {
1530 server
.saveparams
= zrealloc(server
.saveparams
,sizeof(struct saveparam
)*(server
.saveparamslen
+1));
1531 server
.saveparams
[server
.saveparamslen
].seconds
= seconds
;
1532 server
.saveparams
[server
.saveparamslen
].changes
= changes
;
1533 server
.saveparamslen
++;
/* Drops every configured save point: frees server.saveparams and leaves the
 * array pointer NULL with a length of 0. */
1536 static void resetServerSaveParams() {
1537 zfree(server
.saveparams
);
1538 server
.saveparams
= NULL
;
1539 server
.saveparamslen
= 0;
/* Loads the built-in defaults into the global 'server' structure: network
 * and logging settings, persistence (RDB file name, append only file, the
 * three default save points), virtual memory parameters, hash encoding
 * limits and replication state. It also initializes the R_PosInf, R_NegInf
 * and R_Nan double constants.
 * NOTE(review): a few lines of this function are not visible in this
 * listing (e.g. the one that sets R_Zero). */
1542 static void initServerConfig() {
1543 server
.dbnum
= REDIS_DEFAULT_DBNUM
;
1544 server
.port
= REDIS_SERVERPORT
;
1545 server
.verbosity
= REDIS_VERBOSE
;
1546 server
.maxidletime
= REDIS_MAXIDLETIME
;
1547 server
.saveparams
= NULL
;
1548 server
.logfile
= NULL
; /* NULL = log on standard output */
1549 server
.bindaddr
= NULL
;
1550 server
.glueoutputbuf
= 1;
1551 server
.daemonize
= 0;
1552 server
.appendonly
= 0;
1553 server
.appendfsync
= APPENDFSYNC_ALWAYS
;
1554 server
.lastfsync
= time(NULL
);
1555 server
.appendfd
= -1;
1556 server
.appendseldb
= -1; /* Make sure the first time will not match */
/* File names are heap copies: loadServerConfig() frees pidfile and
 * dbfilename before replacing them. */
1557 server
.pidfile
= zstrdup("/var/run/redis.pid");
1558 server
.dbfilename
= zstrdup("dump.rdb");
1559 server
.appendfilename
= zstrdup("appendonly.aof");
1560 server
.requirepass
= NULL
;
1561 server
.shareobjects
= 0;
1562 server
.rdbcompression
= 1;
1563 server
.maxclients
= 0;
1564 server
.blpop_blocked_clients
= 0;
1565 server
.maxmemory
= 0;
1566 server
.vm_enabled
= 0;
1567 server
.vm_swap_file
= zstrdup("/tmp/redis-%p.vm");
1568 server
.vm_page_size
= 256; /* 256 bytes per page */
1569 server
.vm_pages
= 1024*1024*100; /* 104 millions of pages */
1570 server
.vm_max_memory
= 1024LL*1024*1024*1; /* 1 GB of RAM */
1571 server
.vm_max_threads
= 4;
1572 server
.vm_blocked_clients
= 0;
1573 server
.hash_max_zipmap_entries
= REDIS_HASH_MAX_ZIPMAP_ENTRIES
;
1574 server
.hash_max_zipmap_value
= REDIS_HASH_MAX_ZIPMAP_VALUE
;
/* Start from an empty save point list, then add the defaults. */
1576 resetServerSaveParams();
1578 appendServerSaveParams(60*60,1); /* save after 1 hour and 1 change */
1579 appendServerSaveParams(300,100); /* save after 5 minutes and 100 changes */
1580 appendServerSaveParams(60,10000); /* save after 1 minute and 10000 changes */
1581 /* Replication related */
1583 server
.masterauth
= NULL
;
1584 server
.masterhost
= NULL
;
1585 server
.masterport
= 6379;
1586 server
.master
= NULL
;
1587 server
.replstate
= REDIS_REPL_NONE
;
1589 /* Double constants initialization */
/* Infinity and NaN are derived from divisions by R_Zero. */
1591 R_PosInf
= 1.0/R_Zero
;
1592 R_NegInf
= -1.0/R_Zero
;
1593 R_Nan
= R_Zero
/R_Zero
;
/* Builds the runtime state of the server from the configuration held in
 * 'server': signal dispositions, the /dev/null stream, the client/slave/
 * monitor lists, the shared objects, the event loop, the listening TCP
 * socket, the per-database dictionaries, the pub/sub tables and the
 * statistics counters. It then registers the serverCron timer and the
 * accept handler, opens the append only file when 'appendonly' is set, and
 * initializes VM when 'vm_enabled' is set.
 * NOTE(review): some lines of this function (the local declarations and the
 * statements following each error message) are not visible in this
 * listing. */
1596 static void initServer() {
1599 signal(SIGHUP
, SIG_IGN
);
1600 signal(SIGPIPE
, SIG_IGN
);
1601 setupSigSegvAction();
1603 server
.devnull
= fopen("/dev/null","w");
1604 if (server
.devnull
== NULL
) {
/* fopen() reports its failure through errno; server.neterr is only written
 * by the anet calls and holds nothing meaningful at this point. */
1605 redisLog(REDIS_WARNING
, "Can't open /dev/null: %s", strerror(errno
));
1608 server
.clients
= listCreate();
1609 server
.slaves
= listCreate();
1610 server
.monitors
= listCreate();
1611 server
.objfreelist
= listCreate();
1612 createSharedObjects();
1613 server
.el
= aeCreateEventLoop();
1614 server
.db
= zmalloc(sizeof(redisDb
)*server
.dbnum
);
/* anetTcpServer() stores its own error text in server.neterr. */
1615 server
.fd
= anetTcpServer(server
.neterr
, server
.port
, server
.bindaddr
);
1616 if (server
.fd
== -1) {
1617 redisLog(REDIS_WARNING
, "Opening TCP port: %s", server
.neterr
);
/* Per-database tables; io_keys exists only when VM is enabled. */
1620 for (j
= 0; j
< server
.dbnum
; j
++) {
1621 server
.db
[j
].dict
= dictCreate(&dbDictType
,NULL
);
1622 server
.db
[j
].expires
= dictCreate(&keyptrDictType
,NULL
);
1623 server
.db
[j
].blockingkeys
= dictCreate(&keylistDictType
,NULL
);
1624 if (server
.vm_enabled
)
1625 server
.db
[j
].io_keys
= dictCreate(&keylistDictType
,NULL
);
1626 server
.db
[j
].id
= j
;
1628 server
.pubsub_channels
= dictCreate(&keylistDictType
,NULL
);
1629 server
.pubsub_patterns
= listCreate();
1630 listSetFreeMethod(server
.pubsub_patterns
,freePubsubPattern
);
1631 listSetMatchMethod(server
.pubsub_patterns
,listMatchPubsubPattern
);
1632 server
.cronloops
= 0;
/* -1 means "no child running" for both background jobs. */
1633 server
.bgsavechildpid
= -1;
1634 server
.bgrewritechildpid
= -1;
1635 server
.bgrewritebuf
= sdsempty();
1636 server
.lastsave
= time(NULL
);
1638 server
.stat_numcommands
= 0;
1639 server
.stat_numconnections
= 0;
1640 server
.stat_expiredkeys
= 0;
1641 server
.stat_starttime
= time(NULL
);
1642 server
.unixtime
= time(NULL
);
1643 aeCreateTimeEvent(server
.el
, 1, serverCron
, NULL
, NULL
);
1644 if (aeCreateFileEvent(server
.el
, server
.fd
, AE_READABLE
,
1645 acceptHandler
, NULL
) == AE_ERR
) oom("creating file event");
1647 if (server
.appendonly
) {
1648 server
.appendfd
= open(server
.appendfilename
,O_WRONLY
|O_APPEND
|O_CREAT
,0644);
1649 if (server
.appendfd
== -1) {
1650 redisLog(REDIS_WARNING
, "Can't open the append-only file: %s",
1656 if (server
.vm_enabled
) vmInit();
1659 /* Empty the whole database */
/* For every database, adds the number of keys in the main dict to 'removed'
 * and then empties both the main dict and the expires dict.
 * NOTE(review): the final return statement is not visible in this listing;
 * presumably the accumulated 'removed' count is returned. */
1660 static long long emptyDb() {
1662 long long removed
= 0;
1664 for (j
= 0; j
< server
.dbnum
; j
++) {
1665 removed
+= dictSize(server
.db
[j
].dict
);
1666 dictEmpty(server
.db
[j
].dict
);
1667 dictEmpty(server
.db
[j
].expires
);
/* Maps the strings "yes" and "no" (compared case-insensitively) to 1 and 0.
 * NOTE(review): the fallback for any other input is not visible in this
 * listing; the callers in loadServerConfig() test the result against -1. */
1672 static int yesnotoi(char *s
) {
1673 if (!strcasecmp(s
,"yes")) return 1;
1674 else if (!strcasecmp(s
,"no")) return 0;
1678 /* I agree, this is a very rudimentary way to load a configuration...
1679 will improve later if the config gets more complex */
1680 static void loadServerConfig(char *filename
) {
1682 char buf
[REDIS_CONFIGLINE_MAX
+1], *err
= NULL
;
1685 char *errormsg
= "Fatal error, can't open config file '%s'";
1686 char *errorbuf
= zmalloc(sizeof(char)*(strlen(errormsg
)+strlen(filename
)));
1687 sprintf(errorbuf
, errormsg
, filename
);
1689 if (filename
[0] == '-' && filename
[1] == '\0')
1692 if ((fp
= fopen(filename
,"r")) == NULL
) {
1693 redisLog(REDIS_WARNING
, errorbuf
);
1698 while(fgets(buf
,REDIS_CONFIGLINE_MAX
+1,fp
) != NULL
) {
1704 line
= sdstrim(line
," \t\r\n");
1706 /* Skip comments and blank lines*/
1707 if (line
[0] == '#' || line
[0] == '\0') {
1712 /* Split into arguments */
1713 argv
= sdssplitlen(line
,sdslen(line
)," ",1,&argc
);
1714 sdstolower(argv
[0]);
1716 /* Execute config directives */
1717 if (!strcasecmp(argv
[0],"timeout") && argc
== 2) {
1718 server
.maxidletime
= atoi(argv
[1]);
1719 if (server
.maxidletime
< 0) {
1720 err
= "Invalid timeout value"; goto loaderr
;
1722 } else if (!strcasecmp(argv
[0],"port") && argc
== 2) {
1723 server
.port
= atoi(argv
[1]);
1724 if (server
.port
< 1 || server
.port
> 65535) {
1725 err
= "Invalid port"; goto loaderr
;
1727 } else if (!strcasecmp(argv
[0],"bind") && argc
== 2) {
1728 server
.bindaddr
= zstrdup(argv
[1]);
1729 } else if (!strcasecmp(argv
[0],"save") && argc
== 3) {
1730 int seconds
= atoi(argv
[1]);
1731 int changes
= atoi(argv
[2]);
1732 if (seconds
< 1 || changes
< 0) {
1733 err
= "Invalid save parameters"; goto loaderr
;
1735 appendServerSaveParams(seconds
,changes
);
1736 } else if (!strcasecmp(argv
[0],"dir") && argc
== 2) {
1737 if (chdir(argv
[1]) == -1) {
1738 redisLog(REDIS_WARNING
,"Can't chdir to '%s': %s",
1739 argv
[1], strerror(errno
));
1742 } else if (!strcasecmp(argv
[0],"loglevel") && argc
== 2) {
1743 if (!strcasecmp(argv
[1],"debug")) server
.verbosity
= REDIS_DEBUG
;
1744 else if (!strcasecmp(argv
[1],"verbose")) server
.verbosity
= REDIS_VERBOSE
;
1745 else if (!strcasecmp(argv
[1],"notice")) server
.verbosity
= REDIS_NOTICE
;
1746 else if (!strcasecmp(argv
[1],"warning")) server
.verbosity
= REDIS_WARNING
;
1748 err
= "Invalid log level. Must be one of debug, notice, warning";
1751 } else if (!strcasecmp(argv
[0],"logfile") && argc
== 2) {
1754 server
.logfile
= zstrdup(argv
[1]);
1755 if (!strcasecmp(server
.logfile
,"stdout")) {
1756 zfree(server
.logfile
);
1757 server
.logfile
= NULL
;
1759 if (server
.logfile
) {
1760 /* Test if we are able to open the file. The server will not
1761 * be able to abort just for this problem later... */
1762 logfp
= fopen(server
.logfile
,"a");
1763 if (logfp
== NULL
) {
1764 err
= sdscatprintf(sdsempty(),
1765 "Can't open the log file: %s", strerror(errno
));
1770 } else if (!strcasecmp(argv
[0],"databases") && argc
== 2) {
1771 server
.dbnum
= atoi(argv
[1]);
1772 if (server
.dbnum
< 1) {
1773 err
= "Invalid number of databases"; goto loaderr
;
1775 } else if (!strcasecmp(argv
[0],"include") && argc
== 2) {
1776 loadServerConfig(argv
[1]);
1777 } else if (!strcasecmp(argv
[0],"maxclients") && argc
== 2) {
1778 server
.maxclients
= atoi(argv
[1]);
1779 } else if (!strcasecmp(argv
[0],"maxmemory") && argc
== 2) {
1780 server
.maxmemory
= strtoll(argv
[1], NULL
, 10);
1781 } else if (!strcasecmp(argv
[0],"slaveof") && argc
== 3) {
1782 server
.masterhost
= sdsnew(argv
[1]);
1783 server
.masterport
= atoi(argv
[2]);
1784 server
.replstate
= REDIS_REPL_CONNECT
;
1785 } else if (!strcasecmp(argv
[0],"masterauth") && argc
== 2) {
1786 server
.masterauth
= zstrdup(argv
[1]);
1787 } else if (!strcasecmp(argv
[0],"glueoutputbuf") && argc
== 2) {
1788 if ((server
.glueoutputbuf
= yesnotoi(argv
[1])) == -1) {
1789 err
= "argument must be 'yes' or 'no'"; goto loaderr
;
1791 } else if (!strcasecmp(argv
[0],"shareobjects") && argc
== 2) {
1792 if ((server
.shareobjects
= yesnotoi(argv
[1])) == -1) {
1793 err
= "argument must be 'yes' or 'no'"; goto loaderr
;
1795 } else if (!strcasecmp(argv
[0],"rdbcompression") && argc
== 2) {
1796 if ((server
.rdbcompression
= yesnotoi(argv
[1])) == -1) {
1797 err
= "argument must be 'yes' or 'no'"; goto loaderr
;
1799 } else if (!strcasecmp(argv
[0],"daemonize") && argc
== 2) {
1800 if ((server
.daemonize
= yesnotoi(argv
[1])) == -1) {
1801 err
= "argument must be 'yes' or 'no'"; goto loaderr
;
1803 } else if (!strcasecmp(argv
[0],"appendonly") && argc
== 2) {
1804 if ((server
.appendonly
= yesnotoi(argv
[1])) == -1) {
1805 err
= "argument must be 'yes' or 'no'"; goto loaderr
;
1807 } else if (!strcasecmp(argv
[0],"appendfsync") && argc
== 2) {
1808 if (!strcasecmp(argv
[1],"no")) {
1809 server
.appendfsync
= APPENDFSYNC_NO
;
1810 } else if (!strcasecmp(argv
[1],"always")) {
1811 server
.appendfsync
= APPENDFSYNC_ALWAYS
;
1812 } else if (!strcasecmp(argv
[1],"everysec")) {
1813 server
.appendfsync
= APPENDFSYNC_EVERYSEC
;
1815 err
= "argument must be 'no', 'always' or 'everysec'";
1818 } else if (!strcasecmp(argv
[0],"requirepass") && argc
== 2) {
1819 server
.requirepass
= zstrdup(argv
[1]);
1820 } else if (!strcasecmp(argv
[0],"pidfile") && argc
== 2) {
1821 zfree(server
.pidfile
);
1822 server
.pidfile
= zstrdup(argv
[1]);
1823 } else if (!strcasecmp(argv
[0],"dbfilename") && argc
== 2) {
1824 zfree(server
.dbfilename
);
1825 server
.dbfilename
= zstrdup(argv
[1]);
1826 } else if (!strcasecmp(argv
[0],"vm-enabled") && argc
== 2) {
1827 if ((server
.vm_enabled
= yesnotoi(argv
[1])) == -1) {
1828 err
= "argument must be 'yes' or 'no'"; goto loaderr
;
1830 } else if (!strcasecmp(argv
[0],"vm-swap-file") && argc
== 2) {
1831 zfree(server
.vm_swap_file
);
1832 server
.vm_swap_file
= zstrdup(argv
[1]);
1833 } else if (!strcasecmp(argv
[0],"vm-max-memory") && argc
== 2) {
1834 server
.vm_max_memory
= strtoll(argv
[1], NULL
, 10);
1835 } else if (!strcasecmp(argv
[0],"vm-page-size") && argc
== 2) {
1836 server
.vm_page_size
= strtoll(argv
[1], NULL
, 10);
1837 } else if (!strcasecmp(argv
[0],"vm-pages") && argc
== 2) {
1838 server
.vm_pages
= strtoll(argv
[1], NULL
, 10);
1839 } else if (!strcasecmp(argv
[0],"vm-max-threads") && argc
== 2) {
1840 server
.vm_max_threads
= strtoll(argv
[1], NULL
, 10);
1841 } else if (!strcasecmp(argv
[0],"hash-max-zipmap-entries") && argc
== 2){
1842 server
.hash_max_zipmap_entries
= strtol(argv
[1], NULL
, 10);
1843 } else if (!strcasecmp(argv
[0],"hash-max-zipmap-value") && argc
== 2){
1844 server
.hash_max_zipmap_value
= strtol(argv
[1], NULL
, 10);
1845 } else if (!strcasecmp(argv
[0],"vm-max-threads") && argc
== 2) {
1846 server
.vm_max_threads
= strtoll(argv
[1], NULL
, 10);
1848 err
= "Bad directive or wrong number of arguments"; goto loaderr
;
1850 for (j
= 0; j
< argc
; j
++)
1855 if (fp
!= stdin
) fclose(fp
);
1859 fprintf(stderr
, "\n*** FATAL CONFIG FILE ERROR ***\n");
1860 fprintf(stderr
, "Reading the configuration file, at line %d\n", linenum
);
1861 fprintf(stderr
, ">>> '%s'\n", line
);
1862 fprintf(stderr
, "%s\n", err
);
1866 static void freeClientArgv(redisClient
*c
) {
1869 for (j
= 0; j
< c
->argc
; j
++)
1870 decrRefCount(c
->argv
[j
]);
1871 for (j
= 0; j
< c
->mbargc
; j
++)
1872 decrRefCount(c
->mbargv
[j
]);
1877 static void freeClient(redisClient
*c
) {
1880 /* Note that if the client we are freeing is blocked in a blocking
1881 * call, we have to set querybuf to NULL *before* calling
1882 * unblockClientWaitingData(), so that processInputBuffer() does not get
1883 * called. Also it is important to remove the file events after
1884 * this, because this call adds the READABLE event. */
1885 sdsfree(c
->querybuf
);
1887 if (c
->flags
& REDIS_BLOCKED
)
1888 unblockClientWaitingData(c
);
1890 /* Unsubscribe from all the pubsub channels */
1891 pubsubUnsubscribeAllChannels(c
,0);
1892 pubsubUnsubscribeAllPatterns(c
,0);
1893 dictRelease(c
->pubsub_channels
);
1894 listRelease(c
->pubsub_patterns
);
1895 /* Obvious cleanup */
1896 aeDeleteFileEvent(server
.el
,c
->fd
,AE_READABLE
);
1897 aeDeleteFileEvent(server
.el
,c
->fd
,AE_WRITABLE
);
1898 listRelease(c
->reply
);
1901 /* Remove from the list of clients */
1902 ln
= listSearchKey(server
.clients
,c
);
1903 redisAssert(ln
!= NULL
);
1904 listDelNode(server
.clients
,ln
);
1905 /* Remove from the list of clients waiting for swapped keys */
1906 if (c
->flags
& REDIS_IO_WAIT
&& listLength(c
->io_keys
) == 0) {
1907 ln
= listSearchKey(server
.io_ready_clients
,c
);
1909 listDelNode(server
.io_ready_clients
,ln
);
1910 server
.vm_blocked_clients
--;
1913 while (server
.vm_enabled
&& listLength(c
->io_keys
)) {
1914 ln
= listFirst(c
->io_keys
);
1915 dontWaitForSwappedKey(c
,ln
->value
);
1917 listRelease(c
->io_keys
);
1918 /* Master/slave cleanup */
1919 if (c
->flags
& REDIS_SLAVE
) {
1920 if (c
->replstate
== REDIS_REPL_SEND_BULK
&& c
->repldbfd
!= -1)
1922 list
*l
= (c
->flags
& REDIS_MONITOR
) ? server
.monitors
: server
.slaves
;
1923 ln
= listSearchKey(l
,c
);
1924 redisAssert(ln
!= NULL
);
1927 if (c
->flags
& REDIS_MASTER
) {
1928 server
.master
= NULL
;
1929 server
.replstate
= REDIS_REPL_CONNECT
;
1931 /* Release memory */
1934 freeClientMultiState(c
);
1938 #define GLUEREPLY_UP_TO (1024)
1939 static void glueReplyBuffersIfNeeded(redisClient
*c
) {
1941 char buf
[GLUEREPLY_UP_TO
];
1946 listRewind(c
->reply
,&li
);
1947 while((ln
= listNext(&li
))) {
1951 objlen
= sdslen(o
->ptr
);
1952 if (copylen
+ objlen
<= GLUEREPLY_UP_TO
) {
1953 memcpy(buf
+copylen
,o
->ptr
,objlen
);
1955 listDelNode(c
->reply
,ln
);
1957 if (copylen
== 0) return;
1961 /* Now the output buffer is empty, add the new single element */
1962 o
= createObject(REDIS_STRING
,sdsnewlen(buf
,copylen
));
1963 listAddNodeHead(c
->reply
,o
);
1966 static void sendReplyToClient(aeEventLoop
*el
, int fd
, void *privdata
, int mask
) {
1967 redisClient
*c
= privdata
;
1968 int nwritten
= 0, totwritten
= 0, objlen
;
1971 REDIS_NOTUSED(mask
);
1973 /* Use writev() if we have enough buffers to send */
1974 if (!server
.glueoutputbuf
&&
1975 listLength(c
->reply
) > REDIS_WRITEV_THRESHOLD
&&
1976 !(c
->flags
& REDIS_MASTER
))
1978 sendReplyToClientWritev(el
, fd
, privdata
, mask
);
1982 while(listLength(c
->reply
)) {
1983 if (server
.glueoutputbuf
&& listLength(c
->reply
) > 1)
1984 glueReplyBuffersIfNeeded(c
);
1986 o
= listNodeValue(listFirst(c
->reply
));
1987 objlen
= sdslen(o
->ptr
);
1990 listDelNode(c
->reply
,listFirst(c
->reply
));
1994 if (c
->flags
& REDIS_MASTER
) {
1995 /* Don't reply to a master */
1996 nwritten
= objlen
- c
->sentlen
;
1998 nwritten
= write(fd
, ((char*)o
->ptr
)+c
->sentlen
, objlen
- c
->sentlen
);
1999 if (nwritten
<= 0) break;
2001 c
->sentlen
+= nwritten
;
2002 totwritten
+= nwritten
;
2003 /* If we fully sent the object on head go to the next one */
2004 if (c
->sentlen
== objlen
) {
2005 listDelNode(c
->reply
,listFirst(c
->reply
));
2008 /* Note that we avoid sending more than REDIS_MAX_WRITE_PER_EVENT
2009 * bytes: in a single threaded server it's a good idea to serve
2010 * other clients as well, even if a very large request comes from a
2011 * super fast link that is always able to accept data (in a real world
2012 * scenario think about 'KEYS *' against the loopback interface) */
2013 if (totwritten
> REDIS_MAX_WRITE_PER_EVENT
) break;
2015 if (nwritten
== -1) {
2016 if (errno
== EAGAIN
) {
2019 redisLog(REDIS_VERBOSE
,
2020 "Error writing to client: %s", strerror(errno
));
2025 if (totwritten
> 0) c
->lastinteraction
= time(NULL
);
2026 if (listLength(c
->reply
) == 0) {
2028 aeDeleteFileEvent(server
.el
,c
->fd
,AE_WRITABLE
);
2032 static void sendReplyToClientWritev(aeEventLoop
*el
, int fd
, void *privdata
, int mask
)
2034 redisClient
*c
= privdata
;
2035 int nwritten
= 0, totwritten
= 0, objlen
, willwrite
;
2037 struct iovec iov
[REDIS_WRITEV_IOVEC_COUNT
];
2038 int offset
, ion
= 0;
2040 REDIS_NOTUSED(mask
);
2043 while (listLength(c
->reply
)) {
2044 offset
= c
->sentlen
;
2048 /* fill-in the iov[] array */
2049 for(node
= listFirst(c
->reply
); node
; node
= listNextNode(node
)) {
2050 o
= listNodeValue(node
);
2051 objlen
= sdslen(o
->ptr
);
2053 if (totwritten
+ objlen
- offset
> REDIS_MAX_WRITE_PER_EVENT
)
2056 if(ion
== REDIS_WRITEV_IOVEC_COUNT
)
2057 break; /* no more iovecs */
2059 iov
[ion
].iov_base
= ((char*)o
->ptr
) + offset
;
2060 iov
[ion
].iov_len
= objlen
- offset
;
2061 willwrite
+= objlen
- offset
;
2062 offset
= 0; /* just for the first item */
2069 /* write all collected blocks at once */
2070 if((nwritten
= writev(fd
, iov
, ion
)) < 0) {
2071 if (errno
!= EAGAIN
) {
2072 redisLog(REDIS_VERBOSE
,
2073 "Error writing to client: %s", strerror(errno
));
2080 totwritten
+= nwritten
;
2081 offset
= c
->sentlen
;
2083 /* remove written robjs from c->reply */
2084 while (nwritten
&& listLength(c
->reply
)) {
2085 o
= listNodeValue(listFirst(c
->reply
));
2086 objlen
= sdslen(o
->ptr
);
2088 if(nwritten
>= objlen
- offset
) {
2089 listDelNode(c
->reply
, listFirst(c
->reply
));
2090 nwritten
-= objlen
- offset
;
2094 c
->sentlen
+= nwritten
;
2102 c
->lastinteraction
= time(NULL
);
2104 if (listLength(c
->reply
) == 0) {
2106 aeDeleteFileEvent(server
.el
,c
->fd
,AE_WRITABLE
);
2110 static struct redisCommand
*lookupCommand(char *name
) {
2112 while(cmdTable
[j
].name
!= NULL
) {
2113 if (!strcasecmp(name
,cmdTable
[j
].name
)) return &cmdTable
[j
];
2119 /* resetClient prepares the client to process the next command */
2120 static void resetClient(redisClient
*c
) {
2126 /* Call() is the core of Redis execution of a command */
2127 static void call(redisClient
*c
, struct redisCommand
*cmd
) {
2130 dirty
= server
.dirty
;
2132 dirty
= server
.dirty
-dirty
;
2134 if (server
.appendonly
&& dirty
)
2135 feedAppendOnlyFile(cmd
,c
->db
->id
,c
->argv
,c
->argc
);
2136 if ((dirty
|| cmd
->flags
& REDIS_CMD_FORCE_REPLICATION
) &&
2137 listLength(server
.slaves
))
2138 replicationFeedSlaves(server
.slaves
,c
->db
->id
,c
->argv
,c
->argc
);
2139 if (listLength(server
.monitors
))
2140 replicationFeedSlaves(server
.monitors
,c
->db
->id
,c
->argv
,c
->argc
);
2141 server
.stat_numcommands
++;
2144 /* If this function gets called we already read a whole
2145 * command, arguments are in the client argv/argc fields.
2146 * processCommand() executes the command or prepares the
2147 * server for a bulk read from the client.
2149 * If 1 is returned the client is still alive and valid and
2150 * other operations can be performed by the caller. Otherwise
2151 * if 0 is returned the client was destroyed (i.e. after QUIT). */
2152 static int processCommand(redisClient
*c
) {
2153 struct redisCommand
*cmd
;
2155 /* Free some memory if needed (maxmemory setting) */
2156 if (server
.maxmemory
) freeMemoryIfNeeded();
2158 /* Handle the multi bulk command type. This is an alternative protocol
2159 * supported by Redis in order to receive commands that are composed of
2160 * multiple binary-safe "bulk" arguments. The latency of processing is
2161 * a bit higher but this allows things like multi-sets, so if this
2162 * protocol is used only for MSET and similar commands this is a big win. */
2163 if (c
->multibulk
== 0 && c
->argc
== 1 && ((char*)(c
->argv
[0]->ptr
))[0] == '*') {
2164 c
->multibulk
= atoi(((char*)c
->argv
[0]->ptr
)+1);
2165 if (c
->multibulk
<= 0) {
2169 decrRefCount(c
->argv
[c
->argc
-1]);
2173 } else if (c
->multibulk
) {
2174 if (c
->bulklen
== -1) {
2175 if (((char*)c
->argv
[0]->ptr
)[0] != '$') {
2176 addReplySds(c
,sdsnew("-ERR multi bulk protocol error\r\n"));
2180 int bulklen
= atoi(((char*)c
->argv
[0]->ptr
)+1);
2181 decrRefCount(c
->argv
[0]);
2182 if (bulklen
< 0 || bulklen
> 1024*1024*1024) {
2184 addReplySds(c
,sdsnew("-ERR invalid bulk write count\r\n"));
2189 c
->bulklen
= bulklen
+2; /* add two bytes for CR+LF */
2193 c
->mbargv
= zrealloc(c
->mbargv
,(sizeof(robj
*))*(c
->mbargc
+1));
2194 c
->mbargv
[c
->mbargc
] = c
->argv
[0];
2198 if (c
->multibulk
== 0) {
2202 /* Here we need to swap the multi-bulk argc/argv with the
2203 * normal argc/argv of the client structure. */
2205 c
->argv
= c
->mbargv
;
2206 c
->mbargv
= auxargv
;
2209 c
->argc
= c
->mbargc
;
2210 c
->mbargc
= auxargc
;
2212 /* We need to set bulklen to something different than -1
2213 * in order for the code below to process the command without
2214 * to try to read the last argument of a bulk command as
2215 * a special argument. */
2217 /* continue below and process the command */
2224 /* -- end of multi bulk commands processing -- */
2226 /* The QUIT command is handled as a special case. Normal command
2227 * procs are unable to close the client connection safely */
2228 if (!strcasecmp(c
->argv
[0]->ptr
,"quit")) {
2233 /* Now lookup the command and check ASAP about trivial error conditions
2234 * such wrong arity, bad command name and so forth. */
2235 cmd
= lookupCommand(c
->argv
[0]->ptr
);
2238 sdscatprintf(sdsempty(), "-ERR unknown command '%s'\r\n",
2239 (char*)c
->argv
[0]->ptr
));
2242 } else if ((cmd
->arity
> 0 && cmd
->arity
!= c
->argc
) ||
2243 (c
->argc
< -cmd
->arity
)) {
2245 sdscatprintf(sdsempty(),
2246 "-ERR wrong number of arguments for '%s' command\r\n",
2250 } else if (cmd
->flags
& REDIS_CMD_BULK
&& c
->bulklen
== -1) {
2251 /* This is a bulk command, we have to read the last argument yet. */
2252 int bulklen
= atoi(c
->argv
[c
->argc
-1]->ptr
);
2254 decrRefCount(c
->argv
[c
->argc
-1]);
2255 if (bulklen
< 0 || bulklen
> 1024*1024*1024) {
2257 addReplySds(c
,sdsnew("-ERR invalid bulk write count\r\n"));
2262 c
->bulklen
= bulklen
+2; /* add two bytes for CR+LF */
2263 /* It is possible that the bulk read is already in the
2264 * buffer. Check this condition and handle it accordingly.
2265 * This is just a fast path, alternative to call processInputBuffer().
2266 * It's a good idea since the code is small and this condition
2267 * happens most of the times. */
2268 if ((signed)sdslen(c
->querybuf
) >= c
->bulklen
) {
2269 c
->argv
[c
->argc
] = createStringObject(c
->querybuf
,c
->bulklen
-2);
2271 c
->querybuf
= sdsrange(c
->querybuf
,c
->bulklen
,-1);
2273 /* Otherwise return... there is to read the last argument
2274 * from the socket. */
2278 /* Let's try to encode the bulk object to save space. */
2279 if (cmd
->flags
& REDIS_CMD_BULK
)
2280 tryObjectEncoding(c
->argv
[c
->argc
-1]);
2282 /* Check if the user is authenticated */
2283 if (server
.requirepass
&& !c
->authenticated
&& cmd
->proc
!= authCommand
) {
2284 addReplySds(c
,sdsnew("-ERR operation not permitted\r\n"));
2289 /* Handle the maxmemory directive */
2290 if (server
.maxmemory
&& (cmd
->flags
& REDIS_CMD_DENYOOM
) &&
2291 zmalloc_used_memory() > server
.maxmemory
)
2293 addReplySds(c
,sdsnew("-ERR command not allowed when used memory > 'maxmemory'\r\n"));
2298 /* Only allow SUBSCRIBE and UNSUBSCRIBE in the context of Pub/Sub */
2299 if (dictSize(c
->pubsub_channels
) > 0 &&
2300 cmd
->proc
!= subscribeCommand
&& cmd
->proc
!= unsubscribeCommand
&&
2301 cmd
->proc
!= psubscribeCommand
&& cmd
->proc
!= punsubscribeCommand
) {
2302 addReplySds(c
,sdsnew("-ERR only (P)SUBSCRIBE / (P)UNSUBSCRIBE / QUIT allowed in this context\r\n"));
2307 /* Exec the command */
2308 if (c
->flags
& REDIS_MULTI
&& cmd
->proc
!= execCommand
&& cmd
->proc
!= discardCommand
) {
2309 queueMultiCommand(c
,cmd
);
2310 addReply(c
,shared
.queued
);
2312 if (server
.vm_enabled
&& server
.vm_max_threads
> 0 &&
2313 blockClientOnSwappedKeys(cmd
,c
)) return 1;
2317 /* Prepare the client for the next command */
2322 static void replicationFeedSlaves(list
*slaves
, int dictid
, robj
**argv
, int argc
) {
2327 /* We need 1+(ARGS*3) objects since commands are using the new protocol
2328 * and we need 1 object for the first "*<count>\r\n" multibulk count, then
2329 * for every additional object we have "$<count>\r\n" + object + "\r\n". */
2330 robj
*static_outv
[REDIS_STATIC_ARGS
*3+1];
2333 if (argc
<= REDIS_STATIC_ARGS
) {
2336 outv
= zmalloc(sizeof(robj
*)*(argc
*3+1));
2339 lenobj
= createObject(REDIS_STRING
,
2340 sdscatprintf(sdsempty(), "*%d\r\n", argc
));
2341 lenobj
->refcount
= 0;
2342 outv
[outc
++] = lenobj
;
2343 for (j
= 0; j
< argc
; j
++) {
2344 lenobj
= createObject(REDIS_STRING
,
2345 sdscatprintf(sdsempty(),"$%lu\r\n",
2346 (unsigned long) stringObjectLen(argv
[j
])));
2347 lenobj
->refcount
= 0;
2348 outv
[outc
++] = lenobj
;
2349 outv
[outc
++] = argv
[j
];
2350 outv
[outc
++] = shared
.crlf
;
2353 /* Increment all the refcounts at start and decrement at end in order to
2354 * be sure to free objects if there is no slave in a replication state
2355 * able to be feed with commands */
2356 for (j
= 0; j
< outc
; j
++) incrRefCount(outv
[j
]);
2357 listRewind(slaves
,&li
);
2358 while((ln
= listNext(&li
))) {
2359 redisClient
*slave
= ln
->value
;
2361 /* Don't feed slaves that are still waiting for BGSAVE to start */
2362 if (slave
->replstate
== REDIS_REPL_WAIT_BGSAVE_START
) continue;
2364 /* Feed all the other slaves, MONITORs and so on */
2365 if (slave
->slaveseldb
!= dictid
) {
2369 case 0: selectcmd
= shared
.select0
; break;
2370 case 1: selectcmd
= shared
.select1
; break;
2371 case 2: selectcmd
= shared
.select2
; break;
2372 case 3: selectcmd
= shared
.select3
; break;
2373 case 4: selectcmd
= shared
.select4
; break;
2374 case 5: selectcmd
= shared
.select5
; break;
2375 case 6: selectcmd
= shared
.select6
; break;
2376 case 7: selectcmd
= shared
.select7
; break;
2377 case 8: selectcmd
= shared
.select8
; break;
2378 case 9: selectcmd
= shared
.select9
; break;
2380 selectcmd
= createObject(REDIS_STRING
,
2381 sdscatprintf(sdsempty(),"select %d\r\n",dictid
));
2382 selectcmd
->refcount
= 0;
2385 addReply(slave
,selectcmd
);
2386 slave
->slaveseldb
= dictid
;
2388 for (j
= 0; j
< outc
; j
++) addReply(slave
,outv
[j
]);
2390 for (j
= 0; j
< outc
; j
++) decrRefCount(outv
[j
]);
2391 if (outv
!= static_outv
) zfree(outv
);
2394 static void processInputBuffer(redisClient
*c
) {
2396 /* Before processing the input buffer, make sure the client is not
2397 * waiting for a blocking operation such as BLPOP. Note that on the first
2398 * iteration the client is never blocked, otherwise processInputBuffer()
2399 * would not be called at all, but after the execution of the first commands
2400 * in the input buffer the client may be blocked, and the "goto again"
2401 * will try to reiterate. The following line will make it return asap. */
2402 if (c
->flags
& REDIS_BLOCKED
|| c
->flags
& REDIS_IO_WAIT
) return;
2403 if (c
->bulklen
== -1) {
2404 /* Read the first line of the query */
2405 char *p
= strchr(c
->querybuf
,'\n');
2412 query
= c
->querybuf
;
2413 c
->querybuf
= sdsempty();
2414 querylen
= 1+(p
-(query
));
2415 if (sdslen(query
) > querylen
) {
2416 /* leave data after the first line of the query in the buffer */
2417 c
->querybuf
= sdscatlen(c
->querybuf
,query
+querylen
,sdslen(query
)-querylen
);
2419 *p
= '\0'; /* remove "\n" */
2420 if (*(p
-1) == '\r') *(p
-1) = '\0'; /* and "\r" if any */
2421 sdsupdatelen(query
);
2423 /* Now we can split the query in arguments */
2424 argv
= sdssplitlen(query
,sdslen(query
)," ",1,&argc
);
2427 if (c
->argv
) zfree(c
->argv
);
2428 c
->argv
= zmalloc(sizeof(robj
*)*argc
);
2430 for (j
= 0; j
< argc
; j
++) {
2431 if (sdslen(argv
[j
])) {
2432 c
->argv
[c
->argc
] = createObject(REDIS_STRING
,argv
[j
]);
2440 /* Execute the command. If the client is still valid
2441 * after processCommand() return and there is something
2442 * on the query buffer try to process the next command. */
2443 if (processCommand(c
) && sdslen(c
->querybuf
)) goto again
;
2445 /* Nothing to process, argc == 0. Just process the query
2446 * buffer if it's not empty or return to the caller */
2447 if (sdslen(c
->querybuf
)) goto again
;
2450 } else if (sdslen(c
->querybuf
) >= REDIS_REQUEST_MAX_SIZE
) {
2451 redisLog(REDIS_VERBOSE
, "Client protocol error");
2456 /* Bulk read handling. Note that if we are at this point
2457 the client already sent a command terminated with a newline,
2458 we are reading the bulk data that is actually the last
2459 argument of the command. */
2460 int qbl
= sdslen(c
->querybuf
);
2462 if (c
->bulklen
<= qbl
) {
2463 /* Copy everything but the final CRLF as final argument */
2464 c
->argv
[c
->argc
] = createStringObject(c
->querybuf
,c
->bulklen
-2);
2466 c
->querybuf
= sdsrange(c
->querybuf
,c
->bulklen
,-1);
2467 /* Process the command. If the client is still valid after
2468 * the processing and there is more data in the buffer
2469 * try to parse it. */
2470 if (processCommand(c
) && sdslen(c
->querybuf
)) goto again
;
2476 static void readQueryFromClient(aeEventLoop
*el
, int fd
, void *privdata
, int mask
) {
2477 redisClient
*c
= (redisClient
*) privdata
;
2478 char buf
[REDIS_IOBUF_LEN
];
2481 REDIS_NOTUSED(mask
);
2483 nread
= read(fd
, buf
, REDIS_IOBUF_LEN
);
2485 if (errno
== EAGAIN
) {
2488 redisLog(REDIS_VERBOSE
, "Reading from client: %s",strerror(errno
));
2492 } else if (nread
== 0) {
2493 redisLog(REDIS_VERBOSE
, "Client closed connection");
2498 c
->querybuf
= sdscatlen(c
->querybuf
, buf
, nread
);
2499 c
->lastinteraction
= time(NULL
);
2503 processInputBuffer(c
);
2506 static int selectDb(redisClient
*c
, int id
) {
2507 if (id
< 0 || id
>= server
.dbnum
)
2509 c
->db
= &server
.db
[id
];
2513 static void *dupClientReplyValue(void *o
) {
2514 incrRefCount((robj
*)o
);
/* Match method for lists of string objects: returns non-zero when
 * compareStringObjects() reports 'a' and 'b' as equal, zero otherwise. */
static int listMatchObjects(void *a, void *b) {
    int cmp = compareStringObjects(a,b);

    return cmp == 0;
}
2522 static redisClient
*createClient(int fd
) {
2523 redisClient
*c
= zmalloc(sizeof(*c
));
2525 anetNonBlock(NULL
,fd
);
2526 anetTcpNoDelay(NULL
,fd
);
2527 if (!c
) return NULL
;
2530 c
->querybuf
= sdsempty();
2539 c
->lastinteraction
= time(NULL
);
2540 c
->authenticated
= 0;
2541 c
->replstate
= REDIS_REPL_NONE
;
2542 c
->reply
= listCreate();
2543 listSetFreeMethod(c
->reply
,decrRefCount
);
2544 listSetDupMethod(c
->reply
,dupClientReplyValue
);
2545 c
->blockingkeys
= NULL
;
2546 c
->blockingkeysnum
= 0;
2547 c
->io_keys
= listCreate();
2548 listSetFreeMethod(c
->io_keys
,decrRefCount
);
2549 c
->pubsub_channels
= dictCreate(&setDictType
,NULL
);
2550 c
->pubsub_patterns
= listCreate();
2551 listSetFreeMethod(c
->pubsub_patterns
,decrRefCount
);
2552 listSetMatchMethod(c
->pubsub_patterns
,listMatchObjects
);
2553 if (aeCreateFileEvent(server
.el
, c
->fd
, AE_READABLE
,
2554 readQueryFromClient
, c
) == AE_ERR
) {
2558 listAddNodeTail(server
.clients
,c
);
2559 initClientMultiState(c
);
2563 static void addReply(redisClient
*c
, robj
*obj
) {
2564 if (listLength(c
->reply
) == 0 &&
2565 (c
->replstate
== REDIS_REPL_NONE
||
2566 c
->replstate
== REDIS_REPL_ONLINE
) &&
2567 aeCreateFileEvent(server
.el
, c
->fd
, AE_WRITABLE
,
2568 sendReplyToClient
, c
) == AE_ERR
) return;
2570 if (server
.vm_enabled
&& obj
->storage
!= REDIS_VM_MEMORY
) {
2571 obj
= dupStringObject(obj
);
2572 obj
->refcount
= 0; /* getDecodedObject() will increment the refcount */
2574 listAddNodeTail(c
->reply
,getDecodedObject(obj
));
2577 static void addReplySds(redisClient
*c
, sds s
) {
2578 robj
*o
= createObject(REDIS_STRING
,s
);
2583 static void addReplyDouble(redisClient
*c
, double d
) {
2586 snprintf(buf
,sizeof(buf
),"%.17g",d
);
2587 addReplySds(c
,sdscatprintf(sdsempty(),"$%lu\r\n%s\r\n",
2588 (unsigned long) strlen(buf
),buf
));
2591 static void addReplyLong(redisClient
*c
, long l
) {
2596 addReply(c
,shared
.czero
);
2598 } else if (l
== 1) {
2599 addReply(c
,shared
.cone
);
2602 len
= snprintf(buf
,sizeof(buf
),":%ld\r\n",l
);
2603 addReplySds(c
,sdsnewlen(buf
,len
));
2606 static void addReplyLongLong(redisClient
*c
, long long ll
) {
2611 addReply(c
,shared
.czero
);
2613 } else if (ll
== 1) {
2614 addReply(c
,shared
.cone
);
2617 len
= snprintf(buf
,sizeof(buf
),":%lld\r\n",ll
);
2618 addReplySds(c
,sdsnewlen(buf
,len
));
2621 static void addReplyUlong(redisClient
*c
, unsigned long ul
) {
2626 addReply(c
,shared
.czero
);
2628 } else if (ul
== 1) {
2629 addReply(c
,shared
.cone
);
2632 len
= snprintf(buf
,sizeof(buf
),":%lu\r\n",ul
);
2633 addReplySds(c
,sdsnewlen(buf
,len
));
2636 static void addReplyBulkLen(redisClient
*c
, robj
*obj
) {
2639 if (obj
->encoding
== REDIS_ENCODING_RAW
) {
2640 len
= sdslen(obj
->ptr
);
2642 long n
= (long)obj
->ptr
;
2644 /* Compute how many bytes will take this integer as a radix 10 string */
2650 while((n
= n
/10) != 0) {
2654 addReplySds(c
,sdscatprintf(sdsempty(),"$%lu\r\n",(unsigned long)len
));
2657 static void addReplyBulk(redisClient
*c
, robj
*obj
) {
2658 addReplyBulkLen(c
,obj
);
2660 addReply(c
,shared
.crlf
);
2663 /* In the CONFIG command we need to add vanilla C string as bulk replies */
2664 static void addReplyBulkCString(redisClient
*c
, char *s
) {
2666 addReply(c
,shared
.nullbulk
);
2668 robj
*o
= createStringObject(s
,strlen(s
));
2674 static void acceptHandler(aeEventLoop
*el
, int fd
, void *privdata
, int mask
) {
2679 REDIS_NOTUSED(mask
);
2680 REDIS_NOTUSED(privdata
);
2682 cfd
= anetAccept(server
.neterr
, fd
, cip
, &cport
);
2683 if (cfd
== AE_ERR
) {
2684 redisLog(REDIS_VERBOSE
,"Accepting client connection: %s", server
.neterr
);
2687 redisLog(REDIS_VERBOSE
,"Accepted %s:%d", cip
, cport
);
2688 if ((c
= createClient(cfd
)) == NULL
) {
2689 redisLog(REDIS_WARNING
,"Error allocating resoures for the client");
2690 close(cfd
); /* May be already closed, just ingore errors */
2693 /* If the maxclients directive is set and this is one client more... close the
2694 * connection. Note that we create the client instead of checking for this
2695 * condition beforehand, since now the socket is already set in nonblocking
2696 * mode and we can send an error for free using the Kernel I/O */
2697 if (server
.maxclients
&& listLength(server
.clients
) > server
.maxclients
) {
2698 char *err
= "-ERR max number of clients reached\r\n";
2700 /* That's a best effort error message, don't check write errors */
2701 if (write(c
->fd
,err
,strlen(err
)) == -1) {
2702 /* Nothing to do, Just to avoid the warning... */
2707 server
.stat_numconnections
++;
2710 /* ======================= Redis objects implementation ===================== */
2712 static robj
*createObject(int type
, void *ptr
) {
2715 if (server
.vm_enabled
) pthread_mutex_lock(&server
.obj_freelist_mutex
);
2716 if (listLength(server
.objfreelist
)) {
2717 listNode
*head
= listFirst(server
.objfreelist
);
2718 o
= listNodeValue(head
);
2719 listDelNode(server
.objfreelist
,head
);
2720 if (server
.vm_enabled
) pthread_mutex_unlock(&server
.obj_freelist_mutex
);
2722 if (server
.vm_enabled
) {
2723 pthread_mutex_unlock(&server
.obj_freelist_mutex
);
2724 o
= zmalloc(sizeof(*o
));
2726 o
= zmalloc(sizeof(*o
)-sizeof(struct redisObjectVM
));
2730 o
->encoding
= REDIS_ENCODING_RAW
;
2733 if (server
.vm_enabled
) {
2734 /* Note that this code may run in the context of an I/O thread
2735 * and accessing to server.unixtime in theory is an error
2736 * (no locks). But in practice this is safe, and even if we read
2737 * garbage Redis will not fail, as it's just a statistical info */
2738 o
->vm
.atime
= server
.unixtime
;
2739 o
->storage
= REDIS_VM_MEMORY
;
2744 static robj
*createStringObject(char *ptr
, size_t len
) {
2745 return createObject(REDIS_STRING
,sdsnewlen(ptr
,len
));
2748 static robj
*dupStringObject(robj
*o
) {
2749 assert(o
->encoding
== REDIS_ENCODING_RAW
);
2750 return createStringObject(o
->ptr
,sdslen(o
->ptr
));
2753 static robj
*createListObject(void) {
2754 list
*l
= listCreate();
2756 listSetFreeMethod(l
,decrRefCount
);
2757 return createObject(REDIS_LIST
,l
);
2760 static robj
*createSetObject(void) {
2761 dict
*d
= dictCreate(&setDictType
,NULL
);
2762 return createObject(REDIS_SET
,d
);
2765 static robj
*createHashObject(void) {
2766 /* All the Hashes start as zipmaps. Will be automatically converted
2767 * into hash tables if there are enough elements or big elements
2769 unsigned char *zm
= zipmapNew();
2770 robj
*o
= createObject(REDIS_HASH
,zm
);
2771 o
->encoding
= REDIS_ENCODING_ZIPMAP
;
2775 static robj
*createZsetObject(void) {
2776 zset
*zs
= zmalloc(sizeof(*zs
));
2778 zs
->dict
= dictCreate(&zsetDictType
,NULL
);
2779 zs
->zsl
= zslCreate();
2780 return createObject(REDIS_ZSET
,zs
);
2783 static void freeStringObject(robj
*o
) {
2784 if (o
->encoding
== REDIS_ENCODING_RAW
) {
2789 static void freeListObject(robj
*o
) {
2790 listRelease((list
*) o
->ptr
);
2793 static void freeSetObject(robj
*o
) {
2794 dictRelease((dict
*) o
->ptr
);
2797 static void freeZsetObject(robj
*o
) {
2800 dictRelease(zs
->dict
);
2805 static void freeHashObject(robj
*o
) {
2806 switch (o
->encoding
) {
2807 case REDIS_ENCODING_HT
:
2808 dictRelease((dict
*) o
->ptr
);
2810 case REDIS_ENCODING_ZIPMAP
:
2819 static void incrRefCount(robj
*o
) {
2823 static void decrRefCount(void *obj
) {
2826 /* Object is a key of a swapped out value, or in the process of being
2828 if (server
.vm_enabled
&&
2829 (o
->storage
== REDIS_VM_SWAPPED
|| o
->storage
== REDIS_VM_LOADING
))
2831 if (o
->storage
== REDIS_VM_LOADING
) vmCancelThreadedIOJob(obj
);
2832 redisAssert(o
->type
== REDIS_STRING
);
2833 freeStringObject(o
);
2834 vmMarkPagesFree(o
->vm
.page
,o
->vm
.usedpages
);
2835 pthread_mutex_lock(&server
.obj_freelist_mutex
);
2836 if (listLength(server
.objfreelist
) > REDIS_OBJFREELIST_MAX
||
2837 !listAddNodeHead(server
.objfreelist
,o
))
2839 pthread_mutex_unlock(&server
.obj_freelist_mutex
);
2840 server
.vm_stats_swapped_objects
--;
2843 /* Object is in memory, or in the process of being swapped out. */
2844 if (--(o
->refcount
) == 0) {
2845 if (server
.vm_enabled
&& o
->storage
== REDIS_VM_SWAPPING
)
2846 vmCancelThreadedIOJob(obj
);
2848 case REDIS_STRING
: freeStringObject(o
); break;
2849 case REDIS_LIST
: freeListObject(o
); break;
2850 case REDIS_SET
: freeSetObject(o
); break;
2851 case REDIS_ZSET
: freeZsetObject(o
); break;
2852 case REDIS_HASH
: freeHashObject(o
); break;
2853 default: redisAssert(0); break;
2855 if (server
.vm_enabled
) pthread_mutex_lock(&server
.obj_freelist_mutex
);
2856 if (listLength(server
.objfreelist
) > REDIS_OBJFREELIST_MAX
||
2857 !listAddNodeHead(server
.objfreelist
,o
))
2859 if (server
.vm_enabled
) pthread_mutex_unlock(&server
.obj_freelist_mutex
);
2863 static robj
*lookupKey(redisDb
*db
, robj
*key
) {
2864 dictEntry
*de
= dictFind(db
->dict
,key
);
2866 robj
*key
= dictGetEntryKey(de
);
2867 robj
*val
= dictGetEntryVal(de
);
2869 if (server
.vm_enabled
) {
2870 if (key
->storage
== REDIS_VM_MEMORY
||
2871 key
->storage
== REDIS_VM_SWAPPING
)
2873 /* If we were swapping the object out, stop it, this key
2875 if (key
->storage
== REDIS_VM_SWAPPING
)
2876 vmCancelThreadedIOJob(key
);
2877 /* Update the access time of the key for the aging algorithm. */
2878 key
->vm
.atime
= server
.unixtime
;
2880 int notify
= (key
->storage
== REDIS_VM_LOADING
);
2882 /* Our value was swapped on disk. Bring it at home. */
2883 redisAssert(val
== NULL
);
2884 val
= vmLoadObject(key
);
2885 dictGetEntryVal(de
) = val
;
2887 /* Clients blocked by the VM subsystem may be waiting for
2889 if (notify
) handleClientsBlockedOnSwappedKey(db
,key
);
2898 static robj
*lookupKeyRead(redisDb
*db
, robj
*key
) {
2899 expireIfNeeded(db
,key
);
2900 return lookupKey(db
,key
);
2903 static robj
*lookupKeyWrite(redisDb
*db
, robj
*key
) {
2904 deleteIfVolatile(db
,key
);
2905 return lookupKey(db
,key
);
2908 static robj
*lookupKeyReadOrReply(redisClient
*c
, robj
*key
, robj
*reply
) {
2909 robj
*o
= lookupKeyRead(c
->db
, key
);
2910 if (!o
) addReply(c
,reply
);
2914 static robj
*lookupKeyWriteOrReply(redisClient
*c
, robj
*key
, robj
*reply
) {
2915 robj
*o
= lookupKeyWrite(c
->db
, key
);
2916 if (!o
) addReply(c
,reply
);
2920 static int checkType(redisClient
*c
, robj
*o
, int type
) {
2921 if (o
->type
!= type
) {
2922 addReply(c
,shared
.wrongtypeerr
);
2928 static int deleteKey(redisDb
*db
, robj
*key
) {
2931 /* We need to protect key from destruction: after the first dictDelete()
2932 * it may happen that 'key' is no longer valid if we don't increment
2933 * it's count. This may happen when we get the object reference directly
2934 * from the hash table with dictRandomKey() or dict iterators */
2936 if (dictSize(db
->expires
)) dictDelete(db
->expires
,key
);
2937 retval
= dictDelete(db
->dict
,key
);
2940 return retval
== DICT_OK
;
2943 /* Check if the nul-terminated string 's' can be represented by a long
2944 * (that is, is a number that fits into long without any other space or
2945 * character before or after the digits).
2947 * If so, the function returns REDIS_OK and *longval is set to the value
2948 * of the number. Otherwise REDIS_ERR is returned */
2949 static int isStringRepresentableAsLong(sds s
, long *longval
) {
2950 char buf
[32], *endptr
;
2954 value
= strtol(s
, &endptr
, 10);
2955 if (endptr
[0] != '\0') return REDIS_ERR
;
2956 slen
= snprintf(buf
,32,"%ld",value
);
2958 /* If the number converted back into a string is not identical
2959 * then it's not possible to encode the string as integer */
2960 if (sdslen(s
) != (unsigned)slen
|| memcmp(buf
,s
,slen
)) return REDIS_ERR
;
2961 if (longval
) *longval
= value
;
2965 /* Try to encode a string object in order to save space */
2966 static int tryObjectEncoding(robj
*o
) {
2970 if (o
->encoding
!= REDIS_ENCODING_RAW
)
2971 return REDIS_ERR
; /* Already encoded */
2973 /* It's not save to encode shared objects: shared objects can be shared
2974 * everywhere in the "object space" of Redis. Encoded objects can only
2975 * appear as "values" (and not, for instance, as keys) */
2976 if (o
->refcount
> 1) return REDIS_ERR
;
2978 /* Currently we try to encode only strings */
2979 redisAssert(o
->type
== REDIS_STRING
);
2981 /* Check if we can represent this string as a long integer */
2982 if (isStringRepresentableAsLong(s
,&value
) == REDIS_ERR
) return REDIS_ERR
;
2984 /* Ok, this object can be encoded */
2985 o
->encoding
= REDIS_ENCODING_INT
;
2987 o
->ptr
= (void*) value
;
2991 /* Get a decoded version of an encoded object (returned as a new object).
2992 * If the object is already raw-encoded just increment the ref count. */
2993 static robj
*getDecodedObject(robj
*o
) {
2996 if (o
->encoding
== REDIS_ENCODING_RAW
) {
3000 if (o
->type
== REDIS_STRING
&& o
->encoding
== REDIS_ENCODING_INT
) {
3003 snprintf(buf
,32,"%ld",(long)o
->ptr
);
3004 dec
= createStringObject(buf
,strlen(buf
));
3007 redisAssert(1 != 1);
3011 /* Compare two string objects via strcmp() or alike.
3012 * Note that the objects may be integer-encoded. In such a case we
3013 * use snprintf() to get a string representation of the numbers on the stack
3014 * and compare the strings, it's much faster than calling getDecodedObject().
3016 * Important note: if objects are not integer encoded, but binary-safe strings,
3017 * sdscmp() from sds.c will apply memcmp() so this function ca be considered
3019 static int compareStringObjects(robj
*a
, robj
*b
) {
3020 redisAssert(a
->type
== REDIS_STRING
&& b
->type
== REDIS_STRING
);
3021 char bufa
[128], bufb
[128], *astr
, *bstr
;
3024 if (a
== b
) return 0;
3025 if (a
->encoding
!= REDIS_ENCODING_RAW
) {
3026 snprintf(bufa
,sizeof(bufa
),"%ld",(long) a
->ptr
);
3032 if (b
->encoding
!= REDIS_ENCODING_RAW
) {
3033 snprintf(bufb
,sizeof(bufb
),"%ld",(long) b
->ptr
);
3039 return bothsds
? sdscmp(astr
,bstr
) : strcmp(astr
,bstr
);
3042 static size_t stringObjectLen(robj
*o
) {
3043 redisAssert(o
->type
== REDIS_STRING
);
3044 if (o
->encoding
== REDIS_ENCODING_RAW
) {
3045 return sdslen(o
->ptr
);
3049 return snprintf(buf
,32,"%ld",(long)o
->ptr
);
3053 /*============================ RDB saving/loading =========================== */
/* Write the one byte 'type' opcode to 'fp'.
 * Returns 0 on success, -1 on write error. */
static int rdbSaveType(FILE *fp, unsigned char type) {
    size_t written = fwrite(&type,1,1,fp);

    return (written == 0) ? -1 : 0;
}
/* Write 't' to 'fp' as a 4 byte signed integer in host byte order.
 * Returns 0 on success, -1 on write error. */
static int rdbSaveTime(FILE *fp, time_t t) {
    int32_t t32 = (int32_t) t;
    size_t written = fwrite(&t32,4,1,fp);

    return (written == 0) ? -1 : 0;
}
3066 /* check rdbLoadLen() comments for more info */
3067 static int rdbSaveLen(FILE *fp
, uint32_t len
) {
3068 unsigned char buf
[2];
3071 /* Save a 6 bit len */
3072 buf
[0] = (len
&0xFF)|(REDIS_RDB_6BITLEN
<<6);
3073 if (fwrite(buf
,1,1,fp
) == 0) return -1;
3074 } else if (len
< (1<<14)) {
3075 /* Save a 14 bit len */
3076 buf
[0] = ((len
>>8)&0xFF)|(REDIS_RDB_14BITLEN
<<6);
3078 if (fwrite(buf
,2,1,fp
) == 0) return -1;
3080 /* Save a 32 bit len */
3081 buf
[0] = (REDIS_RDB_32BITLEN
<<6);
3082 if (fwrite(buf
,1,1,fp
) == 0) return -1;
3084 if (fwrite(&len
,4,1,fp
) == 0) return -1;
3089 /* String objects in the form "2391" "-100" without any space and with a
3090 * range of values that can fit in an 8, 16 or 32 bit signed value can be
3091 * encoded as integers to save space */
3092 static int rdbTryIntegerEncoding(char *s
, size_t len
, unsigned char *enc
) {
3094 char *endptr
, buf
[32];
3096 /* Check if it's possible to encode this value as a number */
3097 value
= strtoll(s
, &endptr
, 10);
3098 if (endptr
[0] != '\0') return 0;
3099 snprintf(buf
,32,"%lld",value
);
3101 /* If the number converted back into a string is not identical
3102 * then it's not possible to encode the string as integer */
3103 if (strlen(buf
) != len
|| memcmp(buf
,s
,len
)) return 0;
3105 /* Finally check if it fits in our ranges */
3106 if (value
>= -(1<<7) && value
<= (1<<7)-1) {
3107 enc
[0] = (REDIS_RDB_ENCVAL
<<6)|REDIS_RDB_ENC_INT8
;
3108 enc
[1] = value
&0xFF;
3110 } else if (value
>= -(1<<15) && value
<= (1<<15)-1) {
3111 enc
[0] = (REDIS_RDB_ENCVAL
<<6)|REDIS_RDB_ENC_INT16
;
3112 enc
[1] = value
&0xFF;
3113 enc
[2] = (value
>>8)&0xFF;
3115 } else if (value
>= -((long long)1<<31) && value
<= ((long long)1<<31)-1) {
3116 enc
[0] = (REDIS_RDB_ENCVAL
<<6)|REDIS_RDB_ENC_INT32
;
3117 enc
[1] = value
&0xFF;
3118 enc
[2] = (value
>>8)&0xFF;
3119 enc
[3] = (value
>>16)&0xFF;
3120 enc
[4] = (value
>>24)&0xFF;
3127 static int rdbSaveLzfStringObject(FILE *fp
, unsigned char *s
, size_t len
) {
3128 size_t comprlen
, outlen
;
3132 /* We require at least four bytes compression for this to be worth it */
3133 if (len
<= 4) return 0;
3135 if ((out
= zmalloc(outlen
+1)) == NULL
) return 0;
3136 comprlen
= lzf_compress(s
, len
, out
, outlen
);
3137 if (comprlen
== 0) {
3141 /* Data compressed! Let's save it on disk */
3142 byte
= (REDIS_RDB_ENCVAL
<<6)|REDIS_RDB_ENC_LZF
;
3143 if (fwrite(&byte
,1,1,fp
) == 0) goto writeerr
;
3144 if (rdbSaveLen(fp
,comprlen
) == -1) goto writeerr
;
3145 if (rdbSaveLen(fp
,len
) == -1) goto writeerr
;
3146 if (fwrite(out
,comprlen
,1,fp
) == 0) goto writeerr
;
3155 /* Save a string objet as [len][data] on disk. If the object is a string
3156 * representation of an integer value we try to safe it in a special form */
3157 static int rdbSaveRawString(FILE *fp
, unsigned char *s
, size_t len
) {
3160 /* Try integer encoding */
3162 unsigned char buf
[5];
3163 if ((enclen
= rdbTryIntegerEncoding((char*)s
,len
,buf
)) > 0) {
3164 if (fwrite(buf
,enclen
,1,fp
) == 0) return -1;
3169 /* Try LZF compression - under 20 bytes it's unable to compress even
3170 * aaaaaaaaaaaaaaaaaa so skip it */
3171 if (server
.rdbcompression
&& len
> 20) {
3174 retval
= rdbSaveLzfStringObject(fp
,s
,len
);
3175 if (retval
== -1) return -1;
3176 if (retval
> 0) return 0;
3177 /* retval == 0 means data can't be compressed, save the old way */
3180 /* Store verbatim */
3181 if (rdbSaveLen(fp
,len
) == -1) return -1;
3182 if (len
&& fwrite(s
,len
,1,fp
) == 0) return -1;
3186 /* Like rdbSaveStringObjectRaw() but handle encoded objects */
3187 static int rdbSaveStringObject(FILE *fp
, robj
*obj
) {
3190 /* Avoid incr/decr ref count business when possible.
3191 * This plays well with copy-on-write given that we are probably
3192 * in a child process (BGSAVE). Also this makes sure key objects
3193 * of swapped objects are not incRefCount-ed (an assert does not allow
3194 * this in order to avoid bugs) */
3195 if (obj
->encoding
!= REDIS_ENCODING_RAW
) {
3196 obj
= getDecodedObject(obj
);
3197 retval
= rdbSaveRawString(fp
,obj
->ptr
,sdslen(obj
->ptr
));
3200 retval
= rdbSaveRawString(fp
,obj
->ptr
,sdslen(obj
->ptr
));
/* Save a double value. Doubles are saved as strings prefixed by an unsigned
 * 8 bit integer specifying the length of the representation.
 * Three values of that byte are special and are not followed by a string:
 *
 * 253: not a number
 * 254: + inf
 * 255: - inf
 *
 * Returns 0 on success, -1 on write error. */
static int rdbSaveDoubleValue(FILE *fp, double val) {
    unsigned char buf[128];
    int len = 1;

    if (isnan(val)) {
        buf[0] = 253;
    } else if (!isfinite(val)) {
        buf[0] = (val < 0) ? 255 : 254;
    } else {
        /* Length byte first, then the number printed with 17 digits. */
        snprintf((char*)buf+1,sizeof(buf)-1,"%.17g",val);
        buf[0] = strlen((char*)buf+1);
        len = buf[0]+1;
    }
    if (fwrite(buf,len,1,fp) == 0) return -1;
    return 0;
}
3232 /* Save a Redis object. */
3233 static int rdbSaveObject(FILE *fp
, robj
*o
) {
3234 if (o
->type
== REDIS_STRING
) {
3235 /* Save a string value */
3236 if (rdbSaveStringObject(fp
,o
) == -1) return -1;
3237 } else if (o
->type
== REDIS_LIST
) {
3238 /* Save a list value */
3239 list
*list
= o
->ptr
;
3243 if (rdbSaveLen(fp
,listLength(list
)) == -1) return -1;
3244 listRewind(list
,&li
);
3245 while((ln
= listNext(&li
))) {
3246 robj
*eleobj
= listNodeValue(ln
);
3248 if (rdbSaveStringObject(fp
,eleobj
) == -1) return -1;
3250 } else if (o
->type
== REDIS_SET
) {
3251 /* Save a set value */
3253 dictIterator
*di
= dictGetIterator(set
);
3256 if (rdbSaveLen(fp
,dictSize(set
)) == -1) return -1;
3257 while((de
= dictNext(di
)) != NULL
) {
3258 robj
*eleobj
= dictGetEntryKey(de
);
3260 if (rdbSaveStringObject(fp
,eleobj
) == -1) return -1;
3262 dictReleaseIterator(di
);
3263 } else if (o
->type
== REDIS_ZSET
) {
3264 /* Save a set value */
3266 dictIterator
*di
= dictGetIterator(zs
->dict
);
3269 if (rdbSaveLen(fp
,dictSize(zs
->dict
)) == -1) return -1;
3270 while((de
= dictNext(di
)) != NULL
) {
3271 robj
*eleobj
= dictGetEntryKey(de
);
3272 double *score
= dictGetEntryVal(de
);
3274 if (rdbSaveStringObject(fp
,eleobj
) == -1) return -1;
3275 if (rdbSaveDoubleValue(fp
,*score
) == -1) return -1;
3277 dictReleaseIterator(di
);
3278 } else if (o
->type
== REDIS_HASH
) {
3279 /* Save a hash value */
3280 if (o
->encoding
== REDIS_ENCODING_ZIPMAP
) {
3281 unsigned char *p
= zipmapRewind(o
->ptr
);
3282 unsigned int count
= zipmapLen(o
->ptr
);
3283 unsigned char *key
, *val
;
3284 unsigned int klen
, vlen
;
3286 if (rdbSaveLen(fp
,count
) == -1) return -1;
3287 while((p
= zipmapNext(p
,&key
,&klen
,&val
,&vlen
)) != NULL
) {
3288 if (rdbSaveRawString(fp
,key
,klen
) == -1) return -1;
3289 if (rdbSaveRawString(fp
,val
,vlen
) == -1) return -1;
3292 dictIterator
*di
= dictGetIterator(o
->ptr
);
3295 if (rdbSaveLen(fp
,dictSize((dict
*)o
->ptr
)) == -1) return -1;
3296 while((de
= dictNext(di
)) != NULL
) {
3297 robj
*key
= dictGetEntryKey(de
);
3298 robj
*val
= dictGetEntryVal(de
);
3300 if (rdbSaveStringObject(fp
,key
) == -1) return -1;
3301 if (rdbSaveStringObject(fp
,val
) == -1) return -1;
3303 dictReleaseIterator(di
);
3311 /* Return the length the object will have on disk if saved with
3312 * the rdbSaveObject() function. Currently we use a trick to get
3313 * this length with very little changes to the code. In the future
3314 * we could switch to a faster solution. */
3315 static off_t
rdbSavedObjectLen(robj
*o
, FILE *fp
) {
3316 if (fp
== NULL
) fp
= server
.devnull
;
3318 assert(rdbSaveObject(fp
,o
) != 1);
3322 /* Return the number of pages required to save this object in the swap file */
3323 static off_t
rdbSavedObjectPages(robj
*o
, FILE *fp
) {
3324 off_t bytes
= rdbSavedObjectLen(o
,fp
);
3326 return (bytes
+(server
.vm_page_size
-1))/server
.vm_page_size
;
3329 /* Save the DB on disk. Return REDIS_ERR on error, REDIS_OK on success */
3330 static int rdbSave(char *filename
) {
3331 dictIterator
*di
= NULL
;
3336 time_t now
= time(NULL
);
3338 /* Wait for I/O therads to terminate, just in case this is a
3339 * foreground-saving, to avoid seeking the swap file descriptor at the
3341 if (server
.vm_enabled
)
3342 waitEmptyIOJobsQueue();
3344 snprintf(tmpfile
,256,"temp-%d.rdb", (int) getpid());
3345 fp
= fopen(tmpfile
,"w");
3347 redisLog(REDIS_WARNING
, "Failed saving the DB: %s", strerror(errno
));
3350 if (fwrite("REDIS0001",9,1,fp
) == 0) goto werr
;
3351 for (j
= 0; j
< server
.dbnum
; j
++) {
3352 redisDb
*db
= server
.db
+j
;
3354 if (dictSize(d
) == 0) continue;
3355 di
= dictGetIterator(d
);
3361 /* Write the SELECT DB opcode */
3362 if (rdbSaveType(fp
,REDIS_SELECTDB
) == -1) goto werr
;
3363 if (rdbSaveLen(fp
,j
) == -1) goto werr
;
3365 /* Iterate this DB writing every entry */
3366 while((de
= dictNext(di
)) != NULL
) {
3367 robj
*key
= dictGetEntryKey(de
);
3368 robj
*o
= dictGetEntryVal(de
);
3369 time_t expiretime
= getExpire(db
,key
);
3371 /* Save the expire time */
3372 if (expiretime
!= -1) {
3373 /* If this key is already expired skip it */
3374 if (expiretime
< now
) continue;
3375 if (rdbSaveType(fp
,REDIS_EXPIRETIME
) == -1) goto werr
;
3376 if (rdbSaveTime(fp
,expiretime
) == -1) goto werr
;
3378 /* Save the key and associated value. This requires special
3379 * handling if the value is swapped out. */
3380 if (!server
.vm_enabled
|| key
->storage
== REDIS_VM_MEMORY
||
3381 key
->storage
== REDIS_VM_SWAPPING
) {
3382 /* Save type, key, value */
3383 if (rdbSaveType(fp
,o
->type
) == -1) goto werr
;
3384 if (rdbSaveStringObject(fp
,key
) == -1) goto werr
;
3385 if (rdbSaveObject(fp
,o
) == -1) goto werr
;
3387 /* REDIS_VM_SWAPPED or REDIS_VM_LOADING */
3389 /* Get a preview of the object in memory */
3390 po
= vmPreviewObject(key
);
3391 /* Save type, key, value */
3392 if (rdbSaveType(fp
,key
->vtype
) == -1) goto werr
;
3393 if (rdbSaveStringObject(fp
,key
) == -1) goto werr
;
3394 if (rdbSaveObject(fp
,po
) == -1) goto werr
;
3395 /* Remove the loaded object from memory */
3399 dictReleaseIterator(di
);
3402 if (rdbSaveType(fp
,REDIS_EOF
) == -1) goto werr
;
3404 /* Make sure data will not remain on the OS's output buffers */
3409 /* Use RENAME to make sure the DB file is changed atomically only
3410 * if the generate DB file is ok. */
3411 if (rename(tmpfile
,filename
) == -1) {
3412 redisLog(REDIS_WARNING
,"Error moving temp DB file on the final destination: %s", strerror(errno
));
3416 redisLog(REDIS_NOTICE
,"DB saved on disk");
3418 server
.lastsave
= time(NULL
);
3424 redisLog(REDIS_WARNING
,"Write error saving DB on disk: %s", strerror(errno
));
3425 if (di
) dictReleaseIterator(di
);
3429 static int rdbSaveBackground(char *filename
) {
3432 if (server
.bgsavechildpid
!= -1) return REDIS_ERR
;
3433 if (server
.vm_enabled
) waitEmptyIOJobsQueue();
3434 if ((childpid
= fork()) == 0) {
3436 if (server
.vm_enabled
) vmReopenSwapFile();
3438 if (rdbSave(filename
) == REDIS_OK
) {
3445 if (childpid
== -1) {
3446 redisLog(REDIS_WARNING
,"Can't save in background: fork: %s",
3450 redisLog(REDIS_NOTICE
,"Background saving started by pid %d",childpid
);
3451 server
.bgsavechildpid
= childpid
;
3452 updateDictResizePolicy();
3455 return REDIS_OK
; /* unreached */
/* Remove the temp file "temp-<pid>.rdb" (relative to the current working
 * directory) named after the process with pid 'childpid'. */
static void rdbRemoveTempFile(pid_t childpid) {
    char tmpfile[256];
    int pid = (int) childpid;

    snprintf(tmpfile,sizeof(tmpfile),"temp-%d.rdb", pid);
    unlink(tmpfile);
}
/* Read a one byte type opcode from 'fp'. Returns the byte value (0-255),
 * or -1 if the byte can't be read. */
static int rdbLoadType(FILE *fp) {
    unsigned char type;

    if (fread(&type,1,1,fp) == 0) return -1;
    return (int) type;
}
/* Read a time saved as a 4 byte signed integer in host byte order.
 * Returns -1 if the 4 bytes can't be read. */
static time_t rdbLoadTime(FILE *fp) {
    int32_t t32;
    size_t nread = fread(&t32,4,1,fp);

    if (nread == 0) return -1;
    return (time_t) t32;
}
3477 /* Load an encoded length from the DB, see the REDIS_RDB_* defines on the top
3478 * of this file for a description of how this are stored on disk.
3480 * isencoded is set to 1 if the readed length is not actually a length but
3481 * an "encoding type", check the above comments for more info */
3482 static uint32_t rdbLoadLen(FILE *fp
, int *isencoded
) {
3483 unsigned char buf
[2];
3487 if (isencoded
) *isencoded
= 0;
3488 if (fread(buf
,1,1,fp
) == 0) return REDIS_RDB_LENERR
;
3489 type
= (buf
[0]&0xC0)>>6;
3490 if (type
== REDIS_RDB_6BITLEN
) {
3491 /* Read a 6 bit len */
3493 } else if (type
== REDIS_RDB_ENCVAL
) {
3494 /* Read a 6 bit len encoding type */
3495 if (isencoded
) *isencoded
= 1;
3497 } else if (type
== REDIS_RDB_14BITLEN
) {
3498 /* Read a 14 bit len */
3499 if (fread(buf
+1,1,1,fp
) == 0) return REDIS_RDB_LENERR
;
3500 return ((buf
[0]&0x3F)<<8)|buf
[1];
3502 /* Read a 32 bit len */
3503 if (fread(&len
,4,1,fp
) == 0) return REDIS_RDB_LENERR
;
3508 static robj
*rdbLoadIntegerObject(FILE *fp
, int enctype
) {
3509 unsigned char enc
[4];
3512 if (enctype
== REDIS_RDB_ENC_INT8
) {
3513 if (fread(enc
,1,1,fp
) == 0) return NULL
;
3514 val
= (signed char)enc
[0];
3515 } else if (enctype
== REDIS_RDB_ENC_INT16
) {
3517 if (fread(enc
,2,1,fp
) == 0) return NULL
;
3518 v
= enc
[0]|(enc
[1]<<8);
3520 } else if (enctype
== REDIS_RDB_ENC_INT32
) {
3522 if (fread(enc
,4,1,fp
) == 0) return NULL
;
3523 v
= enc
[0]|(enc
[1]<<8)|(enc
[2]<<16)|(enc
[3]<<24);
3526 val
= 0; /* anti-warning */
3529 return createObject(REDIS_STRING
,sdscatprintf(sdsempty(),"%lld",val
));
3532 static robj
*rdbLoadLzfStringObject(FILE*fp
) {
3533 unsigned int len
, clen
;
3534 unsigned char *c
= NULL
;
3537 if ((clen
= rdbLoadLen(fp
,NULL
)) == REDIS_RDB_LENERR
) return NULL
;
3538 if ((len
= rdbLoadLen(fp
,NULL
)) == REDIS_RDB_LENERR
) return NULL
;
3539 if ((c
= zmalloc(clen
)) == NULL
) goto err
;
3540 if ((val
= sdsnewlen(NULL
,len
)) == NULL
) goto err
;
3541 if (fread(c
,clen
,1,fp
) == 0) goto err
;
3542 if (lzf_decompress(c
,clen
,val
,len
) == 0) goto err
;
3544 return createObject(REDIS_STRING
,val
);
3551 static robj
*rdbLoadStringObject(FILE*fp
) {
3556 len
= rdbLoadLen(fp
,&isencoded
);
3559 case REDIS_RDB_ENC_INT8
:
3560 case REDIS_RDB_ENC_INT16
:
3561 case REDIS_RDB_ENC_INT32
:
3562 return rdbLoadIntegerObject(fp
,len
);
3563 case REDIS_RDB_ENC_LZF
:
3564 return rdbLoadLzfStringObject(fp
);
3570 if (len
== REDIS_RDB_LENERR
) return NULL
;
3571 val
= sdsnewlen(NULL
,len
);
3572 if (len
&& fread(val
,len
,1,fp
) == 0) {
3576 return createObject(REDIS_STRING
,val
);
3579 /* For information about double serialization check rdbSaveDoubleValue() */
3580 static int rdbLoadDoubleValue(FILE *fp
, double *val
) {
3584 if (fread(&len
,1,1,fp
) == 0) return -1;
3586 case 255: *val
= R_NegInf
; return 0;
3587 case 254: *val
= R_PosInf
; return 0;
3588 case 253: *val
= R_Nan
; return 0;
3590 if (fread(buf
,len
,1,fp
) == 0) return -1;
3592 sscanf(buf
, "%lg", val
);
3597 /* Load a Redis object of the specified type from the specified file.
3598 * On success a newly allocated object is returned, otherwise NULL. */
3599 static robj
*rdbLoadObject(int type
, FILE *fp
) {
3602 redisLog(REDIS_DEBUG
,"LOADING OBJECT %d (at %d)\n",type
,ftell(fp
));
3603 if (type
== REDIS_STRING
) {
3604 /* Read string value */
3605 if ((o
= rdbLoadStringObject(fp
)) == NULL
) return NULL
;
3606 tryObjectEncoding(o
);
3607 } else if (type
== REDIS_LIST
|| type
== REDIS_SET
) {
3608 /* Read list/set value */
3611 if ((listlen
= rdbLoadLen(fp
,NULL
)) == REDIS_RDB_LENERR
) return NULL
;
3612 o
= (type
== REDIS_LIST
) ? createListObject() : createSetObject();
3613 /* It's faster to expand the dict to the right size asap in order
3614 * to avoid rehashing */
3615 if (type
== REDIS_SET
&& listlen
> DICT_HT_INITIAL_SIZE
)
3616 dictExpand(o
->ptr
,listlen
);
3617 /* Load every single element of the list/set */
3621 if ((ele
= rdbLoadStringObject(fp
)) == NULL
) return NULL
;
3622 tryObjectEncoding(ele
);
3623 if (type
== REDIS_LIST
) {
3624 listAddNodeTail((list
*)o
->ptr
,ele
);
3626 dictAdd((dict
*)o
->ptr
,ele
,NULL
);
3629 } else if (type
== REDIS_ZSET
) {
3630 /* Read list/set value */
3634 if ((zsetlen
= rdbLoadLen(fp
,NULL
)) == REDIS_RDB_LENERR
) return NULL
;
3635 o
= createZsetObject();
3637 /* Load every single element of the list/set */
3640 double *score
= zmalloc(sizeof(double));
3642 if ((ele
= rdbLoadStringObject(fp
)) == NULL
) return NULL
;
3643 tryObjectEncoding(ele
);
3644 if (rdbLoadDoubleValue(fp
,score
) == -1) return NULL
;
3645 dictAdd(zs
->dict
,ele
,score
);
3646 zslInsert(zs
->zsl
,*score
,ele
);
3647 incrRefCount(ele
); /* added to skiplist */
3649 } else if (type
== REDIS_HASH
) {
3652 if ((hashlen
= rdbLoadLen(fp
,NULL
)) == REDIS_RDB_LENERR
) return NULL
;
3653 o
= createHashObject();
3654 /* Too many entries? Use an hash table. */
3655 if (hashlen
> server
.hash_max_zipmap_entries
)
3656 convertToRealHash(o
);
3657 /* Load every key/value, then set it into the zipmap or hash
3658 * table, as needed. */
3662 if ((key
= rdbLoadStringObject(fp
)) == NULL
) return NULL
;
3663 if ((val
= rdbLoadStringObject(fp
)) == NULL
) return NULL
;
3664 /* If we are using a zipmap and there are too big values
3665 * the object is converted to real hash table encoding. */
3666 if (o
->encoding
!= REDIS_ENCODING_HT
&&
3667 (sdslen(key
->ptr
) > server
.hash_max_zipmap_value
||
3668 sdslen(val
->ptr
) > server
.hash_max_zipmap_value
))
3670 convertToRealHash(o
);
3673 if (o
->encoding
== REDIS_ENCODING_ZIPMAP
) {
3674 unsigned char *zm
= o
->ptr
;
3676 zm
= zipmapSet(zm
,key
->ptr
,sdslen(key
->ptr
),
3677 val
->ptr
,sdslen(val
->ptr
),NULL
);
3682 tryObjectEncoding(key
);
3683 tryObjectEncoding(val
);
3684 dictAdd((dict
*)o
->ptr
,key
,val
);
3693 static int rdbLoad(char *filename
) {
3695 robj
*keyobj
= NULL
;
3697 int type
, retval
, rdbver
;
3698 dict
*d
= server
.db
[0].dict
;
3699 redisDb
*db
= server
.db
+0;
3701 time_t expiretime
= -1, now
= time(NULL
);
3702 long long loadedkeys
= 0;
3704 fp
= fopen(filename
,"r");
3705 if (!fp
) return REDIS_ERR
;
3706 if (fread(buf
,9,1,fp
) == 0) goto eoferr
;
3708 if (memcmp(buf
,"REDIS",5) != 0) {
3710 redisLog(REDIS_WARNING
,"Wrong signature trying to load DB from file");
3713 rdbver
= atoi(buf
+5);
3716 redisLog(REDIS_WARNING
,"Can't handle RDB format version %d",rdbver
);
3723 if ((type
= rdbLoadType(fp
)) == -1) goto eoferr
;
3724 if (type
== REDIS_EXPIRETIME
) {
3725 if ((expiretime
= rdbLoadTime(fp
)) == -1) goto eoferr
;
3726 /* We read the time so we need to read the object type again */
3727 if ((type
= rdbLoadType(fp
)) == -1) goto eoferr
;
3729 if (type
== REDIS_EOF
) break;
3730 /* Handle SELECT DB opcode as a special case */
3731 if (type
== REDIS_SELECTDB
) {
3732 if ((dbid
= rdbLoadLen(fp
,NULL
)) == REDIS_RDB_LENERR
)
3734 if (dbid
>= (unsigned)server
.dbnum
) {
3735 redisLog(REDIS_WARNING
,"FATAL: Data file was created with a Redis server configured to handle more than %d databases. Exiting\n", server
.dbnum
);
3738 db
= server
.db
+dbid
;
3743 if ((keyobj
= rdbLoadStringObject(fp
)) == NULL
) goto eoferr
;
3745 if ((o
= rdbLoadObject(type
,fp
)) == NULL
) goto eoferr
;
3746 /* Add the new object in the hash table */
3747 retval
= dictAdd(d
,keyobj
,o
);
3748 if (retval
== DICT_ERR
) {
3749 redisLog(REDIS_WARNING
,"Loading DB, duplicated key (%s) found! Unrecoverable error, exiting now.", keyobj
->ptr
);
3752 /* Set the expire time if needed */
3753 if (expiretime
!= -1) {
3754 setExpire(db
,keyobj
,expiretime
);
3755 /* Delete this key if already expired */
3756 if (expiretime
< now
) deleteKey(db
,keyobj
);
3760 /* Handle swapping while loading big datasets when VM is on */
3762 if (server
.vm_enabled
&& (loadedkeys
% 5000) == 0) {
3763 while (zmalloc_used_memory() > server
.vm_max_memory
) {
3764 if (vmSwapOneObjectBlocking() == REDIS_ERR
) break;
3771 eoferr
: /* unexpected end of file is handled here with a fatal exit */
3772 if (keyobj
) decrRefCount(keyobj
);
3773 redisLog(REDIS_WARNING
,"Short read or OOM loading DB. Unrecoverable error, aborting now.");
3775 return REDIS_ERR
; /* Just to avoid warning */
3778 /*================================== Commands =============================== */
3780 static void authCommand(redisClient
*c
) {
3781 if (!server
.requirepass
|| !strcmp(c
->argv
[1]->ptr
, server
.requirepass
)) {
3782 c
->authenticated
= 1;
3783 addReply(c
,shared
.ok
);
3785 c
->authenticated
= 0;
3786 addReplySds(c
,sdscatprintf(sdsempty(),"-ERR invalid password\r\n"));
3790 static void pingCommand(redisClient
*c
) {
3791 addReply(c
,shared
.pong
);
3794 static void echoCommand(redisClient
*c
) {
3795 addReplyBulk(c
,c
->argv
[1]);
3798 /*=================================== Strings =============================== */
3800 static void setGenericCommand(redisClient
*c
, int nx
) {
3803 if (nx
) deleteIfVolatile(c
->db
,c
->argv
[1]);
3804 retval
= dictAdd(c
->db
->dict
,c
->argv
[1],c
->argv
[2]);
3805 if (retval
== DICT_ERR
) {
3807 /* If the key is about a swapped value, we want a new key object
3808 * to overwrite the old. So we delete the old key in the database.
3809 * This will also make sure that swap pages about the old object
3810 * will be marked as free. */
3811 if (server
.vm_enabled
&& deleteIfSwapped(c
->db
,c
->argv
[1]))
3812 incrRefCount(c
->argv
[1]);
3813 dictReplace(c
->db
->dict
,c
->argv
[1],c
->argv
[2]);
3814 incrRefCount(c
->argv
[2]);
3816 addReply(c
,shared
.czero
);
3820 incrRefCount(c
->argv
[1]);
3821 incrRefCount(c
->argv
[2]);
3824 removeExpire(c
->db
,c
->argv
[1]);
3825 addReply(c
, nx
? shared
.cone
: shared
.ok
);
3828 static void setCommand(redisClient
*c
) {
3829 setGenericCommand(c
,0);
3832 static void setnxCommand(redisClient
*c
) {
3833 setGenericCommand(c
,1);
3836 static int getGenericCommand(redisClient
*c
) {
3839 if ((o
= lookupKeyReadOrReply(c
,c
->argv
[1],shared
.nullbulk
)) == NULL
)
3842 if (o
->type
!= REDIS_STRING
) {
3843 addReply(c
,shared
.wrongtypeerr
);
3851 static void getCommand(redisClient
*c
) {
3852 getGenericCommand(c
);
3855 static void getsetCommand(redisClient
*c
) {
3856 if (getGenericCommand(c
) == REDIS_ERR
) return;
3857 if (dictAdd(c
->db
->dict
,c
->argv
[1],c
->argv
[2]) == DICT_ERR
) {
3858 dictReplace(c
->db
->dict
,c
->argv
[1],c
->argv
[2]);
3860 incrRefCount(c
->argv
[1]);
3862 incrRefCount(c
->argv
[2]);
3864 removeExpire(c
->db
,c
->argv
[1]);
3867 static void mgetCommand(redisClient
*c
) {
3870 addReplySds(c
,sdscatprintf(sdsempty(),"*%d\r\n",c
->argc
-1));
3871 for (j
= 1; j
< c
->argc
; j
++) {
3872 robj
*o
= lookupKeyRead(c
->db
,c
->argv
[j
]);
3874 addReply(c
,shared
.nullbulk
);
3876 if (o
->type
!= REDIS_STRING
) {
3877 addReply(c
,shared
.nullbulk
);
3885 static void msetGenericCommand(redisClient
*c
, int nx
) {
3886 int j
, busykeys
= 0;
3888 if ((c
->argc
% 2) == 0) {
3889 addReplySds(c
,sdsnew("-ERR wrong number of arguments for MSET\r\n"));
3892 /* Handle the NX flag. The MSETNX semantic is to return zero and don't
3893 * set nothing at all if at least one already key exists. */
3895 for (j
= 1; j
< c
->argc
; j
+= 2) {
3896 if (lookupKeyWrite(c
->db
,c
->argv
[j
]) != NULL
) {
3902 addReply(c
, shared
.czero
);
3906 for (j
= 1; j
< c
->argc
; j
+= 2) {
3909 tryObjectEncoding(c
->argv
[j
+1]);
3910 retval
= dictAdd(c
->db
->dict
,c
->argv
[j
],c
->argv
[j
+1]);
3911 if (retval
== DICT_ERR
) {
3912 dictReplace(c
->db
->dict
,c
->argv
[j
],c
->argv
[j
+1]);
3913 incrRefCount(c
->argv
[j
+1]);
3915 incrRefCount(c
->argv
[j
]);
3916 incrRefCount(c
->argv
[j
+1]);
3918 removeExpire(c
->db
,c
->argv
[j
]);
3920 server
.dirty
+= (c
->argc
-1)/2;
3921 addReply(c
, nx
? shared
.cone
: shared
.ok
);
3924 static void msetCommand(redisClient
*c
) {
3925 msetGenericCommand(c
,0);
3928 static void msetnxCommand(redisClient
*c
) {
3929 msetGenericCommand(c
,1);
3932 static void incrDecrCommand(redisClient
*c
, long long incr
) {
3937 o
= lookupKeyWrite(c
->db
,c
->argv
[1]);
3941 if (o
->type
!= REDIS_STRING
) {
3946 if (o
->encoding
== REDIS_ENCODING_RAW
)
3947 value
= strtoll(o
->ptr
, &eptr
, 10);
3948 else if (o
->encoding
== REDIS_ENCODING_INT
)
3949 value
= (long)o
->ptr
;
3951 redisAssert(1 != 1);
3956 o
= createObject(REDIS_STRING
,sdscatprintf(sdsempty(),"%lld",value
));
3957 tryObjectEncoding(o
);
3958 retval
= dictAdd(c
->db
->dict
,c
->argv
[1],o
);
3959 if (retval
== DICT_ERR
) {
3960 dictReplace(c
->db
->dict
,c
->argv
[1],o
);
3961 removeExpire(c
->db
,c
->argv
[1]);
3963 incrRefCount(c
->argv
[1]);
3966 addReply(c
,shared
.colon
);
3968 addReply(c
,shared
.crlf
);
3971 static void incrCommand(redisClient
*c
) {
3972 incrDecrCommand(c
,1);
3975 static void decrCommand(redisClient
*c
) {
3976 incrDecrCommand(c
,-1);
3979 static void incrbyCommand(redisClient
*c
) {
3980 long long incr
= strtoll(c
->argv
[2]->ptr
, NULL
, 10);
3981 incrDecrCommand(c
,incr
);
3984 static void decrbyCommand(redisClient
*c
) {
3985 long long incr
= strtoll(c
->argv
[2]->ptr
, NULL
, 10);
3986 incrDecrCommand(c
,-incr
);
3989 static void appendCommand(redisClient
*c
) {
3994 o
= lookupKeyWrite(c
->db
,c
->argv
[1]);
3996 /* Create the key */
3997 retval
= dictAdd(c
->db
->dict
,c
->argv
[1],c
->argv
[2]);
3998 incrRefCount(c
->argv
[1]);
3999 incrRefCount(c
->argv
[2]);
4000 totlen
= stringObjectLen(c
->argv
[2]);
4004 de
= dictFind(c
->db
->dict
,c
->argv
[1]);
4007 o
= dictGetEntryVal(de
);
4008 if (o
->type
!= REDIS_STRING
) {
4009 addReply(c
,shared
.wrongtypeerr
);
4012 /* If the object is specially encoded or shared we have to make
4014 if (o
->refcount
!= 1 || o
->encoding
!= REDIS_ENCODING_RAW
) {
4015 robj
*decoded
= getDecodedObject(o
);
4017 o
= createStringObject(decoded
->ptr
, sdslen(decoded
->ptr
));
4018 decrRefCount(decoded
);
4019 dictReplace(c
->db
->dict
,c
->argv
[1],o
);
4022 if (c
->argv
[2]->encoding
== REDIS_ENCODING_RAW
) {
4023 o
->ptr
= sdscatlen(o
->ptr
,
4024 c
->argv
[2]->ptr
, sdslen(c
->argv
[2]->ptr
));
4026 o
->ptr
= sdscatprintf(o
->ptr
, "%ld",
4027 (unsigned long) c
->argv
[2]->ptr
);
4029 totlen
= sdslen(o
->ptr
);
4032 addReplySds(c
,sdscatprintf(sdsempty(),":%lu\r\n",(unsigned long)totlen
));
4035 static void substrCommand(redisClient
*c
) {
4037 long start
= atoi(c
->argv
[2]->ptr
);
4038 long end
= atoi(c
->argv
[3]->ptr
);
4039 size_t rangelen
, strlen
;
4042 if ((o
= lookupKeyReadOrReply(c
,c
->argv
[1],shared
.nullbulk
)) == NULL
||
4043 checkType(c
,o
,REDIS_STRING
)) return;
4045 o
= getDecodedObject(o
);
4046 strlen
= sdslen(o
->ptr
);
4048 /* convert negative indexes */
4049 if (start
< 0) start
= strlen
+start
;
4050 if (end
< 0) end
= strlen
+end
;
4051 if (start
< 0) start
= 0;
4052 if (end
< 0) end
= 0;
4054 /* indexes sanity checks */
4055 if (start
> end
|| (size_t)start
>= strlen
) {
4056 /* Out of range start or start > end result in null reply */
4057 addReply(c
,shared
.nullbulk
);
4061 if ((size_t)end
>= strlen
) end
= strlen
-1;
4062 rangelen
= (end
-start
)+1;
4064 /* Return the result */
4065 addReplySds(c
,sdscatprintf(sdsempty(),"$%zu\r\n",rangelen
));
4066 range
= sdsnewlen((char*)o
->ptr
+start
,rangelen
);
4067 addReplySds(c
,range
);
4068 addReply(c
,shared
.crlf
);
4072 /* ========================= Type agnostic commands ========================= */
4074 static void delCommand(redisClient
*c
) {
4077 for (j
= 1; j
< c
->argc
; j
++) {
4078 if (deleteKey(c
->db
,c
->argv
[j
])) {
4083 addReplyLong(c
,deleted
);
4086 static void existsCommand(redisClient
*c
) {
4087 addReply(c
,lookupKeyRead(c
->db
,c
->argv
[1]) ? shared
.cone
: shared
.czero
);
4090 static void selectCommand(redisClient
*c
) {
4091 int id
= atoi(c
->argv
[1]->ptr
);
4093 if (selectDb(c
,id
) == REDIS_ERR
) {
4094 addReplySds(c
,sdsnew("-ERR invalid DB index\r\n"));
4096 addReply(c
,shared
.ok
);
4100 static void randomkeyCommand(redisClient
*c
) {
4104 de
= dictGetRandomKey(c
->db
->dict
);
4105 if (!de
|| expireIfNeeded(c
->db
,dictGetEntryKey(de
)) == 0) break;
4108 addReply(c
,shared
.plus
);
4109 addReply(c
,shared
.crlf
);
4111 addReply(c
,shared
.plus
);
4112 addReply(c
,dictGetEntryKey(de
));
4113 addReply(c
,shared
.crlf
);
4117 static void keysCommand(redisClient
*c
) {
4120 sds pattern
= c
->argv
[1]->ptr
;
4121 int plen
= sdslen(pattern
);
4122 unsigned long numkeys
= 0;
4123 robj
*lenobj
= createObject(REDIS_STRING
,NULL
);
4125 di
= dictGetIterator(c
->db
->dict
);
4127 decrRefCount(lenobj
);
4128 while((de
= dictNext(di
)) != NULL
) {
4129 robj
*keyobj
= dictGetEntryKey(de
);
4131 sds key
= keyobj
->ptr
;
4132 if ((pattern
[0] == '*' && pattern
[1] == '\0') ||
4133 stringmatchlen(pattern
,plen
,key
,sdslen(key
),0)) {
4134 if (expireIfNeeded(c
->db
,keyobj
) == 0) {
4135 addReplyBulk(c
,keyobj
);
4140 dictReleaseIterator(di
);
4141 lenobj
->ptr
= sdscatprintf(sdsempty(),"*%lu\r\n",numkeys
);
4144 static void dbsizeCommand(redisClient
*c
) {
4146 sdscatprintf(sdsempty(),":%lu\r\n",dictSize(c
->db
->dict
)));
4149 static void lastsaveCommand(redisClient
*c
) {
4151 sdscatprintf(sdsempty(),":%lu\r\n",server
.lastsave
));
4154 static void typeCommand(redisClient
*c
) {
4158 o
= lookupKeyRead(c
->db
,c
->argv
[1]);
4163 case REDIS_STRING
: type
= "+string"; break;
4164 case REDIS_LIST
: type
= "+list"; break;
4165 case REDIS_SET
: type
= "+set"; break;
4166 case REDIS_ZSET
: type
= "+zset"; break;
4167 case REDIS_HASH
: type
= "+hash"; break;
4168 default: type
= "+unknown"; break;
4171 addReplySds(c
,sdsnew(type
));
4172 addReply(c
,shared
.crlf
);
4175 static void saveCommand(redisClient
*c
) {
4176 if (server
.bgsavechildpid
!= -1) {
4177 addReplySds(c
,sdsnew("-ERR background save in progress\r\n"));
4180 if (rdbSave(server
.dbfilename
) == REDIS_OK
) {
4181 addReply(c
,shared
.ok
);
4183 addReply(c
,shared
.err
);
4187 static void bgsaveCommand(redisClient
*c
) {
4188 if (server
.bgsavechildpid
!= -1) {
4189 addReplySds(c
,sdsnew("-ERR background save already in progress\r\n"));
4192 if (rdbSaveBackground(server
.dbfilename
) == REDIS_OK
) {
4193 char *status
= "+Background saving started\r\n";
4194 addReplySds(c
,sdsnew(status
));
4196 addReply(c
,shared
.err
);
4200 static void shutdownCommand(redisClient
*c
) {
4201 redisLog(REDIS_WARNING
,"User requested shutdown, saving DB...");
4202 /* Kill the saving child if there is a background saving in progress.
4203 We want to avoid race conditions, for instance our saving child may
4204 overwrite the synchronous save done by SHUTDOWN. */
4205 if (server
.bgsavechildpid
!= -1) {
4206 redisLog(REDIS_WARNING
,"There is a live saving child. Killing it!");
4207 kill(server
.bgsavechildpid
,SIGKILL
);
4208 rdbRemoveTempFile(server
.bgsavechildpid
);
4210 if (server
.appendonly
) {
4211 /* Append only file: fsync() the AOF and exit */
4212 fsync(server
.appendfd
);
4213 if (server
.vm_enabled
) unlink(server
.vm_swap_file
);
4216 /* Snapshotting. Perform a SYNC SAVE and exit */
4217 if (rdbSave(server
.dbfilename
) == REDIS_OK
) {
4218 if (server
.daemonize
)
4219 unlink(server
.pidfile
);
4220 redisLog(REDIS_WARNING
,"%zu bytes used at exit",zmalloc_used_memory());
4221 redisLog(REDIS_WARNING
,"Server exit now, bye bye...");
4222 if (server
.vm_enabled
) unlink(server
.vm_swap_file
);
4225 /* Ooops.. error saving! The best we can do is to continue
4226 * operating. Note that if there was a background saving process,
4227 * in the next cron() Redis will be notified that the background
4228 * saving aborted, handling special stuff like slaves pending for
4229 * synchronization... */
4230 redisLog(REDIS_WARNING
,"Error trying to save the DB, can't exit");
4232 sdsnew("-ERR can't quit, problems saving the DB\r\n"));
4237 static void renameGenericCommand(redisClient
*c
, int nx
) {
4240 /* To use the same key as src and dst is probably an error */
4241 if (sdscmp(c
->argv
[1]->ptr
,c
->argv
[2]->ptr
) == 0) {
4242 addReply(c
,shared
.sameobjecterr
);
4246 if ((o
= lookupKeyWriteOrReply(c
,c
->argv
[1],shared
.nokeyerr
)) == NULL
)
4250 deleteIfVolatile(c
->db
,c
->argv
[2]);
4251 if (dictAdd(c
->db
->dict
,c
->argv
[2],o
) == DICT_ERR
) {
4254 addReply(c
,shared
.czero
);
4257 dictReplace(c
->db
->dict
,c
->argv
[2],o
);
4259 incrRefCount(c
->argv
[2]);
4261 deleteKey(c
->db
,c
->argv
[1]);
4263 addReply(c
,nx
? shared
.cone
: shared
.ok
);
4266 static void renameCommand(redisClient
*c
) {
4267 renameGenericCommand(c
,0);
4270 static void renamenxCommand(redisClient
*c
) {
4271 renameGenericCommand(c
,1);
4274 static void moveCommand(redisClient
*c
) {
4279 /* Obtain source and target DB pointers */
4282 if (selectDb(c
,atoi(c
->argv
[2]->ptr
)) == REDIS_ERR
) {
4283 addReply(c
,shared
.outofrangeerr
);
4287 selectDb(c
,srcid
); /* Back to the source DB */
4289 /* If the target DB chosen by the user is the same as
4290 * the source DB, it is probably an error. */
4292 addReply(c
,shared
.sameobjecterr
);
4296 /* Check if the element exists and get a reference */
4297 o
= lookupKeyWrite(c
->db
,c
->argv
[1]);
4299 addReply(c
,shared
.czero
);
4303 /* Try to add the element to the target DB */
4304 deleteIfVolatile(dst
,c
->argv
[1]);
4305 if (dictAdd(dst
->dict
,c
->argv
[1],o
) == DICT_ERR
) {
4306 addReply(c
,shared
.czero
);
4309 incrRefCount(c
->argv
[1]);
4312 /* OK! key moved, free the entry in the source DB */
4313 deleteKey(src
,c
->argv
[1]);
4315 addReply(c
,shared
.cone
);
4318 /* =================================== Lists ================================ */
4319 static void pushGenericCommand(redisClient
*c
, int where
) {
4323 lobj
= lookupKeyWrite(c
->db
,c
->argv
[1]);
4325 if (handleClientsWaitingListPush(c
,c
->argv
[1],c
->argv
[2])) {
4326 addReply(c
,shared
.cone
);
4329 lobj
= createListObject();
4331 if (where
== REDIS_HEAD
) {
4332 listAddNodeHead(list
,c
->argv
[2]);
4334 listAddNodeTail(list
,c
->argv
[2]);
4336 dictAdd(c
->db
->dict
,c
->argv
[1],lobj
);
4337 incrRefCount(c
->argv
[1]);
4338 incrRefCount(c
->argv
[2]);
4340 if (lobj
->type
!= REDIS_LIST
) {
4341 addReply(c
,shared
.wrongtypeerr
);
4344 if (handleClientsWaitingListPush(c
,c
->argv
[1],c
->argv
[2])) {
4345 addReply(c
,shared
.cone
);
4349 if (where
== REDIS_HEAD
) {
4350 listAddNodeHead(list
,c
->argv
[2]);
4352 listAddNodeTail(list
,c
->argv
[2]);
4354 incrRefCount(c
->argv
[2]);
4357 addReplySds(c
,sdscatprintf(sdsempty(),":%d\r\n",listLength(list
)));
4360 static void lpushCommand(redisClient
*c
) {
4361 pushGenericCommand(c
,REDIS_HEAD
);
4364 static void rpushCommand(redisClient
*c
) {
4365 pushGenericCommand(c
,REDIS_TAIL
);
4368 static void llenCommand(redisClient
*c
) {
4372 if ((o
= lookupKeyReadOrReply(c
,c
->argv
[1],shared
.czero
)) == NULL
||
4373 checkType(c
,o
,REDIS_LIST
)) return;
4376 addReplyUlong(c
,listLength(l
));
4379 static void lindexCommand(redisClient
*c
) {
4381 int index
= atoi(c
->argv
[2]->ptr
);
4385 if ((o
= lookupKeyReadOrReply(c
,c
->argv
[1],shared
.nullbulk
)) == NULL
||
4386 checkType(c
,o
,REDIS_LIST
)) return;
4389 ln
= listIndex(list
, index
);
4391 addReply(c
,shared
.nullbulk
);
4393 robj
*ele
= listNodeValue(ln
);
4394 addReplyBulk(c
,ele
);
4398 static void lsetCommand(redisClient
*c
) {
4400 int index
= atoi(c
->argv
[2]->ptr
);
4404 if ((o
= lookupKeyWriteOrReply(c
,c
->argv
[1],shared
.nokeyerr
)) == NULL
||
4405 checkType(c
,o
,REDIS_LIST
)) return;
4408 ln
= listIndex(list
, index
);
4410 addReply(c
,shared
.outofrangeerr
);
4412 robj
*ele
= listNodeValue(ln
);
4415 listNodeValue(ln
) = c
->argv
[3];
4416 incrRefCount(c
->argv
[3]);
4417 addReply(c
,shared
.ok
);
4422 static void popGenericCommand(redisClient
*c
, int where
) {
4427 if ((o
= lookupKeyWriteOrReply(c
,c
->argv
[1],shared
.nullbulk
)) == NULL
||
4428 checkType(c
,o
,REDIS_LIST
)) return;
4431 if (where
== REDIS_HEAD
)
4432 ln
= listFirst(list
);
4434 ln
= listLast(list
);
4437 addReply(c
,shared
.nullbulk
);
4439 robj
*ele
= listNodeValue(ln
);
4440 addReplyBulk(c
,ele
);
4441 listDelNode(list
,ln
);
4442 if (listLength(list
) == 0) deleteKey(c
->db
,c
->argv
[1]);
4447 static void lpopCommand(redisClient
*c
) {
4448 popGenericCommand(c
,REDIS_HEAD
);
4451 static void rpopCommand(redisClient
*c
) {
4452 popGenericCommand(c
,REDIS_TAIL
);
4455 static void lrangeCommand(redisClient
*c
) {
4457 int start
= atoi(c
->argv
[2]->ptr
);
4458 int end
= atoi(c
->argv
[3]->ptr
);
4465 if ((o
= lookupKeyReadOrReply(c
,c
->argv
[1],shared
.nullmultibulk
)) == NULL
||
4466 checkType(c
,o
,REDIS_LIST
)) return;
4468 llen
= listLength(list
);
4470 /* convert negative indexes */
4471 if (start
< 0) start
= llen
+start
;
4472 if (end
< 0) end
= llen
+end
;
4473 if (start
< 0) start
= 0;
4474 if (end
< 0) end
= 0;
4476 /* indexes sanity checks */
4477 if (start
> end
|| start
>= llen
) {
4478 /* Out of range start or start > end result in empty list */
4479 addReply(c
,shared
.emptymultibulk
);
4482 if (end
>= llen
) end
= llen
-1;
4483 rangelen
= (end
-start
)+1;
4485 /* Return the result in form of a multi-bulk reply */
4486 ln
= listIndex(list
, start
);
4487 addReplySds(c
,sdscatprintf(sdsempty(),"*%d\r\n",rangelen
));
4488 for (j
= 0; j
< rangelen
; j
++) {
4489 ele
= listNodeValue(ln
);
4490 addReplyBulk(c
,ele
);
4495 static void ltrimCommand(redisClient
*c
) {
4497 int start
= atoi(c
->argv
[2]->ptr
);
4498 int end
= atoi(c
->argv
[3]->ptr
);
4500 int j
, ltrim
, rtrim
;
4504 if ((o
= lookupKeyWriteOrReply(c
,c
->argv
[1],shared
.ok
)) == NULL
||
4505 checkType(c
,o
,REDIS_LIST
)) return;
4507 llen
= listLength(list
);
4509 /* convert negative indexes */
4510 if (start
< 0) start
= llen
+start
;
4511 if (end
< 0) end
= llen
+end
;
4512 if (start
< 0) start
= 0;
4513 if (end
< 0) end
= 0;
4515 /* indexes sanity checks */
4516 if (start
> end
|| start
>= llen
) {
4517 /* Out of range start or start > end result in empty list */
4521 if (end
>= llen
) end
= llen
-1;
4526 /* Remove list elements to perform the trim */
4527 for (j
= 0; j
< ltrim
; j
++) {
4528 ln
= listFirst(list
);
4529 listDelNode(list
,ln
);
4531 for (j
= 0; j
< rtrim
; j
++) {
4532 ln
= listLast(list
);
4533 listDelNode(list
,ln
);
4535 if (listLength(list
) == 0) deleteKey(c
->db
,c
->argv
[1]);
4537 addReply(c
,shared
.ok
);
4540 static void lremCommand(redisClient
*c
) {
4543 listNode
*ln
, *next
;
4544 int toremove
= atoi(c
->argv
[2]->ptr
);
4548 if ((o
= lookupKeyWriteOrReply(c
,c
->argv
[1],shared
.czero
)) == NULL
||
4549 checkType(c
,o
,REDIS_LIST
)) return;
4553 toremove
= -toremove
;
4556 ln
= fromtail
? list
->tail
: list
->head
;
4558 robj
*ele
= listNodeValue(ln
);
4560 next
= fromtail
? ln
->prev
: ln
->next
;
4561 if (compareStringObjects(ele
,c
->argv
[3]) == 0) {
4562 listDelNode(list
,ln
);
4565 if (toremove
&& removed
== toremove
) break;
4569 if (listLength(list
) == 0) deleteKey(c
->db
,c
->argv
[1]);
4570 addReplySds(c
,sdscatprintf(sdsempty(),":%d\r\n",removed
));
4573 /* This is the semantic of this command:
4574 * RPOPLPUSH srclist dstlist:
4575 * IF LLEN(srclist) > 0
4576 * element = RPOP srclist
4577 * LPUSH dstlist element
4584 * The idea is to be able to get an element from a list in a reliable way
4585 * since the element is not just returned but pushed onto another list
4586 * as well. This command was originally proposed by Ezra Zygmuntowicz.
4588 static void rpoplpushcommand(redisClient
*c
) {
4593 if ((sobj
= lookupKeyWriteOrReply(c
,c
->argv
[1],shared
.nullbulk
)) == NULL
||
4594 checkType(c
,sobj
,REDIS_LIST
)) return;
4595 srclist
= sobj
->ptr
;
4596 ln
= listLast(srclist
);
4599 addReply(c
,shared
.nullbulk
);
4601 robj
*dobj
= lookupKeyWrite(c
->db
,c
->argv
[2]);
4602 robj
*ele
= listNodeValue(ln
);
4605 if (dobj
&& dobj
->type
!= REDIS_LIST
) {
4606 addReply(c
,shared
.wrongtypeerr
);
4610 /* Add the element to the target list (unless it's directly
4611 * passed to some BLPOP-ing client) */
4612 if (!handleClientsWaitingListPush(c
,c
->argv
[2],ele
)) {
4614 /* Create the list if the key does not exist */
4615 dobj
= createListObject();
4616 dictAdd(c
->db
->dict
,c
->argv
[2],dobj
);
4617 incrRefCount(c
->argv
[2]);
4619 dstlist
= dobj
->ptr
;
4620 listAddNodeHead(dstlist
,ele
);
4624 /* Send the element to the client as reply as well */
4625 addReplyBulk(c
,ele
);
4627 /* Finally remove the element from the source list */
4628 listDelNode(srclist
,ln
);
4629 if (listLength(srclist
) == 0) deleteKey(c
->db
,c
->argv
[1]);
4634 /* ==================================== Sets ================================ */
4636 static void saddCommand(redisClient
*c
) {
4639 set
= lookupKeyWrite(c
->db
,c
->argv
[1]);
4641 set
= createSetObject();
4642 dictAdd(c
->db
->dict
,c
->argv
[1],set
);
4643 incrRefCount(c
->argv
[1]);
4645 if (set
->type
!= REDIS_SET
) {
4646 addReply(c
,shared
.wrongtypeerr
);
4650 if (dictAdd(set
->ptr
,c
->argv
[2],NULL
) == DICT_OK
) {
4651 incrRefCount(c
->argv
[2]);
4653 addReply(c
,shared
.cone
);
4655 addReply(c
,shared
.czero
);
4659 static void sremCommand(redisClient
*c
) {
4662 if ((set
= lookupKeyWriteOrReply(c
,c
->argv
[1],shared
.czero
)) == NULL
||
4663 checkType(c
,set
,REDIS_SET
)) return;
4665 if (dictDelete(set
->ptr
,c
->argv
[2]) == DICT_OK
) {
4667 if (htNeedsResize(set
->ptr
)) dictResize(set
->ptr
);
4668 if (dictSize((dict
*)set
->ptr
) == 0) deleteKey(c
->db
,c
->argv
[1]);
4669 addReply(c
,shared
.cone
);
4671 addReply(c
,shared
.czero
);
4675 static void smoveCommand(redisClient
*c
) {
4676 robj
*srcset
, *dstset
;
4678 srcset
= lookupKeyWrite(c
->db
,c
->argv
[1]);
4679 dstset
= lookupKeyWrite(c
->db
,c
->argv
[2]);
4681 /* If the source key does not exist return 0, if it's of the wrong type
4683 if (srcset
== NULL
|| srcset
->type
!= REDIS_SET
) {
4684 addReply(c
, srcset
? shared
.wrongtypeerr
: shared
.czero
);
4687 /* Error if the destination key is not a set as well */
4688 if (dstset
&& dstset
->type
!= REDIS_SET
) {
4689 addReply(c
,shared
.wrongtypeerr
);
4692 /* Remove the element from the source set */
4693 if (dictDelete(srcset
->ptr
,c
->argv
[3]) == DICT_ERR
) {
4694 /* Key not found in the src set! return zero */
4695 addReply(c
,shared
.czero
);
4698 if (dictSize((dict
*)srcset
->ptr
) == 0 && srcset
!= dstset
)
4699 deleteKey(c
->db
,c
->argv
[1]);
4701 /* Add the element to the destination set */
4703 dstset
= createSetObject();
4704 dictAdd(c
->db
->dict
,c
->argv
[2],dstset
);
4705 incrRefCount(c
->argv
[2]);
4707 if (dictAdd(dstset
->ptr
,c
->argv
[3],NULL
) == DICT_OK
)
4708 incrRefCount(c
->argv
[3]);
4709 addReply(c
,shared
.cone
);
4712 static void sismemberCommand(redisClient
*c
) {
4715 if ((set
= lookupKeyReadOrReply(c
,c
->argv
[1],shared
.czero
)) == NULL
||
4716 checkType(c
,set
,REDIS_SET
)) return;
4718 if (dictFind(set
->ptr
,c
->argv
[2]))
4719 addReply(c
,shared
.cone
);
4721 addReply(c
,shared
.czero
);
4724 static void scardCommand(redisClient
*c
) {
4728 if ((o
= lookupKeyReadOrReply(c
,c
->argv
[1],shared
.czero
)) == NULL
||
4729 checkType(c
,o
,REDIS_SET
)) return;
4732 addReplyUlong(c
,dictSize(s
));
4735 static void spopCommand(redisClient
*c
) {
4739 if ((set
= lookupKeyWriteOrReply(c
,c
->argv
[1],shared
.nullbulk
)) == NULL
||
4740 checkType(c
,set
,REDIS_SET
)) return;
4742 de
= dictGetRandomKey(set
->ptr
);
4744 addReply(c
,shared
.nullbulk
);
4746 robj
*ele
= dictGetEntryKey(de
);
4748 addReplyBulk(c
,ele
);
4749 dictDelete(set
->ptr
,ele
);
4750 if (htNeedsResize(set
->ptr
)) dictResize(set
->ptr
);
4751 if (dictSize((dict
*)set
->ptr
) == 0) deleteKey(c
->db
,c
->argv
[1]);
4756 static void srandmemberCommand(redisClient
*c
) {
4760 if ((set
= lookupKeyReadOrReply(c
,c
->argv
[1],shared
.nullbulk
)) == NULL
||
4761 checkType(c
,set
,REDIS_SET
)) return;
4763 de
= dictGetRandomKey(set
->ptr
);
4765 addReply(c
,shared
.nullbulk
);
4767 robj
*ele
= dictGetEntryKey(de
);
4769 addReplyBulk(c
,ele
);
4773 static int qsortCompareSetsByCardinality(const void *s1
, const void *s2
) {
4774 dict
**d1
= (void*) s1
, **d2
= (void*) s2
;
4776 return dictSize(*d1
)-dictSize(*d2
);
4779 static void sinterGenericCommand(redisClient
*c
, robj
**setskeys
, unsigned long setsnum
, robj
*dstkey
) {
4780 dict
**dv
= zmalloc(sizeof(dict
*)*setsnum
);
4783 robj
*lenobj
= NULL
, *dstset
= NULL
;
4784 unsigned long j
, cardinality
= 0;
4786 for (j
= 0; j
< setsnum
; j
++) {
4790 lookupKeyWrite(c
->db
,setskeys
[j
]) :
4791 lookupKeyRead(c
->db
,setskeys
[j
]);
4795 if (deleteKey(c
->db
,dstkey
))
4797 addReply(c
,shared
.czero
);
4799 addReply(c
,shared
.nullmultibulk
);
4803 if (setobj
->type
!= REDIS_SET
) {
4805 addReply(c
,shared
.wrongtypeerr
);
4808 dv
[j
] = setobj
->ptr
;
4810 /* Sort sets from the smallest to largest, this will improve our
4811 * algorithm's performance */
4812 qsort(dv
,setsnum
,sizeof(dict
*),qsortCompareSetsByCardinality
);
4814 /* The first thing we should output is the total number of elements...
4815 * since this is a multi-bulk write, but at this stage we don't know
4816 * the intersection set size, so we use a trick, append an empty object
4817 * to the output list and save the pointer to later modify it with the
4820 lenobj
= createObject(REDIS_STRING
,NULL
);
4822 decrRefCount(lenobj
);
4824 /* If we have a target key where to store the resulting set
4825 * create this key with an empty set inside */
4826 dstset
= createSetObject();
4829 /* Iterate all the elements of the first (smallest) set, and test
4830 * the element against all the other sets, if at least one set does
4831 * not include the element it is discarded */
4832 di
= dictGetIterator(dv
[0]);
4834 while((de
= dictNext(di
)) != NULL
) {
4837 for (j
= 1; j
< setsnum
; j
++)
4838 if (dictFind(dv
[j
],dictGetEntryKey(de
)) == NULL
) break;
4840 continue; /* at least one set does not contain the member */
4841 ele
= dictGetEntryKey(de
);
4843 addReplyBulk(c
,ele
);
4846 dictAdd(dstset
->ptr
,ele
,NULL
);
4850 dictReleaseIterator(di
);
4853 /* Store the resulting set into the target, if the intersection
4854 * is not an empty set. */
4855 deleteKey(c
->db
,dstkey
);
4856 if (dictSize((dict
*)dstset
->ptr
) > 0) {
4857 dictAdd(c
->db
->dict
,dstkey
,dstset
);
4858 incrRefCount(dstkey
);
4859 addReplyLong(c
,dictSize((dict
*)dstset
->ptr
));
4861 decrRefCount(dstset
);
4862 addReply(c
,shared
.czero
);
4866 lenobj
->ptr
= sdscatprintf(sdsempty(),"*%lu\r\n",cardinality
);
4871 static void sinterCommand(redisClient
*c
) {
4872 sinterGenericCommand(c
,c
->argv
+1,c
->argc
-1,NULL
);
4875 static void sinterstoreCommand(redisClient
*c
) {
4876 sinterGenericCommand(c
,c
->argv
+2,c
->argc
-2,c
->argv
[1]);
4879 #define REDIS_OP_UNION 0
4880 #define REDIS_OP_DIFF 1
4881 #define REDIS_OP_INTER 2
4883 static void sunionDiffGenericCommand(redisClient
*c
, robj
**setskeys
, int setsnum
, robj
*dstkey
, int op
) {
4884 dict
**dv
= zmalloc(sizeof(dict
*)*setsnum
);
4887 robj
*dstset
= NULL
;
4888 int j
, cardinality
= 0;
4890 for (j
= 0; j
< setsnum
; j
++) {
4894 lookupKeyWrite(c
->db
,setskeys
[j
]) :
4895 lookupKeyRead(c
->db
,setskeys
[j
]);
4900 if (setobj
->type
!= REDIS_SET
) {
4902 addReply(c
,shared
.wrongtypeerr
);
4905 dv
[j
] = setobj
->ptr
;
4908 /* We need a temp set object to store our union. If the dstkey
4909 * is not NULL (that is, we are inside an SUNIONSTORE operation) then
4910 * this set object will be the resulting object to set into the target key*/
4911 dstset
= createSetObject();
4913 /* Iterate all the elements of all the sets, add every element a single
4914 * time to the result set */
4915 for (j
= 0; j
< setsnum
; j
++) {
4916 if (op
== REDIS_OP_DIFF
&& j
== 0 && !dv
[j
]) break; /* result set is empty */
4917 if (!dv
[j
]) continue; /* non existing keys are like empty sets */
4919 di
= dictGetIterator(dv
[j
]);
4921 while((de
= dictNext(di
)) != NULL
) {
4924 /* dictAdd will not add the same element multiple times */
4925 ele
= dictGetEntryKey(de
);
4926 if (op
== REDIS_OP_UNION
|| j
== 0) {
4927 if (dictAdd(dstset
->ptr
,ele
,NULL
) == DICT_OK
) {
4931 } else if (op
== REDIS_OP_DIFF
) {
4932 if (dictDelete(dstset
->ptr
,ele
) == DICT_OK
) {
4937 dictReleaseIterator(di
);
4939 /* result set is empty? Exit asap. */
4940 if (op
== REDIS_OP_DIFF
&& cardinality
== 0) break;
4943 /* Output the content of the resulting set, if not in STORE mode */
4945 addReplySds(c
,sdscatprintf(sdsempty(),"*%d\r\n",cardinality
));
4946 di
= dictGetIterator(dstset
->ptr
);
4947 while((de
= dictNext(di
)) != NULL
) {
4950 ele
= dictGetEntryKey(de
);
4951 addReplyBulk(c
,ele
);
4953 dictReleaseIterator(di
);
4954 decrRefCount(dstset
);
4956 /* If we have a target key where to store the resulting set
4957 * create this key with the result set inside */
4958 deleteKey(c
->db
,dstkey
);
4959 if (dictSize((dict
*)dstset
->ptr
) > 0) {
4960 dictAdd(c
->db
->dict
,dstkey
,dstset
);
4961 incrRefCount(dstkey
);
4962 addReplyLong(c
,dictSize((dict
*)dstset
->ptr
));
4964 decrRefCount(dstset
);
4965 addReply(c
,shared
.czero
);
4972 static void sunionCommand(redisClient
*c
) {
4973 sunionDiffGenericCommand(c
,c
->argv
+1,c
->argc
-1,NULL
,REDIS_OP_UNION
);
4976 static void sunionstoreCommand(redisClient
*c
) {
4977 sunionDiffGenericCommand(c
,c
->argv
+2,c
->argc
-2,c
->argv
[1],REDIS_OP_UNION
);
4980 static void sdiffCommand(redisClient
*c
) {
4981 sunionDiffGenericCommand(c
,c
->argv
+1,c
->argc
-1,NULL
,REDIS_OP_DIFF
);
4984 static void sdiffstoreCommand(redisClient
*c
) {
4985 sunionDiffGenericCommand(c
,c
->argv
+2,c
->argc
-2,c
->argv
[1],REDIS_OP_DIFF
);
4988 /* ==================================== ZSets =============================== */
4990 /* ZSETs are ordered sets using two data structures to hold the same elements
4991 * in order to get O(log(N)) INSERT and REMOVE operations into a sorted
4994 * The elements are added to a hash table mapping Redis objects to scores.
4995 * At the same time the elements are added to a skip list mapping scores
4996 * to Redis objects (so objects are sorted by scores in this "view"). */
4998 /* This skiplist implementation is almost a C translation of the original
4999 * algorithm described by William Pugh in "Skip Lists: A Probabilistic
5000 * Alternative to Balanced Trees", modified in three ways:
5001 * a) this implementation allows for repeated values.
5002 * b) the comparison is not just by key (our 'score') but by satellite data.
5003 * c) there is a back pointer, so it's a doubly linked list with the back
5004 * pointers being only at "level 1". This allows traversing the list
5005 * from tail to head, useful for ZREVRANGE. */
5007 static zskiplistNode
*zslCreateNode(int level
, double score
, robj
*obj
) {
5008 zskiplistNode
*zn
= zmalloc(sizeof(*zn
));
5010 zn
->forward
= zmalloc(sizeof(zskiplistNode
*) * level
);
5012 zn
->span
= zmalloc(sizeof(unsigned int) * (level
- 1));
5018 static zskiplist
*zslCreate(void) {
5022 zsl
= zmalloc(sizeof(*zsl
));
5025 zsl
->header
= zslCreateNode(ZSKIPLIST_MAXLEVEL
,0,NULL
);
5026 for (j
= 0; j
< ZSKIPLIST_MAXLEVEL
; j
++) {
5027 zsl
->header
->forward
[j
] = NULL
;
5029 /* span has space for ZSKIPLIST_MAXLEVEL-1 elements */
5030 if (j
< ZSKIPLIST_MAXLEVEL
-1)
5031 zsl
->header
->span
[j
] = 0;
5033 zsl
->header
->backward
= NULL
;
5038 static void zslFreeNode(zskiplistNode
*node
) {
5039 decrRefCount(node
->obj
);
5040 zfree(node
->forward
);
5045 static void zslFree(zskiplist
*zsl
) {
5046 zskiplistNode
*node
= zsl
->header
->forward
[0], *next
;
5048 zfree(zsl
->header
->forward
);
5049 zfree(zsl
->header
->span
);
5052 next
= node
->forward
[0];
5059 static int zslRandomLevel(void) {
5061 while ((random()&0xFFFF) < (ZSKIPLIST_P
* 0xFFFF))
5063 return (level
<ZSKIPLIST_MAXLEVEL
) ? level
: ZSKIPLIST_MAXLEVEL
;
5066 static void zslInsert(zskiplist
*zsl
, double score
, robj
*obj
) {
5067 zskiplistNode
*update
[ZSKIPLIST_MAXLEVEL
], *x
;
5068 unsigned int rank
[ZSKIPLIST_MAXLEVEL
];
5072 for (i
= zsl
->level
-1; i
>= 0; i
--) {
5073 /* store rank that is crossed to reach the insert position */
5074 rank
[i
] = i
== (zsl
->level
-1) ? 0 : rank
[i
+1];
5076 while (x
->forward
[i
] &&
5077 (x
->forward
[i
]->score
< score
||
5078 (x
->forward
[i
]->score
== score
&&
5079 compareStringObjects(x
->forward
[i
]->obj
,obj
) < 0))) {
5080 rank
[i
] += i
> 0 ? x
->span
[i
-1] : 1;
5085 /* we assume the key is not already inside, since we allow duplicated
5086 * scores, and the re-insertion of score and redis object should never
5087 * happen since the caller of zslInsert() should test in the hash table
5088 * if the element is already inside or not. */
5089 level
= zslRandomLevel();
5090 if (level
> zsl
->level
) {
5091 for (i
= zsl
->level
; i
< level
; i
++) {
5093 update
[i
] = zsl
->header
;
5094 update
[i
]->span
[i
-1] = zsl
->length
;
5098 x
= zslCreateNode(level
,score
,obj
);
5099 for (i
= 0; i
< level
; i
++) {
5100 x
->forward
[i
] = update
[i
]->forward
[i
];
5101 update
[i
]->forward
[i
] = x
;
5103 /* update span covered by update[i] as x is inserted here */
5105 x
->span
[i
-1] = update
[i
]->span
[i
-1] - (rank
[0] - rank
[i
]);
5106 update
[i
]->span
[i
-1] = (rank
[0] - rank
[i
]) + 1;
5110 /* increment span for untouched levels */
5111 for (i
= level
; i
< zsl
->level
; i
++) {
5112 update
[i
]->span
[i
-1]++;
5115 x
->backward
= (update
[0] == zsl
->header
) ? NULL
: update
[0];
5117 x
->forward
[0]->backward
= x
;
5123 /* Internal function used by zslDelete, zslDeleteByScore and zslDeleteByRank */
5124 void zslDeleteNode(zskiplist
*zsl
, zskiplistNode
*x
, zskiplistNode
**update
) {
5126 for (i
= 0; i
< zsl
->level
; i
++) {
5127 if (update
[i
]->forward
[i
] == x
) {
5129 update
[i
]->span
[i
-1] += x
->span
[i
-1] - 1;
5131 update
[i
]->forward
[i
] = x
->forward
[i
];
5133 /* invariant: i > 0, because update[0]->forward[0]
5134 * is always equal to x */
5135 update
[i
]->span
[i
-1] -= 1;
5138 if (x
->forward
[0]) {
5139 x
->forward
[0]->backward
= x
->backward
;
5141 zsl
->tail
= x
->backward
;
5143 while(zsl
->level
> 1 && zsl
->header
->forward
[zsl
->level
-1] == NULL
)
5148 /* Delete an element with matching score/object from the skiplist. */
5149 static int zslDelete(zskiplist
*zsl
, double score
, robj
*obj
) {
5150 zskiplistNode
*update
[ZSKIPLIST_MAXLEVEL
], *x
;
5154 for (i
= zsl
->level
-1; i
>= 0; i
--) {
5155 while (x
->forward
[i
] &&
5156 (x
->forward
[i
]->score
< score
||
5157 (x
->forward
[i
]->score
== score
&&
5158 compareStringObjects(x
->forward
[i
]->obj
,obj
) < 0)))
5162 /* We may have multiple elements with the same score, what we need
5163 * is to find the element with both the right score and object. */
5165 if (x
&& score
== x
->score
&& compareStringObjects(x
->obj
,obj
) == 0) {
5166 zslDeleteNode(zsl
, x
, update
);
5170 return 0; /* not found */
5172 return 0; /* not found */
5175 /* Delete all the elements with score between min and max from the skiplist.
5176 * Min and max are inclusive, so an element with score >= min && score <= max is deleted.
5177 * Note that this function takes the reference to the hash table view of the
5178 * sorted set, in order to remove the elements from the hash table too. */
5179 static unsigned long zslDeleteRangeByScore(zskiplist
*zsl
, double min
, double max
, dict
*dict
) {
5180 zskiplistNode
*update
[ZSKIPLIST_MAXLEVEL
], *x
;
5181 unsigned long removed
= 0;
5185 for (i
= zsl
->level
-1; i
>= 0; i
--) {
5186 while (x
->forward
[i
] && x
->forward
[i
]->score
< min
)
5190 /* Several elements may fall inside the range: delete every node from
5191 * this position onward for as long as its score is <= max. */
5193 while (x
&& x
->score
<= max
) {
5194 zskiplistNode
*next
= x
->forward
[0];
5195 zslDeleteNode(zsl
, x
, update
);
5196 dictDelete(dict
,x
->obj
);
5201 return removed
; /* not found */
5204 /* Delete all the elements with rank between start and end from the skiplist.
5205 * Start and end are inclusive. Note that start and end need to be 1-based */
5206 static unsigned long zslDeleteRangeByRank(zskiplist
*zsl
, unsigned int start
, unsigned int end
, dict
*dict
) {
5207 zskiplistNode
*update
[ZSKIPLIST_MAXLEVEL
], *x
;
5208 unsigned long traversed
= 0, removed
= 0;
5212 for (i
= zsl
->level
-1; i
>= 0; i
--) {
5213 while (x
->forward
[i
] && (traversed
+ (i
> 0 ? x
->span
[i
-1] : 1)) < start
) {
5214 traversed
+= i
> 0 ? x
->span
[i
-1] : 1;
5222 while (x
&& traversed
<= end
) {
5223 zskiplistNode
*next
= x
->forward
[0];
5224 zslDeleteNode(zsl
, x
, update
);
5225 dictDelete(dict
,x
->obj
);
5234 /* Find the first node having a score equal or greater than the specified one.
5235 * Returns NULL if there is no match. */
5236 static zskiplistNode
*zslFirstWithScore(zskiplist
*zsl
, double score
) {
5241 for (i
= zsl
->level
-1; i
>= 0; i
--) {
5242 while (x
->forward
[i
] && x
->forward
[i
]->score
< score
)
5245 /* Several elements may share the same score: x->forward[0] is the first
5246 * node whose score is >= the requested one, or NULL if there is none. */
5247 return x
->forward
[0];
5250 /* Find the rank for an element by both score and key.
5251 * Returns 0 when the element cannot be found, rank otherwise.
5252 * Note that the rank is 1-based due to the span of zsl->header to the
5254 static unsigned long zslGetRank(zskiplist
*zsl
, double score
, robj
*o
) {
5256 unsigned long rank
= 0;
5260 for (i
= zsl
->level
-1; i
>= 0; i
--) {
5261 while (x
->forward
[i
] &&
5262 (x
->forward
[i
]->score
< score
||
5263 (x
->forward
[i
]->score
== score
&&
5264 compareStringObjects(x
->forward
[i
]->obj
,o
) <= 0))) {
5265 rank
+= i
> 0 ? x
->span
[i
-1] : 1;
5269 /* x might be equal to zsl->header, so test if obj is non-NULL */
5270 if (x
->obj
&& compareStringObjects(x
->obj
,o
) == 0) {
5277 /* Finds an element by its rank. The rank argument needs to be 1-based. */
5278 zskiplistNode
* zslGetElementByRank(zskiplist
*zsl
, unsigned long rank
) {
5280 unsigned long traversed
= 0;
5284 for (i
= zsl
->level
-1; i
>= 0; i
--) {
5285 while (x
->forward
[i
] && (traversed
+ (i
>0 ? x
->span
[i
-1] : 1)) <= rank
)
5287 traversed
+= i
> 0 ? x
->span
[i
-1] : 1;
5290 if (traversed
== rank
) {
5297 /* The actual Z-commands implementations */
5299 /* This generic command implements both ZADD and ZINCRBY.
5300 * scoreval is the score if the operation is a ZADD (doincrement == 0) or
5301 * the increment if the operation is a ZINCRBY (doincrement == 1). */
5302 static void zaddGenericCommand(redisClient
*c
, robj
*key
, robj
*ele
, double scoreval
, int doincrement
) {
5307 zsetobj
= lookupKeyWrite(c
->db
,key
);
5308 if (zsetobj
== NULL
) {
5309 zsetobj
= createZsetObject();
5310 dictAdd(c
->db
->dict
,key
,zsetobj
);
5313 if (zsetobj
->type
!= REDIS_ZSET
) {
5314 addReply(c
,shared
.wrongtypeerr
);
5320 /* Ok now since we implement both ZADD and ZINCRBY here the code
5321 * needs to handle the two different conditions. It's all about setting
5322 * '*score', that is, the new score to set, to the right value. */
5323 score
= zmalloc(sizeof(double));
5327 /* Read the old score. If the element was not present, start from 0 */
5328 de
= dictFind(zs
->dict
,ele
);
5330 double *oldscore
= dictGetEntryVal(de
);
5331 *score
= *oldscore
+ scoreval
;
5339 /* What follows is a simple remove and re-insert operation that is common
5340 * to both ZADD and ZINCRBY... */
5341 if (dictAdd(zs
->dict
,ele
,score
) == DICT_OK
) {
5342 /* case 1: New element */
5343 incrRefCount(ele
); /* added to hash */
5344 zslInsert(zs
->zsl
,*score
,ele
);
5345 incrRefCount(ele
); /* added to skiplist */
5348 addReplyDouble(c
,*score
);
5350 addReply(c
,shared
.cone
);
5355 /* case 2: Score update operation */
5356 de
= dictFind(zs
->dict
,ele
);
5357 redisAssert(de
!= NULL
);
5358 oldscore
= dictGetEntryVal(de
);
5359 if (*score
!= *oldscore
) {
5362 /* Remove and insert the element in the skip list with new score */
5363 deleted
= zslDelete(zs
->zsl
,*oldscore
,ele
);
5364 redisAssert(deleted
!= 0);
5365 zslInsert(zs
->zsl
,*score
,ele
);
5367 /* Update the score in the hash table */
5368 dictReplace(zs
->dict
,ele
,score
);
5374 addReplyDouble(c
,*score
);
5376 addReply(c
,shared
.czero
);
5380 static void zaddCommand(redisClient
*c
) {
5383 scoreval
= strtod(c
->argv
[2]->ptr
,NULL
);
5384 zaddGenericCommand(c
,c
->argv
[1],c
->argv
[3],scoreval
,0);
5387 static void zincrbyCommand(redisClient
*c
) {
5390 scoreval
= strtod(c
->argv
[2]->ptr
,NULL
);
5391 zaddGenericCommand(c
,c
->argv
[1],c
->argv
[3],scoreval
,1);
5394 static void zremCommand(redisClient
*c
) {
5401 if ((zsetobj
= lookupKeyWriteOrReply(c
,c
->argv
[1],shared
.czero
)) == NULL
||
5402 checkType(c
,zsetobj
,REDIS_ZSET
)) return;
5405 de
= dictFind(zs
->dict
,c
->argv
[2]);
5407 addReply(c
,shared
.czero
);
5410 /* Delete from the skiplist */
5411 oldscore
= dictGetEntryVal(de
);
5412 deleted
= zslDelete(zs
->zsl
,*oldscore
,c
->argv
[2]);
5413 redisAssert(deleted
!= 0);
5415 /* Delete from the hash table */
5416 dictDelete(zs
->dict
,c
->argv
[2]);
5417 if (htNeedsResize(zs
->dict
)) dictResize(zs
->dict
);
5418 if (dictSize(zs
->dict
) == 0) deleteKey(c
->db
,c
->argv
[1]);
5420 addReply(c
,shared
.cone
);
5423 static void zremrangebyscoreCommand(redisClient
*c
) {
5424 double min
= strtod(c
->argv
[2]->ptr
,NULL
);
5425 double max
= strtod(c
->argv
[3]->ptr
,NULL
);
5430 if ((zsetobj
= lookupKeyWriteOrReply(c
,c
->argv
[1],shared
.czero
)) == NULL
||
5431 checkType(c
,zsetobj
,REDIS_ZSET
)) return;
5434 deleted
= zslDeleteRangeByScore(zs
->zsl
,min
,max
,zs
->dict
);
5435 if (htNeedsResize(zs
->dict
)) dictResize(zs
->dict
);
5436 if (dictSize(zs
->dict
) == 0) deleteKey(c
->db
,c
->argv
[1]);
5437 server
.dirty
+= deleted
;
5438 addReplyLong(c
,deleted
);
5441 static void zremrangebyrankCommand(redisClient
*c
) {
5442 int start
= atoi(c
->argv
[2]->ptr
);
5443 int end
= atoi(c
->argv
[3]->ptr
);
5449 if ((zsetobj
= lookupKeyWriteOrReply(c
,c
->argv
[1],shared
.czero
)) == NULL
||
5450 checkType(c
,zsetobj
,REDIS_ZSET
)) return;
5452 llen
= zs
->zsl
->length
;
5454 /* convert negative indexes */
5455 if (start
< 0) start
= llen
+start
;
5456 if (end
< 0) end
= llen
+end
;
5457 if (start
< 0) start
= 0;
5458 if (end
< 0) end
= 0;
5460 /* indexes sanity checks */
5461 if (start
> end
|| start
>= llen
) {
5462 addReply(c
,shared
.czero
);
5465 if (end
>= llen
) end
= llen
-1;
5467 /* increment start and end because zsl*Rank functions
5468 * use 1-based rank */
5469 deleted
= zslDeleteRangeByRank(zs
->zsl
,start
+1,end
+1,zs
->dict
);
5470 if (htNeedsResize(zs
->dict
)) dictResize(zs
->dict
);
5471 if (dictSize(zs
->dict
) == 0) deleteKey(c
->db
,c
->argv
[1]);
5472 server
.dirty
+= deleted
;
5473 addReplyLong(c
, deleted
);
5481 static int qsortCompareZsetopsrcByCardinality(const void *s1
, const void *s2
) {
5482 zsetopsrc
*d1
= (void*) s1
, *d2
= (void*) s2
;
5483 unsigned long size1
, size2
;
5484 size1
= d1
->dict
? dictSize(d1
->dict
) : 0;
5485 size2
= d2
->dict
? dictSize(d2
->dict
) : 0;
5486 return size1
- size2
;
#define REDIS_AGGR_SUM 1
#define REDIS_AGGR_MIN 2
#define REDIS_AGGR_MAX 3

/* Fold 'val' into '*target' according to 'aggregate':
 * REDIS_AGGR_SUM adds it, REDIS_AGGR_MIN keeps the smaller of the two and
 * REDIS_AGGR_MAX keeps the larger. Any other mode trips redisAssert(). */
inline static void zunionInterAggregate(double *target, double val, int aggregate) {
    switch (aggregate) {
    case REDIS_AGGR_SUM:
        *target += val;
        break;
    case REDIS_AGGR_MIN:
        if (val < *target) *target = val;
        break;
    case REDIS_AGGR_MAX:
        if (val > *target) *target = val;
        break;
    default:
        /* safety net */
        redisAssert(0 != 0);
    }
}
5506 static void zunionInterGenericCommand(redisClient
*c
, robj
*dstkey
, int op
) {
5508 int aggregate
= REDIS_AGGR_SUM
;
5515 /* expect zsetnum input keys to be given */
5516 zsetnum
= atoi(c
->argv
[2]->ptr
);
5518 addReplySds(c
,sdsnew("-ERR at least 1 input key is needed for ZUNION/ZINTER\r\n"));
5522 /* test if the expected number of keys would overflow */
5523 if (3+zsetnum
> c
->argc
) {
5524 addReply(c
,shared
.syntaxerr
);
5528 /* read keys to be used for input */
5529 src
= zmalloc(sizeof(zsetopsrc
) * zsetnum
);
5530 for (i
= 0, j
= 3; i
< zsetnum
; i
++, j
++) {
5531 robj
*zsetobj
= lookupKeyWrite(c
->db
,c
->argv
[j
]);
5535 if (zsetobj
->type
!= REDIS_ZSET
) {
5537 addReply(c
,shared
.wrongtypeerr
);
5540 src
[i
].dict
= ((zset
*)zsetobj
->ptr
)->dict
;
5543 /* default all weights to 1 */
5544 src
[i
].weight
= 1.0;
5547 /* parse optional extra arguments */
5549 int remaining
= c
->argc
- j
;
5552 if (remaining
>= (zsetnum
+ 1) && !strcasecmp(c
->argv
[j
]->ptr
,"weights")) {
5554 for (i
= 0; i
< zsetnum
; i
++, j
++, remaining
--) {
5555 src
[i
].weight
= strtod(c
->argv
[j
]->ptr
, NULL
);
5557 } else if (remaining
>= 2 && !strcasecmp(c
->argv
[j
]->ptr
,"aggregate")) {
5559 if (!strcasecmp(c
->argv
[j
]->ptr
,"sum")) {
5560 aggregate
= REDIS_AGGR_SUM
;
5561 } else if (!strcasecmp(c
->argv
[j
]->ptr
,"min")) {
5562 aggregate
= REDIS_AGGR_MIN
;
5563 } else if (!strcasecmp(c
->argv
[j
]->ptr
,"max")) {
5564 aggregate
= REDIS_AGGR_MAX
;
5567 addReply(c
,shared
.syntaxerr
);
5573 addReply(c
,shared
.syntaxerr
);
5579 /* sort sets from the smallest to largest, this will improve our
5580 * algorithm's performance */
5581 qsort(src
,zsetnum
,sizeof(zsetopsrc
), qsortCompareZsetopsrcByCardinality
);
5583 dstobj
= createZsetObject();
5584 dstzset
= dstobj
->ptr
;
5586 if (op
== REDIS_OP_INTER
) {
5587 /* skip going over all entries if the smallest zset is NULL or empty */
5588 if (src
[0].dict
&& dictSize(src
[0].dict
) > 0) {
5589 /* precondition: as src[0].dict is non-empty and the zsets are ordered
5590 * from small to large, all src[i > 0].dict are non-empty too */
5591 di
= dictGetIterator(src
[0].dict
);
5592 while((de
= dictNext(di
)) != NULL
) {
5593 double *score
= zmalloc(sizeof(double)), value
;
5594 *score
= src
[0].weight
* (*(double*)dictGetEntryVal(de
));
5596 for (j
= 1; j
< zsetnum
; j
++) {
5597 dictEntry
*other
= dictFind(src
[j
].dict
,dictGetEntryKey(de
));
5599 value
= src
[j
].weight
* (*(double*)dictGetEntryVal(other
));
5600 zunionInterAggregate(score
, value
, aggregate
);
5606 /* skip entry when not present in every source dict */
5610 robj
*o
= dictGetEntryKey(de
);
5611 dictAdd(dstzset
->dict
,o
,score
);
5612 incrRefCount(o
); /* added to dictionary */
5613 zslInsert(dstzset
->zsl
,*score
,o
);
5614 incrRefCount(o
); /* added to skiplist */
5617 dictReleaseIterator(di
);
5619 } else if (op
== REDIS_OP_UNION
) {
5620 for (i
= 0; i
< zsetnum
; i
++) {
5621 if (!src
[i
].dict
) continue;
5623 di
= dictGetIterator(src
[i
].dict
);
5624 while((de
= dictNext(di
)) != NULL
) {
5625 /* skip key when already processed */
5626 if (dictFind(dstzset
->dict
,dictGetEntryKey(de
)) != NULL
) continue;
5628 double *score
= zmalloc(sizeof(double)), value
;
5629 *score
= src
[i
].weight
* (*(double*)dictGetEntryVal(de
));
5631 /* because the zsets are sorted by size, its only possible
5632 * for sets at larger indices to hold this entry */
5633 for (j
= (i
+1); j
< zsetnum
; j
++) {
5634 dictEntry
*other
= dictFind(src
[j
].dict
,dictGetEntryKey(de
));
5636 value
= src
[j
].weight
* (*(double*)dictGetEntryVal(other
));
5637 zunionInterAggregate(score
, value
, aggregate
);
5641 robj
*o
= dictGetEntryKey(de
);
5642 dictAdd(dstzset
->dict
,o
,score
);
5643 incrRefCount(o
); /* added to dictionary */
5644 zslInsert(dstzset
->zsl
,*score
,o
);
5645 incrRefCount(o
); /* added to skiplist */
5647 dictReleaseIterator(di
);
5650 /* unknown operator */
5651 redisAssert(op
== REDIS_OP_INTER
|| op
== REDIS_OP_UNION
);
5654 deleteKey(c
->db
,dstkey
);
5655 if (dstzset
->zsl
->length
) {
5656 dictAdd(c
->db
->dict
,dstkey
,dstobj
);
5657 incrRefCount(dstkey
);
5658 addReplyLong(c
, dstzset
->zsl
->length
);
5661 decrRefCount(dstobj
);
5662 addReply(c
, shared
.czero
);
5667 static void zunionCommand(redisClient
*c
) {
5668 zunionInterGenericCommand(c
,c
->argv
[1], REDIS_OP_UNION
);
5671 static void zinterCommand(redisClient
*c
) {
5672 zunionInterGenericCommand(c
,c
->argv
[1], REDIS_OP_INTER
);
5675 static void zrangeGenericCommand(redisClient
*c
, int reverse
) {
5677 int start
= atoi(c
->argv
[2]->ptr
);
5678 int end
= atoi(c
->argv
[3]->ptr
);
5687 if (c
->argc
== 5 && !strcasecmp(c
->argv
[4]->ptr
,"withscores")) {
5689 } else if (c
->argc
>= 5) {
5690 addReply(c
,shared
.syntaxerr
);
5694 if ((o
= lookupKeyReadOrReply(c
,c
->argv
[1],shared
.nullmultibulk
)) == NULL
||
5695 checkType(c
,o
,REDIS_ZSET
)) return;
5700 /* convert negative indexes */
5701 if (start
< 0) start
= llen
+start
;
5702 if (end
< 0) end
= llen
+end
;
5703 if (start
< 0) start
= 0;
5704 if (end
< 0) end
= 0;
5706 /* indexes sanity checks */
5707 if (start
> end
|| start
>= llen
) {
5708 /* Out of range start or start > end result in empty list */
5709 addReply(c
,shared
.emptymultibulk
);
5712 if (end
>= llen
) end
= llen
-1;
5713 rangelen
= (end
-start
)+1;
5715 /* check if starting point is trivial, before searching
5716 * the element in log(N) time */
5718 ln
= start
== 0 ? zsl
->tail
: zslGetElementByRank(zsl
, llen
-start
);
5721 zsl
->header
->forward
[0] : zslGetElementByRank(zsl
, start
+1);
5724 /* Return the result in form of a multi-bulk reply */
5725 addReplySds(c
,sdscatprintf(sdsempty(),"*%d\r\n",
5726 withscores
? (rangelen
*2) : rangelen
));
5727 for (j
= 0; j
< rangelen
; j
++) {
5729 addReplyBulk(c
,ele
);
5731 addReplyDouble(c
,ln
->score
);
5732 ln
= reverse
? ln
->backward
: ln
->forward
[0];
5736 static void zrangeCommand(redisClient
*c
) {
5737 zrangeGenericCommand(c
,0);
5740 static void zrevrangeCommand(redisClient
*c
) {
5741 zrangeGenericCommand(c
,1);
5744 /* This command implements both ZRANGEBYSCORE and ZCOUNT.
5745 * If justcount is non-zero, just the count is returned. */
5746 static void genericZrangebyscoreCommand(redisClient
*c
, int justcount
) {
5749 int minex
= 0, maxex
= 0; /* are min or max exclusive? */
5750 int offset
= 0, limit
= -1;
5754 /* Parse the min-max interval. If one of the values is prefixed
5755 * by the "(" character, it's considered "open". For instance
5756 * ZRANGEBYSCORE zset (1.5 (2.5 will match min < x < max
5757 * ZRANGEBYSCORE zset 1.5 2.5 will instead match min <= x <= max */
5758 if (((char*)c
->argv
[2]->ptr
)[0] == '(') {
5759 min
= strtod((char*)c
->argv
[2]->ptr
+1,NULL
);
5762 min
= strtod(c
->argv
[2]->ptr
,NULL
);
5764 if (((char*)c
->argv
[3]->ptr
)[0] == '(') {
5765 max
= strtod((char*)c
->argv
[3]->ptr
+1,NULL
);
5768 max
= strtod(c
->argv
[3]->ptr
,NULL
);
5771 /* Parse "WITHSCORES": note that if the command was called with
5772 * the name ZCOUNT then we are sure that c->argc == 4, so we'll never
5773 * enter the following paths to parse WITHSCORES and LIMIT. */
5774 if (c
->argc
== 5 || c
->argc
== 8) {
5775 if (strcasecmp(c
->argv
[c
->argc
-1]->ptr
,"withscores") == 0)
5780 if (c
->argc
!= (4 + withscores
) && c
->argc
!= (7 + withscores
))
5784 sdsnew("-ERR wrong number of arguments for ZRANGEBYSCORE\r\n"));
5789 if (c
->argc
== (7 + withscores
) && strcasecmp(c
->argv
[4]->ptr
,"limit")) {
5790 addReply(c
,shared
.syntaxerr
);
5792 } else if (c
->argc
== (7 + withscores
)) {
5793 offset
= atoi(c
->argv
[5]->ptr
);
5794 limit
= atoi(c
->argv
[6]->ptr
);
5795 if (offset
< 0) offset
= 0;
5798 /* Ok, lookup the key and get the range */
5799 o
= lookupKeyRead(c
->db
,c
->argv
[1]);
5801 addReply(c
,justcount
? shared
.czero
: shared
.nullmultibulk
);
5803 if (o
->type
!= REDIS_ZSET
) {
5804 addReply(c
,shared
.wrongtypeerr
);
5806 zset
*zsetobj
= o
->ptr
;
5807 zskiplist
*zsl
= zsetobj
->zsl
;
5809 robj
*ele
, *lenobj
= NULL
;
5810 unsigned long rangelen
= 0;
5812 /* Get the first node with the score >= min, or with
5813 * score > min if 'minex' is true. */
5814 ln
= zslFirstWithScore(zsl
,min
);
5815 while (minex
&& ln
&& ln
->score
== min
) ln
= ln
->forward
[0];
5818 /* No element matching the speciifed interval */
5819 addReply(c
,justcount
? shared
.czero
: shared
.emptymultibulk
);
5823 /* We don't know in advance how many matching elements there
5824 * are in the list, so we push this object that will represent
5825 * the multi-bulk length in the output buffer, and will "fix"
5828 lenobj
= createObject(REDIS_STRING
,NULL
);
5830 decrRefCount(lenobj
);
5833 while(ln
&& (maxex
? (ln
->score
< max
) : (ln
->score
<= max
))) {
5836 ln
= ln
->forward
[0];
5839 if (limit
== 0) break;
5842 addReplyBulk(c
,ele
);
5844 addReplyDouble(c
,ln
->score
);
5846 ln
= ln
->forward
[0];
5848 if (limit
> 0) limit
--;
5851 addReplyLong(c
,(long)rangelen
);
5853 lenobj
->ptr
= sdscatprintf(sdsempty(),"*%lu\r\n",
5854 withscores
? (rangelen
*2) : rangelen
);
5860 static void zrangebyscoreCommand(redisClient
*c
) {
5861 genericZrangebyscoreCommand(c
,0);
5864 static void zcountCommand(redisClient
*c
) {
5865 genericZrangebyscoreCommand(c
,1);
5868 static void zcardCommand(redisClient
*c
) {
5872 if ((o
= lookupKeyReadOrReply(c
,c
->argv
[1],shared
.czero
)) == NULL
||
5873 checkType(c
,o
,REDIS_ZSET
)) return;
5876 addReplyUlong(c
,zs
->zsl
->length
);
5879 static void zscoreCommand(redisClient
*c
) {
5884 if ((o
= lookupKeyReadOrReply(c
,c
->argv
[1],shared
.nullbulk
)) == NULL
||
5885 checkType(c
,o
,REDIS_ZSET
)) return;
5888 de
= dictFind(zs
->dict
,c
->argv
[2]);
5890 addReply(c
,shared
.nullbulk
);
5892 double *score
= dictGetEntryVal(de
);
5894 addReplyDouble(c
,*score
);
5898 static void zrankGenericCommand(redisClient
*c
, int reverse
) {
5906 if ((o
= lookupKeyReadOrReply(c
,c
->argv
[1],shared
.nullbulk
)) == NULL
||
5907 checkType(c
,o
,REDIS_ZSET
)) return;
5911 de
= dictFind(zs
->dict
,c
->argv
[2]);
5913 addReply(c
,shared
.nullbulk
);
5917 score
= dictGetEntryVal(de
);
5918 rank
= zslGetRank(zsl
, *score
, c
->argv
[2]);
5921 addReplyLong(c
, zsl
->length
- rank
);
5923 addReplyLong(c
, rank
-1);
5926 addReply(c
,shared
.nullbulk
);
5930 static void zrankCommand(redisClient
*c
) {
5931 zrankGenericCommand(c
, 0);
5934 static void zrevrankCommand(redisClient
*c
) {
5935 zrankGenericCommand(c
, 1);
5938 /* =================================== Hashes =============================== */
5939 static void hsetCommand(redisClient
*c
) {
5941 robj
*o
= lookupKeyWrite(c
->db
,c
->argv
[1]);
5944 o
= createHashObject();
5945 dictAdd(c
->db
->dict
,c
->argv
[1],o
);
5946 incrRefCount(c
->argv
[1]);
5948 if (o
->type
!= REDIS_HASH
) {
5949 addReply(c
,shared
.wrongtypeerr
);
5953 /* We want to convert the zipmap into an hash table right now if the
5954 * entry to be added is too big. Note that we check if the object
5955 * is integer encoded before to try fetching the length in the test below.
5956 * This is because integers are small, but currently stringObjectLen()
5957 * performs a slow conversion: not worth it. */
5958 if (o
->encoding
== REDIS_ENCODING_ZIPMAP
&&
5959 ((c
->argv
[2]->encoding
== REDIS_ENCODING_RAW
&&
5960 sdslen(c
->argv
[2]->ptr
) > server
.hash_max_zipmap_value
) ||
5961 (c
->argv
[3]->encoding
== REDIS_ENCODING_RAW
&&
5962 sdslen(c
->argv
[3]->ptr
) > server
.hash_max_zipmap_value
)))
5964 convertToRealHash(o
);
5967 if (o
->encoding
== REDIS_ENCODING_ZIPMAP
) {
5968 unsigned char *zm
= o
->ptr
;
5969 robj
*valobj
= getDecodedObject(c
->argv
[3]);
5971 zm
= zipmapSet(zm
,c
->argv
[2]->ptr
,sdslen(c
->argv
[2]->ptr
),
5972 valobj
->ptr
,sdslen(valobj
->ptr
),&update
);
5973 decrRefCount(valobj
);
5976 /* And here there is the second check for hash conversion. */
5977 if (zipmapLen(zm
) > server
.hash_max_zipmap_entries
)
5978 convertToRealHash(o
);
5980 tryObjectEncoding(c
->argv
[2]);
5981 /* note that c->argv[3] is already encoded, as the latest arg
5982 * of a bulk command is always integer encoded if possible. */
5983 if (dictReplace(o
->ptr
,c
->argv
[2],c
->argv
[3])) {
5984 incrRefCount(c
->argv
[2]);
5988 incrRefCount(c
->argv
[3]);
5991 addReplySds(c
,sdscatprintf(sdsempty(),":%d\r\n",update
== 0));
5994 static void hincrbyCommand(redisClient
*c
) {
5995 long long value
= 0, incr
= 0;
5996 robj
*o
= lookupKeyWrite(c
->db
,c
->argv
[1]);
5999 o
= createHashObject();
6000 dictAdd(c
->db
->dict
,c
->argv
[1],o
);
6001 incrRefCount(c
->argv
[1]);
6003 if (o
->type
!= REDIS_HASH
) {
6004 addReply(c
,shared
.wrongtypeerr
);
6009 incr
= strtoll(c
->argv
[3]->ptr
, NULL
, 10);
6010 if (o
->encoding
== REDIS_ENCODING_ZIPMAP
) {
6011 unsigned char *zm
= o
->ptr
;
6012 unsigned char *zval
;
6015 /* Find value if already present in hash */
6016 if (zipmapGet(zm
,c
->argv
[2]->ptr
,sdslen(c
->argv
[2]->ptr
),
6018 /* strtoll needs the char* to have a trailing \0, but
6019 * the zipmap doesn't include them. */
6020 sds szval
= sdsnewlen(zval
, zvlen
);
6021 value
= strtoll(szval
,NULL
,10);
6026 sds svalue
= sdscatprintf(sdsempty(),"%lld",value
);
6027 zm
= zipmapSet(zm
,c
->argv
[2]->ptr
,sdslen(c
->argv
[2]->ptr
),
6028 (unsigned char*)svalue
,sdslen(svalue
),NULL
);
6032 /* Check if the zipmap needs to be converted. */
6033 if (zipmapLen(zm
) > server
.hash_max_zipmap_entries
)
6034 convertToRealHash(o
);
6039 /* Find value if already present in hash */
6040 de
= dictFind(o
->ptr
,c
->argv
[2]);
6042 hval
= dictGetEntryVal(de
);
6043 if (hval
->encoding
== REDIS_ENCODING_RAW
)
6044 value
= strtoll(hval
->ptr
,NULL
,10);
6045 else if (hval
->encoding
== REDIS_ENCODING_INT
)
6046 value
= (long)hval
->ptr
;
6048 redisAssert(1 != 1);
6052 hval
= createObject(REDIS_STRING
,sdscatprintf(sdsempty(),"%lld",value
));
6053 tryObjectEncoding(hval
);
6054 if (dictReplace(o
->ptr
,c
->argv
[2],hval
)) {
6055 incrRefCount(c
->argv
[2]);
6060 addReplyLongLong(c
, value
);
6063 static void hgetCommand(redisClient
*c
) {
6066 if ((o
= lookupKeyReadOrReply(c
,c
->argv
[1],shared
.nullbulk
)) == NULL
||
6067 checkType(c
,o
,REDIS_HASH
)) return;
6069 if (o
->encoding
== REDIS_ENCODING_ZIPMAP
) {
6070 unsigned char *zm
= o
->ptr
;
6075 field
= getDecodedObject(c
->argv
[2]);
6076 if (zipmapGet(zm
,field
->ptr
,sdslen(field
->ptr
), &val
,&vlen
)) {
6077 addReplySds(c
,sdscatprintf(sdsempty(),"$%u\r\n", vlen
));
6078 addReplySds(c
,sdsnewlen(val
,vlen
));
6079 addReply(c
,shared
.crlf
);
6080 decrRefCount(field
);
6083 addReply(c
,shared
.nullbulk
);
6084 decrRefCount(field
);
6088 struct dictEntry
*de
;
6090 de
= dictFind(o
->ptr
,c
->argv
[2]);
6092 addReply(c
,shared
.nullbulk
);
6094 robj
*e
= dictGetEntryVal(de
);
6101 static void hdelCommand(redisClient
*c
) {
6105 if ((o
= lookupKeyWriteOrReply(c
,c
->argv
[1],shared
.czero
)) == NULL
||
6106 checkType(c
,o
,REDIS_HASH
)) return;
6108 if (o
->encoding
== REDIS_ENCODING_ZIPMAP
) {
6109 robj
*field
= getDecodedObject(c
->argv
[2]);
6111 o
->ptr
= zipmapDel((unsigned char*) o
->ptr
,
6112 (unsigned char*) field
->ptr
,
6113 sdslen(field
->ptr
), &deleted
);
6114 decrRefCount(field
);
6115 if (zipmapLen((unsigned char*) o
->ptr
) == 0)
6116 deleteKey(c
->db
,c
->argv
[1]);
6118 deleted
= dictDelete((dict
*)o
->ptr
,c
->argv
[2]) == DICT_OK
;
6119 if (htNeedsResize(o
->ptr
)) dictResize(o
->ptr
);
6120 if (dictSize((dict
*)o
->ptr
) == 0) deleteKey(c
->db
,c
->argv
[1]);
6122 if (deleted
) server
.dirty
++;
6123 addReply(c
,deleted
? shared
.cone
: shared
.czero
);
6126 static void hlenCommand(redisClient
*c
) {
6130 if ((o
= lookupKeyReadOrReply(c
,c
->argv
[1],shared
.czero
)) == NULL
||
6131 checkType(c
,o
,REDIS_HASH
)) return;
6133 len
= (o
->encoding
== REDIS_ENCODING_ZIPMAP
) ?
6134 zipmapLen((unsigned char*)o
->ptr
) : dictSize((dict
*)o
->ptr
);
6135 addReplyUlong(c
,len
);
6138 #define REDIS_GETALL_KEYS 1
6139 #define REDIS_GETALL_VALS 2
6140 static void genericHgetallCommand(redisClient
*c
, int flags
) {
6142 unsigned long count
= 0;
6144 if ((o
= lookupKeyReadOrReply(c
,c
->argv
[1],shared
.nullmultibulk
)) == NULL
6145 || checkType(c
,o
,REDIS_HASH
)) return;
6147 lenobj
= createObject(REDIS_STRING
,NULL
);
6149 decrRefCount(lenobj
);
6151 if (o
->encoding
== REDIS_ENCODING_ZIPMAP
) {
6152 unsigned char *p
= zipmapRewind(o
->ptr
);
6153 unsigned char *field
, *val
;
6154 unsigned int flen
, vlen
;
6156 while((p
= zipmapNext(p
,&field
,&flen
,&val
,&vlen
)) != NULL
) {
6159 if (flags
& REDIS_GETALL_KEYS
) {
6160 aux
= createStringObject((char*)field
,flen
);
6161 addReplyBulk(c
,aux
);
6165 if (flags
& REDIS_GETALL_VALS
) {
6166 aux
= createStringObject((char*)val
,vlen
);
6167 addReplyBulk(c
,aux
);
6173 dictIterator
*di
= dictGetIterator(o
->ptr
);
6176 while((de
= dictNext(di
)) != NULL
) {
6177 robj
*fieldobj
= dictGetEntryKey(de
);
6178 robj
*valobj
= dictGetEntryVal(de
);
6180 if (flags
& REDIS_GETALL_KEYS
) {
6181 addReplyBulk(c
,fieldobj
);
6184 if (flags
& REDIS_GETALL_VALS
) {
6185 addReplyBulk(c
,valobj
);
6189 dictReleaseIterator(di
);
6191 lenobj
->ptr
= sdscatprintf(sdsempty(),"*%lu\r\n",count
);
6194 static void hkeysCommand(redisClient
*c
) {
6195 genericHgetallCommand(c
,REDIS_GETALL_KEYS
);
6198 static void hvalsCommand(redisClient
*c
) {
6199 genericHgetallCommand(c
,REDIS_GETALL_VALS
);
6202 static void hgetallCommand(redisClient
*c
) {
6203 genericHgetallCommand(c
,REDIS_GETALL_KEYS
|REDIS_GETALL_VALS
);
6206 static void hexistsCommand(redisClient
*c
) {
6210 if ((o
= lookupKeyReadOrReply(c
,c
->argv
[1],shared
.czero
)) == NULL
||
6211 checkType(c
,o
,REDIS_HASH
)) return;
6213 if (o
->encoding
== REDIS_ENCODING_ZIPMAP
) {
6215 unsigned char *zm
= o
->ptr
;
6217 field
= getDecodedObject(c
->argv
[2]);
6218 exists
= zipmapExists(zm
,field
->ptr
,sdslen(field
->ptr
));
6219 decrRefCount(field
);
6221 exists
= dictFind(o
->ptr
,c
->argv
[2]) != NULL
;
6223 addReply(c
,exists
? shared
.cone
: shared
.czero
);
6226 static void convertToRealHash(robj
*o
) {
6227 unsigned char *key
, *val
, *p
, *zm
= o
->ptr
;
6228 unsigned int klen
, vlen
;
6229 dict
*dict
= dictCreate(&hashDictType
,NULL
);
6231 assert(o
->type
== REDIS_HASH
&& o
->encoding
!= REDIS_ENCODING_HT
);
6232 p
= zipmapRewind(zm
);
6233 while((p
= zipmapNext(p
,&key
,&klen
,&val
,&vlen
)) != NULL
) {
6234 robj
*keyobj
, *valobj
;
6236 keyobj
= createStringObject((char*)key
,klen
);
6237 valobj
= createStringObject((char*)val
,vlen
);
6238 tryObjectEncoding(keyobj
);
6239 tryObjectEncoding(valobj
);
6240 dictAdd(dict
,keyobj
,valobj
);
6242 o
->encoding
= REDIS_ENCODING_HT
;
6247 /* ========================= Non type-specific commands ==================== */
6249 static void flushdbCommand(redisClient
*c
) {
6250 server
.dirty
+= dictSize(c
->db
->dict
);
6251 dictEmpty(c
->db
->dict
);
6252 dictEmpty(c
->db
->expires
);
6253 addReply(c
,shared
.ok
);
6256 static void flushallCommand(redisClient
*c
) {
6257 server
.dirty
+= emptyDb();
6258 addReply(c
,shared
.ok
);
6259 if (server
.bgsavechildpid
!= -1) {
6260 kill(server
.bgsavechildpid
,SIGKILL
);
6261 rdbRemoveTempFile(server
.bgsavechildpid
);
6263 rdbSave(server
.dbfilename
);
6267 static redisSortOperation
*createSortOperation(int type
, robj
*pattern
) {
6268 redisSortOperation
*so
= zmalloc(sizeof(*so
));
6270 so
->pattern
= pattern
;
6274 /* Return the value associated to the key with a name obtained
6275 * substituting the first occurence of '*' in 'pattern' with 'subst' */
6276 static robj
*lookupKeyByPattern(redisDb
*db
, robj
*pattern
, robj
*subst
) {
6280 int prefixlen
, sublen
, postfixlen
;
6281 /* Expoit the internal sds representation to create a sds string allocated on the stack in order to make this function faster */
6285 char buf
[REDIS_SORTKEY_MAX
+1];
6288 /* If the pattern is "#" return the substitution object itself in order
6289 * to implement the "SORT ... GET #" feature. */
6290 spat
= pattern
->ptr
;
6291 if (spat
[0] == '#' && spat
[1] == '\0') {
6295 /* The substitution object may be specially encoded. If so we create
6296 * a decoded object on the fly. Otherwise getDecodedObject will just
6297 * increment the ref count, that we'll decrement later. */
6298 subst
= getDecodedObject(subst
);
6301 if (sdslen(spat
)+sdslen(ssub
)-1 > REDIS_SORTKEY_MAX
) return NULL
;
6302 p
= strchr(spat
,'*');
6304 decrRefCount(subst
);
6309 sublen
= sdslen(ssub
);
6310 postfixlen
= sdslen(spat
)-(prefixlen
+1);
6311 memcpy(keyname
.buf
,spat
,prefixlen
);
6312 memcpy(keyname
.buf
+prefixlen
,ssub
,sublen
);
6313 memcpy(keyname
.buf
+prefixlen
+sublen
,p
+1,postfixlen
);
6314 keyname
.buf
[prefixlen
+sublen
+postfixlen
] = '\0';
6315 keyname
.len
= prefixlen
+sublen
+postfixlen
;
6317 initStaticStringObject(keyobj
,((char*)&keyname
)+(sizeof(long)*2))
6318 decrRefCount(subst
);
6320 /* printf("lookup '%s' => %p\n", keyname.buf,de); */
6321 return lookupKeyRead(db
,&keyobj
);
6324 /* sortCompare() is used by qsort in sortCommand(). Given that qsort_r with
6325 * the additional parameter is not standard but a BSD-specific we have to
6326 * pass sorting parameters via the global 'server' structure */
6327 static int sortCompare(const void *s1
, const void *s2
) {
6328 const redisSortObject
*so1
= s1
, *so2
= s2
;
6331 if (!server
.sort_alpha
) {
6332 /* Numeric sorting. Here it's trivial as we precomputed scores */
6333 if (so1
->u
.score
> so2
->u
.score
) {
6335 } else if (so1
->u
.score
< so2
->u
.score
) {
6341 /* Alphanumeric sorting */
6342 if (server
.sort_bypattern
) {
6343 if (!so1
->u
.cmpobj
|| !so2
->u
.cmpobj
) {
6344 /* At least one compare object is NULL */
6345 if (so1
->u
.cmpobj
== so2
->u
.cmpobj
)
6347 else if (so1
->u
.cmpobj
== NULL
)
6352 /* We have both the objects, use strcoll */
6353 cmp
= strcoll(so1
->u
.cmpobj
->ptr
,so2
->u
.cmpobj
->ptr
);
6356 /* Compare elements directly */
6359 dec1
= getDecodedObject(so1
->obj
);
6360 dec2
= getDecodedObject(so2
->obj
);
6361 cmp
= strcoll(dec1
->ptr
,dec2
->ptr
);
6366 return server
.sort_desc
? -cmp
: cmp
;
6369 /* The SORT command is the most complex command in Redis. Warning: this code
6370 * is optimized for speed and a bit less for readability */
6371 static void sortCommand(redisClient
*c
) {
6374 int desc
= 0, alpha
= 0;
6375 int limit_start
= 0, limit_count
= -1, start
, end
;
6376 int j
, dontsort
= 0, vectorlen
;
6377 int getop
= 0; /* GET operation counter */
6378 robj
*sortval
, *sortby
= NULL
, *storekey
= NULL
;
6379 redisSortObject
*vector
; /* Resulting vector to sort */
6381 /* Lookup the key to sort. It must be of the right types */
6382 sortval
= lookupKeyRead(c
->db
,c
->argv
[1]);
6383 if (sortval
== NULL
) {
6384 addReply(c
,shared
.nullmultibulk
);
6387 if (sortval
->type
!= REDIS_SET
&& sortval
->type
!= REDIS_LIST
&&
6388 sortval
->type
!= REDIS_ZSET
)
6390 addReply(c
,shared
.wrongtypeerr
);
6394 /* Create a list of operations to perform for every sorted element.
6395 * Operations can be GET/DEL/INCR/DECR */
6396 operations
= listCreate();
6397 listSetFreeMethod(operations
,zfree
);
6400 /* Now we need to protect sortval incrementing its count, in the future
6401 * SORT may have options able to overwrite/delete keys during the sorting
6402 * and the sorted key itself may get destroied */
6403 incrRefCount(sortval
);
6405 /* The SORT command has an SQL-alike syntax, parse it */
6406 while(j
< c
->argc
) {
6407 int leftargs
= c
->argc
-j
-1;
6408 if (!strcasecmp(c
->argv
[j
]->ptr
,"asc")) {
6410 } else if (!strcasecmp(c
->argv
[j
]->ptr
,"desc")) {
6412 } else if (!strcasecmp(c
->argv
[j
]->ptr
,"alpha")) {
6414 } else if (!strcasecmp(c
->argv
[j
]->ptr
,"limit") && leftargs
>= 2) {
6415 limit_start
= atoi(c
->argv
[j
+1]->ptr
);
6416 limit_count
= atoi(c
->argv
[j
+2]->ptr
);
6418 } else if (!strcasecmp(c
->argv
[j
]->ptr
,"store") && leftargs
>= 1) {
6419 storekey
= c
->argv
[j
+1];
6421 } else if (!strcasecmp(c
->argv
[j
]->ptr
,"by") && leftargs
>= 1) {
6422 sortby
= c
->argv
[j
+1];
6423 /* If the BY pattern does not contain '*', i.e. it is constant,
6424 * we don't need to sort nor to lookup the weight keys. */
6425 if (strchr(c
->argv
[j
+1]->ptr
,'*') == NULL
) dontsort
= 1;
6427 } else if (!strcasecmp(c
->argv
[j
]->ptr
,"get") && leftargs
>= 1) {
6428 listAddNodeTail(operations
,createSortOperation(
6429 REDIS_SORT_GET
,c
->argv
[j
+1]));
6433 decrRefCount(sortval
);
6434 listRelease(operations
);
6435 addReply(c
,shared
.syntaxerr
);
6441 /* Load the sorting vector with all the objects to sort */
6442 switch(sortval
->type
) {
6443 case REDIS_LIST
: vectorlen
= listLength((list
*)sortval
->ptr
); break;
6444 case REDIS_SET
: vectorlen
= dictSize((dict
*)sortval
->ptr
); break;
6445 case REDIS_ZSET
: vectorlen
= dictSize(((zset
*)sortval
->ptr
)->dict
); break;
6446 default: vectorlen
= 0; redisAssert(0); /* Avoid GCC warning */
6448 vector
= zmalloc(sizeof(redisSortObject
)*vectorlen
);
6451 if (sortval
->type
== REDIS_LIST
) {
6452 list
*list
= sortval
->ptr
;
6456 listRewind(list
,&li
);
6457 while((ln
= listNext(&li
))) {
6458 robj
*ele
= ln
->value
;
6459 vector
[j
].obj
= ele
;
6460 vector
[j
].u
.score
= 0;
6461 vector
[j
].u
.cmpobj
= NULL
;
6469 if (sortval
->type
== REDIS_SET
) {
6472 zset
*zs
= sortval
->ptr
;
6476 di
= dictGetIterator(set
);
6477 while((setele
= dictNext(di
)) != NULL
) {
6478 vector
[j
].obj
= dictGetEntryKey(setele
);
6479 vector
[j
].u
.score
= 0;
6480 vector
[j
].u
.cmpobj
= NULL
;
6483 dictReleaseIterator(di
);
6485 redisAssert(j
== vectorlen
);
6487 /* Now it's time to load the right scores in the sorting vector */
6488 if (dontsort
== 0) {
6489 for (j
= 0; j
< vectorlen
; j
++) {
6493 byval
= lookupKeyByPattern(c
->db
,sortby
,vector
[j
].obj
);
6494 if (!byval
|| byval
->type
!= REDIS_STRING
) continue;
6496 vector
[j
].u
.cmpobj
= getDecodedObject(byval
);
6498 if (byval
->encoding
== REDIS_ENCODING_RAW
) {
6499 vector
[j
].u
.score
= strtod(byval
->ptr
,NULL
);
6501 /* Don't need to decode the object if it's
6502 * integer-encoded (the only encoding supported) so
6503 * far. We can just cast it */
6504 if (byval
->encoding
== REDIS_ENCODING_INT
) {
6505 vector
[j
].u
.score
= (long)byval
->ptr
;
6507 redisAssert(1 != 1);
6512 if (vector
[j
].obj
->encoding
== REDIS_ENCODING_RAW
)
6513 vector
[j
].u
.score
= strtod(vector
[j
].obj
->ptr
,NULL
);
6515 if (vector
[j
].obj
->encoding
== REDIS_ENCODING_INT
)
6516 vector
[j
].u
.score
= (long) vector
[j
].obj
->ptr
;
6518 redisAssert(1 != 1);
6525 /* We are ready to sort the vector... perform a bit of sanity check
6526 * on the LIMIT option too. We'll use a partial version of quicksort. */
6527 start
= (limit_start
< 0) ? 0 : limit_start
;
6528 end
= (limit_count
< 0) ? vectorlen
-1 : start
+limit_count
-1;
6529 if (start
>= vectorlen
) {
6530 start
= vectorlen
-1;
6533 if (end
>= vectorlen
) end
= vectorlen
-1;
6535 if (dontsort
== 0) {
6536 server
.sort_desc
= desc
;
6537 server
.sort_alpha
= alpha
;
6538 server
.sort_bypattern
= sortby
? 1 : 0;
6539 if (sortby
&& (start
!= 0 || end
!= vectorlen
-1))
6540 pqsort(vector
,vectorlen
,sizeof(redisSortObject
),sortCompare
, start
,end
);
6542 qsort(vector
,vectorlen
,sizeof(redisSortObject
),sortCompare
);
6545 /* Send command output to the output buffer, performing the specified
6546 * GET/DEL/INCR/DECR operations if any. */
6547 outputlen
= getop
? getop
*(end
-start
+1) : end
-start
+1;
6548 if (storekey
== NULL
) {
6549 /* STORE option not specified, sent the sorting result to client */
6550 addReplySds(c
,sdscatprintf(sdsempty(),"*%d\r\n",outputlen
));
6551 for (j
= start
; j
<= end
; j
++) {
6555 if (!getop
) addReplyBulk(c
,vector
[j
].obj
);
6556 listRewind(operations
,&li
);
6557 while((ln
= listNext(&li
))) {
6558 redisSortOperation
*sop
= ln
->value
;
6559 robj
*val
= lookupKeyByPattern(c
->db
,sop
->pattern
,
6562 if (sop
->type
== REDIS_SORT_GET
) {
6563 if (!val
|| val
->type
!= REDIS_STRING
) {
6564 addReply(c
,shared
.nullbulk
);
6566 addReplyBulk(c
,val
);
6569 redisAssert(sop
->type
== REDIS_SORT_GET
); /* always fails */
6574 robj
*listObject
= createListObject();
6575 list
*listPtr
= (list
*) listObject
->ptr
;
6577 /* STORE option specified, set the sorting result as a List object */
6578 for (j
= start
; j
<= end
; j
++) {
6583 listAddNodeTail(listPtr
,vector
[j
].obj
);
6584 incrRefCount(vector
[j
].obj
);
6586 listRewind(operations
,&li
);
6587 while((ln
= listNext(&li
))) {
6588 redisSortOperation
*sop
= ln
->value
;
6589 robj
*val
= lookupKeyByPattern(c
->db
,sop
->pattern
,
6592 if (sop
->type
== REDIS_SORT_GET
) {
6593 if (!val
|| val
->type
!= REDIS_STRING
) {
6594 listAddNodeTail(listPtr
,createStringObject("",0));
6596 listAddNodeTail(listPtr
,val
);
6600 redisAssert(sop
->type
== REDIS_SORT_GET
); /* always fails */
6604 if (dictReplace(c
->db
->dict
,storekey
,listObject
)) {
6605 incrRefCount(storekey
);
6607 /* Note: we add 1 because the DB is dirty anyway since even if the
6608 * SORT result is empty a new key is set and maybe the old content
6610 server
.dirty
+= 1+outputlen
;
6611 addReplySds(c
,sdscatprintf(sdsempty(),":%d\r\n",outputlen
));
6615 decrRefCount(sortval
);
6616 listRelease(operations
);
6617 for (j
= 0; j
< vectorlen
; j
++) {
6618 if (sortby
&& alpha
&& vector
[j
].u
.cmpobj
)
6619 decrRefCount(vector
[j
].u
.cmpobj
);
6624 /* Convert an amount of bytes into a human readable string in the form
6625 * of 100B, 2G, 100M, 4K, and so forth. */
6626 static void bytesToHuman(char *s
, unsigned long long n
) {
6631 sprintf(s
,"%lluB",n
);
6633 } else if (n
< (1024*1024)) {
6634 d
= (double)n
/(1024);
6635 sprintf(s
,"%.2fK",d
);
6636 } else if (n
< (1024LL*1024*1024)) {
6637 d
= (double)n
/(1024*1024);
6638 sprintf(s
,"%.2fM",d
);
6639 } else if (n
< (1024LL*1024*1024*1024)) {
6640 d
= (double)n
/(1024LL*1024*1024);
6641 sprintf(s
,"%.2fG",d
);
6645 /* Create the string returned by the INFO command. This is decoupled
6646 * from the INFO command itself as we need to report the same information
6647 * on memory corruption problems. */
6648 static sds
genRedisInfoString(void) {
6650 time_t uptime
= time(NULL
)-server
.stat_starttime
;
6654 bytesToHuman(hmem
,zmalloc_used_memory());
6655 info
= sdscatprintf(sdsempty(),
6656 "redis_version:%s\r\n"
6658 "multiplexing_api:%s\r\n"
6659 "process_id:%ld\r\n"
6660 "uptime_in_seconds:%ld\r\n"
6661 "uptime_in_days:%ld\r\n"
6662 "connected_clients:%d\r\n"
6663 "connected_slaves:%d\r\n"
6664 "blocked_clients:%d\r\n"
6665 "used_memory:%zu\r\n"
6666 "used_memory_human:%s\r\n"
6667 "changes_since_last_save:%lld\r\n"
6668 "bgsave_in_progress:%d\r\n"
6669 "last_save_time:%ld\r\n"
6670 "bgrewriteaof_in_progress:%d\r\n"
6671 "total_connections_received:%lld\r\n"
6672 "total_commands_processed:%lld\r\n"
6673 "expired_keys:%lld\r\n"
6674 "hash_max_zipmap_entries:%ld\r\n"
6675 "hash_max_zipmap_value:%ld\r\n"
6676 "pubsub_channels:%ld\r\n"
6677 "pubsub_patterns:%u\r\n"
6681 (sizeof(long) == 8) ? "64" : "32",
6686 listLength(server
.clients
)-listLength(server
.slaves
),
6687 listLength(server
.slaves
),
6688 server
.blpop_blocked_clients
,
6689 zmalloc_used_memory(),
6692 server
.bgsavechildpid
!= -1,
6694 server
.bgrewritechildpid
!= -1,
6695 server
.stat_numconnections
,
6696 server
.stat_numcommands
,
6697 server
.stat_expiredkeys
,
6698 server
.hash_max_zipmap_entries
,
6699 server
.hash_max_zipmap_value
,
6700 dictSize(server
.pubsub_channels
),
6701 listLength(server
.pubsub_patterns
),
6702 server
.vm_enabled
!= 0,
6703 server
.masterhost
== NULL
? "master" : "slave"
6705 if (server
.masterhost
) {
6706 info
= sdscatprintf(info
,
6707 "master_host:%s\r\n"
6708 "master_port:%d\r\n"
6709 "master_link_status:%s\r\n"
6710 "master_last_io_seconds_ago:%d\r\n"
6713 (server
.replstate
== REDIS_REPL_CONNECTED
) ?
6715 server
.master
? ((int)(time(NULL
)-server
.master
->lastinteraction
)) : -1
6718 if (server
.vm_enabled
) {
6720 info
= sdscatprintf(info
,
6721 "vm_conf_max_memory:%llu\r\n"
6722 "vm_conf_page_size:%llu\r\n"
6723 "vm_conf_pages:%llu\r\n"
6724 "vm_stats_used_pages:%llu\r\n"
6725 "vm_stats_swapped_objects:%llu\r\n"
6726 "vm_stats_swappin_count:%llu\r\n"
6727 "vm_stats_swappout_count:%llu\r\n"
6728 "vm_stats_io_newjobs_len:%lu\r\n"
6729 "vm_stats_io_processing_len:%lu\r\n"
6730 "vm_stats_io_processed_len:%lu\r\n"
6731 "vm_stats_io_active_threads:%lu\r\n"
6732 "vm_stats_blocked_clients:%lu\r\n"
6733 ,(unsigned long long) server
.vm_max_memory
,
6734 (unsigned long long) server
.vm_page_size
,
6735 (unsigned long long) server
.vm_pages
,
6736 (unsigned long long) server
.vm_stats_used_pages
,
6737 (unsigned long long) server
.vm_stats_swapped_objects
,
6738 (unsigned long long) server
.vm_stats_swapins
,
6739 (unsigned long long) server
.vm_stats_swapouts
,
6740 (unsigned long) listLength(server
.io_newjobs
),
6741 (unsigned long) listLength(server
.io_processing
),
6742 (unsigned long) listLength(server
.io_processed
),
6743 (unsigned long) server
.io_active_threads
,
6744 (unsigned long) server
.vm_blocked_clients
6748 for (j
= 0; j
< server
.dbnum
; j
++) {
6749 long long keys
, vkeys
;
6751 keys
= dictSize(server
.db
[j
].dict
);
6752 vkeys
= dictSize(server
.db
[j
].expires
);
6753 if (keys
|| vkeys
) {
6754 info
= sdscatprintf(info
, "db%d:keys=%lld,expires=%lld\r\n",
6761 static void infoCommand(redisClient
*c
) {
6762 sds info
= genRedisInfoString();
6763 addReplySds(c
,sdscatprintf(sdsempty(),"$%lu\r\n",
6764 (unsigned long)sdslen(info
)));
6765 addReplySds(c
,info
);
6766 addReply(c
,shared
.crlf
);
/* MONITOR command: flag the client as both REDIS_SLAVE and REDIS_MONITOR,
 * append it to server.monitors and reply with shared.ok. A client that
 * already has REDIS_SLAVE set is ignored: the function returns without
 * sending any reply. */
6769 static void monitorCommand(redisClient
*c
) {
6770 /* Ignore MONITOR if the client is already a slave or in monitor mode. */
6771 if (c
->flags
& REDIS_SLAVE
) return;
6773 c
->flags
|= (REDIS_SLAVE
|REDIS_MONITOR
);
6775 listAddNodeTail(server
.monitors
,c
);
6776 addReply(c
,shared
.ok
);
6779 /* ================================= Expire ================================= */
/* Drop the expire associated with 'key' by deleting its entry from
 * db->expires. The result depends on whether dictDelete() reports DICT_OK
 * (an entry was actually removed) or not. */
6780 static int removeExpire(redisDb
*db
, robj
*key
) {
6781 if (dictDelete(db
->expires
,key
) == DICT_OK
) {
/* Associate the expire time 'when' with 'key' by adding an entry to
 * db->expires. The time_t is not allocated separately: it is cast to a
 * pointer and stored directly as the entry value. A DICT_ERR result from
 * dictAdd() is handled as a distinct case (presumably the key already has
 * an expire set -- confirm against dictAdd()). */
6788 static int setExpire(redisDb
*db
, robj
*key
, time_t when
) {
6789 if (dictAdd(db
->expires
,key
,(void*)when
) == DICT_ERR
) {
6797 /* Return the expire time of the specified key, or -1 if no expire
6798 * is associated with this key (i.e. the key is non volatile).
 * The time is read back from the entry value stored in db->expires, which
 * holds the time_t itself cast to a pointer rather than a pointer to it. */
6799 static time_t getExpire(redisDb
*db
, robj
*key
) {
6802 /* No expire? return ASAP. The dictSize() test avoids the lookup
 * altogether when no key of this DB has an expire set. */
6803 if (dictSize(db
->expires
) == 0 ||
6804 (de
= dictFind(db
->expires
,key
)) == NULL
) return -1;
6806 return (time_t) dictGetEntryVal(de
);
/* Delete 'key' if it has an expire time that is already in the past.
 *
 * Returns 0 when the key has no expire or when the expire time has not been
 * passed yet (time(NULL) <= when). Otherwise the entry is removed from
 * db->expires, server.stat_expiredkeys is incremented, and the return value
 * tells whether deleting the key from db->dict succeeded (1) or not (0). */
6809 static int expireIfNeeded(redisDb
*db
, robj
*key
) {
6813 /* No expire? return ASAP */
6814 if (dictSize(db
->expires
) == 0 ||
6815 (de
= dictFind(db
->expires
,key
)) == NULL
) return 0;
6817 /* Lookup the expire: the entry value is the time_t itself. */
6818 when
= (time_t) dictGetEntryVal(de
);
6819 if (time(NULL
) <= when
) return 0;
6821 /* Delete the key, first from the expires table and then from the
 * main dictionary. */
6822 dictDelete(db
->expires
,key
);
6823 server
.stat_expiredkeys
++;
6824 return dictDelete(db
->dict
,key
) == DICT_OK
;
/* Delete 'key' if it has an expire set, without comparing the expire time
 * against the current time (contrast with expireIfNeeded()).
 *
 * Returns 0 when the key has no entry in db->expires. Otherwise
 * server.stat_expiredkeys is incremented, the expire entry is removed, and
 * the return value tells whether deleting the key from db->dict succeeded. */
6827 static int deleteIfVolatile(redisDb
*db
, robj
*key
) {
6830 /* No expire? return ASAP */
6831 if (dictSize(db
->expires
) == 0 ||
6832 (de
= dictFind(db
->expires
,key
)) == NULL
) return 0;
6834 /* Delete the key */
6836 server
.stat_expiredkeys
++;
6837 dictDelete(db
->expires
,key
);
6838 return dictDelete(db
->dict
,key
) == DICT_OK
;
6841 static void expireGenericCommand(redisClient
*c
, robj
*key
, time_t seconds
) {
6844 de
= dictFind(c
->db
->dict
,key
);
6846 addReply(c
,shared
.czero
);
6850 if (deleteKey(c
->db
,key
)) server
.dirty
++;
6851 addReply(c
, shared
.cone
);
6854 time_t when
= time(NULL
)+seconds
;
6855 if (setExpire(c
->db
,key
,when
)) {
6856 addReply(c
,shared
.cone
);
6859 addReply(c
,shared
.czero
);
6865 static void expireCommand(redisClient
*c
) {
6866 expireGenericCommand(c
,c
->argv
[1],strtol(c
->argv
[2]->ptr
,NULL
,10));
6869 static void expireatCommand(redisClient
*c
) {
6870 expireGenericCommand(c
,c
->argv
[1],strtol(c
->argv
[2]->ptr
,NULL
,10)-time(NULL
));
/* TTL command: reply with an integer reply (":<n>\r\n") computed from the
 * expire time that getExpire() reports for the key in argv[1]. The value is
 * the expire time minus the current time, truncated to int; a negative
 * result is reported as -1. */
6873 static void ttlCommand(redisClient
*c
) {
6877 expire
= getExpire(c
->db
,c
->argv
[1]);
6879 ttl
= (int) (expire
-time(NULL
));
6880 if (ttl
< 0) ttl
= -1;
6882 addReplySds(c
,sdscatprintf(sdsempty(),":%d\r\n",ttl
));
6885 /* ================================ MULTI/EXEC ============================== */
6887 /* Client state initialization for MULTI/EXEC */
6888 static void initClientMultiState(redisClient
*c
) {
6889 c
->mstate
.commands
= NULL
;
6890 c
->mstate
.count
= 0;
6893 /* Release all the resources associated with MULTI/EXEC state.
 * Every argument object of every queued command has its reference count
 * decremented, then the commands array itself is freed. The state is not
 * reset here: c->mstate.commands and c->mstate.count keep their old values
 * until initClientMultiState() is called. */
6894 static void freeClientMultiState(redisClient
*c
) {
6897 for (j
= 0; j
< c
->mstate
.count
; j
++) {
6899 multiCmd
*mc
= c
->mstate
.commands
+j
;
6901 for (i
= 0; i
< mc
->argc
; i
++)
6902 decrRefCount(mc
->argv
[i
]);
6905 zfree(c
->mstate
.commands
);
6908 /* Add a new command into the MULTI commands queue.
 * The commands array is grown by one slot and the client's current argument
 * vector is copied into a freshly allocated array owned by that slot. Only
 * the pointers are copied, so every argument object gets its reference
 * count incremented. */
6909 static void queueMultiCommand(redisClient
*c
, struct redisCommand
*cmd
) {
/* Make room for one more multiCmd at the end of the array. */
6913 c
->mstate
.commands
= zrealloc(c
->mstate
.commands
,
6914 sizeof(multiCmd
)*(c
->mstate
.count
+1));
/* 'mc' is the new, still unused, last slot. */
6915 mc
= c
->mstate
.commands
+c
->mstate
.count
;
6918 mc
->argv
= zmalloc(sizeof(robj
*)*c
->argc
);
6919 memcpy(mc
->argv
,c
->argv
,sizeof(robj
*)*c
->argc
);
6920 for (j
= 0; j
< c
->argc
; j
++)
6921 incrRefCount(mc
->argv
[j
]);
6925 static void multiCommand(redisClient
*c
) {
6926 c
->flags
|= REDIS_MULTI
;
6927 addReply(c
,shared
.ok
);
/* DISCARD command: throw away the commands queued in the client's
 * MULTI/EXEC state. If REDIS_MULTI is not set an error reply is sent;
 * otherwise the queue is released and reinitialised, the REDIS_MULTI flag
 * is cleared and shared.ok is sent. */
6930 static void discardCommand(redisClient
*c
) {
6931 if (!(c
->flags
& REDIS_MULTI
)) {
6932 addReplySds(c
,sdsnew("-ERR DISCARD without MULTI\r\n"));
/* Free the queued commands, then put the state back to "empty". */
6936 freeClientMultiState(c
);
6937 initClientMultiState(c
);
6938 c
->flags
&= (~REDIS_MULTI
);
6939 addReply(c
,shared
.ok
);
/* EXEC command: run every command queued in c->mstate on behalf of this
 * client. If REDIS_MULTI is not set an error reply is sent. Otherwise a
 * multi bulk header carrying the number of queued commands is emitted and
 * each queued command is dispatched through call() after installing its own
 * argc/argv into the client. When done the client's original argv/argc are
 * restored, the queue is released and reinitialised and REDIS_MULTI is
 * cleared. */
6942 static void execCommand(redisClient
*c
) {
6947 if (!(c
->flags
& REDIS_MULTI
)) {
6948 addReplySds(c
,sdsnew("-ERR EXEC without MULTI\r\n"));
/* Remember the EXEC arguments: c->argv/c->argc are overwritten below. */
6952 orig_argv
= c
->argv
;
6953 orig_argc
= c
->argc
;
6954 addReplySds(c
,sdscatprintf(sdsempty(),"*%d\r\n",c
->mstate
.count
));
6955 for (j
= 0; j
< c
->mstate
.count
; j
++) {
/* Make the client look as if it had just sent the j-th queued command. */
6956 c
->argc
= c
->mstate
.commands
[j
].argc
;
6957 c
->argv
= c
->mstate
.commands
[j
].argv
;
6958 call(c
,c
->mstate
.commands
[j
].cmd
);
6960 c
->argv
= orig_argv
;
6961 c
->argc
= orig_argc
;
6962 freeClientMultiState(c
);
6963 initClientMultiState(c
);
6964 c
->flags
&= (~REDIS_MULTI
);
6967 /* =========================== Blocking Operations ========================= */
6969 /* Currently Redis blocking operations support is limited to list POP ops,
6970 * so the current implementation is not fully generic, but it is also not
6971 * completely specific so it will not require a rewrite to support new
6972 * kind of blocking operations in the future.
6974 * Still it's important to note that list blocking operations can be already
6975 * used as a notification mechanism in order to implement other blocking
6976 * operations at application level, so there must be a very strong evidence
6977 * of usefulness and generality before new blocking operations are implemented.
6979 * This is how the current blocking POP works, we use BLPOP as example:
6980 * - If the user calls BLPOP and the key exists and contains a non empty list
6981 * then LPOP is called instead. So BLPOP is semantically the same as LPOP
6982 * if there is no need to block.
6983 * - If instead BLPOP is called and the key does not exist or the list is
6984 * empty we need to block. In order to do so we remove the notification for
6985 * new data to read in the client socket (so that we'll not serve new
6986 * requests if the blocking request is not served). Also we put the client
6987 * in a dictionary (db->blockingkeys) mapping keys to a list of clients
6988 * blocking for these keys.
6989 * - If a PUSH operation against a key with blocked clients waiting is
6990 * performed, we serve the first in the list: basically instead of pushing
6991 * the new element inside the list we return it to the (first / oldest)
6992 * blocking client, unblock the client, and remove it from the list.
6994 * The above comment and the source code should be enough in order to understand
6995 * the implementation and modify / fix it later.
6998 /* Set a client in blocking mode for the specified key, with the specified
7000 static void blockForKeys(redisClient
*c
, robj
**keys
, int numkeys
, time_t timeout
) {
7005 c
->blockingkeys
= zmalloc(sizeof(robj
*)*numkeys
);
7006 c
->blockingkeysnum
= numkeys
;
7007 c
->blockingto
= timeout
;
7008 for (j
= 0; j
< numkeys
; j
++) {
7009 /* Add the key in the client structure, to map clients -> keys */
7010 c
->blockingkeys
[j
] = keys
[j
];
7011 incrRefCount(keys
[j
]);
7013 /* And in the other "side", to map keys -> clients */
7014 de
= dictFind(c
->db
->blockingkeys
,keys
[j
]);
7018 /* For every key we take a list of clients blocked for it */
7020 retval
= dictAdd(c
->db
->blockingkeys
,keys
[j
],l
);
7021 incrRefCount(keys
[j
]);
7022 assert(retval
== DICT_OK
);
7024 l
= dictGetEntryVal(de
);
7026 listAddNodeTail(l
,c
);
7028 /* Mark the client as a blocked client */
7029 c
->flags
|= REDIS_BLOCKED
;
7030 server
.blpop_blocked_clients
++;
7033 /* Unblock a client that's waiting in a blocking operation such as BLPOP.
 * The client is removed from the waiting list of every key it was blocked
 * on (db->blockingkeys), its own array of blocking keys is released, the
 * REDIS_BLOCKED flag is cleared and server.blpop_blocked_clients is
 * decremented. If the client has pending data in its query buffer it is
 * processed right away. c->blockingkeys must be non-NULL on entry. */
7034 static void unblockClientWaitingData(redisClient
*c
) {
7039 assert(c
->blockingkeys
!= NULL
);
7040 /* The client may wait for multiple keys, so unblock it for every key. */
7041 for (j
= 0; j
< c
->blockingkeysnum
; j
++) {
7042 /* Remove this client from the list of clients waiting for this key. */
7043 de
= dictFind(c
->db
->blockingkeys
,c
->blockingkeys
[j
]);
7045 l
= dictGetEntryVal(de
);
7046 listDelNode(l
,listSearchKey(l
,c
));
7047 /* If the list is empty we need to remove it to avoid wasting memory */
7048 if (listLength(l
) == 0)
7049 dictDelete(c
->db
->blockingkeys
,c
->blockingkeys
[j
]);
/* Drop the reference the client structure was holding on the key. */
7050 decrRefCount(c
->blockingkeys
[j
]);
7052 /* Cleanup the client structure */
7053 zfree(c
->blockingkeys
);
7054 c
->blockingkeys
= NULL
;
7055 c
->flags
&= (~REDIS_BLOCKED
);
7056 server
.blpop_blocked_clients
--;
7057 /* We want to process data if there is some command waiting
7058 * in the input buffer. Note that this is safe even if
7059 * unblockClientWaitingData() gets called from freeClient() because
7060 * freeClient() will be smart enough to call this function
7061 * *after* c->querybuf was set to NULL. */
7062 if (c
->querybuf
&& sdslen(c
->querybuf
) > 0) processInputBuffer(c
);
7065 /* This should be called from any function PUSHing into lists.
7066 * 'c' is the "pushing client", 'key' is the key it is pushing data against,
7067 * 'ele' is the element pushed.
7069 * If the function returns 0 there was no client waiting for a list push
7072 * If the function returns 1 there was a client waiting for a list push
7073 * against this key, the element was passed to this client thus it's not
7074 * needed to actually add it to the list and the caller should return asap. */
7075 static int handleClientsWaitingListPush(redisClient
*c
, robj
*key
, robj
*ele
) {
7076 struct dictEntry
*de
;
7077 redisClient
*receiver
;
7081 de
= dictFind(c
->db
->blockingkeys
,key
);
7082 if (de
== NULL
) return 0;
7083 l
= dictGetEntryVal(de
);
7086 receiver
= ln
->value
;
7088 addReplySds(receiver
,sdsnew("*2\r\n"));
7089 addReplyBulk(receiver
,key
);
7090 addReplyBulk(receiver
,ele
);
7091 unblockClientWaitingData(receiver
);
7095 /* Blocking RPOP/LPOP */
7096 static void blockingPopGenericCommand(redisClient
*c
, int where
) {
7101 for (j
= 1; j
< c
->argc
-1; j
++) {
7102 o
= lookupKeyWrite(c
->db
,c
->argv
[j
]);
7104 if (o
->type
!= REDIS_LIST
) {
7105 addReply(c
,shared
.wrongtypeerr
);
7108 list
*list
= o
->ptr
;
7109 if (listLength(list
) != 0) {
7110 /* If the list contains elements fall back to the usual
7111 * non-blocking POP operation */
7112 robj
*argv
[2], **orig_argv
;
7115 /* We need to alter the command arguments before calling
7116 * popGenericCommand() as the command takes a single key. */
7117 orig_argv
= c
->argv
;
7118 orig_argc
= c
->argc
;
7119 argv
[1] = c
->argv
[j
];
7123 /* Also the return value is different, we need to output
7124 * the multi bulk reply header and the key name. The
7125 * "real" command will add the last element (the value)
7126 * for us. If this sounds like a hack to you it's just
7127 * because it is... */
7128 addReplySds(c
,sdsnew("*2\r\n"));
7129 addReplyBulk(c
,argv
[1]);
7130 popGenericCommand(c
,where
);
7132 /* Fix the client structure with the original stuff */
7133 c
->argv
= orig_argv
;
7134 c
->argc
= orig_argc
;
7140 /* If the list is empty or the key does not exists we must block */
7141 timeout
= strtol(c
->argv
[c
->argc
-1]->ptr
,NULL
,10);
7142 if (timeout
> 0) timeout
+= time(NULL
);
7143 blockForKeys(c
,c
->argv
+1,c
->argc
-2,timeout
);
7146 static void blpopCommand(redisClient
*c
) {
7147 blockingPopGenericCommand(c
,REDIS_HEAD
);
7150 static void brpopCommand(redisClient
*c
) {
7151 blockingPopGenericCommand(c
,REDIS_TAIL
);
7154 /* =============================== Replication ============================= */
7156 static int syncWrite(int fd
, char *ptr
, ssize_t size
, int timeout
) {
7157 ssize_t nwritten
, ret
= size
;
7158 time_t start
= time(NULL
);
7162 if (aeWait(fd
,AE_WRITABLE
,1000) & AE_WRITABLE
) {
7163 nwritten
= write(fd
,ptr
,size
);
7164 if (nwritten
== -1) return -1;
7168 if ((time(NULL
)-start
) > timeout
) {
7176 static int syncRead(int fd
, char *ptr
, ssize_t size
, int timeout
) {
7177 ssize_t nread
, totread
= 0;
7178 time_t start
= time(NULL
);
7182 if (aeWait(fd
,AE_READABLE
,1000) & AE_READABLE
) {
7183 nread
= read(fd
,ptr
,size
);
7184 if (nread
== -1) return -1;
7189 if ((time(NULL
)-start
) > timeout
) {
7197 static int syncReadLine(int fd
, char *ptr
, ssize_t size
, int timeout
) {
7204 if (syncRead(fd
,&c
,1,timeout
) == -1) return -1;
7207 if (nread
&& *(ptr
-1) == '\r') *(ptr
-1) = '\0';
7218 static void syncCommand(redisClient
*c
) {
7219 /* ignore SYNC if already slave or in monitor mode */
7220 if (c
->flags
& REDIS_SLAVE
) return;
7222 /* SYNC can't be issued when the server has pending data to send to
7223 * the client about already issued commands. We need a fresh reply
7224 * buffer registering the differences between the BGSAVE and the current
7225 * dataset, so that we can copy to other slaves if needed. */
7226 if (listLength(c
->reply
) != 0) {
7227 addReplySds(c
,sdsnew("-ERR SYNC is invalid with pending input\r\n"));
7231 redisLog(REDIS_NOTICE
,"Slave ask for synchronization");
7232 /* Here we need to check if there is a background saving operation
7233 * in progress, or if it is required to start one */
7234 if (server
.bgsavechildpid
!= -1) {
7235 /* Ok a background save is in progress. Let's check if it is a good
7236 * one for replication, i.e. if there is another slave that is
7237 * registering differences since the server forked to save */
7242 listRewind(server
.slaves
,&li
);
7243 while((ln
= listNext(&li
))) {
7245 if (slave
->replstate
== REDIS_REPL_WAIT_BGSAVE_END
) break;
7248 /* Perfect, the server is already registering differences for
7249 * another slave. Set the right state, and copy the buffer. */
7250 listRelease(c
->reply
);
7251 c
->reply
= listDup(slave
->reply
);
7252 c
->replstate
= REDIS_REPL_WAIT_BGSAVE_END
;
7253 redisLog(REDIS_NOTICE
,"Waiting for end of BGSAVE for SYNC");
7255 /* No way, we need to wait for the next BGSAVE in order to
7256 * register differences */
7257 c
->replstate
= REDIS_REPL_WAIT_BGSAVE_START
;
7258 redisLog(REDIS_NOTICE
,"Waiting for next BGSAVE for SYNC");
7261 /* Ok we don't have a BGSAVE in progress, let's start one */
7262 redisLog(REDIS_NOTICE
,"Starting BGSAVE for SYNC");
7263 if (rdbSaveBackground(server
.dbfilename
) != REDIS_OK
) {
7264 redisLog(REDIS_NOTICE
,"Replication failed, can't BGSAVE");
7265 addReplySds(c
,sdsnew("-ERR Unalbe to perform background save\r\n"));
7268 c
->replstate
= REDIS_REPL_WAIT_BGSAVE_END
;
7271 c
->flags
|= REDIS_SLAVE
;
7273 listAddNodeTail(server
.slaves
,c
);
7277 static void sendBulkToSlave(aeEventLoop
*el
, int fd
, void *privdata
, int mask
) {
7278 redisClient
*slave
= privdata
;
7280 REDIS_NOTUSED(mask
);
7281 char buf
[REDIS_IOBUF_LEN
];
7282 ssize_t nwritten
, buflen
;
7284 if (slave
->repldboff
== 0) {
7285 /* Write the bulk write count before transferring the DB. In theory here
7286 * we don't know how much room there is in the output buffer of the
7287 * socket, but in practice SO_SNDLOWAT (the minimum count for output
7288 * operations) will never be smaller than the few bytes we need. */
7291 bulkcount
= sdscatprintf(sdsempty(),"$%lld\r\n",(unsigned long long)
7293 if (write(fd
,bulkcount
,sdslen(bulkcount
)) != (signed)sdslen(bulkcount
))
7301 lseek(slave
->repldbfd
,slave
->repldboff
,SEEK_SET
);
7302 buflen
= read(slave
->repldbfd
,buf
,REDIS_IOBUF_LEN
);
7304 redisLog(REDIS_WARNING
,"Read error sending DB to slave: %s",
7305 (buflen
== 0) ? "premature EOF" : strerror(errno
));
7309 if ((nwritten
= write(fd
,buf
,buflen
)) == -1) {
7310 redisLog(REDIS_VERBOSE
,"Write error sending DB to slave: %s",
7315 slave
->repldboff
+= nwritten
;
7316 if (slave
->repldboff
== slave
->repldbsize
) {
7317 close(slave
->repldbfd
);
7318 slave
->repldbfd
= -1;
7319 aeDeleteFileEvent(server
.el
,slave
->fd
,AE_WRITABLE
);
7320 slave
->replstate
= REDIS_REPL_ONLINE
;
7321 if (aeCreateFileEvent(server
.el
, slave
->fd
, AE_WRITABLE
,
7322 sendReplyToClient
, slave
) == AE_ERR
) {
7326 addReplySds(slave
,sdsempty());
7327 redisLog(REDIS_NOTICE
,"Synchronization with slave succeeded");
7331 /* This function is called at the end of every background saving.
7332 * The argument bgsaveerr is REDIS_OK if the background saving succeeded
7333 * otherwise REDIS_ERR is passed to the function.
7335 * The goal of this function is to handle slaves waiting for a successful
7336 * background saving in order to perform non-blocking synchronization. */
7337 static void updateSlavesWaitingBgsave(int bgsaveerr
) {
7339 int startbgsave
= 0;
7342 listRewind(server
.slaves
,&li
);
7343 while((ln
= listNext(&li
))) {
7344 redisClient
*slave
= ln
->value
;
7346 if (slave
->replstate
== REDIS_REPL_WAIT_BGSAVE_START
) {
7348 slave
->replstate
= REDIS_REPL_WAIT_BGSAVE_END
;
7349 } else if (slave
->replstate
== REDIS_REPL_WAIT_BGSAVE_END
) {
7350 struct redis_stat buf
;
7352 if (bgsaveerr
!= REDIS_OK
) {
7354 redisLog(REDIS_WARNING
,"SYNC failed. BGSAVE child returned an error");
7357 if ((slave
->repldbfd
= open(server
.dbfilename
,O_RDONLY
)) == -1 ||
7358 redis_fstat(slave
->repldbfd
,&buf
) == -1) {
7360 redisLog(REDIS_WARNING
,"SYNC failed. Can't open/stat DB after BGSAVE: %s", strerror(errno
));
7363 slave
->repldboff
= 0;
7364 slave
->repldbsize
= buf
.st_size
;
7365 slave
->replstate
= REDIS_REPL_SEND_BULK
;
7366 aeDeleteFileEvent(server
.el
,slave
->fd
,AE_WRITABLE
);
7367 if (aeCreateFileEvent(server
.el
, slave
->fd
, AE_WRITABLE
, sendBulkToSlave
, slave
) == AE_ERR
) {
7374 if (rdbSaveBackground(server
.dbfilename
) != REDIS_OK
) {
7377 listRewind(server
.slaves
,&li
);
7378 redisLog(REDIS_WARNING
,"SYNC failed. BGSAVE failed");
7379 while((ln
= listNext(&li
))) {
7380 redisClient
*slave
= ln
->value
;
7382 if (slave
->replstate
== REDIS_REPL_WAIT_BGSAVE_START
)
7389 static int syncWithMaster(void) {
7390 char buf
[1024], tmpfile
[256], authcmd
[1024];
7392 int fd
= anetTcpConnect(NULL
,server
.masterhost
,server
.masterport
);
7393 int dfd
, maxtries
= 5;
7396 redisLog(REDIS_WARNING
,"Unable to connect to MASTER: %s",
7401 /* AUTH with the master if required. */
7402 if(server
.masterauth
) {
7403 snprintf(authcmd
, 1024, "AUTH %s\r\n", server
.masterauth
);
7404 if (syncWrite(fd
, authcmd
, strlen(server
.masterauth
)+7, 5) == -1) {
7406 redisLog(REDIS_WARNING
,"Unable to AUTH to MASTER: %s",
7410 /* Read the AUTH result. */
7411 if (syncReadLine(fd
,buf
,1024,3600) == -1) {
7413 redisLog(REDIS_WARNING
,"I/O error reading auth result from MASTER: %s",
7417 if (buf
[0] != '+') {
7419 redisLog(REDIS_WARNING
,"Cannot AUTH to MASTER, is the masterauth password correct?");
7424 /* Issue the SYNC command */
7425 if (syncWrite(fd
,"SYNC \r\n",7,5) == -1) {
7427 redisLog(REDIS_WARNING
,"I/O error writing to MASTER: %s",
7431 /* Read the bulk write count */
7432 if (syncReadLine(fd
,buf
,1024,3600) == -1) {
7434 redisLog(REDIS_WARNING
,"I/O error reading bulk count from MASTER: %s",
7438 if (buf
[0] != '$') {
7440 redisLog(REDIS_WARNING
,"Bad protocol from MASTER, the first byte is not '$', are you sure the host and port are right?");
7443 dumpsize
= strtol(buf
+1,NULL
,10);
7444 redisLog(REDIS_NOTICE
,"Receiving %ld bytes data dump from MASTER",dumpsize
);
7445 /* Read the bulk write data on a temp file */
7447 snprintf(tmpfile
,256,
7448 "temp-%d.%ld.rdb",(int)time(NULL
),(long int)getpid());
7449 dfd
= open(tmpfile
,O_CREAT
|O_WRONLY
|O_EXCL
,0644);
7450 if (dfd
!= -1) break;
7455 redisLog(REDIS_WARNING
,"Opening the temp file needed for MASTER <-> SLAVE synchronization: %s",strerror(errno
));
7459 int nread
, nwritten
;
7461 nread
= read(fd
,buf
,(dumpsize
< 1024)?dumpsize
:1024);
7463 redisLog(REDIS_WARNING
,"I/O error trying to sync with MASTER: %s",
7469 nwritten
= write(dfd
,buf
,nread
);
7470 if (nwritten
== -1) {
7471 redisLog(REDIS_WARNING
,"Write error writing to the DB dump file needed for MASTER <-> SLAVE synchrnonization: %s", strerror(errno
));
7479 if (rename(tmpfile
,server
.dbfilename
) == -1) {
7480 redisLog(REDIS_WARNING
,"Failed trying to rename the temp DB into dump.rdb in MASTER <-> SLAVE synchronization: %s", strerror(errno
));
7486 if (rdbLoad(server
.dbfilename
) != REDIS_OK
) {
7487 redisLog(REDIS_WARNING
,"Failed trying to load the MASTER synchronization DB from disk");
7491 server
.master
= createClient(fd
);
7492 server
.master
->flags
|= REDIS_MASTER
;
7493 server
.master
->authenticated
= 1;
7494 server
.replstate
= REDIS_REPL_CONNECTED
;
/* SLAVEOF command.
 *
 * "SLAVEOF no one" (both words matched case-insensitively): if a master
 * host is configured it is forgotten, the master client (if any) is freed
 * and the replication state becomes REDIS_REPL_NONE.
 *
 * Any other pair of arguments is taken as <host> <port>: the host is copied
 * into server.masterhost, the port is parsed with atoi() (so no validation
 * of the number is performed), the current master client (if any) is freed
 * and the replication state is set to REDIS_REPL_CONNECT.
 *
 * shared.ok is sent as reply. */
7498 static void slaveofCommand(redisClient
*c
) {
7499 if (!strcasecmp(c
->argv
[1]->ptr
,"no") &&
7500 !strcasecmp(c
->argv
[2]->ptr
,"one")) {
7501 if (server
.masterhost
) {
7502 sdsfree(server
.masterhost
);
7503 server
.masterhost
= NULL
;
7504 if (server
.master
) freeClient(server
.master
);
7505 server
.replstate
= REDIS_REPL_NONE
;
7506 redisLog(REDIS_NOTICE
,"MASTER MODE enabled (user request)");
/* Replace the previous master address (if any) with the new one. */
7509 sdsfree(server
.masterhost
);
7510 server
.masterhost
= sdsdup(c
->argv
[1]->ptr
);
7511 server
.masterport
= atoi(c
->argv
[2]->ptr
);
7512 if (server
.master
) freeClient(server
.master
);
7513 server
.replstate
= REDIS_REPL_CONNECT
;
7514 redisLog(REDIS_NOTICE
,"SLAVE OF %s:%d enabled (user request)",
7515 server
.masterhost
, server
.masterport
);
7517 addReply(c
,shared
.ok
);
7520 /* ============================ Maxmemory directive ======================== */
7522 /* Try to free one object from the pre-allocated objects free list.
7523 * This is useful under low mem conditions as by default we take 1 million
7524 * free objects allocated. On success REDIS_OK is returned, otherwise
7526 static int tryFreeOneObjectFromFreelist(void) {
7529 if (server
.vm_enabled
) pthread_mutex_lock(&server
.obj_freelist_mutex
);
7530 if (listLength(server
.objfreelist
)) {
7531 listNode
*head
= listFirst(server
.objfreelist
);
7532 o
= listNodeValue(head
);
7533 listDelNode(server
.objfreelist
,head
);
7534 if (server
.vm_enabled
) pthread_mutex_unlock(&server
.obj_freelist_mutex
);
7538 if (server
.vm_enabled
) pthread_mutex_unlock(&server
.obj_freelist_mutex
);
7543 /* This function gets called when 'maxmemory' is set on the config file to limit
7544 * the max memory used by the server, and we are out of memory.
7545 * This function will try to, in order:
7547 * - Free objects from the free list
7548 * - Try to remove keys with an EXPIRE set
7550 * If it is not possible to free enough memory to reach used-memory < maxmemory
7551 * the server will start refusing commands that will enlarge even more the
7554 static void freeMemoryIfNeeded(void) {
7555 while (server
.maxmemory
&& zmalloc_used_memory() > server
.maxmemory
) {
7556 int j
, k
, freed
= 0;
7558 if (tryFreeOneObjectFromFreelist() == REDIS_OK
) continue;
7559 for (j
= 0; j
< server
.dbnum
; j
++) {
7561 robj
*minkey
= NULL
;
7562 struct dictEntry
*de
;
7564 if (dictSize(server
.db
[j
].expires
)) {
7566 /* From a sample of three keys drop the one nearest to
7567 * the natural expire */
7568 for (k
= 0; k
< 3; k
++) {
7571 de
= dictGetRandomKey(server
.db
[j
].expires
);
7572 t
= (time_t) dictGetEntryVal(de
);
7573 if (minttl
== -1 || t
< minttl
) {
7574 minkey
= dictGetEntryKey(de
);
7578 deleteKey(server
.db
+j
,minkey
);
7581 if (!freed
) return; /* nothing to free... */
7585 /* ============================== Append Only file ========================== */
7587 static void feedAppendOnlyFile(struct redisCommand
*cmd
, int dictid
, robj
**argv
, int argc
) {
7588 sds buf
= sdsempty();
7594 /* The DB this command was targeting is not the same as the last command
7595 * we appended. Issuing a SELECT command is needed. */
7596 if (dictid
!= server
.appendseldb
) {
7599 snprintf(seldb
,sizeof(seldb
),"%d",dictid
);
7600 buf
= sdscatprintf(buf
,"*2\r\n$6\r\nSELECT\r\n$%lu\r\n%s\r\n",
7601 (unsigned long)strlen(seldb
),seldb
);
7602 server
.appendseldb
= dictid
;
7605 /* "Fix" the argv vector if the command is EXPIRE. We want to translate
7606 * EXPIREs into EXPIREATs calls */
7607 if (cmd
->proc
== expireCommand
) {
7610 tmpargv
[0] = createStringObject("EXPIREAT",8);
7611 tmpargv
[1] = argv
[1];
7612 incrRefCount(argv
[1]);
7613 when
= time(NULL
)+strtol(argv
[2]->ptr
,NULL
,10);
7614 tmpargv
[2] = createObject(REDIS_STRING
,
7615 sdscatprintf(sdsempty(),"%ld",when
));
7619 /* Append the actual command */
7620 buf
= sdscatprintf(buf
,"*%d\r\n",argc
);
7621 for (j
= 0; j
< argc
; j
++) {
7624 o
= getDecodedObject(o
);
7625 buf
= sdscatprintf(buf
,"$%lu\r\n",(unsigned long)sdslen(o
->ptr
));
7626 buf
= sdscatlen(buf
,o
->ptr
,sdslen(o
->ptr
));
7627 buf
= sdscatlen(buf
,"\r\n",2);
7631 /* Free the objects from the modified argv for EXPIREAT */
7632 if (cmd
->proc
== expireCommand
) {
7633 for (j
= 0; j
< 3; j
++)
7634 decrRefCount(argv
[j
]);
7637 /* We want to perform a single write. This should be guaranteed atomic
7638 * at least if the filesystem we are writing is a real physical one.
7639 * While this will save us against the server being killed I don't think
7640 * there is much to do about the whole server stopping for power problems
7642 nwritten
= write(server
.appendfd
,buf
,sdslen(buf
));
7643 if (nwritten
!= (signed)sdslen(buf
)) {
7644 /* Ooops, we are in troubles. The best thing to do for now is
7645 * to simply exit instead to give the illusion that everything is
7646 * working as expected. */
7647 if (nwritten
== -1) {
7648 redisLog(REDIS_WARNING
,"Exiting on error writing to the append-only file: %s",strerror(errno
));
7650 redisLog(REDIS_WARNING
,"Exiting on short write while writing to the append-only file: %s",strerror(errno
));
7654 /* If a background append only file rewriting is in progress we want to
7655 * accumulate the differences between the child DB and the current one
7656 * in a buffer, so that when the child process will do its work we
7657 * can append the differences to the new append only file. */
7658 if (server
.bgrewritechildpid
!= -1)
7659 server
.bgrewritebuf
= sdscatlen(server
.bgrewritebuf
,buf
,sdslen(buf
));
7663 if (server
.appendfsync
== APPENDFSYNC_ALWAYS
||
7664 (server
.appendfsync
== APPENDFSYNC_EVERYSEC
&&
7665 now
-server
.lastfsync
> 1))
7667 fsync(server
.appendfd
); /* Let's try to get this data on the disk */
7668 server
.lastfsync
= now
;
7672 /* In Redis commands are always executed in the context of a client, so in
7673 * order to load the append only file we need to create a fake client.
 * The structure is allocated with zmalloc(); it gets an empty query buffer
 * and an empty reply list whose free and dup methods are set to
 * decrRefCount and dupClientReplyValue. */
7674 static struct redisClient
*createFakeClient(void) {
7675 struct redisClient
*c
= zmalloc(sizeof(*c
));
7679 c
->querybuf
= sdsempty();
7683 /* We set the fake client as a slave waiting for the synchronization
7684 * so that Redis will not try to send replies to this client. */
7685 c
->replstate
= REDIS_REPL_WAIT_BGSAVE_START
;
7686 c
->reply
= listCreate();
7687 listSetFreeMethod(c
->reply
,decrRefCount
);
7688 listSetDupMethod(c
->reply
,dupClientReplyValue
);
/* Release a client created by createFakeClient(): the query buffer and the
 * reply list it allocated are freed here. */
7692 static void freeFakeClient(struct redisClient
*c
) {
7693 sdsfree(c
->querybuf
);
7694 listRelease(c
->reply
);
7698 /* Replay the append log file. On success REDIS_OK is returned. On non fatal
7699 * error (the append only file is zero-length) REDIS_ERR is returned. On
7700 * fatal error an error message is logged and the program exits. */
7701 int loadAppendOnlyFile(char *filename
) {
7702 struct redisClient
*fakeClient
;
7703 FILE *fp
= fopen(filename
,"r");
7704 struct redis_stat sb
;
7705 unsigned long long loadedkeys
= 0;
7707 if (redis_fstat(fileno(fp
),&sb
) != -1 && sb
.st_size
== 0)
7711 redisLog(REDIS_WARNING
,"Fatal error: can't open the append log file for reading: %s",strerror(errno
));
7715 fakeClient
= createFakeClient();
7722 struct redisCommand
*cmd
;
7724 if (fgets(buf
,sizeof(buf
),fp
) == NULL
) {
7730 if (buf
[0] != '*') goto fmterr
;
7732 argv
= zmalloc(sizeof(robj
*)*argc
);
7733 for (j
= 0; j
< argc
; j
++) {
7734 if (fgets(buf
,sizeof(buf
),fp
) == NULL
) goto readerr
;
7735 if (buf
[0] != '$') goto fmterr
;
7736 len
= strtol(buf
+1,NULL
,10);
7737 argsds
= sdsnewlen(NULL
,len
);
7738 if (len
&& fread(argsds
,len
,1,fp
) == 0) goto fmterr
;
7739 argv
[j
] = createObject(REDIS_STRING
,argsds
);
7740 if (fread(buf
,2,1,fp
) == 0) goto fmterr
; /* discard CRLF */
7743 /* Command lookup */
7744 cmd
= lookupCommand(argv
[0]->ptr
);
7746 redisLog(REDIS_WARNING
,"Unknown command '%s' reading the append only file", argv
[0]->ptr
);
7749 /* Try object encoding */
7750 if (cmd
->flags
& REDIS_CMD_BULK
)
7751 tryObjectEncoding(argv
[argc
-1]);
7752 /* Run the command in the context of a fake client */
7753 fakeClient
->argc
= argc
;
7754 fakeClient
->argv
= argv
;
7755 cmd
->proc(fakeClient
);
7756 /* Discard the reply objects list from the fake client */
7757 while(listLength(fakeClient
->reply
))
7758 listDelNode(fakeClient
->reply
,listFirst(fakeClient
->reply
));
7759 /* Clean up, ready for the next command */
7760 for (j
= 0; j
< argc
; j
++) decrRefCount(argv
[j
]);
7762 /* Handle swapping while loading big datasets when VM is on */
7764 if (server
.vm_enabled
&& (loadedkeys
% 5000) == 0) {
7765 while (zmalloc_used_memory() > server
.vm_max_memory
) {
7766 if (vmSwapOneObjectBlocking() == REDIS_ERR
) break;
7771 freeFakeClient(fakeClient
);
7776 redisLog(REDIS_WARNING
,"Unexpected end of file reading the append only file");
7778 redisLog(REDIS_WARNING
,"Unrecoverable error reading the append only file: %s", strerror(errno
));
7782 redisLog(REDIS_WARNING
,"Bad file format reading the append only file");
7786 /* Write an object into a file in the bulk format $<count>\r\n<payload>\r\n */
7787 static int fwriteBulkObject(FILE *fp
, robj
*obj
) {
7791 /* Avoid the incr/decr ref count business if possible to help
7792 * copy-on-write (we are often in a child process when this function
7794 * Also makes sure that key objects don't get incrRefCount-ed when VM
7796 if (obj
->encoding
!= REDIS_ENCODING_RAW
) {
7797 obj
= getDecodedObject(obj
);
7800 snprintf(buf
,sizeof(buf
),"$%ld\r\n",(long)sdslen(obj
->ptr
));
7801 if (fwrite(buf
,strlen(buf
),1,fp
) == 0) goto err
;
7802 if (sdslen(obj
->ptr
) && fwrite(obj
->ptr
,sdslen(obj
->ptr
),1,fp
) == 0)
7804 if (fwrite("\r\n",2,1,fp
) == 0) goto err
;
7805 if (decrrc
) decrRefCount(obj
);
7808 if (decrrc
) decrRefCount(obj
);
/* Write the binary-safe string 's' of 'len' bytes to 'fp' in the bulk
 * format $<count>\r\n<payload>\r\n
 *
 * Returns 1 on success, 0 if any of the writes failed. */
static int fwriteBulkString(FILE *fp, char *s, unsigned long len) {
    char buf[128];

    /* 'len' is unsigned: format it with %lu so that the conversion matches
     * the argument type and big lengths are never printed as negative. */
    snprintf(buf,sizeof(buf),"$%lu\r\n",len);
    if (fwrite(buf,strlen(buf),1,fp) == 0) return 0;
    /* fwrite() with a zero size returns 0 even on success: skip it. */
    if (len && fwrite(s,len,1,fp) == 0) return 0;
    if (fwrite("\r\n",2,1,fp) == 0) return 0;
    return 1;
}
/* Write the double 'd' to 'fp' in the bulk format
 * $<count>\r\n<payload>\r\n where the payload is the value printed with
 * the "%.17g" conversion.
 *
 * Returns 1 on success, 0 if any of the writes failed. */
static int fwriteBulkDouble(FILE *fp, double d) {
    char payload[128], header[128];
    size_t plen;

    /* The payload is rendered together with its trailing CRLF, that is
     * not part of the announced count: hence the -2. */
    snprintf(payload,sizeof(payload),"%.17g\r\n",d);
    plen = strlen(payload);
    snprintf(header,sizeof(header),"$%lu\r\n",(unsigned long)plen-2);

    return fwrite(header,strlen(header),1,fp) != 0 &&
           fwrite(payload,plen,1,fp) != 0;
}
/* Write the long 'l' to 'fp' in the bulk format
 * $<count>\r\n<payload>\r\n where the payload is the decimal
 * representation of the value.
 *
 * Returns 1 on success, 0 if any of the writes failed. */
static int fwriteBulkLong(FILE *fp, long l) {
    char payload[128], header[128];
    size_t plen;

    /* The payload is rendered together with its trailing CRLF, that is
     * not part of the announced count: hence the -2. */
    snprintf(payload,sizeof(payload),"%ld\r\n",l);
    plen = strlen(payload);
    snprintf(header,sizeof(header),"$%lu\r\n",(unsigned long)plen-2);

    return fwrite(header,strlen(header),1,fp) != 0 &&
           fwrite(payload,plen,1,fp) != 0;
}
7846 /* Write a sequence of commands able to fully rebuild the dataset into
7847 * "filename". Used both by REWRITEAOF and BGREWRITEAOF. */
7848 static int rewriteAppendOnlyFile(char *filename
) {
7849 dictIterator
*di
= NULL
;
7854 time_t now
= time(NULL
);
7856 /* Note that we have to use a different temp name here compared to the
7857 * one used by rewriteAppendOnlyFileBackground() function. */
7858 snprintf(tmpfile
,256,"temp-rewriteaof-%d.aof", (int) getpid());
7859 fp
= fopen(tmpfile
,"w");
7861 redisLog(REDIS_WARNING
, "Failed rewriting the append only file: %s", strerror(errno
));
7864 for (j
= 0; j
< server
.dbnum
; j
++) {
7865 char selectcmd
[] = "*2\r\n$6\r\nSELECT\r\n";
7866 redisDb
*db
= server
.db
+j
;
7868 if (dictSize(d
) == 0) continue;
7869 di
= dictGetIterator(d
);
7875 /* SELECT the new DB */
7876 if (fwrite(selectcmd
,sizeof(selectcmd
)-1,1,fp
) == 0) goto werr
;
7877 if (fwriteBulkLong(fp
,j
) == 0) goto werr
;
7879 /* Iterate this DB writing every entry */
7880 while((de
= dictNext(di
)) != NULL
) {
7885 key
= dictGetEntryKey(de
);
7886 /* If the value for this key is swapped, load a preview in memory.
7887 * We use a "swapped" flag to remember if we need to free the
7888 * value object instead to just increment the ref count anyway
7889 * in order to avoid copy-on-write of pages if we are forked() */
7890 if (!server
.vm_enabled
|| key
->storage
== REDIS_VM_MEMORY
||
7891 key
->storage
== REDIS_VM_SWAPPING
) {
7892 o
= dictGetEntryVal(de
);
7895 o
= vmPreviewObject(key
);
7898 expiretime
= getExpire(db
,key
);
7900 /* Save the key and associated value */
7901 if (o
->type
== REDIS_STRING
) {
7902 /* Emit a SET command */
7903 char cmd
[]="*3\r\n$3\r\nSET\r\n";
7904 if (fwrite(cmd
,sizeof(cmd
)-1,1,fp
) == 0) goto werr
;
7906 if (fwriteBulkObject(fp
,key
) == 0) goto werr
;
7907 if (fwriteBulkObject(fp
,o
) == 0) goto werr
;
7908 } else if (o
->type
== REDIS_LIST
) {
7909 /* Emit the RPUSHes needed to rebuild the list */
7910 list
*list
= o
->ptr
;
7914 listRewind(list
,&li
);
7915 while((ln
= listNext(&li
))) {
7916 char cmd
[]="*3\r\n$5\r\nRPUSH\r\n";
7917 robj
*eleobj
= listNodeValue(ln
);
7919 if (fwrite(cmd
,sizeof(cmd
)-1,1,fp
) == 0) goto werr
;
7920 if (fwriteBulkObject(fp
,key
) == 0) goto werr
;
7921 if (fwriteBulkObject(fp
,eleobj
) == 0) goto werr
;
7923 } else if (o
->type
== REDIS_SET
) {
7924 /* Emit the SADDs needed to rebuild the set */
7926 dictIterator
*di
= dictGetIterator(set
);
7929 while((de
= dictNext(di
)) != NULL
) {
7930 char cmd
[]="*3\r\n$4\r\nSADD\r\n";
7931 robj
*eleobj
= dictGetEntryKey(de
);
7933 if (fwrite(cmd
,sizeof(cmd
)-1,1,fp
) == 0) goto werr
;
7934 if (fwriteBulkObject(fp
,key
) == 0) goto werr
;
7935 if (fwriteBulkObject(fp
,eleobj
) == 0) goto werr
;
7937 dictReleaseIterator(di
);
7938 } else if (o
->type
== REDIS_ZSET
) {
7939 /* Emit the ZADDs needed to rebuild the sorted set */
7941 dictIterator
*di
= dictGetIterator(zs
->dict
);
7944 while((de
= dictNext(di
)) != NULL
) {
7945 char cmd
[]="*4\r\n$4\r\nZADD\r\n";
7946 robj
*eleobj
= dictGetEntryKey(de
);
7947 double *score
= dictGetEntryVal(de
);
7949 if (fwrite(cmd
,sizeof(cmd
)-1,1,fp
) == 0) goto werr
;
7950 if (fwriteBulkObject(fp
,key
) == 0) goto werr
;
7951 if (fwriteBulkDouble(fp
,*score
) == 0) goto werr
;
7952 if (fwriteBulkObject(fp
,eleobj
) == 0) goto werr
;
7954 dictReleaseIterator(di
);
7955 } else if (o
->type
== REDIS_HASH
) {
7956 char cmd
[]="*4\r\n$4\r\nHSET\r\n";
7958 /* Emit the HSETs needed to rebuild the hash */
7959 if (o
->encoding
== REDIS_ENCODING_ZIPMAP
) {
7960 unsigned char *p
= zipmapRewind(o
->ptr
);
7961 unsigned char *field
, *val
;
7962 unsigned int flen
, vlen
;
7964 while((p
= zipmapNext(p
,&field
,&flen
,&val
,&vlen
)) != NULL
) {
7965 if (fwrite(cmd
,sizeof(cmd
)-1,1,fp
) == 0) goto werr
;
7966 if (fwriteBulkObject(fp
,key
) == 0) goto werr
;
7967 if (fwriteBulkString(fp
,(char*)field
,flen
) == -1)
7969 if (fwriteBulkString(fp
,(char*)val
,vlen
) == -1)
7973 dictIterator
*di
= dictGetIterator(o
->ptr
);
7976 while((de
= dictNext(di
)) != NULL
) {
7977 robj
*field
= dictGetEntryKey(de
);
7978 robj
*val
= dictGetEntryVal(de
);
7980 if (fwrite(cmd
,sizeof(cmd
)-1,1,fp
) == 0) goto werr
;
7981 if (fwriteBulkObject(fp
,key
) == 0) goto werr
;
7982 if (fwriteBulkObject(fp
,field
) == -1) return -1;
7983 if (fwriteBulkObject(fp
,val
) == -1) return -1;
7985 dictReleaseIterator(di
);
7990 /* Save the expire time */
7991 if (expiretime
!= -1) {
7992 char cmd
[]="*3\r\n$8\r\nEXPIREAT\r\n";
7993 /* If this key is already expired skip it */
7994 if (expiretime
< now
) continue;
7995 if (fwrite(cmd
,sizeof(cmd
)-1,1,fp
) == 0) goto werr
;
7996 if (fwriteBulkObject(fp
,key
) == 0) goto werr
;
7997 if (fwriteBulkLong(fp
,expiretime
) == 0) goto werr
;
7999 if (swapped
) decrRefCount(o
);
8001 dictReleaseIterator(di
);
8004 /* Make sure data will not remain on the OS's output buffers */
8009 /* Use RENAME to make sure the DB file is changed atomically only
8010 * if the generate DB file is ok. */
8011 if (rename(tmpfile
,filename
) == -1) {
8012 redisLog(REDIS_WARNING
,"Error moving temp append only file on the final destination: %s", strerror(errno
));
8016 redisLog(REDIS_NOTICE
,"SYNC append only file rewrite performed");
8022 redisLog(REDIS_WARNING
,"Write error writing append only file on disk: %s", strerror(errno
));
8023 if (di
) dictReleaseIterator(di
);
8027 /* This is how rewriting of the append only file in background works:
8029 * 1) The user calls BGREWRITEAOF
8030 * 2) Redis calls this function, that forks():
8031 * 2a) the child rewrite the append only file in a temp file.
8032 * 2b) the parent accumulates differences in server.bgrewritebuf.
8033 * 3) When the child finishes '2a' it exits.
8034 * 4) The parent will trap the exit code, if it's OK, will append the
8035 * data accumulated into server.bgrewritebuf into the temp file, and
8036 * finally will rename(2) the temp file in the actual file name.
8037 * Then the new file is reopened as the new append only file. Profit!
8039 static int rewriteAppendOnlyFileBackground(void) {
8042 if (server
.bgrewritechildpid
!= -1) return REDIS_ERR
;
8043 if (server
.vm_enabled
) waitEmptyIOJobsQueue();
8044 if ((childpid
= fork()) == 0) {
8048 if (server
.vm_enabled
) vmReopenSwapFile();
8050 snprintf(tmpfile
,256,"temp-rewriteaof-bg-%d.aof", (int) getpid());
8051 if (rewriteAppendOnlyFile(tmpfile
) == REDIS_OK
) {
8058 if (childpid
== -1) {
8059 redisLog(REDIS_WARNING
,
8060 "Can't rewrite append only file in background: fork: %s",
8064 redisLog(REDIS_NOTICE
,
8065 "Background append only file rewriting started by pid %d",childpid
);
8066 server
.bgrewritechildpid
= childpid
;
8067 updateDictResizePolicy();
8068 /* We set appendseldb to -1 in order to force the next call to the
8069 * feedAppendOnlyFile() to issue a SELECT command, so the differences
8070 * accumulated by the parent into server.bgrewritebuf will start
8071 * with a SELECT statement and it will be safe to merge. */
8072 server
.appendseldb
= -1;
8075 return REDIS_OK
; /* unreached */
8078 static void bgrewriteaofCommand(redisClient
*c
) {
8079 if (server
.bgrewritechildpid
!= -1) {
8080 addReplySds(c
,sdsnew("-ERR background append only file rewriting already in progress\r\n"));
8083 if (rewriteAppendOnlyFileBackground() == REDIS_OK
) {
8084 char *status
= "+Background append only file rewriting started\r\n";
8085 addReplySds(c
,sdsnew(status
));
8087 addReply(c
,shared
.err
);
/* Remove the temp file "temp-rewriteaof-bg-<pid>.aof" associated with the
 * process id 'childpid'. The result of unlink() is not checked. */
static void aofRemoveTempFile(pid_t childpid) {
    char path[256];

    snprintf(path,sizeof(path),"temp-rewriteaof-bg-%d.aof", (int) childpid);
    unlink(path);
}
8098 /* Virtual Memory is composed mainly of two subsystems:
8099 * - Blocking Virtual Memory
8100 * - Threaded Virtual Memory I/O
8101 * The two parts are not fully decoupled, but functions are split among two
8102 * different sections of the source code (delimited by comments) in order to
8103 * make more clear what functionality is about the blocking VM and what about
8104 * the threaded (not blocking) VM.
8108 * Redis VM is a blocking VM (one that blocks reading swapped values from
8109 * disk into memory when a value swapped out is needed in memory) that is made
8110 * unblocking by trying to examine the command argument vector in order to
8111 * load in background values that will likely be needed in order to exec
8112 * the command. The command is executed only once all the relevant keys
8113 * are loaded into memory.
8115 * This is basically almost as simple as a blocking VM, but almost as
8116 * parallel as a fully non-blocking VM.
8119 /* =================== Virtual Memory - Blocking Side ====================== */
8121 /* substitute the first occurrence of '%p' with the process pid in the
8122 * swap file name. */
8123 static void expandVmSwapFilename(void) {
8124 char *p
= strstr(server
.vm_swap_file
,"%p");
8130 new = sdscat(new,server
.vm_swap_file
);
8131 new = sdscatprintf(new,"%ld",(long) getpid());
8132 new = sdscat(new,p
+2);
8133 zfree(server
.vm_swap_file
);
8134 server
.vm_swap_file
= new;
8137 static void vmInit(void) {
8142 if (server
.vm_max_threads
!= 0)
8143 zmalloc_enable_thread_safeness(); /* we need thread safe zmalloc() */
8145 expandVmSwapFilename();
8146 redisLog(REDIS_NOTICE
,"Using '%s' as swap file",server
.vm_swap_file
);
8147 if ((server
.vm_fp
= fopen(server
.vm_swap_file
,"r+b")) == NULL
) {
8148 server
.vm_fp
= fopen(server
.vm_swap_file
,"w+b");
8150 if (server
.vm_fp
== NULL
) {
8151 redisLog(REDIS_WARNING
,
8152 "Impossible to open the swap file: %s. Exiting.",
8156 server
.vm_fd
= fileno(server
.vm_fp
);
8157 server
.vm_next_page
= 0;
8158 server
.vm_near_pages
= 0;
8159 server
.vm_stats_used_pages
= 0;
8160 server
.vm_stats_swapped_objects
= 0;
8161 server
.vm_stats_swapouts
= 0;
8162 server
.vm_stats_swapins
= 0;
8163 totsize
= server
.vm_pages
*server
.vm_page_size
;
8164 redisLog(REDIS_NOTICE
,"Allocating %lld bytes of swap file",totsize
);
8165 if (ftruncate(server
.vm_fd
,totsize
) == -1) {
8166 redisLog(REDIS_WARNING
,"Can't ftruncate swap file: %s. Exiting.",
8170 redisLog(REDIS_NOTICE
,"Swap file allocated with success");
8172 server
.vm_bitmap
= zmalloc((server
.vm_pages
+7)/8);
8173 redisLog(REDIS_VERBOSE
,"Allocated %lld bytes page table for %lld pages",
8174 (long long) (server
.vm_pages
+7)/8, server
.vm_pages
);
8175 memset(server
.vm_bitmap
,0,(server
.vm_pages
+7)/8);
8177 /* Initialize threaded I/O (used by Virtual Memory) */
8178 server
.io_newjobs
= listCreate();
8179 server
.io_processing
= listCreate();
8180 server
.io_processed
= listCreate();
8181 server
.io_ready_clients
= listCreate();
8182 pthread_mutex_init(&server
.io_mutex
,NULL
);
8183 pthread_mutex_init(&server
.obj_freelist_mutex
,NULL
);
8184 pthread_mutex_init(&server
.io_swapfile_mutex
,NULL
);
8185 server
.io_active_threads
= 0;
8186 if (pipe(pipefds
) == -1) {
8187 redisLog(REDIS_WARNING
,"Unable to intialized VM: pipe(2): %s. Exiting."
8191 server
.io_ready_pipe_read
= pipefds
[0];
8192 server
.io_ready_pipe_write
= pipefds
[1];
8193 redisAssert(anetNonBlock(NULL
,server
.io_ready_pipe_read
) != ANET_ERR
);
8194 /* LZF requires a lot of stack */
8195 pthread_attr_init(&server
.io_threads_attr
);
8196 pthread_attr_getstacksize(&server
.io_threads_attr
, &stacksize
);
8197 while (stacksize
< REDIS_THREAD_STACK_SIZE
) stacksize
*= 2;
8198 pthread_attr_setstacksize(&server
.io_threads_attr
, stacksize
);
8199 /* Listen for events in the threaded I/O pipe */
8200 if (aeCreateFileEvent(server
.el
, server
.io_ready_pipe_read
, AE_READABLE
,
8201 vmThreadedIOCompletedJob
, NULL
) == AE_ERR
)
8202 oom("creating file event");
8205 /* Mark the page as used */
8206 static void vmMarkPageUsed(off_t page
) {
8207 off_t byte
= page
/8;
8209 redisAssert(vmFreePage(page
) == 1);
8210 server
.vm_bitmap
[byte
] |= 1<<bit
;
8213 /* Mark N contiguous pages as used, with 'page' being the first. */
8214 static void vmMarkPagesUsed(off_t page
, off_t count
) {
8217 for (j
= 0; j
< count
; j
++)
8218 vmMarkPageUsed(page
+j
);
8219 server
.vm_stats_used_pages
+= count
;
8220 redisLog(REDIS_DEBUG
,"Mark USED pages: %lld pages at %lld\n",
8221 (long long)count
, (long long)page
);
8224 /* Mark the page as free */
8225 static void vmMarkPageFree(off_t page
) {
8226 off_t byte
= page
/8;
8228 redisAssert(vmFreePage(page
) == 0);
8229 server
.vm_bitmap
[byte
] &= ~(1<<bit
);
8232 /* Mark N contiguous pages as free, with 'page' being the first. */
8233 static void vmMarkPagesFree(off_t page
, off_t count
) {
8236 for (j
= 0; j
< count
; j
++)
8237 vmMarkPageFree(page
+j
);
8238 server
.vm_stats_used_pages
-= count
;
8239 redisLog(REDIS_DEBUG
,"Mark FREE pages: %lld pages at %lld\n",
8240 (long long)count
, (long long)page
);
8243 /* Test if the page is free */
8244 static int vmFreePage(off_t page
) {
8245 off_t byte
= page
/8;
8247 return (server
.vm_bitmap
[byte
] & (1<<bit
)) == 0;
8250 /* Find N contiguous free pages storing the first page of the cluster in *first.
8251 * Returns REDIS_OK if it was able to find N contiguous pages, otherwise
8252 * REDIS_ERR is returned.
8254 * This function uses a simple algorithm: we try to allocate
8255 * REDIS_VM_MAX_NEAR_PAGES sequentially, when we reach this limit we start
8256 * again from the start of the swap file searching for free spaces.
8258 * If it looks pretty clear that there are no free pages near our offset
8259 * we try to find less populated places doing a forward jump of
8260 * REDIS_VM_MAX_RANDOM_JUMP, then we start scanning again a few pages
8261 * without hurry, and then we jump again and so forth...
8263 * This function can be improved using a free list to avoid to guess
8264 * too much, since we could collect data about freed pages.
8266 * note: I implemented this function just after watching an episode of
8267 * Battlestar Galactica, where the hybrid was continuing to say "JUMP!"
8269 static int vmFindContiguousPages(off_t
*first
, off_t n
) {
8270 off_t base
, offset
= 0, since_jump
= 0, numfree
= 0;
8272 if (server
.vm_near_pages
== REDIS_VM_MAX_NEAR_PAGES
) {
8273 server
.vm_near_pages
= 0;
8274 server
.vm_next_page
= 0;
8276 server
.vm_near_pages
++; /* Yet another try for pages near to the old ones */
8277 base
= server
.vm_next_page
;
8279 while(offset
< server
.vm_pages
) {
8280 off_t
this = base
+offset
;
8282 /* If we overflow, restart from page zero */
8283 if (this >= server
.vm_pages
) {
8284 this -= server
.vm_pages
;
8286 /* Just overflowed, what we found on tail is no longer
8287 * interesting, as it's no longer contiguous. */
8291 if (vmFreePage(this)) {
8292 /* This is a free page */
8294 /* Already got N free pages? Return to the caller, with success */
8296 *first
= this-(n
-1);
8297 server
.vm_next_page
= this+1;
8298 redisLog(REDIS_DEBUG
, "FOUND CONTIGUOUS PAGES: %lld pages at %lld\n", (long long) n
, (long long) *first
);
8302 /* The current one is not a free page */
8306 /* Fast-forward if the current page is not free and we already
8307 * searched enough near this place. */
8309 if (!numfree
&& since_jump
>= REDIS_VM_MAX_RANDOM_JUMP
/4) {
8310 offset
+= random() % REDIS_VM_MAX_RANDOM_JUMP
;
8312 /* Note that even if we rewind after the jump, we don't need
8313 * to make sure numfree is set to zero as we only jump *if* it
8314 * is set to zero. */
8316 /* Otherwise just check the next page */
8323 /* Write the specified object at the specified page of the swap file */
8324 static int vmWriteObjectOnSwap(robj
*o
, off_t page
) {
8325 if (server
.vm_enabled
) pthread_mutex_lock(&server
.io_swapfile_mutex
);
8326 if (fseeko(server
.vm_fp
,page
*server
.vm_page_size
,SEEK_SET
) == -1) {
8327 if (server
.vm_enabled
) pthread_mutex_unlock(&server
.io_swapfile_mutex
);
8328 redisLog(REDIS_WARNING
,
8329 "Critical VM problem in vmWriteObjectOnSwap(): can't seek: %s",
8333 rdbSaveObject(server
.vm_fp
,o
);
8334 fflush(server
.vm_fp
);
8335 if (server
.vm_enabled
) pthread_mutex_unlock(&server
.io_swapfile_mutex
);
8339 /* Swap the 'val' object relative to 'key' into disk. Store all the information
8340 * needed to later retrieve the object into the key object.
8341 * If we can't find enough contiguous empty pages to swap the object on disk
8342 * REDIS_ERR is returned. */
8343 static int vmSwapObjectBlocking(robj
*key
, robj
*val
) {
8344 off_t pages
= rdbSavedObjectPages(val
,NULL
);
8347 assert(key
->storage
== REDIS_VM_MEMORY
);
8348 assert(key
->refcount
== 1);
8349 if (vmFindContiguousPages(&page
,pages
) == REDIS_ERR
) return REDIS_ERR
;
8350 if (vmWriteObjectOnSwap(val
,page
) == REDIS_ERR
) return REDIS_ERR
;
8351 key
->vm
.page
= page
;
8352 key
->vm
.usedpages
= pages
;
8353 key
->storage
= REDIS_VM_SWAPPED
;
8354 key
->vtype
= val
->type
;
8355 decrRefCount(val
); /* Deallocate the object from memory. */
8356 vmMarkPagesUsed(page
,pages
);
8357 redisLog(REDIS_DEBUG
,"VM: object %s swapped out at %lld (%lld pages)",
8358 (unsigned char*) key
->ptr
,
8359 (unsigned long long) page
, (unsigned long long) pages
);
8360 server
.vm_stats_swapped_objects
++;
8361 server
.vm_stats_swapouts
++;
8365 static robj
*vmReadObjectFromSwap(off_t page
, int type
) {
8368 if (server
.vm_enabled
) pthread_mutex_lock(&server
.io_swapfile_mutex
);
8369 if (fseeko(server
.vm_fp
,page
*server
.vm_page_size
,SEEK_SET
) == -1) {
8370 redisLog(REDIS_WARNING
,
8371 "Unrecoverable VM problem in vmReadObjectFromSwap(): can't seek: %s",
8375 o
= rdbLoadObject(type
,server
.vm_fp
);
8377 redisLog(REDIS_WARNING
, "Unrecoverable VM problem in vmReadObjectFromSwap(): can't load object from swap file: %s", strerror(errno
));
8380 if (server
.vm_enabled
) pthread_mutex_unlock(&server
.io_swapfile_mutex
);
8384 /* Load the value object relative to the 'key' object from swap to memory.
8385 * The newly allocated object is returned.
8387 * If preview is true the unserialized object is returned to the caller but
8388 * no changes are made to the key object, nor the pages are marked as freed */
8389 static robj
*vmGenericLoadObject(robj
*key
, int preview
) {
8392 redisAssert(key
->storage
== REDIS_VM_SWAPPED
|| key
->storage
== REDIS_VM_LOADING
);
8393 val
= vmReadObjectFromSwap(key
->vm
.page
,key
->vtype
);
8395 key
->storage
= REDIS_VM_MEMORY
;
8396 key
->vm
.atime
= server
.unixtime
;
8397 vmMarkPagesFree(key
->vm
.page
,key
->vm
.usedpages
);
8398 redisLog(REDIS_DEBUG
, "VM: object %s loaded from disk",
8399 (unsigned char*) key
->ptr
);
8400 server
.vm_stats_swapped_objects
--;
8402 redisLog(REDIS_DEBUG
, "VM: object %s previewed from disk",
8403 (unsigned char*) key
->ptr
);
8405 server
.vm_stats_swapins
++;
8409 /* Plain object loading, from swap to memory */
8410 static robj
*vmLoadObject(robj
*key
) {
8411 /* If we are loading the object in background, stop it, we
8412 * need to load this object synchronously ASAP. */
8413 if (key
->storage
== REDIS_VM_LOADING
)
8414 vmCancelThreadedIOJob(key
);
8415 return vmGenericLoadObject(key
,0);
8418 /* Just load the value on disk, without to modify the key.
8419 * This is useful when we want to perform some operation on the value
8420 * without to really bring it from swap to memory, like while saving the
8421 * dataset or rewriting the append only log. */
8422 static robj
*vmPreviewObject(robj
*key
) {
8423 return vmGenericLoadObject(key
,1);
8426 /* How good a candidate is this object for swapping?
8427 * The better candidate it is, the greater the returned value.
8429 * Currently we try to perform a fast estimation of the object size in
8430 * memory, and combine it with aging information.
8432 * Basically swappability = idle-time * log(estimated size)
8434 * Bigger objects are preferred over smaller objects, but not
8435 * proportionally, this is why we use the logarithm. This algorithm is
8436 * just a first try and will probably be tuned later. */
8437 static double computeObjectSwappability(robj
*o
) {
8438 time_t age
= server
.unixtime
- o
->vm
.atime
;
8442 struct dictEntry
*de
;
8445 if (age
<= 0) return 0;
8448 if (o
->encoding
!= REDIS_ENCODING_RAW
) {
8451 asize
= sdslen(o
->ptr
)+sizeof(*o
)+sizeof(long)*2;
8456 listNode
*ln
= listFirst(l
);
8458 asize
= sizeof(list
);
8460 robj
*ele
= ln
->value
;
8463 elesize
= (ele
->encoding
== REDIS_ENCODING_RAW
) ?
8464 (sizeof(*o
)+sdslen(ele
->ptr
)) :
8466 asize
+= (sizeof(listNode
)+elesize
)*listLength(l
);
8471 z
= (o
->type
== REDIS_ZSET
);
8472 d
= z
? ((zset
*)o
->ptr
)->dict
: o
->ptr
;
8474 asize
= sizeof(dict
)+(sizeof(struct dictEntry
*)*dictSlots(d
));
8475 if (z
) asize
+= sizeof(zset
)-sizeof(dict
);
8480 de
= dictGetRandomKey(d
);
8481 ele
= dictGetEntryKey(de
);
8482 elesize
= (ele
->encoding
== REDIS_ENCODING_RAW
) ?
8483 (sizeof(*o
)+sdslen(ele
->ptr
)) :
8485 asize
+= (sizeof(struct dictEntry
)+elesize
)*dictSize(d
);
8486 if (z
) asize
+= sizeof(zskiplistNode
)*dictSize(d
);
8490 if (o
->encoding
== REDIS_ENCODING_ZIPMAP
) {
8491 unsigned char *p
= zipmapRewind((unsigned char*)o
->ptr
);
8492 unsigned int len
= zipmapLen((unsigned char*)o
->ptr
);
8493 unsigned int klen
, vlen
;
8494 unsigned char *key
, *val
;
8496 if ((p
= zipmapNext(p
,&key
,&klen
,&val
,&vlen
)) == NULL
) {
8500 asize
= len
*(klen
+vlen
+3);
8501 } else if (o
->encoding
== REDIS_ENCODING_HT
) {
8503 asize
= sizeof(dict
)+(sizeof(struct dictEntry
*)*dictSlots(d
));
8508 de
= dictGetRandomKey(d
);
8509 ele
= dictGetEntryKey(de
);
8510 elesize
= (ele
->encoding
== REDIS_ENCODING_RAW
) ?
8511 (sizeof(*o
)+sdslen(ele
->ptr
)) :
8513 ele
= dictGetEntryVal(de
);
8514 elesize
= (ele
->encoding
== REDIS_ENCODING_RAW
) ?
8515 (sizeof(*o
)+sdslen(ele
->ptr
)) :
8517 asize
+= (sizeof(struct dictEntry
)+elesize
)*dictSize(d
);
8522 return (double)age
*log(1+asize
);
8525 /* Try to swap an object that's a good candidate for swapping.
8526 * Returns REDIS_OK if the object was swapped, REDIS_ERR if it's not possible
8527 * to swap any object at all.
8529 * If 'usethreads' is true, Redis will try to swap the object in background
8530 * using I/O threads. */
8531 static int vmSwapOneObject(int usethreads
) {
8533 struct dictEntry
*best
= NULL
;
8534 double best_swappability
= 0;
8535 redisDb
*best_db
= NULL
;
8538 for (j
= 0; j
< server
.dbnum
; j
++) {
8539 redisDb
*db
= server
.db
+j
;
8540 /* Why maxtries is set to 100?
8541 * Because this way (usually) we'll find 1 object even if just 1% - 2%
8542 * are swappable objects */
8545 if (dictSize(db
->dict
) == 0) continue;
8546 for (i
= 0; i
< 5; i
++) {
8548 double swappability
;
8550 if (maxtries
) maxtries
--;
8551 de
= dictGetRandomKey(db
->dict
);
8552 key
= dictGetEntryKey(de
);
8553 val
= dictGetEntryVal(de
);
8554 /* Only swap objects that are currently in memory.
8556 * Also don't swap shared objects if threaded VM is on, as we
8557 * try to ensure that the main thread does not touch the
8558 * object while the I/O thread is using it, but we can't
8559 * control other keys without adding additional mutex. */
8560 if (key
->storage
!= REDIS_VM_MEMORY
||
8561 (server
.vm_max_threads
!= 0 && val
->refcount
!= 1)) {
8562 if (maxtries
) i
--; /* don't count this try */
8565 swappability
= computeObjectSwappability(val
);
8566 if (!best
|| swappability
> best_swappability
) {
8568 best_swappability
= swappability
;
8573 if (best
== NULL
) return REDIS_ERR
;
8574 key
= dictGetEntryKey(best
);
8575 val
= dictGetEntryVal(best
);
8577 redisLog(REDIS_DEBUG
,"Key with best swappability: %s, %f",
8578 key
->ptr
, best_swappability
);
8580 /* Unshare the key if needed */
8581 if (key
->refcount
> 1) {
8582 robj
*newkey
= dupStringObject(key
);
8584 key
= dictGetEntryKey(best
) = newkey
;
8588 vmSwapObjectThreaded(key
,val
,best_db
);
8591 if (vmSwapObjectBlocking(key
,val
) == REDIS_OK
) {
8592 dictGetEntryVal(best
) = NULL
;
/* Try to swap one object out without using the I/O threads: this is
 * vmSwapOneObject() called with 'usethreads' set to 0, and its return
 * value is passed through to the caller. */
static int vmSwapOneObjectBlocking(void) {
    return vmSwapOneObject(0);
}
/* Try to swap one object out using the I/O threads: this is
 * vmSwapOneObject() called with 'usethreads' set to 1, and its return
 * value is passed through to the caller. */
static int vmSwapOneObjectThreaded(void) {
    return vmSwapOneObject(1);
}
8608 /* Return true if it's safe to swap out objects in a given moment.
8609 * Basically we don't want to swap objects out while there is a BGSAVE
8610 * or a BGAEOREWRITE running in backgroud. */
8611 static int vmCanSwapOut(void) {
8612 return (server
.bgsavechildpid
== -1 && server
.bgrewritechildpid
== -1);
8615 /* Delete a key if swapped. Returns 1 if the key was found, was swapped
8616 * and was deleted. Otherwise 0 is returned. */
8617 static int deleteIfSwapped(redisDb
*db
, robj
*key
) {
8621 if ((de
= dictFind(db
->dict
,key
)) == NULL
) return 0;
8622 foundkey
= dictGetEntryKey(de
);
8623 if (foundkey
->storage
== REDIS_VM_MEMORY
) return 0;
8628 /* =================== Virtual Memory - Threaded I/O ======================= */
8630 static void freeIOJob(iojob
*j
) {
8631 if ((j
->type
== REDIS_IOJOB_PREPARE_SWAP
||
8632 j
->type
== REDIS_IOJOB_DO_SWAP
||
8633 j
->type
== REDIS_IOJOB_LOAD
) && j
->val
!= NULL
)
8634 decrRefCount(j
->val
);
8635 /* We don't decrRefCount the j->key field as we did't incremented
8636 * the count creating IO Jobs. This is because the key field here is
8637 * just used as an indentifier and if a key is removed the Job should
8638 * never be touched again. */
8642 /* Every time a thread finished a Job, it writes a byte into the write side
8643 * of an unix pipe in order to "awake" the main thread, and this function
8645 static void vmThreadedIOCompletedJob(aeEventLoop
*el
, int fd
, void *privdata
,
8649 int retval
, processed
= 0, toprocess
= -1, trytoswap
= 1;
8651 REDIS_NOTUSED(mask
);
8652 REDIS_NOTUSED(privdata
);
8654 /* For every byte we read in the read side of the pipe, there is one
8655 * I/O job completed to process. */
8656 while((retval
= read(fd
,buf
,1)) == 1) {
8660 struct dictEntry
*de
;
8662 redisLog(REDIS_DEBUG
,"Processing I/O completed job");
8664 /* Get the processed element (the oldest one) */
8666 assert(listLength(server
.io_processed
) != 0);
8667 if (toprocess
== -1) {
8668 toprocess
= (listLength(server
.io_processed
)*REDIS_MAX_COMPLETED_JOBS_PROCESSED
)/100;
8669 if (toprocess
<= 0) toprocess
= 1;
8671 ln
= listFirst(server
.io_processed
);
8673 listDelNode(server
.io_processed
,ln
);
8675 /* If this job is marked as canceled, just ignore it */
8680 /* Post process it in the main thread, as there are things we
8681 * can do just here to avoid race conditions and/or invasive locks */
8682 redisLog(REDIS_DEBUG
,"Job %p type: %d, key at %p (%s) refcount: %d\n", (void*) j
, j
->type
, (void*)j
->key
, (char*)j
->key
->ptr
, j
->key
->refcount
);
8683 de
= dictFind(j
->db
->dict
,j
->key
);
8685 key
= dictGetEntryKey(de
);
8686 if (j
->type
== REDIS_IOJOB_LOAD
) {
8689 /* Key loaded, bring it at home */
8690 key
->storage
= REDIS_VM_MEMORY
;
8691 key
->vm
.atime
= server
.unixtime
;
8692 vmMarkPagesFree(key
->vm
.page
,key
->vm
.usedpages
);
8693 redisLog(REDIS_DEBUG
, "VM: object %s loaded from disk (threaded)",
8694 (unsigned char*) key
->ptr
);
8695 server
.vm_stats_swapped_objects
--;
8696 server
.vm_stats_swapins
++;
8697 dictGetEntryVal(de
) = j
->val
;
8698 incrRefCount(j
->val
);
8701 /* Handle clients waiting for this key to be loaded. */
8702 handleClientsBlockedOnSwappedKey(db
,key
);
8703 } else if (j
->type
== REDIS_IOJOB_PREPARE_SWAP
) {
8704 /* Now we know the amount of pages required to swap this object.
8705 * Let's find some space for it, and queue this task again
8706 * rebranded as REDIS_IOJOB_DO_SWAP. */
8707 if (!vmCanSwapOut() ||
8708 vmFindContiguousPages(&j
->page
,j
->pages
) == REDIS_ERR
)
8710 /* Ooops... no space or we can't swap as there is
8711 * a fork()ed Redis trying to save stuff on disk. */
8713 key
->storage
= REDIS_VM_MEMORY
; /* undo operation */
8715 /* Note that we need to mark this pages as used now,
8716 * if the job will be canceled, we'll mark them as freed
8718 vmMarkPagesUsed(j
->page
,j
->pages
);
8719 j
->type
= REDIS_IOJOB_DO_SWAP
;
8724 } else if (j
->type
== REDIS_IOJOB_DO_SWAP
) {
8727 /* Key swapped. We can finally free some memory. */
8728 if (key
->storage
!= REDIS_VM_SWAPPING
) {
8729 printf("key->storage: %d\n",key
->storage
);
8730 printf("key->name: %s\n",(char*)key
->ptr
);
8731 printf("key->refcount: %d\n",key
->refcount
);
8732 printf("val: %p\n",(void*)j
->val
);
8733 printf("val->type: %d\n",j
->val
->type
);
8734 printf("val->ptr: %s\n",(char*)j
->val
->ptr
);
8736 redisAssert(key
->storage
== REDIS_VM_SWAPPING
);
8737 val
= dictGetEntryVal(de
);
8738 key
->vm
.page
= j
->page
;
8739 key
->vm
.usedpages
= j
->pages
;
8740 key
->storage
= REDIS_VM_SWAPPED
;
8741 key
->vtype
= j
->val
->type
;
8742 decrRefCount(val
); /* Deallocate the object from memory. */
8743 dictGetEntryVal(de
) = NULL
;
8744 redisLog(REDIS_DEBUG
,
8745 "VM: object %s swapped out at %lld (%lld pages) (threaded)",
8746 (unsigned char*) key
->ptr
,
8747 (unsigned long long) j
->page
, (unsigned long long) j
->pages
);
8748 server
.vm_stats_swapped_objects
++;
8749 server
.vm_stats_swapouts
++;
8751 /* Put a few more swap requests in queue if we are still
8753 if (trytoswap
&& vmCanSwapOut() &&
8754 zmalloc_used_memory() > server
.vm_max_memory
)
8759 more
= listLength(server
.io_newjobs
) <
8760 (unsigned) server
.vm_max_threads
;
8762 /* Don't waste CPU time if swappable objects are rare. */
8763 if (vmSwapOneObjectThreaded() == REDIS_ERR
) {
8771 if (processed
== toprocess
) return;
8773 if (retval
< 0 && errno
!= EAGAIN
) {
8774 redisLog(REDIS_WARNING
,
8775 "WARNING: read(2) error in vmThreadedIOCompletedJob() %s",
8780 static void lockThreadedIO(void) {
8781 pthread_mutex_lock(&server
.io_mutex
);
8784 static void unlockThreadedIO(void) {
8785 pthread_mutex_unlock(&server
.io_mutex
);
8788 /* Remove the specified object from the threaded I/O queue if still not
8789 * processed, otherwise make sure to flag it as canceled. */
8790 static void vmCancelThreadedIOJob(robj
*o
) {
8792 server
.io_newjobs
, /* 0 */
8793 server
.io_processing
, /* 1 */
8794 server
.io_processed
/* 2 */
8798 assert(o
->storage
== REDIS_VM_LOADING
|| o
->storage
== REDIS_VM_SWAPPING
);
8801 /* Search for a matching key in one of the queues */
8802 for (i
= 0; i
< 3; i
++) {
8806 listRewind(lists
[i
],&li
);
8807 while ((ln
= listNext(&li
)) != NULL
) {
8808 iojob
*job
= ln
->value
;
8810 if (job
->canceled
) continue; /* Skip this, already canceled. */
8811 if (job
->key
== o
) {
8812 redisLog(REDIS_DEBUG
,"*** CANCELED %p (%s) (type %d) (LIST ID %d)\n",
8813 (void*)job
, (char*)o
->ptr
, job
->type
, i
);
8814 /* Mark the pages as free since the swap didn't happened
8815 * or happened but is now discarded. */
8816 if (i
!= 1 && job
->type
== REDIS_IOJOB_DO_SWAP
)
8817 vmMarkPagesFree(job
->page
,job
->pages
);
8818 /* Cancel the job. It depends on the list the job is
8821 case 0: /* io_newjobs */
8822 /* If the job was yet not processed the best thing to do
8823 * is to remove it from the queue at all */
8825 listDelNode(lists
[i
],ln
);
8827 case 1: /* io_processing */
8828 /* Oh Shi- the thread is messing with the Job:
8830 * Probably it's accessing the object if this is a
8831 * PREPARE_SWAP or DO_SWAP job.
8832 * If it's a LOAD job it may be reading from disk and
8833 * if we don't wait for the job to terminate before to
8834 * cancel it, maybe in a few microseconds data can be
8835 * corrupted in this pages. So the short story is:
8837 * Better to wait for the job to move into the
8838 * next queue (processed)... */
8840 /* We try again and again until the job is completed. */
8842 /* But let's wait some time for the I/O thread
8843 * to finish with this job. After all this condition
8844 * should be very rare. */
8847 case 2: /* io_processed */
8848 /* The job was already processed, that's easy...
8849 * just mark it as canceled so that we'll ignore it
8850 * when processing completed jobs. */
8854 /* Finally we have to adjust the storage type of the object
8855 * in order to "UNDO" the operaiton. */
8856 if (o
->storage
== REDIS_VM_LOADING
)
8857 o
->storage
= REDIS_VM_SWAPPED
;
8858 else if (o
->storage
== REDIS_VM_SWAPPING
)
8859 o
->storage
= REDIS_VM_MEMORY
;
8866 assert(1 != 1); /* We should never reach this */
8869 static void *IOThreadEntryPoint(void *arg
) {
8874 pthread_detach(pthread_self());
8876 /* Get a new job to process */
8878 if (listLength(server
.io_newjobs
) == 0) {
8879 /* No new jobs in queue, exit. */
8880 redisLog(REDIS_DEBUG
,"Thread %ld exiting, nothing to do",
8881 (long) pthread_self());
8882 server
.io_active_threads
--;
8886 ln
= listFirst(server
.io_newjobs
);
8888 listDelNode(server
.io_newjobs
,ln
);
8889 /* Add the job in the processing queue */
8890 j
->thread
= pthread_self();
8891 listAddNodeTail(server
.io_processing
,j
);
8892 ln
= listLast(server
.io_processing
); /* We use ln later to remove it */
8894 redisLog(REDIS_DEBUG
,"Thread %ld got a new job (type %d): %p about key '%s'",
8895 (long) pthread_self(), j
->type
, (void*)j
, (char*)j
->key
->ptr
);
8897 /* Process the Job */
8898 if (j
->type
== REDIS_IOJOB_LOAD
) {
8899 j
->val
= vmReadObjectFromSwap(j
->page
,j
->key
->vtype
);
8900 } else if (j
->type
== REDIS_IOJOB_PREPARE_SWAP
) {
8901 FILE *fp
= fopen("/dev/null","w+");
8902 j
->pages
= rdbSavedObjectPages(j
->val
,fp
);
8904 } else if (j
->type
== REDIS_IOJOB_DO_SWAP
) {
8905 if (vmWriteObjectOnSwap(j
->val
,j
->page
) == REDIS_ERR
)
8909 /* Done: insert the job into the processed queue */
8910 redisLog(REDIS_DEBUG
,"Thread %ld completed the job: %p (key %s)",
8911 (long) pthread_self(), (void*)j
, (char*)j
->key
->ptr
);
8913 listDelNode(server
.io_processing
,ln
);
8914 listAddNodeTail(server
.io_processed
,j
);
8917 /* Signal the main thread there is new stuff to process */
8918 assert(write(server
.io_ready_pipe_write
,"x",1) == 1);
8920 return NULL
; /* never reached */
8923 static void spawnIOThread(void) {
8925 sigset_t mask
, omask
;
8929 sigaddset(&mask
,SIGCHLD
);
8930 sigaddset(&mask
,SIGHUP
);
8931 sigaddset(&mask
,SIGPIPE
);
8932 pthread_sigmask(SIG_SETMASK
, &mask
, &omask
);
8933 while ((err
= pthread_create(&thread
,&server
.io_threads_attr
,IOThreadEntryPoint
,NULL
)) != 0) {
8934 redisLog(REDIS_WARNING
,"Unable to spawn an I/O thread: %s",
8938 pthread_sigmask(SIG_SETMASK
, &omask
, NULL
);
8939 server
.io_active_threads
++;
8942 /* We need to wait for the last thread to exit before we are able to
8943 * fork() in order to BGSAVE or BGREWRITEAOF. */
8944 static void waitEmptyIOJobsQueue(void) {
8946 int io_processed_len
;
8949 if (listLength(server
.io_newjobs
) == 0 &&
8950 listLength(server
.io_processing
) == 0 &&
8951 server
.io_active_threads
== 0)
8956 /* While waiting for empty jobs queue condition we post-process some
8957 * finshed job, as I/O threads may be hanging trying to write against
8958 * the io_ready_pipe_write FD but there are so much pending jobs that
8960 io_processed_len
= listLength(server
.io_processed
);
8962 if (io_processed_len
) {
8963 vmThreadedIOCompletedJob(NULL
,server
.io_ready_pipe_read
,NULL
,0);
8964 usleep(1000); /* 1 millisecond */
8966 usleep(10000); /* 10 milliseconds */
8971 static void vmReopenSwapFile(void) {
8972 /* Note: we don't close the old one as we are in the child process
8973 * and don't want to mess at all with the original file object. */
8974 server
.vm_fp
= fopen(server
.vm_swap_file
,"r+b");
8975 if (server
.vm_fp
== NULL
) {
8976 redisLog(REDIS_WARNING
,"Can't re-open the VM swap file: %s. Exiting.",
8977 server
.vm_swap_file
);
8980 server
.vm_fd
= fileno(server
.vm_fp
);
8983 /* This function must be called while with threaded IO locked */
8984 static void queueIOJob(iojob
*j
) {
8985 redisLog(REDIS_DEBUG
,"Queued IO Job %p type %d about key '%s'\n",
8986 (void*)j
, j
->type
, (char*)j
->key
->ptr
);
8987 listAddNodeTail(server
.io_newjobs
,j
);
8988 if (server
.io_active_threads
< server
.vm_max_threads
)
8992 static int vmSwapObjectThreaded(robj
*key
, robj
*val
, redisDb
*db
) {
8995 assert(key
->storage
== REDIS_VM_MEMORY
);
8996 assert(key
->refcount
== 1);
8998 j
= zmalloc(sizeof(*j
));
8999 j
->type
= REDIS_IOJOB_PREPARE_SWAP
;
9005 j
->thread
= (pthread_t
) -1;
9006 key
->storage
= REDIS_VM_SWAPPING
;
9014 /* ============ Virtual Memory - Blocking clients on missing keys =========== */
9016 /* This function makes the clinet 'c' waiting for the key 'key' to be loaded.
9017 * If there is not already a job loading the key, it is craeted.
9018 * The key is added to the io_keys list in the client structure, and also
9019 * in the hash table mapping swapped keys to waiting clients, that is,
9020 * server.io_waited_keys. */
9021 static int waitForSwappedKey(redisClient
*c
, robj
*key
) {
9022 struct dictEntry
*de
;
9026 /* If the key does not exist or is already in RAM we don't need to
9027 * block the client at all. */
9028 de
= dictFind(c
->db
->dict
,key
);
9029 if (de
== NULL
) return 0;
9030 o
= dictGetEntryKey(de
);
9031 if (o
->storage
== REDIS_VM_MEMORY
) {
9033 } else if (o
->storage
== REDIS_VM_SWAPPING
) {
9034 /* We were swapping the key, undo it! */
9035 vmCancelThreadedIOJob(o
);
9039 /* OK: the key is either swapped, or being loaded just now. */
9041 /* Add the key to the list of keys this client is waiting for.
9042 * This maps clients to keys they are waiting for. */
9043 listAddNodeTail(c
->io_keys
,key
);
9046 /* Add the client to the swapped keys => clients waiting map. */
9047 de
= dictFind(c
->db
->io_keys
,key
);
9051 /* For every key we take a list of clients blocked for it */
9053 retval
= dictAdd(c
->db
->io_keys
,key
,l
);
9055 assert(retval
== DICT_OK
);
9057 l
= dictGetEntryVal(de
);
9059 listAddNodeTail(l
,c
);
9061 /* Are we already loading the key from disk? If not create a job */
9062 if (o
->storage
== REDIS_VM_SWAPPED
) {
9065 o
->storage
= REDIS_VM_LOADING
;
9066 j
= zmalloc(sizeof(*j
));
9067 j
->type
= REDIS_IOJOB_LOAD
;
9070 j
->key
->vtype
= o
->vtype
;
9071 j
->page
= o
->vm
.page
;
9074 j
->thread
= (pthread_t
) -1;
9082 /* Preload keys needed for the ZUNION and ZINTER commands. */
9083 static void zunionInterBlockClientOnSwappedKeys(redisClient
*c
) {
9085 num
= atoi(c
->argv
[2]->ptr
);
9086 for (i
= 0; i
< num
; i
++) {
9087 waitForSwappedKey(c
,c
->argv
[3+i
]);
9091 /* Is this client attempting to run a command against swapped keys?
9092 * If so, block it ASAP, load the keys in background, then resume it.
9094 * The important idea about this function is that it can fail! If keys will
9095 * still be swapped when the client is resumed, this key lookups will
9096 * just block loading keys from disk. In practical terms this should only
9097 * happen with SORT BY command or if there is a bug in this function.
9099 * Return 1 if the client is marked as blocked, 0 if the client can
9100 * continue as the keys it is going to access appear to be in memory. */
9101 static int blockClientOnSwappedKeys(struct redisCommand
*cmd
, redisClient
*c
) {
9104 if (cmd
->vm_preload_proc
!= NULL
) {
9105 cmd
->vm_preload_proc(c
);
9107 if (cmd
->vm_firstkey
== 0) return 0;
9108 last
= cmd
->vm_lastkey
;
9109 if (last
< 0) last
= c
->argc
+last
;
9110 for (j
= cmd
->vm_firstkey
; j
<= last
; j
+= cmd
->vm_keystep
)
9111 waitForSwappedKey(c
,c
->argv
[j
]);
9114 /* If the client was blocked for at least one key, mark it as blocked. */
9115 if (listLength(c
->io_keys
)) {
9116 c
->flags
|= REDIS_IO_WAIT
;
9117 aeDeleteFileEvent(server
.el
,c
->fd
,AE_READABLE
);
9118 server
.vm_blocked_clients
++;
9125 /* Remove the 'key' from the list of blocked keys for a given client.
9127 * The function returns 1 when there are no longer blocking keys after
9128 * the current one was removed (and the client can be unblocked). */
9129 static int dontWaitForSwappedKey(redisClient
*c
, robj
*key
) {
9133 struct dictEntry
*de
;
9135 /* Remove the key from the list of keys this client is waiting for. */
9136 listRewind(c
->io_keys
,&li
);
9137 while ((ln
= listNext(&li
)) != NULL
) {
9138 if (compareStringObjects(ln
->value
,key
) == 0) {
9139 listDelNode(c
->io_keys
,ln
);
9145 /* Remove the client form the key => waiting clients map. */
9146 de
= dictFind(c
->db
->io_keys
,key
);
9148 l
= dictGetEntryVal(de
);
9149 ln
= listSearchKey(l
,c
);
9152 if (listLength(l
) == 0)
9153 dictDelete(c
->db
->io_keys
,key
);
9155 return listLength(c
->io_keys
) == 0;
9158 static void handleClientsBlockedOnSwappedKey(redisDb
*db
, robj
*key
) {
9159 struct dictEntry
*de
;
9164 de
= dictFind(db
->io_keys
,key
);
9167 l
= dictGetEntryVal(de
);
9168 len
= listLength(l
);
9169 /* Note: we can't use something like while(listLength(l)) as the list
9170 * can be freed by the calling function when we remove the last element. */
9173 redisClient
*c
= ln
->value
;
9175 if (dontWaitForSwappedKey(c
,key
)) {
9176 /* Put the client in the list of clients ready to go as we
9177 * loaded all the keys about it. */
9178 listAddNodeTail(server
.io_ready_clients
,c
);
9183 /* =========================== Remote Configuration ========================= */
9185 static void configSetCommand(redisClient
*c
) {
9186 robj
*o
= getDecodedObject(c
->argv
[3]);
9187 if (!strcasecmp(c
->argv
[2]->ptr
,"dbfilename")) {
9188 zfree(server
.dbfilename
);
9189 server
.dbfilename
= zstrdup(o
->ptr
);
9190 } else if (!strcasecmp(c
->argv
[2]->ptr
,"requirepass")) {
9191 zfree(server
.requirepass
);
9192 server
.requirepass
= zstrdup(o
->ptr
);
9193 } else if (!strcasecmp(c
->argv
[2]->ptr
,"masterauth")) {
9194 zfree(server
.masterauth
);
9195 server
.masterauth
= zstrdup(o
->ptr
);
9196 } else if (!strcasecmp(c
->argv
[2]->ptr
,"maxmemory")) {
9197 server
.maxmemory
= strtoll(o
->ptr
, NULL
, 10);
9199 addReplySds(c
,sdscatprintf(sdsempty(),
9200 "-ERR not supported CONFIG parameter %s\r\n",
9201 (char*)c
->argv
[2]->ptr
));
9206 addReply(c
,shared
.ok
);
9209 static void configGetCommand(redisClient
*c
) {
9210 robj
*o
= getDecodedObject(c
->argv
[2]);
9211 robj
*lenobj
= createObject(REDIS_STRING
,NULL
);
9212 char *pattern
= o
->ptr
;
9216 decrRefCount(lenobj
);
9218 if (stringmatch(pattern
,"dbfilename",0)) {
9219 addReplyBulkCString(c
,"dbfilename");
9220 addReplyBulkCString(c
,server
.dbfilename
);
9223 if (stringmatch(pattern
,"requirepass",0)) {
9224 addReplyBulkCString(c
,"requirepass");
9225 addReplyBulkCString(c
,server
.requirepass
);
9228 if (stringmatch(pattern
,"masterauth",0)) {
9229 addReplyBulkCString(c
,"masterauth");
9230 addReplyBulkCString(c
,server
.masterauth
);
9233 if (stringmatch(pattern
,"maxmemory",0)) {
9236 snprintf(buf
,128,"%llu\n",server
.maxmemory
);
9237 addReplyBulkCString(c
,"maxmemory");
9238 addReplyBulkCString(c
,buf
);
9242 lenobj
->ptr
= sdscatprintf(sdsempty(),"*%d\r\n",matches
*2);
9245 static void configCommand(redisClient
*c
) {
9246 if (!strcasecmp(c
->argv
[1]->ptr
,"set")) {
9247 if (c
->argc
!= 4) goto badarity
;
9248 configSetCommand(c
);
9249 } else if (!strcasecmp(c
->argv
[1]->ptr
,"get")) {
9250 if (c
->argc
!= 3) goto badarity
;
9251 configGetCommand(c
);
9252 } else if (!strcasecmp(c
->argv
[1]->ptr
,"resetstat")) {
9253 if (c
->argc
!= 2) goto badarity
;
9254 server
.stat_numcommands
= 0;
9255 server
.stat_numconnections
= 0;
9256 server
.stat_expiredkeys
= 0;
9257 server
.stat_starttime
= time(NULL
);
9258 addReply(c
,shared
.ok
);
9260 addReplySds(c
,sdscatprintf(sdsempty(),
9261 "-ERR CONFIG subcommand must be one of GET, SET, RESETSTAT\r\n"));
9266 addReplySds(c
,sdscatprintf(sdsempty(),
9267 "-ERR Wrong number of arguments for CONFIG %s\r\n",
9268 (char*) c
->argv
[1]->ptr
));
9271 /* =========================== Pubsub implementation ======================== */
9273 static void freePubsubPattern(void *p
) {
9274 pubsubPattern
*pat
= p
;
9276 decrRefCount(pat
->pattern
);
9280 static int listMatchPubsubPattern(void *a
, void *b
) {
9281 pubsubPattern
*pa
= a
, *pb
= b
;
9283 return (pa
->client
== pb
->client
) &&
9284 (compareStringObjects(pa
->pattern
,pb
->pattern
) == 0);
9287 /* Subscribe a client to a channel. Returns 1 if the operation succeeded, or
9288 * 0 if the client was already subscribed to that channel. */
9289 static int pubsubSubscribeChannel(redisClient
*c
, robj
*channel
) {
9290 struct dictEntry
*de
;
9291 list
*clients
= NULL
;
9294 /* Add the channel to the client -> channels hash table */
9295 if (dictAdd(c
->pubsub_channels
,channel
,NULL
) == DICT_OK
) {
9297 incrRefCount(channel
);
9298 /* Add the client to the channel -> list of clients hash table */
9299 de
= dictFind(server
.pubsub_channels
,channel
);
9301 clients
= listCreate();
9302 dictAdd(server
.pubsub_channels
,channel
,clients
);
9303 incrRefCount(channel
);
9305 clients
= dictGetEntryVal(de
);
9307 listAddNodeTail(clients
,c
);
9309 /* Notify the client */
9310 addReply(c
,shared
.mbulk3
);
9311 addReply(c
,shared
.subscribebulk
);
9312 addReplyBulk(c
,channel
);
9313 addReplyLong(c
,dictSize(c
->pubsub_channels
)+listLength(c
->pubsub_patterns
));
9317 /* Unsubscribe a client from a channel. Returns 1 if the operation succeeded, or
9318 * 0 if the client was not subscribed to the specified channel. */
9319 static int pubsubUnsubscribeChannel(redisClient
*c
, robj
*channel
, int notify
) {
9320 struct dictEntry
*de
;
9325 /* Remove the channel from the client -> channels hash table */
9326 incrRefCount(channel
); /* channel may be just a pointer to the same object
9327 we have in the hash tables. Protect it... */
9328 if (dictDelete(c
->pubsub_channels
,channel
) == DICT_OK
) {
9330 /* Remove the client from the channel -> clients list hash table */
9331 de
= dictFind(server
.pubsub_channels
,channel
);
9333 clients
= dictGetEntryVal(de
);
9334 ln
= listSearchKey(clients
,c
);
9336 listDelNode(clients
,ln
);
9337 if (listLength(clients
) == 0) {
9338 /* Free the list and associated hash entry at all if this was
9339 * the latest client, so that it will be possible to abuse
9340 * Redis PUBSUB creating millions of channels. */
9341 dictDelete(server
.pubsub_channels
,channel
);
9344 /* Notify the client */
9346 addReply(c
,shared
.mbulk3
);
9347 addReply(c
,shared
.unsubscribebulk
);
9348 addReplyBulk(c
,channel
);
9349 addReplyLong(c
,dictSize(c
->pubsub_channels
)+
9350 listLength(c
->pubsub_patterns
));
9353 decrRefCount(channel
); /* it is finally safe to release it */
9357 /* Subscribe a client to a pattern. Returns 1 if the operation succeeded, or 0 if the clinet was already subscribed to that pattern. */
9358 static int pubsubSubscribePattern(redisClient
*c
, robj
*pattern
) {
9361 if (listSearchKey(c
->pubsub_patterns
,pattern
) == NULL
) {
9364 listAddNodeTail(c
->pubsub_patterns
,pattern
);
9365 incrRefCount(pattern
);
9366 pat
= zmalloc(sizeof(*pat
));
9367 pat
->pattern
= getDecodedObject(pattern
);
9369 listAddNodeTail(server
.pubsub_patterns
,pat
);
9371 /* Notify the client */
9372 addReply(c
,shared
.mbulk3
);
9373 addReply(c
,shared
.psubscribebulk
);
9374 addReplyBulk(c
,pattern
);
9375 addReplyLong(c
,dictSize(c
->pubsub_channels
)+listLength(c
->pubsub_patterns
));
9379 /* Unsubscribe a client from a channel. Returns 1 if the operation succeeded, or
9380 * 0 if the client was not subscribed to the specified channel. */
9381 static int pubsubUnsubscribePattern(redisClient
*c
, robj
*pattern
, int notify
) {
9386 incrRefCount(pattern
); /* Protect the object. May be the same we remove */
9387 if ((ln
= listSearchKey(c
->pubsub_patterns
,pattern
)) != NULL
) {
9389 listDelNode(c
->pubsub_patterns
,ln
);
9391 pat
.pattern
= pattern
;
9392 ln
= listSearchKey(server
.pubsub_patterns
,&pat
);
9393 listDelNode(server
.pubsub_patterns
,ln
);
9395 /* Notify the client */
9397 addReply(c
,shared
.mbulk3
);
9398 addReply(c
,shared
.punsubscribebulk
);
9399 addReplyBulk(c
,pattern
);
9400 addReplyLong(c
,dictSize(c
->pubsub_channels
)+
9401 listLength(c
->pubsub_patterns
));
9403 decrRefCount(pattern
);
9407 /* Unsubscribe from all the channels. Return the number of channels the
9408 * client was subscribed from. */
9409 static int pubsubUnsubscribeAllChannels(redisClient
*c
, int notify
) {
9410 dictIterator
*di
= dictGetIterator(c
->pubsub_channels
);
9414 while((de
= dictNext(di
)) != NULL
) {
9415 robj
*channel
= dictGetEntryKey(de
);
9417 count
+= pubsubUnsubscribeChannel(c
,channel
,notify
);
9419 dictReleaseIterator(di
);
9423 /* Unsubscribe from all the patterns. Return the number of patterns the
9424 * client was subscribed from. */
9425 static int pubsubUnsubscribeAllPatterns(redisClient
*c
, int notify
) {
9430 listRewind(c
->pubsub_patterns
,&li
);
9431 while ((ln
= listNext(&li
)) != NULL
) {
9432 robj
*pattern
= ln
->value
;
9434 count
+= pubsubUnsubscribePattern(c
,pattern
,notify
);
9439 /* Publish a message */
9440 static int pubsubPublishMessage(robj
*channel
, robj
*message
) {
9442 struct dictEntry
*de
;
9446 /* Send to clients listening for that channel */
9447 de
= dictFind(server
.pubsub_channels
,channel
);
9449 list
*list
= dictGetEntryVal(de
);
9453 listRewind(list
,&li
);
9454 while ((ln
= listNext(&li
)) != NULL
) {
9455 redisClient
*c
= ln
->value
;
9457 addReply(c
,shared
.mbulk3
);
9458 addReply(c
,shared
.messagebulk
);
9459 addReplyBulk(c
,channel
);
9460 addReplyBulk(c
,message
);
9464 /* Send to clients listening to matching channels */
9465 if (listLength(server
.pubsub_patterns
)) {
9466 listRewind(server
.pubsub_patterns
,&li
);
9467 channel
= getDecodedObject(channel
);
9468 while ((ln
= listNext(&li
)) != NULL
) {
9469 pubsubPattern
*pat
= ln
->value
;
9471 if (stringmatchlen((char*)pat
->pattern
->ptr
,
9472 sdslen(pat
->pattern
->ptr
),
9473 (char*)channel
->ptr
,
9474 sdslen(channel
->ptr
),0)) {
9475 addReply(pat
->client
,shared
.mbulk3
);
9476 addReply(pat
->client
,shared
.messagebulk
);
9477 addReplyBulk(pat
->client
,channel
);
9478 addReplyBulk(pat
->client
,message
);
9482 decrRefCount(channel
);
9487 static void subscribeCommand(redisClient
*c
) {
9490 for (j
= 1; j
< c
->argc
; j
++)
9491 pubsubSubscribeChannel(c
,c
->argv
[j
]);
9494 static void unsubscribeCommand(redisClient
*c
) {
9496 pubsubUnsubscribeAllChannels(c
,1);
9501 for (j
= 1; j
< c
->argc
; j
++)
9502 pubsubUnsubscribeChannel(c
,c
->argv
[j
],1);
9506 static void psubscribeCommand(redisClient
*c
) {
9509 for (j
= 1; j
< c
->argc
; j
++)
9510 pubsubSubscribePattern(c
,c
->argv
[j
]);
9513 static void punsubscribeCommand(redisClient
*c
) {
9515 pubsubUnsubscribeAllPatterns(c
,1);
9520 for (j
= 1; j
< c
->argc
; j
++)
9521 pubsubUnsubscribePattern(c
,c
->argv
[j
],1);
9525 static void publishCommand(redisClient
*c
) {
9526 int receivers
= pubsubPublishMessage(c
->argv
[1],c
->argv
[2]);
9527 addReplyLong(c
,receivers
);
9530 /* ================================= Debugging ============================== */
9532 static void debugCommand(redisClient
*c
) {
9533 if (!strcasecmp(c
->argv
[1]->ptr
,"segfault")) {
9535 } else if (!strcasecmp(c
->argv
[1]->ptr
,"reload")) {
9536 if (rdbSave(server
.dbfilename
) != REDIS_OK
) {
9537 addReply(c
,shared
.err
);
9541 if (rdbLoad(server
.dbfilename
) != REDIS_OK
) {
9542 addReply(c
,shared
.err
);
9545 redisLog(REDIS_WARNING
,"DB reloaded by DEBUG RELOAD");
9546 addReply(c
,shared
.ok
);
9547 } else if (!strcasecmp(c
->argv
[1]->ptr
,"loadaof")) {
9549 if (loadAppendOnlyFile(server
.appendfilename
) != REDIS_OK
) {
9550 addReply(c
,shared
.err
);
9553 redisLog(REDIS_WARNING
,"Append Only File loaded by DEBUG LOADAOF");
9554 addReply(c
,shared
.ok
);
9555 } else if (!strcasecmp(c
->argv
[1]->ptr
,"object") && c
->argc
== 3) {
9556 dictEntry
*de
= dictFind(c
->db
->dict
,c
->argv
[2]);
9560 addReply(c
,shared
.nokeyerr
);
9563 key
= dictGetEntryKey(de
);
9564 val
= dictGetEntryVal(de
);
9565 if (!server
.vm_enabled
|| (key
->storage
== REDIS_VM_MEMORY
||
9566 key
->storage
== REDIS_VM_SWAPPING
)) {
9570 if (val
->encoding
< (sizeof(strencoding
)/sizeof(char*))) {
9571 strenc
= strencoding
[val
->encoding
];
9573 snprintf(buf
,64,"unknown encoding %d\n", val
->encoding
);
9576 addReplySds(c
,sdscatprintf(sdsempty(),
9577 "+Key at:%p refcount:%d, value at:%p refcount:%d "
9578 "encoding:%s serializedlength:%lld\r\n",
9579 (void*)key
, key
->refcount
, (void*)val
, val
->refcount
,
9580 strenc
, (long long) rdbSavedObjectLen(val
,NULL
)));
9582 addReplySds(c
,sdscatprintf(sdsempty(),
9583 "+Key at:%p refcount:%d, value swapped at: page %llu "
9584 "using %llu pages\r\n",
9585 (void*)key
, key
->refcount
, (unsigned long long) key
->vm
.page
,
9586 (unsigned long long) key
->vm
.usedpages
));
9588 } else if (!strcasecmp(c
->argv
[1]->ptr
,"swapin") && c
->argc
== 3) {
9589 lookupKeyRead(c
->db
,c
->argv
[2]);
9590 addReply(c
,shared
.ok
);
9591 } else if (!strcasecmp(c
->argv
[1]->ptr
,"swapout") && c
->argc
== 3) {
9592 dictEntry
*de
= dictFind(c
->db
->dict
,c
->argv
[2]);
9595 if (!server
.vm_enabled
) {
9596 addReplySds(c
,sdsnew("-ERR Virtual Memory is disabled\r\n"));
9600 addReply(c
,shared
.nokeyerr
);
9603 key
= dictGetEntryKey(de
);
9604 val
= dictGetEntryVal(de
);
9605 /* If the key is shared we want to create a copy */
9606 if (key
->refcount
> 1) {
9607 robj
*newkey
= dupStringObject(key
);
9609 key
= dictGetEntryKey(de
) = newkey
;
9612 if (key
->storage
!= REDIS_VM_MEMORY
) {
9613 addReplySds(c
,sdsnew("-ERR This key is not in memory\r\n"));
9614 } else if (vmSwapObjectBlocking(key
,val
) == REDIS_OK
) {
9615 dictGetEntryVal(de
) = NULL
;
9616 addReply(c
,shared
.ok
);
9618 addReply(c
,shared
.err
);
9621 addReplySds(c
,sdsnew(
9622 "-ERR Syntax error, try DEBUG [SEGFAULT|OBJECT <key>|SWAPIN <key>|SWAPOUT <key>|RELOAD]\r\n"));
9626 static void _redisAssert(char *estr
, char *file
, int line
) {
9627 redisLog(REDIS_WARNING
,"=== ASSERTION FAILED ===");
9628 redisLog(REDIS_WARNING
,"==> %s:%d '%s' is not true\n",file
,line
,estr
);
9629 #ifdef HAVE_BACKTRACE
9630 redisLog(REDIS_WARNING
,"(forcing SIGSEGV in order to print the stack trace)");
9635 /* =================================== Main! ================================ */
/* Read /proc/sys/vm/overcommit_memory and return its content converted
 * with atoi(). Returns -1 if the file cannot be opened or read. */
int linuxOvercommitMemoryValue(void) {
    char line[64];
    int value = -1;
    FILE *fp = fopen("/proc/sys/vm/overcommit_memory","r");

    if (!fp) return -1;
    if (fgets(line,64,fp) != NULL) value = atoi(line);
    fclose(fp);

    return value;
}
9652 void linuxOvercommitMemoryWarning(void) {
9653 if (linuxOvercommitMemoryValue() == 0) {
9654 redisLog(REDIS_WARNING
,"WARNING overcommit_memory is set to 0! Background save may fail under low condition memory. To fix this issue add 'vm.overcommit_memory = 1' to /etc/sysctl.conf and then reboot or run the command 'sysctl vm.overcommit_memory=1' for this to take effect.");
9657 #endif /* __linux__ */
9659 static void daemonize(void) {
9663 if (fork() != 0) exit(0); /* parent exits */
9664 setsid(); /* create a new session */
9666 /* Every output goes to /dev/null. If Redis is daemonized but
9667 * the 'logfile' is set to 'stdout' in the configuration file
9668 * it will not log at all. */
9669 if ((fd
= open("/dev/null", O_RDWR
, 0)) != -1) {
9670 dup2(fd
, STDIN_FILENO
);
9671 dup2(fd
, STDOUT_FILENO
);
9672 dup2(fd
, STDERR_FILENO
);
9673 if (fd
> STDERR_FILENO
) close(fd
);
9675 /* Try to write the pid file */
9676 fp
= fopen(server
.pidfile
,"w");
9678 fprintf(fp
,"%d\n",getpid());
9683 static void version() {
9684 printf("Redis server version %s\n", REDIS_VERSION
);
/* Print the command line usage on standard error and exit with status 1. */
static void usage() {
    fputs("Usage: ./redis-server [/path/to/redis.conf]\n",stderr);
    fputs(" ./redis-server - (read config from stdin)\n",stderr);
    exit(1);
}
9694 int main(int argc
, char **argv
) {
9699 if (strcmp(argv
[1], "-v") == 0 ||
9700 strcmp(argv
[1], "--version") == 0) version();
9701 if (strcmp(argv
[1], "--help") == 0) usage();
9702 resetServerSaveParams();
9703 loadServerConfig(argv
[1]);
9704 } else if ((argc
> 2)) {
9707 redisLog(REDIS_WARNING
,"Warning: no config file specified, using the default config. In order to specify a config file use 'redis-server /path/to/redis.conf'");
9709 if (server
.daemonize
) daemonize();
9711 redisLog(REDIS_NOTICE
,"Server started, Redis version " REDIS_VERSION
);
9713 linuxOvercommitMemoryWarning();
9716 if (server
.appendonly
) {
9717 if (loadAppendOnlyFile(server
.appendfilename
) == REDIS_OK
)
9718 redisLog(REDIS_NOTICE
,"DB loaded from append only file: %ld seconds",time(NULL
)-start
);
9720 if (rdbLoad(server
.dbfilename
) == REDIS_OK
)
9721 redisLog(REDIS_NOTICE
,"DB loaded from disk: %ld seconds",time(NULL
)-start
);
9723 redisLog(REDIS_NOTICE
,"The server is now ready to accept connections on port %d", server
.port
);
9724 aeSetBeforeSleepProc(server
.el
,beforeSleep
);
9726 aeDeleteEventLoop(server
.el
);
9730 /* ============================= Backtrace support ========================= */
9732 #ifdef HAVE_BACKTRACE
9733 static char *findFuncName(void *pointer
, unsigned long *offset
);
9735 static void *getMcontextEip(ucontext_t
*uc
) {
9736 #if defined(__FreeBSD__)
9737 return (void*) uc
->uc_mcontext
.mc_eip
;
9738 #elif defined(__dietlibc__)
9739 return (void*) uc
->uc_mcontext
.eip
;
9740 #elif defined(__APPLE__) && !defined(MAC_OS_X_VERSION_10_6)
9742 return (void*) uc
->uc_mcontext
->__ss
.__rip
;
9744 return (void*) uc
->uc_mcontext
->__ss
.__eip
;
9746 #elif defined(__APPLE__) && defined(MAC_OS_X_VERSION_10_6)
9747 #if defined(_STRUCT_X86_THREAD_STATE64) && !defined(__i386__)
9748 return (void*) uc
->uc_mcontext
->__ss
.__rip
;
9750 return (void*) uc
->uc_mcontext
->__ss
.__eip
;
9752 #elif defined(__i386__) || defined(__X86_64__) || defined(__x86_64__)
9753 return (void*) uc
->uc_mcontext
.gregs
[REG_EIP
]; /* Linux 32/64 bit */
9754 #elif defined(__ia64__) /* Linux IA64 */
9755 return (void*) uc
->uc_mcontext
.sc_ip
;
9761 static void segvHandler(int sig
, siginfo_t
*info
, void *secret
) {
9763 char **messages
= NULL
;
9764 int i
, trace_size
= 0;
9765 unsigned long offset
=0;
9766 ucontext_t
*uc
= (ucontext_t
*) secret
;
9768 REDIS_NOTUSED(info
);
9770 redisLog(REDIS_WARNING
,
9771 "======= Ooops! Redis %s got signal: -%d- =======", REDIS_VERSION
, sig
);
9772 infostring
= genRedisInfoString();
9773 redisLog(REDIS_WARNING
, "%s",infostring
);
9774 /* It's not safe to sdsfree() the returned string under memory
9775 * corruption conditions. Let it leak as we are going to abort */
9777 trace_size
= backtrace(trace
, 100);
9778 /* overwrite sigaction with caller's address */
9779 if (getMcontextEip(uc
) != NULL
) {
9780 trace
[1] = getMcontextEip(uc
);
9782 messages
= backtrace_symbols(trace
, trace_size
);
9784 for (i
=1; i
<trace_size
; ++i
) {
9785 char *fn
= findFuncName(trace
[i
], &offset
), *p
;
9787 p
= strchr(messages
[i
],'+');
9788 if (!fn
|| (p
&& ((unsigned long)strtol(p
+1,NULL
,10)) < offset
)) {
9789 redisLog(REDIS_WARNING
,"%s", messages
[i
]);
9791 redisLog(REDIS_WARNING
,"%d redis-server %p %s + %d", i
, trace
[i
], fn
, (unsigned int)offset
);
9794 /* free(messages); Don't call free() with possibly corrupted memory. */
9798 static void setupSigSegvAction(void) {
9799 struct sigaction act
;
9801 sigemptyset (&act
.sa_mask
);
9802 /* When the SA_SIGINFO flag is set in sa_flags then sa_sigaction
9803 * is used. Otherwise, sa_handler is used */
9804 act
.sa_flags
= SA_NODEFER
| SA_ONSTACK
| SA_RESETHAND
| SA_SIGINFO
;
9805 act
.sa_sigaction
= segvHandler
;
9806 sigaction (SIGSEGV
, &act
, NULL
);
9807 sigaction (SIGBUS
, &act
, NULL
);
9808 sigaction (SIGFPE
, &act
, NULL
);
9809 sigaction (SIGILL
, &act
, NULL
);
9810 sigaction (SIGBUS
, &act
, NULL
);
9814 #include "staticsymbols.h"
9815 /* This function try to convert a pointer into a function name. It's used in
9816 * oreder to provide a backtrace under segmentation fault that's able to
9817 * display functions declared as static (otherwise the backtrace is useless). */
9818 static char *findFuncName(void *pointer
, unsigned long *offset
){
9820 unsigned long off
, minoff
= 0;
9822 /* Try to match against the Symbol with the smallest offset */
9823 for (i
=0; symsTable
[i
].pointer
; i
++) {
9824 unsigned long lp
= (unsigned long) pointer
;
9826 if (lp
!= (unsigned long)-1 && lp
>= symsTable
[i
].pointer
) {
9827 off
=lp
-symsTable
[i
].pointer
;
9828 if (ret
< 0 || off
< minoff
) {
9834 if (ret
== -1) return NULL
;
9836 return symsTable
[ret
].name
;
9838 #else /* HAVE_BACKTRACE */
/* Without backtrace support no crash handler is installed. */
static void setupSigSegvAction(void) {
    return;
}
9841 #endif /* HAVE_BACKTRACE */