2 * Copyright (c) 2009-2010, Salvatore Sanfilippo <antirez at gmail dot com>
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions are met:
8 * * Redistributions of source code must retain the above copyright notice,
9 * this list of conditions and the following disclaimer.
10 * * Redistributions in binary form must reproduce the above copyright
11 * notice, this list of conditions and the following disclaimer in the
12 * documentation and/or other materials provided with the distribution.
13 * * Neither the name of Redis nor the names of its contributors may be used
14 * to endorse or promote products derived from this software without
15 * specific prior written permission.
17 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
18 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
21 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
22 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
23 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
24 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
25 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
26 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
27 * POSSIBILITY OF SUCH DAMAGE.
30 #define REDIS_VERSION "1.3.8"
40 #define __USE_POSIX199309
47 #endif /* HAVE_BACKTRACE */
55 #include <arpa/inet.h>
59 #include <sys/resource.h>
66 #include "solarisfixes.h"
70 #include "ae.h" /* Event driven programming library */
71 #include "sds.h" /* Dynamic safe strings */
72 #include "anet.h" /* Networking the easy way */
73 #include "dict.h" /* Hash tables */
74 #include "adlist.h" /* Linked lists */
75 #include "zmalloc.h" /* total memory usage aware version of malloc/free */
76 #include "lzf.h" /* LZF compression library */
77 #include "pqsort.h" /* Partial qsort for SORT+LIMIT */
84 /* Static server configuration */
85 #define REDIS_SERVERPORT 6379 /* TCP port */
86 #define REDIS_MAXIDLETIME (60*5) /* default client timeout */
87 #define REDIS_IOBUF_LEN 1024
88 #define REDIS_LOADBUF_LEN 1024
89 #define REDIS_STATIC_ARGS 8
90 #define REDIS_DEFAULT_DBNUM 16
91 #define REDIS_CONFIGLINE_MAX 1024
92 #define REDIS_OBJFREELIST_MAX 1000000 /* Max number of objects to cache */
93 #define REDIS_MAX_SYNC_TIME 60 /* Slave can't take more to sync */
94 #define REDIS_EXPIRELOOKUPS_PER_CRON 10 /* try to expire 10 keys/loop */
95 #define REDIS_MAX_WRITE_PER_EVENT (1024*64)
96 #define REDIS_REQUEST_MAX_SIZE (1024*1024*256) /* max bytes in inline command */
98 /* If more than REDIS_WRITEV_THRESHOLD write packets are pending use writev */
99 #define REDIS_WRITEV_THRESHOLD 3
100 /* Max number of iovecs used for each writev call */
101 #define REDIS_WRITEV_IOVEC_COUNT 256
103 /* Hash table parameters */
104 #define REDIS_HT_MINFILL 10 /* Minimal hash table fill 10% */
107 #define REDIS_CMD_BULK 1 /* Bulk write command */
108 #define REDIS_CMD_INLINE 2 /* Inline command */
109 /* REDIS_CMD_DENYOOM deserves a longer comment: all the commands marked with
110 this flag will return an error when the 'maxmemory' option is set in the
111 config file and the server is using more than maxmemory bytes of memory.
112 In short these commands are denied on low memory conditions. */
113 #define REDIS_CMD_DENYOOM 4
114 #define REDIS_CMD_FORCE_REPLICATION 8 /* Force replication even if dirty is 0 */
117 #define REDIS_STRING 0
123 /* Object encodings. Some kinds of objects, like Strings and Hashes, can be
124 * internally represented in multiple ways. The 'encoding' field of the object
125 * is set to one of these values for this object. */
126 #define REDIS_ENCODING_RAW 0 /* Raw representation */
127 #define REDIS_ENCODING_INT 1 /* Encoded as integer */
128 #define REDIS_ENCODING_ZIPMAP 2 /* Encoded as zipmap */
129 #define REDIS_ENCODING_HT 3 /* Encoded as an hash table */
131 static char* strencoding
[] = {
132 "raw", "int", "zipmap", "hashtable"
135 /* Object types only used for dumping to disk */
136 #define REDIS_EXPIRETIME 253
137 #define REDIS_SELECTDB 254
138 #define REDIS_EOF 255
140 /* Defines related to the dump file format. To store 32 bits lengths for short
141 * keys requires a lot of space, so we check the most significant 2 bits of
142 * the first byte to interpret the length:
144 * 00|000000 => if the two MSB are 00 the len is the 6 bits of this byte
145 * 01|000000 00000000 => 01, the len is 14 bits, 6 bits + 8 bits of next byte
146 * 10|000000 [32 bit integer] => if it's 10, a full 32 bit len will follow
147 * 11|000000 this means: specially encoded object will follow. The six bits
148 * number specify the kind of object that follows.
149 * See the REDIS_RDB_ENC_* defines.
151 * Lengths up to 63 are stored using a single byte; most DB keys, and many
152 * values, will fit inside. */
153 #define REDIS_RDB_6BITLEN 0 /* MSBs 00: the length is the low 6 bits of this byte */
154 #define REDIS_RDB_14BITLEN 1 /* MSBs 01: 14 bit length, 6 bits + 8 bits of next byte */
155 #define REDIS_RDB_32BITLEN 2 /* MSBs 10: a full 32 bit length follows */
156 #define REDIS_RDB_ENCVAL 3 /* MSBs 11: a specially encoded object follows */
157 #define REDIS_RDB_LENERR UINT_MAX /* NOTE(review): presumably the error value returned when a length cannot be read -- confirm against the loader */
159 /* When a length of a string object stored on disk has the first two bits
160 * set, the remaining six bits specify a special encoding for the object
161 * according to the following defines: */
162 #define REDIS_RDB_ENC_INT8 0 /* 8 bit signed integer */
163 #define REDIS_RDB_ENC_INT16 1 /* 16 bit signed integer */
164 #define REDIS_RDB_ENC_INT32 2 /* 32 bit signed integer */
165 #define REDIS_RDB_ENC_LZF 3 /* string compressed with FASTLZ */
167 /* Virtual memory object->where field. */
168 #define REDIS_VM_MEMORY 0 /* The object is on memory */
169 #define REDIS_VM_SWAPPED 1 /* The object is on disk */
170 #define REDIS_VM_SWAPPING 2 /* Redis is swapping this object on disk */
171 #define REDIS_VM_LOADING 3 /* Redis is loading this object from disk */
173 /* Virtual memory static configuration stuff.
174 * Check vmFindContiguousPages() to know more about these magic numbers. */
175 #define REDIS_VM_MAX_NEAR_PAGES 65536
176 #define REDIS_VM_MAX_RANDOM_JUMP 4096
177 #define REDIS_VM_MAX_THREADS 32
178 #define REDIS_THREAD_STACK_SIZE (1024*1024*4)
179 /* The following is the *percentage* of completed I/O jobs to process when the
180 * handler is called. While Virtual Memory I/O operations are performed by
181 * threads, these operations must be processed by the main thread when completed
182 * in order to take effect. */
183 #define REDIS_MAX_COMPLETED_JOBS_PROCESSED 1
186 #define REDIS_SLAVE 1 /* This client is a slave server */
187 #define REDIS_MASTER 2 /* This client is a master server */
188 #define REDIS_MONITOR 4 /* This client is a slave monitor, see MONITOR */
189 #define REDIS_MULTI 8 /* This client is in a MULTI context */
190 #define REDIS_BLOCKED 16 /* The client is waiting in a blocking operation */
191 #define REDIS_IO_WAIT 32 /* The client is waiting for Virtual Memory I/O */
193 /* Slave replication state - slave side */
194 #define REDIS_REPL_NONE 0 /* No active replication */
195 #define REDIS_REPL_CONNECT 1 /* Must connect to master */
196 #define REDIS_REPL_CONNECTED 2 /* Connected to master */
198 /* Slave replication state - from the point of view of master
199 * Note that in SEND_BULK and ONLINE state the slave receives new updates
200 * in its output queue. In the WAIT_BGSAVE state instead the server is waiting
201 * to start the next background saving in order to send updates to it. */
202 #define REDIS_REPL_WAIT_BGSAVE_START 3 /* master waits bgsave to start feeding it */
203 #define REDIS_REPL_WAIT_BGSAVE_END 4 /* master waits bgsave to start bulk DB transmission */
204 #define REDIS_REPL_SEND_BULK 5 /* master is sending the bulk DB */
205 #define REDIS_REPL_ONLINE 6 /* bulk DB already transmitted, receive updates */
207 /* List related stuff */
211 /* Sort operations */
212 #define REDIS_SORT_GET 0
213 #define REDIS_SORT_ASC 1
214 #define REDIS_SORT_DESC 2
215 #define REDIS_SORTKEY_MAX 1024
218 #define REDIS_DEBUG 0
219 #define REDIS_VERBOSE 1
220 #define REDIS_NOTICE 2
221 #define REDIS_WARNING 3
/* Anti-warning macro: evaluates V and discards the result, so the compiler
 * does not complain about an unused variable or parameter.
 *
 * The argument is wrapped in its own parentheses so that any expression can
 * be passed: without them REDIS_NOTUSED(a + b) would expand to
 * ((void) a + b), which applies '+' to a void operand and does not compile. */
#define REDIS_NOTUSED(V) ((void)(V))
226 #define ZSKIPLIST_MAXLEVEL 32 /* Should be enough for 2^32 elements */
227 #define ZSKIPLIST_P 0.25 /* Skiplist P = 1/4 */
229 /* Append only defines */
230 #define APPENDFSYNC_NO 0
231 #define APPENDFSYNC_ALWAYS 1
232 #define APPENDFSYNC_EVERYSEC 2
234 /* Hashes related defaults */
235 #define REDIS_HASH_MAX_ZIPMAP_ENTRIES 64
236 #define REDIS_HASH_MAX_ZIPMAP_VALUE 512
238 /* We can print the stacktrace, so our assert is defined this way:
 * when the expression _e is true the macro evaluates to (void)0; when it is
 * false, _redisAssert() is called with the stringified expression, the
 * current source file and line, and then the process exits via _exit(1).
 * The whole macro is a single void expression, so it can be used anywhere
 * an expression statement is allowed. */
239 #define redisAssert(_e) ((_e)?(void)0 : (_redisAssert(#_e,__FILE__,__LINE__),_exit(1)))
/* Reporting helper invoked by redisAssert() on failure.
 *   estr: text of the failed expression
 *   file: source file name (__FILE__ at the call site)
 *   line: source line number (__LINE__ at the call site)
 * Only the prototype is given here; the definition is elsewhere. */
240 static void _redisAssert(char *estr
, char *file
, int line
);
242 /*================================= Data types ============================== */
244 /* A redis object, that is a type able to hold a string / list / set */
246 /* The VM object structure */
247 struct redisObjectVM
{
248 off_t page
; /* the page at which the object is stored on disk */
249 off_t usedpages
; /* number of pages used on disk */
250 time_t atime
; /* Last access time */
253 /* The actual Redis Object */
254 typedef struct redisObject
{
257 unsigned char encoding
;
258 unsigned char storage
; /* If this object is a key, where is the value?
259 * REDIS_VM_MEMORY, REDIS_VM_SWAPPED, ... */
260 unsigned char vtype
; /* If this object is a key, and value is swapped out,
261 * this is the type of the swapped out object. */
263 /* VM fields, these are only allocated if VM is active, otherwise the
264 * object allocation function will just allocate
265 * sizeof(redisObject) minus sizeof(redisObjectVM), so using
266 * Redis without VM active will not have any overhead. */
267 struct redisObjectVM vm
;
270 /* Macro used to initialize a Redis object allocated on the stack.
271 * Note that this macro is kept near the structure definition to make sure
272 * we'll update it when the structure is changed, to avoid bugs like
273 * bug #85 introduced exactly in this way. */
274 #define initStaticStringObject(_var,_ptr) do { \
276 _var.type = REDIS_STRING; \
277 _var.encoding = REDIS_ENCODING_RAW; \
279 if (server.vm_enabled) _var.storage = REDIS_VM_MEMORY; \
282 typedef struct redisDb
{
283 dict
*dict
; /* The keyspace for this DB */
284 dict
*expires
; /* Timeout of keys with a timeout set */
285 dict
*blockingkeys
; /* Keys with clients waiting for data (BLPOP) */
286 dict
*io_keys
; /* Keys with clients waiting for VM I/O */
290 /* Client MULTI/EXEC state */
291 typedef struct multiCmd
{
294 struct redisCommand
*cmd
;
297 typedef struct multiState
{
298 multiCmd
*commands
; /* Array of MULTI commands */
299 int count
; /* Total number of MULTI commands */
302 /* With multiplexing we need to keep per-client state.
303 * Clients are kept in a linked list. */
304 typedef struct redisClient
{
309 robj
**argv
, **mbargv
;
311 int bulklen
; /* bulk read len. -1 if not in bulk read mode */
312 int multibulk
; /* multi bulk command format active */
315 time_t lastinteraction
; /* time of the last interaction, used for timeout */
316 int flags
; /* REDIS_SLAVE | REDIS_MONITOR | REDIS_MULTI ... */
317 int slaveseldb
; /* slave selected db, if this client is a slave */
318 int authenticated
; /* when requirepass is non-NULL */
319 int replstate
; /* replication state if this is a slave */
320 int repldbfd
; /* replication DB file descriptor */
321 long repldboff
; /* replication DB file offset */
322 off_t repldbsize
; /* replication DB file size */
323 multiState mstate
; /* MULTI/EXEC state */
324 robj
**blockingkeys
; /* The key we are waiting to terminate a blocking
325 * operation such as BLPOP. Otherwise NULL. */
326 int blockingkeysnum
; /* Number of blocking keys */
327 time_t blockingto
; /* Blocking operation timeout. If UNIX current time
328 * is >= blockingto then the operation timed out. */
329 list
*io_keys
; /* Keys this client is waiting to be loaded from the
330 * swap file in order to continue. */
331 dict
*pubsub_channels
; /* channels a client is interested in (SUBSCRIBE) */
332 list
*pubsub_patterns
; /* patterns a client is interested in (SUBSCRIBE) */
340 /* Global server state structure */
345 long long dirty
; /* changes to DB from the last save */
347 list
*slaves
, *monitors
;
348 char neterr
[ANET_ERR_LEN
];
350 int cronloops
; /* number of times the cron function run */
351 list
*objfreelist
; /* A list of freed objects to avoid malloc() */
352 time_t lastsave
; /* Unix time of the last successful save */
353 /* Fields used only for stats */
354 time_t stat_starttime
; /* server start time */
355 long long stat_numcommands
; /* number of processed commands */
356 long long stat_numconnections
; /* number of connections received */
357 long long stat_expiredkeys
; /* number of expired keys */
370 pid_t bgsavechildpid
;
371 pid_t bgrewritechildpid
;
372 sds bgrewritebuf
; /* buffer taken by parent during append only rewrite */
373 struct saveparam
*saveparams
;
378 char *appendfilename
;
382 /* Replication related */
387 redisClient
*master
; /* client that is master for this slave */
389 unsigned int maxclients
;
390 unsigned long long maxmemory
;
391 unsigned int blpop_blocked_clients
;
392 unsigned int vm_blocked_clients
;
393 /* Sort parameters - qsort_r() is only available under BSD so we
394 * have to keep this state global, in order to pass it to sortCompare() */
398 /* Virtual memory configuration */
403 unsigned long long vm_max_memory
;
405 size_t hash_max_zipmap_entries
;
406 size_t hash_max_zipmap_value
;
407 /* Virtual memory state */
410 off_t vm_next_page
; /* Next probably empty page */
411 off_t vm_near_pages
; /* Number of pages allocated sequentially */
412 unsigned char *vm_bitmap
; /* Bitmap of free/used pages */
413 time_t unixtime
; /* Unix time sampled every second. */
414 /* Virtual memory I/O threads stuff */
415 /* An I/O thread processes an element taken from the io_newjobs queue and
416 * puts the result of the operation in the io_processed list. While the
417 * job is being processed, it's put on the io_processing queue. */
418 list
*io_newjobs
; /* List of VM I/O jobs yet to be processed */
419 list
*io_processing
; /* List of VM I/O jobs being processed */
420 list
*io_processed
; /* List of VM I/O jobs already processed */
421 list
*io_ready_clients
; /* Clients ready to be unblocked. All keys loaded */
422 pthread_mutex_t io_mutex
; /* lock to access io_jobs/io_done/io_thread_job */
423 pthread_mutex_t obj_freelist_mutex
; /* safe redis objects creation/free */
424 pthread_mutex_t io_swapfile_mutex
; /* So we can lseek + write */
425 pthread_attr_t io_threads_attr
; /* attributes for threads creation */
426 int io_active_threads
; /* Number of running I/O threads */
427 int vm_max_threads
; /* Max number of I/O threads running at the same time */
428 /* Our main thread is blocked on the event loop, looking for sockets ready
429 * to be read or written, so when a threaded I/O operation is ready to be
430 * processed by the main thread, the I/O thread will use a unix pipe to
431 * wake up the main thread. The following are the two pipe FDs. */
432 int io_ready_pipe_read
;
433 int io_ready_pipe_write
;
434 /* Virtual memory stats */
435 unsigned long long vm_stats_used_pages
;
436 unsigned long long vm_stats_swapped_objects
;
437 unsigned long long vm_stats_swapouts
;
438 unsigned long long vm_stats_swapins
;
440 dict
*pubsub_channels
; /* Map channels to list of subscribed clients */
441 list
*pubsub_patterns
; /* A list of pubsub_patterns */
446 typedef struct pubsubPattern
{
/* Function type for command callbacks: receives the client and returns
 * nothing. struct redisCommand uses it for both its 'proc' and
 * 'vm_preload_proc' members. */
451 typedef void redisCommandProc(redisClient
*c
);
452 struct redisCommand
{
454 redisCommandProc
*proc
;
457 /* Use a function to determine which keys need to be loaded
458 * in the background prior to executing this command. Takes precedence
459 * over vm_firstkey and others, ignored when NULL */
460 redisCommandProc
*vm_preload_proc
;
461 /* What keys should be loaded in background when calling this command? */
462 int vm_firstkey
; /* The first argument that's a key (0 = no keys) */
463 int vm_lastkey
; /* The last argument that's a key */
464 int vm_keystep
; /* The step between first and last key */
467 struct redisFunctionSym
{
469 unsigned long pointer
;
472 typedef struct _redisSortObject
{
480 typedef struct _redisSortOperation
{
483 } redisSortOperation
;
485 /* ZSETs use a specialized version of Skiplists */
487 typedef struct zskiplistNode
{
488 struct zskiplistNode
**forward
;
489 struct zskiplistNode
*backward
;
495 typedef struct zskiplist
{
496 struct zskiplistNode
*header
, *tail
;
497 unsigned long length
;
501 typedef struct zset
{
506 /* Our shared "common" objects */
508 #define REDIS_SHARED_INTEGERS 10000
509 struct sharedObjectsStruct
{
510 robj
*crlf
, *ok
, *err
, *emptybulk
, *czero
, *cone
, *pong
, *space
,
511 *colon
, *nullbulk
, *nullmultibulk
, *queued
,
512 *emptymultibulk
, *wrongtypeerr
, *nokeyerr
, *syntaxerr
, *sameobjecterr
,
513 *outofrangeerr
, *plus
,
514 *select0
, *select1
, *select2
, *select3
, *select4
,
515 *select5
, *select6
, *select7
, *select8
, *select9
,
516 *messagebulk
, *subscribebulk
, *unsubscribebulk
, *mbulk3
,
517 *psubscribebulk
, *punsubscribebulk
, *integers
[REDIS_SHARED_INTEGERS
];
520 /* Global vars that are actually used as constants. The following double
521 * values are used for double on-disk serialization, and are initialized
522 * at runtime to avoid strange compiler optimizations. */
524 static double R_Zero
, R_PosInf
, R_NegInf
, R_Nan
;
526 /* VM threaded I/O request message */
527 #define REDIS_IOJOB_LOAD 0 /* Load from disk to memory */
528 #define REDIS_IOJOB_PREPARE_SWAP 1 /* Compute needed pages */
529 #define REDIS_IOJOB_DO_SWAP 2 /* Swap from memory to disk */
530 typedef struct iojob
{
531 int type
; /* Request type, REDIS_IOJOB_* */
532 redisDb
*db
;/* Redis database */
533 robj
*key
; /* This I/O request is about swapping this key */
534 robj
*val
; /* the value to swap for REDIS_IOJOB_*_SWAP, otherwise this
535 * field is populated by the I/O thread for REDIS_IOJOB_LOAD. */
536 off_t page
; /* Swap page where to read/write the object */
537 off_t pages
; /* Swap pages needed to save object. PREPARE_SWAP return val */
538 int canceled
; /* True if this command was canceled by blocking side of VM */
539 pthread_t thread
; /* ID of the thread processing this entry */
542 /*================================ Prototypes =============================== */
544 static void freeStringObject(robj
*o
);
545 static void freeListObject(robj
*o
);
546 static void freeSetObject(robj
*o
);
547 static void decrRefCount(void *o
);
548 static robj
*createObject(int type
, void *ptr
);
549 static void freeClient(redisClient
*c
);
550 static int rdbLoad(char *filename
);
551 static void addReply(redisClient
*c
, robj
*obj
);
552 static void addReplySds(redisClient
*c
, sds s
);
553 static void incrRefCount(robj
*o
);
554 static int rdbSaveBackground(char *filename
);
555 static robj
*createStringObject(char *ptr
, size_t len
);
556 static robj
*dupStringObject(robj
*o
);
557 static void replicationFeedSlaves(list
*slaves
, int dictid
, robj
**argv
, int argc
);
558 static void feedAppendOnlyFile(struct redisCommand
*cmd
, int dictid
, robj
**argv
, int argc
);
559 static int syncWithMaster(void);
560 static robj
*tryObjectEncoding(robj
*o
);
561 static robj
*getDecodedObject(robj
*o
);
562 static int removeExpire(redisDb
*db
, robj
*key
);
563 static int expireIfNeeded(redisDb
*db
, robj
*key
);
564 static int deleteIfVolatile(redisDb
*db
, robj
*key
);
565 static int deleteIfSwapped(redisDb
*db
, robj
*key
);
566 static int deleteKey(redisDb
*db
, robj
*key
);
567 static time_t getExpire(redisDb
*db
, robj
*key
);
568 static int setExpire(redisDb
*db
, robj
*key
, time_t when
);
569 static void updateSlavesWaitingBgsave(int bgsaveerr
);
570 static void freeMemoryIfNeeded(void);
571 static int processCommand(redisClient
*c
);
572 static void setupSigSegvAction(void);
573 static void rdbRemoveTempFile(pid_t childpid
);
574 static void aofRemoveTempFile(pid_t childpid
);
575 static size_t stringObjectLen(robj
*o
);
576 static void processInputBuffer(redisClient
*c
);
577 static zskiplist
*zslCreate(void);
578 static void zslFree(zskiplist
*zsl
);
579 static void zslInsert(zskiplist
*zsl
, double score
, robj
*obj
);
580 static void sendReplyToClientWritev(aeEventLoop
*el
, int fd
, void *privdata
, int mask
);
581 static void initClientMultiState(redisClient
*c
);
582 static void freeClientMultiState(redisClient
*c
);
583 static void queueMultiCommand(redisClient
*c
, struct redisCommand
*cmd
);
584 static void unblockClientWaitingData(redisClient
*c
);
585 static int handleClientsWaitingListPush(redisClient
*c
, robj
*key
, robj
*ele
);
586 static void vmInit(void);
587 static void vmMarkPagesFree(off_t page
, off_t count
);
588 static robj
*vmLoadObject(robj
*key
);
589 static robj
*vmPreviewObject(robj
*key
);
590 static int vmSwapOneObjectBlocking(void);
591 static int vmSwapOneObjectThreaded(void);
592 static int vmCanSwapOut(void);
593 static int tryFreeOneObjectFromFreelist(void);
594 static void acceptHandler(aeEventLoop
*el
, int fd
, void *privdata
, int mask
);
595 static void vmThreadedIOCompletedJob(aeEventLoop
*el
, int fd
, void *privdata
, int mask
);
596 static void vmCancelThreadedIOJob(robj
*o
);
597 static void lockThreadedIO(void);
598 static void unlockThreadedIO(void);
599 static int vmSwapObjectThreaded(robj
*key
, robj
*val
, redisDb
*db
);
600 static void freeIOJob(iojob
*j
);
601 static void queueIOJob(iojob
*j
);
602 static int vmWriteObjectOnSwap(robj
*o
, off_t page
);
603 static robj
*vmReadObjectFromSwap(off_t page
, int type
);
604 static void waitEmptyIOJobsQueue(void);
605 static void vmReopenSwapFile(void);
606 static int vmFreePage(off_t page
);
607 static void zunionInterBlockClientOnSwappedKeys(redisClient
*c
);
608 static int blockClientOnSwappedKeys(struct redisCommand
*cmd
, redisClient
*c
);
609 static int dontWaitForSwappedKey(redisClient
*c
, robj
*key
);
610 static void handleClientsBlockedOnSwappedKey(redisDb
*db
, robj
*key
);
611 static void readQueryFromClient(aeEventLoop
*el
, int fd
, void *privdata
, int mask
);
612 static struct redisCommand
*lookupCommand(char *name
);
613 static void call(redisClient
*c
, struct redisCommand
*cmd
);
614 static void resetClient(redisClient
*c
);
615 static void convertToRealHash(robj
*o
);
616 static int pubsubUnsubscribeAllChannels(redisClient
*c
, int notify
);
617 static int pubsubUnsubscribeAllPatterns(redisClient
*c
, int notify
);
618 static void freePubsubPattern(void *p
);
619 static int listMatchPubsubPattern(void *a
, void *b
);
620 static int compareStringObjects(robj
*a
, robj
*b
);
623 static void authCommand(redisClient
*c
);
624 static void pingCommand(redisClient
*c
);
625 static void echoCommand(redisClient
*c
);
626 static void setCommand(redisClient
*c
);
627 static void setnxCommand(redisClient
*c
);
628 static void getCommand(redisClient
*c
);
629 static void delCommand(redisClient
*c
);
630 static void existsCommand(redisClient
*c
);
631 static void incrCommand(redisClient
*c
);
632 static void decrCommand(redisClient
*c
);
633 static void incrbyCommand(redisClient
*c
);
634 static void decrbyCommand(redisClient
*c
);
635 static void selectCommand(redisClient
*c
);
636 static void randomkeyCommand(redisClient
*c
);
637 static void keysCommand(redisClient
*c
);
638 static void dbsizeCommand(redisClient
*c
);
639 static void lastsaveCommand(redisClient
*c
);
640 static void saveCommand(redisClient
*c
);
641 static void bgsaveCommand(redisClient
*c
);
642 static void bgrewriteaofCommand(redisClient
*c
);
643 static void shutdownCommand(redisClient
*c
);
644 static void moveCommand(redisClient
*c
);
645 static void renameCommand(redisClient
*c
);
646 static void renamenxCommand(redisClient
*c
);
647 static void lpushCommand(redisClient
*c
);
648 static void rpushCommand(redisClient
*c
);
649 static void lpopCommand(redisClient
*c
);
650 static void rpopCommand(redisClient
*c
);
651 static void llenCommand(redisClient
*c
);
652 static void lindexCommand(redisClient
*c
);
653 static void lrangeCommand(redisClient
*c
);
654 static void ltrimCommand(redisClient
*c
);
655 static void typeCommand(redisClient
*c
);
656 static void lsetCommand(redisClient
*c
);
657 static void saddCommand(redisClient
*c
);
658 static void sremCommand(redisClient
*c
);
659 static void smoveCommand(redisClient
*c
);
660 static void sismemberCommand(redisClient
*c
);
661 static void scardCommand(redisClient
*c
);
662 static void spopCommand(redisClient
*c
);
663 static void srandmemberCommand(redisClient
*c
);
664 static void sinterCommand(redisClient
*c
);
665 static void sinterstoreCommand(redisClient
*c
);
666 static void sunionCommand(redisClient
*c
);
667 static void sunionstoreCommand(redisClient
*c
);
668 static void sdiffCommand(redisClient
*c
);
669 static void sdiffstoreCommand(redisClient
*c
);
670 static void syncCommand(redisClient
*c
);
671 static void flushdbCommand(redisClient
*c
);
672 static void flushallCommand(redisClient
*c
);
673 static void sortCommand(redisClient
*c
);
674 static void lremCommand(redisClient
*c
);
/* NOTE(review): unlike the other command prototypes in this list, this name
 * ends in lowercase "command". The "rpoplpush" entry of cmdTable uses the
 * same spelling, so a rename has to change this prototype, that table entry
 * and the function definition together. */
675 static void rpoplpushcommand(redisClient
*c
);
676 static void infoCommand(redisClient
*c
);
677 static void mgetCommand(redisClient
*c
);
678 static void monitorCommand(redisClient
*c
);
679 static void expireCommand(redisClient
*c
);
680 static void expireatCommand(redisClient
*c
);
681 static void getsetCommand(redisClient
*c
);
682 static void ttlCommand(redisClient
*c
);
683 static void slaveofCommand(redisClient
*c
);
684 static void debugCommand(redisClient
*c
);
685 static void msetCommand(redisClient
*c
);
686 static void msetnxCommand(redisClient
*c
);
687 static void zaddCommand(redisClient
*c
);
688 static void zincrbyCommand(redisClient
*c
);
689 static void zrangeCommand(redisClient
*c
);
690 static void zrangebyscoreCommand(redisClient
*c
);
691 static void zcountCommand(redisClient
*c
);
692 static void zrevrangeCommand(redisClient
*c
);
693 static void zcardCommand(redisClient
*c
);
694 static void zremCommand(redisClient
*c
);
695 static void zscoreCommand(redisClient
*c
);
696 static void zremrangebyscoreCommand(redisClient
*c
);
697 static void multiCommand(redisClient
*c
);
698 static void execCommand(redisClient
*c
);
699 static void discardCommand(redisClient
*c
);
700 static void blpopCommand(redisClient
*c
);
701 static void brpopCommand(redisClient
*c
);
702 static void appendCommand(redisClient
*c
);
703 static void substrCommand(redisClient
*c
);
704 static void zrankCommand(redisClient
*c
);
705 static void zrevrankCommand(redisClient
*c
);
706 static void hsetCommand(redisClient
*c
);
707 static void hgetCommand(redisClient
*c
);
708 static void hdelCommand(redisClient
*c
);
709 static void hlenCommand(redisClient
*c
);
710 static void zremrangebyrankCommand(redisClient
*c
);
711 static void zunionCommand(redisClient
*c
);
712 static void zinterCommand(redisClient
*c
);
713 static void hkeysCommand(redisClient
*c
);
714 static void hvalsCommand(redisClient
*c
);
715 static void hgetallCommand(redisClient
*c
);
716 static void hexistsCommand(redisClient
*c
);
717 static void configCommand(redisClient
*c
);
718 static void hincrbyCommand(redisClient
*c
);
719 static void subscribeCommand(redisClient
*c
);
720 static void unsubscribeCommand(redisClient
*c
);
721 static void psubscribeCommand(redisClient
*c
);
722 static void punsubscribeCommand(redisClient
*c
);
723 static void publishCommand(redisClient
*c
);
725 /*================================= Globals ================================= */
728 static struct redisServer server
; /* server global state */
729 static struct redisCommand cmdTable
[] = {
730 {"get",getCommand
,2,REDIS_CMD_INLINE
,NULL
,1,1,1},
731 {"set",setCommand
,3,REDIS_CMD_BULK
|REDIS_CMD_DENYOOM
,NULL
,0,0,0},
732 {"setnx",setnxCommand
,3,REDIS_CMD_BULK
|REDIS_CMD_DENYOOM
,NULL
,0,0,0},
733 {"append",appendCommand
,3,REDIS_CMD_BULK
|REDIS_CMD_DENYOOM
,NULL
,1,1,1},
734 {"substr",substrCommand
,4,REDIS_CMD_INLINE
,NULL
,1,1,1},
735 {"del",delCommand
,-2,REDIS_CMD_INLINE
,NULL
,0,0,0},
736 {"exists",existsCommand
,2,REDIS_CMD_INLINE
,NULL
,1,1,1},
737 {"incr",incrCommand
,2,REDIS_CMD_INLINE
|REDIS_CMD_DENYOOM
,NULL
,1,1,1},
738 {"decr",decrCommand
,2,REDIS_CMD_INLINE
|REDIS_CMD_DENYOOM
,NULL
,1,1,1},
739 {"mget",mgetCommand
,-2,REDIS_CMD_INLINE
,NULL
,1,-1,1},
740 {"rpush",rpushCommand
,3,REDIS_CMD_BULK
|REDIS_CMD_DENYOOM
,NULL
,1,1,1},
741 {"lpush",lpushCommand
,3,REDIS_CMD_BULK
|REDIS_CMD_DENYOOM
,NULL
,1,1,1},
742 {"rpop",rpopCommand
,2,REDIS_CMD_INLINE
,NULL
,1,1,1},
743 {"lpop",lpopCommand
,2,REDIS_CMD_INLINE
,NULL
,1,1,1},
744 {"brpop",brpopCommand
,-3,REDIS_CMD_INLINE
,NULL
,1,1,1},
745 {"blpop",blpopCommand
,-3,REDIS_CMD_INLINE
,NULL
,1,1,1},
746 {"llen",llenCommand
,2,REDIS_CMD_INLINE
,NULL
,1,1,1},
747 {"lindex",lindexCommand
,3,REDIS_CMD_INLINE
,NULL
,1,1,1},
748 {"lset",lsetCommand
,4,REDIS_CMD_BULK
|REDIS_CMD_DENYOOM
,NULL
,1,1,1},
749 {"lrange",lrangeCommand
,4,REDIS_CMD_INLINE
,NULL
,1,1,1},
750 {"ltrim",ltrimCommand
,4,REDIS_CMD_INLINE
,NULL
,1,1,1},
751 {"lrem",lremCommand
,4,REDIS_CMD_BULK
,NULL
,1,1,1},
752 {"rpoplpush",rpoplpushcommand
,3,REDIS_CMD_INLINE
|REDIS_CMD_DENYOOM
,NULL
,1,2,1},
753 {"sadd",saddCommand
,3,REDIS_CMD_BULK
|REDIS_CMD_DENYOOM
,NULL
,1,1,1},
754 {"srem",sremCommand
,3,REDIS_CMD_BULK
,NULL
,1,1,1},
755 {"smove",smoveCommand
,4,REDIS_CMD_BULK
,NULL
,1,2,1},
756 {"sismember",sismemberCommand
,3,REDIS_CMD_BULK
,NULL
,1,1,1},
757 {"scard",scardCommand
,2,REDIS_CMD_INLINE
,NULL
,1,1,1},
758 {"spop",spopCommand
,2,REDIS_CMD_INLINE
,NULL
,1,1,1},
759 {"srandmember",srandmemberCommand
,2,REDIS_CMD_INLINE
,NULL
,1,1,1},
760 {"sinter",sinterCommand
,-2,REDIS_CMD_INLINE
|REDIS_CMD_DENYOOM
,NULL
,1,-1,1},
761 {"sinterstore",sinterstoreCommand
,-3,REDIS_CMD_INLINE
|REDIS_CMD_DENYOOM
,NULL
,2,-1,1},
762 {"sunion",sunionCommand
,-2,REDIS_CMD_INLINE
|REDIS_CMD_DENYOOM
,NULL
,1,-1,1},
763 {"sunionstore",sunionstoreCommand
,-3,REDIS_CMD_INLINE
|REDIS_CMD_DENYOOM
,NULL
,2,-1,1},
764 {"sdiff",sdiffCommand
,-2,REDIS_CMD_INLINE
|REDIS_CMD_DENYOOM
,NULL
,1,-1,1},
765 {"sdiffstore",sdiffstoreCommand
,-3,REDIS_CMD_INLINE
|REDIS_CMD_DENYOOM
,NULL
,2,-1,1},
766 {"smembers",sinterCommand
,2,REDIS_CMD_INLINE
,NULL
,1,1,1},
767 {"zadd",zaddCommand
,4,REDIS_CMD_BULK
|REDIS_CMD_DENYOOM
,NULL
,1,1,1},
768 {"zincrby",zincrbyCommand
,4,REDIS_CMD_BULK
|REDIS_CMD_DENYOOM
,NULL
,1,1,1},
769 {"zrem",zremCommand
,3,REDIS_CMD_BULK
,NULL
,1,1,1},
770 {"zremrangebyscore",zremrangebyscoreCommand
,4,REDIS_CMD_INLINE
,NULL
,1,1,1},
771 {"zremrangebyrank",zremrangebyrankCommand
,4,REDIS_CMD_INLINE
,NULL
,1,1,1},
772 {"zunion",zunionCommand
,-4,REDIS_CMD_INLINE
|REDIS_CMD_DENYOOM
,zunionInterBlockClientOnSwappedKeys
,0,0,0},
773 {"zinter",zinterCommand
,-4,REDIS_CMD_INLINE
|REDIS_CMD_DENYOOM
,zunionInterBlockClientOnSwappedKeys
,0,0,0},
774 {"zrange",zrangeCommand
,-4,REDIS_CMD_INLINE
,NULL
,1,1,1},
775 {"zrangebyscore",zrangebyscoreCommand
,-4,REDIS_CMD_INLINE
,NULL
,1,1,1},
776 {"zcount",zcountCommand
,4,REDIS_CMD_INLINE
,NULL
,1,1,1},
777 {"zrevrange",zrevrangeCommand
,-4,REDIS_CMD_INLINE
,NULL
,1,1,1},
778 {"zcard",zcardCommand
,2,REDIS_CMD_INLINE
,NULL
,1,1,1},
779 {"zscore",zscoreCommand
,3,REDIS_CMD_BULK
|REDIS_CMD_DENYOOM
,NULL
,1,1,1},
780 {"zrank",zrankCommand
,3,REDIS_CMD_BULK
,NULL
,1,1,1},
781 {"zrevrank",zrevrankCommand
,3,REDIS_CMD_BULK
,NULL
,1,1,1},
782 {"hset",hsetCommand
,4,REDIS_CMD_BULK
|REDIS_CMD_DENYOOM
,NULL
,1,1,1},
783 {"hincrby",hincrbyCommand
,4,REDIS_CMD_INLINE
|REDIS_CMD_DENYOOM
,NULL
,1,1,1},
784 {"hget",hgetCommand
,3,REDIS_CMD_BULK
,NULL
,1,1,1},
785 {"hdel",hdelCommand
,3,REDIS_CMD_BULK
,NULL
,1,1,1},
786 {"hlen",hlenCommand
,2,REDIS_CMD_INLINE
,NULL
,1,1,1},
787 {"hkeys",hkeysCommand
,2,REDIS_CMD_INLINE
,NULL
,1,1,1},
788 {"hvals",hvalsCommand
,2,REDIS_CMD_INLINE
,NULL
,1,1,1},
789 {"hgetall",hgetallCommand
,2,REDIS_CMD_INLINE
,NULL
,1,1,1},
790 {"hexists",hexistsCommand
,3,REDIS_CMD_BULK
,NULL
,1,1,1},
791 {"incrby",incrbyCommand
,3,REDIS_CMD_INLINE
|REDIS_CMD_DENYOOM
,NULL
,1,1,1},
792 {"decrby",decrbyCommand
,3,REDIS_CMD_INLINE
|REDIS_CMD_DENYOOM
,NULL
,1,1,1},
793 {"getset",getsetCommand
,3,REDIS_CMD_BULK
|REDIS_CMD_DENYOOM
,NULL
,1,1,1},
794 {"mset",msetCommand
,-3,REDIS_CMD_BULK
|REDIS_CMD_DENYOOM
,NULL
,1,-1,2},
795 {"msetnx",msetnxCommand
,-3,REDIS_CMD_BULK
|REDIS_CMD_DENYOOM
,NULL
,1,-1,2},
796 {"randomkey",randomkeyCommand
,1,REDIS_CMD_INLINE
,NULL
,0,0,0},
797 {"select",selectCommand
,2,REDIS_CMD_INLINE
,NULL
,0,0,0},
798 {"move",moveCommand
,3,REDIS_CMD_INLINE
,NULL
,1,1,1},
799 {"rename",renameCommand
,3,REDIS_CMD_INLINE
,NULL
,1,1,1},
800 {"renamenx",renamenxCommand
,3,REDIS_CMD_INLINE
,NULL
,1,1,1},
801 {"expire",expireCommand
,3,REDIS_CMD_INLINE
,NULL
,0,0,0},
802 {"expireat",expireatCommand
,3,REDIS_CMD_INLINE
,NULL
,0,0,0},
803 {"keys",keysCommand
,2,REDIS_CMD_INLINE
,NULL
,0,0,0},
804 {"dbsize",dbsizeCommand
,1,REDIS_CMD_INLINE
,NULL
,0,0,0},
805 {"auth",authCommand
,2,REDIS_CMD_INLINE
,NULL
,0,0,0},
806 {"ping",pingCommand
,1,REDIS_CMD_INLINE
,NULL
,0,0,0},
807 {"echo",echoCommand
,2,REDIS_CMD_BULK
,NULL
,0,0,0},
808 {"save",saveCommand
,1,REDIS_CMD_INLINE
,NULL
,0,0,0},
809 {"bgsave",bgsaveCommand
,1,REDIS_CMD_INLINE
,NULL
,0,0,0},
810 {"bgrewriteaof",bgrewriteaofCommand
,1,REDIS_CMD_INLINE
,NULL
,0,0,0},
811 {"shutdown",shutdownCommand
,1,REDIS_CMD_INLINE
,NULL
,0,0,0},
812 {"lastsave",lastsaveCommand
,1,REDIS_CMD_INLINE
,NULL
,0,0,0},
813 {"type",typeCommand
,2,REDIS_CMD_INLINE
,NULL
,1,1,1},
814 {"multi",multiCommand
,1,REDIS_CMD_INLINE
,NULL
,0,0,0},
815 {"exec",execCommand
,1,REDIS_CMD_INLINE
|REDIS_CMD_DENYOOM
,NULL
,0,0,0},
816 {"discard",discardCommand
,1,REDIS_CMD_INLINE
,NULL
,0,0,0},
817 {"sync",syncCommand
,1,REDIS_CMD_INLINE
,NULL
,0,0,0},
818 {"flushdb",flushdbCommand
,1,REDIS_CMD_INLINE
,NULL
,0,0,0},
819 {"flushall",flushallCommand
,1,REDIS_CMD_INLINE
,NULL
,0,0,0},
820 {"sort",sortCommand
,-2,REDIS_CMD_INLINE
|REDIS_CMD_DENYOOM
,NULL
,1,1,1},
821 {"info",infoCommand
,1,REDIS_CMD_INLINE
,NULL
,0,0,0},
822 {"monitor",monitorCommand
,1,REDIS_CMD_INLINE
,NULL
,0,0,0},
823 {"ttl",ttlCommand
,2,REDIS_CMD_INLINE
,NULL
,1,1,1},
824 {"slaveof",slaveofCommand
,3,REDIS_CMD_INLINE
,NULL
,0,0,0},
825 {"debug",debugCommand
,-2,REDIS_CMD_INLINE
,NULL
,0,0,0},
826 {"config",configCommand
,-2,REDIS_CMD_BULK
,NULL
,0,0,0},
827 {"subscribe",subscribeCommand
,-2,REDIS_CMD_INLINE
,NULL
,0,0,0},
828 {"unsubscribe",unsubscribeCommand
,-1,REDIS_CMD_INLINE
,NULL
,0,0,0},
829 {"psubscribe",psubscribeCommand
,-2,REDIS_CMD_INLINE
,NULL
,0,0,0},
830 {"punsubscribe",punsubscribeCommand
,-1,REDIS_CMD_INLINE
,NULL
,0,0,0},
831 {"publish",publishCommand
,3,REDIS_CMD_BULK
|REDIS_CMD_FORCE_REPLICATION
,NULL
,0,0,0},
832 {NULL
,NULL
,0,0,NULL
,0,0,0}
835 /*============================ Utility functions ============================ */
837 /* Glob-style pattern matching. */
838 static int stringmatchlen(const char *pattern
, int patternLen
,
839 const char *string
, int stringLen
, int nocase
)
844 while (pattern
[1] == '*') {
849 return 1; /* match */
851 if (stringmatchlen(pattern
+1, patternLen
-1,
852 string
, stringLen
, nocase
))
853 return 1; /* match */
857 return 0; /* no match */
861 return 0; /* no match */
871 not = pattern
[0] == '^';
878 if (pattern
[0] == '\\') {
881 if (pattern
[0] == string
[0])
883 } else if (pattern
[0] == ']') {
885 } else if (patternLen
== 0) {
889 } else if (pattern
[1] == '-' && patternLen
>= 3) {
890 int start
= pattern
[0];
891 int end
= pattern
[2];
899 start
= tolower(start
);
905 if (c
>= start
&& c
<= end
)
909 if (pattern
[0] == string
[0])
912 if (tolower((int)pattern
[0]) == tolower((int)string
[0]))
922 return 0; /* no match */
928 if (patternLen
>= 2) {
935 if (pattern
[0] != string
[0])
936 return 0; /* no match */
938 if (tolower((int)pattern
[0]) != tolower((int)string
[0]))
939 return 0; /* no match */
947 if (stringLen
== 0) {
948 while(*pattern
== '*') {
955 if (patternLen
== 0 && stringLen
== 0)
/* Glob-style match of two NUL-terminated strings.
 *
 * Measures both arguments with strlen() and delegates to stringmatchlen(),
 * returning whatever that function returns. A non-zero 'nocase' is passed
 * through unchanged. */
static int stringmatch(const char *pattern, const char *string, int nocase) {
    int plen = strlen(pattern);
    int slen = strlen(string);

    return stringmatchlen(pattern, plen, string, slen, nocase);
}
/* Writes one printf-style log message.
 *
 * level: severity of the message; it is compared against server.verbosity
 *        and used as an index into the marker table 'c'.
 * fmt:   printf-style format string, followed by its arguments.
 *
 * NOTE(review): the local declarations, the va_start()/va_end() pair and any
 * check of the fopen() result are not visible in this extract. */
964 static void redisLog(int level
, const char *fmt
, ...) {
/* Log target: stdout when no logfile is configured, otherwise the logfile,
 * opened in append mode for this single call. */
968 fp
= (server
.logfile
== NULL
) ? stdout
: fopen(server
.logfile
,"a");
/* Only messages at or above the configured verbosity are written. */
972 if (level
>= server
.verbosity
) {
/* Timestamp in local time, e.g. "07 Mar 14:05:59". */
978 strftime(buf
,64,"%d %b %H:%M:%S",localtime(&now
));
/* Line prefix: "[pid] timestamp <marker>", the marker being c[level]. */
979 fprintf(fp
,"[%d] %s %c ",(int)getpid(),buf
,c
[level
]);
980 vfprintf(fp
, fmt
, ap
);
/* Close the stream only if it was opened above (never close stdout). */
986 if (server
.logfile
) fclose(fp
);
989 /*====================== Hash table type implementation ==================== */
991 /* This is a hash table type that uses the SDS dynamic strings library as
992 * keys and redis objects as values (objects can hold SDS strings,
/* Dict value destructor callback; 'privdata' is unused.
 * zsetDictType installs it as the value destructor for its
 * malloc(sizeof(double)) values.
 * NOTE(review): the statement that releases 'val' is not visible in this
 * extract. */
995 static void dictVanillaFree(void *privdata
, void *val
)
997 DICT_NOTUSED(privdata
);
1001 static void dictListDestructor(void *privdata
, void *val
)
1003 DICT_NOTUSED(privdata
);
1004 listRelease((list
*)val
);
1007 static int sdsDictKeyCompare(void *privdata
, const void *key1
,
1011 DICT_NOTUSED(privdata
);
1013 l1
= sdslen((sds
)key1
);
1014 l2
= sdslen((sds
)key2
);
1015 if (l1
!= l2
) return 0;
1016 return memcmp(key1
, key2
, l1
) == 0;
/* Dict destructor callback used for redis object keys and values in the
 * dict type tables; 'privdata' is unused. A NULL value is ignored.
 * NOTE(review): the statement that releases a non-NULL 'val' is not visible
 * in this extract. */
1019 static void dictRedisObjectDestructor(void *privdata
, void *val
)
1021 DICT_NOTUSED(privdata
);
1023 if (val
== NULL
) return; /* Values of swapped out keys are set to NULL */
1027 static int dictObjKeyCompare(void *privdata
, const void *key1
,
1030 const robj
*o1
= key1
, *o2
= key2
;
1031 return sdsDictKeyCompare(privdata
,o1
->ptr
,o2
->ptr
);
1034 static unsigned int dictObjHash(const void *key
) {
1035 const robj
*o
= key
;
1036 return dictGenHashFunction(o
->ptr
, sdslen((sds
)o
->ptr
));
1039 static int dictEncObjKeyCompare(void *privdata
, const void *key1
,
1042 robj
*o1
= (robj
*) key1
, *o2
= (robj
*) key2
;
1045 if (o1
->encoding
== REDIS_ENCODING_INT
&&
1046 o2
->encoding
== REDIS_ENCODING_INT
&&
1047 o1
->ptr
== o2
->ptr
) return 1;
1049 o1
= getDecodedObject(o1
);
1050 o2
= getDecodedObject(o2
);
1051 cmp
= sdsDictKeyCompare(privdata
,o1
->ptr
,o2
->ptr
);
1057 static unsigned int dictEncObjHash(const void *key
) {
1058 robj
*o
= (robj
*) key
;
1060 if (o
->encoding
== REDIS_ENCODING_RAW
) {
1061 return dictGenHashFunction(o
->ptr
, sdslen((sds
)o
->ptr
));
1063 if (o
->encoding
== REDIS_ENCODING_INT
) {
1067 len
= snprintf(buf
,32,"%ld",(long)o
->ptr
);
1068 return dictGenHashFunction((unsigned char*)buf
, len
);
1072 o
= getDecodedObject(o
);
1073 hash
= dictGenHashFunction(o
->ptr
, sdslen((sds
)o
->ptr
));
1080 /* Sets type and expires */
1081 static dictType setDictType
= {
1082 dictEncObjHash
, /* hash function */
1085 dictEncObjKeyCompare
, /* key compare */
1086 dictRedisObjectDestructor
, /* key destructor */
1087 NULL
/* val destructor */
1090 /* Sorted sets hash (note: a skiplist is used in addition to the hash table) */
1091 static dictType zsetDictType
= {
1092 dictEncObjHash
, /* hash function */
1095 dictEncObjKeyCompare
, /* key compare */
1096 dictRedisObjectDestructor
, /* key destructor */
1097 dictVanillaFree
/* val destructor of malloc(sizeof(double)) */
1101 static dictType dbDictType
= {
1102 dictObjHash
, /* hash function */
1105 dictObjKeyCompare
, /* key compare */
1106 dictRedisObjectDestructor
, /* key destructor */
1107 dictRedisObjectDestructor
/* val destructor */
1111 static dictType keyptrDictType
= {
1112 dictObjHash
, /* hash function */
1115 dictObjKeyCompare
, /* key compare */
1116 dictRedisObjectDestructor
, /* key destructor */
1117 NULL
/* val destructor */
1120 /* Hash type hash table (note that small hashes are represented with zipmaps) */
1121 static dictType hashDictType
= {
1122 dictEncObjHash
, /* hash function */
1125 dictEncObjKeyCompare
, /* key compare */
1126 dictRedisObjectDestructor
, /* key destructor */
1127 dictRedisObjectDestructor
/* val destructor */
1130 /* Keylist hash table type has unencoded redis objects as keys and
1131 * lists as values. It's used for blocking operations (BLPOP) and to
1132 * map swapped keys to a list of clients waiting for this keys to be loaded. */
1133 static dictType keylistDictType
= {
1134 dictObjHash
, /* hash function */
1137 dictObjKeyCompare
, /* key compare */
1138 dictRedisObjectDestructor
, /* key destructor */
1139 dictListDestructor
/* val destructor */
1142 static void version();
1144 /* ========================= Random utility functions ======================= */
1146 /* Redis generally does not try to recover from out of memory conditions
1147 * when allocating objects or strings, it is not clear if it will be possible
1148 * to report this condition to the client since the networking layer itself
1149 * is based on heap allocation for send buffers, so we simply abort.
1150 * At least the code will be simpler to read... */
/* msg: short description of what was being allocated; it is logged at
 * REDIS_WARNING level as "<msg>: Out of memory".
 * NOTE(review): the statements that follow the log call are not visible in
 * this extract. */
1151 static void oom(const char *msg
) {
1152 redisLog(REDIS_WARNING
, "%s: Out of memory\n",msg
);
1157 /* ====================== Redis server networking stuff ===================== */
1158 static void closeTimedoutClients(void) {
1161 time_t now
= time(NULL
);
1164 listRewind(server
.clients
,&li
);
1165 while ((ln
= listNext(&li
)) != NULL
) {
1166 c
= listNodeValue(ln
);
1167 if (server
.maxidletime
&&
1168 !(c
->flags
& REDIS_SLAVE
) && /* no timeout for slaves */
1169 !(c
->flags
& REDIS_MASTER
) && /* no timeout for masters */
1170 dictSize(c
->pubsub_channels
) == 0 && /* no timeout for pubsub */
1171 listLength(c
->pubsub_patterns
) == 0 &&
1172 (now
- c
->lastinteraction
> server
.maxidletime
))
1174 redisLog(REDIS_VERBOSE
,"Closing idle client");
1176 } else if (c
->flags
& REDIS_BLOCKED
) {
1177 if (c
->blockingto
!= 0 && c
->blockingto
< now
) {
1178 addReply(c
,shared
.nullmultibulk
);
1179 unblockClientWaitingData(c
);
1185 static int htNeedsResize(dict
*dict
) {
1186 long long size
, used
;
1188 size
= dictSlots(dict
);
1189 used
= dictSize(dict
);
1190 return (size
&& used
&& size
> DICT_HT_INITIAL_SIZE
&&
1191 (used
*100/size
< REDIS_HT_MINFILL
));
1194 /* If the percentage of used slots in the HT reaches REDIS_HT_MINFILL
1195 * we resize the hash table to save memory */
1196 static void tryResizeHashTables(void) {
1199 for (j
= 0; j
< server
.dbnum
; j
++) {
1200 if (htNeedsResize(server
.db
[j
].dict
)) {
1201 redisLog(REDIS_VERBOSE
,"The hash table %d is too sparse, resize it...",j
);
1202 dictResize(server
.db
[j
].dict
);
1203 redisLog(REDIS_VERBOSE
,"Hash table %d resized.",j
);
1205 if (htNeedsResize(server
.db
[j
].expires
))
1206 dictResize(server
.db
[j
].expires
);
1210 /* A background saving child (BGSAVE) terminated its work. Handle this.
 *
 * statloc: the wait status of the terminated child, as filled in by the
 *          wait call.
 *
 * Three outcomes are distinguished: normal exit with status 0 (success),
 * normal exit with a non-zero status (error), and termination by signal
 * (the child's temp file is removed). In every case bgsavechildpid is
 * reset to -1 and slaves waiting for the dump are notified.
 *
 * NOTE(review): exitcode is extracted with WEXITSTATUS() before the
 * WIFSIGNALED() check, and the final notification tests exitcode alone.
 * POSIX defines WEXITSTATUS() only for children that exited normally, so a
 * signal-killed child may presumably be reported to slaves as REDIS_OK;
 * consider testing (!bysignal && exitcode == 0) there. Confirm.
 * NOTE(review): some lines of this function are not visible in this
 * extract. */
1211 void backgroundSaveDoneHandler(int statloc
) {
1212 int exitcode
= WEXITSTATUS(statloc
);
1213 int bysignal
= WIFSIGNALED(statloc
);
/* Child exited on its own with status 0: the save succeeded. */
1215 if (!bysignal
&& exitcode
== 0) {
1216 redisLog(REDIS_NOTICE
,
1217 "Background saving terminated with success");
1219 server
.lastsave
= time(NULL
);
/* Child exited on its own with a non-zero status. */
1220 } else if (!bysignal
&& exitcode
!= 0) {
1221 redisLog(REDIS_WARNING
, "Background saving error");
/* Remaining case: the child was killed by a signal; its temp file is
 * removed. */
1223 redisLog(REDIS_WARNING
,
1224 "Background saving terminated by signal %d", WTERMSIG(statloc
));
1225 rdbRemoveTempFile(server
.bgsavechildpid
);
/* No background save is running any more. */
1227 server
.bgsavechildpid
= -1;
1228 /* Possibly there are slaves waiting for a BGSAVE in order to be served
1229 * (the first stage of SYNC is a bulk transfer of dump.rdb) */
1230 updateSlavesWaitingBgsave(exitcode
== 0 ? REDIS_OK
: REDIS_ERR
);
1233 /* A background append only file rewriting (BGREWRITEAOF) terminated its work.
1235 void backgroundRewriteDoneHandler(int statloc
) {
1236 int exitcode
= WEXITSTATUS(statloc
);
1237 int bysignal
= WIFSIGNALED(statloc
);
1239 if (!bysignal
&& exitcode
== 0) {
1243 redisLog(REDIS_NOTICE
,
1244 "Background append only file rewriting terminated with success");
1245 /* Now it's time to flush the differences accumulated by the parent */
1246 snprintf(tmpfile
,256,"temp-rewriteaof-bg-%d.aof", (int) server
.bgrewritechildpid
);
1247 fd
= open(tmpfile
,O_WRONLY
|O_APPEND
);
1249 redisLog(REDIS_WARNING
, "Not able to open the temp append only file produced by the child: %s", strerror(errno
));
1252 /* Flush our data... */
1253 if (write(fd
,server
.bgrewritebuf
,sdslen(server
.bgrewritebuf
)) !=
1254 (signed) sdslen(server
.bgrewritebuf
)) {
1255 redisLog(REDIS_WARNING
, "Error or short write trying to flush the parent diff of the append log file in the child temp file: %s", strerror(errno
));
1259 redisLog(REDIS_NOTICE
,"Parent diff flushed into the new append log file with success (%lu bytes)",sdslen(server
.bgrewritebuf
));
1260 /* Now our work is to rename the temp file into the stable file. And
1261 * switch the file descriptor used by the server for append only. */
1262 if (rename(tmpfile
,server
.appendfilename
) == -1) {
1263 redisLog(REDIS_WARNING
,"Can't rename the temp append only file into the stable one: %s", strerror(errno
));
1267 /* Mission completed... almost */
1268 redisLog(REDIS_NOTICE
,"Append only file successfully rewritten.");
1269 if (server
.appendfd
!= -1) {
1270 /* If append only is actually enabled... */
1271 close(server
.appendfd
);
1272 server
.appendfd
= fd
;
1274 server
.appendseldb
= -1; /* Make sure it will issue SELECT */
1275 redisLog(REDIS_NOTICE
,"The new append only file was selected for future appends.");
1277 /* If append only is disabled we just generate a dump in this
1278 * format. Why not? */
1281 } else if (!bysignal
&& exitcode
!= 0) {
1282 redisLog(REDIS_WARNING
, "Background append only file rewriting error");
1284 redisLog(REDIS_WARNING
,
1285 "Background append only file rewriting terminated by signal %d",
1289 sdsfree(server
.bgrewritebuf
);
1290 server
.bgrewritebuf
= sdsempty();
1291 aofRemoveTempFile(server
.bgrewritechildpid
);
1292 server
.bgrewritechildpid
= -1;
1295 /* This function is called once a background process of some kind terminates,
1296 * as we want to avoid resizing the hash tables when there is a child in order
1297 * to play well with copy-on-write (otherwise when a resize happens lots of
1298 * memory pages are copied). The goal of this function is to update the ability
1299 * for dict.c to resize the hash tables according to whether or not we have
1300 * running children. */
/* Re-evaluates whether dict.c may resize hash tables, based on whether a
 * background child is currently recorded in the server state.
 * NOTE(review): only the dictDisableResize() call is visible in this
 * extract; it is presumably the arm taken when a child IS running, with
 * the enabling call on the lines not shown. Confirm. */
1301 static void updateDictResizePolicy(void) {
/* Both pids at -1 means neither a BGSAVE nor a BGREWRITEAOF child exists. */
1302 if (server
.bgsavechildpid
== -1 && server
.bgrewritechildpid
== -1)
1305 dictDisableResize();
1308 static int serverCron(struct aeEventLoop
*eventLoop
, long long id
, void *clientData
) {
1309 int j
, loops
= server
.cronloops
++;
1310 REDIS_NOTUSED(eventLoop
);
1312 REDIS_NOTUSED(clientData
);
1314 /* We take a cached value of the unix time in the global state because
1315 * with virtual memory and aging we need to store the current time
1316 * in objects at every object access, and accuracy is not needed.
1317 * To access a global var is faster than calling time(NULL) */
1318 server
.unixtime
= time(NULL
);
1320 /* Show some info about non-empty databases */
1321 for (j
= 0; j
< server
.dbnum
; j
++) {
1322 long long size
, used
, vkeys
;
1324 size
= dictSlots(server
.db
[j
].dict
);
1325 used
= dictSize(server
.db
[j
].dict
);
1326 vkeys
= dictSize(server
.db
[j
].expires
);
1327 if (!(loops
% 50) && (used
|| vkeys
)) {
1328 redisLog(REDIS_VERBOSE
,"DB %d: %lld keys (%lld volatile) in %lld slots HT.",j
,used
,vkeys
,size
);
1329 /* dictPrintStats(server.dict); */
1333 /* We don't want to resize the hash tables while a background saving
1334 * is in progress: the saving child is created using fork() that is
1335 * implemented with a copy-on-write semantic in most modern systems, so
1336 * if we resize the HT while there is the saving child at work actually
1337 * a lot of memory movements in the parent will cause a lot of pages
1339 if (server
.bgsavechildpid
== -1 && server
.bgrewritechildpid
== -1 &&
1342 tryResizeHashTables();
1345 /* Show information about connected clients */
1346 if (!(loops
% 50)) {
1347 redisLog(REDIS_VERBOSE
,"%d clients connected (%d slaves), %zu bytes in use",
1348 listLength(server
.clients
)-listLength(server
.slaves
),
1349 listLength(server
.slaves
),
1350 zmalloc_used_memory());
1353 /* Close connections of timedout clients */
1354 if ((server
.maxidletime
&& !(loops
% 100)) || server
.blpop_blocked_clients
)
1355 closeTimedoutClients();
1357 /* Check if a background saving or AOF rewrite in progress terminated */
1358 if (server
.bgsavechildpid
!= -1 || server
.bgrewritechildpid
!= -1) {
1362 if ((pid
= wait3(&statloc
,WNOHANG
,NULL
)) != 0) {
1363 if (pid
== server
.bgsavechildpid
) {
1364 backgroundSaveDoneHandler(statloc
);
1366 backgroundRewriteDoneHandler(statloc
);
1368 updateDictResizePolicy();
1371 /* If there is not a background saving in progress check if
1372 * we have to save now */
1373 time_t now
= time(NULL
);
1374 for (j
= 0; j
< server
.saveparamslen
; j
++) {
1375 struct saveparam
*sp
= server
.saveparams
+j
;
1377 if (server
.dirty
>= sp
->changes
&&
1378 now
-server
.lastsave
> sp
->seconds
) {
1379 redisLog(REDIS_NOTICE
,"%d changes in %d seconds. Saving...",
1380 sp
->changes
, sp
->seconds
);
1381 rdbSaveBackground(server
.dbfilename
);
1387 /* Try to expire a few timed out keys. The algorithm used is adaptive and
1388 * will use few CPU cycles if there are few expiring keys, otherwise
1389 * it will get more aggressive to avoid that too much memory is used by
1390 * keys that can be removed from the keyspace. */
1391 for (j
= 0; j
< server
.dbnum
; j
++) {
1393 redisDb
*db
= server
.db
+j
;
1395 /* Continue to expire if at the end of the cycle more than 25%
1396 * of the keys were expired. */
1398 long num
= dictSize(db
->expires
);
1399 time_t now
= time(NULL
);
1402 if (num
> REDIS_EXPIRELOOKUPS_PER_CRON
)
1403 num
= REDIS_EXPIRELOOKUPS_PER_CRON
;
1408 if ((de
= dictGetRandomKey(db
->expires
)) == NULL
) break;
1409 t
= (time_t) dictGetEntryVal(de
);
1411 deleteKey(db
,dictGetEntryKey(de
));
1413 server
.stat_expiredkeys
++;
1416 } while (expired
> REDIS_EXPIRELOOKUPS_PER_CRON
/4);
1419 /* Swap a few keys on disk if we are over the memory limit and VM
1420 * is enabled. Try to free objects from the free list first. */
1421 if (vmCanSwapOut()) {
1422 while (server
.vm_enabled
&& zmalloc_used_memory() >
1423 server
.vm_max_memory
)
1427 if (tryFreeOneObjectFromFreelist() == REDIS_OK
) continue;
1428 retval
= (server
.vm_max_threads
== 0) ?
1429 vmSwapOneObjectBlocking() :
1430 vmSwapOneObjectThreaded();
1431 if (retval
== REDIS_ERR
&& !(loops
% 300) &&
1432 zmalloc_used_memory() >
1433 (server
.vm_max_memory
+server
.vm_max_memory
/10))
1435 redisLog(REDIS_WARNING
,"WARNING: vm-max-memory limit exceeded by more than 10%% but unable to swap more objects out!");
1437 /* Note that when using threaded I/O we free just one object,
1438 * because anyway when the I/O thread in charge to swap this
1439 * object out will finish, the handler of completed jobs
1440 * will try to swap more objects if we are still out of memory. */
1441 if (retval
== REDIS_ERR
|| server
.vm_max_threads
> 0) break;
1445 /* Check if we should connect to a MASTER */
1446 if (server
.replstate
== REDIS_REPL_CONNECT
&& !(loops
% 10)) {
1447 redisLog(REDIS_NOTICE
,"Connecting to MASTER...");
1448 if (syncWithMaster() == REDIS_OK
) {
1449 redisLog(REDIS_NOTICE
,"MASTER <-> SLAVE sync succeeded");
1455 /* This function gets called every time Redis is entering the
1456 * main loop of the event driven library, that is, before to sleep
1457 * for ready file descriptors. */
1458 static void beforeSleep(struct aeEventLoop
*eventLoop
) {
1459 REDIS_NOTUSED(eventLoop
);
1461 if (server
.vm_enabled
&& listLength(server
.io_ready_clients
)) {
1465 listRewind(server
.io_ready_clients
,&li
);
1466 while((ln
= listNext(&li
))) {
1467 redisClient
*c
= ln
->value
;
1468 struct redisCommand
*cmd
;
1470 /* Resume the client. */
1471 listDelNode(server
.io_ready_clients
,ln
);
1472 c
->flags
&= (~REDIS_IO_WAIT
);
1473 server
.vm_blocked_clients
--;
1474 aeCreateFileEvent(server
.el
, c
->fd
, AE_READABLE
,
1475 readQueryFromClient
, c
);
1476 cmd
= lookupCommand(c
->argv
[0]->ptr
);
1477 assert(cmd
!= NULL
);
1480 /* There may be more data to process in the input buffer. */
1481 if (c
->querybuf
&& sdslen(c
->querybuf
) > 0)
1482 processInputBuffer(c
);
1487 static void createSharedObjects(void) {
1490 shared
.crlf
= createObject(REDIS_STRING
,sdsnew("\r\n"));
1491 shared
.ok
= createObject(REDIS_STRING
,sdsnew("+OK\r\n"));
1492 shared
.err
= createObject(REDIS_STRING
,sdsnew("-ERR\r\n"));
1493 shared
.emptybulk
= createObject(REDIS_STRING
,sdsnew("$0\r\n\r\n"));
1494 shared
.czero
= createObject(REDIS_STRING
,sdsnew(":0\r\n"));
1495 shared
.cone
= createObject(REDIS_STRING
,sdsnew(":1\r\n"));
1496 shared
.nullbulk
= createObject(REDIS_STRING
,sdsnew("$-1\r\n"));
1497 shared
.nullmultibulk
= createObject(REDIS_STRING
,sdsnew("*-1\r\n"));
1498 shared
.emptymultibulk
= createObject(REDIS_STRING
,sdsnew("*0\r\n"));
1499 shared
.pong
= createObject(REDIS_STRING
,sdsnew("+PONG\r\n"));
1500 shared
.queued
= createObject(REDIS_STRING
,sdsnew("+QUEUED\r\n"));
1501 shared
.wrongtypeerr
= createObject(REDIS_STRING
,sdsnew(
1502 "-ERR Operation against a key holding the wrong kind of value\r\n"));
1503 shared
.nokeyerr
= createObject(REDIS_STRING
,sdsnew(
1504 "-ERR no such key\r\n"));
1505 shared
.syntaxerr
= createObject(REDIS_STRING
,sdsnew(
1506 "-ERR syntax error\r\n"));
1507 shared
.sameobjecterr
= createObject(REDIS_STRING
,sdsnew(
1508 "-ERR source and destination objects are the same\r\n"));
1509 shared
.outofrangeerr
= createObject(REDIS_STRING
,sdsnew(
1510 "-ERR index out of range\r\n"));
1511 shared
.space
= createObject(REDIS_STRING
,sdsnew(" "));
1512 shared
.colon
= createObject(REDIS_STRING
,sdsnew(":"));
1513 shared
.plus
= createObject(REDIS_STRING
,sdsnew("+"));
1514 shared
.select0
= createStringObject("select 0\r\n",10);
1515 shared
.select1
= createStringObject("select 1\r\n",10);
1516 shared
.select2
= createStringObject("select 2\r\n",10);
1517 shared
.select3
= createStringObject("select 3\r\n",10);
1518 shared
.select4
= createStringObject("select 4\r\n",10);
1519 shared
.select5
= createStringObject("select 5\r\n",10);
1520 shared
.select6
= createStringObject("select 6\r\n",10);
1521 shared
.select7
= createStringObject("select 7\r\n",10);
1522 shared
.select8
= createStringObject("select 8\r\n",10);
1523 shared
.select9
= createStringObject("select 9\r\n",10);
1524 shared
.messagebulk
= createStringObject("$7\r\nmessage\r\n",13);
1525 shared
.subscribebulk
= createStringObject("$9\r\nsubscribe\r\n",15);
1526 shared
.unsubscribebulk
= createStringObject("$11\r\nunsubscribe\r\n",18);
1527 shared
.psubscribebulk
= createStringObject("$10\r\npsubscribe\r\n",17);
1528 shared
.punsubscribebulk
= createStringObject("$12\r\npunsubscribe\r\n",19);
1529 shared
.mbulk3
= createStringObject("*3\r\n",4);
1530 for (j
= 0; j
< REDIS_SHARED_INTEGERS
; j
++) {
1531 shared
.integers
[j
] = createObject(REDIS_STRING
,(void*)(long)j
);
1532 shared
.integers
[j
]->encoding
= REDIS_ENCODING_INT
;
1536 static void appendServerSaveParams(time_t seconds
, int changes
) {
1537 server
.saveparams
= zrealloc(server
.saveparams
,sizeof(struct saveparam
)*(server
.saveparamslen
+1));
1538 server
.saveparams
[server
.saveparamslen
].seconds
= seconds
;
1539 server
.saveparams
[server
.saveparamslen
].changes
= changes
;
1540 server
.saveparamslen
++;
1543 static void resetServerSaveParams() {
1544 zfree(server
.saveparams
);
1545 server
.saveparams
= NULL
;
1546 server
.saveparamslen
= 0;
1549 static void initServerConfig() {
1550 server
.dbnum
= REDIS_DEFAULT_DBNUM
;
1551 server
.port
= REDIS_SERVERPORT
;
1552 server
.verbosity
= REDIS_VERBOSE
;
1553 server
.maxidletime
= REDIS_MAXIDLETIME
;
1554 server
.saveparams
= NULL
;
1555 server
.logfile
= NULL
; /* NULL = log on standard output */
1556 server
.bindaddr
= NULL
;
1557 server
.glueoutputbuf
= 1;
1558 server
.daemonize
= 0;
1559 server
.appendonly
= 0;
1560 server
.appendfsync
= APPENDFSYNC_ALWAYS
;
1561 server
.lastfsync
= time(NULL
);
1562 server
.appendfd
= -1;
1563 server
.appendseldb
= -1; /* Make sure the first time will not match */
1564 server
.pidfile
= zstrdup("/var/run/redis.pid");
1565 server
.dbfilename
= zstrdup("dump.rdb");
1566 server
.appendfilename
= zstrdup("appendonly.aof");
1567 server
.requirepass
= NULL
;
1568 server
.shareobjects
= 0;
1569 server
.rdbcompression
= 1;
1570 server
.maxclients
= 0;
1571 server
.blpop_blocked_clients
= 0;
1572 server
.maxmemory
= 0;
1573 server
.vm_enabled
= 0;
1574 server
.vm_swap_file
= zstrdup("/tmp/redis-%p.vm");
1575 server
.vm_page_size
= 256; /* 256 bytes per page */
1576 server
.vm_pages
= 1024*1024*100; /* 104 millions of pages */
1577 server
.vm_max_memory
= 1024LL*1024*1024*1; /* 1 GB of RAM */
1578 server
.vm_max_threads
= 4;
1579 server
.vm_blocked_clients
= 0;
1580 server
.hash_max_zipmap_entries
= REDIS_HASH_MAX_ZIPMAP_ENTRIES
;
1581 server
.hash_max_zipmap_value
= REDIS_HASH_MAX_ZIPMAP_VALUE
;
1583 resetServerSaveParams();
1585 appendServerSaveParams(60*60,1); /* save after 1 hour and 1 change */
1586 appendServerSaveParams(300,100); /* save after 5 minutes and 100 changes */
1587 appendServerSaveParams(60,10000); /* save after 1 minute and 10000 changes */
1588 /* Replication related */
1590 server
.masterauth
= NULL
;
1591 server
.masterhost
= NULL
;
1592 server
.masterport
= 6379;
1593 server
.master
= NULL
;
1594 server
.replstate
= REDIS_REPL_NONE
;
1596 /* Double constants initialization */
1598 R_PosInf
= 1.0/R_Zero
;
1599 R_NegInf
= -1.0/R_Zero
;
1600 R_Nan
= R_Zero
/R_Zero
;
/* One-time runtime initialization of the global 'server' state.
 *
 * In order, the visible code: ignores SIGHUP and SIGPIPE, calls
 * setupSigSegvAction(), opens /dev/null, creates the client/slave/monitor
 * and object free lists, the shared objects, the event loop, the database
 * array and the listening TCP socket; creates the per-database
 * dictionaries and the Pub/Sub structures; resets child pids, counters and
 * timestamps; registers serverCron as a time event and acceptHandler on
 * the listening socket; opens the append only file when enabled and calls
 * vmInit() when virtual memory is enabled.
 *
 * NOTE(review): the local declarations and the bodies of the failure paths
 * are not visible in this extract. */
1603 static void initServer() {
1606 signal(SIGHUP
, SIG_IGN
);
1607 signal(SIGPIPE
, SIG_IGN
);
1608 setupSigSegvAction();
1610 server
.devnull
= fopen("/dev/null","w");
1611 if (server
.devnull
== NULL
) {
/* NOTE(review): server.neterr is the buffer handed to anetTcpServer()
 * further down; nothing visible writes to it before this fopen() failure
 * path, so the message presumably prints stale or empty text.
 * strerror(errno) looks like the intended argument. Confirm. */
1612 redisLog(REDIS_WARNING
, "Can't open /dev/null: %s", server
.neterr
);
1615 server
.clients
= listCreate();
1616 server
.slaves
= listCreate();
1617 server
.monitors
= listCreate();
1618 server
.objfreelist
= listCreate();
1619 createSharedObjects();
1620 server
.el
= aeCreateEventLoop();
1621 server
.db
= zmalloc(sizeof(redisDb
)*server
.dbnum
);
/* Listening socket; on failure the reason is left in server.neterr. */
1622 server
.fd
= anetTcpServer(server
.neterr
, server
.port
, server
.bindaddr
);
1623 if (server
.fd
== -1) {
1624 redisLog(REDIS_WARNING
, "Opening TCP port: %s", server
.neterr
);
/* Per-database dictionaries: keyspace, expires, keys clients are blocked
 * on, and (only with VM enabled) keys with clients waiting for I/O. */
1627 for (j
= 0; j
< server
.dbnum
; j
++) {
1628 server
.db
[j
].dict
= dictCreate(&dbDictType
,NULL
);
1629 server
.db
[j
].expires
= dictCreate(&keyptrDictType
,NULL
);
1630 server
.db
[j
].blockingkeys
= dictCreate(&keylistDictType
,NULL
);
1631 if (server
.vm_enabled
)
1632 server
.db
[j
].io_keys
= dictCreate(&keylistDictType
,NULL
);
1633 server
.db
[j
].id
= j
;
/* Pub/Sub state: channel -> list dictionary plus the list of patterns. */
1635 server
.pubsub_channels
= dictCreate(&keylistDictType
,NULL
);
1636 server
.pubsub_patterns
= listCreate();
1637 listSetFreeMethod(server
.pubsub_patterns
,freePubsubPattern
);
1638 listSetMatchMethod(server
.pubsub_patterns
,listMatchPubsubPattern
);
1639 server
.cronloops
= 0;
/* -1 means "no child of this kind is running". */
1640 server
.bgsavechildpid
= -1;
1641 server
.bgrewritechildpid
= -1;
1642 server
.bgrewritebuf
= sdsempty();
1643 server
.lastsave
= time(NULL
);
1645 server
.stat_numcommands
= 0;
1646 server
.stat_numconnections
= 0;
1647 server
.stat_expiredkeys
= 0;
1648 server
.stat_starttime
= time(NULL
);
1649 server
.unixtime
= time(NULL
);
/* Periodic housekeeping and the accept handler for new connections. */
1650 aeCreateTimeEvent(server
.el
, 1, serverCron
, NULL
, NULL
);
1651 if (aeCreateFileEvent(server
.el
, server
.fd
, AE_READABLE
,
1652 acceptHandler
, NULL
) == AE_ERR
) oom("creating file event");
/* Append only file: opened for appending, created if missing (mode 0644). */
1654 if (server
.appendonly
) {
1655 server
.appendfd
= open(server
.appendfilename
,O_WRONLY
|O_APPEND
|O_CREAT
,0644);
1656 if (server
.appendfd
== -1) {
1657 redisLog(REDIS_WARNING
, "Can't open the append-only file: %s",
1663 if (server
.vm_enabled
) vmInit();
1666 /* Empty the whole database: for every DB the main dictionary and the
 * expires dictionary are emptied, and 'removed' accumulates the number of
 * keys the main dictionaries held beforehand.
 * NOTE(review): the index declaration and the return statement are not
 * visible in this extract; 'removed' is presumably the return value. */
1667 static long long emptyDb() {
1669 long long removed
= 0;
1671 for (j
= 0; j
< server
.dbnum
; j
++) {
/* Count the keys before they are dropped. */
1672 removed
+= dictSize(server
.db
[j
].dict
);
1673 dictEmpty(server
.db
[j
].dict
);
1674 dictEmpty(server
.db
[j
].expires
);
/* Maps a configuration argument to a boolean: "yes" gives 1 and "no" gives
 * 0, compared case-insensitively.
 * NOTE(review): the result for any other input is not visible in this
 * extract; callers in loadServerConfig test the result against -1. */
1679 static int yesnotoi(char *s
) {
1680 if (!strcasecmp(s
,"yes")) return 1;
1681 else if (!strcasecmp(s
,"no")) return 0;
1685 /* I agree, this is a very rudimentary way to load a configuration...
1686 will improve later if the config gets more complex */
1687 static void loadServerConfig(char *filename
) {
1689 char buf
[REDIS_CONFIGLINE_MAX
+1], *err
= NULL
;
1692 char *errormsg
= "Fatal error, can't open config file '%s'";
1693 char *errorbuf
= zmalloc(sizeof(char)*(strlen(errormsg
)+strlen(filename
)));
1694 sprintf(errorbuf
, errormsg
, filename
);
1696 if (filename
[0] == '-' && filename
[1] == '\0')
1699 if ((fp
= fopen(filename
,"r")) == NULL
) {
1700 redisLog(REDIS_WARNING
, errorbuf
);
1705 while(fgets(buf
,REDIS_CONFIGLINE_MAX
+1,fp
) != NULL
) {
1711 line
= sdstrim(line
," \t\r\n");
1713 /* Skip comments and blank lines*/
1714 if (line
[0] == '#' || line
[0] == '\0') {
1719 /* Split into arguments */
1720 argv
= sdssplitlen(line
,sdslen(line
)," ",1,&argc
);
1721 sdstolower(argv
[0]);
1723 /* Execute config directives */
1724 if (!strcasecmp(argv
[0],"timeout") && argc
== 2) {
1725 server
.maxidletime
= atoi(argv
[1]);
1726 if (server
.maxidletime
< 0) {
1727 err
= "Invalid timeout value"; goto loaderr
;
1729 } else if (!strcasecmp(argv
[0],"port") && argc
== 2) {
1730 server
.port
= atoi(argv
[1]);
1731 if (server
.port
< 1 || server
.port
> 65535) {
1732 err
= "Invalid port"; goto loaderr
;
1734 } else if (!strcasecmp(argv
[0],"bind") && argc
== 2) {
1735 server
.bindaddr
= zstrdup(argv
[1]);
1736 } else if (!strcasecmp(argv
[0],"save") && argc
== 3) {
1737 int seconds
= atoi(argv
[1]);
1738 int changes
= atoi(argv
[2]);
1739 if (seconds
< 1 || changes
< 0) {
1740 err
= "Invalid save parameters"; goto loaderr
;
1742 appendServerSaveParams(seconds
,changes
);
1743 } else if (!strcasecmp(argv
[0],"dir") && argc
== 2) {
1744 if (chdir(argv
[1]) == -1) {
1745 redisLog(REDIS_WARNING
,"Can't chdir to '%s': %s",
1746 argv
[1], strerror(errno
));
1749 } else if (!strcasecmp(argv
[0],"loglevel") && argc
== 2) {
1750 if (!strcasecmp(argv
[1],"debug")) server
.verbosity
= REDIS_DEBUG
;
1751 else if (!strcasecmp(argv
[1],"verbose")) server
.verbosity
= REDIS_VERBOSE
;
1752 else if (!strcasecmp(argv
[1],"notice")) server
.verbosity
= REDIS_NOTICE
;
1753 else if (!strcasecmp(argv
[1],"warning")) server
.verbosity
= REDIS_WARNING
;
1755 err
= "Invalid log level. Must be one of debug, notice, warning";
1758 } else if (!strcasecmp(argv
[0],"logfile") && argc
== 2) {
1761 server
.logfile
= zstrdup(argv
[1]);
1762 if (!strcasecmp(server
.logfile
,"stdout")) {
1763 zfree(server
.logfile
);
1764 server
.logfile
= NULL
;
1766 if (server
.logfile
) {
1767 /* Test if we are able to open the file. The server will not
1768 * be able to abort just for this problem later... */
1769 logfp
= fopen(server
.logfile
,"a");
1770 if (logfp
== NULL
) {
1771 err
= sdscatprintf(sdsempty(),
1772 "Can't open the log file: %s", strerror(errno
));
1777 } else if (!strcasecmp(argv
[0],"databases") && argc
== 2) {
1778 server
.dbnum
= atoi(argv
[1]);
1779 if (server
.dbnum
< 1) {
1780 err
= "Invalid number of databases"; goto loaderr
;
1782 } else if (!strcasecmp(argv
[0],"include") && argc
== 2) {
1783 loadServerConfig(argv
[1]);
1784 } else if (!strcasecmp(argv
[0],"maxclients") && argc
== 2) {
1785 server
.maxclients
= atoi(argv
[1]);
1786 } else if (!strcasecmp(argv
[0],"maxmemory") && argc
== 2) {
1787 server
.maxmemory
= strtoll(argv
[1], NULL
, 10);
1788 } else if (!strcasecmp(argv
[0],"slaveof") && argc
== 3) {
1789 server
.masterhost
= sdsnew(argv
[1]);
1790 server
.masterport
= atoi(argv
[2]);
1791 server
.replstate
= REDIS_REPL_CONNECT
;
1792 } else if (!strcasecmp(argv
[0],"masterauth") && argc
== 2) {
1793 server
.masterauth
= zstrdup(argv
[1]);
1794 } else if (!strcasecmp(argv
[0],"glueoutputbuf") && argc
== 2) {
1795 if ((server
.glueoutputbuf
= yesnotoi(argv
[1])) == -1) {
1796 err
= "argument must be 'yes' or 'no'"; goto loaderr
;
1798 } else if (!strcasecmp(argv
[0],"shareobjects") && argc
== 2) {
1799 if ((server
.shareobjects
= yesnotoi(argv
[1])) == -1) {
1800 err
= "argument must be 'yes' or 'no'"; goto loaderr
;
1802 } else if (!strcasecmp(argv
[0],"rdbcompression") && argc
== 2) {
1803 if ((server
.rdbcompression
= yesnotoi(argv
[1])) == -1) {
1804 err
= "argument must be 'yes' or 'no'"; goto loaderr
;
1806 } else if (!strcasecmp(argv
[0],"daemonize") && argc
== 2) {
1807 if ((server
.daemonize
= yesnotoi(argv
[1])) == -1) {
1808 err
= "argument must be 'yes' or 'no'"; goto loaderr
;
1810 } else if (!strcasecmp(argv
[0],"appendonly") && argc
== 2) {
1811 if ((server
.appendonly
= yesnotoi(argv
[1])) == -1) {
1812 err
= "argument must be 'yes' or 'no'"; goto loaderr
;
1814 } else if (!strcasecmp(argv
[0],"appendfsync") && argc
== 2) {
1815 if (!strcasecmp(argv
[1],"no")) {
1816 server
.appendfsync
= APPENDFSYNC_NO
;
1817 } else if (!strcasecmp(argv
[1],"always")) {
1818 server
.appendfsync
= APPENDFSYNC_ALWAYS
;
1819 } else if (!strcasecmp(argv
[1],"everysec")) {
1820 server
.appendfsync
= APPENDFSYNC_EVERYSEC
;
1822 err
= "argument must be 'no', 'always' or 'everysec'";
1825 } else if (!strcasecmp(argv
[0],"requirepass") && argc
== 2) {
1826 server
.requirepass
= zstrdup(argv
[1]);
1827 } else if (!strcasecmp(argv
[0],"pidfile") && argc
== 2) {
1828 zfree(server
.pidfile
);
1829 server
.pidfile
= zstrdup(argv
[1]);
1830 } else if (!strcasecmp(argv
[0],"dbfilename") && argc
== 2) {
1831 zfree(server
.dbfilename
);
1832 server
.dbfilename
= zstrdup(argv
[1]);
1833 } else if (!strcasecmp(argv
[0],"vm-enabled") && argc
== 2) {
1834 if ((server
.vm_enabled
= yesnotoi(argv
[1])) == -1) {
1835 err
= "argument must be 'yes' or 'no'"; goto loaderr
;
1837 } else if (!strcasecmp(argv
[0],"vm-swap-file") && argc
== 2) {
1838 zfree(server
.vm_swap_file
);
1839 server
.vm_swap_file
= zstrdup(argv
[1]);
1840 } else if (!strcasecmp(argv
[0],"vm-max-memory") && argc
== 2) {
1841 server
.vm_max_memory
= strtoll(argv
[1], NULL
, 10);
1842 } else if (!strcasecmp(argv
[0],"vm-page-size") && argc
== 2) {
1843 server
.vm_page_size
= strtoll(argv
[1], NULL
, 10);
1844 } else if (!strcasecmp(argv
[0],"vm-pages") && argc
== 2) {
1845 server
.vm_pages
= strtoll(argv
[1], NULL
, 10);
1846 } else if (!strcasecmp(argv
[0],"vm-max-threads") && argc
== 2) {
1847 server
.vm_max_threads
= strtoll(argv
[1], NULL
, 10);
1848 } else if (!strcasecmp(argv
[0],"hash-max-zipmap-entries") && argc
== 2){
1849 server
.hash_max_zipmap_entries
= strtol(argv
[1], NULL
, 10);
1850 } else if (!strcasecmp(argv
[0],"hash-max-zipmap-value") && argc
== 2){
1851 server
.hash_max_zipmap_value
= strtol(argv
[1], NULL
, 10);
1852 } else if (!strcasecmp(argv
[0],"vm-max-threads") && argc
== 2) {
1853 server
.vm_max_threads
= strtoll(argv
[1], NULL
, 10);
1855 err
= "Bad directive or wrong number of arguments"; goto loaderr
;
1857 for (j
= 0; j
< argc
; j
++)
1862 if (fp
!= stdin
) fclose(fp
);
1866 fprintf(stderr
, "\n*** FATAL CONFIG FILE ERROR ***\n");
1867 fprintf(stderr
, "Reading the configuration file, at line %d\n", linenum
);
1868 fprintf(stderr
, ">>> '%s'\n", line
);
1869 fprintf(stderr
, "%s\n", err
);
1873 static void freeClientArgv(redisClient
*c
) {
1876 for (j
= 0; j
< c
->argc
; j
++)
1877 decrRefCount(c
->argv
[j
]);
1878 for (j
= 0; j
< c
->mbargc
; j
++)
1879 decrRefCount(c
->mbargv
[j
]);
1884 static void freeClient(redisClient
*c
) {
1887 /* Note that if the client we are freeing is blocked into a blocking
1888 * call, we have to set querybuf to NULL *before* to call
1889 * unblockClientWaitingData() to avoid processInputBuffer() will get
1890 * called. Also it is important to remove the file events after
1891 * this, because this call adds the READABLE event. */
1892 sdsfree(c
->querybuf
);
1894 if (c
->flags
& REDIS_BLOCKED
)
1895 unblockClientWaitingData(c
);
1897 /* Unsubscribe from all the pubsub channels */
1898 pubsubUnsubscribeAllChannels(c
,0);
1899 pubsubUnsubscribeAllPatterns(c
,0);
1900 dictRelease(c
->pubsub_channels
);
1901 listRelease(c
->pubsub_patterns
);
1902 /* Obvious cleanup */
1903 aeDeleteFileEvent(server
.el
,c
->fd
,AE_READABLE
);
1904 aeDeleteFileEvent(server
.el
,c
->fd
,AE_WRITABLE
);
1905 listRelease(c
->reply
);
1908 /* Remove from the list of clients */
1909 ln
= listSearchKey(server
.clients
,c
);
1910 redisAssert(ln
!= NULL
);
1911 listDelNode(server
.clients
,ln
);
1912 /* Remove from the list of clients waiting for swapped keys */
1913 if (c
->flags
& REDIS_IO_WAIT
&& listLength(c
->io_keys
) == 0) {
1914 ln
= listSearchKey(server
.io_ready_clients
,c
);
1916 listDelNode(server
.io_ready_clients
,ln
);
1917 server
.vm_blocked_clients
--;
1920 while (server
.vm_enabled
&& listLength(c
->io_keys
)) {
1921 ln
= listFirst(c
->io_keys
);
1922 dontWaitForSwappedKey(c
,ln
->value
);
1924 listRelease(c
->io_keys
);
1925 /* Master/slave cleanup */
1926 if (c
->flags
& REDIS_SLAVE
) {
1927 if (c
->replstate
== REDIS_REPL_SEND_BULK
&& c
->repldbfd
!= -1)
1929 list
*l
= (c
->flags
& REDIS_MONITOR
) ? server
.monitors
: server
.slaves
;
1930 ln
= listSearchKey(l
,c
);
1931 redisAssert(ln
!= NULL
);
1934 if (c
->flags
& REDIS_MASTER
) {
1935 server
.master
= NULL
;
1936 server
.replstate
= REDIS_REPL_CONNECT
;
1938 /* Release memory */
1941 freeClientMultiState(c
);
1945 #define GLUEREPLY_UP_TO (1024)
1946 static void glueReplyBuffersIfNeeded(redisClient
*c
) {
1948 char buf
[GLUEREPLY_UP_TO
];
1953 listRewind(c
->reply
,&li
);
1954 while((ln
= listNext(&li
))) {
1958 objlen
= sdslen(o
->ptr
);
1959 if (copylen
+ objlen
<= GLUEREPLY_UP_TO
) {
1960 memcpy(buf
+copylen
,o
->ptr
,objlen
);
1962 listDelNode(c
->reply
,ln
);
1964 if (copylen
== 0) return;
1968 /* Now the output buffer is empty, add the new single element */
1969 o
= createObject(REDIS_STRING
,sdsnewlen(buf
,copylen
));
1970 listAddNodeHead(c
->reply
,o
);
1973 static void sendReplyToClient(aeEventLoop
*el
, int fd
, void *privdata
, int mask
) {
1974 redisClient
*c
= privdata
;
1975 int nwritten
= 0, totwritten
= 0, objlen
;
1978 REDIS_NOTUSED(mask
);
1980 /* Use writev() if we have enough buffers to send */
1981 if (!server
.glueoutputbuf
&&
1982 listLength(c
->reply
) > REDIS_WRITEV_THRESHOLD
&&
1983 !(c
->flags
& REDIS_MASTER
))
1985 sendReplyToClientWritev(el
, fd
, privdata
, mask
);
1989 while(listLength(c
->reply
)) {
1990 if (server
.glueoutputbuf
&& listLength(c
->reply
) > 1)
1991 glueReplyBuffersIfNeeded(c
);
1993 o
= listNodeValue(listFirst(c
->reply
));
1994 objlen
= sdslen(o
->ptr
);
1997 listDelNode(c
->reply
,listFirst(c
->reply
));
2001 if (c
->flags
& REDIS_MASTER
) {
2002 /* Don't reply to a master */
2003 nwritten
= objlen
- c
->sentlen
;
2005 nwritten
= write(fd
, ((char*)o
->ptr
)+c
->sentlen
, objlen
- c
->sentlen
);
2006 if (nwritten
<= 0) break;
2008 c
->sentlen
+= nwritten
;
2009 totwritten
+= nwritten
;
2010 /* If we fully sent the object on head go to the next one */
2011 if (c
->sentlen
== objlen
) {
2012 listDelNode(c
->reply
,listFirst(c
->reply
));
2015 /* Note that we avoid to send more thank REDIS_MAX_WRITE_PER_EVENT
2016 * bytes, in a single threaded server it's a good idea to serve
2017 * other clients as well, even if a very large request comes from
2018 * super fast link that is always able to accept data (in real world
2019 * scenario think about 'KEYS *' against the loopback interfae) */
2020 if (totwritten
> REDIS_MAX_WRITE_PER_EVENT
) break;
2022 if (nwritten
== -1) {
2023 if (errno
== EAGAIN
) {
2026 redisLog(REDIS_VERBOSE
,
2027 "Error writing to client: %s", strerror(errno
));
2032 if (totwritten
> 0) c
->lastinteraction
= time(NULL
);
2033 if (listLength(c
->reply
) == 0) {
2035 aeDeleteFileEvent(server
.el
,c
->fd
,AE_WRITABLE
);
2039 static void sendReplyToClientWritev(aeEventLoop
*el
, int fd
, void *privdata
, int mask
)
2041 redisClient
*c
= privdata
;
2042 int nwritten
= 0, totwritten
= 0, objlen
, willwrite
;
2044 struct iovec iov
[REDIS_WRITEV_IOVEC_COUNT
];
2045 int offset
, ion
= 0;
2047 REDIS_NOTUSED(mask
);
2050 while (listLength(c
->reply
)) {
2051 offset
= c
->sentlen
;
2055 /* fill-in the iov[] array */
2056 for(node
= listFirst(c
->reply
); node
; node
= listNextNode(node
)) {
2057 o
= listNodeValue(node
);
2058 objlen
= sdslen(o
->ptr
);
2060 if (totwritten
+ objlen
- offset
> REDIS_MAX_WRITE_PER_EVENT
)
2063 if(ion
== REDIS_WRITEV_IOVEC_COUNT
)
2064 break; /* no more iovecs */
2066 iov
[ion
].iov_base
= ((char*)o
->ptr
) + offset
;
2067 iov
[ion
].iov_len
= objlen
- offset
;
2068 willwrite
+= objlen
- offset
;
2069 offset
= 0; /* just for the first item */
2076 /* write all collected blocks at once */
2077 if((nwritten
= writev(fd
, iov
, ion
)) < 0) {
2078 if (errno
!= EAGAIN
) {
2079 redisLog(REDIS_VERBOSE
,
2080 "Error writing to client: %s", strerror(errno
));
2087 totwritten
+= nwritten
;
2088 offset
= c
->sentlen
;
2090 /* remove written robjs from c->reply */
2091 while (nwritten
&& listLength(c
->reply
)) {
2092 o
= listNodeValue(listFirst(c
->reply
));
2093 objlen
= sdslen(o
->ptr
);
2095 if(nwritten
>= objlen
- offset
) {
2096 listDelNode(c
->reply
, listFirst(c
->reply
));
2097 nwritten
-= objlen
- offset
;
2101 c
->sentlen
+= nwritten
;
2109 c
->lastinteraction
= time(NULL
);
2111 if (listLength(c
->reply
) == 0) {
2113 aeDeleteFileEvent(server
.el
,c
->fd
,AE_WRITABLE
);
2117 static struct redisCommand
*lookupCommand(char *name
) {
2119 while(cmdTable
[j
].name
!= NULL
) {
2120 if (!strcasecmp(name
,cmdTable
[j
].name
)) return &cmdTable
[j
];
2126 /* resetClient prepare the client to process the next command */
2127 static void resetClient(redisClient
*c
) {
2133 /* Call() is the core of Redis execution of a command */
2134 static void call(redisClient
*c
, struct redisCommand
*cmd
) {
2137 dirty
= server
.dirty
;
2139 dirty
= server
.dirty
-dirty
;
2141 if (server
.appendonly
&& dirty
)
2142 feedAppendOnlyFile(cmd
,c
->db
->id
,c
->argv
,c
->argc
);
2143 if ((dirty
|| cmd
->flags
& REDIS_CMD_FORCE_REPLICATION
) &&
2144 listLength(server
.slaves
))
2145 replicationFeedSlaves(server
.slaves
,c
->db
->id
,c
->argv
,c
->argc
);
2146 if (listLength(server
.monitors
))
2147 replicationFeedSlaves(server
.monitors
,c
->db
->id
,c
->argv
,c
->argc
);
2148 server
.stat_numcommands
++;
2151 /* If this function gets called we already read a whole
2152 * command, argments are in the client argv/argc fields.
2153 * processCommand() execute the command or prepare the
2154 * server for a bulk read from the client.
2156 * If 1 is returned the client is still alive and valid and
2157 * and other operations can be performed by the caller. Otherwise
2158 * if 0 is returned the client was destroied (i.e. after QUIT). */
2159 static int processCommand(redisClient
*c
) {
2160 struct redisCommand
*cmd
;
2162 /* Free some memory if needed (maxmemory setting) */
2163 if (server
.maxmemory
) freeMemoryIfNeeded();
2165 /* Handle the multi bulk command type. This is an alternative protocol
2166 * supported by Redis in order to receive commands that are composed of
2167 * multiple binary-safe "bulk" arguments. The latency of processing is
2168 * a bit higher but this allows things like multi-sets, so if this
2169 * protocol is used only for MSET and similar commands this is a big win. */
2170 if (c
->multibulk
== 0 && c
->argc
== 1 && ((char*)(c
->argv
[0]->ptr
))[0] == '*') {
2171 c
->multibulk
= atoi(((char*)c
->argv
[0]->ptr
)+1);
2172 if (c
->multibulk
<= 0) {
2176 decrRefCount(c
->argv
[c
->argc
-1]);
2180 } else if (c
->multibulk
) {
2181 if (c
->bulklen
== -1) {
2182 if (((char*)c
->argv
[0]->ptr
)[0] != '$') {
2183 addReplySds(c
,sdsnew("-ERR multi bulk protocol error\r\n"));
2187 int bulklen
= atoi(((char*)c
->argv
[0]->ptr
)+1);
2188 decrRefCount(c
->argv
[0]);
2189 if (bulklen
< 0 || bulklen
> 1024*1024*1024) {
2191 addReplySds(c
,sdsnew("-ERR invalid bulk write count\r\n"));
2196 c
->bulklen
= bulklen
+2; /* add two bytes for CR+LF */
2200 c
->mbargv
= zrealloc(c
->mbargv
,(sizeof(robj
*))*(c
->mbargc
+1));
2201 c
->mbargv
[c
->mbargc
] = c
->argv
[0];
2205 if (c
->multibulk
== 0) {
2209 /* Here we need to swap the multi-bulk argc/argv with the
2210 * normal argc/argv of the client structure. */
2212 c
->argv
= c
->mbargv
;
2213 c
->mbargv
= auxargv
;
2216 c
->argc
= c
->mbargc
;
2217 c
->mbargc
= auxargc
;
2219 /* We need to set bulklen to something different than -1
2220 * in order for the code below to process the command without
2221 * to try to read the last argument of a bulk command as
2222 * a special argument. */
2224 /* continue below and process the command */
2231 /* -- end of multi bulk commands processing -- */
2233 /* The QUIT command is handled as a special case. Normal command
2234 * procs are unable to close the client connection safely */
2235 if (!strcasecmp(c
->argv
[0]->ptr
,"quit")) {
2240 /* Now lookup the command and check ASAP about trivial error conditions
2241 * such wrong arity, bad command name and so forth. */
2242 cmd
= lookupCommand(c
->argv
[0]->ptr
);
2245 sdscatprintf(sdsempty(), "-ERR unknown command '%s'\r\n",
2246 (char*)c
->argv
[0]->ptr
));
2249 } else if ((cmd
->arity
> 0 && cmd
->arity
!= c
->argc
) ||
2250 (c
->argc
< -cmd
->arity
)) {
2252 sdscatprintf(sdsempty(),
2253 "-ERR wrong number of arguments for '%s' command\r\n",
2257 } else if (cmd
->flags
& REDIS_CMD_BULK
&& c
->bulklen
== -1) {
2258 /* This is a bulk command, we have to read the last argument yet. */
2259 int bulklen
= atoi(c
->argv
[c
->argc
-1]->ptr
);
2261 decrRefCount(c
->argv
[c
->argc
-1]);
2262 if (bulklen
< 0 || bulklen
> 1024*1024*1024) {
2264 addReplySds(c
,sdsnew("-ERR invalid bulk write count\r\n"));
2269 c
->bulklen
= bulklen
+2; /* add two bytes for CR+LF */
2270 /* It is possible that the bulk read is already in the
2271 * buffer. Check this condition and handle it accordingly.
2272 * This is just a fast path, alternative to call processInputBuffer().
2273 * It's a good idea since the code is small and this condition
2274 * happens most of the times. */
2275 if ((signed)sdslen(c
->querybuf
) >= c
->bulklen
) {
2276 c
->argv
[c
->argc
] = createStringObject(c
->querybuf
,c
->bulklen
-2);
2278 c
->querybuf
= sdsrange(c
->querybuf
,c
->bulklen
,-1);
2280 /* Otherwise return... there is to read the last argument
2281 * from the socket. */
2285 /* Let's try to encode the bulk object to save space. */
2286 if (cmd
->flags
& REDIS_CMD_BULK
)
2287 c
->argv
[c
->argc
-1] = tryObjectEncoding(c
->argv
[c
->argc
-1]);
2289 /* Check if the user is authenticated */
2290 if (server
.requirepass
&& !c
->authenticated
&& cmd
->proc
!= authCommand
) {
2291 addReplySds(c
,sdsnew("-ERR operation not permitted\r\n"));
2296 /* Handle the maxmemory directive */
2297 if (server
.maxmemory
&& (cmd
->flags
& REDIS_CMD_DENYOOM
) &&
2298 zmalloc_used_memory() > server
.maxmemory
)
2300 addReplySds(c
,sdsnew("-ERR command not allowed when used memory > 'maxmemory'\r\n"));
2305 /* Only allow SUBSCRIBE and UNSUBSCRIBE in the context of Pub/Sub */
2306 if (dictSize(c
->pubsub_channels
) > 0 &&
2307 cmd
->proc
!= subscribeCommand
&& cmd
->proc
!= unsubscribeCommand
&&
2308 cmd
->proc
!= psubscribeCommand
&& cmd
->proc
!= punsubscribeCommand
) {
2309 addReplySds(c
,sdsnew("-ERR only (P)SUBSCRIBE / (P)UNSUBSCRIBE / QUIT allowed in this context\r\n"));
2314 /* Exec the command */
2315 if (c
->flags
& REDIS_MULTI
&& cmd
->proc
!= execCommand
&& cmd
->proc
!= discardCommand
) {
2316 queueMultiCommand(c
,cmd
);
2317 addReply(c
,shared
.queued
);
2319 if (server
.vm_enabled
&& server
.vm_max_threads
> 0 &&
2320 blockClientOnSwappedKeys(cmd
,c
)) return 1;
2324 /* Prepare the client for the next command */
2329 static void replicationFeedSlaves(list
*slaves
, int dictid
, robj
**argv
, int argc
) {
2334 /* We need 1+(ARGS*3) objects since commands are using the new protocol
2335 * and we one 1 object for the first "*<count>\r\n" multibulk count, then
2336 * for every additional object we have "$<count>\r\n" + object + "\r\n". */
2337 robj
*static_outv
[REDIS_STATIC_ARGS
*3+1];
2340 if (argc
<= REDIS_STATIC_ARGS
) {
2343 outv
= zmalloc(sizeof(robj
*)*(argc
*3+1));
2346 lenobj
= createObject(REDIS_STRING
,
2347 sdscatprintf(sdsempty(), "*%d\r\n", argc
));
2348 lenobj
->refcount
= 0;
2349 outv
[outc
++] = lenobj
;
2350 for (j
= 0; j
< argc
; j
++) {
2351 lenobj
= createObject(REDIS_STRING
,
2352 sdscatprintf(sdsempty(),"$%lu\r\n",
2353 (unsigned long) stringObjectLen(argv
[j
])));
2354 lenobj
->refcount
= 0;
2355 outv
[outc
++] = lenobj
;
2356 outv
[outc
++] = argv
[j
];
2357 outv
[outc
++] = shared
.crlf
;
2360 /* Increment all the refcounts at start and decrement at end in order to
2361 * be sure to free objects if there is no slave in a replication state
2362 * able to be feed with commands */
2363 for (j
= 0; j
< outc
; j
++) incrRefCount(outv
[j
]);
2364 listRewind(slaves
,&li
);
2365 while((ln
= listNext(&li
))) {
2366 redisClient
*slave
= ln
->value
;
2368 /* Don't feed slaves that are still waiting for BGSAVE to start */
2369 if (slave
->replstate
== REDIS_REPL_WAIT_BGSAVE_START
) continue;
2371 /* Feed all the other slaves, MONITORs and so on */
2372 if (slave
->slaveseldb
!= dictid
) {
2376 case 0: selectcmd
= shared
.select0
; break;
2377 case 1: selectcmd
= shared
.select1
; break;
2378 case 2: selectcmd
= shared
.select2
; break;
2379 case 3: selectcmd
= shared
.select3
; break;
2380 case 4: selectcmd
= shared
.select4
; break;
2381 case 5: selectcmd
= shared
.select5
; break;
2382 case 6: selectcmd
= shared
.select6
; break;
2383 case 7: selectcmd
= shared
.select7
; break;
2384 case 8: selectcmd
= shared
.select8
; break;
2385 case 9: selectcmd
= shared
.select9
; break;
2387 selectcmd
= createObject(REDIS_STRING
,
2388 sdscatprintf(sdsempty(),"select %d\r\n",dictid
));
2389 selectcmd
->refcount
= 0;
2392 addReply(slave
,selectcmd
);
2393 slave
->slaveseldb
= dictid
;
2395 for (j
= 0; j
< outc
; j
++) addReply(slave
,outv
[j
]);
2397 for (j
= 0; j
< outc
; j
++) decrRefCount(outv
[j
]);
2398 if (outv
!= static_outv
) zfree(outv
);
2401 static void processInputBuffer(redisClient
*c
) {
2403 /* Before to process the input buffer, make sure the client is not
2404 * waitig for a blocking operation such as BLPOP. Note that the first
2405 * iteration the client is never blocked, otherwise the processInputBuffer
2406 * would not be called at all, but after the execution of the first commands
2407 * in the input buffer the client may be blocked, and the "goto again"
2408 * will try to reiterate. The following line will make it return asap. */
2409 if (c
->flags
& REDIS_BLOCKED
|| c
->flags
& REDIS_IO_WAIT
) return;
2410 if (c
->bulklen
== -1) {
2411 /* Read the first line of the query */
2412 char *p
= strchr(c
->querybuf
,'\n');
2419 query
= c
->querybuf
;
2420 c
->querybuf
= sdsempty();
2421 querylen
= 1+(p
-(query
));
2422 if (sdslen(query
) > querylen
) {
2423 /* leave data after the first line of the query in the buffer */
2424 c
->querybuf
= sdscatlen(c
->querybuf
,query
+querylen
,sdslen(query
)-querylen
);
2426 *p
= '\0'; /* remove "\n" */
2427 if (*(p
-1) == '\r') *(p
-1) = '\0'; /* and "\r" if any */
2428 sdsupdatelen(query
);
2430 /* Now we can split the query in arguments */
2431 argv
= sdssplitlen(query
,sdslen(query
)," ",1,&argc
);
2434 if (c
->argv
) zfree(c
->argv
);
2435 c
->argv
= zmalloc(sizeof(robj
*)*argc
);
2437 for (j
= 0; j
< argc
; j
++) {
2438 if (sdslen(argv
[j
])) {
2439 c
->argv
[c
->argc
] = createObject(REDIS_STRING
,argv
[j
]);
2447 /* Execute the command. If the client is still valid
2448 * after processCommand() return and there is something
2449 * on the query buffer try to process the next command. */
2450 if (processCommand(c
) && sdslen(c
->querybuf
)) goto again
;
2452 /* Nothing to process, argc == 0. Just process the query
2453 * buffer if it's not empty or return to the caller */
2454 if (sdslen(c
->querybuf
)) goto again
;
2457 } else if (sdslen(c
->querybuf
) >= REDIS_REQUEST_MAX_SIZE
) {
2458 redisLog(REDIS_VERBOSE
, "Client protocol error");
2463 /* Bulk read handling. Note that if we are at this point
2464 the client already sent a command terminated with a newline,
2465 we are reading the bulk data that is actually the last
2466 argument of the command. */
2467 int qbl
= sdslen(c
->querybuf
);
2469 if (c
->bulklen
<= qbl
) {
2470 /* Copy everything but the final CRLF as final argument */
2471 c
->argv
[c
->argc
] = createStringObject(c
->querybuf
,c
->bulklen
-2);
2473 c
->querybuf
= sdsrange(c
->querybuf
,c
->bulklen
,-1);
2474 /* Process the command. If the client is still valid after
2475 * the processing and there is more data in the buffer
2476 * try to parse it. */
2477 if (processCommand(c
) && sdslen(c
->querybuf
)) goto again
;
2483 static void readQueryFromClient(aeEventLoop
*el
, int fd
, void *privdata
, int mask
) {
2484 redisClient
*c
= (redisClient
*) privdata
;
2485 char buf
[REDIS_IOBUF_LEN
];
2488 REDIS_NOTUSED(mask
);
2490 nread
= read(fd
, buf
, REDIS_IOBUF_LEN
);
2492 if (errno
== EAGAIN
) {
2495 redisLog(REDIS_VERBOSE
, "Reading from client: %s",strerror(errno
));
2499 } else if (nread
== 0) {
2500 redisLog(REDIS_VERBOSE
, "Client closed connection");
2505 c
->querybuf
= sdscatlen(c
->querybuf
, buf
, nread
);
2506 c
->lastinteraction
= time(NULL
);
2510 processInputBuffer(c
);
2513 static int selectDb(redisClient
*c
, int id
) {
2514 if (id
< 0 || id
>= server
.dbnum
)
2516 c
->db
= &server
.db
[id
];
2520 static void *dupClientReplyValue(void *o
) {
2521 incrRefCount((robj
*)o
);
/* Match method for lists of string objects: returns non-zero when the two
 * objects compare equal according to compareStringObjects(). */
static int listMatchObjects(void *a, void *b) {
    int cmp = compareStringObjects(a,b);

    return cmp == 0;
}
2529 static redisClient
*createClient(int fd
) {
2530 redisClient
*c
= zmalloc(sizeof(*c
));
2532 anetNonBlock(NULL
,fd
);
2533 anetTcpNoDelay(NULL
,fd
);
2534 if (!c
) return NULL
;
2537 c
->querybuf
= sdsempty();
2546 c
->lastinteraction
= time(NULL
);
2547 c
->authenticated
= 0;
2548 c
->replstate
= REDIS_REPL_NONE
;
2549 c
->reply
= listCreate();
2550 listSetFreeMethod(c
->reply
,decrRefCount
);
2551 listSetDupMethod(c
->reply
,dupClientReplyValue
);
2552 c
->blockingkeys
= NULL
;
2553 c
->blockingkeysnum
= 0;
2554 c
->io_keys
= listCreate();
2555 listSetFreeMethod(c
->io_keys
,decrRefCount
);
2556 c
->pubsub_channels
= dictCreate(&setDictType
,NULL
);
2557 c
->pubsub_patterns
= listCreate();
2558 listSetFreeMethod(c
->pubsub_patterns
,decrRefCount
);
2559 listSetMatchMethod(c
->pubsub_patterns
,listMatchObjects
);
2560 if (aeCreateFileEvent(server
.el
, c
->fd
, AE_READABLE
,
2561 readQueryFromClient
, c
) == AE_ERR
) {
2565 listAddNodeTail(server
.clients
,c
);
2566 initClientMultiState(c
);
2570 static void addReply(redisClient
*c
, robj
*obj
) {
2571 if (listLength(c
->reply
) == 0 &&
2572 (c
->replstate
== REDIS_REPL_NONE
||
2573 c
->replstate
== REDIS_REPL_ONLINE
) &&
2574 aeCreateFileEvent(server
.el
, c
->fd
, AE_WRITABLE
,
2575 sendReplyToClient
, c
) == AE_ERR
) return;
2577 if (server
.vm_enabled
&& obj
->storage
!= REDIS_VM_MEMORY
) {
2578 obj
= dupStringObject(obj
);
2579 obj
->refcount
= 0; /* getDecodedObject() will increment the refcount */
2581 listAddNodeTail(c
->reply
,getDecodedObject(obj
));
2584 static void addReplySds(redisClient
*c
, sds s
) {
2585 robj
*o
= createObject(REDIS_STRING
,s
);
2590 static void addReplyDouble(redisClient
*c
, double d
) {
2593 snprintf(buf
,sizeof(buf
),"%.17g",d
);
2594 addReplySds(c
,sdscatprintf(sdsempty(),"$%lu\r\n%s\r\n",
2595 (unsigned long) strlen(buf
),buf
));
2598 static void addReplyLong(redisClient
*c
, long l
) {
2603 addReply(c
,shared
.czero
);
2605 } else if (l
== 1) {
2606 addReply(c
,shared
.cone
);
2609 len
= snprintf(buf
,sizeof(buf
),":%ld\r\n",l
);
2610 addReplySds(c
,sdsnewlen(buf
,len
));
2613 static void addReplyLongLong(redisClient
*c
, long long ll
) {
2618 addReply(c
,shared
.czero
);
2620 } else if (ll
== 1) {
2621 addReply(c
,shared
.cone
);
2624 len
= snprintf(buf
,sizeof(buf
),":%lld\r\n",ll
);
2625 addReplySds(c
,sdsnewlen(buf
,len
));
2628 static void addReplyUlong(redisClient
*c
, unsigned long ul
) {
2633 addReply(c
,shared
.czero
);
2635 } else if (ul
== 1) {
2636 addReply(c
,shared
.cone
);
2639 len
= snprintf(buf
,sizeof(buf
),":%lu\r\n",ul
);
2640 addReplySds(c
,sdsnewlen(buf
,len
));
2643 static void addReplyBulkLen(redisClient
*c
, robj
*obj
) {
2646 if (obj
->encoding
== REDIS_ENCODING_RAW
) {
2647 len
= sdslen(obj
->ptr
);
2649 long n
= (long)obj
->ptr
;
2651 /* Compute how many bytes will take this integer as a radix 10 string */
2657 while((n
= n
/10) != 0) {
2661 addReplySds(c
,sdscatprintf(sdsempty(),"$%lu\r\n",(unsigned long)len
));
2664 static void addReplyBulk(redisClient
*c
, robj
*obj
) {
2665 addReplyBulkLen(c
,obj
);
2667 addReply(c
,shared
.crlf
);
2670 /* In the CONFIG command we need to add vanilla C string as bulk replies */
2671 static void addReplyBulkCString(redisClient
*c
, char *s
) {
2673 addReply(c
,shared
.nullbulk
);
2675 robj
*o
= createStringObject(s
,strlen(s
));
2681 static void acceptHandler(aeEventLoop
*el
, int fd
, void *privdata
, int mask
) {
2686 REDIS_NOTUSED(mask
);
2687 REDIS_NOTUSED(privdata
);
2689 cfd
= anetAccept(server
.neterr
, fd
, cip
, &cport
);
2690 if (cfd
== AE_ERR
) {
2691 redisLog(REDIS_VERBOSE
,"Accepting client connection: %s", server
.neterr
);
2694 redisLog(REDIS_VERBOSE
,"Accepted %s:%d", cip
, cport
);
2695 if ((c
= createClient(cfd
)) == NULL
) {
2696 redisLog(REDIS_WARNING
,"Error allocating resoures for the client");
2697 close(cfd
); /* May be already closed, just ingore errors */
2700 /* If maxclient directive is set and this is one client more... close the
2701 * connection. Note that we create the client instead to check before
2702 * for this condition, since now the socket is already set in nonblocking
2703 * mode and we can send an error for free using the Kernel I/O */
2704 if (server
.maxclients
&& listLength(server
.clients
) > server
.maxclients
) {
2705 char *err
= "-ERR max number of clients reached\r\n";
2707 /* That's a best effort error message, don't check write errors */
2708 if (write(c
->fd
,err
,strlen(err
)) == -1) {
2709 /* Nothing to do, Just to avoid the warning... */
2714 server
.stat_numconnections
++;
2717 /* ======================= Redis objects implementation ===================== */
2719 static robj
*createObject(int type
, void *ptr
) {
2722 if (server
.vm_enabled
) pthread_mutex_lock(&server
.obj_freelist_mutex
);
2723 if (listLength(server
.objfreelist
)) {
2724 listNode
*head
= listFirst(server
.objfreelist
);
2725 o
= listNodeValue(head
);
2726 listDelNode(server
.objfreelist
,head
);
2727 if (server
.vm_enabled
) pthread_mutex_unlock(&server
.obj_freelist_mutex
);
2729 if (server
.vm_enabled
) {
2730 pthread_mutex_unlock(&server
.obj_freelist_mutex
);
2731 o
= zmalloc(sizeof(*o
));
2733 o
= zmalloc(sizeof(*o
)-sizeof(struct redisObjectVM
));
2737 o
->encoding
= REDIS_ENCODING_RAW
;
2740 if (server
.vm_enabled
) {
2741 /* Note that this code may run in the context of an I/O thread
2742 * and accessing to server.unixtime in theory is an error
2743 * (no locks). But in practice this is safe, and even if we read
2744 * garbage Redis will not fail, as it's just a statistical info */
2745 o
->vm
.atime
= server
.unixtime
;
2746 o
->storage
= REDIS_VM_MEMORY
;
2751 static robj
*createStringObject(char *ptr
, size_t len
) {
2752 return createObject(REDIS_STRING
,sdsnewlen(ptr
,len
));
2755 static robj
*dupStringObject(robj
*o
) {
2756 assert(o
->encoding
== REDIS_ENCODING_RAW
);
2757 return createStringObject(o
->ptr
,sdslen(o
->ptr
));
2760 static robj
*createListObject(void) {
2761 list
*l
= listCreate();
2763 listSetFreeMethod(l
,decrRefCount
);
2764 return createObject(REDIS_LIST
,l
);
2767 static robj
*createSetObject(void) {
2768 dict
*d
= dictCreate(&setDictType
,NULL
);
2769 return createObject(REDIS_SET
,d
);
2772 static robj
*createHashObject(void) {
2773 /* All the Hashes start as zipmaps. Will be automatically converted
2774 * into hash tables if there are enough elements or big elements
2776 unsigned char *zm
= zipmapNew();
2777 robj
*o
= createObject(REDIS_HASH
,zm
);
2778 o
->encoding
= REDIS_ENCODING_ZIPMAP
;
2782 static robj
*createZsetObject(void) {
2783 zset
*zs
= zmalloc(sizeof(*zs
));
2785 zs
->dict
= dictCreate(&zsetDictType
,NULL
);
2786 zs
->zsl
= zslCreate();
2787 return createObject(REDIS_ZSET
,zs
);
2790 static void freeStringObject(robj
*o
) {
2791 if (o
->encoding
== REDIS_ENCODING_RAW
) {
2796 static void freeListObject(robj
*o
) {
2797 listRelease((list
*) o
->ptr
);
2800 static void freeSetObject(robj
*o
) {
2801 dictRelease((dict
*) o
->ptr
);
2804 static void freeZsetObject(robj
*o
) {
2807 dictRelease(zs
->dict
);
2812 static void freeHashObject(robj
*o
) {
2813 switch (o
->encoding
) {
2814 case REDIS_ENCODING_HT
:
2815 dictRelease((dict
*) o
->ptr
);
2817 case REDIS_ENCODING_ZIPMAP
:
2826 static void incrRefCount(robj
*o
) {
2830 static void decrRefCount(void *obj
) {
2833 /* Object is a key of a swapped out value, or in the process of being
2835 if (server
.vm_enabled
&&
2836 (o
->storage
== REDIS_VM_SWAPPED
|| o
->storage
== REDIS_VM_LOADING
))
2838 if (o
->storage
== REDIS_VM_LOADING
) vmCancelThreadedIOJob(obj
);
2839 redisAssert(o
->type
== REDIS_STRING
);
2840 freeStringObject(o
);
2841 vmMarkPagesFree(o
->vm
.page
,o
->vm
.usedpages
);
2842 pthread_mutex_lock(&server
.obj_freelist_mutex
);
2843 if (listLength(server
.objfreelist
) > REDIS_OBJFREELIST_MAX
||
2844 !listAddNodeHead(server
.objfreelist
,o
))
2846 pthread_mutex_unlock(&server
.obj_freelist_mutex
);
2847 server
.vm_stats_swapped_objects
--;
2850 /* Object is in memory, or in the process of being swapped out. */
2851 if (--(o
->refcount
) == 0) {
2852 if (server
.vm_enabled
&& o
->storage
== REDIS_VM_SWAPPING
)
2853 vmCancelThreadedIOJob(obj
);
2855 case REDIS_STRING
: freeStringObject(o
); break;
2856 case REDIS_LIST
: freeListObject(o
); break;
2857 case REDIS_SET
: freeSetObject(o
); break;
2858 case REDIS_ZSET
: freeZsetObject(o
); break;
2859 case REDIS_HASH
: freeHashObject(o
); break;
2860 default: redisAssert(0); break;
2862 if (server
.vm_enabled
) pthread_mutex_lock(&server
.obj_freelist_mutex
);
2863 if (listLength(server
.objfreelist
) > REDIS_OBJFREELIST_MAX
||
2864 !listAddNodeHead(server
.objfreelist
,o
))
2866 if (server
.vm_enabled
) pthread_mutex_unlock(&server
.obj_freelist_mutex
);
2870 static robj
*lookupKey(redisDb
*db
, robj
*key
) {
2871 dictEntry
*de
= dictFind(db
->dict
,key
);
2873 robj
*key
= dictGetEntryKey(de
);
2874 robj
*val
= dictGetEntryVal(de
);
2876 if (server
.vm_enabled
) {
2877 if (key
->storage
== REDIS_VM_MEMORY
||
2878 key
->storage
== REDIS_VM_SWAPPING
)
2880 /* If we were swapping the object out, stop it, this key
2882 if (key
->storage
== REDIS_VM_SWAPPING
)
2883 vmCancelThreadedIOJob(key
);
2884 /* Update the access time of the key for the aging algorithm. */
2885 key
->vm
.atime
= server
.unixtime
;
2887 int notify
= (key
->storage
== REDIS_VM_LOADING
);
2889 /* Our value was swapped on disk. Bring it at home. */
2890 redisAssert(val
== NULL
);
2891 val
= vmLoadObject(key
);
2892 dictGetEntryVal(de
) = val
;
2894 /* Clients blocked by the VM subsystem may be waiting for
2896 if (notify
) handleClientsBlockedOnSwappedKey(db
,key
);
2905 static robj
*lookupKeyRead(redisDb
*db
, robj
*key
) {
2906 expireIfNeeded(db
,key
);
2907 return lookupKey(db
,key
);
2910 static robj
*lookupKeyWrite(redisDb
*db
, robj
*key
) {
2911 deleteIfVolatile(db
,key
);
2912 return lookupKey(db
,key
);
2915 static robj
*lookupKeyReadOrReply(redisClient
*c
, robj
*key
, robj
*reply
) {
2916 robj
*o
= lookupKeyRead(c
->db
, key
);
2917 if (!o
) addReply(c
,reply
);
2921 static robj
*lookupKeyWriteOrReply(redisClient
*c
, robj
*key
, robj
*reply
) {
2922 robj
*o
= lookupKeyWrite(c
->db
, key
);
2923 if (!o
) addReply(c
,reply
);
2927 static int checkType(redisClient
*c
, robj
*o
, int type
) {
2928 if (o
->type
!= type
) {
2929 addReply(c
,shared
.wrongtypeerr
);
2935 static int deleteKey(redisDb
*db
, robj
*key
) {
2938 /* We need to protect key from destruction: after the first dictDelete()
2939 * it may happen that 'key' is no longer valid if we don't increment
2940 * it's count. This may happen when we get the object reference directly
2941 * from the hash table with dictRandomKey() or dict iterators */
2943 if (dictSize(db
->expires
)) dictDelete(db
->expires
,key
);
2944 retval
= dictDelete(db
->dict
,key
);
2947 return retval
== DICT_OK
;
2950 /* Check if the nul-terminated string 's' can be represented by a long
2951 * (that is, is a number that fits into long without any other space or
2952 * character before or after the digits).
2954 * If so, the function returns REDIS_OK and *longval is set to the value
2955 * of the number. Otherwise REDIS_ERR is returned */
2956 static int isStringRepresentableAsLong(sds s
, long *longval
) {
2957 char buf
[32], *endptr
;
2961 value
= strtol(s
, &endptr
, 10);
2962 if (endptr
[0] != '\0') return REDIS_ERR
;
2963 slen
= snprintf(buf
,32,"%ld",value
);
2965 /* If the number converted back into a string is not identical
2966 * then it's not possible to encode the string as integer */
2967 if (sdslen(s
) != (unsigned)slen
|| memcmp(buf
,s
,slen
)) return REDIS_ERR
;
2968 if (longval
) *longval
= value
;
2972 /* Try to encode a string object in order to save space */
2973 static robj
*tryObjectEncoding(robj
*o
) {
2977 if (o
->encoding
!= REDIS_ENCODING_RAW
)
2978 return o
; /* Already encoded */
2980 /* It's not safe to encode shared objects: shared objects can be shared
2981 * everywhere in the "object space" of Redis. Encoded objects can only
2982 * appear as "values" (and not, for instance, as keys) */
2983 if (o
->refcount
> 1) return o
;
2985 /* Currently we try to encode only strings */
2986 redisAssert(o
->type
== REDIS_STRING
);
2988 /* Check if we can represent this string as a long integer */
2989 if (isStringRepresentableAsLong(s
,&value
) == REDIS_ERR
) return o
;
2991 /* Ok, this object can be encoded */
2992 if (value
>= 0 && value
< REDIS_SHARED_INTEGERS
) {
2994 incrRefCount(shared
.integers
[value
]);
2995 return shared
.integers
[value
];
2997 o
->encoding
= REDIS_ENCODING_INT
;
2999 o
->ptr
= (void*) value
;
3004 /* Get a decoded version of an encoded object (returned as a new object).
3005 * If the object is already raw-encoded just increment the ref count. */
3006 static robj
*getDecodedObject(robj
*o
) {
3009 if (o
->encoding
== REDIS_ENCODING_RAW
) {
3013 if (o
->type
== REDIS_STRING
&& o
->encoding
== REDIS_ENCODING_INT
) {
3016 snprintf(buf
,32,"%ld",(long)o
->ptr
);
3017 dec
= createStringObject(buf
,strlen(buf
));
3020 redisAssert(1 != 1);
3024 /* Compare two string objects via strcmp() or alike.
3025 * Note that the objects may be integer-encoded. In such a case we
3026 * use snprintf() to get a string representation of the numbers on the stack
3027 * and compare the strings, it's much faster than calling getDecodedObject().
3029 * Important note: if objects are not integer encoded, but binary-safe strings,
3030 * sdscmp() from sds.c will apply memcmp() so this function ca be considered
3032 static int compareStringObjects(robj
*a
, robj
*b
) {
3033 redisAssert(a
->type
== REDIS_STRING
&& b
->type
== REDIS_STRING
);
3034 char bufa
[128], bufb
[128], *astr
, *bstr
;
3037 if (a
== b
) return 0;
3038 if (a
->encoding
!= REDIS_ENCODING_RAW
) {
3039 snprintf(bufa
,sizeof(bufa
),"%ld",(long) a
->ptr
);
3045 if (b
->encoding
!= REDIS_ENCODING_RAW
) {
3046 snprintf(bufb
,sizeof(bufb
),"%ld",(long) b
->ptr
);
3052 return bothsds
? sdscmp(astr
,bstr
) : strcmp(astr
,bstr
);
3055 static size_t stringObjectLen(robj
*o
) {
3056 redisAssert(o
->type
== REDIS_STRING
);
3057 if (o
->encoding
== REDIS_ENCODING_RAW
) {
3058 return sdslen(o
->ptr
);
3062 return snprintf(buf
,32,"%ld",(long)o
->ptr
);
3066 /*============================ RDB saving/loading =========================== */
/* Write the single byte 'type' to 'fp'.
 * Returns 0 on success, -1 if nothing could be written. */
static int rdbSaveType(FILE *fp, unsigned char type) {
    size_t written = fwrite(&type,1,1,fp);

    return (written == 0) ? -1 : 0;
}
/* Write the time 't' to 'fp' as a 4 byte int32_t, exactly as it is laid out
 * in memory (the value is narrowed to 32 bits first).
 * Returns 0 on success, -1 if nothing could be written. */
static int rdbSaveTime(FILE *fp, time_t t) {
    int32_t seconds = (int32_t) t;
    size_t written = fwrite(&seconds,4,1,fp);

    return (written == 0) ? -1 : 0;
}
3079 /* check rdbLoadLen() comments for more info */
3080 static int rdbSaveLen(FILE *fp
, uint32_t len
) {
3081 unsigned char buf
[2];
3084 /* Save a 6 bit len */
3085 buf
[0] = (len
&0xFF)|(REDIS_RDB_6BITLEN
<<6);
3086 if (fwrite(buf
,1,1,fp
) == 0) return -1;
3087 } else if (len
< (1<<14)) {
3088 /* Save a 14 bit len */
3089 buf
[0] = ((len
>>8)&0xFF)|(REDIS_RDB_14BITLEN
<<6);
3091 if (fwrite(buf
,2,1,fp
) == 0) return -1;
3093 /* Save a 32 bit len */
3094 buf
[0] = (REDIS_RDB_32BITLEN
<<6);
3095 if (fwrite(buf
,1,1,fp
) == 0) return -1;
3097 if (fwrite(&len
,4,1,fp
) == 0) return -1;
3102 /* String objects in the form "2391" "-100" without any space and with a
3103 * range of values that can fit in an 8, 16 or 32 bit signed value can be
3104 * encoded as integers to save space */
3105 static int rdbTryIntegerEncoding(char *s
, size_t len
, unsigned char *enc
) {
3107 char *endptr
, buf
[32];
3109 /* Check if it's possible to encode this value as a number */
3110 value
= strtoll(s
, &endptr
, 10);
3111 if (endptr
[0] != '\0') return 0;
3112 snprintf(buf
,32,"%lld",value
);
3114 /* If the number converted back into a string is not identical
3115 * then it's not possible to encode the string as integer */
3116 if (strlen(buf
) != len
|| memcmp(buf
,s
,len
)) return 0;
3118 /* Finally check if it fits in our ranges */
3119 if (value
>= -(1<<7) && value
<= (1<<7)-1) {
3120 enc
[0] = (REDIS_RDB_ENCVAL
<<6)|REDIS_RDB_ENC_INT8
;
3121 enc
[1] = value
&0xFF;
3123 } else if (value
>= -(1<<15) && value
<= (1<<15)-1) {
3124 enc
[0] = (REDIS_RDB_ENCVAL
<<6)|REDIS_RDB_ENC_INT16
;
3125 enc
[1] = value
&0xFF;
3126 enc
[2] = (value
>>8)&0xFF;
3128 } else if (value
>= -((long long)1<<31) && value
<= ((long long)1<<31)-1) {
3129 enc
[0] = (REDIS_RDB_ENCVAL
<<6)|REDIS_RDB_ENC_INT32
;
3130 enc
[1] = value
&0xFF;
3131 enc
[2] = (value
>>8)&0xFF;
3132 enc
[3] = (value
>>16)&0xFF;
3133 enc
[4] = (value
>>24)&0xFF;
3140 static int rdbSaveLzfStringObject(FILE *fp
, unsigned char *s
, size_t len
) {
3141 size_t comprlen
, outlen
;
3145 /* We require at least four bytes compression for this to be worth it */
3146 if (len
<= 4) return 0;
3148 if ((out
= zmalloc(outlen
+1)) == NULL
) return 0;
3149 comprlen
= lzf_compress(s
, len
, out
, outlen
);
3150 if (comprlen
== 0) {
3154 /* Data compressed! Let's save it on disk */
3155 byte
= (REDIS_RDB_ENCVAL
<<6)|REDIS_RDB_ENC_LZF
;
3156 if (fwrite(&byte
,1,1,fp
) == 0) goto writeerr
;
3157 if (rdbSaveLen(fp
,comprlen
) == -1) goto writeerr
;
3158 if (rdbSaveLen(fp
,len
) == -1) goto writeerr
;
3159 if (fwrite(out
,comprlen
,1,fp
) == 0) goto writeerr
;
3168 /* Save a string object as [len][data] on disk. If the object is a string
3169 * representation of an integer value we try to save it in a special form */
3170 static int rdbSaveRawString(FILE *fp
, unsigned char *s
, size_t len
) {
3173 /* Try integer encoding */
3175 unsigned char buf
[5];
3176 if ((enclen
= rdbTryIntegerEncoding((char*)s
,len
,buf
)) > 0) {
3177 if (fwrite(buf
,enclen
,1,fp
) == 0) return -1;
3182 /* Try LZF compression - under 20 bytes it's unable to compress even
3183 * aaaaaaaaaaaaaaaaaa so skip it */
3184 if (server
.rdbcompression
&& len
> 20) {
3187 retval
= rdbSaveLzfStringObject(fp
,s
,len
);
3188 if (retval
== -1) return -1;
3189 if (retval
> 0) return 0;
3190 /* retval == 0 means data can't be compressed, save the old way */
3193 /* Store verbatim */
3194 if (rdbSaveLen(fp
,len
) == -1) return -1;
3195 if (len
&& fwrite(s
,len
,1,fp
) == 0) return -1;
3199 /* Like rdbSaveRawString() but handle encoded objects */
3200 static int rdbSaveStringObject(FILE *fp
, robj
*obj
) {
3203 /* Avoid incr/decr ref count business when possible.
3204 * This plays well with copy-on-write given that we are probably
3205 * in a child process (BGSAVE). Also this makes sure key objects
3206 * of swapped objects are not incRefCount-ed (an assert does not allow
3207 * this in order to avoid bugs) */
3208 if (obj
->encoding
!= REDIS_ENCODING_RAW
) {
3209 obj
= getDecodedObject(obj
);
3210 retval
= rdbSaveRawString(fp
,obj
->ptr
,sdslen(obj
->ptr
));
3213 retval
= rdbSaveRawString(fp
,obj
->ptr
,sdslen(obj
->ptr
));
/* Save a double value. Doubles are saved as strings prefixed by an unsigned
 * 8 bit integer specifying the length of the representation ("%.17g").
 * Three values of this 8 bit integer are reserved and are written alone,
 * without any string following them:
 *
 * 253: not a number
 * 254: positive infinity
 * 255: negative infinity
 *
 * Returns 0 on success, -1 on write error. */
static int rdbSaveDoubleValue(FILE *fp, double val) {
    unsigned char buf[128];
    int len = 1; /* the special values need only the prefix byte */

    if (isnan(val)) {
        buf[0] = 253;
    } else if (!isfinite(val)) {
        buf[0] = (val < 0) ? 255 : 254;
    } else {
        char *repr = (char*)buf+1;

        snprintf(repr,sizeof(buf)-1,"%.17g",val);
        buf[0] = strlen(repr);
        len = buf[0]+1;
    }
    return (fwrite(buf,len,1,fp) == 0) ? -1 : 0;
}
3245 /* Save a Redis object. */
3246 static int rdbSaveObject(FILE *fp
, robj
*o
) {
3247 if (o
->type
== REDIS_STRING
) {
3248 /* Save a string value */
3249 if (rdbSaveStringObject(fp
,o
) == -1) return -1;
3250 } else if (o
->type
== REDIS_LIST
) {
3251 /* Save a list value */
3252 list
*list
= o
->ptr
;
3256 if (rdbSaveLen(fp
,listLength(list
)) == -1) return -1;
3257 listRewind(list
,&li
);
3258 while((ln
= listNext(&li
))) {
3259 robj
*eleobj
= listNodeValue(ln
);
3261 if (rdbSaveStringObject(fp
,eleobj
) == -1) return -1;
3263 } else if (o
->type
== REDIS_SET
) {
3264 /* Save a set value */
3266 dictIterator
*di
= dictGetIterator(set
);
3269 if (rdbSaveLen(fp
,dictSize(set
)) == -1) return -1;
3270 while((de
= dictNext(di
)) != NULL
) {
3271 robj
*eleobj
= dictGetEntryKey(de
);
3273 if (rdbSaveStringObject(fp
,eleobj
) == -1) return -1;
3275 dictReleaseIterator(di
);
3276 } else if (o
->type
== REDIS_ZSET
) {
3277 /* Save a set value */
3279 dictIterator
*di
= dictGetIterator(zs
->dict
);
3282 if (rdbSaveLen(fp
,dictSize(zs
->dict
)) == -1) return -1;
3283 while((de
= dictNext(di
)) != NULL
) {
3284 robj
*eleobj
= dictGetEntryKey(de
);
3285 double *score
= dictGetEntryVal(de
);
3287 if (rdbSaveStringObject(fp
,eleobj
) == -1) return -1;
3288 if (rdbSaveDoubleValue(fp
,*score
) == -1) return -1;
3290 dictReleaseIterator(di
);
3291 } else if (o
->type
== REDIS_HASH
) {
3292 /* Save a hash value */
3293 if (o
->encoding
== REDIS_ENCODING_ZIPMAP
) {
3294 unsigned char *p
= zipmapRewind(o
->ptr
);
3295 unsigned int count
= zipmapLen(o
->ptr
);
3296 unsigned char *key
, *val
;
3297 unsigned int klen
, vlen
;
3299 if (rdbSaveLen(fp
,count
) == -1) return -1;
3300 while((p
= zipmapNext(p
,&key
,&klen
,&val
,&vlen
)) != NULL
) {
3301 if (rdbSaveRawString(fp
,key
,klen
) == -1) return -1;
3302 if (rdbSaveRawString(fp
,val
,vlen
) == -1) return -1;
3305 dictIterator
*di
= dictGetIterator(o
->ptr
);
3308 if (rdbSaveLen(fp
,dictSize((dict
*)o
->ptr
)) == -1) return -1;
3309 while((de
= dictNext(di
)) != NULL
) {
3310 robj
*key
= dictGetEntryKey(de
);
3311 robj
*val
= dictGetEntryVal(de
);
3313 if (rdbSaveStringObject(fp
,key
) == -1) return -1;
3314 if (rdbSaveStringObject(fp
,val
) == -1) return -1;
3316 dictReleaseIterator(di
);
3324 /* Return the length the object will have on disk if saved with
3325 * the rdbSaveObject() function. Currently we use a trick to get
3326 * this length with very little changes to the code. In the future
3327 * we could switch to a faster solution. */
3328 static off_t
rdbSavedObjectLen(robj
*o
, FILE *fp
) {
3329 if (fp
== NULL
) fp
= server
.devnull
;
3331 assert(rdbSaveObject(fp
,o
) != 1);
3335 /* Return the number of pages required to save this object in the swap file */
3336 static off_t
rdbSavedObjectPages(robj
*o
, FILE *fp
) {
3337 off_t bytes
= rdbSavedObjectLen(o
,fp
);
3339 return (bytes
+(server
.vm_page_size
-1))/server
.vm_page_size
;
3342 /* Save the DB on disk. Return REDIS_ERR on error, REDIS_OK on success */
3343 static int rdbSave(char *filename
) {
3344 dictIterator
*di
= NULL
;
3349 time_t now
= time(NULL
);
3351 /* Wait for I/O threads to terminate, just in case this is a
3352 * foreground-saving, to avoid seeking the swap file descriptor at the
3354 if (server
.vm_enabled
)
3355 waitEmptyIOJobsQueue();
3357 snprintf(tmpfile
,256,"temp-%d.rdb", (int) getpid());
3358 fp
= fopen(tmpfile
,"w");
3360 redisLog(REDIS_WARNING
, "Failed saving the DB: %s", strerror(errno
));
3363 if (fwrite("REDIS0001",9,1,fp
) == 0) goto werr
;
3364 for (j
= 0; j
< server
.dbnum
; j
++) {
3365 redisDb
*db
= server
.db
+j
;
3367 if (dictSize(d
) == 0) continue;
3368 di
= dictGetIterator(d
);
3374 /* Write the SELECT DB opcode */
3375 if (rdbSaveType(fp
,REDIS_SELECTDB
) == -1) goto werr
;
3376 if (rdbSaveLen(fp
,j
) == -1) goto werr
;
3378 /* Iterate this DB writing every entry */
3379 while((de
= dictNext(di
)) != NULL
) {
3380 robj
*key
= dictGetEntryKey(de
);
3381 robj
*o
= dictGetEntryVal(de
);
3382 time_t expiretime
= getExpire(db
,key
);
3384 /* Save the expire time */
3385 if (expiretime
!= -1) {
3386 /* If this key is already expired skip it */
3387 if (expiretime
< now
) continue;
3388 if (rdbSaveType(fp
,REDIS_EXPIRETIME
) == -1) goto werr
;
3389 if (rdbSaveTime(fp
,expiretime
) == -1) goto werr
;
3391 /* Save the key and associated value. This requires special
3392 * handling if the value is swapped out. */
3393 if (!server
.vm_enabled
|| key
->storage
== REDIS_VM_MEMORY
||
3394 key
->storage
== REDIS_VM_SWAPPING
) {
3395 /* Save type, key, value */
3396 if (rdbSaveType(fp
,o
->type
) == -1) goto werr
;
3397 if (rdbSaveStringObject(fp
,key
) == -1) goto werr
;
3398 if (rdbSaveObject(fp
,o
) == -1) goto werr
;
3400 /* REDIS_VM_SWAPPED or REDIS_VM_LOADING */
3402 /* Get a preview of the object in memory */
3403 po
= vmPreviewObject(key
);
3404 /* Save type, key, value */
3405 if (rdbSaveType(fp
,key
->vtype
) == -1) goto werr
;
3406 if (rdbSaveStringObject(fp
,key
) == -1) goto werr
;
3407 if (rdbSaveObject(fp
,po
) == -1) goto werr
;
3408 /* Remove the loaded object from memory */
3412 dictReleaseIterator(di
);
3415 if (rdbSaveType(fp
,REDIS_EOF
) == -1) goto werr
;
3417 /* Make sure data will not remain on the OS's output buffers */
3422 /* Use RENAME to make sure the DB file is changed atomically only
3423 * if the generated DB file is ok. */
3424 if (rename(tmpfile
,filename
) == -1) {
3425 redisLog(REDIS_WARNING
,"Error moving temp DB file on the final destination: %s", strerror(errno
));
3429 redisLog(REDIS_NOTICE
,"DB saved on disk");
3431 server
.lastsave
= time(NULL
);
3437 redisLog(REDIS_WARNING
,"Write error saving DB on disk: %s", strerror(errno
));
3438 if (di
) dictReleaseIterator(di
);
3442 static int rdbSaveBackground(char *filename
) {
3445 if (server
.bgsavechildpid
!= -1) return REDIS_ERR
;
3446 if (server
.vm_enabled
) waitEmptyIOJobsQueue();
3447 if ((childpid
= fork()) == 0) {
3449 if (server
.vm_enabled
) vmReopenSwapFile();
3451 if (rdbSave(filename
) == REDIS_OK
) {
3458 if (childpid
== -1) {
3459 redisLog(REDIS_WARNING
,"Can't save in background: fork: %s",
3463 redisLog(REDIS_NOTICE
,"Background saving started by pid %d",childpid
);
3464 server
.bgsavechildpid
= childpid
;
3465 updateDictResizePolicy();
3468 return REDIS_OK
; /* unreached */
3471 static void rdbRemoveTempFile(pid_t childpid
) {
3474 snprintf(tmpfile
,256,"temp-%d.rdb", (int) childpid
);
/* Read one byte from 'fp' and return it as the object type / opcode.
 * Returns -1 if nothing could be read. */
static int rdbLoadType(FILE *fp) {
    unsigned char byte;

    return (fread(&byte,1,1,fp) == 0) ? -1 : byte;
}
/* Read a 4 byte int32_t from 'fp' and return it as a time_t.
 * Returns -1 if nothing could be read. */
static time_t rdbLoadTime(FILE *fp) {
    int32_t seconds;

    if (fread(&seconds,4,1,fp) != 0) return (time_t) seconds;
    return -1;
}
3490 /* Load an encoded length from the DB, see the REDIS_RDB_* defines on the top
3491 * of this file for a description of how these are stored on disk.
3493 * isencoded is set to 1 if the value read is not actually a length but
3494 * an "encoding type", check the above comments for more info */
3495 static uint32_t rdbLoadLen(FILE *fp
, int *isencoded
) {
3496 unsigned char buf
[2];
3500 if (isencoded
) *isencoded
= 0;
3501 if (fread(buf
,1,1,fp
) == 0) return REDIS_RDB_LENERR
;
3502 type
= (buf
[0]&0xC0)>>6;
3503 if (type
== REDIS_RDB_6BITLEN
) {
3504 /* Read a 6 bit len */
3506 } else if (type
== REDIS_RDB_ENCVAL
) {
3507 /* Read a 6 bit len encoding type */
3508 if (isencoded
) *isencoded
= 1;
3510 } else if (type
== REDIS_RDB_14BITLEN
) {
3511 /* Read a 14 bit len */
3512 if (fread(buf
+1,1,1,fp
) == 0) return REDIS_RDB_LENERR
;
3513 return ((buf
[0]&0x3F)<<8)|buf
[1];
3515 /* Read a 32 bit len */
3516 if (fread(&len
,4,1,fp
) == 0) return REDIS_RDB_LENERR
;
3521 static robj
*rdbLoadIntegerObject(FILE *fp
, int enctype
) {
3522 unsigned char enc
[4];
3525 if (enctype
== REDIS_RDB_ENC_INT8
) {
3526 if (fread(enc
,1,1,fp
) == 0) return NULL
;
3527 val
= (signed char)enc
[0];
3528 } else if (enctype
== REDIS_RDB_ENC_INT16
) {
3530 if (fread(enc
,2,1,fp
) == 0) return NULL
;
3531 v
= enc
[0]|(enc
[1]<<8);
3533 } else if (enctype
== REDIS_RDB_ENC_INT32
) {
3535 if (fread(enc
,4,1,fp
) == 0) return NULL
;
3536 v
= enc
[0]|(enc
[1]<<8)|(enc
[2]<<16)|(enc
[3]<<24);
3539 val
= 0; /* anti-warning */
3542 return createObject(REDIS_STRING
,sdscatprintf(sdsempty(),"%lld",val
));
3545 static robj
*rdbLoadLzfStringObject(FILE*fp
) {
3546 unsigned int len
, clen
;
3547 unsigned char *c
= NULL
;
3550 if ((clen
= rdbLoadLen(fp
,NULL
)) == REDIS_RDB_LENERR
) return NULL
;
3551 if ((len
= rdbLoadLen(fp
,NULL
)) == REDIS_RDB_LENERR
) return NULL
;
3552 if ((c
= zmalloc(clen
)) == NULL
) goto err
;
3553 if ((val
= sdsnewlen(NULL
,len
)) == NULL
) goto err
;
3554 if (fread(c
,clen
,1,fp
) == 0) goto err
;
3555 if (lzf_decompress(c
,clen
,val
,len
) == 0) goto err
;
3557 return createObject(REDIS_STRING
,val
);
3564 static robj
*rdbLoadStringObject(FILE*fp
) {
3569 len
= rdbLoadLen(fp
,&isencoded
);
3572 case REDIS_RDB_ENC_INT8
:
3573 case REDIS_RDB_ENC_INT16
:
3574 case REDIS_RDB_ENC_INT32
:
3575 return rdbLoadIntegerObject(fp
,len
);
3576 case REDIS_RDB_ENC_LZF
:
3577 return rdbLoadLzfStringObject(fp
);
3583 if (len
== REDIS_RDB_LENERR
) return NULL
;
3584 val
= sdsnewlen(NULL
,len
);
3585 if (len
&& fread(val
,len
,1,fp
) == 0) {
3589 return createObject(REDIS_STRING
,val
);
3592 /* For information about double serialization check rdbSaveDoubleValue() */
3593 static int rdbLoadDoubleValue(FILE *fp
, double *val
) {
3597 if (fread(&len
,1,1,fp
) == 0) return -1;
3599 case 255: *val
= R_NegInf
; return 0;
3600 case 254: *val
= R_PosInf
; return 0;
3601 case 253: *val
= R_Nan
; return 0;
3603 if (fread(buf
,len
,1,fp
) == 0) return -1;
3605 sscanf(buf
, "%lg", val
);
3610 /* Load a Redis object of the specified type from the specified file.
3611 * On success a newly allocated object is returned, otherwise NULL. */
3612 static robj
*rdbLoadObject(int type
, FILE *fp
) {
3615 redisLog(REDIS_DEBUG
,"LOADING OBJECT %d (at %d)\n",type
,ftell(fp
));
3616 if (type
== REDIS_STRING
) {
3617 /* Read string value */
3618 if ((o
= rdbLoadStringObject(fp
)) == NULL
) return NULL
;
3619 o
= tryObjectEncoding(o
);
3620 } else if (type
== REDIS_LIST
|| type
== REDIS_SET
) {
3621 /* Read list/set value */
3624 if ((listlen
= rdbLoadLen(fp
,NULL
)) == REDIS_RDB_LENERR
) return NULL
;
3625 o
= (type
== REDIS_LIST
) ? createListObject() : createSetObject();
3626 /* It's faster to expand the dict to the right size asap in order
3627 * to avoid rehashing */
3628 if (type
== REDIS_SET
&& listlen
> DICT_HT_INITIAL_SIZE
)
3629 dictExpand(o
->ptr
,listlen
);
3630 /* Load every single element of the list/set */
3634 if ((ele
= rdbLoadStringObject(fp
)) == NULL
) return NULL
;
3635 ele
= tryObjectEncoding(ele
);
3636 if (type
== REDIS_LIST
) {
3637 listAddNodeTail((list
*)o
->ptr
,ele
);
3639 dictAdd((dict
*)o
->ptr
,ele
,NULL
);
3642 } else if (type
== REDIS_ZSET
) {
3643 /* Read sorted set value */
3647 if ((zsetlen
= rdbLoadLen(fp
,NULL
)) == REDIS_RDB_LENERR
) return NULL
;
3648 o
= createZsetObject();
3650 /* Load every single element of the sorted set */
3653 double *score
= zmalloc(sizeof(double));
3655 if ((ele
= rdbLoadStringObject(fp
)) == NULL
) return NULL
;
3656 ele
= tryObjectEncoding(ele
);
3657 if (rdbLoadDoubleValue(fp
,score
) == -1) return NULL
;
3658 dictAdd(zs
->dict
,ele
,score
);
3659 zslInsert(zs
->zsl
,*score
,ele
);
3660 incrRefCount(ele
); /* added to skiplist */
3662 } else if (type
== REDIS_HASH
) {
3665 if ((hashlen
= rdbLoadLen(fp
,NULL
)) == REDIS_RDB_LENERR
) return NULL
;
3666 o
= createHashObject();
3667 /* Too many entries? Use an hash table. */
3668 if (hashlen
> server
.hash_max_zipmap_entries
)
3669 convertToRealHash(o
);
3670 /* Load every key/value, then set it into the zipmap or hash
3671 * table, as needed. */
3675 if ((key
= rdbLoadStringObject(fp
)) == NULL
) return NULL
;
3676 if ((val
= rdbLoadStringObject(fp
)) == NULL
) return NULL
;
3677 /* If we are using a zipmap and there are too big values
3678 * the object is converted to real hash table encoding. */
3679 if (o
->encoding
!= REDIS_ENCODING_HT
&&
3680 (sdslen(key
->ptr
) > server
.hash_max_zipmap_value
||
3681 sdslen(val
->ptr
) > server
.hash_max_zipmap_value
))
3683 convertToRealHash(o
);
3686 if (o
->encoding
== REDIS_ENCODING_ZIPMAP
) {
3687 unsigned char *zm
= o
->ptr
;
3689 zm
= zipmapSet(zm
,key
->ptr
,sdslen(key
->ptr
),
3690 val
->ptr
,sdslen(val
->ptr
),NULL
);
3695 key
= tryObjectEncoding(key
);
3696 val
= tryObjectEncoding(val
);
3697 dictAdd((dict
*)o
->ptr
,key
,val
);
3706 static int rdbLoad(char *filename
) {
3708 robj
*keyobj
= NULL
;
3710 int type
, retval
, rdbver
;
3711 dict
*d
= server
.db
[0].dict
;
3712 redisDb
*db
= server
.db
+0;
3714 time_t expiretime
= -1, now
= time(NULL
);
3715 long long loadedkeys
= 0;
3717 fp
= fopen(filename
,"r");
3718 if (!fp
) return REDIS_ERR
;
3719 if (fread(buf
,9,1,fp
) == 0) goto eoferr
;
3721 if (memcmp(buf
,"REDIS",5) != 0) {
3723 redisLog(REDIS_WARNING
,"Wrong signature trying to load DB from file");
3726 rdbver
= atoi(buf
+5);
3729 redisLog(REDIS_WARNING
,"Can't handle RDB format version %d",rdbver
);
3736 if ((type
= rdbLoadType(fp
)) == -1) goto eoferr
;
3737 if (type
== REDIS_EXPIRETIME
) {
3738 if ((expiretime
= rdbLoadTime(fp
)) == -1) goto eoferr
;
3739 /* We read the time so we need to read the object type again */
3740 if ((type
= rdbLoadType(fp
)) == -1) goto eoferr
;
3742 if (type
== REDIS_EOF
) break;
3743 /* Handle SELECT DB opcode as a special case */
3744 if (type
== REDIS_SELECTDB
) {
3745 if ((dbid
= rdbLoadLen(fp
,NULL
)) == REDIS_RDB_LENERR
)
3747 if (dbid
>= (unsigned)server
.dbnum
) {
3748 redisLog(REDIS_WARNING
,"FATAL: Data file was created with a Redis server configured to handle more than %d databases. Exiting\n", server
.dbnum
);
3751 db
= server
.db
+dbid
;
3756 if ((keyobj
= rdbLoadStringObject(fp
)) == NULL
) goto eoferr
;
3758 if ((o
= rdbLoadObject(type
,fp
)) == NULL
) goto eoferr
;
3759 /* Add the new object in the hash table */
3760 retval
= dictAdd(d
,keyobj
,o
);
3761 if (retval
== DICT_ERR
) {
3762 redisLog(REDIS_WARNING
,"Loading DB, duplicated key (%s) found! Unrecoverable error, exiting now.", keyobj
->ptr
);
3765 /* Set the expire time if needed */
3766 if (expiretime
!= -1) {
3767 setExpire(db
,keyobj
,expiretime
);
3768 /* Delete this key if already expired */
3769 if (expiretime
< now
) deleteKey(db
,keyobj
);
3773 /* Handle swapping while loading big datasets when VM is on */
3775 if (server
.vm_enabled
&& (loadedkeys
% 5000) == 0) {
3776 while (zmalloc_used_memory() > server
.vm_max_memory
) {
3777 if (vmSwapOneObjectBlocking() == REDIS_ERR
) break;
3784 eoferr
: /* unexpected end of file is handled here with a fatal exit */
3785 if (keyobj
) decrRefCount(keyobj
);
3786 redisLog(REDIS_WARNING
,"Short read or OOM loading DB. Unrecoverable error, aborting now.");
3788 return REDIS_ERR
; /* Just to avoid warning */
3791 /*================================== Commands =============================== */
3793 static void authCommand(redisClient
*c
) {
3794 if (!server
.requirepass
|| !strcmp(c
->argv
[1]->ptr
, server
.requirepass
)) {
3795 c
->authenticated
= 1;
3796 addReply(c
,shared
.ok
);
3798 c
->authenticated
= 0;
3799 addReplySds(c
,sdscatprintf(sdsempty(),"-ERR invalid password\r\n"));
3803 static void pingCommand(redisClient
*c
) {
3804 addReply(c
,shared
.pong
);
3807 static void echoCommand(redisClient
*c
) {
3808 addReplyBulk(c
,c
->argv
[1]);
3811 /*=================================== Strings =============================== */
3813 static void setGenericCommand(redisClient
*c
, int nx
) {
3816 if (nx
) deleteIfVolatile(c
->db
,c
->argv
[1]);
3817 retval
= dictAdd(c
->db
->dict
,c
->argv
[1],c
->argv
[2]);
3818 if (retval
== DICT_ERR
) {
3820 /* If the key is about a swapped value, we want a new key object
3821 * to overwrite the old. So we delete the old key in the database.
3822 * This will also make sure that swap pages about the old object
3823 * will be marked as free. */
3824 if (server
.vm_enabled
&& deleteIfSwapped(c
->db
,c
->argv
[1]))
3825 incrRefCount(c
->argv
[1]);
3826 dictReplace(c
->db
->dict
,c
->argv
[1],c
->argv
[2]);
3827 incrRefCount(c
->argv
[2]);
3829 addReply(c
,shared
.czero
);
3833 incrRefCount(c
->argv
[1]);
3834 incrRefCount(c
->argv
[2]);
3837 removeExpire(c
->db
,c
->argv
[1]);
3838 addReply(c
, nx
? shared
.cone
: shared
.ok
);
3841 static void setCommand(redisClient
*c
) {
3842 setGenericCommand(c
,0);
3845 static void setnxCommand(redisClient
*c
) {
3846 setGenericCommand(c
,1);
3849 static int getGenericCommand(redisClient
*c
) {
3852 if ((o
= lookupKeyReadOrReply(c
,c
->argv
[1],shared
.nullbulk
)) == NULL
)
3855 if (o
->type
!= REDIS_STRING
) {
3856 addReply(c
,shared
.wrongtypeerr
);
3864 static void getCommand(redisClient
*c
) {
3865 getGenericCommand(c
);
3868 static void getsetCommand(redisClient
*c
) {
3869 if (getGenericCommand(c
) == REDIS_ERR
) return;
3870 if (dictAdd(c
->db
->dict
,c
->argv
[1],c
->argv
[2]) == DICT_ERR
) {
3871 dictReplace(c
->db
->dict
,c
->argv
[1],c
->argv
[2]);
3873 incrRefCount(c
->argv
[1]);
3875 incrRefCount(c
->argv
[2]);
3877 removeExpire(c
->db
,c
->argv
[1]);
3880 static void mgetCommand(redisClient
*c
) {
3883 addReplySds(c
,sdscatprintf(sdsempty(),"*%d\r\n",c
->argc
-1));
3884 for (j
= 1; j
< c
->argc
; j
++) {
3885 robj
*o
= lookupKeyRead(c
->db
,c
->argv
[j
]);
3887 addReply(c
,shared
.nullbulk
);
3889 if (o
->type
!= REDIS_STRING
) {
3890 addReply(c
,shared
.nullbulk
);
3898 static void msetGenericCommand(redisClient
*c
, int nx
) {
3899 int j
, busykeys
= 0;
3901 if ((c
->argc
% 2) == 0) {
3902 addReplySds(c
,sdsnew("-ERR wrong number of arguments for MSET\r\n"));
3905 /* Handle the NX flag. The MSETNX semantic is to return zero and set
3906 * nothing at all if at least one key already exists. */
3908 for (j
= 1; j
< c
->argc
; j
+= 2) {
3909 if (lookupKeyWrite(c
->db
,c
->argv
[j
]) != NULL
) {
3915 addReply(c
, shared
.czero
);
3919 for (j
= 1; j
< c
->argc
; j
+= 2) {
3922 c
->argv
[j
+1] = tryObjectEncoding(c
->argv
[j
+1]);
3923 retval
= dictAdd(c
->db
->dict
,c
->argv
[j
],c
->argv
[j
+1]);
3924 if (retval
== DICT_ERR
) {
3925 dictReplace(c
->db
->dict
,c
->argv
[j
],c
->argv
[j
+1]);
3926 incrRefCount(c
->argv
[j
+1]);
3928 incrRefCount(c
->argv
[j
]);
3929 incrRefCount(c
->argv
[j
+1]);
3931 removeExpire(c
->db
,c
->argv
[j
]);
3933 server
.dirty
+= (c
->argc
-1)/2;
3934 addReply(c
, nx
? shared
.cone
: shared
.ok
);
3937 static void msetCommand(redisClient
*c
) {
3938 msetGenericCommand(c
,0);
3941 static void msetnxCommand(redisClient
*c
) {
3942 msetGenericCommand(c
,1);
3945 static void incrDecrCommand(redisClient
*c
, long long incr
) {
3950 o
= lookupKeyWrite(c
->db
,c
->argv
[1]);
3954 if (o
->type
!= REDIS_STRING
) {
3959 if (o
->encoding
== REDIS_ENCODING_RAW
)
3960 value
= strtoll(o
->ptr
, &eptr
, 10);
3961 else if (o
->encoding
== REDIS_ENCODING_INT
)
3962 value
= (long)o
->ptr
;
3964 redisAssert(1 != 1);
3969 o
= createObject(REDIS_STRING
,sdscatprintf(sdsempty(),"%lld",value
));
3970 o
= tryObjectEncoding(o
);
3971 retval
= dictAdd(c
->db
->dict
,c
->argv
[1],o
);
3972 if (retval
== DICT_ERR
) {
3973 dictReplace(c
->db
->dict
,c
->argv
[1],o
);
3974 removeExpire(c
->db
,c
->argv
[1]);
3976 incrRefCount(c
->argv
[1]);
3979 addReply(c
,shared
.colon
);
3981 addReply(c
,shared
.crlf
);
3984 static void incrCommand(redisClient
*c
) {
3985 incrDecrCommand(c
,1);
3988 static void decrCommand(redisClient
*c
) {
3989 incrDecrCommand(c
,-1);
3992 static void incrbyCommand(redisClient
*c
) {
3993 long long incr
= strtoll(c
->argv
[2]->ptr
, NULL
, 10);
3994 incrDecrCommand(c
,incr
);
3997 static void decrbyCommand(redisClient
*c
) {
3998 long long incr
= strtoll(c
->argv
[2]->ptr
, NULL
, 10);
3999 incrDecrCommand(c
,-incr
);
4002 static void appendCommand(redisClient
*c
) {
4007 o
= lookupKeyWrite(c
->db
,c
->argv
[1]);
4009 /* Create the key */
4010 retval
= dictAdd(c
->db
->dict
,c
->argv
[1],c
->argv
[2]);
4011 incrRefCount(c
->argv
[1]);
4012 incrRefCount(c
->argv
[2]);
4013 totlen
= stringObjectLen(c
->argv
[2]);
4017 de
= dictFind(c
->db
->dict
,c
->argv
[1]);
4020 o
= dictGetEntryVal(de
);
4021 if (o
->type
!= REDIS_STRING
) {
4022 addReply(c
,shared
.wrongtypeerr
);
4025 /* If the object is specially encoded or shared we have to make
4027 if (o
->refcount
!= 1 || o
->encoding
!= REDIS_ENCODING_RAW
) {
4028 robj
*decoded
= getDecodedObject(o
);
4030 o
= createStringObject(decoded
->ptr
, sdslen(decoded
->ptr
));
4031 decrRefCount(decoded
);
4032 dictReplace(c
->db
->dict
,c
->argv
[1],o
);
4035 if (c
->argv
[2]->encoding
== REDIS_ENCODING_RAW
) {
4036 o
->ptr
= sdscatlen(o
->ptr
,
4037 c
->argv
[2]->ptr
, sdslen(c
->argv
[2]->ptr
));
4039 o
->ptr
= sdscatprintf(o
->ptr
, "%ld",
4040 (unsigned long) c
->argv
[2]->ptr
);
4042 totlen
= sdslen(o
->ptr
);
4045 addReplySds(c
,sdscatprintf(sdsempty(),":%lu\r\n",(unsigned long)totlen
));
4048 static void substrCommand(redisClient
*c
) {
4050 long start
= atoi(c
->argv
[2]->ptr
);
4051 long end
= atoi(c
->argv
[3]->ptr
);
4052 size_t rangelen
, strlen
;
4055 if ((o
= lookupKeyReadOrReply(c
,c
->argv
[1],shared
.nullbulk
)) == NULL
||
4056 checkType(c
,o
,REDIS_STRING
)) return;
4058 o
= getDecodedObject(o
);
4059 strlen
= sdslen(o
->ptr
);
4061 /* convert negative indexes */
4062 if (start
< 0) start
= strlen
+start
;
4063 if (end
< 0) end
= strlen
+end
;
4064 if (start
< 0) start
= 0;
4065 if (end
< 0) end
= 0;
4067 /* indexes sanity checks */
4068 if (start
> end
|| (size_t)start
>= strlen
) {
4069 /* Out of range start or start > end result in null reply */
4070 addReply(c
,shared
.nullbulk
);
4074 if ((size_t)end
>= strlen
) end
= strlen
-1;
4075 rangelen
= (end
-start
)+1;
4077 /* Return the result */
4078 addReplySds(c
,sdscatprintf(sdsempty(),"$%zu\r\n",rangelen
));
4079 range
= sdsnewlen((char*)o
->ptr
+start
,rangelen
);
4080 addReplySds(c
,range
);
4081 addReply(c
,shared
.crlf
);
4085 /* ========================= Type agnostic commands ========================= */
4087 static void delCommand(redisClient
*c
) {
4090 for (j
= 1; j
< c
->argc
; j
++) {
4091 if (deleteKey(c
->db
,c
->argv
[j
])) {
4096 addReplyLong(c
,deleted
);
4099 static void existsCommand(redisClient
*c
) {
4100 addReply(c
,lookupKeyRead(c
->db
,c
->argv
[1]) ? shared
.cone
: shared
.czero
);
4103 static void selectCommand(redisClient
*c
) {
4104 int id
= atoi(c
->argv
[1]->ptr
);
4106 if (selectDb(c
,id
) == REDIS_ERR
) {
4107 addReplySds(c
,sdsnew("-ERR invalid DB index\r\n"));
4109 addReply(c
,shared
.ok
);
4113 static void randomkeyCommand(redisClient
*c
) {
4117 de
= dictGetRandomKey(c
->db
->dict
);
4118 if (!de
|| expireIfNeeded(c
->db
,dictGetEntryKey(de
)) == 0) break;
4121 addReply(c
,shared
.plus
);
4122 addReply(c
,shared
.crlf
);
4124 addReply(c
,shared
.plus
);
4125 addReply(c
,dictGetEntryKey(de
));
4126 addReply(c
,shared
.crlf
);
4130 static void keysCommand(redisClient
*c
) {
4133 sds pattern
= c
->argv
[1]->ptr
;
4134 int plen
= sdslen(pattern
);
4135 unsigned long numkeys
= 0;
4136 robj
*lenobj
= createObject(REDIS_STRING
,NULL
);
4138 di
= dictGetIterator(c
->db
->dict
);
4140 decrRefCount(lenobj
);
4141 while((de
= dictNext(di
)) != NULL
) {
4142 robj
*keyobj
= dictGetEntryKey(de
);
4144 sds key
= keyobj
->ptr
;
4145 if ((pattern
[0] == '*' && pattern
[1] == '\0') ||
4146 stringmatchlen(pattern
,plen
,key
,sdslen(key
),0)) {
4147 if (expireIfNeeded(c
->db
,keyobj
) == 0) {
4148 addReplyBulk(c
,keyobj
);
4153 dictReleaseIterator(di
);
4154 lenobj
->ptr
= sdscatprintf(sdsempty(),"*%lu\r\n",numkeys
);
4157 static void dbsizeCommand(redisClient
*c
) {
4159 sdscatprintf(sdsempty(),":%lu\r\n",dictSize(c
->db
->dict
)));
4162 static void lastsaveCommand(redisClient
*c
) {
4164 sdscatprintf(sdsempty(),":%lu\r\n",server
.lastsave
));
4167 static void typeCommand(redisClient
*c
) {
4171 o
= lookupKeyRead(c
->db
,c
->argv
[1]);
4176 case REDIS_STRING
: type
= "+string"; break;
4177 case REDIS_LIST
: type
= "+list"; break;
4178 case REDIS_SET
: type
= "+set"; break;
4179 case REDIS_ZSET
: type
= "+zset"; break;
4180 case REDIS_HASH
: type
= "+hash"; break;
4181 default: type
= "+unknown"; break;
4184 addReplySds(c
,sdsnew(type
));
4185 addReply(c
,shared
.crlf
);
4188 static void saveCommand(redisClient
*c
) {
4189 if (server
.bgsavechildpid
!= -1) {
4190 addReplySds(c
,sdsnew("-ERR background save in progress\r\n"));
4193 if (rdbSave(server
.dbfilename
) == REDIS_OK
) {
4194 addReply(c
,shared
.ok
);
4196 addReply(c
,shared
.err
);
4200 static void bgsaveCommand(redisClient
*c
) {
4201 if (server
.bgsavechildpid
!= -1) {
4202 addReplySds(c
,sdsnew("-ERR background save already in progress\r\n"));
4205 if (rdbSaveBackground(server
.dbfilename
) == REDIS_OK
) {
4206 char *status
= "+Background saving started\r\n";
4207 addReplySds(c
,sdsnew(status
));
4209 addReply(c
,shared
.err
);
4213 static void shutdownCommand(redisClient
*c
) {
4214 redisLog(REDIS_WARNING
,"User requested shutdown, saving DB...");
4215 /* Kill the saving child if there is a background saving in progress.
4216 We want to avoid race conditions, for instance our saving child may
4217 overwrite the synchronous saving done by SHUTDOWN. */
4218 if (server
.bgsavechildpid
!= -1) {
4219 redisLog(REDIS_WARNING
,"There is a live saving child. Killing it!");
4220 kill(server
.bgsavechildpid
,SIGKILL
);
4221 rdbRemoveTempFile(server
.bgsavechildpid
);
4223 if (server
.appendonly
) {
4224 /* Append only file: fsync() the AOF and exit */
4225 fsync(server
.appendfd
);
4226 if (server
.vm_enabled
) unlink(server
.vm_swap_file
);
4229 /* Snapshotting. Perform a SYNC SAVE and exit */
4230 if (rdbSave(server
.dbfilename
) == REDIS_OK
) {
4231 if (server
.daemonize
)
4232 unlink(server
.pidfile
);
4233 redisLog(REDIS_WARNING
,"%zu bytes used at exit",zmalloc_used_memory());
4234 redisLog(REDIS_WARNING
,"Server exit now, bye bye...");
4235 if (server
.vm_enabled
) unlink(server
.vm_swap_file
);
4238 /* Ooops.. error saving! The best we can do is to continue
4239 * operating. Note that if there was a background saving process,
4240 * in the next cron() Redis will be notified that the background
4241 * saving aborted, handling special stuff like slaves pending for
4242 * synchronization... */
4243 redisLog(REDIS_WARNING
,"Error trying to save the DB, can't exit");
4245 sdsnew("-ERR can't quit, problems saving the DB\r\n"));
4250 static void renameGenericCommand(redisClient
*c
, int nx
) {
4253 /* To use the same key as src and dst is probably an error */
4254 if (sdscmp(c
->argv
[1]->ptr
,c
->argv
[2]->ptr
) == 0) {
4255 addReply(c
,shared
.sameobjecterr
);
4259 if ((o
= lookupKeyWriteOrReply(c
,c
->argv
[1],shared
.nokeyerr
)) == NULL
)
4263 deleteIfVolatile(c
->db
,c
->argv
[2]);
4264 if (dictAdd(c
->db
->dict
,c
->argv
[2],o
) == DICT_ERR
) {
4267 addReply(c
,shared
.czero
);
4270 dictReplace(c
->db
->dict
,c
->argv
[2],o
);
4272 incrRefCount(c
->argv
[2]);
4274 deleteKey(c
->db
,c
->argv
[1]);
4276 addReply(c
,nx
? shared
.cone
: shared
.ok
);
4279 static void renameCommand(redisClient
*c
) {
4280 renameGenericCommand(c
,0);
4283 static void renamenxCommand(redisClient
*c
) {
4284 renameGenericCommand(c
,1);
4287 static void moveCommand(redisClient
*c
) {
4292 /* Obtain source and target DB pointers */
4295 if (selectDb(c
,atoi(c
->argv
[2]->ptr
)) == REDIS_ERR
) {
4296 addReply(c
,shared
.outofrangeerr
);
4300 selectDb(c
,srcid
); /* Back to the source DB */
4302 /* If the user asks to move the key into the same DB it already
4303 * lives in (target DB == source DB), it is probably an error. */
4305 addReply(c
,shared
.sameobjecterr
);
4309 /* Check if the element exists and get a reference */
4310 o
= lookupKeyWrite(c
->db
,c
->argv
[1]);
4312 addReply(c
,shared
.czero
);
4316 /* Try to add the element to the target DB */
4317 deleteIfVolatile(dst
,c
->argv
[1]);
4318 if (dictAdd(dst
->dict
,c
->argv
[1],o
) == DICT_ERR
) {
4319 addReply(c
,shared
.czero
);
4322 incrRefCount(c
->argv
[1]);
4325 /* OK! key moved, free the entry in the source DB */
4326 deleteKey(src
,c
->argv
[1]);
4328 addReply(c
,shared
.cone
);
4331 /* =================================== Lists ================================ */
4332 static void pushGenericCommand(redisClient
*c
, int where
) {
4336 lobj
= lookupKeyWrite(c
->db
,c
->argv
[1]);
4338 if (handleClientsWaitingListPush(c
,c
->argv
[1],c
->argv
[2])) {
4339 addReply(c
,shared
.cone
);
4342 lobj
= createListObject();
4344 if (where
== REDIS_HEAD
) {
4345 listAddNodeHead(list
,c
->argv
[2]);
4347 listAddNodeTail(list
,c
->argv
[2]);
4349 dictAdd(c
->db
->dict
,c
->argv
[1],lobj
);
4350 incrRefCount(c
->argv
[1]);
4351 incrRefCount(c
->argv
[2]);
4353 if (lobj
->type
!= REDIS_LIST
) {
4354 addReply(c
,shared
.wrongtypeerr
);
4357 if (handleClientsWaitingListPush(c
,c
->argv
[1],c
->argv
[2])) {
4358 addReply(c
,shared
.cone
);
4362 if (where
== REDIS_HEAD
) {
4363 listAddNodeHead(list
,c
->argv
[2]);
4365 listAddNodeTail(list
,c
->argv
[2]);
4367 incrRefCount(c
->argv
[2]);
4370 addReplySds(c
,sdscatprintf(sdsempty(),":%d\r\n",listLength(list
)));
4373 static void lpushCommand(redisClient
*c
) {
4374 pushGenericCommand(c
,REDIS_HEAD
);
4377 static void rpushCommand(redisClient
*c
) {
4378 pushGenericCommand(c
,REDIS_TAIL
);
4381 static void llenCommand(redisClient
*c
) {
4385 if ((o
= lookupKeyReadOrReply(c
,c
->argv
[1],shared
.czero
)) == NULL
||
4386 checkType(c
,o
,REDIS_LIST
)) return;
4389 addReplyUlong(c
,listLength(l
));
4392 static void lindexCommand(redisClient
*c
) {
4394 int index
= atoi(c
->argv
[2]->ptr
);
4398 if ((o
= lookupKeyReadOrReply(c
,c
->argv
[1],shared
.nullbulk
)) == NULL
||
4399 checkType(c
,o
,REDIS_LIST
)) return;
4402 ln
= listIndex(list
, index
);
4404 addReply(c
,shared
.nullbulk
);
4406 robj
*ele
= listNodeValue(ln
);
4407 addReplyBulk(c
,ele
);
4411 static void lsetCommand(redisClient
*c
) {
4413 int index
= atoi(c
->argv
[2]->ptr
);
4417 if ((o
= lookupKeyWriteOrReply(c
,c
->argv
[1],shared
.nokeyerr
)) == NULL
||
4418 checkType(c
,o
,REDIS_LIST
)) return;
4421 ln
= listIndex(list
, index
);
4423 addReply(c
,shared
.outofrangeerr
);
4425 robj
*ele
= listNodeValue(ln
);
4428 listNodeValue(ln
) = c
->argv
[3];
4429 incrRefCount(c
->argv
[3]);
4430 addReply(c
,shared
.ok
);
4435 static void popGenericCommand(redisClient
*c
, int where
) {
4440 if ((o
= lookupKeyWriteOrReply(c
,c
->argv
[1],shared
.nullbulk
)) == NULL
||
4441 checkType(c
,o
,REDIS_LIST
)) return;
4444 if (where
== REDIS_HEAD
)
4445 ln
= listFirst(list
);
4447 ln
= listLast(list
);
4450 addReply(c
,shared
.nullbulk
);
4452 robj
*ele
= listNodeValue(ln
);
4453 addReplyBulk(c
,ele
);
4454 listDelNode(list
,ln
);
4455 if (listLength(list
) == 0) deleteKey(c
->db
,c
->argv
[1]);
4460 static void lpopCommand(redisClient
*c
) {
4461 popGenericCommand(c
,REDIS_HEAD
);
4464 static void rpopCommand(redisClient
*c
) {
4465 popGenericCommand(c
,REDIS_TAIL
);
4468 static void lrangeCommand(redisClient
*c
) {
4470 int start
= atoi(c
->argv
[2]->ptr
);
4471 int end
= atoi(c
->argv
[3]->ptr
);
4478 if ((o
= lookupKeyReadOrReply(c
,c
->argv
[1],shared
.nullmultibulk
)) == NULL
||
4479 checkType(c
,o
,REDIS_LIST
)) return;
4481 llen
= listLength(list
);
4483 /* convert negative indexes */
4484 if (start
< 0) start
= llen
+start
;
4485 if (end
< 0) end
= llen
+end
;
4486 if (start
< 0) start
= 0;
4487 if (end
< 0) end
= 0;
4489 /* indexes sanity checks */
4490 if (start
> end
|| start
>= llen
) {
4491 /* Out of range start or start > end result in empty list */
4492 addReply(c
,shared
.emptymultibulk
);
4495 if (end
>= llen
) end
= llen
-1;
4496 rangelen
= (end
-start
)+1;
4498 /* Return the result in form of a multi-bulk reply */
4499 ln
= listIndex(list
, start
);
4500 addReplySds(c
,sdscatprintf(sdsempty(),"*%d\r\n",rangelen
));
4501 for (j
= 0; j
< rangelen
; j
++) {
4502 ele
= listNodeValue(ln
);
4503 addReplyBulk(c
,ele
);
4508 static void ltrimCommand(redisClient
*c
) {
4510 int start
= atoi(c
->argv
[2]->ptr
);
4511 int end
= atoi(c
->argv
[3]->ptr
);
4513 int j
, ltrim
, rtrim
;
4517 if ((o
= lookupKeyWriteOrReply(c
,c
->argv
[1],shared
.ok
)) == NULL
||
4518 checkType(c
,o
,REDIS_LIST
)) return;
4520 llen
= listLength(list
);
4522 /* convert negative indexes */
4523 if (start
< 0) start
= llen
+start
;
4524 if (end
< 0) end
= llen
+end
;
4525 if (start
< 0) start
= 0;
4526 if (end
< 0) end
= 0;
4528 /* indexes sanity checks */
4529 if (start
> end
|| start
>= llen
) {
4530 /* Out of range start or start > end result in empty list */
4534 if (end
>= llen
) end
= llen
-1;
4539 /* Remove list elements to perform the trim */
4540 for (j
= 0; j
< ltrim
; j
++) {
4541 ln
= listFirst(list
);
4542 listDelNode(list
,ln
);
4544 for (j
= 0; j
< rtrim
; j
++) {
4545 ln
= listLast(list
);
4546 listDelNode(list
,ln
);
4548 if (listLength(list
) == 0) deleteKey(c
->db
,c
->argv
[1]);
4550 addReply(c
,shared
.ok
);
4553 static void lremCommand(redisClient
*c
) {
4556 listNode
*ln
, *next
;
4557 int toremove
= atoi(c
->argv
[2]->ptr
);
4561 if ((o
= lookupKeyWriteOrReply(c
,c
->argv
[1],shared
.czero
)) == NULL
||
4562 checkType(c
,o
,REDIS_LIST
)) return;
4566 toremove
= -toremove
;
4569 ln
= fromtail
? list
->tail
: list
->head
;
4571 robj
*ele
= listNodeValue(ln
);
4573 next
= fromtail
? ln
->prev
: ln
->next
;
4574 if (compareStringObjects(ele
,c
->argv
[3]) == 0) {
4575 listDelNode(list
,ln
);
4578 if (toremove
&& removed
== toremove
) break;
4582 if (listLength(list
) == 0) deleteKey(c
->db
,c
->argv
[1]);
4583 addReplySds(c
,sdscatprintf(sdsempty(),":%d\r\n",removed
));
4586 /* This is the semantic of this command:
4587 * RPOPLPUSH srclist dstlist:
4588 * IF LLEN(srclist) > 0
4589 * element = RPOP srclist
4590 * LPUSH dstlist element
4597 * The idea is to be able to get an element from a list in a reliable way
4598 * since the element is not just returned but pushed against another list
4599 * as well. This command was originally proposed by Ezra Zygmuntowicz.
4601 static void rpoplpushcommand(redisClient
*c
) {
4606 if ((sobj
= lookupKeyWriteOrReply(c
,c
->argv
[1],shared
.nullbulk
)) == NULL
||
4607 checkType(c
,sobj
,REDIS_LIST
)) return;
4608 srclist
= sobj
->ptr
;
4609 ln
= listLast(srclist
);
4612 addReply(c
,shared
.nullbulk
);
4614 robj
*dobj
= lookupKeyWrite(c
->db
,c
->argv
[2]);
4615 robj
*ele
= listNodeValue(ln
);
4618 if (dobj
&& dobj
->type
!= REDIS_LIST
) {
4619 addReply(c
,shared
.wrongtypeerr
);
4623 /* Add the element to the target list (unless it's directly
4624 * passed to some BLPOP-ing client) */
4625 if (!handleClientsWaitingListPush(c
,c
->argv
[2],ele
)) {
4627 /* Create the list if the key does not exist */
4628 dobj
= createListObject();
4629 dictAdd(c
->db
->dict
,c
->argv
[2],dobj
);
4630 incrRefCount(c
->argv
[2]);
4632 dstlist
= dobj
->ptr
;
4633 listAddNodeHead(dstlist
,ele
);
4637 /* Send the element to the client as reply as well */
4638 addReplyBulk(c
,ele
);
4640 /* Finally remove the element from the source list */
4641 listDelNode(srclist
,ln
);
4642 if (listLength(srclist
) == 0) deleteKey(c
->db
,c
->argv
[1]);
4647 /* ==================================== Sets ================================ */
4649 static void saddCommand(redisClient
*c
) {
4652 set
= lookupKeyWrite(c
->db
,c
->argv
[1]);
4654 set
= createSetObject();
4655 dictAdd(c
->db
->dict
,c
->argv
[1],set
);
4656 incrRefCount(c
->argv
[1]);
4658 if (set
->type
!= REDIS_SET
) {
4659 addReply(c
,shared
.wrongtypeerr
);
4663 if (dictAdd(set
->ptr
,c
->argv
[2],NULL
) == DICT_OK
) {
4664 incrRefCount(c
->argv
[2]);
4666 addReply(c
,shared
.cone
);
4668 addReply(c
,shared
.czero
);
4672 static void sremCommand(redisClient
*c
) {
4675 if ((set
= lookupKeyWriteOrReply(c
,c
->argv
[1],shared
.czero
)) == NULL
||
4676 checkType(c
,set
,REDIS_SET
)) return;
4678 if (dictDelete(set
->ptr
,c
->argv
[2]) == DICT_OK
) {
4680 if (htNeedsResize(set
->ptr
)) dictResize(set
->ptr
);
4681 if (dictSize((dict
*)set
->ptr
) == 0) deleteKey(c
->db
,c
->argv
[1]);
4682 addReply(c
,shared
.cone
);
4684 addReply(c
,shared
.czero
);
4688 static void smoveCommand(redisClient
*c
) {
4689 robj
*srcset
, *dstset
;
4691 srcset
= lookupKeyWrite(c
->db
,c
->argv
[1]);
4692 dstset
= lookupKeyWrite(c
->db
,c
->argv
[2]);
4694 /* If the source key does not exist return 0, if it's of the wrong type
4696 if (srcset
== NULL
|| srcset
->type
!= REDIS_SET
) {
4697 addReply(c
, srcset
? shared
.wrongtypeerr
: shared
.czero
);
4700 /* Error if the destination key is not a set as well */
4701 if (dstset
&& dstset
->type
!= REDIS_SET
) {
4702 addReply(c
,shared
.wrongtypeerr
);
4705 /* Remove the element from the source set */
4706 if (dictDelete(srcset
->ptr
,c
->argv
[3]) == DICT_ERR
) {
4707 /* Key not found in the src set! return zero */
4708 addReply(c
,shared
.czero
);
4711 if (dictSize((dict
*)srcset
->ptr
) == 0 && srcset
!= dstset
)
4712 deleteKey(c
->db
,c
->argv
[1]);
4714 /* Add the element to the destination set */
4716 dstset
= createSetObject();
4717 dictAdd(c
->db
->dict
,c
->argv
[2],dstset
);
4718 incrRefCount(c
->argv
[2]);
4720 if (dictAdd(dstset
->ptr
,c
->argv
[3],NULL
) == DICT_OK
)
4721 incrRefCount(c
->argv
[3]);
4722 addReply(c
,shared
.cone
);
4725 static void sismemberCommand(redisClient
*c
) {
4728 if ((set
= lookupKeyReadOrReply(c
,c
->argv
[1],shared
.czero
)) == NULL
||
4729 checkType(c
,set
,REDIS_SET
)) return;
4731 if (dictFind(set
->ptr
,c
->argv
[2]))
4732 addReply(c
,shared
.cone
);
4734 addReply(c
,shared
.czero
);
4737 static void scardCommand(redisClient
*c
) {
4741 if ((o
= lookupKeyReadOrReply(c
,c
->argv
[1],shared
.czero
)) == NULL
||
4742 checkType(c
,o
,REDIS_SET
)) return;
4745 addReplyUlong(c
,dictSize(s
));
4748 static void spopCommand(redisClient
*c
) {
4752 if ((set
= lookupKeyWriteOrReply(c
,c
->argv
[1],shared
.nullbulk
)) == NULL
||
4753 checkType(c
,set
,REDIS_SET
)) return;
4755 de
= dictGetRandomKey(set
->ptr
);
4757 addReply(c
,shared
.nullbulk
);
4759 robj
*ele
= dictGetEntryKey(de
);
4761 addReplyBulk(c
,ele
);
4762 dictDelete(set
->ptr
,ele
);
4763 if (htNeedsResize(set
->ptr
)) dictResize(set
->ptr
);
4764 if (dictSize((dict
*)set
->ptr
) == 0) deleteKey(c
->db
,c
->argv
[1]);
4769 static void srandmemberCommand(redisClient
*c
) {
4773 if ((set
= lookupKeyReadOrReply(c
,c
->argv
[1],shared
.nullbulk
)) == NULL
||
4774 checkType(c
,set
,REDIS_SET
)) return;
4776 de
= dictGetRandomKey(set
->ptr
);
4778 addReply(c
,shared
.nullbulk
);
4780 robj
*ele
= dictGetEntryKey(de
);
4782 addReplyBulk(c
,ele
);
4786 static int qsortCompareSetsByCardinality(const void *s1
, const void *s2
) {
4787 dict
**d1
= (void*) s1
, **d2
= (void*) s2
;
4789 return dictSize(*d1
)-dictSize(*d2
);
4792 static void sinterGenericCommand(redisClient
*c
, robj
**setskeys
, unsigned long setsnum
, robj
*dstkey
) {
4793 dict
**dv
= zmalloc(sizeof(dict
*)*setsnum
);
4796 robj
*lenobj
= NULL
, *dstset
= NULL
;
4797 unsigned long j
, cardinality
= 0;
4799 for (j
= 0; j
< setsnum
; j
++) {
4803 lookupKeyWrite(c
->db
,setskeys
[j
]) :
4804 lookupKeyRead(c
->db
,setskeys
[j
]);
4808 if (deleteKey(c
->db
,dstkey
))
4810 addReply(c
,shared
.czero
);
4812 addReply(c
,shared
.nullmultibulk
);
4816 if (setobj
->type
!= REDIS_SET
) {
4818 addReply(c
,shared
.wrongtypeerr
);
4821 dv
[j
] = setobj
->ptr
;
4823 /* Sort sets from the smallest to largest, this will improve our
4824 * algorithm's performance */
4825 qsort(dv
,setsnum
,sizeof(dict
*),qsortCompareSetsByCardinality
);
4827 /* The first thing we should output is the total number of elements...
4828 * since this is a multi-bulk write, but at this stage we don't know
4829 * the intersection set size, so we use a trick, append an empty object
4830 * to the output list and save the pointer to later modify it with the
4833 lenobj
= createObject(REDIS_STRING
,NULL
);
4835 decrRefCount(lenobj
);
4837 /* If we have a target key where to store the resulting set
4838 * create this key with an empty set inside */
4839 dstset
= createSetObject();
4842 /* Iterate all the elements of the first (smallest) set, and test
4843 * the element against all the other sets, if at least one set does
4844 * not include the element it is discarded */
4845 di
= dictGetIterator(dv
[0]);
4847 while((de
= dictNext(di
)) != NULL
) {
4850 for (j
= 1; j
< setsnum
; j
++)
4851 if (dictFind(dv
[j
],dictGetEntryKey(de
)) == NULL
) break;
4853 continue; /* at least one set does not contain the member */
4854 ele
= dictGetEntryKey(de
);
4856 addReplyBulk(c
,ele
);
4859 dictAdd(dstset
->ptr
,ele
,NULL
);
4863 dictReleaseIterator(di
);
4866 /* Store the resulting set into the target, if the intersection
4867 * is not an empty set. */
4868 deleteKey(c
->db
,dstkey
);
4869 if (dictSize((dict
*)dstset
->ptr
) > 0) {
4870 dictAdd(c
->db
->dict
,dstkey
,dstset
);
4871 incrRefCount(dstkey
);
4872 addReplyLong(c
,dictSize((dict
*)dstset
->ptr
));
4874 decrRefCount(dstset
);
4875 addReply(c
,shared
.czero
);
4879 lenobj
->ptr
= sdscatprintf(sdsempty(),"*%lu\r\n",cardinality
);
4884 static void sinterCommand(redisClient
*c
) {
4885 sinterGenericCommand(c
,c
->argv
+1,c
->argc
-1,NULL
);
4888 static void sinterstoreCommand(redisClient
*c
) {
4889 sinterGenericCommand(c
,c
->argv
+2,c
->argc
-2,c
->argv
[1]);
4892 #define REDIS_OP_UNION 0
4893 #define REDIS_OP_DIFF 1
4894 #define REDIS_OP_INTER 2
4896 static void sunionDiffGenericCommand(redisClient
*c
, robj
**setskeys
, int setsnum
, robj
*dstkey
, int op
) {
4897 dict
**dv
= zmalloc(sizeof(dict
*)*setsnum
);
4900 robj
*dstset
= NULL
;
4901 int j
, cardinality
= 0;
4903 for (j
= 0; j
< setsnum
; j
++) {
4907 lookupKeyWrite(c
->db
,setskeys
[j
]) :
4908 lookupKeyRead(c
->db
,setskeys
[j
]);
4913 if (setobj
->type
!= REDIS_SET
) {
4915 addReply(c
,shared
.wrongtypeerr
);
4918 dv
[j
] = setobj
->ptr
;
4921 /* We need a temp set object to store our union. If the dstkey
4922 * is not NULL (that is, we are inside an SUNIONSTORE operation) then
4923 * this set object will be the resulting object to set into the target key*/
4924 dstset
= createSetObject();
4926 /* Iterate all the elements of all the sets, add every element a single
4927 * time to the result set */
4928 for (j
= 0; j
< setsnum
; j
++) {
4929 if (op
== REDIS_OP_DIFF
&& j
== 0 && !dv
[j
]) break; /* result set is empty */
4930 if (!dv
[j
]) continue; /* non existing keys are like empty sets */
4932 di
= dictGetIterator(dv
[j
]);
4934 while((de
= dictNext(di
)) != NULL
) {
4937 /* dictAdd will not add the same element multiple times */
4938 ele
= dictGetEntryKey(de
);
4939 if (op
== REDIS_OP_UNION
|| j
== 0) {
4940 if (dictAdd(dstset
->ptr
,ele
,NULL
) == DICT_OK
) {
4944 } else if (op
== REDIS_OP_DIFF
) {
4945 if (dictDelete(dstset
->ptr
,ele
) == DICT_OK
) {
4950 dictReleaseIterator(di
);
4952 /* result set is empty? Exit asap. */
4953 if (op
== REDIS_OP_DIFF
&& cardinality
== 0) break;
4956 /* Output the content of the resulting set, if not in STORE mode */
4958 addReplySds(c
,sdscatprintf(sdsempty(),"*%d\r\n",cardinality
));
4959 di
= dictGetIterator(dstset
->ptr
);
4960 while((de
= dictNext(di
)) != NULL
) {
4963 ele
= dictGetEntryKey(de
);
4964 addReplyBulk(c
,ele
);
4966 dictReleaseIterator(di
);
4967 decrRefCount(dstset
);
4969 /* If we have a target key where to store the resulting set
4970 * create this key with the result set inside */
4971 deleteKey(c
->db
,dstkey
);
4972 if (dictSize((dict
*)dstset
->ptr
) > 0) {
4973 dictAdd(c
->db
->dict
,dstkey
,dstset
);
4974 incrRefCount(dstkey
);
4975 addReplyLong(c
,dictSize((dict
*)dstset
->ptr
));
4977 decrRefCount(dstset
);
4978 addReply(c
,shared
.czero
);
4985 static void sunionCommand(redisClient
*c
) {
4986 sunionDiffGenericCommand(c
,c
->argv
+1,c
->argc
-1,NULL
,REDIS_OP_UNION
);
4989 static void sunionstoreCommand(redisClient
*c
) {
4990 sunionDiffGenericCommand(c
,c
->argv
+2,c
->argc
-2,c
->argv
[1],REDIS_OP_UNION
);
4993 static void sdiffCommand(redisClient
*c
) {
4994 sunionDiffGenericCommand(c
,c
->argv
+1,c
->argc
-1,NULL
,REDIS_OP_DIFF
);
4997 static void sdiffstoreCommand(redisClient
*c
) {
4998 sunionDiffGenericCommand(c
,c
->argv
+2,c
->argc
-2,c
->argv
[1],REDIS_OP_DIFF
);
5001 /* ==================================== ZSets =============================== */
5003 /* ZSETs are ordered sets using two data structures to hold the same elements
5004 * in order to get O(log(N)) INSERT and REMOVE operations into a sorted
5007 * The elements are added to a hash table mapping Redis objects to scores.
5008 * At the same time the elements are added to a skip list mapping scores
5009 * to Redis objects (so objects are sorted by scores in this "view"). */
5011 /* This skiplist implementation is almost a C translation of the original
5012 * algorithm described by William Pugh in "Skip Lists: A Probabilistic
5013 * Alternative to Balanced Trees", modified in three ways:
5014 * a) this implementation allows for repeated values.
5015 * b) the comparison is not just by key (our 'score') but by satellite data.
5016 * c) there is a back pointer, so it's a doubly linked list with the back
5017 * pointers being only at "level 1". This allows to traverse the list
5018 * from tail to head, useful for ZREVRANGE. */
5020 static zskiplistNode
*zslCreateNode(int level
, double score
, robj
*obj
) {
5021 zskiplistNode
*zn
= zmalloc(sizeof(*zn
));
5023 zn
->forward
= zmalloc(sizeof(zskiplistNode
*) * level
);
5025 zn
->span
= zmalloc(sizeof(unsigned int) * (level
- 1));
5031 static zskiplist
*zslCreate(void) {
5035 zsl
= zmalloc(sizeof(*zsl
));
5038 zsl
->header
= zslCreateNode(ZSKIPLIST_MAXLEVEL
,0,NULL
);
5039 for (j
= 0; j
< ZSKIPLIST_MAXLEVEL
; j
++) {
5040 zsl
->header
->forward
[j
] = NULL
;
5042 /* span has space for ZSKIPLIST_MAXLEVEL-1 elements */
5043 if (j
< ZSKIPLIST_MAXLEVEL
-1)
5044 zsl
->header
->span
[j
] = 0;
5046 zsl
->header
->backward
= NULL
;
5051 static void zslFreeNode(zskiplistNode
*node
) {
5052 decrRefCount(node
->obj
);
5053 zfree(node
->forward
);
5058 static void zslFree(zskiplist
*zsl
) {
5059 zskiplistNode
*node
= zsl
->header
->forward
[0], *next
;
5061 zfree(zsl
->header
->forward
);
5062 zfree(zsl
->header
->span
);
5065 next
= node
->forward
[0];
5072 static int zslRandomLevel(void) {
5074 while ((random()&0xFFFF) < (ZSKIPLIST_P
* 0xFFFF))
5076 return (level
<ZSKIPLIST_MAXLEVEL
) ? level
: ZSKIPLIST_MAXLEVEL
;
5079 static void zslInsert(zskiplist
*zsl
, double score
, robj
*obj
) {
5080 zskiplistNode
*update
[ZSKIPLIST_MAXLEVEL
], *x
;
5081 unsigned int rank
[ZSKIPLIST_MAXLEVEL
];
5085 for (i
= zsl
->level
-1; i
>= 0; i
--) {
5086 /* store rank that is crossed to reach the insert position */
5087 rank
[i
] = i
== (zsl
->level
-1) ? 0 : rank
[i
+1];
5089 while (x
->forward
[i
] &&
5090 (x
->forward
[i
]->score
< score
||
5091 (x
->forward
[i
]->score
== score
&&
5092 compareStringObjects(x
->forward
[i
]->obj
,obj
) < 0))) {
5093 rank
[i
] += i
> 0 ? x
->span
[i
-1] : 1;
5098 /* we assume the key is not already inside, since we allow duplicated
5099 * scores, and the re-insertion of score and redis object should never
5100 * happen since the caller of zslInsert() should test in the hash table
5101 * if the element is already inside or not. */
5102 level
= zslRandomLevel();
5103 if (level
> zsl
->level
) {
5104 for (i
= zsl
->level
; i
< level
; i
++) {
5106 update
[i
] = zsl
->header
;
5107 update
[i
]->span
[i
-1] = zsl
->length
;
5111 x
= zslCreateNode(level
,score
,obj
);
5112 for (i
= 0; i
< level
; i
++) {
5113 x
->forward
[i
] = update
[i
]->forward
[i
];
5114 update
[i
]->forward
[i
] = x
;
5116 /* update span covered by update[i] as x is inserted here */
5118 x
->span
[i
-1] = update
[i
]->span
[i
-1] - (rank
[0] - rank
[i
]);
5119 update
[i
]->span
[i
-1] = (rank
[0] - rank
[i
]) + 1;
5123 /* increment span for untouched levels */
5124 for (i
= level
; i
< zsl
->level
; i
++) {
5125 update
[i
]->span
[i
-1]++;
5128 x
->backward
= (update
[0] == zsl
->header
) ? NULL
: update
[0];
5130 x
->forward
[0]->backward
= x
;
5136 /* Internal function used by zslDelete, zslDeleteByScore and zslDeleteByRank */
5137 void zslDeleteNode(zskiplist
*zsl
, zskiplistNode
*x
, zskiplistNode
**update
) {
5139 for (i
= 0; i
< zsl
->level
; i
++) {
5140 if (update
[i
]->forward
[i
] == x
) {
5142 update
[i
]->span
[i
-1] += x
->span
[i
-1] - 1;
5144 update
[i
]->forward
[i
] = x
->forward
[i
];
5146 /* invariant: i > 0, because update[0]->forward[0]
5147 * is always equal to x */
5148 update
[i
]->span
[i
-1] -= 1;
5151 if (x
->forward
[0]) {
5152 x
->forward
[0]->backward
= x
->backward
;
5154 zsl
->tail
= x
->backward
;
5156 while(zsl
->level
> 1 && zsl
->header
->forward
[zsl
->level
-1] == NULL
)
5161 /* Delete an element with matching score/object from the skiplist. */
5162 static int zslDelete(zskiplist
*zsl
, double score
, robj
*obj
) {
5163 zskiplistNode
*update
[ZSKIPLIST_MAXLEVEL
], *x
;
5167 for (i
= zsl
->level
-1; i
>= 0; i
--) {
5168 while (x
->forward
[i
] &&
5169 (x
->forward
[i
]->score
< score
||
5170 (x
->forward
[i
]->score
== score
&&
5171 compareStringObjects(x
->forward
[i
]->obj
,obj
) < 0)))
5175 /* We may have multiple elements with the same score, what we need
5176 * is to find the element with both the right score and object. */
5178 if (x
&& score
== x
->score
&& compareStringObjects(x
->obj
,obj
) == 0) {
5179 zslDeleteNode(zsl
, x
, update
);
5183 return 0; /* not found */
5185 return 0; /* not found */
5188 /* Delete all the elements with score between min and max from the skiplist.
5189 * Min and max are inclusive, so any element with min <= score <= max is deleted.
5190 * Note that this function takes the reference to the hash table view of the
5191 * sorted set, in order to remove the elements from the hash table too. */
5192 static unsigned long zslDeleteRangeByScore(zskiplist
*zsl
, double min
, double max
, dict
*dict
) {
5193 zskiplistNode
*update
[ZSKIPLIST_MAXLEVEL
], *x
;
5194 unsigned long removed
= 0;
5198 for (i
= zsl
->level
-1; i
>= 0; i
--) {
5199 while (x
->forward
[i
] && x
->forward
[i
]->score
< min
)
5203 /* We may have multiple elements with the same score, what we need
5204 * is to find the element with both the right score and object. */
5206 while (x
&& x
->score
<= max
) {
5207 zskiplistNode
*next
= x
->forward
[0];
5208 zslDeleteNode(zsl
, x
, update
);
5209 dictDelete(dict
,x
->obj
);
5214 return removed
; /* not found */
5217 /* Delete all the elements with rank between start and end from the skiplist.
5218 * Start and end are inclusive. Note that start and end need to be 1-based */
5219 static unsigned long zslDeleteRangeByRank(zskiplist
*zsl
, unsigned int start
, unsigned int end
, dict
*dict
) {
5220 zskiplistNode
*update
[ZSKIPLIST_MAXLEVEL
], *x
;
5221 unsigned long traversed
= 0, removed
= 0;
5225 for (i
= zsl
->level
-1; i
>= 0; i
--) {
5226 while (x
->forward
[i
] && (traversed
+ (i
> 0 ? x
->span
[i
-1] : 1)) < start
) {
5227 traversed
+= i
> 0 ? x
->span
[i
-1] : 1;
5235 while (x
&& traversed
<= end
) {
5236 zskiplistNode
*next
= x
->forward
[0];
5237 zslDeleteNode(zsl
, x
, update
);
5238 dictDelete(dict
,x
->obj
);
5247 /* Find the first node having a score equal or greater than the specified one.
5248 * Returns NULL if there is no match. */
5249 static zskiplistNode
*zslFirstWithScore(zskiplist
*zsl
, double score
) {
5254 for (i
= zsl
->level
-1; i
>= 0; i
--) {
5255 while (x
->forward
[i
] && x
->forward
[i
]->score
< score
)
5258 /* The next node at level 0 is the first one whose score is not lower
5259 * than 'score', or NULL when every element has a lower score. */
5260 return x
->forward
[0];
5263 /* Find the rank for an element by both score and key.
5264 * Returns 0 when the element cannot be found, rank otherwise.
5265 * Note that the rank is 1-based due to the span of zsl->header to the
5267 static unsigned long zslGetRank(zskiplist
*zsl
, double score
, robj
*o
) {
5269 unsigned long rank
= 0;
5273 for (i
= zsl
->level
-1; i
>= 0; i
--) {
5274 while (x
->forward
[i
] &&
5275 (x
->forward
[i
]->score
< score
||
5276 (x
->forward
[i
]->score
== score
&&
5277 compareStringObjects(x
->forward
[i
]->obj
,o
) <= 0))) {
5278 rank
+= i
> 0 ? x
->span
[i
-1] : 1;
5282 /* x might be equal to zsl->header, so test if obj is non-NULL */
5283 if (x
->obj
&& compareStringObjects(x
->obj
,o
) == 0) {
5290 /* Finds an element by its rank. The rank argument needs to be 1-based. */
5291 zskiplistNode
* zslGetElementByRank(zskiplist
*zsl
, unsigned long rank
) {
5293 unsigned long traversed
= 0;
5297 for (i
= zsl
->level
-1; i
>= 0; i
--) {
5298 while (x
->forward
[i
] && (traversed
+ (i
>0 ? x
->span
[i
-1] : 1)) <= rank
)
5300 traversed
+= i
> 0 ? x
->span
[i
-1] : 1;
5303 if (traversed
== rank
) {
5310 /* The actual Z-commands implementations */
5312 /* This generic command implements both ZADD and ZINCRBY.
5313 * scoreval is the score if the operation is a ZADD (doincrement == 0) or
5314 * the increment if the operation is a ZINCRBY (doincrement == 1). */
/* The new score is kept in a zmalloc()ed double because the pointer itself
 * is stored as the value associated with 'ele' in zs->dict (see the
 * dictAdd()/dictReplace() calls below). The reply is either the resulting
 * score (addReplyDouble) or shared.cone / shared.czero.
 * NOTE(review): several lines of this function (declarations, else
 * branches, closing braces) are not visible in this excerpt; the condition
 * selecting between the two reply styles is presumably 'doincrement'. */
5315 static void zaddGenericCommand(redisClient
*c
, robj
*key
, robj
*ele
, double scoreval
, int doincrement
) {
/* Fetch the target key; when it does not exist a new empty sorted set is
 * created and registered in the database under 'key'. */
5320 zsetobj
= lookupKeyWrite(c
->db
,key
);
5321 if (zsetobj
== NULL
) {
5322 zsetobj
= createZsetObject();
5323 dictAdd(c
->db
->dict
,key
,zsetobj
);
/* A value of a different type is answered with the wrong-type error. */
5326 if (zsetobj
->type
!= REDIS_ZSET
) {
5327 addReply(c
,shared
.wrongtypeerr
);
5333 /* Ok now since we implement both ZADD and ZINCRBY here the code
5334 * needs to handle the two different conditions. It's all about setting
5335 * '*score', that is, the new score to set, to the right value. */
5336 score
= zmalloc(sizeof(double));
5340 /* Read the old score. If the element was not present starts from 0 */
5341 de
= dictFind(zs
->dict
,ele
);
5343 double *oldscore
= dictGetEntryVal(de
);
5344 *score
= *oldscore
+ scoreval
;
5352 /* What follows is a simple remove and re-insert operation that is common
5353 * to both ZADD and ZINCRBY... */
5354 if (dictAdd(zs
->dict
,ele
,score
) == DICT_OK
) {
5355 /* case 1: New element */
5356 incrRefCount(ele
); /* added to hash */
5357 zslInsert(zs
->zsl
,*score
,ele
);
5358 incrRefCount(ele
); /* added to skiplist */
5361 addReplyDouble(c
,*score
);
5363 addReply(c
,shared
.cone
);
5368 /* case 2: Score update operation */
5369 de
= dictFind(zs
->dict
,ele
);
5370 redisAssert(de
!= NULL
);
5371 oldscore
= dictGetEntryVal(de
);
/* The skiplist and the hash table are touched only when the score
 * actually changes. */
5372 if (*score
!= *oldscore
) {
5375 /* Remove and insert the element in the skip list with new score */
5376 deleted
= zslDelete(zs
->zsl
,*oldscore
,ele
);
5377 redisAssert(deleted
!= 0);
5378 zslInsert(zs
->zsl
,*score
,ele
);
5380 /* Update the score in the hash table */
5381 dictReplace(zs
->dict
,ele
,score
);
5387 addReplyDouble(c
,*score
);
5389 addReply(c
,shared
.czero
);
5393 static void zaddCommand(redisClient
*c
) {
5396 scoreval
= strtod(c
->argv
[2]->ptr
,NULL
);
5397 zaddGenericCommand(c
,c
->argv
[1],c
->argv
[3],scoreval
,0);
5400 static void zincrbyCommand(redisClient
*c
) {
5403 scoreval
= strtod(c
->argv
[2]->ptr
,NULL
);
5404 zaddGenericCommand(c
,c
->argv
[1],c
->argv
[3],scoreval
,1);
/* ZREM: remove the member argv[2] from the sorted set stored at argv[1].
 * The element is removed from both the skiplist and the hash table; when
 * the set becomes empty the key itself is deleted. Replies shared.cone
 * after a removal.
 * NOTE(review): declarations, the guard around the first shared.czero
 * reply and the closing braces are not visible in this excerpt. */
5407 static void zremCommand(redisClient
*c
) {
5414 if ((zsetobj
= lookupKeyWriteOrReply(c
,c
->argv
[1],shared
.czero
)) == NULL
||
5415 checkType(c
,zsetobj
,REDIS_ZSET
)) return;
5418 de
= dictFind(zs
->dict
,c
->argv
[2]);
/* Presumably sent only when the member was not found (de == NULL). */
5420 addReply(c
,shared
.czero
);
5423 /* Delete from the skiplist */
5424 oldscore
= dictGetEntryVal(de
);
5425 deleted
= zslDelete(zs
->zsl
,*oldscore
,c
->argv
[2]);
5426 redisAssert(deleted
!= 0);
5428 /* Delete from the hash table */
5429 dictDelete(zs
->dict
,c
->argv
[2]);
/* Shrink the hash table if needed; drop the key once the set is empty. */
5430 if (htNeedsResize(zs
->dict
)) dictResize(zs
->dict
);
5431 if (dictSize(zs
->dict
) == 0) deleteKey(c
->db
,c
->argv
[1]);
5433 addReply(c
,shared
.cone
);
5436 static void zremrangebyscoreCommand(redisClient
*c
) {
5437 double min
= strtod(c
->argv
[2]->ptr
,NULL
);
5438 double max
= strtod(c
->argv
[3]->ptr
,NULL
);
5443 if ((zsetobj
= lookupKeyWriteOrReply(c
,c
->argv
[1],shared
.czero
)) == NULL
||
5444 checkType(c
,zsetobj
,REDIS_ZSET
)) return;
5447 deleted
= zslDeleteRangeByScore(zs
->zsl
,min
,max
,zs
->dict
);
5448 if (htNeedsResize(zs
->dict
)) dictResize(zs
->dict
);
5449 if (dictSize(zs
->dict
) == 0) deleteKey(c
->db
,c
->argv
[1]);
5450 server
.dirty
+= deleted
;
5451 addReplyLong(c
,deleted
);
5454 static void zremrangebyrankCommand(redisClient
*c
) {
5455 int start
= atoi(c
->argv
[2]->ptr
);
5456 int end
= atoi(c
->argv
[3]->ptr
);
5462 if ((zsetobj
= lookupKeyWriteOrReply(c
,c
->argv
[1],shared
.czero
)) == NULL
||
5463 checkType(c
,zsetobj
,REDIS_ZSET
)) return;
5465 llen
= zs
->zsl
->length
;
5467 /* convert negative indexes */
5468 if (start
< 0) start
= llen
+start
;
5469 if (end
< 0) end
= llen
+end
;
5470 if (start
< 0) start
= 0;
5471 if (end
< 0) end
= 0;
5473 /* indexes sanity checks */
5474 if (start
> end
|| start
>= llen
) {
5475 addReply(c
,shared
.czero
);
5478 if (end
>= llen
) end
= llen
-1;
5480 /* increment start and end because zsl*Rank functions
5481 * use 1-based rank */
5482 deleted
= zslDeleteRangeByRank(zs
->zsl
,start
+1,end
+1,zs
->dict
);
5483 if (htNeedsResize(zs
->dict
)) dictResize(zs
->dict
);
5484 if (dictSize(zs
->dict
) == 0) deleteKey(c
->db
,c
->argv
[1]);
5485 server
.dirty
+= deleted
;
5486 addReplyLong(c
, deleted
);
5494 static int qsortCompareZsetopsrcByCardinality(const void *s1
, const void *s2
) {
5495 zsetopsrc
*d1
= (void*) s1
, *d2
= (void*) s2
;
5496 unsigned long size1
, size2
;
5497 size1
= d1
->dict
? dictSize(d1
->dict
) : 0;
5498 size2
= d2
->dict
? dictSize(d2
->dict
) : 0;
5499 return size1
- size2
;
5502 #define REDIS_AGGR_SUM 1
5503 #define REDIS_AGGR_MIN 2
5504 #define REDIS_AGGR_MAX 3
5506 inline static void zunionInterAggregate(double *target
, double val
, int aggregate
) {
5507 if (aggregate
== REDIS_AGGR_SUM
) {
5508 *target
= *target
+ val
;
5509 } else if (aggregate
== REDIS_AGGR_MIN
) {
5510 *target
= val
< *target
? val
: *target
;
5511 } else if (aggregate
== REDIS_AGGR_MAX
) {
5512 *target
= val
> *target
? val
: *target
;
5515 redisAssert(0 != 0);
/* Shared implementation of ZUNION and ZINTER ('op' is REDIS_OP_UNION or
 * REDIS_OP_INTER). argv[2] holds the number of input keys, which start at
 * argv[3]; they may be followed by the optional "weights" and "aggregate"
 * (sum|min|max) arguments. The result is built in a new sorted set that
 * replaces 'dstkey'; the reply is the length of the result, or shared.czero
 * when the result is empty (in which case no key is stored).
 * NOTE(review): many lines of this function (declarations, some guards,
 * else branches, cleanup and closing braces) are not visible in this
 * excerpt, so the comments below only describe the visible statements. */
5519 static void zunionInterGenericCommand(redisClient
*c
, robj
*dstkey
, int op
) {
5521 int aggregate
= REDIS_AGGR_SUM
;
5528 /* expect zsetnum input keys to be given */
5529 zsetnum
= atoi(c
->argv
[2]->ptr
);
5531 addReplySds(c
,sdsnew("-ERR at least 1 input key is needed for ZUNION/ZINTER\r\n"));
5535 /* test if the expected number of keys would overflow */
5536 if (3+zsetnum
> c
->argc
) {
5537 addReply(c
,shared
.syntaxerr
);
5541 /* read keys to be used for input */
5542 src
= zmalloc(sizeof(zsetopsrc
) * zsetnum
);
5543 for (i
= 0, j
= 3; i
< zsetnum
; i
++, j
++) {
5544 robj
*zsetobj
= lookupKeyWrite(c
->db
,c
->argv
[j
]);
5548 if (zsetobj
->type
!= REDIS_ZSET
) {
5550 addReply(c
,shared
.wrongtypeerr
);
5553 src
[i
].dict
= ((zset
*)zsetobj
->ptr
)->dict
;
5556 /* default all weights to 1 */
5557 src
[i
].weight
= 1.0;
5560 /* parse optional extra arguments */
5562 int remaining
= c
->argc
- j
;
/* "weights" must be followed by one weight per input key. */
5565 if (remaining
>= (zsetnum
+ 1) && !strcasecmp(c
->argv
[j
]->ptr
,"weights")) {
5567 for (i
= 0; i
< zsetnum
; i
++, j
++, remaining
--) {
5568 src
[i
].weight
= strtod(c
->argv
[j
]->ptr
, NULL
);
5570 } else if (remaining
>= 2 && !strcasecmp(c
->argv
[j
]->ptr
,"aggregate")) {
5572 if (!strcasecmp(c
->argv
[j
]->ptr
,"sum")) {
5573 aggregate
= REDIS_AGGR_SUM
;
5574 } else if (!strcasecmp(c
->argv
[j
]->ptr
,"min")) {
5575 aggregate
= REDIS_AGGR_MIN
;
5576 } else if (!strcasecmp(c
->argv
[j
]->ptr
,"max")) {
5577 aggregate
= REDIS_AGGR_MAX
;
5580 addReply(c
,shared
.syntaxerr
);
5586 addReply(c
,shared
.syntaxerr
);
5592 /* sort sets from the smallest to largest, this will improve our
5593 * algorithm's performance */
5594 qsort(src
,zsetnum
,sizeof(zsetopsrc
), qsortCompareZsetopsrcByCardinality
);
5596 dstobj
= createZsetObject();
5597 dstzset
= dstobj
->ptr
;
5599 if (op
== REDIS_OP_INTER
) {
5600 /* skip going over all entries if the smallest zset is NULL or empty */
5601 if (src
[0].dict
&& dictSize(src
[0].dict
) > 0) {
5602 /* precondition: as src[0].dict is non-empty and the zsets are ordered
5603 * from small to large, all src[i > 0].dict are non-empty too */
5604 di
= dictGetIterator(src
[0].dict
);
5605 while((de
= dictNext(di
)) != NULL
) {
/* Start from the weighted score in the smallest set, then fold in
 * the weighted score found in every other set. */
5606 double *score
= zmalloc(sizeof(double)), value
;
5607 *score
= src
[0].weight
* (*(double*)dictGetEntryVal(de
));
5609 for (j
= 1; j
< zsetnum
; j
++) {
5610 dictEntry
*other
= dictFind(src
[j
].dict
,dictGetEntryKey(de
));
5612 value
= src
[j
].weight
* (*(double*)dictGetEntryVal(other
));
5613 zunionInterAggregate(score
, value
, aggregate
);
5619 /* skip entry when not present in every source dict */
5623 robj
*o
= dictGetEntryKey(de
);
5624 dictAdd(dstzset
->dict
,o
,score
);
5625 incrRefCount(o
); /* added to dictionary */
5626 zslInsert(dstzset
->zsl
,*score
,o
);
5627 incrRefCount(o
); /* added to skiplist */
5630 dictReleaseIterator(di
);
5632 } else if (op
== REDIS_OP_UNION
) {
5633 for (i
= 0; i
< zsetnum
; i
++) {
5634 if (!src
[i
].dict
) continue;
5636 di
= dictGetIterator(src
[i
].dict
);
5637 while((de
= dictNext(di
)) != NULL
) {
5638 /* skip key when already processed */
5639 if (dictFind(dstzset
->dict
,dictGetEntryKey(de
)) != NULL
) continue;
5641 double *score
= zmalloc(sizeof(double)), value
;
5642 *score
= src
[i
].weight
* (*(double*)dictGetEntryVal(de
));
5644 /* because the zsets are sorted by size, its only possible
5645 * for sets at larger indices to hold this entry */
5646 for (j
= (i
+1); j
< zsetnum
; j
++) {
5647 dictEntry
*other
= dictFind(src
[j
].dict
,dictGetEntryKey(de
));
5649 value
= src
[j
].weight
* (*(double*)dictGetEntryVal(other
));
5650 zunionInterAggregate(score
, value
, aggregate
);
5654 robj
*o
= dictGetEntryKey(de
);
5655 dictAdd(dstzset
->dict
,o
,score
);
5656 incrRefCount(o
); /* added to dictionary */
5657 zslInsert(dstzset
->zsl
,*score
,o
);
5658 incrRefCount(o
); /* added to skiplist */
5660 dictReleaseIterator(di
);
5663 /* unknown operator */
5664 redisAssert(op
== REDIS_OP_INTER
|| op
== REDIS_OP_UNION
);
/* Replace any previous value of dstkey; an empty result is not stored,
 * the temporary object is released instead. */
5667 deleteKey(c
->db
,dstkey
);
5668 if (dstzset
->zsl
->length
) {
5669 dictAdd(c
->db
->dict
,dstkey
,dstobj
);
5670 incrRefCount(dstkey
);
5671 addReplyLong(c
, dstzset
->zsl
->length
);
5674 decrRefCount(dstobj
);
5675 addReply(c
, shared
.czero
);
5680 static void zunionCommand(redisClient
*c
) {
5681 zunionInterGenericCommand(c
,c
->argv
[1], REDIS_OP_UNION
);
5684 static void zinterCommand(redisClient
*c
) {
5685 zunionInterGenericCommand(c
,c
->argv
[1], REDIS_OP_INTER
);
/* Shared implementation of ZRANGE (reverse == 0) and ZREVRANGE
 * (reverse == 1). argv[2] and argv[3] are the start and end indexes
 * (negative values count from the end of the set); an optional fifth
 * argument "withscores" asks for the scores too. The reply is a multi-bulk
 * with one entry per element (two when scores are requested).
 * NOTE(review): declarations, a few statements and the closing braces are
 * not visible in this excerpt. */
5688 static void zrangeGenericCommand(redisClient
*c
, int reverse
) {
5690 int start
= atoi(c
->argv
[2]->ptr
);
5691 int end
= atoi(c
->argv
[3]->ptr
);
/* Exactly five arguments with "withscores" is accepted; any other form
 * with five or more arguments is a syntax error. */
5700 if (c
->argc
== 5 && !strcasecmp(c
->argv
[4]->ptr
,"withscores")) {
5702 } else if (c
->argc
>= 5) {
5703 addReply(c
,shared
.syntaxerr
);
5707 if ((o
= lookupKeyReadOrReply(c
,c
->argv
[1],shared
.nullmultibulk
)) == NULL
||
5708 checkType(c
,o
,REDIS_ZSET
)) return;
5713 /* convert negative indexes */
5714 if (start
< 0) start
= llen
+start
;
5715 if (end
< 0) end
= llen
+end
;
5716 if (start
< 0) start
= 0;
5717 if (end
< 0) end
= 0;
5719 /* indexes sanity checks */
5720 if (start
> end
|| start
>= llen
) {
5721 /* Out of range start or start > end result in empty list */
5722 addReply(c
,shared
.emptymultibulk
);
5725 if (end
>= llen
) end
= llen
-1;
5726 rangelen
= (end
-start
)+1;
5728 /* check if starting point is trivial, before searching
5729 * the element in log(N) time */
/* The first assignment starts from the tail (used when iterating
 * backward); the second expression starts from the first node. The
 * 'reverse' test selecting between them is presumably on the lines that
 * are not visible here. zslGetElementByRank() takes 1-based ranks. */
5731 ln
= start
== 0 ? zsl
->tail
: zslGetElementByRank(zsl
, llen
-start
);
5734 zsl
->header
->forward
[0] : zslGetElementByRank(zsl
, start
+1);
5737 /* Return the result in form of a multi-bulk reply */
5738 addReplySds(c
,sdscatprintf(sdsempty(),"*%d\r\n",
5739 withscores
? (rangelen
*2) : rangelen
));
5740 for (j
= 0; j
< rangelen
; j
++) {
5742 addReplyBulk(c
,ele
);
5744 addReplyDouble(c
,ln
->score
);
/* Step to the next node in the requested direction. */
5745 ln
= reverse
? ln
->backward
: ln
->forward
[0];
5749 static void zrangeCommand(redisClient
*c
) {
5750 zrangeGenericCommand(c
,0);
5753 static void zrevrangeCommand(redisClient
*c
) {
5754 zrangeGenericCommand(c
,1);
5757 /* This command implements both ZRANGEBYSCORE and ZCOUNT.
5758 * If justcount is non-zero, just the count is returned. */
/* argv[2] and argv[3] are the min and max scores (a leading "(" makes the
 * bound exclusive). ZRANGEBYSCORE additionally accepts "limit" followed by
 * an offset and a count (argv[4..6]) and a trailing "withscores".
 * NOTE(review): declarations, several statements, else branches and
 * closing braces are not visible in this excerpt. */
5759 static void genericZrangebyscoreCommand(redisClient
*c
, int justcount
) {
5762 int minex
= 0, maxex
= 0; /* are min or max exclusive? */
5763 int offset
= 0, limit
= -1;
5767 /* Parse the min-max interval. If one of the values is prefixed
5768 * by the "(" character, it's considered "open". For instance
5769 * ZRANGEBYSCORE zset (1.5 (2.5 will match min < x < max
5770 * ZRANGEBYSCORE zset 1.5 2.5 will instead match min <= x <= max */
5771 if (((char*)c
->argv
[2]->ptr
)[0] == '(') {
5772 min
= strtod((char*)c
->argv
[2]->ptr
+1,NULL
);
5775 min
= strtod(c
->argv
[2]->ptr
,NULL
);
5777 if (((char*)c
->argv
[3]->ptr
)[0] == '(') {
5778 max
= strtod((char*)c
->argv
[3]->ptr
+1,NULL
);
5781 max
= strtod(c
->argv
[3]->ptr
,NULL
);
5784 /* Parse "WITHSCORES": note that if the command was called with
5785 * the name ZCOUNT then we are sure that c->argc == 4, so we'll never
5786 * enter the following paths to parse WITHSCORES and LIMIT. */
5787 if (c
->argc
== 5 || c
->argc
== 8) {
5788 if (strcasecmp(c
->argv
[c
->argc
-1]->ptr
,"withscores") == 0)
/* Valid argument counts are 4 (no options) or 7 (with LIMIT), plus one
 * when WITHSCORES is present. */
5793 if (c
->argc
!= (4 + withscores
) && c
->argc
!= (7 + withscores
))
5797 sdsnew("-ERR wrong number of arguments for ZRANGEBYSCORE\r\n"));
5802 if (c
->argc
== (7 + withscores
) && strcasecmp(c
->argv
[4]->ptr
,"limit")) {
5803 addReply(c
,shared
.syntaxerr
);
5805 } else if (c
->argc
== (7 + withscores
)) {
5806 offset
= atoi(c
->argv
[5]->ptr
);
5807 limit
= atoi(c
->argv
[6]->ptr
);
5808 if (offset
< 0) offset
= 0;
5811 /* Ok, lookup the key and get the range */
5812 o
= lookupKeyRead(c
->db
,c
->argv
[1]);
5814 addReply(c
,justcount
? shared
.czero
: shared
.nullmultibulk
);
5816 if (o
->type
!= REDIS_ZSET
) {
5817 addReply(c
,shared
.wrongtypeerr
);
5819 zset
*zsetobj
= o
->ptr
;
5820 zskiplist
*zsl
= zsetobj
->zsl
;
5822 robj
*ele
, *lenobj
= NULL
;
5823 unsigned long rangelen
= 0;
5825 /* Get the first node with the score >= min, or with
5826 * score > min if 'minex' is true. */
5827 ln
= zslFirstWithScore(zsl
,min
);
5828 while (minex
&& ln
&& ln
->score
== min
) ln
= ln
->forward
[0];
5831 /* No element matching the specified interval */
5832 addReply(c
,justcount
? shared
.czero
: shared
.emptymultibulk
);
5836 /* We don't know in advance how many matching elements there
5837 * are in the list, so we push this object that will represent
5838 * the multi-bulk length in the output buffer, and will "fix"
5841 lenobj
= createObject(REDIS_STRING
,NULL
);
5843 decrRefCount(lenobj
);
5846 while(ln
&& (maxex
? (ln
->score
< max
) : (ln
->score
<= max
))) {
5849 ln
= ln
->forward
[0];
5852 if (limit
== 0) break;
5855 addReplyBulk(c
,ele
);
5857 addReplyDouble(c
,ln
->score
);
5859 ln
= ln
->forward
[0];
5861 if (limit
> 0) limit
--;
5864 addReplyLong(c
,(long)rangelen
);
5866 lenobj
->ptr
= sdscatprintf(sdsempty(),"*%lu\r\n",
5867 withscores
? (rangelen
*2) : rangelen
);
5873 static void zrangebyscoreCommand(redisClient
*c
) {
5874 genericZrangebyscoreCommand(c
,0);
5877 static void zcountCommand(redisClient
*c
) {
5878 genericZrangebyscoreCommand(c
,1);
5881 static void zcardCommand(redisClient
*c
) {
5885 if ((o
= lookupKeyReadOrReply(c
,c
->argv
[1],shared
.czero
)) == NULL
||
5886 checkType(c
,o
,REDIS_ZSET
)) return;
5889 addReplyUlong(c
,zs
->zsl
->length
);
/* ZSCORE: reply with the score of member argv[2] in the sorted set stored
 * at argv[1], read from the set's hash table.
 * NOTE(review): declarations, the test on 'de' and the closing braces are
 * not visible in this excerpt; the second shared.nullbulk reply is
 * presumably sent when the member is not found. */
5892 static void zscoreCommand(redisClient
*c
) {
5897 if ((o
= lookupKeyReadOrReply(c
,c
->argv
[1],shared
.nullbulk
)) == NULL
||
5898 checkType(c
,o
,REDIS_ZSET
)) return;
5901 de
= dictFind(zs
->dict
,c
->argv
[2]);
5903 addReply(c
,shared
.nullbulk
);
/* The dictionary value is a pointer to the element's score. */
5905 double *score
= dictGetEntryVal(de
);
5907 addReplyDouble(c
,*score
);
/* Shared implementation of ZRANK (reverse == 0) and ZREVRANK
 * (reverse == 1) for member argv[2] of the sorted set at argv[1].
 * The value returned by zslGetRank() is used as a 1-based rank: the
 * forward reply is rank-1, the reverse reply is zsl->length - rank.
 * NOTE(review): declarations, the conditions selecting each reply and the
 * closing braces are not visible in this excerpt. */
5911 static void zrankGenericCommand(redisClient
*c
, int reverse
) {
5919 if ((o
= lookupKeyReadOrReply(c
,c
->argv
[1],shared
.nullbulk
)) == NULL
||
5920 checkType(c
,o
,REDIS_ZSET
)) return;
5924 de
= dictFind(zs
->dict
,c
->argv
[2]);
/* Presumably sent when the member is not in the set. */
5926 addReply(c
,shared
.nullbulk
);
/* The score is needed to locate the element in the skiplist. */
5930 score
= dictGetEntryVal(de
);
5931 rank
= zslGetRank(zsl
, *score
, c
->argv
[2]);
5934 addReplyLong(c
, zsl
->length
- rank
);
5936 addReplyLong(c
, rank
-1);
5939 addReply(c
,shared
.nullbulk
);
5943 static void zrankCommand(redisClient
*c
) {
5944 zrankGenericCommand(c
, 0);
5947 static void zrevrankCommand(redisClient
*c
) {
5948 zrankGenericCommand(c
, 1);
5951 /* =================================== Hashes =============================== */
/* HSET: set field argv[2] to value argv[3] in the hash stored at argv[1],
 * creating the hash when the key does not exist. The hash is kept as a
 * zipmap until a field/value exceeds server.hash_max_zipmap_value bytes or
 * the number of entries exceeds server.hash_max_zipmap_entries, at which
 * point it is converted to a real hash table. The integer reply is
 * (update == 0).
 * NOTE(review): declarations, some guards/else branches and the closing
 * braces are not visible in this excerpt. */
5952 static void hsetCommand(redisClient
*c
) {
5954 robj
*o
= lookupKeyWrite(c
->db
,c
->argv
[1]);
/* Presumably executed only when the key was not found (guard not
 * visible): create the hash and register it in the database. */
5957 o
= createHashObject();
5958 dictAdd(c
->db
->dict
,c
->argv
[1],o
);
5959 incrRefCount(c
->argv
[1]);
5961 if (o
->type
!= REDIS_HASH
) {
5962 addReply(c
,shared
.wrongtypeerr
);
5966 /* We want to convert the zipmap into an hash table right now if the
5967 * entry to be added is too big. Note that we check if the object
5968 * is integer encoded before to try fetching the length in the test below.
5969 * This is because integers are small, but currently stringObjectLen()
5970 * performs a slow conversion: not worth it. */
5971 if (o
->encoding
== REDIS_ENCODING_ZIPMAP
&&
5972 ((c
->argv
[2]->encoding
== REDIS_ENCODING_RAW
&&
5973 sdslen(c
->argv
[2]->ptr
) > server
.hash_max_zipmap_value
) ||
5974 (c
->argv
[3]->encoding
== REDIS_ENCODING_RAW
&&
5975 sdslen(c
->argv
[3]->ptr
) > server
.hash_max_zipmap_value
)))
5977 convertToRealHash(o
);
5980 if (o
->encoding
== REDIS_ENCODING_ZIPMAP
) {
5981 unsigned char *zm
= o
->ptr
;
/* The value may be integer encoded: work on a decoded copy. */
5982 robj
*valobj
= getDecodedObject(c
->argv
[3]);
5984 zm
= zipmapSet(zm
,c
->argv
[2]->ptr
,sdslen(c
->argv
[2]->ptr
),
5985 valobj
->ptr
,sdslen(valobj
->ptr
),&update
);
5986 decrRefCount(valobj
);
5989 /* And here there is the second check for hash conversion. */
5990 if (zipmapLen(zm
) > server
.hash_max_zipmap_entries
)
5991 convertToRealHash(o
);
5993 c
->argv
[2] = tryObjectEncoding(c
->argv
[2]);
5994 /* note that c->argv[3] is already encoded, as the latest arg
5995 * of a bulk command is always integer encoded if possible. */
5996 if (dictReplace(o
->ptr
,c
->argv
[2],c
->argv
[3])) {
5997 incrRefCount(c
->argv
[2]);
6001 incrRefCount(c
->argv
[3]);
6004 addReplySds(c
,sdscatprintf(sdsempty(),":%d\r\n",update
== 0));
/* HINCRBY: numeric increment of field argv[2] of the hash stored at
 * argv[1]; argv[3] is the increment, parsed with strtoll() in base 10.
 * The hash is created when the key does not exist. The new value is
 * stored back as a decimal string (or integer-encoded object in the hash
 * table case) and returned with addReplyLongLong().
 * NOTE(review): declarations, the statement adding 'incr' to 'value',
 * guards/else branches and the closing braces are not visible in this
 * excerpt. */
6007 static void hincrbyCommand(redisClient
*c
) {
6008 long long value
= 0, incr
= 0;
6009 robj
*o
= lookupKeyWrite(c
->db
,c
->argv
[1]);
/* Presumably executed only when the key was not found (guard not
 * visible): create the hash and register it in the database. */
6012 o
= createHashObject();
6013 dictAdd(c
->db
->dict
,c
->argv
[1],o
);
6014 incrRefCount(c
->argv
[1]);
6016 if (o
->type
!= REDIS_HASH
) {
6017 addReply(c
,shared
.wrongtypeerr
);
6022 incr
= strtoll(c
->argv
[3]->ptr
, NULL
, 10);
6023 if (o
->encoding
== REDIS_ENCODING_ZIPMAP
) {
6024 unsigned char *zm
= o
->ptr
;
6025 unsigned char *zval
;
6028 /* Find value if already present in hash */
6029 if (zipmapGet(zm
,c
->argv
[2]->ptr
,sdslen(c
->argv
[2]->ptr
),
6031 /* strtoll needs the char* to have a trailing \0, but
6032 * the zipmap doesn't include them. */
6033 sds szval
= sdsnewlen(zval
, zvlen
);
6034 value
= strtoll(szval
,NULL
,10);
/* Store the number back into the zipmap as a decimal string. */
6039 sds svalue
= sdscatprintf(sdsempty(),"%lld",value
);
6040 zm
= zipmapSet(zm
,c
->argv
[2]->ptr
,sdslen(c
->argv
[2]->ptr
),
6041 (unsigned char*)svalue
,sdslen(svalue
),NULL
);
6045 /* Check if the zipmap needs to be converted. */
6046 if (zipmapLen(zm
) > server
.hash_max_zipmap_entries
)
6047 convertToRealHash(o
);
6052 /* Find value if already present in hash */
6053 de
= dictFind(o
->ptr
,c
->argv
[2]);
6055 hval
= dictGetEntryVal(de
);
/* The stored value is either a raw string or an integer kept directly
 * in the pointer field. */
6056 if (hval
->encoding
== REDIS_ENCODING_RAW
)
6057 value
= strtoll(hval
->ptr
,NULL
,10);
6058 else if (hval
->encoding
== REDIS_ENCODING_INT
)
6059 value
= (long)hval
->ptr
;
6061 redisAssert(1 != 1);
6065 hval
= createObject(REDIS_STRING
,sdscatprintf(sdsempty(),"%lld",value
));
6066 hval
= tryObjectEncoding(hval
);
6067 if (dictReplace(o
->ptr
,c
->argv
[2],hval
)) {
6068 incrRefCount(c
->argv
[2]);
6073 addReplyLongLong(c
, value
);
/* HGET: reply with the value of field argv[2] in the hash stored at
 * argv[1], or shared.nullbulk when it cannot be found. Both the zipmap and
 * the hash table encodings are handled.
 * NOTE(review): declarations, else branches, the final reply of the hash
 * table branch and the closing braces are not visible in this excerpt. */
6076 static void hgetCommand(redisClient
*c
) {
6079 if ((o
= lookupKeyReadOrReply(c
,c
->argv
[1],shared
.nullbulk
)) == NULL
||
6080 checkType(c
,o
,REDIS_HASH
)) return;
6082 if (o
->encoding
== REDIS_ENCODING_ZIPMAP
) {
6083 unsigned char *zm
= o
->ptr
;
/* The field may be integer encoded: look it up by its decoded form. */
6088 field
= getDecodedObject(c
->argv
[2]);
6089 if (zipmapGet(zm
,field
->ptr
,sdslen(field
->ptr
), &val
,&vlen
)) {
/* Emit the bulk reply by hand: "$<len>\r\n", the payload, then CRLF. */
6090 addReplySds(c
,sdscatprintf(sdsempty(),"$%u\r\n", vlen
));
6091 addReplySds(c
,sdsnewlen(val
,vlen
));
6092 addReply(c
,shared
.crlf
);
6093 decrRefCount(field
);
6096 addReply(c
,shared
.nullbulk
);
6097 decrRefCount(field
);
6101 struct dictEntry
*de
;
6103 de
= dictFind(o
->ptr
,c
->argv
[2]);
6105 addReply(c
,shared
.nullbulk
);
6107 robj
*e
= dictGetEntryVal(de
);
6114 static void hdelCommand(redisClient
*c
) {
6118 if ((o
= lookupKeyWriteOrReply(c
,c
->argv
[1],shared
.czero
)) == NULL
||
6119 checkType(c
,o
,REDIS_HASH
)) return;
6121 if (o
->encoding
== REDIS_ENCODING_ZIPMAP
) {
6122 robj
*field
= getDecodedObject(c
->argv
[2]);
6124 o
->ptr
= zipmapDel((unsigned char*) o
->ptr
,
6125 (unsigned char*) field
->ptr
,
6126 sdslen(field
->ptr
), &deleted
);
6127 decrRefCount(field
);
6128 if (zipmapLen((unsigned char*) o
->ptr
) == 0)
6129 deleteKey(c
->db
,c
->argv
[1]);
6131 deleted
= dictDelete((dict
*)o
->ptr
,c
->argv
[2]) == DICT_OK
;
6132 if (htNeedsResize(o
->ptr
)) dictResize(o
->ptr
);
6133 if (dictSize((dict
*)o
->ptr
) == 0) deleteKey(c
->db
,c
->argv
[1]);
6135 if (deleted
) server
.dirty
++;
6136 addReply(c
,deleted
? shared
.cone
: shared
.czero
);
6139 static void hlenCommand(redisClient
*c
) {
6143 if ((o
= lookupKeyReadOrReply(c
,c
->argv
[1],shared
.czero
)) == NULL
||
6144 checkType(c
,o
,REDIS_HASH
)) return;
6146 len
= (o
->encoding
== REDIS_ENCODING_ZIPMAP
) ?
6147 zipmapLen((unsigned char*)o
->ptr
) : dictSize((dict
*)o
->ptr
);
6148 addReplyUlong(c
,len
);
6151 #define REDIS_GETALL_KEYS 1
6152 #define REDIS_GETALL_VALS 2
/* Shared implementation of HKEYS, HVALS and HGETALL for the hash stored at
 * argv[1]. 'flags' is a bitmask of REDIS_GETALL_KEYS and
 * REDIS_GETALL_VALS selecting whether fields, values or both are emitted.
 * The multi-bulk length is not known up front: a placeholder object
 * ('lenobj') is created first and its content is filled in at the end with
 * the final count.
 * NOTE(review): declarations, the statements that queue 'lenobj' and
 * update 'count', else branches and closing braces are not visible in
 * this excerpt. */
6153 static void genericHgetallCommand(redisClient
*c
, int flags
) {
6155 unsigned long count
= 0;
6157 if ((o
= lookupKeyReadOrReply(c
,c
->argv
[1],shared
.nullmultibulk
)) == NULL
6158 || checkType(c
,o
,REDIS_HASH
)) return;
6160 lenobj
= createObject(REDIS_STRING
,NULL
);
6162 decrRefCount(lenobj
);
6164 if (o
->encoding
== REDIS_ENCODING_ZIPMAP
) {
6165 unsigned char *p
= zipmapRewind(o
->ptr
);
6166 unsigned char *field
, *val
;
6167 unsigned int flen
, vlen
;
6169 while((p
= zipmapNext(p
,&field
,&flen
,&val
,&vlen
)) != NULL
) {
/* Zipmap entries are raw buffers: wrap them in temporary string
 * objects to send them as bulk replies. */
6172 if (flags
& REDIS_GETALL_KEYS
) {
6173 aux
= createStringObject((char*)field
,flen
);
6174 addReplyBulk(c
,aux
);
6178 if (flags
& REDIS_GETALL_VALS
) {
6179 aux
= createStringObject((char*)val
,vlen
);
6180 addReplyBulk(c
,aux
);
6186 dictIterator
*di
= dictGetIterator(o
->ptr
);
6189 while((de
= dictNext(di
)) != NULL
) {
6190 robj
*fieldobj
= dictGetEntryKey(de
);
6191 robj
*valobj
= dictGetEntryVal(de
);
6193 if (flags
& REDIS_GETALL_KEYS
) {
6194 addReplyBulk(c
,fieldobj
);
6197 if (flags
& REDIS_GETALL_VALS
) {
6198 addReplyBulk(c
,valobj
);
6202 dictReleaseIterator(di
);
/* Fill in the multi-bulk header now that the count is known. */
6204 lenobj
->ptr
= sdscatprintf(sdsempty(),"*%lu\r\n",count
);
6207 static void hkeysCommand(redisClient
*c
) {
6208 genericHgetallCommand(c
,REDIS_GETALL_KEYS
);
6211 static void hvalsCommand(redisClient
*c
) {
6212 genericHgetallCommand(c
,REDIS_GETALL_VALS
);
6215 static void hgetallCommand(redisClient
*c
) {
6216 genericHgetallCommand(c
,REDIS_GETALL_KEYS
|REDIS_GETALL_VALS
);
6219 static void hexistsCommand(redisClient
*c
) {
6223 if ((o
= lookupKeyReadOrReply(c
,c
->argv
[1],shared
.czero
)) == NULL
||
6224 checkType(c
,o
,REDIS_HASH
)) return;
6226 if (o
->encoding
== REDIS_ENCODING_ZIPMAP
) {
6228 unsigned char *zm
= o
->ptr
;
6230 field
= getDecodedObject(c
->argv
[2]);
6231 exists
= zipmapExists(zm
,field
->ptr
,sdslen(field
->ptr
));
6232 decrRefCount(field
);
6234 exists
= dictFind(o
->ptr
,c
->argv
[2]) != NULL
;
6236 addReply(c
,exists
? shared
.cone
: shared
.czero
);
/* Convert the hash object 'o' from its zipmap representation to a real
 * hash table (REDIS_ENCODING_HT): every zipmap entry is copied into a new
 * dict as a pair of string objects (integer-encoded when possible).
 * 'o' must be a REDIS_HASH that is not already REDIS_ENCODING_HT.
 * NOTE(review): the statements that install the new dict in 'o' and
 * release the old zipmap are presumably on the lines not visible in this
 * excerpt (only the encoding update is shown). */
6239 static void convertToRealHash(robj
*o
) {
6240 unsigned char *key
, *val
, *p
, *zm
= o
->ptr
;
6241 unsigned int klen
, vlen
;
6242 dict
*dict
= dictCreate(&hashDictType
,NULL
);
6244 assert(o
->type
== REDIS_HASH
&& o
->encoding
!= REDIS_ENCODING_HT
);
6245 p
= zipmapRewind(zm
);
6246 while((p
= zipmapNext(p
,&key
,&klen
,&val
,&vlen
)) != NULL
) {
6247 robj
*keyobj
, *valobj
;
6249 keyobj
= createStringObject((char*)key
,klen
);
6250 valobj
= createStringObject((char*)val
,vlen
);
6251 keyobj
= tryObjectEncoding(keyobj
);
6252 valobj
= tryObjectEncoding(valobj
);
6253 dictAdd(dict
,keyobj
,valobj
);
6255 o
->encoding
= REDIS_ENCODING_HT
;
6260 /* ========================= Non type-specific commands ==================== */
6262 static void flushdbCommand(redisClient
*c
) {
6263 server
.dirty
+= dictSize(c
->db
->dict
);
6264 dictEmpty(c
->db
->dict
);
6265 dictEmpty(c
->db
->expires
);
6266 addReply(c
,shared
.ok
);
/* FLUSHALL: empty all the databases (emptyDb() returns the amount added
 * to server.dirty) and reply shared.ok. A background save in progress is
 * killed and its temporary file removed, then the (now empty) dataset is
 * saved synchronously to server.dbfilename.
 * NOTE(review): the lines following rdbSave() and the closing braces are
 * not visible in this excerpt. */
6269 static void flushallCommand(redisClient
*c
) {
6270 server
.dirty
+= emptyDb();
6271 addReply(c
,shared
.ok
);
6272 if (server
.bgsavechildpid
!= -1) {
6273 kill(server
.bgsavechildpid
,SIGKILL
);
6274 rdbRemoveTempFile(server
.bgsavechildpid
);
6276 rdbSave(server
.dbfilename
);
/* Allocate a redisSortOperation with zmalloc() and store 'pattern' in it
 * (the pattern object is referenced, not copied).
 * NOTE(review): the statements storing 'type' and returning the new
 * operation are presumably on the lines not visible in this excerpt. */
6280 static redisSortOperation
*createSortOperation(int type
, robj
*pattern
) {
6281 redisSortOperation
*so
= zmalloc(sizeof(*so
));
6283 so
->pattern
= pattern
;
6287 /* Return the value associated to the key with a name obtained
6288 * substituting the first occurrence of '*' in 'pattern' with 'subst' */
/* The key name is assembled as prefix + subst + postfix inside a
 * stack-allocated buffer of at most REDIS_SORTKEY_MAX bytes (longer names
 * make the function return NULL) and then looked up with lookupKeyRead().
 * NOTE(review): declarations, the handling of the "#" pattern, the
 * "no '*' found" branch and the closing braces are not visible in this
 * excerpt. */
6289 static robj
*lookupKeyByPattern(redisDb
*db
, robj
*pattern
, robj
*subst
) {
6293 int prefixlen
, sublen
, postfixlen
;
6294 /* Exploit the internal sds representation to create an sds string allocated on the stack in order to make this function faster */
6298 char buf
[REDIS_SORTKEY_MAX
+1];
6301 /* If the pattern is "#" return the substitution object itself in order
6302 * to implement the "SORT ... GET #" feature. */
6303 spat
= pattern
->ptr
;
6304 if (spat
[0] == '#' && spat
[1] == '\0') {
6308 /* The substitution object may be specially encoded. If so we create
6309 * a decoded object on the fly. Otherwise getDecodedObject will just
6310 * increment the ref count, that we'll decrement later. */
6311 subst
= getDecodedObject(subst
);
/* The '*' is replaced by 'subst', hence the -1 in the length check. */
6314 if (sdslen(spat
)+sdslen(ssub
)-1 > REDIS_SORTKEY_MAX
) return NULL
;
6315 p
= strchr(spat
,'*');
6317 decrRefCount(subst
);
6322 sublen
= sdslen(ssub
);
6323 postfixlen
= sdslen(spat
)-(prefixlen
+1);
/* Build "<prefix><subst><postfix>" and terminate it. */
6324 memcpy(keyname
.buf
,spat
,prefixlen
);
6325 memcpy(keyname
.buf
+prefixlen
,ssub
,sublen
);
6326 memcpy(keyname
.buf
+prefixlen
+sublen
,p
+1,postfixlen
);
6327 keyname
.buf
[prefixlen
+sublen
+postfixlen
] = '\0';
6328 keyname
.len
= prefixlen
+sublen
+postfixlen
;
/* Point the temporary object past the two long-sized header fields of
 * 'keyname', i.e. at its character buffer. */
6330 initStaticStringObject(keyobj
,((char*)&keyname
)+(sizeof(long)*2))
6331 decrRefCount(subst
);
6333 /* printf("lookup '%s' => %p\n", keyname.buf,de); */
6334 return lookupKeyRead(db
,&keyobj
);
6337 /* sortCompare() is used by qsort in sortCommand(). Given that qsort_r with
6338 * the additional parameter is not standard but BSD-specific, we have to
6339 * pass sorting parameters via the global 'server' structure */
/* The parameters read here are server.sort_alpha (string vs numeric
 * comparison), server.sort_bypattern (compare the looked-up 'cmpobj'
 * instead of the elements themselves) and server.sort_desc (invert the
 * result). Strings are compared with strcoll().
 * NOTE(review): declarations, the assignments to 'cmp' in several
 * branches, else branches and closing braces are not visible in this
 * excerpt. */
6340 static int sortCompare(const void *s1
, const void *s2
) {
6341 const redisSortObject
*so1
= s1
, *so2
= s2
;
6344 if (!server
.sort_alpha
) {
6345 /* Numeric sorting. Here it's trivial as we precomputed scores */
6346 if (so1
->u
.score
> so2
->u
.score
) {
6348 } else if (so1
->u
.score
< so2
->u
.score
) {
6354 /* Alphanumeric sorting */
6355 if (server
.sort_bypattern
) {
6356 if (!so1
->u
.cmpobj
|| !so2
->u
.cmpobj
) {
6357 /* At least one compare object is NULL */
6358 if (so1
->u
.cmpobj
== so2
->u
.cmpobj
)
6360 else if (so1
->u
.cmpobj
== NULL
)
6365 /* We have both the objects, use strcoll */
6366 cmp
= strcoll(so1
->u
.cmpobj
->ptr
,so2
->u
.cmpobj
->ptr
);
6369 /* Compare elements directly */
/* The elements may be integer encoded: compare their decoded forms. */
6372 dec1
= getDecodedObject(so1
->obj
);
6373 dec2
= getDecodedObject(so2
->obj
);
6374 cmp
= strcoll(dec1
->ptr
,dec2
->ptr
);
6379 return server
.sort_desc
? -cmp
: cmp
;
6382 /* The SORT command is the most complex command in Redis. Warning: this code
6383 * is optimized for speed and a bit less for readability */
/* Overview of the visible steps: look up the key to sort (list, set or
 * sorted set), parse the options (ASC, DESC, ALPHA, LIMIT, STORE, BY,
 * GET), load the elements into 'vector', compute the sort keys, sort, then
 * either send the result to the client or store it as a list at the STORE
 * key.
 * NOTE(review): many lines of this function (declarations, option
 * handlers' bodies, else branches, returns and closing braces) are not
 * visible in this excerpt; comments below describe visible code only. */
6384 static void sortCommand(redisClient
*c
) {
6387 int desc
= 0, alpha
= 0;
6388 int limit_start
= 0, limit_count
= -1, start
, end
;
6389 int j
, dontsort
= 0, vectorlen
;
6390 int getop
= 0; /* GET operation counter */
6391 robj
*sortval
, *sortby
= NULL
, *storekey
= NULL
;
6392 redisSortObject
*vector
; /* Resulting vector to sort */
6394 /* Lookup the key to sort. It must be of the right types */
6395 sortval
= lookupKeyRead(c
->db
,c
->argv
[1]);
6396 if (sortval
== NULL
) {
6397 addReply(c
,shared
.nullmultibulk
);
6400 if (sortval
->type
!= REDIS_SET
&& sortval
->type
!= REDIS_LIST
&&
6401 sortval
->type
!= REDIS_ZSET
)
6403 addReply(c
,shared
.wrongtypeerr
);
6407 /* Create a list of operations to perform for every sorted element.
6408 * Operations can be GET/DEL/INCR/DECR */
6409 operations
= listCreate();
6410 listSetFreeMethod(operations
,zfree
);
6413 /* Now we need to protect sortval incrementing its count, in the future
6414 * SORT may have options able to overwrite/delete keys during the sorting
6415 * and the sorted key itself may get destroyed */
6416 incrRefCount(sortval
);
6418 /* The SORT command has an SQL-alike syntax, parse it */
6419 while(j
< c
->argc
) {
/* Number of arguments still available after the current one. */
6420 int leftargs
= c
->argc
-j
-1;
6421 if (!strcasecmp(c
->argv
[j
]->ptr
,"asc")) {
6423 } else if (!strcasecmp(c
->argv
[j
]->ptr
,"desc")) {
6425 } else if (!strcasecmp(c
->argv
[j
]->ptr
,"alpha")) {
6427 } else if (!strcasecmp(c
->argv
[j
]->ptr
,"limit") && leftargs
>= 2) {
6428 limit_start
= atoi(c
->argv
[j
+1]->ptr
);
6429 limit_count
= atoi(c
->argv
[j
+2]->ptr
);
6431 } else if (!strcasecmp(c
->argv
[j
]->ptr
,"store") && leftargs
>= 1) {
6432 storekey
= c
->argv
[j
+1];
6434 } else if (!strcasecmp(c
->argv
[j
]->ptr
,"by") && leftargs
>= 1) {
6435 sortby
= c
->argv
[j
+1];
6436 /* If the BY pattern does not contain '*', i.e. it is constant,
6437 * we don't need to sort nor to lookup the weight keys. */
6438 if (strchr(c
->argv
[j
+1]->ptr
,'*') == NULL
) dontsort
= 1;
6440 } else if (!strcasecmp(c
->argv
[j
]->ptr
,"get") && leftargs
>= 1) {
6441 listAddNodeTail(operations
,createSortOperation(
6442 REDIS_SORT_GET
,c
->argv
[j
+1]));
/* Unrecognized option: release what was acquired so far and report a
 * syntax error. */
6446 decrRefCount(sortval
);
6447 listRelease(operations
);
6448 addReply(c
,shared
.syntaxerr
);
6454 /* Load the sorting vector with all the objects to sort */
6455 switch(sortval
->type
) {
6456 case REDIS_LIST
: vectorlen
= listLength((list
*)sortval
->ptr
); break;
6457 case REDIS_SET
: vectorlen
= dictSize((dict
*)sortval
->ptr
); break;
6458 case REDIS_ZSET
: vectorlen
= dictSize(((zset
*)sortval
->ptr
)->dict
); break;
6459 default: vectorlen
= 0; redisAssert(0); /* Avoid GCC warning */
6461 vector
= zmalloc(sizeof(redisSortObject
)*vectorlen
);
6464 if (sortval
->type
== REDIS_LIST
) {
6465 list
*list
= sortval
->ptr
;
6469 listRewind(list
,&li
);
6470 while((ln
= listNext(&li
))) {
6471 robj
*ele
= ln
->value
;
6472 vector
[j
].obj
= ele
;
6473 vector
[j
].u
.score
= 0;
6474 vector
[j
].u
.cmpobj
= NULL
;
6482 if (sortval
->type
== REDIS_SET
) {
6485 zset
*zs
= sortval
->ptr
;
6489 di
= dictGetIterator(set
);
6490 while((setele
= dictNext(di
)) != NULL
) {
6491 vector
[j
].obj
= dictGetEntryKey(setele
);
6492 vector
[j
].u
.score
= 0;
6493 vector
[j
].u
.cmpobj
= NULL
;
6496 dictReleaseIterator(di
);
6498 redisAssert(j
== vectorlen
);
6500 /* Now it's time to load the right scores in the sorting vector */
6501 if (dontsort
== 0) {
6502 for (j
= 0; j
< vectorlen
; j
++) {
/* With BY: fetch the weight key for this element; elements whose
 * weight is missing or not a string keep their defaults. */
6506 byval
= lookupKeyByPattern(c
->db
,sortby
,vector
[j
].obj
);
6507 if (!byval
|| byval
->type
!= REDIS_STRING
) continue;
6509 vector
[j
].u
.cmpobj
= getDecodedObject(byval
);
6511 if (byval
->encoding
== REDIS_ENCODING_RAW
) {
6512 vector
[j
].u
.score
= strtod(byval
->ptr
,NULL
);
6514 /* Don't need to decode the object if it's
6515 * integer-encoded (the only encoding supported) so
6516 * far. We can just cast it */
6517 if (byval
->encoding
== REDIS_ENCODING_INT
) {
6518 vector
[j
].u
.score
= (long)byval
->ptr
;
6520 redisAssert(1 != 1);
/* Without BY (numeric sort): the score comes from the element itself. */
6525 if (vector
[j
].obj
->encoding
== REDIS_ENCODING_RAW
)
6526 vector
[j
].u
.score
= strtod(vector
[j
].obj
->ptr
,NULL
);
6528 if (vector
[j
].obj
->encoding
== REDIS_ENCODING_INT
)
6529 vector
[j
].u
.score
= (long) vector
[j
].obj
->ptr
;
6531 redisAssert(1 != 1);
6538 /* We are ready to sort the vector... perform a bit of sanity check
6539 * on the LIMIT option too. We'll use a partial version of quicksort. */
6540 start
= (limit_start
< 0) ? 0 : limit_start
;
6541 end
= (limit_count
< 0) ? vectorlen
-1 : start
+limit_count
-1;
6542 if (start
>= vectorlen
) {
6543 start
= vectorlen
-1;
6546 if (end
>= vectorlen
) end
= vectorlen
-1;
6548 if (dontsort
== 0) {
/* sortCompare() reads its parameters from the global 'server'. */
6549 server
.sort_desc
= desc
;
6550 server
.sort_alpha
= alpha
;
6551 server
.sort_bypattern
= sortby
? 1 : 0;
/* pqsort() is used when BY is given and only a sub-range is requested;
 * the plain qsort() below is presumably the else branch. */
6552 if (sortby
&& (start
!= 0 || end
!= vectorlen
-1))
6553 pqsort(vector
,vectorlen
,sizeof(redisSortObject
),sortCompare
, start
,end
);
6555 qsort(vector
,vectorlen
,sizeof(redisSortObject
),sortCompare
);
6558 /* Send command output to the output buffer, performing the specified
6559 * GET/DEL/INCR/DECR operations if any. */
6560 outputlen
= getop
? getop
*(end
-start
+1) : end
-start
+1;
6561 if (storekey
== NULL
) {
6562 /* STORE option not specified, send the sorting result to client */
6563 addReplySds(c
,sdscatprintf(sdsempty(),"*%d\r\n",outputlen
));
6564 for (j
= start
; j
<= end
; j
++) {
6568 if (!getop
) addReplyBulk(c
,vector
[j
].obj
);
6569 listRewind(operations
,&li
);
6570 while((ln
= listNext(&li
))) {
6571 redisSortOperation
*sop
= ln
->value
;
6572 robj
*val
= lookupKeyByPattern(c
->db
,sop
->pattern
,
6575 if (sop
->type
== REDIS_SORT_GET
) {
6576 if (!val
|| val
->type
!= REDIS_STRING
) {
6577 addReply(c
,shared
.nullbulk
);
6579 addReplyBulk(c
,val
);
6582 redisAssert(sop
->type
== REDIS_SORT_GET
); /* always fails */
6587 robj
*listObject
= createListObject();
6588 list
*listPtr
= (list
*) listObject
->ptr
;
6590 /* STORE option specified, set the sorting result as a List object */
6591 for (j
= start
; j
<= end
; j
++) {
6596 listAddNodeTail(listPtr
,vector
[j
].obj
);
6597 incrRefCount(vector
[j
].obj
);
6599 listRewind(operations
,&li
);
6600 while((ln
= listNext(&li
))) {
6601 redisSortOperation
*sop
= ln
->value
;
6602 robj
*val
= lookupKeyByPattern(c
->db
,sop
->pattern
,
6605 if (sop
->type
== REDIS_SORT_GET
) {
6606 if (!val
|| val
->type
!= REDIS_STRING
) {
6607 listAddNodeTail(listPtr
,createStringObject("",0));
6609 listAddNodeTail(listPtr
,val
);
6613 redisAssert(sop
->type
== REDIS_SORT_GET
); /* always fails */
6617 if (dictReplace(c
->db
->dict
,storekey
,listObject
)) {
6618 incrRefCount(storekey
);
6620 /* Note: we add 1 because the DB is dirty anyway since even if the
6621 * SORT result is empty a new key is set and maybe the old content
6623 server
.dirty
+= 1+outputlen
;
6624 addReplySds(c
,sdscatprintf(sdsempty(),":%d\r\n",outputlen
));
6628 decrRefCount(sortval
);
6629 listRelease(operations
);
6630 for (j
= 0; j
< vectorlen
; j
++) {
6631 if (sortby
&& alpha
&& vector
[j
].u
.cmpobj
)
6632 decrRefCount(vector
[j
].u
.cmpobj
);
/* Convert an amount of bytes into a human readable string in the form
 * of 100B, 2G, 100M, 4K, and so forth.
 *
 * 's' receives the NUL terminated result, 'n' is the byte count.
 * Values below 1024 are printed as an integer followed by 'B'; larger
 * values are divided by the biggest power of 1024 not exceeding them and
 * printed with two decimals followed by K, M, G, T or P.
 *
 * The function always writes to 's': values of 1024 petabytes or more fall
 * back to the plain byte count, so the caller never reads a buffer that was
 * left untouched. The longest output is a 20 digit number plus the unit and
 * the terminator, so 's' must provide at least 22 bytes. */
static void bytesToHuman(char *s, unsigned long long n) {
    static const char units[] = "KMGTP";
    unsigned long long scale = 1024ULL; /* divisor for the current unit */
    int i;

    if (n < 1024ULL) {
        /* Bytes */
        sprintf(s,"%lluB",n);
        return;
    }
    for (i = 0; units[i] != '\0'; i++) {
        /* The unit applies while n is smaller than the next power of 1024.
         * scale never exceeds 2^50 here, so the shift cannot overflow. */
        if (n < (scale << 10)) {
            sprintf(s,"%.2f%c",(double)n/(double)scale,units[i]);
            return;
        }
        scale <<= 10;
    }
    /* Bigger than every known unit: report the raw amount of bytes. */
    sprintf(s,"%lluB",n);
}
6658 /* Create the string returned by the INFO command. This is decoupled
6659 * by the INFO command itself as we need to report the same information
6660 * on memory corruption problems. */
6661 static sds
genRedisInfoString(void) {
6663 time_t uptime
= time(NULL
)-server
.stat_starttime
;
6667 bytesToHuman(hmem
,zmalloc_used_memory());
6668 info
= sdscatprintf(sdsempty(),
6669 "redis_version:%s\r\n"
6671 "multiplexing_api:%s\r\n"
6672 "process_id:%ld\r\n"
6673 "uptime_in_seconds:%ld\r\n"
6674 "uptime_in_days:%ld\r\n"
6675 "connected_clients:%d\r\n"
6676 "connected_slaves:%d\r\n"
6677 "blocked_clients:%d\r\n"
6678 "used_memory:%zu\r\n"
6679 "used_memory_human:%s\r\n"
6680 "changes_since_last_save:%lld\r\n"
6681 "bgsave_in_progress:%d\r\n"
6682 "last_save_time:%ld\r\n"
6683 "bgrewriteaof_in_progress:%d\r\n"
6684 "total_connections_received:%lld\r\n"
6685 "total_commands_processed:%lld\r\n"
6686 "expired_keys:%lld\r\n"
6687 "hash_max_zipmap_entries:%ld\r\n"
6688 "hash_max_zipmap_value:%ld\r\n"
6689 "pubsub_channels:%ld\r\n"
6690 "pubsub_patterns:%u\r\n"
6694 (sizeof(long) == 8) ? "64" : "32",
6699 listLength(server
.clients
)-listLength(server
.slaves
),
6700 listLength(server
.slaves
),
6701 server
.blpop_blocked_clients
,
6702 zmalloc_used_memory(),
6705 server
.bgsavechildpid
!= -1,
6707 server
.bgrewritechildpid
!= -1,
6708 server
.stat_numconnections
,
6709 server
.stat_numcommands
,
6710 server
.stat_expiredkeys
,
6711 server
.hash_max_zipmap_entries
,
6712 server
.hash_max_zipmap_value
,
6713 dictSize(server
.pubsub_channels
),
6714 listLength(server
.pubsub_patterns
),
6715 server
.vm_enabled
!= 0,
6716 server
.masterhost
== NULL
? "master" : "slave"
6718 if (server
.masterhost
) {
6719 info
= sdscatprintf(info
,
6720 "master_host:%s\r\n"
6721 "master_port:%d\r\n"
6722 "master_link_status:%s\r\n"
6723 "master_last_io_seconds_ago:%d\r\n"
6726 (server
.replstate
== REDIS_REPL_CONNECTED
) ?
6728 server
.master
? ((int)(time(NULL
)-server
.master
->lastinteraction
)) : -1
6731 if (server
.vm_enabled
) {
6733 info
= sdscatprintf(info
,
6734 "vm_conf_max_memory:%llu\r\n"
6735 "vm_conf_page_size:%llu\r\n"
6736 "vm_conf_pages:%llu\r\n"
6737 "vm_stats_used_pages:%llu\r\n"
6738 "vm_stats_swapped_objects:%llu\r\n"
6739 "vm_stats_swappin_count:%llu\r\n"
6740 "vm_stats_swappout_count:%llu\r\n"
6741 "vm_stats_io_newjobs_len:%lu\r\n"
6742 "vm_stats_io_processing_len:%lu\r\n"
6743 "vm_stats_io_processed_len:%lu\r\n"
6744 "vm_stats_io_active_threads:%lu\r\n"
6745 "vm_stats_blocked_clients:%lu\r\n"
6746 ,(unsigned long long) server
.vm_max_memory
,
6747 (unsigned long long) server
.vm_page_size
,
6748 (unsigned long long) server
.vm_pages
,
6749 (unsigned long long) server
.vm_stats_used_pages
,
6750 (unsigned long long) server
.vm_stats_swapped_objects
,
6751 (unsigned long long) server
.vm_stats_swapins
,
6752 (unsigned long long) server
.vm_stats_swapouts
,
6753 (unsigned long) listLength(server
.io_newjobs
),
6754 (unsigned long) listLength(server
.io_processing
),
6755 (unsigned long) listLength(server
.io_processed
),
6756 (unsigned long) server
.io_active_threads
,
6757 (unsigned long) server
.vm_blocked_clients
6761 for (j
= 0; j
< server
.dbnum
; j
++) {
6762 long long keys
, vkeys
;
6764 keys
= dictSize(server
.db
[j
].dict
);
6765 vkeys
= dictSize(server
.db
[j
].expires
);
6766 if (keys
|| vkeys
) {
6767 info
= sdscatprintf(info
, "db%d:keys=%lld,expires=%lld\r\n",
6774 static void infoCommand(redisClient
*c
) {
6775 sds info
= genRedisInfoString();
6776 addReplySds(c
,sdscatprintf(sdsempty(),"$%lu\r\n",
6777 (unsigned long)sdslen(info
)));
6778 addReplySds(c
,info
);
6779 addReply(c
,shared
.crlf
);
6782 static void monitorCommand(redisClient
*c
) {
6783 /* ignore MONITOR if aleady slave or in monitor mode */
6784 if (c
->flags
& REDIS_SLAVE
) return;
6786 c
->flags
|= (REDIS_SLAVE
|REDIS_MONITOR
);
6788 listAddNodeTail(server
.monitors
,c
);
6789 addReply(c
,shared
.ok
);
6792 /* ================================= Expire ================================= */
6793 static int removeExpire(redisDb
*db
, robj
*key
) {
6794 if (dictDelete(db
->expires
,key
) == DICT_OK
) {
6801 static int setExpire(redisDb
*db
, robj
*key
, time_t when
) {
6802 if (dictAdd(db
->expires
,key
,(void*)when
) == DICT_ERR
) {
6810 /* Return the expire time of the specified key, or -1 if no expire
6811 * is associated with this key (i.e. the key is non volatile) */
6812 static time_t getExpire(redisDb
*db
, robj
*key
) {
6815 /* No expire? return ASAP */
6816 if (dictSize(db
->expires
) == 0 ||
6817 (de
= dictFind(db
->expires
,key
)) == NULL
) return -1;
6819 return (time_t) dictGetEntryVal(de
);
6822 static int expireIfNeeded(redisDb
*db
, robj
*key
) {
6826 /* No expire? return ASAP */
6827 if (dictSize(db
->expires
) == 0 ||
6828 (de
= dictFind(db
->expires
,key
)) == NULL
) return 0;
6830 /* Lookup the expire */
6831 when
= (time_t) dictGetEntryVal(de
);
6832 if (time(NULL
) <= when
) return 0;
6834 /* Delete the key */
6835 dictDelete(db
->expires
,key
);
6836 server
.stat_expiredkeys
++;
6837 return dictDelete(db
->dict
,key
) == DICT_OK
;
6840 static int deleteIfVolatile(redisDb
*db
, robj
*key
) {
6843 /* No expire? return ASAP */
6844 if (dictSize(db
->expires
) == 0 ||
6845 (de
= dictFind(db
->expires
,key
)) == NULL
) return 0;
6847 /* Delete the key */
6849 server
.stat_expiredkeys
++;
6850 dictDelete(db
->expires
,key
);
6851 return dictDelete(db
->dict
,key
) == DICT_OK
;
6854 static void expireGenericCommand(redisClient
*c
, robj
*key
, time_t seconds
) {
6857 de
= dictFind(c
->db
->dict
,key
);
6859 addReply(c
,shared
.czero
);
6863 if (deleteKey(c
->db
,key
)) server
.dirty
++;
6864 addReply(c
, shared
.cone
);
6867 time_t when
= time(NULL
)+seconds
;
6868 if (setExpire(c
->db
,key
,when
)) {
6869 addReply(c
,shared
.cone
);
6872 addReply(c
,shared
.czero
);
6878 static void expireCommand(redisClient
*c
) {
6879 expireGenericCommand(c
,c
->argv
[1],strtol(c
->argv
[2]->ptr
,NULL
,10));
6882 static void expireatCommand(redisClient
*c
) {
6883 expireGenericCommand(c
,c
->argv
[1],strtol(c
->argv
[2]->ptr
,NULL
,10)-time(NULL
));
6886 static void ttlCommand(redisClient
*c
) {
6890 expire
= getExpire(c
->db
,c
->argv
[1]);
6892 ttl
= (int) (expire
-time(NULL
));
6893 if (ttl
< 0) ttl
= -1;
6895 addReplySds(c
,sdscatprintf(sdsempty(),":%d\r\n",ttl
));
6898 /* ================================ MULTI/EXEC ============================== */
6900 /* Client state initialization for MULTI/EXEC */
6901 static void initClientMultiState(redisClient
*c
) {
6902 c
->mstate
.commands
= NULL
;
6903 c
->mstate
.count
= 0;
6906 /* Release all the resources associated with MULTI/EXEC state */
6907 static void freeClientMultiState(redisClient
*c
) {
6910 for (j
= 0; j
< c
->mstate
.count
; j
++) {
6912 multiCmd
*mc
= c
->mstate
.commands
+j
;
6914 for (i
= 0; i
< mc
->argc
; i
++)
6915 decrRefCount(mc
->argv
[i
]);
6918 zfree(c
->mstate
.commands
);
6921 /* Add a new command into the MULTI commands queue */
6922 static void queueMultiCommand(redisClient
*c
, struct redisCommand
*cmd
) {
6926 c
->mstate
.commands
= zrealloc(c
->mstate
.commands
,
6927 sizeof(multiCmd
)*(c
->mstate
.count
+1));
6928 mc
= c
->mstate
.commands
+c
->mstate
.count
;
6931 mc
->argv
= zmalloc(sizeof(robj
*)*c
->argc
);
6932 memcpy(mc
->argv
,c
->argv
,sizeof(robj
*)*c
->argc
);
6933 for (j
= 0; j
< c
->argc
; j
++)
6934 incrRefCount(mc
->argv
[j
]);
6938 static void multiCommand(redisClient
*c
) {
6939 c
->flags
|= REDIS_MULTI
;
6940 addReply(c
,shared
.ok
);
6943 static void discardCommand(redisClient
*c
) {
6944 if (!(c
->flags
& REDIS_MULTI
)) {
6945 addReplySds(c
,sdsnew("-ERR DISCARD without MULTI\r\n"));
6949 freeClientMultiState(c
);
6950 initClientMultiState(c
);
6951 c
->flags
&= (~REDIS_MULTI
);
6952 addReply(c
,shared
.ok
);
6955 static void execCommand(redisClient
*c
) {
6960 if (!(c
->flags
& REDIS_MULTI
)) {
6961 addReplySds(c
,sdsnew("-ERR EXEC without MULTI\r\n"));
6965 orig_argv
= c
->argv
;
6966 orig_argc
= c
->argc
;
6967 addReplySds(c
,sdscatprintf(sdsempty(),"*%d\r\n",c
->mstate
.count
));
6968 for (j
= 0; j
< c
->mstate
.count
; j
++) {
6969 c
->argc
= c
->mstate
.commands
[j
].argc
;
6970 c
->argv
= c
->mstate
.commands
[j
].argv
;
6971 call(c
,c
->mstate
.commands
[j
].cmd
);
6973 c
->argv
= orig_argv
;
6974 c
->argc
= orig_argc
;
6975 freeClientMultiState(c
);
6976 initClientMultiState(c
);
6977 c
->flags
&= (~REDIS_MULTI
);
6980 /* =========================== Blocking Operations ========================= */
6982 /* Currently Redis blocking operations support is limited to list POP ops,
6983 * so the current implementation is not fully generic, but it is also not
6984 * completely specific so it will not require a rewrite to support new
6985 * kind of blocking operations in the future.
6987 * Still it's important to note that list blocking operations can be already
6988 * used as a notification mechanism in order to implement other blocking
6989 * operations at application level, so there must be a very strong evidence
6990 * of usefulness and generality before new blocking operations are implemented.
6992 * This is how the current blocking POP works, we use BLPOP as example:
6993 * - If the user calls BLPOP and the key exists and contains a non-empty list
6994 * then LPOP is called instead. So BLPOP is semantically the same as LPOP
6995 * if there is no need to block.
6996 * - If instead BLPOP is called and the key does not exist or the list is
6997 * empty we need to block. In order to do so we remove the notification for
6998 * new data to read in the client socket (so that we'll not serve new
6999 * requests if the blocking request is not served). Also we put the client
7000 * in a dictionary (db->blockingkeys) mapping keys to a list of clients
7001 * blocking for these keys.
7002 * - If a PUSH operation against a key with blocked clients waiting is
7003 * performed, we serve the first in the list: basically instead of pushing
7004 * the new element inside the list we return it to the (first / oldest)
7005 * blocking client, unblock the client, and remove it from the list.
7007 * The above comment and the source code should be enough in order to understand
7008 * the implementation and modify / fix it later.
7011 /* Set a client in blocking mode for the specified key, with the specified
7013 static void blockForKeys(redisClient
*c
, robj
**keys
, int numkeys
, time_t timeout
) {
7018 c
->blockingkeys
= zmalloc(sizeof(robj
*)*numkeys
);
7019 c
->blockingkeysnum
= numkeys
;
7020 c
->blockingto
= timeout
;
7021 for (j
= 0; j
< numkeys
; j
++) {
7022 /* Add the key in the client structure, to map clients -> keys */
7023 c
->blockingkeys
[j
] = keys
[j
];
7024 incrRefCount(keys
[j
]);
7026 /* And in the other "side", to map keys -> clients */
7027 de
= dictFind(c
->db
->blockingkeys
,keys
[j
]);
7031 /* For every key we take a list of clients blocked for it */
7033 retval
= dictAdd(c
->db
->blockingkeys
,keys
[j
],l
);
7034 incrRefCount(keys
[j
]);
7035 assert(retval
== DICT_OK
);
7037 l
= dictGetEntryVal(de
);
7039 listAddNodeTail(l
,c
);
7041 /* Mark the client as a blocked client */
7042 c
->flags
|= REDIS_BLOCKED
;
7043 server
.blpop_blocked_clients
++;
7046 /* Unblock a client that's waiting in a blocking operation such as BLPOP */
7047 static void unblockClientWaitingData(redisClient
*c
) {
7052 assert(c
->blockingkeys
!= NULL
);
7053 /* The client may wait for multiple keys, so unblock it for every key. */
7054 for (j
= 0; j
< c
->blockingkeysnum
; j
++) {
7055 /* Remove this client from the list of clients waiting for this key. */
7056 de
= dictFind(c
->db
->blockingkeys
,c
->blockingkeys
[j
]);
7058 l
= dictGetEntryVal(de
);
7059 listDelNode(l
,listSearchKey(l
,c
));
7060 /* If the list is empty we need to remove it to avoid wasting memory */
7061 if (listLength(l
) == 0)
7062 dictDelete(c
->db
->blockingkeys
,c
->blockingkeys
[j
]);
7063 decrRefCount(c
->blockingkeys
[j
]);
7065 /* Cleanup the client structure */
7066 zfree(c
->blockingkeys
);
7067 c
->blockingkeys
= NULL
;
7068 c
->flags
&= (~REDIS_BLOCKED
);
7069 server
.blpop_blocked_clients
--;
7070 /* We want to process data if there is some command waiting
7071 * in the input buffer. Note that this is safe even if
7072 * unblockClientWaitingData() gets called from freeClient() because
7073 * freeClient() will be smart enough to call this function
7074 * *after* c->querybuf was set to NULL. */
7075 if (c
->querybuf
&& sdslen(c
->querybuf
) > 0) processInputBuffer(c
);
7078 /* This should be called from any function PUSHing into lists.
7079 * 'c' is the "pushing client", 'key' is the key it is pushing data against,
7080 * 'ele' is the element pushed.
7082 * If the function returns 0 there was no client waiting for a list push
7085 * If the function returns 1 there was a client waiting for a list push
7086 * against this key, the element was passed to this client thus it's not
7087 * needed to actually add it to the list and the caller should return asap. */
7088 static int handleClientsWaitingListPush(redisClient
*c
, robj
*key
, robj
*ele
) {
7089 struct dictEntry
*de
;
7090 redisClient
*receiver
;
7094 de
= dictFind(c
->db
->blockingkeys
,key
);
7095 if (de
== NULL
) return 0;
7096 l
= dictGetEntryVal(de
);
7099 receiver
= ln
->value
;
7101 addReplySds(receiver
,sdsnew("*2\r\n"));
7102 addReplyBulk(receiver
,key
);
7103 addReplyBulk(receiver
,ele
);
7104 unblockClientWaitingData(receiver
);
7108 /* Blocking RPOP/LPOP */
7109 static void blockingPopGenericCommand(redisClient
*c
, int where
) {
7114 for (j
= 1; j
< c
->argc
-1; j
++) {
7115 o
= lookupKeyWrite(c
->db
,c
->argv
[j
]);
7117 if (o
->type
!= REDIS_LIST
) {
7118 addReply(c
,shared
.wrongtypeerr
);
7121 list
*list
= o
->ptr
;
7122 if (listLength(list
) != 0) {
7123 /* If the list contains elements fall back to the usual
7124 * non-blocking POP operation */
7125 robj
*argv
[2], **orig_argv
;
7128 /* We need to alter the command arguments before to call
7129 * popGenericCommand() as the command takes a single key. */
7130 orig_argv
= c
->argv
;
7131 orig_argc
= c
->argc
;
7132 argv
[1] = c
->argv
[j
];
7136 /* Also the return value is different, we need to output
7137 * the multi bulk reply header and the key name. The
7138 * "real" command will add the last element (the value)
7139 * for us. If this souds like an hack to you it's just
7140 * because it is... */
7141 addReplySds(c
,sdsnew("*2\r\n"));
7142 addReplyBulk(c
,argv
[1]);
7143 popGenericCommand(c
,where
);
7145 /* Fix the client structure with the original stuff */
7146 c
->argv
= orig_argv
;
7147 c
->argc
= orig_argc
;
7153 /* If the list is empty or the key does not exists we must block */
7154 timeout
= strtol(c
->argv
[c
->argc
-1]->ptr
,NULL
,10);
7155 if (timeout
> 0) timeout
+= time(NULL
);
7156 blockForKeys(c
,c
->argv
+1,c
->argc
-2,timeout
);
7159 static void blpopCommand(redisClient
*c
) {
7160 blockingPopGenericCommand(c
,REDIS_HEAD
);
7163 static void brpopCommand(redisClient
*c
) {
7164 blockingPopGenericCommand(c
,REDIS_TAIL
);
7167 /* =============================== Replication ============================= */
7169 static int syncWrite(int fd
, char *ptr
, ssize_t size
, int timeout
) {
7170 ssize_t nwritten
, ret
= size
;
7171 time_t start
= time(NULL
);
7175 if (aeWait(fd
,AE_WRITABLE
,1000) & AE_WRITABLE
) {
7176 nwritten
= write(fd
,ptr
,size
);
7177 if (nwritten
== -1) return -1;
7181 if ((time(NULL
)-start
) > timeout
) {
7189 static int syncRead(int fd
, char *ptr
, ssize_t size
, int timeout
) {
7190 ssize_t nread
, totread
= 0;
7191 time_t start
= time(NULL
);
7195 if (aeWait(fd
,AE_READABLE
,1000) & AE_READABLE
) {
7196 nread
= read(fd
,ptr
,size
);
7197 if (nread
== -1) return -1;
7202 if ((time(NULL
)-start
) > timeout
) {
7210 static int syncReadLine(int fd
, char *ptr
, ssize_t size
, int timeout
) {
7217 if (syncRead(fd
,&c
,1,timeout
) == -1) return -1;
7220 if (nread
&& *(ptr
-1) == '\r') *(ptr
-1) = '\0';
7231 static void syncCommand(redisClient
*c
) {
7232 /* ignore SYNC if aleady slave or in monitor mode */
7233 if (c
->flags
& REDIS_SLAVE
) return;
7235 /* SYNC can't be issued when the server has pending data to send to
7236 * the client about already issued commands. We need a fresh reply
7237 * buffer registering the differences between the BGSAVE and the current
7238 * dataset, so that we can copy to other slaves if needed. */
7239 if (listLength(c
->reply
) != 0) {
7240 addReplySds(c
,sdsnew("-ERR SYNC is invalid with pending input\r\n"));
7244 redisLog(REDIS_NOTICE
,"Slave ask for synchronization");
7245 /* Here we need to check if there is a background saving operation
7246 * in progress, or if it is required to start one */
7247 if (server
.bgsavechildpid
!= -1) {
7248 /* Ok a background save is in progress. Let's check if it is a good
7249 * one for replication, i.e. if there is another slave that is
7250 * registering differences since the server forked to save */
7255 listRewind(server
.slaves
,&li
);
7256 while((ln
= listNext(&li
))) {
7258 if (slave
->replstate
== REDIS_REPL_WAIT_BGSAVE_END
) break;
7261 /* Perfect, the server is already registering differences for
7262 * another slave. Set the right state, and copy the buffer. */
7263 listRelease(c
->reply
);
7264 c
->reply
= listDup(slave
->reply
);
7265 c
->replstate
= REDIS_REPL_WAIT_BGSAVE_END
;
7266 redisLog(REDIS_NOTICE
,"Waiting for end of BGSAVE for SYNC");
7268 /* No way, we need to wait for the next BGSAVE in order to
7269 * register differences */
7270 c
->replstate
= REDIS_REPL_WAIT_BGSAVE_START
;
7271 redisLog(REDIS_NOTICE
,"Waiting for next BGSAVE for SYNC");
7274 /* Ok we don't have a BGSAVE in progress, let's start one */
7275 redisLog(REDIS_NOTICE
,"Starting BGSAVE for SYNC");
7276 if (rdbSaveBackground(server
.dbfilename
) != REDIS_OK
) {
7277 redisLog(REDIS_NOTICE
,"Replication failed, can't BGSAVE");
7278 addReplySds(c
,sdsnew("-ERR Unalbe to perform background save\r\n"));
7281 c
->replstate
= REDIS_REPL_WAIT_BGSAVE_END
;
7284 c
->flags
|= REDIS_SLAVE
;
7286 listAddNodeTail(server
.slaves
,c
);
7290 static void sendBulkToSlave(aeEventLoop
*el
, int fd
, void *privdata
, int mask
) {
7291 redisClient
*slave
= privdata
;
7293 REDIS_NOTUSED(mask
);
7294 char buf
[REDIS_IOBUF_LEN
];
7295 ssize_t nwritten
, buflen
;
7297 if (slave
->repldboff
== 0) {
7298 /* Write the bulk write count before to transfer the DB. In theory here
7299 * we don't know how much room there is in the output buffer of the
7300 * socket, but in pratice SO_SNDLOWAT (the minimum count for output
7301 * operations) will never be smaller than the few bytes we need. */
7304 bulkcount
= sdscatprintf(sdsempty(),"$%lld\r\n",(unsigned long long)
7306 if (write(fd
,bulkcount
,sdslen(bulkcount
)) != (signed)sdslen(bulkcount
))
7314 lseek(slave
->repldbfd
,slave
->repldboff
,SEEK_SET
);
7315 buflen
= read(slave
->repldbfd
,buf
,REDIS_IOBUF_LEN
);
7317 redisLog(REDIS_WARNING
,"Read error sending DB to slave: %s",
7318 (buflen
== 0) ? "premature EOF" : strerror(errno
));
7322 if ((nwritten
= write(fd
,buf
,buflen
)) == -1) {
7323 redisLog(REDIS_VERBOSE
,"Write error sending DB to slave: %s",
7328 slave
->repldboff
+= nwritten
;
7329 if (slave
->repldboff
== slave
->repldbsize
) {
7330 close(slave
->repldbfd
);
7331 slave
->repldbfd
= -1;
7332 aeDeleteFileEvent(server
.el
,slave
->fd
,AE_WRITABLE
);
7333 slave
->replstate
= REDIS_REPL_ONLINE
;
7334 if (aeCreateFileEvent(server
.el
, slave
->fd
, AE_WRITABLE
,
7335 sendReplyToClient
, slave
) == AE_ERR
) {
7339 addReplySds(slave
,sdsempty());
7340 redisLog(REDIS_NOTICE
,"Synchronization with slave succeeded");
7344 /* This function is called at the end of every backgrond saving.
7345 * The argument bgsaveerr is REDIS_OK if the background saving succeeded
7346 * otherwise REDIS_ERR is passed to the function.
7348 * The goal of this function is to handle slaves waiting for a successful
7349 * background saving in order to perform non-blocking synchronization. */
7350 static void updateSlavesWaitingBgsave(int bgsaveerr
) {
7352 int startbgsave
= 0;
7355 listRewind(server
.slaves
,&li
);
7356 while((ln
= listNext(&li
))) {
7357 redisClient
*slave
= ln
->value
;
7359 if (slave
->replstate
== REDIS_REPL_WAIT_BGSAVE_START
) {
7361 slave
->replstate
= REDIS_REPL_WAIT_BGSAVE_END
;
7362 } else if (slave
->replstate
== REDIS_REPL_WAIT_BGSAVE_END
) {
7363 struct redis_stat buf
;
7365 if (bgsaveerr
!= REDIS_OK
) {
7367 redisLog(REDIS_WARNING
,"SYNC failed. BGSAVE child returned an error");
7370 if ((slave
->repldbfd
= open(server
.dbfilename
,O_RDONLY
)) == -1 ||
7371 redis_fstat(slave
->repldbfd
,&buf
) == -1) {
7373 redisLog(REDIS_WARNING
,"SYNC failed. Can't open/stat DB after BGSAVE: %s", strerror(errno
));
7376 slave
->repldboff
= 0;
7377 slave
->repldbsize
= buf
.st_size
;
7378 slave
->replstate
= REDIS_REPL_SEND_BULK
;
7379 aeDeleteFileEvent(server
.el
,slave
->fd
,AE_WRITABLE
);
7380 if (aeCreateFileEvent(server
.el
, slave
->fd
, AE_WRITABLE
, sendBulkToSlave
, slave
) == AE_ERR
) {
7387 if (rdbSaveBackground(server
.dbfilename
) != REDIS_OK
) {
7390 listRewind(server
.slaves
,&li
);
7391 redisLog(REDIS_WARNING
,"SYNC failed. BGSAVE failed");
7392 while((ln
= listNext(&li
))) {
7393 redisClient
*slave
= ln
->value
;
7395 if (slave
->replstate
== REDIS_REPL_WAIT_BGSAVE_START
)
7402 static int syncWithMaster(void) {
7403 char buf
[1024], tmpfile
[256], authcmd
[1024];
7405 int fd
= anetTcpConnect(NULL
,server
.masterhost
,server
.masterport
);
7406 int dfd
, maxtries
= 5;
7409 redisLog(REDIS_WARNING
,"Unable to connect to MASTER: %s",
7414 /* AUTH with the master if required. */
7415 if(server
.masterauth
) {
7416 snprintf(authcmd
, 1024, "AUTH %s\r\n", server
.masterauth
);
7417 if (syncWrite(fd
, authcmd
, strlen(server
.masterauth
)+7, 5) == -1) {
7419 redisLog(REDIS_WARNING
,"Unable to AUTH to MASTER: %s",
7423 /* Read the AUTH result. */
7424 if (syncReadLine(fd
,buf
,1024,3600) == -1) {
7426 redisLog(REDIS_WARNING
,"I/O error reading auth result from MASTER: %s",
7430 if (buf
[0] != '+') {
7432 redisLog(REDIS_WARNING
,"Cannot AUTH to MASTER, is the masterauth password correct?");
7437 /* Issue the SYNC command */
7438 if (syncWrite(fd
,"SYNC \r\n",7,5) == -1) {
7440 redisLog(REDIS_WARNING
,"I/O error writing to MASTER: %s",
7444 /* Read the bulk write count */
7445 if (syncReadLine(fd
,buf
,1024,3600) == -1) {
7447 redisLog(REDIS_WARNING
,"I/O error reading bulk count from MASTER: %s",
7451 if (buf
[0] != '$') {
7453 redisLog(REDIS_WARNING
,"Bad protocol from MASTER, the first byte is not '$', are you sure the host and port are right?");
7456 dumpsize
= strtol(buf
+1,NULL
,10);
7457 redisLog(REDIS_NOTICE
,"Receiving %ld bytes data dump from MASTER",dumpsize
);
7458 /* Read the bulk write data on a temp file */
7460 snprintf(tmpfile
,256,
7461 "temp-%d.%ld.rdb",(int)time(NULL
),(long int)getpid());
7462 dfd
= open(tmpfile
,O_CREAT
|O_WRONLY
|O_EXCL
,0644);
7463 if (dfd
!= -1) break;
7468 redisLog(REDIS_WARNING
,"Opening the temp file needed for MASTER <-> SLAVE synchronization: %s",strerror(errno
));
7472 int nread
, nwritten
;
7474 nread
= read(fd
,buf
,(dumpsize
< 1024)?dumpsize
:1024);
7476 redisLog(REDIS_WARNING
,"I/O error trying to sync with MASTER: %s",
7482 nwritten
= write(dfd
,buf
,nread
);
7483 if (nwritten
== -1) {
7484 redisLog(REDIS_WARNING
,"Write error writing to the DB dump file needed for MASTER <-> SLAVE synchrnonization: %s", strerror(errno
));
7492 if (rename(tmpfile
,server
.dbfilename
) == -1) {
7493 redisLog(REDIS_WARNING
,"Failed trying to rename the temp DB into dump.rdb in MASTER <-> SLAVE synchronization: %s", strerror(errno
));
7499 if (rdbLoad(server
.dbfilename
) != REDIS_OK
) {
7500 redisLog(REDIS_WARNING
,"Failed trying to load the MASTER synchronization DB from disk");
7504 server
.master
= createClient(fd
);
7505 server
.master
->flags
|= REDIS_MASTER
;
7506 server
.master
->authenticated
= 1;
7507 server
.replstate
= REDIS_REPL_CONNECTED
;
7511 static void slaveofCommand(redisClient
*c
) {
7512 if (!strcasecmp(c
->argv
[1]->ptr
,"no") &&
7513 !strcasecmp(c
->argv
[2]->ptr
,"one")) {
7514 if (server
.masterhost
) {
7515 sdsfree(server
.masterhost
);
7516 server
.masterhost
= NULL
;
7517 if (server
.master
) freeClient(server
.master
);
7518 server
.replstate
= REDIS_REPL_NONE
;
7519 redisLog(REDIS_NOTICE
,"MASTER MODE enabled (user request)");
7522 sdsfree(server
.masterhost
);
7523 server
.masterhost
= sdsdup(c
->argv
[1]->ptr
);
7524 server
.masterport
= atoi(c
->argv
[2]->ptr
);
7525 if (server
.master
) freeClient(server
.master
);
7526 server
.replstate
= REDIS_REPL_CONNECT
;
7527 redisLog(REDIS_NOTICE
,"SLAVE OF %s:%d enabled (user request)",
7528 server
.masterhost
, server
.masterport
);
7530 addReply(c
,shared
.ok
);
7533 /* ============================ Maxmemory directive ======================== */
7535 /* Try to free one object form the pre-allocated objects free list.
7536 * This is useful under low mem conditions as by default we take 1 million
7537 * free objects allocated. On success REDIS_OK is returned, otherwise
7539 static int tryFreeOneObjectFromFreelist(void) {
7542 if (server
.vm_enabled
) pthread_mutex_lock(&server
.obj_freelist_mutex
);
7543 if (listLength(server
.objfreelist
)) {
7544 listNode
*head
= listFirst(server
.objfreelist
);
7545 o
= listNodeValue(head
);
7546 listDelNode(server
.objfreelist
,head
);
7547 if (server
.vm_enabled
) pthread_mutex_unlock(&server
.obj_freelist_mutex
);
7551 if (server
.vm_enabled
) pthread_mutex_unlock(&server
.obj_freelist_mutex
);
7556 /* This function gets called when 'maxmemory' is set on the config file to limit
7557 * the max memory used by the server, and we are out of memory.
7558 * This function will try to, in order:
7560 * - Free objects from the free list
7561 * - Try to remove keys with an EXPIRE set
7563 * It is not possible to free enough memory to reach used-memory < maxmemory
7564 * the server will start refusing commands that will enlarge even more the
7567 static void freeMemoryIfNeeded(void) {
7568 while (server
.maxmemory
&& zmalloc_used_memory() > server
.maxmemory
) {
7569 int j
, k
, freed
= 0;
7571 if (tryFreeOneObjectFromFreelist() == REDIS_OK
) continue;
7572 for (j
= 0; j
< server
.dbnum
; j
++) {
7574 robj
*minkey
= NULL
;
7575 struct dictEntry
*de
;
7577 if (dictSize(server
.db
[j
].expires
)) {
7579 /* From a sample of three keys drop the one nearest to
7580 * the natural expire */
7581 for (k
= 0; k
< 3; k
++) {
7584 de
= dictGetRandomKey(server
.db
[j
].expires
);
7585 t
= (time_t) dictGetEntryVal(de
);
7586 if (minttl
== -1 || t
< minttl
) {
7587 minkey
= dictGetEntryKey(de
);
7591 deleteKey(server
.db
+j
,minkey
);
7594 if (!freed
) return; /* nothing to free... */
7598 /* ============================== Append Only file ========================== */
7600 static void feedAppendOnlyFile(struct redisCommand
*cmd
, int dictid
, robj
**argv
, int argc
) {
7601 sds buf
= sdsempty();
7607 /* The DB this command was targetting is not the same as the last command
7608 * we appendend. To issue a SELECT command is needed. */
7609 if (dictid
!= server
.appendseldb
) {
7612 snprintf(seldb
,sizeof(seldb
),"%d",dictid
);
7613 buf
= sdscatprintf(buf
,"*2\r\n$6\r\nSELECT\r\n$%lu\r\n%s\r\n",
7614 (unsigned long)strlen(seldb
),seldb
);
7615 server
.appendseldb
= dictid
;
7618 /* "Fix" the argv vector if the command is EXPIRE. We want to translate
7619 * EXPIREs into EXPIREATs calls */
7620 if (cmd
->proc
== expireCommand
) {
7623 tmpargv
[0] = createStringObject("EXPIREAT",8);
7624 tmpargv
[1] = argv
[1];
7625 incrRefCount(argv
[1]);
7626 when
= time(NULL
)+strtol(argv
[2]->ptr
,NULL
,10);
7627 tmpargv
[2] = createObject(REDIS_STRING
,
7628 sdscatprintf(sdsempty(),"%ld",when
));
7632 /* Append the actual command */
7633 buf
= sdscatprintf(buf
,"*%d\r\n",argc
);
7634 for (j
= 0; j
< argc
; j
++) {
7637 o
= getDecodedObject(o
);
7638 buf
= sdscatprintf(buf
,"$%lu\r\n",(unsigned long)sdslen(o
->ptr
));
7639 buf
= sdscatlen(buf
,o
->ptr
,sdslen(o
->ptr
));
7640 buf
= sdscatlen(buf
,"\r\n",2);
7644 /* Free the objects from the modified argv for EXPIREAT */
7645 if (cmd
->proc
== expireCommand
) {
7646 for (j
= 0; j
< 3; j
++)
7647 decrRefCount(argv
[j
]);
7650 /* We want to perform a single write. This should be guaranteed atomic
7651 * at least if the filesystem we are writing is a real physical one.
7652 * While this will save us against the server being killed I don't think
7653 * there is much to do about the whole server stopping for power problems
7655 nwritten
= write(server
.appendfd
,buf
,sdslen(buf
));
7656 if (nwritten
!= (signed)sdslen(buf
)) {
7657 /* Ooops, we are in troubles. The best thing to do for now is
7658 * to simply exit instead to give the illusion that everything is
7659 * working as expected. */
7660 if (nwritten
== -1) {
7661 redisLog(REDIS_WARNING
,"Exiting on error writing to the append-only file: %s",strerror(errno
));
7663 redisLog(REDIS_WARNING
,"Exiting on short write while writing to the append-only file: %s",strerror(errno
));
7667 /* If a background append only file rewriting is in progress we want to
7668 * accumulate the differences between the child DB and the current one
7669 * in a buffer, so that when the child process will do its work we
7670 * can append the differences to the new append only file. */
7671 if (server
.bgrewritechildpid
!= -1)
7672 server
.bgrewritebuf
= sdscatlen(server
.bgrewritebuf
,buf
,sdslen(buf
));
7676 if (server
.appendfsync
== APPENDFSYNC_ALWAYS
||
7677 (server
.appendfsync
== APPENDFSYNC_EVERYSEC
&&
7678 now
-server
.lastfsync
> 1))
7680 fsync(server
.appendfd
); /* Let's try to get this data on the disk */
7681 server
.lastfsync
= now
;
7685 /* In Redis commands are always executed in the context of a client, so in
7686 * order to load the append only file we need to create a fake client. */
7687 static struct redisClient
*createFakeClient(void) {
7688 struct redisClient
*c
= zmalloc(sizeof(*c
));
7692 c
->querybuf
= sdsempty();
7696 /* We set the fake client as a slave waiting for the synchronization
7697 * so that Redis will not try to send replies to this client. */
7698 c
->replstate
= REDIS_REPL_WAIT_BGSAVE_START
;
7699 c
->reply
= listCreate();
7700 listSetFreeMethod(c
->reply
,decrRefCount
);
7701 listSetDupMethod(c
->reply
,dupClientReplyValue
);
7705 static void freeFakeClient(struct redisClient
*c
) {
7706 sdsfree(c
->querybuf
);
7707 listRelease(c
->reply
);
7711 /* Replay the append log file. On success REDIS_OK is returned. On non fatal
7712 * error (the append only file is zero-length) REDIS_ERR is returned. On
7713 * fatal error an error message is logged and the program exits. */
7714 int loadAppendOnlyFile(char *filename
) {
7715 struct redisClient
*fakeClient
;
7716 FILE *fp
= fopen(filename
,"r");
7717 struct redis_stat sb
;
7718 unsigned long long loadedkeys
= 0;
7720 if (redis_fstat(fileno(fp
),&sb
) != -1 && sb
.st_size
== 0)
7724 redisLog(REDIS_WARNING
,"Fatal error: can't open the append log file for reading: %s",strerror(errno
));
7728 fakeClient
= createFakeClient();
7735 struct redisCommand
*cmd
;
7737 if (fgets(buf
,sizeof(buf
),fp
) == NULL
) {
7743 if (buf
[0] != '*') goto fmterr
;
7745 argv
= zmalloc(sizeof(robj
*)*argc
);
7746 for (j
= 0; j
< argc
; j
++) {
7747 if (fgets(buf
,sizeof(buf
),fp
) == NULL
) goto readerr
;
7748 if (buf
[0] != '$') goto fmterr
;
7749 len
= strtol(buf
+1,NULL
,10);
7750 argsds
= sdsnewlen(NULL
,len
);
7751 if (len
&& fread(argsds
,len
,1,fp
) == 0) goto fmterr
;
7752 argv
[j
] = createObject(REDIS_STRING
,argsds
);
7753 if (fread(buf
,2,1,fp
) == 0) goto fmterr
; /* discard CRLF */
7756 /* Command lookup */
7757 cmd
= lookupCommand(argv
[0]->ptr
);
7759 redisLog(REDIS_WARNING
,"Unknown command '%s' reading the append only file", argv
[0]->ptr
);
7762 /* Try object encoding */
7763 if (cmd
->flags
& REDIS_CMD_BULK
)
7764 argv
[argc
-1] = tryObjectEncoding(argv
[argc
-1]);
7765 /* Run the command in the context of a fake client */
7766 fakeClient
->argc
= argc
;
7767 fakeClient
->argv
= argv
;
7768 cmd
->proc(fakeClient
);
7769 /* Discard the reply objects list from the fake client */
7770 while(listLength(fakeClient
->reply
))
7771 listDelNode(fakeClient
->reply
,listFirst(fakeClient
->reply
));
7772 /* Clean up, ready for the next command */
7773 for (j
= 0; j
< argc
; j
++) decrRefCount(argv
[j
]);
7775 /* Handle swapping while loading big datasets when VM is on */
7777 if (server
.vm_enabled
&& (loadedkeys
% 5000) == 0) {
7778 while (zmalloc_used_memory() > server
.vm_max_memory
) {
7779 if (vmSwapOneObjectBlocking() == REDIS_ERR
) break;
7784 freeFakeClient(fakeClient
);
7789 redisLog(REDIS_WARNING
,"Unexpected end of file reading the append only file");
7791 redisLog(REDIS_WARNING
,"Unrecoverable error reading the append only file: %s", strerror(errno
));
7795 redisLog(REDIS_WARNING
,"Bad file format reading the append only file");
7799 /* Write an object into a file in the bulk format $<count>\r\n<payload>\r\n */
7800 static int fwriteBulkObject(FILE *fp
, robj
*obj
) {
7804 /* Avoid the incr/decr ref count business if possible to help
7805 * copy-on-write (we are often in a child process when this function
7807 * Also makes sure that key objects don't get incrRefCount-ed when VM
7809 if (obj
->encoding
!= REDIS_ENCODING_RAW
) {
7810 obj
= getDecodedObject(obj
);
7813 snprintf(buf
,sizeof(buf
),"$%ld\r\n",(long)sdslen(obj
->ptr
));
7814 if (fwrite(buf
,strlen(buf
),1,fp
) == 0) goto err
;
7815 if (sdslen(obj
->ptr
) && fwrite(obj
->ptr
,sdslen(obj
->ptr
),1,fp
) == 0)
7817 if (fwrite("\r\n",2,1,fp
) == 0) goto err
;
7818 if (decrrc
) decrRefCount(obj
);
7821 if (decrrc
) decrRefCount(obj
);
/* Write a binary-safe string into a file in the bulk format
 * $<count>\r\n<payload>\r\n
 *
 * 's' points to 'len' bytes of payload; embedded NUL bytes are fine and 's'
 * is never dereferenced when 'len' is zero.
 *
 * Returns 1 on success, 0 as soon as one of the writes to 'fp' fails. */
static int fwriteBulkString(FILE *fp, char *s, unsigned long len) {
    char buf[128];
    int hdrlen;

    /* 'len' is unsigned, so it has to be formatted with %lu: with %ld a
     * length above LONG_MAX would be emitted as a negative count. */
    hdrlen = snprintf(buf,sizeof(buf),"$%lu\r\n",len);
    if (hdrlen < 0 || (size_t)hdrlen >= sizeof(buf)) return 0;

    if (fwrite(buf,hdrlen,1,fp) == 0) return 0;
    /* fwrite() with a zero size returns 0 even on success, so the payload
     * write is skipped for empty strings. */
    if (len && fwrite(s,len,1,fp) == 0) return 0;
    if (fwrite("\r\n",2,1,fp) == 0) return 0;
    return 1;
}
/* Write a double value in bulk format $<count>\r\n<payload>\r\n
 *
 * The payload is 'd' formatted with "%.17g". Returns 1 on success, 0 if
 * formatting fails or any write to 'fp' fails. */
static int fwriteBulkDouble(FILE *fp, double d) {
    char payload[128], header[128];
    int plen, hlen;

    /* The payload already carries its trailing CRLF, so the advertised
     * count is the formatted length minus those two bytes. Using the
     * snprintf() return value avoids re-scanning the buffers and lets us
     * reject a failed or truncated format instead of underflowing. */
    plen = snprintf(payload,sizeof(payload),"%.17g\r\n",d);
    if (plen < 2 || (size_t)plen >= sizeof(payload)) return 0;
    hlen = snprintf(header,sizeof(header),"$%lu\r\n",(unsigned long)(plen-2));
    if (hlen < 0 || (size_t)hlen >= sizeof(header)) return 0;

    if (fwrite(header,hlen,1,fp) == 0) return 0;
    if (fwrite(payload,plen,1,fp) == 0) return 0;
    return 1;
}
/* Write a long value in bulk format $<count>\r\n<payload>\r\n
 *
 * The payload is 'l' in decimal. Returns 1 on success, 0 if formatting
 * fails or any write to 'fp' fails. */
static int fwriteBulkLong(FILE *fp, long l) {
    char payload[128], header[128];
    int plen, hlen;

    /* The payload already includes its trailing CRLF: the advertised count
     * is the formatted length minus those two bytes. The snprintf() return
     * values are checked so a failed format can't underflow the count. */
    plen = snprintf(payload,sizeof(payload),"%ld\r\n",l);
    if (plen < 2 || (size_t)plen >= sizeof(payload)) return 0;
    hlen = snprintf(header,sizeof(header),"$%lu\r\n",(unsigned long)(plen-2));
    if (hlen < 0 || (size_t)hlen >= sizeof(header)) return 0;

    if (fwrite(header,hlen,1,fp) == 0) return 0;
    if (fwrite(payload,plen,1,fp) == 0) return 0;
    return 1;
}
7859 /* Write a sequence of commands able to fully rebuild the dataset into
7860 * "filename". Used both by REWRITEAOF and BGREWRITEAOF. */
7861 static int rewriteAppendOnlyFile(char *filename
) {
7862 dictIterator
*di
= NULL
;
7867 time_t now
= time(NULL
);
7869 /* Note that we have to use a different temp name here compared to the
7870 * one used by rewriteAppendOnlyFileBackground() function. */
7871 snprintf(tmpfile
,256,"temp-rewriteaof-%d.aof", (int) getpid());
7872 fp
= fopen(tmpfile
,"w");
7874 redisLog(REDIS_WARNING
, "Failed rewriting the append only file: %s", strerror(errno
));
7877 for (j
= 0; j
< server
.dbnum
; j
++) {
7878 char selectcmd
[] = "*2\r\n$6\r\nSELECT\r\n";
7879 redisDb
*db
= server
.db
+j
;
7881 if (dictSize(d
) == 0) continue;
7882 di
= dictGetIterator(d
);
7888 /* SELECT the new DB */
7889 if (fwrite(selectcmd
,sizeof(selectcmd
)-1,1,fp
) == 0) goto werr
;
7890 if (fwriteBulkLong(fp
,j
) == 0) goto werr
;
7892 /* Iterate this DB writing every entry */
7893 while((de
= dictNext(di
)) != NULL
) {
7898 key
= dictGetEntryKey(de
);
7899 /* If the value for this key is swapped, load a preview in memory.
7900 * We use a "swapped" flag to remember if we need to free the
7901 * value object instead to just increment the ref count anyway
7902 * in order to avoid copy-on-write of pages if we are forked() */
7903 if (!server
.vm_enabled
|| key
->storage
== REDIS_VM_MEMORY
||
7904 key
->storage
== REDIS_VM_SWAPPING
) {
7905 o
= dictGetEntryVal(de
);
7908 o
= vmPreviewObject(key
);
7911 expiretime
= getExpire(db
,key
);
7913 /* Save the key and associated value */
7914 if (o
->type
== REDIS_STRING
) {
7915 /* Emit a SET command */
7916 char cmd
[]="*3\r\n$3\r\nSET\r\n";
7917 if (fwrite(cmd
,sizeof(cmd
)-1,1,fp
) == 0) goto werr
;
7919 if (fwriteBulkObject(fp
,key
) == 0) goto werr
;
7920 if (fwriteBulkObject(fp
,o
) == 0) goto werr
;
7921 } else if (o
->type
== REDIS_LIST
) {
7922 /* Emit the RPUSHes needed to rebuild the list */
7923 list
*list
= o
->ptr
;
7927 listRewind(list
,&li
);
7928 while((ln
= listNext(&li
))) {
7929 char cmd
[]="*3\r\n$5\r\nRPUSH\r\n";
7930 robj
*eleobj
= listNodeValue(ln
);
7932 if (fwrite(cmd
,sizeof(cmd
)-1,1,fp
) == 0) goto werr
;
7933 if (fwriteBulkObject(fp
,key
) == 0) goto werr
;
7934 if (fwriteBulkObject(fp
,eleobj
) == 0) goto werr
;
7936 } else if (o
->type
== REDIS_SET
) {
7937 /* Emit the SADDs needed to rebuild the set */
7939 dictIterator
*di
= dictGetIterator(set
);
7942 while((de
= dictNext(di
)) != NULL
) {
7943 char cmd
[]="*3\r\n$4\r\nSADD\r\n";
7944 robj
*eleobj
= dictGetEntryKey(de
);
7946 if (fwrite(cmd
,sizeof(cmd
)-1,1,fp
) == 0) goto werr
;
7947 if (fwriteBulkObject(fp
,key
) == 0) goto werr
;
7948 if (fwriteBulkObject(fp
,eleobj
) == 0) goto werr
;
7950 dictReleaseIterator(di
);
7951 } else if (o
->type
== REDIS_ZSET
) {
7952 /* Emit the ZADDs needed to rebuild the sorted set */
7954 dictIterator
*di
= dictGetIterator(zs
->dict
);
7957 while((de
= dictNext(di
)) != NULL
) {
7958 char cmd
[]="*4\r\n$4\r\nZADD\r\n";
7959 robj
*eleobj
= dictGetEntryKey(de
);
7960 double *score
= dictGetEntryVal(de
);
7962 if (fwrite(cmd
,sizeof(cmd
)-1,1,fp
) == 0) goto werr
;
7963 if (fwriteBulkObject(fp
,key
) == 0) goto werr
;
7964 if (fwriteBulkDouble(fp
,*score
) == 0) goto werr
;
7965 if (fwriteBulkObject(fp
,eleobj
) == 0) goto werr
;
7967 dictReleaseIterator(di
);
7968 } else if (o
->type
== REDIS_HASH
) {
7969 char cmd
[]="*4\r\n$4\r\nHSET\r\n";
7971 /* Emit the HSETs needed to rebuild the hash */
7972 if (o
->encoding
== REDIS_ENCODING_ZIPMAP
) {
7973 unsigned char *p
= zipmapRewind(o
->ptr
);
7974 unsigned char *field
, *val
;
7975 unsigned int flen
, vlen
;
7977 while((p
= zipmapNext(p
,&field
,&flen
,&val
,&vlen
)) != NULL
) {
7978 if (fwrite(cmd
,sizeof(cmd
)-1,1,fp
) == 0) goto werr
;
7979 if (fwriteBulkObject(fp
,key
) == 0) goto werr
;
7980 if (fwriteBulkString(fp
,(char*)field
,flen
) == -1)
7982 if (fwriteBulkString(fp
,(char*)val
,vlen
) == -1)
7986 dictIterator
*di
= dictGetIterator(o
->ptr
);
7989 while((de
= dictNext(di
)) != NULL
) {
7990 robj
*field
= dictGetEntryKey(de
);
7991 robj
*val
= dictGetEntryVal(de
);
7993 if (fwrite(cmd
,sizeof(cmd
)-1,1,fp
) == 0) goto werr
;
7994 if (fwriteBulkObject(fp
,key
) == 0) goto werr
;
7995 if (fwriteBulkObject(fp
,field
) == -1) return -1;
7996 if (fwriteBulkObject(fp
,val
) == -1) return -1;
7998 dictReleaseIterator(di
);
8003 /* Save the expire time */
8004 if (expiretime
!= -1) {
8005 char cmd
[]="*3\r\n$8\r\nEXPIREAT\r\n";
8006 /* If this key is already expired skip it */
8007 if (expiretime
< now
) continue;
8008 if (fwrite(cmd
,sizeof(cmd
)-1,1,fp
) == 0) goto werr
;
8009 if (fwriteBulkObject(fp
,key
) == 0) goto werr
;
8010 if (fwriteBulkLong(fp
,expiretime
) == 0) goto werr
;
8012 if (swapped
) decrRefCount(o
);
8014 dictReleaseIterator(di
);
8017 /* Make sure data will not remain on the OS's output buffers */
8022 /* Use RENAME to make sure the DB file is changed atomically only
8023 * if the generated DB file is ok. */
8024 if (rename(tmpfile
,filename
) == -1) {
8025 redisLog(REDIS_WARNING
,"Error moving temp append only file on the final destination: %s", strerror(errno
));
8029 redisLog(REDIS_NOTICE
,"SYNC append only file rewrite performed");
8035 redisLog(REDIS_WARNING
,"Write error writing append only file on disk: %s", strerror(errno
));
8036 if (di
) dictReleaseIterator(di
);
8040 /* This is how rewriting of the append only file in background works:
8042 * 1) The user calls BGREWRITEAOF
8043 * 2) Redis calls this function, that forks():
8044 * 2a) the child rewrite the append only file in a temp file.
8045 * 2b) the parent accumulates differences in server.bgrewritebuf.
8046 * 3) When the child has finished '2a' it exits.
8047 * 4) The parent will trap the exit code, if it's OK, will append the
8048 * data accumulated into server.bgrewritebuf into the temp file, and
8049 * finally will rename(2) the temp file in the actual file name.
8050 * Then the new file is reopened as the new append only file. Profit!
8052 static int rewriteAppendOnlyFileBackground(void) {
8055 if (server
.bgrewritechildpid
!= -1) return REDIS_ERR
;
8056 if (server
.vm_enabled
) waitEmptyIOJobsQueue();
8057 if ((childpid
= fork()) == 0) {
8061 if (server
.vm_enabled
) vmReopenSwapFile();
8063 snprintf(tmpfile
,256,"temp-rewriteaof-bg-%d.aof", (int) getpid());
8064 if (rewriteAppendOnlyFile(tmpfile
) == REDIS_OK
) {
8071 if (childpid
== -1) {
8072 redisLog(REDIS_WARNING
,
8073 "Can't rewrite append only file in background: fork: %s",
8077 redisLog(REDIS_NOTICE
,
8078 "Background append only file rewriting started by pid %d",childpid
);
8079 server
.bgrewritechildpid
= childpid
;
8080 updateDictResizePolicy();
8081 /* We set appendseldb to -1 in order to force the next call to the
8082 * feedAppendOnlyFile() to issue a SELECT command, so the differences
8083 * accumulated by the parent into server.bgrewritebuf will start
8084 * with a SELECT statement and it will be safe to merge. */
8085 server
.appendseldb
= -1;
8088 return REDIS_OK
; /* unreached */
8091 static void bgrewriteaofCommand(redisClient
*c
) {
8092 if (server
.bgrewritechildpid
!= -1) {
8093 addReplySds(c
,sdsnew("-ERR background append only file rewriting already in progress\r\n"));
8096 if (rewriteAppendOnlyFileBackground() == REDIS_OK
) {
8097 char *status
= "+Background append only file rewriting started\r\n";
8098 addReplySds(c
,sdsnew(status
));
8100 addReply(c
,shared
.err
);
8104 static void aofRemoveTempFile(pid_t childpid
) {
8107 snprintf(tmpfile
,256,"temp-rewriteaof-bg-%d.aof", (int) childpid
);
8111 /* Virtual Memory is composed mainly of two subsystems:
8112 * - Blocking Virtual Memory
8113 * - Threaded Virtual Memory I/O
8114 * The two parts are not fully decoupled, but functions are split among two
8115 * different sections of the source code (delimited by comments) in order to
8116 * make more clear what functionality is about the blocking VM and what about
8117 * the threaded (not blocking) VM.
8121 * Redis VM is a blocking VM (one that blocks reading swapped values from
8122 * disk into memory when a value swapped out is needed in memory) that is made
8123 * unblocking by trying to examine the command argument vector in order to
8124 * load in background values that will likely be needed in order to exec
8125 * the command. The command is executed only once all the relevant keys
8126 * are loaded into memory.
8128 * This is basically almost as simple as a blocking VM, but almost as parallel
8129 * as a fully non-blocking VM.
8132 /* =================== Virtual Memory - Blocking Side ====================== */
8134 /* substitute the first occurrence of '%p' with the process pid in the
8135 * swap file name. */
8136 static void expandVmSwapFilename(void) {
8137 char *p
= strstr(server
.vm_swap_file
,"%p");
8143 new = sdscat(new,server
.vm_swap_file
);
8144 new = sdscatprintf(new,"%ld",(long) getpid());
8145 new = sdscat(new,p
+2);
8146 zfree(server
.vm_swap_file
);
8147 server
.vm_swap_file
= new;
8150 static void vmInit(void) {
8155 if (server
.vm_max_threads
!= 0)
8156 zmalloc_enable_thread_safeness(); /* we need thread safe zmalloc() */
8158 expandVmSwapFilename();
8159 redisLog(REDIS_NOTICE
,"Using '%s' as swap file",server
.vm_swap_file
);
8160 if ((server
.vm_fp
= fopen(server
.vm_swap_file
,"r+b")) == NULL
) {
8161 server
.vm_fp
= fopen(server
.vm_swap_file
,"w+b");
8163 if (server
.vm_fp
== NULL
) {
8164 redisLog(REDIS_WARNING
,
8165 "Impossible to open the swap file: %s. Exiting.",
8169 server
.vm_fd
= fileno(server
.vm_fp
);
8170 server
.vm_next_page
= 0;
8171 server
.vm_near_pages
= 0;
8172 server
.vm_stats_used_pages
= 0;
8173 server
.vm_stats_swapped_objects
= 0;
8174 server
.vm_stats_swapouts
= 0;
8175 server
.vm_stats_swapins
= 0;
8176 totsize
= server
.vm_pages
*server
.vm_page_size
;
8177 redisLog(REDIS_NOTICE
,"Allocating %lld bytes of swap file",totsize
);
8178 if (ftruncate(server
.vm_fd
,totsize
) == -1) {
8179 redisLog(REDIS_WARNING
,"Can't ftruncate swap file: %s. Exiting.",
8183 redisLog(REDIS_NOTICE
,"Swap file allocated with success");
8185 server
.vm_bitmap
= zmalloc((server
.vm_pages
+7)/8);
8186 redisLog(REDIS_VERBOSE
,"Allocated %lld bytes page table for %lld pages",
8187 (long long) (server
.vm_pages
+7)/8, server
.vm_pages
);
8188 memset(server
.vm_bitmap
,0,(server
.vm_pages
+7)/8);
8190 /* Initialize threaded I/O (used by Virtual Memory) */
8191 server
.io_newjobs
= listCreate();
8192 server
.io_processing
= listCreate();
8193 server
.io_processed
= listCreate();
8194 server
.io_ready_clients
= listCreate();
8195 pthread_mutex_init(&server
.io_mutex
,NULL
);
8196 pthread_mutex_init(&server
.obj_freelist_mutex
,NULL
);
8197 pthread_mutex_init(&server
.io_swapfile_mutex
,NULL
);
8198 server
.io_active_threads
= 0;
8199 if (pipe(pipefds
) == -1) {
8200 redisLog(REDIS_WARNING
,"Unable to intialized VM: pipe(2): %s. Exiting."
8204 server
.io_ready_pipe_read
= pipefds
[0];
8205 server
.io_ready_pipe_write
= pipefds
[1];
8206 redisAssert(anetNonBlock(NULL
,server
.io_ready_pipe_read
) != ANET_ERR
);
8207 /* LZF requires a lot of stack */
8208 pthread_attr_init(&server
.io_threads_attr
);
8209 pthread_attr_getstacksize(&server
.io_threads_attr
, &stacksize
);
8210 while (stacksize
< REDIS_THREAD_STACK_SIZE
) stacksize
*= 2;
8211 pthread_attr_setstacksize(&server
.io_threads_attr
, stacksize
);
8212 /* Listen for events in the threaded I/O pipe */
8213 if (aeCreateFileEvent(server
.el
, server
.io_ready_pipe_read
, AE_READABLE
,
8214 vmThreadedIOCompletedJob
, NULL
) == AE_ERR
)
8215 oom("creating file event");
8218 /* Mark the page as used */
8219 static void vmMarkPageUsed(off_t page
) {
8220 off_t byte
= page
/8;
8222 redisAssert(vmFreePage(page
) == 1);
8223 server
.vm_bitmap
[byte
] |= 1<<bit
;
8226 /* Mark N contiguous pages as used, with 'page' being the first. */
8227 static void vmMarkPagesUsed(off_t page
, off_t count
) {
8230 for (j
= 0; j
< count
; j
++)
8231 vmMarkPageUsed(page
+j
);
8232 server
.vm_stats_used_pages
+= count
;
8233 redisLog(REDIS_DEBUG
,"Mark USED pages: %lld pages at %lld\n",
8234 (long long)count
, (long long)page
);
8237 /* Mark the page as free */
8238 static void vmMarkPageFree(off_t page
) {
8239 off_t byte
= page
/8;
8241 redisAssert(vmFreePage(page
) == 0);
8242 server
.vm_bitmap
[byte
] &= ~(1<<bit
);
8245 /* Mark N contiguous pages as free, with 'page' being the first. */
8246 static void vmMarkPagesFree(off_t page
, off_t count
) {
8249 for (j
= 0; j
< count
; j
++)
8250 vmMarkPageFree(page
+j
);
8251 server
.vm_stats_used_pages
-= count
;
8252 redisLog(REDIS_DEBUG
,"Mark FREE pages: %lld pages at %lld\n",
8253 (long long)count
, (long long)page
);
8256 /* Test if the page is free */
8257 static int vmFreePage(off_t page
) {
8258 off_t byte
= page
/8;
8260 return (server
.vm_bitmap
[byte
] & (1<<bit
)) == 0;
8263 /* Find N contiguous free pages storing the first page of the cluster in *first.
8264 * Returns REDIS_OK if it was able to find N contiguous pages, otherwise
8265 * REDIS_ERR is returned.
8267 * This function uses a simple algorithm: we try to allocate
8268 * REDIS_VM_MAX_NEAR_PAGES sequentially, when we reach this limit we start
8269 * again from the start of the swap file searching for free spaces.
8271 * If it looks pretty clear that there are no free pages near our offset
8272 * we try to find less populated places doing a forward jump of
8273 * REDIS_VM_MAX_RANDOM_JUMP, then we start scanning again a few pages
8274 * without hurry, and then we jump again and so forth...
8276 * This function can be improved using a free list to avoid to guess
8277 * too much, since we could collect data about freed pages.
8279 * note: I implemented this function just after watching an episode of
8280 * Battlestar Galactica, where the hybrid was continuing to say "JUMP!"
8282 static int vmFindContiguousPages(off_t
*first
, off_t n
) {
8283 off_t base
, offset
= 0, since_jump
= 0, numfree
= 0;
8285 if (server
.vm_near_pages
== REDIS_VM_MAX_NEAR_PAGES
) {
8286 server
.vm_near_pages
= 0;
8287 server
.vm_next_page
= 0;
8289 server
.vm_near_pages
++; /* Yet another try for pages near to the old ones */
8290 base
= server
.vm_next_page
;
8292 while(offset
< server
.vm_pages
) {
8293 off_t
this = base
+offset
;
8295 /* If we overflow, restart from page zero */
8296 if (this >= server
.vm_pages
) {
8297 this -= server
.vm_pages
;
8299 /* Just overflowed, what we found on tail is no longer
8300 * interesting, as it's no longer contiguous. */
8304 if (vmFreePage(this)) {
8305 /* This is a free page */
8307 /* Already got N free pages? Return to the caller, with success */
8309 *first
= this-(n
-1);
8310 server
.vm_next_page
= this+1;
8311 redisLog(REDIS_DEBUG
, "FOUND CONTIGUOUS PAGES: %lld pages at %lld\n", (long long) n
, (long long) *first
);
8315 /* The current one is not a free page */
8319 /* Fast-forward if the current page is not free and we already
8320 * searched enough near this place. */
8322 if (!numfree
&& since_jump
>= REDIS_VM_MAX_RANDOM_JUMP
/4) {
8323 offset
+= random() % REDIS_VM_MAX_RANDOM_JUMP
;
8325 /* Note that even if we rewind after the jump, we don't need
8326 * to make sure numfree is set to zero as we only jump *if* it
8327 * is set to zero. */
8329 /* Otherwise just check the next page */
8336 /* Write the specified object at the specified page of the swap file */
8337 static int vmWriteObjectOnSwap(robj
*o
, off_t page
) {
8338 if (server
.vm_enabled
) pthread_mutex_lock(&server
.io_swapfile_mutex
);
8339 if (fseeko(server
.vm_fp
,page
*server
.vm_page_size
,SEEK_SET
) == -1) {
8340 if (server
.vm_enabled
) pthread_mutex_unlock(&server
.io_swapfile_mutex
);
8341 redisLog(REDIS_WARNING
,
8342 "Critical VM problem in vmWriteObjectOnSwap(): can't seek: %s",
8346 rdbSaveObject(server
.vm_fp
,o
);
8347 fflush(server
.vm_fp
);
8348 if (server
.vm_enabled
) pthread_mutex_unlock(&server
.io_swapfile_mutex
);
8352 /* Swap the 'val' object relative to 'key' into disk. Store all the information
8353 * needed to later retrieve the object into the key object.
8354 * If we can't find enough contiguous empty pages to swap the object on disk
8355 * REDIS_ERR is returned. */
8356 static int vmSwapObjectBlocking(robj
*key
, robj
*val
) {
8357 off_t pages
= rdbSavedObjectPages(val
,NULL
);
8360 assert(key
->storage
== REDIS_VM_MEMORY
);
8361 assert(key
->refcount
== 1);
8362 if (vmFindContiguousPages(&page
,pages
) == REDIS_ERR
) return REDIS_ERR
;
8363 if (vmWriteObjectOnSwap(val
,page
) == REDIS_ERR
) return REDIS_ERR
;
8364 key
->vm
.page
= page
;
8365 key
->vm
.usedpages
= pages
;
8366 key
->storage
= REDIS_VM_SWAPPED
;
8367 key
->vtype
= val
->type
;
8368 decrRefCount(val
); /* Deallocate the object from memory. */
8369 vmMarkPagesUsed(page
,pages
);
8370 redisLog(REDIS_DEBUG
,"VM: object %s swapped out at %lld (%lld pages)",
8371 (unsigned char*) key
->ptr
,
8372 (unsigned long long) page
, (unsigned long long) pages
);
8373 server
.vm_stats_swapped_objects
++;
8374 server
.vm_stats_swapouts
++;
8378 static robj
*vmReadObjectFromSwap(off_t page
, int type
) {
8381 if (server
.vm_enabled
) pthread_mutex_lock(&server
.io_swapfile_mutex
);
8382 if (fseeko(server
.vm_fp
,page
*server
.vm_page_size
,SEEK_SET
) == -1) {
8383 redisLog(REDIS_WARNING
,
8384 "Unrecoverable VM problem in vmReadObjectFromSwap(): can't seek: %s",
8388 o
= rdbLoadObject(type
,server
.vm_fp
);
8390 redisLog(REDIS_WARNING
, "Unrecoverable VM problem in vmReadObjectFromSwap(): can't load object from swap file: %s", strerror(errno
));
8393 if (server
.vm_enabled
) pthread_mutex_unlock(&server
.io_swapfile_mutex
);
8397 /* Load the value object relative to the 'key' object from swap to memory.
8398 * The newly allocated object is returned.
8400 * If preview is true the unserialized object is returned to the caller but
8401 * no changes are made to the key object, nor the pages are marked as freed */
8402 static robj
*vmGenericLoadObject(robj
*key
, int preview
) {
8405 redisAssert(key
->storage
== REDIS_VM_SWAPPED
|| key
->storage
== REDIS_VM_LOADING
);
8406 val
= vmReadObjectFromSwap(key
->vm
.page
,key
->vtype
);
8408 key
->storage
= REDIS_VM_MEMORY
;
8409 key
->vm
.atime
= server
.unixtime
;
8410 vmMarkPagesFree(key
->vm
.page
,key
->vm
.usedpages
);
8411 redisLog(REDIS_DEBUG
, "VM: object %s loaded from disk",
8412 (unsigned char*) key
->ptr
);
8413 server
.vm_stats_swapped_objects
--;
8415 redisLog(REDIS_DEBUG
, "VM: object %s previewed from disk",
8416 (unsigned char*) key
->ptr
);
8418 server
.vm_stats_swapins
++;
8422 /* Plain object loading, from swap to memory */
8423 static robj
*vmLoadObject(robj
*key
) {
8424 /* If we are loading the object in background, stop it, we
8425 * need to load this object synchronously ASAP. */
8426 if (key
->storage
== REDIS_VM_LOADING
)
8427 vmCancelThreadedIOJob(key
);
8428 return vmGenericLoadObject(key
,0);
8431 /* Just load the value on disk, without to modify the key.
8432 * This is useful when we want to perform some operation on the value
8433 * without to really bring it from swap to memory, like while saving the
8434 * dataset or rewriting the append only log. */
8435 static robj
*vmPreviewObject(robj
*key
) {
8436 return vmGenericLoadObject(key
,1);
8439 /* How good a candidate is this object for swapping?
8440 * The better candidate it is, the greater the returned value.
8442 * Currently we try to perform a fast estimation of the object size in
8443 * memory, and combine it with aging information.
8445 * Basically swappability = idle-time * log(estimated size)
8447 * Bigger objects are preferred over smaller objects, but not
8448 * proportionally, this is why we use the logarithm. This algorithm is
8449 * just a first try and will probably be tuned later. */
8450 static double computeObjectSwappability(robj
*o
) {
8451 time_t age
= server
.unixtime
- o
->vm
.atime
;
8455 struct dictEntry
*de
;
8458 if (age
<= 0) return 0;
8461 if (o
->encoding
!= REDIS_ENCODING_RAW
) {
8464 asize
= sdslen(o
->ptr
)+sizeof(*o
)+sizeof(long)*2;
8469 listNode
*ln
= listFirst(l
);
8471 asize
= sizeof(list
);
8473 robj
*ele
= ln
->value
;
8476 elesize
= (ele
->encoding
== REDIS_ENCODING_RAW
) ?
8477 (sizeof(*o
)+sdslen(ele
->ptr
)) :
8479 asize
+= (sizeof(listNode
)+elesize
)*listLength(l
);
8484 z
= (o
->type
== REDIS_ZSET
);
8485 d
= z
? ((zset
*)o
->ptr
)->dict
: o
->ptr
;
8487 asize
= sizeof(dict
)+(sizeof(struct dictEntry
*)*dictSlots(d
));
8488 if (z
) asize
+= sizeof(zset
)-sizeof(dict
);
8493 de
= dictGetRandomKey(d
);
8494 ele
= dictGetEntryKey(de
);
8495 elesize
= (ele
->encoding
== REDIS_ENCODING_RAW
) ?
8496 (sizeof(*o
)+sdslen(ele
->ptr
)) :
8498 asize
+= (sizeof(struct dictEntry
)+elesize
)*dictSize(d
);
8499 if (z
) asize
+= sizeof(zskiplistNode
)*dictSize(d
);
8503 if (o
->encoding
== REDIS_ENCODING_ZIPMAP
) {
8504 unsigned char *p
= zipmapRewind((unsigned char*)o
->ptr
);
8505 unsigned int len
= zipmapLen((unsigned char*)o
->ptr
);
8506 unsigned int klen
, vlen
;
8507 unsigned char *key
, *val
;
8509 if ((p
= zipmapNext(p
,&key
,&klen
,&val
,&vlen
)) == NULL
) {
8513 asize
= len
*(klen
+vlen
+3);
8514 } else if (o
->encoding
== REDIS_ENCODING_HT
) {
8516 asize
= sizeof(dict
)+(sizeof(struct dictEntry
*)*dictSlots(d
));
8521 de
= dictGetRandomKey(d
);
8522 ele
= dictGetEntryKey(de
);
8523 elesize
= (ele
->encoding
== REDIS_ENCODING_RAW
) ?
8524 (sizeof(*o
)+sdslen(ele
->ptr
)) :
8526 ele
= dictGetEntryVal(de
);
8527 elesize
= (ele
->encoding
== REDIS_ENCODING_RAW
) ?
8528 (sizeof(*o
)+sdslen(ele
->ptr
)) :
8530 asize
+= (sizeof(struct dictEntry
)+elesize
)*dictSize(d
);
8535 return (double)age
*log(1+asize
);
8538 /* Try to swap an object that's a good candidate for swapping.
8539 * Returns REDIS_OK if the object was swapped, REDIS_ERR if it's not possible
8540 * to swap any object at all.
8542 * If 'usethreads' is true, Redis will try to swap the object in background
8543 * using I/O threads. */
8544 static int vmSwapOneObject(int usethreads
) {
8546 struct dictEntry
*best
= NULL
;
8547 double best_swappability
= 0;
8548 redisDb
*best_db
= NULL
;
8551 for (j
= 0; j
< server
.dbnum
; j
++) {
8552 redisDb
*db
= server
.db
+j
;
8553 /* Why maxtries is set to 100?
8554 * Because this way (usually) we'll find 1 object even if just 1% - 2%
8555 * are swappable objects */
8558 if (dictSize(db
->dict
) == 0) continue;
8559 for (i
= 0; i
< 5; i
++) {
8561 double swappability
;
8563 if (maxtries
) maxtries
--;
8564 de
= dictGetRandomKey(db
->dict
);
8565 key
= dictGetEntryKey(de
);
8566 val
= dictGetEntryVal(de
);
8567 /* Only swap objects that are currently in memory.
8569 * Also don't swap shared objects if threaded VM is on, as we
8570 * try to ensure that the main thread does not touch the
8571 * object while the I/O thread is using it, but we can't
8572 * control other keys without adding additional mutex. */
8573 if (key
->storage
!= REDIS_VM_MEMORY
||
8574 (server
.vm_max_threads
!= 0 && val
->refcount
!= 1)) {
8575 if (maxtries
) i
--; /* don't count this try */
8578 swappability
= computeObjectSwappability(val
);
8579 if (!best
|| swappability
> best_swappability
) {
8581 best_swappability
= swappability
;
8586 if (best
== NULL
) return REDIS_ERR
;
8587 key
= dictGetEntryKey(best
);
8588 val
= dictGetEntryVal(best
);
8590 redisLog(REDIS_DEBUG
,"Key with best swappability: %s, %f",
8591 key
->ptr
, best_swappability
);
8593 /* Unshare the key if needed */
8594 if (key
->refcount
> 1) {
8595 robj
*newkey
= dupStringObject(key
);
8597 key
= dictGetEntryKey(best
) = newkey
;
8601 vmSwapObjectThreaded(key
,val
,best_db
);
8604 if (vmSwapObjectBlocking(key
,val
) == REDIS_OK
) {
8605 dictGetEntryVal(best
) = NULL
;
/* Swap one object out synchronously: vmSwapOneObject() with threaded I/O
 * disabled. Returns whatever vmSwapOneObject() returns.
 *
 * The parameter list is spelled (void) so the definition is a real
 * prototype and calls with arguments are rejected by the compiler. */
static int vmSwapOneObjectBlocking(void) {
    return vmSwapOneObject(0);
}
/* Swap one object out asking for the I/O threads to be used:
 * vmSwapOneObject() with threaded I/O enabled. Returns whatever
 * vmSwapOneObject() returns.
 *
 * The parameter list is spelled (void) so the definition is a real
 * prototype and calls with arguments are rejected by the compiler. */
static int vmSwapOneObjectThreaded(void) {
    return vmSwapOneObject(1);
}
8621 /* Return true if it's safe to swap out objects in a given moment.
8622 * Basically we don't want to swap objects out while there is a BGSAVE
8623 * or a BGAEOREWRITE running in backgroud. */
8624 static int vmCanSwapOut(void) {
8625 return (server
.bgsavechildpid
== -1 && server
.bgrewritechildpid
== -1);
8628 /* Delete a key if swapped. Returns 1 if the key was found, was swapped
8629 * and was deleted. Otherwise 0 is returned. */
8630 static int deleteIfSwapped(redisDb
*db
, robj
*key
) {
8634 if ((de
= dictFind(db
->dict
,key
)) == NULL
) return 0;
8635 foundkey
= dictGetEntryKey(de
);
8636 if (foundkey
->storage
== REDIS_VM_MEMORY
) return 0;
8641 /* =================== Virtual Memory - Threaded I/O ======================= */
8643 static void freeIOJob(iojob
*j
) {
8644 if ((j
->type
== REDIS_IOJOB_PREPARE_SWAP
||
8645 j
->type
== REDIS_IOJOB_DO_SWAP
||
8646 j
->type
== REDIS_IOJOB_LOAD
) && j
->val
!= NULL
)
8647 decrRefCount(j
->val
);
8648 /* We don't decrRefCount the j->key field as we didn't increment
8649 * the count when creating I/O jobs. This is because the key field here is
8650 * just used as an identifier and if a key is removed the job should
8651 * never be touched again. */
8655 /* Every time a thread finished a Job, it writes a byte into the write side
8656 * of an unix pipe in order to "awake" the main thread, and this function
8658 static void vmThreadedIOCompletedJob(aeEventLoop
*el
, int fd
, void *privdata
,
8662 int retval
, processed
= 0, toprocess
= -1, trytoswap
= 1;
8664 REDIS_NOTUSED(mask
);
8665 REDIS_NOTUSED(privdata
);
8667 /* For every byte we read in the read side of the pipe, there is one
8668 * I/O job completed to process. */
8669 while((retval
= read(fd
,buf
,1)) == 1) {
8673 struct dictEntry
*de
;
8675 redisLog(REDIS_DEBUG
,"Processing I/O completed job");
8677 /* Get the processed element (the oldest one) */
8679 assert(listLength(server
.io_processed
) != 0);
8680 if (toprocess
== -1) {
8681 toprocess
= (listLength(server
.io_processed
)*REDIS_MAX_COMPLETED_JOBS_PROCESSED
)/100;
8682 if (toprocess
<= 0) toprocess
= 1;
8684 ln
= listFirst(server
.io_processed
);
8686 listDelNode(server
.io_processed
,ln
);
8688 /* If this job is marked as canceled, just ignore it */
8693 /* Post process it in the main thread, as there are things we
8694 * can do just here to avoid race conditions and/or invasive locks */
8695 redisLog(REDIS_DEBUG
,"Job %p type: %d, key at %p (%s) refcount: %d\n", (void*) j
, j
->type
, (void*)j
->key
, (char*)j
->key
->ptr
, j
->key
->refcount
);
8696 de
= dictFind(j
->db
->dict
,j
->key
);
8698 key
= dictGetEntryKey(de
);
8699 if (j
->type
== REDIS_IOJOB_LOAD
) {
8702 /* Key loaded, bring it at home */
8703 key
->storage
= REDIS_VM_MEMORY
;
8704 key
->vm
.atime
= server
.unixtime
;
8705 vmMarkPagesFree(key
->vm
.page
,key
->vm
.usedpages
);
8706 redisLog(REDIS_DEBUG
, "VM: object %s loaded from disk (threaded)",
8707 (unsigned char*) key
->ptr
);
8708 server
.vm_stats_swapped_objects
--;
8709 server
.vm_stats_swapins
++;
8710 dictGetEntryVal(de
) = j
->val
;
8711 incrRefCount(j
->val
);
8714 /* Handle clients waiting for this key to be loaded. */
8715 handleClientsBlockedOnSwappedKey(db
,key
);
8716 } else if (j
->type
== REDIS_IOJOB_PREPARE_SWAP
) {
8717 /* Now we know the amount of pages required to swap this object.
8718 * Let's find some space for it, and queue this task again
8719 * rebranded as REDIS_IOJOB_DO_SWAP. */
8720 if (!vmCanSwapOut() ||
8721 vmFindContiguousPages(&j
->page
,j
->pages
) == REDIS_ERR
)
8723 /* Ooops... no space or we can't swap as there is
8724 * a fork()ed Redis trying to save stuff on disk. */
8726 key
->storage
= REDIS_VM_MEMORY
; /* undo operation */
8728 /* Note that we need to mark this pages as used now,
8729 * if the job will be canceled, we'll mark them as freed
8731 vmMarkPagesUsed(j
->page
,j
->pages
);
8732 j
->type
= REDIS_IOJOB_DO_SWAP
;
8737 } else if (j
->type
== REDIS_IOJOB_DO_SWAP
) {
8740 /* Key swapped. We can finally free some memory. */
8741 if (key
->storage
!= REDIS_VM_SWAPPING
) {
8742 printf("key->storage: %d\n",key
->storage
);
8743 printf("key->name: %s\n",(char*)key
->ptr
);
8744 printf("key->refcount: %d\n",key
->refcount
);
8745 printf("val: %p\n",(void*)j
->val
);
8746 printf("val->type: %d\n",j
->val
->type
);
8747 printf("val->ptr: %s\n",(char*)j
->val
->ptr
);
8749 redisAssert(key
->storage
== REDIS_VM_SWAPPING
);
8750 val
= dictGetEntryVal(de
);
8751 key
->vm
.page
= j
->page
;
8752 key
->vm
.usedpages
= j
->pages
;
8753 key
->storage
= REDIS_VM_SWAPPED
;
8754 key
->vtype
= j
->val
->type
;
8755 decrRefCount(val
); /* Deallocate the object from memory. */
8756 dictGetEntryVal(de
) = NULL
;
8757 redisLog(REDIS_DEBUG
,
8758 "VM: object %s swapped out at %lld (%lld pages) (threaded)",
8759 (unsigned char*) key
->ptr
,
8760 (unsigned long long) j
->page
, (unsigned long long) j
->pages
);
8761 server
.vm_stats_swapped_objects
++;
8762 server
.vm_stats_swapouts
++;
8764 /* Put a few more swap requests in queue if we are still
8766 if (trytoswap
&& vmCanSwapOut() &&
8767 zmalloc_used_memory() > server
.vm_max_memory
)
8772 more
= listLength(server
.io_newjobs
) <
8773 (unsigned) server
.vm_max_threads
;
8775 /* Don't waste CPU time if swappable objects are rare. */
8776 if (vmSwapOneObjectThreaded() == REDIS_ERR
) {
8784 if (processed
== toprocess
) return;
8786 if (retval
< 0 && errno
!= EAGAIN
) {
8787 redisLog(REDIS_WARNING
,
8788 "WARNING: read(2) error in vmThreadedIOCompletedJob() %s",
8793 static void lockThreadedIO(void) {
8794 pthread_mutex_lock(&server
.io_mutex
);
8797 static void unlockThreadedIO(void) {
8798 pthread_mutex_unlock(&server
.io_mutex
);
8801 /* Remove the specified object from the threaded I/O queue if still not
8802 * processed, otherwise make sure to flag it as canceled. */
8803 static void vmCancelThreadedIOJob(robj
*o
) {
8805 server
.io_newjobs
, /* 0 */
8806 server
.io_processing
, /* 1 */
8807 server
.io_processed
/* 2 */
8811 assert(o
->storage
== REDIS_VM_LOADING
|| o
->storage
== REDIS_VM_SWAPPING
);
8814 /* Search for a matching key in one of the queues */
8815 for (i
= 0; i
< 3; i
++) {
8819 listRewind(lists
[i
],&li
);
8820 while ((ln
= listNext(&li
)) != NULL
) {
8821 iojob
*job
= ln
->value
;
8823 if (job
->canceled
) continue; /* Skip this, already canceled. */
8824 if (job
->key
== o
) {
8825 redisLog(REDIS_DEBUG
,"*** CANCELED %p (%s) (type %d) (LIST ID %d)\n",
8826 (void*)job
, (char*)o
->ptr
, job
->type
, i
);
8827 /* Mark the pages as free since the swap didn't happen
8828 * or happened but is now discarded. */
8829 if (i
!= 1 && job
->type
== REDIS_IOJOB_DO_SWAP
)
8830 vmMarkPagesFree(job
->page
,job
->pages
);
8831 /* Cancel the job. It depends on the list the job is
8834 case 0: /* io_newjobs */
8835 /* If the job was yet not processed the best thing to do
8836 * is to remove it from the queue at all */
8838 listDelNode(lists
[i
],ln
);
8840 case 1: /* io_processing */
8841 /* Oh Shi- the thread is messing with the Job:
8843 * Probably it's accessing the object if this is a
8844 * PREPARE_SWAP or DO_SWAP job.
8845 * If it's a LOAD job it may be reading from disk and
8846 * if we don't wait for the job to terminate before to
8847 * cancel it, maybe in a few microseconds data can be
8848 * corrupted in this pages. So the short story is:
8850 * Better to wait for the job to move into the
8851 * next queue (processed)... */
8853 /* We try again and again until the job is completed. */
8855 /* But let's wait some time for the I/O thread
8856 * to finish with this job. After all this condition
8857 * should be very rare. */
8860 case 2: /* io_processed */
8861 /* The job was already processed, that's easy...
8862 * just mark it as canceled so that we'll ignore it
8863 * when processing completed jobs. */
8867 /* Finally we have to adjust the storage type of the object
8868 * in order to "UNDO" the operation. */
8869 if (o
->storage
== REDIS_VM_LOADING
)
8870 o
->storage
= REDIS_VM_SWAPPED
;
8871 else if (o
->storage
== REDIS_VM_SWAPPING
)
8872 o
->storage
= REDIS_VM_MEMORY
;
8879 assert(1 != 1); /* We should never reach this */
8882 static void *IOThreadEntryPoint(void *arg
) {
8887 pthread_detach(pthread_self());
8889 /* Get a new job to process */
8891 if (listLength(server
.io_newjobs
) == 0) {
8892 /* No new jobs in queue, exit. */
8893 redisLog(REDIS_DEBUG
,"Thread %ld exiting, nothing to do",
8894 (long) pthread_self());
8895 server
.io_active_threads
--;
8899 ln
= listFirst(server
.io_newjobs
);
8901 listDelNode(server
.io_newjobs
,ln
);
8902 /* Add the job in the processing queue */
8903 j
->thread
= pthread_self();
8904 listAddNodeTail(server
.io_processing
,j
);
8905 ln
= listLast(server
.io_processing
); /* We use ln later to remove it */
8907 redisLog(REDIS_DEBUG
,"Thread %ld got a new job (type %d): %p about key '%s'",
8908 (long) pthread_self(), j
->type
, (void*)j
, (char*)j
->key
->ptr
);
8910 /* Process the Job */
8911 if (j
->type
== REDIS_IOJOB_LOAD
) {
8912 j
->val
= vmReadObjectFromSwap(j
->page
,j
->key
->vtype
);
8913 } else if (j
->type
== REDIS_IOJOB_PREPARE_SWAP
) {
8914 FILE *fp
= fopen("/dev/null","w+");
8915 j
->pages
= rdbSavedObjectPages(j
->val
,fp
);
8917 } else if (j
->type
== REDIS_IOJOB_DO_SWAP
) {
8918 if (vmWriteObjectOnSwap(j
->val
,j
->page
) == REDIS_ERR
)
8922 /* Done: insert the job into the processed queue */
8923 redisLog(REDIS_DEBUG
,"Thread %ld completed the job: %p (key %s)",
8924 (long) pthread_self(), (void*)j
, (char*)j
->key
->ptr
);
8926 listDelNode(server
.io_processing
,ln
);
8927 listAddNodeTail(server
.io_processed
,j
);
8930 /* Signal the main thread there is new stuff to process */
8931 assert(write(server
.io_ready_pipe_write
,"x",1) == 1);
8933 return NULL
; /* never reached */
8936 static void spawnIOThread(void) {
8938 sigset_t mask
, omask
;
8942 sigaddset(&mask
,SIGCHLD
);
8943 sigaddset(&mask
,SIGHUP
);
8944 sigaddset(&mask
,SIGPIPE
);
8945 pthread_sigmask(SIG_SETMASK
, &mask
, &omask
);
8946 while ((err
= pthread_create(&thread
,&server
.io_threads_attr
,IOThreadEntryPoint
,NULL
)) != 0) {
8947 redisLog(REDIS_WARNING
,"Unable to spawn an I/O thread: %s",
8951 pthread_sigmask(SIG_SETMASK
, &omask
, NULL
);
8952 server
.io_active_threads
++;
8955 /* We need to wait for the last thread to exit before we are able to
8956 * fork() in order to BGSAVE or BGREWRITEAOF. */
8957 static void waitEmptyIOJobsQueue(void) {
8959 int io_processed_len
;
8962 if (listLength(server
.io_newjobs
) == 0 &&
8963 listLength(server
.io_processing
) == 0 &&
8964 server
.io_active_threads
== 0)
8969 /* While waiting for empty jobs queue condition we post-process some
8970 * finshed job, as I/O threads may be hanging trying to write against
8971 * the io_ready_pipe_write FD but there are so much pending jobs that
8973 io_processed_len
= listLength(server
.io_processed
);
8975 if (io_processed_len
) {
8976 vmThreadedIOCompletedJob(NULL
,server
.io_ready_pipe_read
,NULL
,0);
8977 usleep(1000); /* 1 millisecond */
8979 usleep(10000); /* 10 milliseconds */
8984 static void vmReopenSwapFile(void) {
8985 /* Note: we don't close the old one as we are in the child process
8986 * and don't want to mess at all with the original file object. */
8987 server
.vm_fp
= fopen(server
.vm_swap_file
,"r+b");
8988 if (server
.vm_fp
== NULL
) {
8989 redisLog(REDIS_WARNING
,"Can't re-open the VM swap file: %s. Exiting.",
8990 server
.vm_swap_file
);
8993 server
.vm_fd
= fileno(server
.vm_fp
);
8996 /* This function must be called while with threaded IO locked */
8997 static void queueIOJob(iojob
*j
) {
8998 redisLog(REDIS_DEBUG
,"Queued IO Job %p type %d about key '%s'\n",
8999 (void*)j
, j
->type
, (char*)j
->key
->ptr
);
9000 listAddNodeTail(server
.io_newjobs
,j
);
9001 if (server
.io_active_threads
< server
.vm_max_threads
)
9005 static int vmSwapObjectThreaded(robj
*key
, robj
*val
, redisDb
*db
) {
9008 assert(key
->storage
== REDIS_VM_MEMORY
);
9009 assert(key
->refcount
== 1);
9011 j
= zmalloc(sizeof(*j
));
9012 j
->type
= REDIS_IOJOB_PREPARE_SWAP
;
9018 j
->thread
= (pthread_t
) -1;
9019 key
->storage
= REDIS_VM_SWAPPING
;
9027 /* ============ Virtual Memory - Blocking clients on missing keys =========== */
9029 /* This function makes the client 'c' wait for the key 'key' to be loaded.
9030 * If there is not already a job loading the key, it is created.
9031 * The key is added to the io_keys list in the client structure, and also
9032 * in the hash table mapping swapped keys to waiting clients, that is,
9033 * server.io_waited_keys. */
9034 static int waitForSwappedKey(redisClient
*c
, robj
*key
) {
9035 struct dictEntry
*de
;
9039 /* If the key does not exist or is already in RAM we don't need to
9040 * block the client at all. */
9041 de
= dictFind(c
->db
->dict
,key
);
9042 if (de
== NULL
) return 0;
9043 o
= dictGetEntryKey(de
);
9044 if (o
->storage
== REDIS_VM_MEMORY
) {
9046 } else if (o
->storage
== REDIS_VM_SWAPPING
) {
9047 /* We were swapping the key, undo it! */
9048 vmCancelThreadedIOJob(o
);
9052 /* OK: the key is either swapped, or being loaded just now. */
9054 /* Add the key to the list of keys this client is waiting for.
9055 * This maps clients to keys they are waiting for. */
9056 listAddNodeTail(c
->io_keys
,key
);
9059 /* Add the client to the swapped keys => clients waiting map. */
9060 de
= dictFind(c
->db
->io_keys
,key
);
9064 /* For every key we take a list of clients blocked for it */
9066 retval
= dictAdd(c
->db
->io_keys
,key
,l
);
9068 assert(retval
== DICT_OK
);
9070 l
= dictGetEntryVal(de
);
9072 listAddNodeTail(l
,c
);
9074 /* Are we already loading the key from disk? If not create a job */
9075 if (o
->storage
== REDIS_VM_SWAPPED
) {
9078 o
->storage
= REDIS_VM_LOADING
;
9079 j
= zmalloc(sizeof(*j
));
9080 j
->type
= REDIS_IOJOB_LOAD
;
9083 j
->key
->vtype
= o
->vtype
;
9084 j
->page
= o
->vm
.page
;
9087 j
->thread
= (pthread_t
) -1;
9095 /* Preload keys needed for the ZUNION and ZINTER commands. */
9096 static void zunionInterBlockClientOnSwappedKeys(redisClient
*c
) {
9098 num
= atoi(c
->argv
[2]->ptr
);
9099 for (i
= 0; i
< num
; i
++) {
9100 waitForSwappedKey(c
,c
->argv
[3+i
]);
9104 /* Is this client attempting to run a command against swapped keys?
9105 * If so, block it ASAP, load the keys in background, then resume it.
9107 * The important idea about this function is that it can fail! If keys will
9108 * still be swapped when the client is resumed, this key lookups will
9109 * just block loading keys from disk. In practical terms this should only
9110 * happen with SORT BY command or if there is a bug in this function.
9112 * Return 1 if the client is marked as blocked, 0 if the client can
9113 * continue as the keys it is going to access appear to be in memory. */
9114 static int blockClientOnSwappedKeys(struct redisCommand
*cmd
, redisClient
*c
) {
9117 if (cmd
->vm_preload_proc
!= NULL
) {
9118 cmd
->vm_preload_proc(c
);
9120 if (cmd
->vm_firstkey
== 0) return 0;
9121 last
= cmd
->vm_lastkey
;
9122 if (last
< 0) last
= c
->argc
+last
;
9123 for (j
= cmd
->vm_firstkey
; j
<= last
; j
+= cmd
->vm_keystep
)
9124 waitForSwappedKey(c
,c
->argv
[j
]);
9127 /* If the client was blocked for at least one key, mark it as blocked. */
9128 if (listLength(c
->io_keys
)) {
9129 c
->flags
|= REDIS_IO_WAIT
;
9130 aeDeleteFileEvent(server
.el
,c
->fd
,AE_READABLE
);
9131 server
.vm_blocked_clients
++;
9138 /* Remove the 'key' from the list of blocked keys for a given client.
9140 * The function returns 1 when there are no longer blocking keys after
9141 * the current one was removed (and the client can be unblocked). */
9142 static int dontWaitForSwappedKey(redisClient
*c
, robj
*key
) {
9146 struct dictEntry
*de
;
9148 /* Remove the key from the list of keys this client is waiting for. */
9149 listRewind(c
->io_keys
,&li
);
9150 while ((ln
= listNext(&li
)) != NULL
) {
9151 if (compareStringObjects(ln
->value
,key
) == 0) {
9152 listDelNode(c
->io_keys
,ln
);
9158 /* Remove the client form the key => waiting clients map. */
9159 de
= dictFind(c
->db
->io_keys
,key
);
9161 l
= dictGetEntryVal(de
);
9162 ln
= listSearchKey(l
,c
);
9165 if (listLength(l
) == 0)
9166 dictDelete(c
->db
->io_keys
,key
);
9168 return listLength(c
->io_keys
) == 0;
9171 static void handleClientsBlockedOnSwappedKey(redisDb
*db
, robj
*key
) {
9172 struct dictEntry
*de
;
9177 de
= dictFind(db
->io_keys
,key
);
9180 l
= dictGetEntryVal(de
);
9181 len
= listLength(l
);
9182 /* Note: we can't use something like while(listLength(l)) as the list
9183 * can be freed by the calling function when we remove the last element. */
9186 redisClient
*c
= ln
->value
;
9188 if (dontWaitForSwappedKey(c
,key
)) {
9189 /* Put the client in the list of clients ready to go as we
9190 * loaded all the keys about it. */
9191 listAddNodeTail(server
.io_ready_clients
,c
);
9196 /* =========================== Remote Configuration ========================= */
9198 static void configSetCommand(redisClient
*c
) {
9199 robj
*o
= getDecodedObject(c
->argv
[3]);
9200 if (!strcasecmp(c
->argv
[2]->ptr
,"dbfilename")) {
9201 zfree(server
.dbfilename
);
9202 server
.dbfilename
= zstrdup(o
->ptr
);
9203 } else if (!strcasecmp(c
->argv
[2]->ptr
,"requirepass")) {
9204 zfree(server
.requirepass
);
9205 server
.requirepass
= zstrdup(o
->ptr
);
9206 } else if (!strcasecmp(c
->argv
[2]->ptr
,"masterauth")) {
9207 zfree(server
.masterauth
);
9208 server
.masterauth
= zstrdup(o
->ptr
);
9209 } else if (!strcasecmp(c
->argv
[2]->ptr
,"maxmemory")) {
9210 server
.maxmemory
= strtoll(o
->ptr
, NULL
, 10);
9212 addReplySds(c
,sdscatprintf(sdsempty(),
9213 "-ERR not supported CONFIG parameter %s\r\n",
9214 (char*)c
->argv
[2]->ptr
));
9219 addReply(c
,shared
.ok
);
9222 static void configGetCommand(redisClient
*c
) {
9223 robj
*o
= getDecodedObject(c
->argv
[2]);
9224 robj
*lenobj
= createObject(REDIS_STRING
,NULL
);
9225 char *pattern
= o
->ptr
;
9229 decrRefCount(lenobj
);
9231 if (stringmatch(pattern
,"dbfilename",0)) {
9232 addReplyBulkCString(c
,"dbfilename");
9233 addReplyBulkCString(c
,server
.dbfilename
);
9236 if (stringmatch(pattern
,"requirepass",0)) {
9237 addReplyBulkCString(c
,"requirepass");
9238 addReplyBulkCString(c
,server
.requirepass
);
9241 if (stringmatch(pattern
,"masterauth",0)) {
9242 addReplyBulkCString(c
,"masterauth");
9243 addReplyBulkCString(c
,server
.masterauth
);
9246 if (stringmatch(pattern
,"maxmemory",0)) {
9249 snprintf(buf
,128,"%llu\n",server
.maxmemory
);
9250 addReplyBulkCString(c
,"maxmemory");
9251 addReplyBulkCString(c
,buf
);
9255 lenobj
->ptr
= sdscatprintf(sdsempty(),"*%d\r\n",matches
*2);
9258 static void configCommand(redisClient
*c
) {
9259 if (!strcasecmp(c
->argv
[1]->ptr
,"set")) {
9260 if (c
->argc
!= 4) goto badarity
;
9261 configSetCommand(c
);
9262 } else if (!strcasecmp(c
->argv
[1]->ptr
,"get")) {
9263 if (c
->argc
!= 3) goto badarity
;
9264 configGetCommand(c
);
9265 } else if (!strcasecmp(c
->argv
[1]->ptr
,"resetstat")) {
9266 if (c
->argc
!= 2) goto badarity
;
9267 server
.stat_numcommands
= 0;
9268 server
.stat_numconnections
= 0;
9269 server
.stat_expiredkeys
= 0;
9270 server
.stat_starttime
= time(NULL
);
9271 addReply(c
,shared
.ok
);
9273 addReplySds(c
,sdscatprintf(sdsempty(),
9274 "-ERR CONFIG subcommand must be one of GET, SET, RESETSTAT\r\n"));
9279 addReplySds(c
,sdscatprintf(sdsempty(),
9280 "-ERR Wrong number of arguments for CONFIG %s\r\n",
9281 (char*) c
->argv
[1]->ptr
));
9284 /* =========================== Pubsub implementation ======================== */
9286 static void freePubsubPattern(void *p
) {
9287 pubsubPattern
*pat
= p
;
9289 decrRefCount(pat
->pattern
);
9293 static int listMatchPubsubPattern(void *a
, void *b
) {
9294 pubsubPattern
*pa
= a
, *pb
= b
;
9296 return (pa
->client
== pb
->client
) &&
9297 (compareStringObjects(pa
->pattern
,pb
->pattern
) == 0);
9300 /* Subscribe a client to a channel. Returns 1 if the operation succeeded, or
9301 * 0 if the client was already subscribed to that channel. */
9302 static int pubsubSubscribeChannel(redisClient
*c
, robj
*channel
) {
9303 struct dictEntry
*de
;
9304 list
*clients
= NULL
;
9307 /* Add the channel to the client -> channels hash table */
9308 if (dictAdd(c
->pubsub_channels
,channel
,NULL
) == DICT_OK
) {
9310 incrRefCount(channel
);
9311 /* Add the client to the channel -> list of clients hash table */
9312 de
= dictFind(server
.pubsub_channels
,channel
);
9314 clients
= listCreate();
9315 dictAdd(server
.pubsub_channels
,channel
,clients
);
9316 incrRefCount(channel
);
9318 clients
= dictGetEntryVal(de
);
9320 listAddNodeTail(clients
,c
);
9322 /* Notify the client */
9323 addReply(c
,shared
.mbulk3
);
9324 addReply(c
,shared
.subscribebulk
);
9325 addReplyBulk(c
,channel
);
9326 addReplyLong(c
,dictSize(c
->pubsub_channels
)+listLength(c
->pubsub_patterns
));
9330 /* Unsubscribe a client from a channel. Returns 1 if the operation succeeded, or
9331 * 0 if the client was not subscribed to the specified channel. */
9332 static int pubsubUnsubscribeChannel(redisClient
*c
, robj
*channel
, int notify
) {
9333 struct dictEntry
*de
;
9338 /* Remove the channel from the client -> channels hash table */
9339 incrRefCount(channel
); /* channel may be just a pointer to the same object
9340 we have in the hash tables. Protect it... */
9341 if (dictDelete(c
->pubsub_channels
,channel
) == DICT_OK
) {
9343 /* Remove the client from the channel -> clients list hash table */
9344 de
= dictFind(server
.pubsub_channels
,channel
);
9346 clients
= dictGetEntryVal(de
);
9347 ln
= listSearchKey(clients
,c
);
9349 listDelNode(clients
,ln
);
9350 if (listLength(clients
) == 0) {
9351 /* Free the list and associated hash entry at all if this was
9352 * the latest client, so that it will be possible to abuse
9353 * Redis PUBSUB creating millions of channels. */
9354 dictDelete(server
.pubsub_channels
,channel
);
9357 /* Notify the client */
9359 addReply(c
,shared
.mbulk3
);
9360 addReply(c
,shared
.unsubscribebulk
);
9361 addReplyBulk(c
,channel
);
9362 addReplyLong(c
,dictSize(c
->pubsub_channels
)+
9363 listLength(c
->pubsub_patterns
));
9366 decrRefCount(channel
); /* it is finally safe to release it */
9370 /* Subscribe a client to a pattern. Returns 1 if the operation succeeded, or 0 if the clinet was already subscribed to that pattern. */
9371 static int pubsubSubscribePattern(redisClient
*c
, robj
*pattern
) {
9374 if (listSearchKey(c
->pubsub_patterns
,pattern
) == NULL
) {
9377 listAddNodeTail(c
->pubsub_patterns
,pattern
);
9378 incrRefCount(pattern
);
9379 pat
= zmalloc(sizeof(*pat
));
9380 pat
->pattern
= getDecodedObject(pattern
);
9382 listAddNodeTail(server
.pubsub_patterns
,pat
);
9384 /* Notify the client */
9385 addReply(c
,shared
.mbulk3
);
9386 addReply(c
,shared
.psubscribebulk
);
9387 addReplyBulk(c
,pattern
);
9388 addReplyLong(c
,dictSize(c
->pubsub_channels
)+listLength(c
->pubsub_patterns
));
9392 /* Unsubscribe a client from a channel. Returns 1 if the operation succeeded, or
9393 * 0 if the client was not subscribed to the specified channel. */
9394 static int pubsubUnsubscribePattern(redisClient
*c
, robj
*pattern
, int notify
) {
9399 incrRefCount(pattern
); /* Protect the object. May be the same we remove */
9400 if ((ln
= listSearchKey(c
->pubsub_patterns
,pattern
)) != NULL
) {
9402 listDelNode(c
->pubsub_patterns
,ln
);
9404 pat
.pattern
= pattern
;
9405 ln
= listSearchKey(server
.pubsub_patterns
,&pat
);
9406 listDelNode(server
.pubsub_patterns
,ln
);
9408 /* Notify the client */
9410 addReply(c
,shared
.mbulk3
);
9411 addReply(c
,shared
.punsubscribebulk
);
9412 addReplyBulk(c
,pattern
);
9413 addReplyLong(c
,dictSize(c
->pubsub_channels
)+
9414 listLength(c
->pubsub_patterns
));
9416 decrRefCount(pattern
);
9420 /* Unsubscribe from all the channels. Return the number of channels the
9421 * client was subscribed from. */
9422 static int pubsubUnsubscribeAllChannels(redisClient
*c
, int notify
) {
9423 dictIterator
*di
= dictGetIterator(c
->pubsub_channels
);
9427 while((de
= dictNext(di
)) != NULL
) {
9428 robj
*channel
= dictGetEntryKey(de
);
9430 count
+= pubsubUnsubscribeChannel(c
,channel
,notify
);
9432 dictReleaseIterator(di
);
9436 /* Unsubscribe from all the patterns. Return the number of patterns the
9437 * client was subscribed from. */
9438 static int pubsubUnsubscribeAllPatterns(redisClient
*c
, int notify
) {
9443 listRewind(c
->pubsub_patterns
,&li
);
9444 while ((ln
= listNext(&li
)) != NULL
) {
9445 robj
*pattern
= ln
->value
;
9447 count
+= pubsubUnsubscribePattern(c
,pattern
,notify
);
9452 /* Publish a message */
9453 static int pubsubPublishMessage(robj
*channel
, robj
*message
) {
9455 struct dictEntry
*de
;
9459 /* Send to clients listening for that channel */
9460 de
= dictFind(server
.pubsub_channels
,channel
);
9462 list
*list
= dictGetEntryVal(de
);
9466 listRewind(list
,&li
);
9467 while ((ln
= listNext(&li
)) != NULL
) {
9468 redisClient
*c
= ln
->value
;
9470 addReply(c
,shared
.mbulk3
);
9471 addReply(c
,shared
.messagebulk
);
9472 addReplyBulk(c
,channel
);
9473 addReplyBulk(c
,message
);
9477 /* Send to clients listening to matching channels */
9478 if (listLength(server
.pubsub_patterns
)) {
9479 listRewind(server
.pubsub_patterns
,&li
);
9480 channel
= getDecodedObject(channel
);
9481 while ((ln
= listNext(&li
)) != NULL
) {
9482 pubsubPattern
*pat
= ln
->value
;
9484 if (stringmatchlen((char*)pat
->pattern
->ptr
,
9485 sdslen(pat
->pattern
->ptr
),
9486 (char*)channel
->ptr
,
9487 sdslen(channel
->ptr
),0)) {
9488 addReply(pat
->client
,shared
.mbulk3
);
9489 addReply(pat
->client
,shared
.messagebulk
);
9490 addReplyBulk(pat
->client
,channel
);
9491 addReplyBulk(pat
->client
,message
);
9495 decrRefCount(channel
);
9500 static void subscribeCommand(redisClient
*c
) {
9503 for (j
= 1; j
< c
->argc
; j
++)
9504 pubsubSubscribeChannel(c
,c
->argv
[j
]);
9507 static void unsubscribeCommand(redisClient
*c
) {
9509 pubsubUnsubscribeAllChannels(c
,1);
9514 for (j
= 1; j
< c
->argc
; j
++)
9515 pubsubUnsubscribeChannel(c
,c
->argv
[j
],1);
9519 static void psubscribeCommand(redisClient
*c
) {
9522 for (j
= 1; j
< c
->argc
; j
++)
9523 pubsubSubscribePattern(c
,c
->argv
[j
]);
9526 static void punsubscribeCommand(redisClient
*c
) {
9528 pubsubUnsubscribeAllPatterns(c
,1);
9533 for (j
= 1; j
< c
->argc
; j
++)
9534 pubsubUnsubscribePattern(c
,c
->argv
[j
],1);
9538 static void publishCommand(redisClient
*c
) {
9539 int receivers
= pubsubPublishMessage(c
->argv
[1],c
->argv
[2]);
9540 addReplyLong(c
,receivers
);
9543 /* ================================= Debugging ============================== */
9545 static void debugCommand(redisClient
*c
) {
9546 if (!strcasecmp(c
->argv
[1]->ptr
,"segfault")) {
9548 } else if (!strcasecmp(c
->argv
[1]->ptr
,"reload")) {
9549 if (rdbSave(server
.dbfilename
) != REDIS_OK
) {
9550 addReply(c
,shared
.err
);
9554 if (rdbLoad(server
.dbfilename
) != REDIS_OK
) {
9555 addReply(c
,shared
.err
);
9558 redisLog(REDIS_WARNING
,"DB reloaded by DEBUG RELOAD");
9559 addReply(c
,shared
.ok
);
9560 } else if (!strcasecmp(c
->argv
[1]->ptr
,"loadaof")) {
9562 if (loadAppendOnlyFile(server
.appendfilename
) != REDIS_OK
) {
9563 addReply(c
,shared
.err
);
9566 redisLog(REDIS_WARNING
,"Append Only File loaded by DEBUG LOADAOF");
9567 addReply(c
,shared
.ok
);
9568 } else if (!strcasecmp(c
->argv
[1]->ptr
,"object") && c
->argc
== 3) {
9569 dictEntry
*de
= dictFind(c
->db
->dict
,c
->argv
[2]);
9573 addReply(c
,shared
.nokeyerr
);
9576 key
= dictGetEntryKey(de
);
9577 val
= dictGetEntryVal(de
);
9578 if (!server
.vm_enabled
|| (key
->storage
== REDIS_VM_MEMORY
||
9579 key
->storage
== REDIS_VM_SWAPPING
)) {
9583 if (val
->encoding
< (sizeof(strencoding
)/sizeof(char*))) {
9584 strenc
= strencoding
[val
->encoding
];
9586 snprintf(buf
,64,"unknown encoding %d\n", val
->encoding
);
9589 addReplySds(c
,sdscatprintf(sdsempty(),
9590 "+Key at:%p refcount:%d, value at:%p refcount:%d "
9591 "encoding:%s serializedlength:%lld\r\n",
9592 (void*)key
, key
->refcount
, (void*)val
, val
->refcount
,
9593 strenc
, (long long) rdbSavedObjectLen(val
,NULL
)));
9595 addReplySds(c
,sdscatprintf(sdsempty(),
9596 "+Key at:%p refcount:%d, value swapped at: page %llu "
9597 "using %llu pages\r\n",
9598 (void*)key
, key
->refcount
, (unsigned long long) key
->vm
.page
,
9599 (unsigned long long) key
->vm
.usedpages
));
9601 } else if (!strcasecmp(c
->argv
[1]->ptr
,"swapin") && c
->argc
== 3) {
9602 lookupKeyRead(c
->db
,c
->argv
[2]);
9603 addReply(c
,shared
.ok
);
9604 } else if (!strcasecmp(c
->argv
[1]->ptr
,"swapout") && c
->argc
== 3) {
9605 dictEntry
*de
= dictFind(c
->db
->dict
,c
->argv
[2]);
9608 if (!server
.vm_enabled
) {
9609 addReplySds(c
,sdsnew("-ERR Virtual Memory is disabled\r\n"));
9613 addReply(c
,shared
.nokeyerr
);
9616 key
= dictGetEntryKey(de
);
9617 val
= dictGetEntryVal(de
);
9618 /* If the key is shared we want to create a copy */
9619 if (key
->refcount
> 1) {
9620 robj
*newkey
= dupStringObject(key
);
9622 key
= dictGetEntryKey(de
) = newkey
;
9625 if (key
->storage
!= REDIS_VM_MEMORY
) {
9626 addReplySds(c
,sdsnew("-ERR This key is not in memory\r\n"));
9627 } else if (vmSwapObjectBlocking(key
,val
) == REDIS_OK
) {
9628 dictGetEntryVal(de
) = NULL
;
9629 addReply(c
,shared
.ok
);
9631 addReply(c
,shared
.err
);
9634 addReplySds(c
,sdsnew(
9635 "-ERR Syntax error, try DEBUG [SEGFAULT|OBJECT <key>|SWAPIN <key>|SWAPOUT <key>|RELOAD]\r\n"));
9639 static void _redisAssert(char *estr
, char *file
, int line
) {
9640 redisLog(REDIS_WARNING
,"=== ASSERTION FAILED ===");
9641 redisLog(REDIS_WARNING
,"==> %s:%d '%s' is not true\n",file
,line
,estr
);
9642 #ifdef HAVE_BACKTRACE
9643 redisLog(REDIS_WARNING
,"(forcing SIGSEGV in order to print the stack trace)");
9648 /* =================================== Main! ================================ */
/* Read the integer stored in /proc/sys/vm/overcommit_memory.
 * Returns the parsed value, or -1 if the file can't be opened or read. */
int linuxOvercommitMemoryValue(void) {
    char line[64];
    int value = -1;
    FILE *fp = fopen("/proc/sys/vm/overcommit_memory","r");

    if (fp == NULL) return -1;
    if (fgets(line,64,fp) != NULL) value = atoi(line);
    fclose(fp);
    return value;
}
9665 void linuxOvercommitMemoryWarning(void) {
9666 if (linuxOvercommitMemoryValue() == 0) {
9667 redisLog(REDIS_WARNING
,"WARNING overcommit_memory is set to 0! Background save may fail under low condition memory. To fix this issue add 'vm.overcommit_memory = 1' to /etc/sysctl.conf and then reboot or run the command 'sysctl vm.overcommit_memory=1' for this to take effect.");
9670 #endif /* __linux__ */
9672 static void daemonize(void) {
9676 if (fork() != 0) exit(0); /* parent exits */
9677 setsid(); /* create a new session */
9679 /* Every output goes to /dev/null. If Redis is daemonized but
9680 * the 'logfile' is set to 'stdout' in the configuration file
9681 * it will not log at all. */
9682 if ((fd
= open("/dev/null", O_RDWR
, 0)) != -1) {
9683 dup2(fd
, STDIN_FILENO
);
9684 dup2(fd
, STDOUT_FILENO
);
9685 dup2(fd
, STDERR_FILENO
);
9686 if (fd
> STDERR_FILENO
) close(fd
);
9688 /* Try to write the pid file */
9689 fp
= fopen(server
.pidfile
,"w");
9691 fprintf(fp
,"%d\n",getpid());
9696 static void version() {
9697 printf("Redis server version %s\n", REDIS_VERSION
);
/* Print the command line usage on standard error and exit with status 1. */
static void usage(void) {
    fprintf(stderr,"Usage: ./redis-server [/path/to/redis.conf]\n");
    fprintf(stderr," ./redis-server - (read config from stdin)\n");
    exit(1);
}
9707 int main(int argc
, char **argv
) {
9712 if (strcmp(argv
[1], "-v") == 0 ||
9713 strcmp(argv
[1], "--version") == 0) version();
9714 if (strcmp(argv
[1], "--help") == 0) usage();
9715 resetServerSaveParams();
9716 loadServerConfig(argv
[1]);
9717 } else if ((argc
> 2)) {
9720 redisLog(REDIS_WARNING
,"Warning: no config file specified, using the default config. In order to specify a config file use 'redis-server /path/to/redis.conf'");
9722 if (server
.daemonize
) daemonize();
9724 redisLog(REDIS_NOTICE
,"Server started, Redis version " REDIS_VERSION
);
9726 linuxOvercommitMemoryWarning();
9729 if (server
.appendonly
) {
9730 if (loadAppendOnlyFile(server
.appendfilename
) == REDIS_OK
)
9731 redisLog(REDIS_NOTICE
,"DB loaded from append only file: %ld seconds",time(NULL
)-start
);
9733 if (rdbLoad(server
.dbfilename
) == REDIS_OK
)
9734 redisLog(REDIS_NOTICE
,"DB loaded from disk: %ld seconds",time(NULL
)-start
);
9736 redisLog(REDIS_NOTICE
,"The server is now ready to accept connections on port %d", server
.port
);
9737 aeSetBeforeSleepProc(server
.el
,beforeSleep
);
9739 aeDeleteEventLoop(server
.el
);
9743 /* ============================= Backtrace support ========================= */
9745 #ifdef HAVE_BACKTRACE
9746 static char *findFuncName(void *pointer
, unsigned long *offset
);
9748 static void *getMcontextEip(ucontext_t
*uc
) {
9749 #if defined(__FreeBSD__)
9750 return (void*) uc
->uc_mcontext
.mc_eip
;
9751 #elif defined(__dietlibc__)
9752 return (void*) uc
->uc_mcontext
.eip
;
9753 #elif defined(__APPLE__) && !defined(MAC_OS_X_VERSION_10_6)
9755 return (void*) uc
->uc_mcontext
->__ss
.__rip
;
9757 return (void*) uc
->uc_mcontext
->__ss
.__eip
;
9759 #elif defined(__APPLE__) && defined(MAC_OS_X_VERSION_10_6)
9760 #if defined(_STRUCT_X86_THREAD_STATE64) && !defined(__i386__)
9761 return (void*) uc
->uc_mcontext
->__ss
.__rip
;
9763 return (void*) uc
->uc_mcontext
->__ss
.__eip
;
9765 #elif defined(__i386__) || defined(__X86_64__) || defined(__x86_64__)
9766 return (void*) uc
->uc_mcontext
.gregs
[REG_EIP
]; /* Linux 32/64 bit */
9767 #elif defined(__ia64__) /* Linux IA64 */
9768 return (void*) uc
->uc_mcontext
.sc_ip
;
9774 static void segvHandler(int sig
, siginfo_t
*info
, void *secret
) {
9776 char **messages
= NULL
;
9777 int i
, trace_size
= 0;
9778 unsigned long offset
=0;
9779 ucontext_t
*uc
= (ucontext_t
*) secret
;
9781 REDIS_NOTUSED(info
);
9783 redisLog(REDIS_WARNING
,
9784 "======= Ooops! Redis %s got signal: -%d- =======", REDIS_VERSION
, sig
);
9785 infostring
= genRedisInfoString();
9786 redisLog(REDIS_WARNING
, "%s",infostring
);
9787 /* It's not safe to sdsfree() the returned string under memory
9788 * corruption conditions. Let it leak as we are going to abort */
9790 trace_size
= backtrace(trace
, 100);
9791 /* overwrite sigaction with caller's address */
9792 if (getMcontextEip(uc
) != NULL
) {
9793 trace
[1] = getMcontextEip(uc
);
9795 messages
= backtrace_symbols(trace
, trace_size
);
9797 for (i
=1; i
<trace_size
; ++i
) {
9798 char *fn
= findFuncName(trace
[i
], &offset
), *p
;
9800 p
= strchr(messages
[i
],'+');
9801 if (!fn
|| (p
&& ((unsigned long)strtol(p
+1,NULL
,10)) < offset
)) {
9802 redisLog(REDIS_WARNING
,"%s", messages
[i
]);
9804 redisLog(REDIS_WARNING
,"%d redis-server %p %s + %d", i
, trace
[i
], fn
, (unsigned int)offset
);
9807 /* free(messages); Don't call free() with possibly corrupted memory. */
9811 static void setupSigSegvAction(void) {
9812 struct sigaction act
;
9814 sigemptyset (&act
.sa_mask
);
9815 /* When the SA_SIGINFO flag is set in sa_flags then sa_sigaction
9816 * is used. Otherwise, sa_handler is used */
9817 act
.sa_flags
= SA_NODEFER
| SA_ONSTACK
| SA_RESETHAND
| SA_SIGINFO
;
9818 act
.sa_sigaction
= segvHandler
;
9819 sigaction (SIGSEGV
, &act
, NULL
);
9820 sigaction (SIGBUS
, &act
, NULL
);
9821 sigaction (SIGFPE
, &act
, NULL
);
9822 sigaction (SIGILL
, &act
, NULL
);
9823 sigaction (SIGBUS
, &act
, NULL
);
9827 #include "staticsymbols.h"
9828 /* This function try to convert a pointer into a function name. It's used in
9829 * oreder to provide a backtrace under segmentation fault that's able to
9830 * display functions declared as static (otherwise the backtrace is useless). */
9831 static char *findFuncName(void *pointer
, unsigned long *offset
){
9833 unsigned long off
, minoff
= 0;
9835 /* Try to match against the Symbol with the smallest offset */
9836 for (i
=0; symsTable
[i
].pointer
; i
++) {
9837 unsigned long lp
= (unsigned long) pointer
;
9839 if (lp
!= (unsigned long)-1 && lp
>= symsTable
[i
].pointer
) {
9840 off
=lp
-symsTable
[i
].pointer
;
9841 if (ret
< 0 || off
< minoff
) {
9847 if (ret
== -1) return NULL
;
9849 return symsTable
[ret
].name
;
9851 #else /* HAVE_BACKTRACE */
/* Without backtrace support there is no crash handler to install. */
static void setupSigSegvAction(void) {
}
9854 #endif /* HAVE_BACKTRACE */