2 * Copyright (c) 2009-2010, Salvatore Sanfilippo <antirez at gmail dot com>
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions are met:
8 * * Redistributions of source code must retain the above copyright notice,
9 * this list of conditions and the following disclaimer.
10 * * Redistributions in binary form must reproduce the above copyright
11 * notice, this list of conditions and the following disclaimer in the
12 * documentation and/or other materials provided with the distribution.
13 * * Neither the name of Redis nor the names of its contributors may be used
14 * to endorse or promote products derived from this software without
15 * specific prior written permission.
17 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
18 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
21 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
22 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
23 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
24 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
25 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
26 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
27 * POSSIBILITY OF SUCH DAMAGE.
30 #define REDIS_VERSION "2.1.1"
45 #endif /* HAVE_BACKTRACE */
53 #include <arpa/inet.h>
57 #include <sys/resource.h>
65 #include "solarisfixes.h"
69 #include "ae.h" /* Event driven programming library */
70 #include "sds.h" /* Dynamic safe strings */
71 #include "anet.h" /* Networking the easy way */
72 #include "dict.h" /* Hash tables */
73 #include "adlist.h" /* Linked lists */
74 #include "zmalloc.h" /* total memory usage aware version of malloc/free */
75 #include "lzf.h" /* LZF compression library */
76 #include "pqsort.h" /* Partial qsort for SORT+LIMIT */
77 #include "zipmap.h" /* Compact dictionary-alike data structure */
78 #include "ziplist.h" /* Compact list data structure */
79 #include "sha1.h" /* SHA1 is used for DEBUG DIGEST */
80 #include "release.h" /* Release and/or git repository information */
86 /* Static server configuration */
87 #define REDIS_SERVERPORT 6379 /* TCP port */
88 #define REDIS_MAXIDLETIME (60*5) /* default client timeout */
89 #define REDIS_IOBUF_LEN 1024
90 #define REDIS_LOADBUF_LEN 1024
91 #define REDIS_STATIC_ARGS 8
92 #define REDIS_DEFAULT_DBNUM 16
93 #define REDIS_CONFIGLINE_MAX 1024
94 #define REDIS_OBJFREELIST_MAX 1000000 /* Max number of objects to cache */
95 #define REDIS_MAX_SYNC_TIME 60 /* Slave can't take more to sync */
96 #define REDIS_EXPIRELOOKUPS_PER_CRON 10 /* lookup 10 expires per loop */
97 #define REDIS_MAX_WRITE_PER_EVENT (1024*64)
98 #define REDIS_REQUEST_MAX_SIZE (1024*1024*256) /* max bytes in inline command */
100 /* If more than REDIS_WRITEV_THRESHOLD write packets are pending, use writev */
101 #define REDIS_WRITEV_THRESHOLD 3
102 /* Max number of iovecs used for each writev call */
103 #define REDIS_WRITEV_IOVEC_COUNT 256
105 /* Hash table parameters */
106 #define REDIS_HT_MINFILL 10 /* Minimal hash table fill 10% */
109 #define REDIS_CMD_BULK 1 /* Bulk write command */
110 #define REDIS_CMD_INLINE 2 /* Inline command */
111 /* REDIS_CMD_DENYOOM deserves a longer comment: all the commands marked with
112 this flag will return an error when the 'maxmemory' option is set in the
113 config file and the server is using more than maxmemory bytes of memory.
114 In short, these commands are denied on low memory conditions. */
115 #define REDIS_CMD_DENYOOM 4
116 #define REDIS_CMD_FORCE_REPLICATION 8 /* Force replication even if dirty is 0 */
119 #define REDIS_STRING 0
124 #define REDIS_VMPOINTER 8
126 /* Objects encoding. Some kind of objects like Strings and Hashes can be
127 * internally represented in multiple ways. The 'encoding' field of the object
128 * is set to one of these values for this object. */
129 #define REDIS_ENCODING_RAW 0 /* Raw representation */
130 #define REDIS_ENCODING_INT 1 /* Encoded as integer */
131 #define REDIS_ENCODING_HT 2 /* Encoded as hash table */
132 #define REDIS_ENCODING_ZIPMAP 3 /* Encoded as zipmap */
133 #define REDIS_ENCODING_LIST 4 /* Encoded as regular linked list */
134 #define REDIS_ENCODING_ZIPLIST 5 /* Encoded as ziplist */
136 static char* strencoding
[] = {
137 "raw", "int", "hashtable", "zipmap", "list", "ziplist"
140 /* Object types only used for dumping to disk */
141 #define REDIS_EXPIRETIME 253
142 #define REDIS_SELECTDB 254
143 #define REDIS_EOF 255
145 /* Defines related to the dump file format. To store 32 bits lengths for short
146 * keys requires a lot of space, so we check the most significant 2 bits of
147 * the first byte to interpret the length:
149 * 00|000000 => if the two MSB are 00 the len is the 6 bits of this byte
150 * 01|000000 00000000 => 01, the len is 14 bits, 6 bits + 8 bits of next byte
151 * 10|000000 [32 bit integer] => if it's 10, a full 32 bit len will follow
152 * 11|000000 this means: specially encoded object will follow. The six bits
153 * number specify the kind of object that follows.
154 * See the REDIS_RDB_ENC_* defines.
156 * Lengths up to 63 are stored using a single byte, most DB keys, and many
157 * values, will fit inside. */
158 #define REDIS_RDB_6BITLEN 0
159 #define REDIS_RDB_14BITLEN 1
160 #define REDIS_RDB_32BITLEN 2
161 #define REDIS_RDB_ENCVAL 3
162 #define REDIS_RDB_LENERR UINT_MAX
164 /* When a length of a string object stored on disk has the first two bits
165 * set, the remaining six bits specify a special encoding for the object
166 * according to the following defines: */
167 #define REDIS_RDB_ENC_INT8 0 /* 8 bit signed integer */
168 #define REDIS_RDB_ENC_INT16 1 /* 16 bit signed integer */
169 #define REDIS_RDB_ENC_INT32 2 /* 32 bit signed integer */
170 #define REDIS_RDB_ENC_LZF 3 /* string compressed with FASTLZ */
172 /* Virtual memory object->where field. */
173 #define REDIS_VM_MEMORY 0 /* The object is on memory */
174 #define REDIS_VM_SWAPPED 1 /* The object is on disk */
175 #define REDIS_VM_SWAPPING 2 /* Redis is swapping this object on disk */
176 #define REDIS_VM_LOADING 3 /* Redis is loading this object from disk */
178 /* Virtual memory static configuration stuff.
179 * Check vmFindContiguousPages() to know more about this magic numbers. */
180 #define REDIS_VM_MAX_NEAR_PAGES 65536
181 #define REDIS_VM_MAX_RANDOM_JUMP 4096
182 #define REDIS_VM_MAX_THREADS 32
183 #define REDIS_THREAD_STACK_SIZE (1024*1024*4)
184 /* The following is the *percentage* of completed I/O jobs to process when the
185 * handler is called. While Virtual Memory I/O operations are performed by
186 * threads, these operations must be processed by the main thread when completed
187 * in order to take effect. */
188 #define REDIS_MAX_COMPLETED_JOBS_PROCESSED 1
191 #define REDIS_SLAVE 1 /* This client is a slave server */
192 #define REDIS_MASTER 2 /* This client is a master server */
193 #define REDIS_MONITOR 4 /* This client is a slave monitor, see MONITOR */
194 #define REDIS_MULTI 8 /* This client is in a MULTI context */
195 #define REDIS_BLOCKED 16 /* The client is waiting in a blocking operation */
196 #define REDIS_IO_WAIT 32 /* The client is waiting for Virtual Memory I/O */
197 #define REDIS_DIRTY_CAS 64 /* Watched keys modified. EXEC will fail. */
199 /* Slave replication state - slave side */
200 #define REDIS_REPL_NONE 0 /* No active replication */
201 #define REDIS_REPL_CONNECT 1 /* Must connect to master */
202 #define REDIS_REPL_CONNECTED 2 /* Connected to master */
204 /* Slave replication state - from the point of view of master
205 * Note that in SEND_BULK and ONLINE state the slave receives new updates
206 * in its output queue. In the WAIT_BGSAVE state instead the server is waiting
207 * to start the next background saving in order to send updates to it. */
208 #define REDIS_REPL_WAIT_BGSAVE_START 3 /* master waits bgsave to start feeding it */
209 #define REDIS_REPL_WAIT_BGSAVE_END 4 /* master waits bgsave to start bulk DB transmission */
210 #define REDIS_REPL_SEND_BULK 5 /* master is sending the bulk DB */
211 #define REDIS_REPL_ONLINE 6 /* bulk DB already transmitted, receive updates */
213 /* List related stuff */
217 /* Sort operations */
218 #define REDIS_SORT_GET 0
219 #define REDIS_SORT_ASC 1
220 #define REDIS_SORT_DESC 2
221 #define REDIS_SORTKEY_MAX 1024
224 #define REDIS_DEBUG 0
225 #define REDIS_VERBOSE 1
226 #define REDIS_NOTICE 2
227 #define REDIS_WARNING 3
/* Anti-warning macro: evaluates V and discards the result, so that an
 * intentionally unused parameter or variable does not trigger a compiler
 * warning. The argument is parenthesized so that any expression (not just a
 * plain identifier) can be passed, e.g. REDIS_NOTUSED(a + b). */
#define REDIS_NOTUSED(V) ((void)(V))
232 #define ZSKIPLIST_MAXLEVEL 32 /* Should be enough for 2^32 elements */
233 #define ZSKIPLIST_P 0.25 /* Skiplist P = 1/4 */
235 /* Append only defines */
236 #define APPENDFSYNC_NO 0
237 #define APPENDFSYNC_ALWAYS 1
238 #define APPENDFSYNC_EVERYSEC 2
240 /* Zip structure related defaults */
241 #define REDIS_HASH_MAX_ZIPMAP_ENTRIES 64
242 #define REDIS_HASH_MAX_ZIPMAP_VALUE 512
243 #define REDIS_LIST_MAX_ZIPLIST_ENTRIES 1024
244 #define REDIS_LIST_MAX_ZIPLIST_VALUE 32
/* We can print the stacktrace, so our assert is defined this way.
 *
 * redisAssert(_e): when the expression _e is false, pass the stringified
 * expression plus the current file and line to _redisAssert(), then call
 * _exit(1). When _e is true the macro evaluates to (void)0.
 *
 * redisPanic(_e): unconditionally pass the stringified argument plus the
 * current file and line to _redisPanic(), then call _exit(1). The whole
 * replacement list is parenthesized so that the comma expression cannot be
 * torn apart by a surrounding operator (for instance when the macro is the
 * last operand of a conditional expression). */
#define redisAssert(_e) ((_e)?(void)0 : (_redisAssert(#_e,__FILE__,__LINE__),_exit(1)))
#define redisPanic(_e) (_redisPanic(#_e,__FILE__,__LINE__),_exit(1))

/* Reporting back-ends used by the two macros above. */
static void _redisAssert(char *estr, char *file, int line);
static void _redisPanic(char *msg, char *file, int line);
252 /*================================= Data types ============================== */
254 /* A redis object, that is a type able to hold a string / list / set */
256 /* The actual Redis Object */
257 typedef struct redisObject
{
259 unsigned storage
:2; /* REDIS_VM_MEMORY or REDIS_VM_SWAPPING */
261 unsigned lru
:22; /* lru time (relative to server.lruclock) */
264 /* VM fields are only allocated if VM is active, otherwise the
265 * object allocation function will just allocate
266 * sizeof(redisObject) minus sizeof(redisObjectVM), so using
267 * Redis without VM active will not have any overhead. */
270 /* The VM pointer structure - identifies an object in the swap file.
272 * This object is stored in place of the value
273 * object in the main key->value hash table representing a database.
274 * Note that the first fields (type, storage) are the same as the redisObject
275 * structure so that vmPointer structures can be accessed even when cast
276 * as redisObject structures.
278 * This is useful as we don't know if a value object is or not on disk, but we
279 * are always able to read obj->storage to check this. For vmPointer
280 * structures "type" is set to REDIS_VMPOINTER (even if without this field
281 * it is still possible to check the kind of object from the value of 'storage').*/
282 typedef struct vmPointer
{
284 unsigned storage
:2; /* REDIS_VM_SWAPPED or REDIS_VM_LOADING */
286 unsigned int vtype
; /* type of the object stored in the swap file */
287 off_t page
; /* the page at which the object is stored on disk */
288 off_t usedpages
; /* number of pages used on disk */
291 /* Macro used to initialize a Redis object allocated on the stack.
292 * Note that this macro is kept near the structure definition to make sure
293 * we'll update it when the structure is changed, to avoid bugs like
294 * bug #85 introduced exactly in this way. */
295 #define initStaticStringObject(_var,_ptr) do { \
297 _var.type = REDIS_STRING; \
298 _var.encoding = REDIS_ENCODING_RAW; \
300 _var.storage = REDIS_VM_MEMORY; \
303 typedef struct redisDb
{
304 dict
*dict
; /* The keyspace for this DB */
305 dict
*expires
; /* Timeout of keys with a timeout set */
306 dict
*blocking_keys
; /* Keys with clients waiting for data (BLPOP) */
307 dict
*io_keys
; /* Keys with clients waiting for VM I/O */
308 dict
*watched_keys
; /* WATCHED keys for MULTI/EXEC CAS */
312 /* Client MULTI/EXEC state */
313 typedef struct multiCmd
{
316 struct redisCommand
*cmd
;
319 typedef struct multiState
{
320 multiCmd
*commands
; /* Array of MULTI commands */
321 int count
; /* Total number of MULTI commands */
324 /* With multiplexing we need to keep per-client state.
325 * Clients are kept in a linked list. */
326 typedef struct redisClient
{
331 robj
**argv
, **mbargv
;
333 int bulklen
; /* bulk read len. -1 if not in bulk read mode */
334 int multibulk
; /* multi bulk command format active */
337 time_t lastinteraction
; /* time of the last interaction, used for timeout */
338 int flags
; /* REDIS_SLAVE | REDIS_MONITOR | REDIS_MULTI ... */
339 int slaveseldb
; /* slave selected db, if this client is a slave */
340 int authenticated
; /* when requirepass is non-NULL */
341 int replstate
; /* replication state if this is a slave */
342 int repldbfd
; /* replication DB file descriptor */
343 long repldboff
; /* replication DB file offset */
344 off_t repldbsize
; /* replication DB file size */
345 multiState mstate
; /* MULTI/EXEC state */
346 robj
**blocking_keys
; /* The key we are waiting to terminate a blocking
347 * operation such as BLPOP. Otherwise NULL. */
348 int blocking_keys_num
; /* Number of blocking keys */
349 time_t blockingto
; /* Blocking operation timeout. If UNIX current time
350 * is >= blockingto then the operation timed out. */
351 list
*io_keys
; /* Keys this client is waiting to be loaded from the
352 * swap file in order to continue. */
353 list
*watched_keys
; /* Keys WATCHED for MULTI/EXEC CAS */
354 dict
*pubsub_channels
; /* channels a client is interested in (SUBSCRIBE) */
355 list
*pubsub_patterns
; /* patterns a client is interested in (SUBSCRIBE) */
363 /* Global server state structure */
368 long long dirty
; /* changes to DB from the last save */
370 list
*slaves
, *monitors
;
371 char neterr
[ANET_ERR_LEN
];
373 int cronloops
; /* number of times the cron function run */
374 list
*objfreelist
; /* A list of freed objects to avoid malloc() */
375 time_t lastsave
; /* Unix time of last successful save */
376 /* Fields used only for stats */
377 time_t stat_starttime
; /* server start time */
378 long long stat_numcommands
; /* number of processed commands */
379 long long stat_numconnections
; /* number of connections received */
380 long long stat_expiredkeys
; /* number of expired keys */
389 int no_appendfsync_on_rewrite
;
395 pid_t bgsavechildpid
;
396 pid_t bgrewritechildpid
;
397 sds bgrewritebuf
; /* buffer taken by parent during append only rewrite */
398 sds aofbuf
; /* AOF buffer, written before entering the event loop */
399 struct saveparam
*saveparams
;
404 char *appendfilename
;
408 /* Replication related */
413 redisClient
*master
; /* client that is master for this slave */
415 unsigned int maxclients
;
416 unsigned long long maxmemory
;
417 unsigned int blpop_blocked_clients
;
418 unsigned int vm_blocked_clients
;
419 /* Sort parameters - qsort_r() is only available under BSD so we
420 * have to take this state global, in order to pass it to sortCompare() */
424 /* Virtual memory configuration */
429 unsigned long long vm_max_memory
;
430 /* Zip structure config */
431 size_t hash_max_zipmap_entries
;
432 size_t hash_max_zipmap_value
;
433 size_t list_max_ziplist_entries
;
434 size_t list_max_ziplist_value
;
435 /* Virtual memory state */
438 off_t vm_next_page
; /* Next probably empty page */
439 off_t vm_near_pages
; /* Number of pages allocated sequentially */
440 unsigned char *vm_bitmap
; /* Bitmap of free/used pages */
441 time_t unixtime
; /* Unix time sampled every second. */
442 /* Virtual memory I/O threads stuff */
443 /* An I/O thread process an element taken from the io_jobs queue and
444 * put the result of the operation in the io_done list. While the
445 * job is being processed, it's put on io_processing queue. */
446 list
*io_newjobs
; /* List of VM I/O jobs yet to be processed */
447 list
*io_processing
; /* List of VM I/O jobs being processed */
448 list
*io_processed
; /* List of VM I/O jobs already processed */
449 list
*io_ready_clients
; /* Clients ready to be unblocked. All keys loaded */
450 pthread_mutex_t io_mutex
; /* lock to access io_jobs/io_done/io_thread_job */
451 pthread_mutex_t obj_freelist_mutex
; /* safe redis objects creation/free */
452 pthread_mutex_t io_swapfile_mutex
; /* So we can lseek + write */
453 pthread_attr_t io_threads_attr
; /* attributes for threads creation */
454 int io_active_threads
; /* Number of running I/O threads */
455 int vm_max_threads
; /* Max number of I/O threads running at the same time */
456 /* Our main thread is blocked on the event loop, locking for sockets ready
457 * to be read or written, so when a threaded I/O operation is ready to be
458 * processed by the main thread, the I/O thread will use a unix pipe to
459 * awake the main thread. The followings are the two pipe FDs. */
460 int io_ready_pipe_read
;
461 int io_ready_pipe_write
;
462 /* Virtual memory stats */
463 unsigned long long vm_stats_used_pages
;
464 unsigned long long vm_stats_swapped_objects
;
465 unsigned long long vm_stats_swapouts
;
466 unsigned long long vm_stats_swapins
;
468 dict
*pubsub_channels
; /* Map channels to list of subscribed clients */
469 list
*pubsub_patterns
; /* A list of pubsub_patterns */
472 unsigned lruclock
:22; /* clock incrementing every minute, for LRU */
473 unsigned lruclock_padding
:10;
476 typedef struct pubsubPattern
{
481 typedef void redisCommandProc(redisClient
*c
);
482 typedef void redisVmPreloadProc(redisClient
*c
, struct redisCommand
*cmd
, int argc
, robj
**argv
);
483 struct redisCommand
{
485 redisCommandProc
*proc
;
488 /* Use a function to determine which keys need to be loaded
489 * in the background prior to executing this command. Takes precedence
490 * over vm_firstkey and others, ignored when NULL */
491 redisVmPreloadProc
*vm_preload_proc
;
492 /* What keys should be loaded in background when calling this command? */
493 int vm_firstkey
; /* The first argument that's a key (0 = no keys) */
494 int vm_lastkey
; /* THe last argument that's a key */
495 int vm_keystep
; /* The step between first and last key */
498 struct redisFunctionSym
{
500 unsigned long pointer
;
503 typedef struct _redisSortObject
{
511 typedef struct _redisSortOperation
{
514 } redisSortOperation
;
516 /* ZSETs use a specialized version of Skiplists */
518 typedef struct zskiplistNode
{
519 struct zskiplistNode
**forward
;
520 struct zskiplistNode
*backward
;
526 typedef struct zskiplist
{
527 struct zskiplistNode
*header
, *tail
;
528 unsigned long length
;
532 typedef struct zset
{
537 /* Our shared "common" objects */
539 #define REDIS_SHARED_INTEGERS 10000
540 struct sharedObjectsStruct
{
541 robj
*crlf
, *ok
, *err
, *emptybulk
, *czero
, *cone
, *cnegone
, *pong
, *space
,
542 *colon
, *nullbulk
, *nullmultibulk
, *queued
,
543 *emptymultibulk
, *wrongtypeerr
, *nokeyerr
, *syntaxerr
, *sameobjecterr
,
544 *outofrangeerr
, *plus
,
545 *select0
, *select1
, *select2
, *select3
, *select4
,
546 *select5
, *select6
, *select7
, *select8
, *select9
,
547 *messagebulk
, *pmessagebulk
, *subscribebulk
, *unsubscribebulk
, *mbulk3
,
548 *mbulk4
, *psubscribebulk
, *punsubscribebulk
,
549 *integers
[REDIS_SHARED_INTEGERS
];
/* Globals that are actually used as constants. These double values are used
 * for the on-disk serialization of doubles, and are initialized at runtime
 * to avoid strange compiler optimizations. */
static double R_Zero;
static double R_PosInf;
static double R_NegInf;
static double R_Nan;
558 /* VM threaded I/O request message */
559 #define REDIS_IOJOB_LOAD 0 /* Load from disk to memory */
560 #define REDIS_IOJOB_PREPARE_SWAP 1 /* Compute needed pages */
561 #define REDIS_IOJOB_DO_SWAP 2 /* Swap from memory to disk */
562 typedef struct iojob
{
563 int type
; /* Request type, REDIS_IOJOB_* */
564 redisDb
*db
;/* Redis database */
565 robj
*key
; /* This I/O request is about swapping this key */
566 robj
*id
; /* Unique identifier of this job:
567 this is the object to swap for REDIS_IOREQ_*_SWAP, or the
568 vmpointer objct for REDIS_IOREQ_LOAD. */
569 robj
*val
; /* the value to swap for REDIS_IOREQ_*_SWAP, otherwise this
570 * field is populated by the I/O thread for REDIS_IOREQ_LOAD. */
571 off_t page
; /* Swap page where to read/write the object */
572 off_t pages
; /* Swap pages needed to save object. PREPARE_SWAP return val */
573 int canceled
; /* True if this command was canceled by blocking side of VM */
574 pthread_t thread
; /* ID of the thread processing this entry */
577 /*================================ Prototypes =============================== */
578 char *redisGitSHA1(void);
579 char *redisGitDirty(void);
581 static void freeStringObject(robj
*o
);
582 static void freeListObject(robj
*o
);
583 static void freeSetObject(robj
*o
);
584 static void decrRefCount(void *o
);
585 static robj
*createObject(int type
, void *ptr
);
586 static void freeClient(redisClient
*c
);
587 static int rdbLoad(char *filename
);
588 static void addReply(redisClient
*c
, robj
*obj
);
589 static void addReplySds(redisClient
*c
, sds s
);
590 static void incrRefCount(robj
*o
);
591 static int rdbSaveBackground(char *filename
);
592 static robj
*createStringObject(char *ptr
, size_t len
);
593 static robj
*dupStringObject(robj
*o
);
594 static void replicationFeedSlaves(list
*slaves
, int dictid
, robj
**argv
, int argc
);
595 static void replicationFeedMonitors(list
*monitors
, int dictid
, robj
**argv
, int argc
);
596 static void flushAppendOnlyFile(void);
597 static void feedAppendOnlyFile(struct redisCommand
*cmd
, int dictid
, robj
**argv
, int argc
);
598 static int syncWithMaster(void);
599 static robj
*tryObjectEncoding(robj
*o
);
600 static robj
*getDecodedObject(robj
*o
);
601 static int removeExpire(redisDb
*db
, robj
*key
);
602 static int expireIfNeeded(redisDb
*db
, robj
*key
);
603 static int deleteIfVolatile(redisDb
*db
, robj
*key
);
604 static int dbDelete(redisDb
*db
, robj
*key
);
605 static time_t getExpire(redisDb
*db
, robj
*key
);
606 static int setExpire(redisDb
*db
, robj
*key
, time_t when
);
607 static void updateSlavesWaitingBgsave(int bgsaveerr
);
608 static void freeMemoryIfNeeded(void);
609 static int processCommand(redisClient
*c
);
610 static void setupSigSegvAction(void);
611 static void rdbRemoveTempFile(pid_t childpid
);
612 static void aofRemoveTempFile(pid_t childpid
);
613 static size_t stringObjectLen(robj
*o
);
614 static void processInputBuffer(redisClient
*c
);
615 static zskiplist
*zslCreate(void);
616 static void zslFree(zskiplist
*zsl
);
617 static void zslInsert(zskiplist
*zsl
, double score
, robj
*obj
);
618 static void sendReplyToClientWritev(aeEventLoop
*el
, int fd
, void *privdata
, int mask
);
619 static void initClientMultiState(redisClient
*c
);
620 static void freeClientMultiState(redisClient
*c
);
621 static void queueMultiCommand(redisClient
*c
, struct redisCommand
*cmd
);
622 static void unblockClientWaitingData(redisClient
*c
);
623 static int handleClientsWaitingListPush(redisClient
*c
, robj
*key
, robj
*ele
);
624 static void vmInit(void);
625 static void vmMarkPagesFree(off_t page
, off_t count
);
626 static robj
*vmLoadObject(robj
*o
);
627 static robj
*vmPreviewObject(robj
*o
);
628 static int vmSwapOneObjectBlocking(void);
629 static int vmSwapOneObjectThreaded(void);
630 static int vmCanSwapOut(void);
631 static int tryFreeOneObjectFromFreelist(void);
632 static void acceptHandler(aeEventLoop
*el
, int fd
, void *privdata
, int mask
);
633 static void vmThreadedIOCompletedJob(aeEventLoop
*el
, int fd
, void *privdata
, int mask
);
634 static void vmCancelThreadedIOJob(robj
*o
);
635 static void lockThreadedIO(void);
636 static void unlockThreadedIO(void);
637 static int vmSwapObjectThreaded(robj
*key
, robj
*val
, redisDb
*db
);
638 static void freeIOJob(iojob
*j
);
639 static void queueIOJob(iojob
*j
);
640 static int vmWriteObjectOnSwap(robj
*o
, off_t page
);
641 static robj
*vmReadObjectFromSwap(off_t page
, int type
);
642 static void waitEmptyIOJobsQueue(void);
643 static void vmReopenSwapFile(void);
644 static int vmFreePage(off_t page
);
645 static void zunionInterBlockClientOnSwappedKeys(redisClient
*c
, struct redisCommand
*cmd
, int argc
, robj
**argv
);
646 static void execBlockClientOnSwappedKeys(redisClient
*c
, struct redisCommand
*cmd
, int argc
, robj
**argv
);
647 static int blockClientOnSwappedKeys(redisClient
*c
, struct redisCommand
*cmd
);
648 static int dontWaitForSwappedKey(redisClient
*c
, robj
*key
);
649 static void handleClientsBlockedOnSwappedKey(redisDb
*db
, robj
*key
);
650 static void readQueryFromClient(aeEventLoop
*el
, int fd
, void *privdata
, int mask
);
651 static struct redisCommand
*lookupCommand(char *name
);
652 static void call(redisClient
*c
, struct redisCommand
*cmd
);
653 static void resetClient(redisClient
*c
);
654 static void convertToRealHash(robj
*o
);
655 static void listTypeConvert(robj
*o
, int enc
);
656 static int pubsubUnsubscribeAllChannels(redisClient
*c
, int notify
);
657 static int pubsubUnsubscribeAllPatterns(redisClient
*c
, int notify
);
658 static void freePubsubPattern(void *p
);
659 static int listMatchPubsubPattern(void *a
, void *b
);
660 static int compareStringObjects(robj
*a
, robj
*b
);
661 static int equalStringObjects(robj
*a
, robj
*b
);
663 static int rewriteAppendOnlyFileBackground(void);
664 static vmpointer
*vmSwapObjectBlocking(robj
*val
);
/* Declared with an explicit (void) parameter list: in C an empty list "()"
 * declares a function with unspecified parameters, so calls would not be
 * checked against a prototype. */
static int prepareForShutdown(void);
666 static void touchWatchedKey(redisDb
*db
, robj
*key
);
667 static void touchWatchedKeysOnFlush(int dbid
);
668 static void unwatchAllKeys(redisClient
*c
);
670 static void authCommand(redisClient
*c
);
671 static void pingCommand(redisClient
*c
);
672 static void echoCommand(redisClient
*c
);
673 static void setCommand(redisClient
*c
);
674 static void setnxCommand(redisClient
*c
);
675 static void setexCommand(redisClient
*c
);
676 static void getCommand(redisClient
*c
);
677 static void delCommand(redisClient
*c
);
678 static void existsCommand(redisClient
*c
);
679 static void incrCommand(redisClient
*c
);
680 static void decrCommand(redisClient
*c
);
681 static void incrbyCommand(redisClient
*c
);
682 static void decrbyCommand(redisClient
*c
);
683 static void selectCommand(redisClient
*c
);
684 static void randomkeyCommand(redisClient
*c
);
685 static void keysCommand(redisClient
*c
);
686 static void dbsizeCommand(redisClient
*c
);
687 static void lastsaveCommand(redisClient
*c
);
688 static void saveCommand(redisClient
*c
);
689 static void bgsaveCommand(redisClient
*c
);
690 static void bgrewriteaofCommand(redisClient
*c
);
691 static void shutdownCommand(redisClient
*c
);
692 static void moveCommand(redisClient
*c
);
693 static void renameCommand(redisClient
*c
);
694 static void renamenxCommand(redisClient
*c
);
695 static void lpushCommand(redisClient
*c
);
696 static void rpushCommand(redisClient
*c
);
697 static void lpushxCommand(redisClient
*c
);
698 static void rpushxCommand(redisClient
*c
);
699 static void linsertCommand(redisClient
*c
);
700 static void lpopCommand(redisClient
*c
);
701 static void rpopCommand(redisClient
*c
);
702 static void llenCommand(redisClient
*c
);
703 static void lindexCommand(redisClient
*c
);
704 static void lrangeCommand(redisClient
*c
);
705 static void ltrimCommand(redisClient
*c
);
706 static void typeCommand(redisClient
*c
);
707 static void lsetCommand(redisClient
*c
);
708 static void saddCommand(redisClient
*c
);
709 static void sremCommand(redisClient
*c
);
710 static void smoveCommand(redisClient
*c
);
711 static void sismemberCommand(redisClient
*c
);
712 static void scardCommand(redisClient
*c
);
713 static void spopCommand(redisClient
*c
);
714 static void srandmemberCommand(redisClient
*c
);
715 static void sinterCommand(redisClient
*c
);
716 static void sinterstoreCommand(redisClient
*c
);
717 static void sunionCommand(redisClient
*c
);
718 static void sunionstoreCommand(redisClient
*c
);
719 static void sdiffCommand(redisClient
*c
);
720 static void sdiffstoreCommand(redisClient
*c
);
721 static void syncCommand(redisClient
*c
);
722 static void flushdbCommand(redisClient
*c
);
723 static void flushallCommand(redisClient
*c
);
724 static void sortCommand(redisClient
*c
);
725 static void lremCommand(redisClient
*c
);
726 static void rpoplpushcommand(redisClient
*c
);
727 static void infoCommand(redisClient
*c
);
728 static void mgetCommand(redisClient
*c
);
729 static void monitorCommand(redisClient
*c
);
730 static void expireCommand(redisClient
*c
);
731 static void expireatCommand(redisClient
*c
);
732 static void getsetCommand(redisClient
*c
);
733 static void ttlCommand(redisClient
*c
);
734 static void slaveofCommand(redisClient
*c
);
735 static void debugCommand(redisClient
*c
);
736 static void msetCommand(redisClient
*c
);
737 static void msetnxCommand(redisClient
*c
);
738 static void zaddCommand(redisClient
*c
);
739 static void zincrbyCommand(redisClient
*c
);
740 static void zrangeCommand(redisClient
*c
);
741 static void zrangebyscoreCommand(redisClient
*c
);
742 static void zcountCommand(redisClient
*c
);
743 static void zrevrangeCommand(redisClient
*c
);
744 static void zcardCommand(redisClient
*c
);
745 static void zremCommand(redisClient
*c
);
746 static void zscoreCommand(redisClient
*c
);
747 static void zremrangebyscoreCommand(redisClient
*c
);
748 static void multiCommand(redisClient
*c
);
749 static void execCommand(redisClient
*c
);
750 static void discardCommand(redisClient
*c
);
751 static void blpopCommand(redisClient
*c
);
752 static void brpopCommand(redisClient
*c
);
753 static void appendCommand(redisClient
*c
);
754 static void substrCommand(redisClient
*c
);
755 static void zrankCommand(redisClient
*c
);
756 static void zrevrankCommand(redisClient
*c
);
757 static void hsetCommand(redisClient
*c
);
758 static void hsetnxCommand(redisClient
*c
);
759 static void hgetCommand(redisClient
*c
);
760 static void hmsetCommand(redisClient
*c
);
761 static void hmgetCommand(redisClient
*c
);
762 static void hdelCommand(redisClient
*c
);
763 static void hlenCommand(redisClient
*c
);
764 static void zremrangebyrankCommand(redisClient
*c
);
765 static void zunionstoreCommand(redisClient
*c
);
766 static void zinterstoreCommand(redisClient
*c
);
767 static void hkeysCommand(redisClient
*c
);
768 static void hvalsCommand(redisClient
*c
);
769 static void hgetallCommand(redisClient
*c
);
770 static void hexistsCommand(redisClient
*c
);
771 static void configCommand(redisClient
*c
);
772 static void hincrbyCommand(redisClient
*c
);
773 static void subscribeCommand(redisClient
*c
);
774 static void unsubscribeCommand(redisClient
*c
);
775 static void psubscribeCommand(redisClient
*c
);
776 static void punsubscribeCommand(redisClient
*c
);
777 static void publishCommand(redisClient
*c
);
778 static void watchCommand(redisClient
*c
);
779 static void unwatchCommand(redisClient
*c
);
781 /*================================= Globals ================================= */
784 static struct redisServer server
; /* server global state */
785 static struct redisCommand
*commandTable
;
786 static struct redisCommand readonlyCommandTable
[] = {
787 {"get",getCommand
,2,REDIS_CMD_INLINE
,NULL
,1,1,1},
788 {"set",setCommand
,3,REDIS_CMD_BULK
|REDIS_CMD_DENYOOM
,NULL
,0,0,0},
789 {"setnx",setnxCommand
,3,REDIS_CMD_BULK
|REDIS_CMD_DENYOOM
,NULL
,0,0,0},
790 {"setex",setexCommand
,4,REDIS_CMD_BULK
|REDIS_CMD_DENYOOM
,NULL
,0,0,0},
791 {"append",appendCommand
,3,REDIS_CMD_BULK
|REDIS_CMD_DENYOOM
,NULL
,1,1,1},
792 {"substr",substrCommand
,4,REDIS_CMD_INLINE
,NULL
,1,1,1},
793 {"del",delCommand
,-2,REDIS_CMD_INLINE
,NULL
,0,0,0},
794 {"exists",existsCommand
,2,REDIS_CMD_INLINE
,NULL
,1,1,1},
795 {"incr",incrCommand
,2,REDIS_CMD_INLINE
|REDIS_CMD_DENYOOM
,NULL
,1,1,1},
796 {"decr",decrCommand
,2,REDIS_CMD_INLINE
|REDIS_CMD_DENYOOM
,NULL
,1,1,1},
797 {"mget",mgetCommand
,-2,REDIS_CMD_INLINE
,NULL
,1,-1,1},
798 {"rpush",rpushCommand
,3,REDIS_CMD_BULK
|REDIS_CMD_DENYOOM
,NULL
,1,1,1},
799 {"lpush",lpushCommand
,3,REDIS_CMD_BULK
|REDIS_CMD_DENYOOM
,NULL
,1,1,1},
800 {"rpushx",rpushxCommand
,3,REDIS_CMD_BULK
|REDIS_CMD_DENYOOM
,NULL
,1,1,1},
801 {"lpushx",lpushxCommand
,3,REDIS_CMD_BULK
|REDIS_CMD_DENYOOM
,NULL
,1,1,1},
802 {"linsert",linsertCommand
,5,REDIS_CMD_BULK
|REDIS_CMD_DENYOOM
,NULL
,1,1,1},
803 {"rpop",rpopCommand
,2,REDIS_CMD_INLINE
,NULL
,1,1,1},
804 {"lpop",lpopCommand
,2,REDIS_CMD_INLINE
,NULL
,1,1,1},
805 {"brpop",brpopCommand
,-3,REDIS_CMD_INLINE
,NULL
,1,1,1},
806 {"blpop",blpopCommand
,-3,REDIS_CMD_INLINE
,NULL
,1,1,1},
807 {"llen",llenCommand
,2,REDIS_CMD_INLINE
,NULL
,1,1,1},
808 {"lindex",lindexCommand
,3,REDIS_CMD_INLINE
,NULL
,1,1,1},
809 {"lset",lsetCommand
,4,REDIS_CMD_BULK
|REDIS_CMD_DENYOOM
,NULL
,1,1,1},
810 {"lrange",lrangeCommand
,4,REDIS_CMD_INLINE
,NULL
,1,1,1},
811 {"ltrim",ltrimCommand
,4,REDIS_CMD_INLINE
,NULL
,1,1,1},
812 {"lrem",lremCommand
,4,REDIS_CMD_BULK
,NULL
,1,1,1},
813 {"rpoplpush",rpoplpushcommand
,3,REDIS_CMD_INLINE
|REDIS_CMD_DENYOOM
,NULL
,1,2,1},
814 {"sadd",saddCommand
,3,REDIS_CMD_BULK
|REDIS_CMD_DENYOOM
,NULL
,1,1,1},
815 {"srem",sremCommand
,3,REDIS_CMD_BULK
,NULL
,1,1,1},
816 {"smove",smoveCommand
,4,REDIS_CMD_BULK
,NULL
,1,2,1},
817 {"sismember",sismemberCommand
,3,REDIS_CMD_BULK
,NULL
,1,1,1},
818 {"scard",scardCommand
,2,REDIS_CMD_INLINE
,NULL
,1,1,1},
819 {"spop",spopCommand
,2,REDIS_CMD_INLINE
,NULL
,1,1,1},
820 {"srandmember",srandmemberCommand
,2,REDIS_CMD_INLINE
,NULL
,1,1,1},
821 {"sinter",sinterCommand
,-2,REDIS_CMD_INLINE
|REDIS_CMD_DENYOOM
,NULL
,1,-1,1},
822 {"sinterstore",sinterstoreCommand
,-3,REDIS_CMD_INLINE
|REDIS_CMD_DENYOOM
,NULL
,2,-1,1},
823 {"sunion",sunionCommand
,-2,REDIS_CMD_INLINE
|REDIS_CMD_DENYOOM
,NULL
,1,-1,1},
824 {"sunionstore",sunionstoreCommand
,-3,REDIS_CMD_INLINE
|REDIS_CMD_DENYOOM
,NULL
,2,-1,1},
825 {"sdiff",sdiffCommand
,-2,REDIS_CMD_INLINE
|REDIS_CMD_DENYOOM
,NULL
,1,-1,1},
826 {"sdiffstore",sdiffstoreCommand
,-3,REDIS_CMD_INLINE
|REDIS_CMD_DENYOOM
,NULL
,2,-1,1},
827 {"smembers",sinterCommand
,2,REDIS_CMD_INLINE
,NULL
,1,1,1},
828 {"zadd",zaddCommand
,4,REDIS_CMD_BULK
|REDIS_CMD_DENYOOM
,NULL
,1,1,1},
829 {"zincrby",zincrbyCommand
,4,REDIS_CMD_BULK
|REDIS_CMD_DENYOOM
,NULL
,1,1,1},
830 {"zrem",zremCommand
,3,REDIS_CMD_BULK
,NULL
,1,1,1},
831 {"zremrangebyscore",zremrangebyscoreCommand
,4,REDIS_CMD_INLINE
,NULL
,1,1,1},
832 {"zremrangebyrank",zremrangebyrankCommand
,4,REDIS_CMD_INLINE
,NULL
,1,1,1},
833 {"zunionstore",zunionstoreCommand
,-4,REDIS_CMD_INLINE
|REDIS_CMD_DENYOOM
,zunionInterBlockClientOnSwappedKeys
,0,0,0},
834 {"zinterstore",zinterstoreCommand
,-4,REDIS_CMD_INLINE
|REDIS_CMD_DENYOOM
,zunionInterBlockClientOnSwappedKeys
,0,0,0},
835 {"zrange",zrangeCommand
,-4,REDIS_CMD_INLINE
,NULL
,1,1,1},
836 {"zrangebyscore",zrangebyscoreCommand
,-4,REDIS_CMD_INLINE
,NULL
,1,1,1},
837 {"zcount",zcountCommand
,4,REDIS_CMD_INLINE
,NULL
,1,1,1},
838 {"zrevrange",zrevrangeCommand
,-4,REDIS_CMD_INLINE
,NULL
,1,1,1},
839 {"zcard",zcardCommand
,2,REDIS_CMD_INLINE
,NULL
,1,1,1},
840 {"zscore",zscoreCommand
,3,REDIS_CMD_BULK
|REDIS_CMD_DENYOOM
,NULL
,1,1,1},
841 {"zrank",zrankCommand
,3,REDIS_CMD_BULK
,NULL
,1,1,1},
842 {"zrevrank",zrevrankCommand
,3,REDIS_CMD_BULK
,NULL
,1,1,1},
843 {"hset",hsetCommand
,4,REDIS_CMD_BULK
|REDIS_CMD_DENYOOM
,NULL
,1,1,1},
844 {"hsetnx",hsetnxCommand
,4,REDIS_CMD_BULK
|REDIS_CMD_DENYOOM
,NULL
,1,1,1},
845 {"hget",hgetCommand
,3,REDIS_CMD_BULK
,NULL
,1,1,1},
846 {"hmset",hmsetCommand
,-4,REDIS_CMD_BULK
|REDIS_CMD_DENYOOM
,NULL
,1,1,1},
847 {"hmget",hmgetCommand
,-3,REDIS_CMD_BULK
,NULL
,1,1,1},
848 {"hincrby",hincrbyCommand
,4,REDIS_CMD_INLINE
|REDIS_CMD_DENYOOM
,NULL
,1,1,1},
849 {"hdel",hdelCommand
,3,REDIS_CMD_BULK
,NULL
,1,1,1},
850 {"hlen",hlenCommand
,2,REDIS_CMD_INLINE
,NULL
,1,1,1},
851 {"hkeys",hkeysCommand
,2,REDIS_CMD_INLINE
,NULL
,1,1,1},
852 {"hvals",hvalsCommand
,2,REDIS_CMD_INLINE
,NULL
,1,1,1},
853 {"hgetall",hgetallCommand
,2,REDIS_CMD_INLINE
,NULL
,1,1,1},
854 {"hexists",hexistsCommand
,3,REDIS_CMD_BULK
,NULL
,1,1,1},
855 {"incrby",incrbyCommand
,3,REDIS_CMD_INLINE
|REDIS_CMD_DENYOOM
,NULL
,1,1,1},
856 {"decrby",decrbyCommand
,3,REDIS_CMD_INLINE
|REDIS_CMD_DENYOOM
,NULL
,1,1,1},
857 {"getset",getsetCommand
,3,REDIS_CMD_BULK
|REDIS_CMD_DENYOOM
,NULL
,1,1,1},
858 {"mset",msetCommand
,-3,REDIS_CMD_BULK
|REDIS_CMD_DENYOOM
,NULL
,1,-1,2},
859 {"msetnx",msetnxCommand
,-3,REDIS_CMD_BULK
|REDIS_CMD_DENYOOM
,NULL
,1,-1,2},
860 {"randomkey",randomkeyCommand
,1,REDIS_CMD_INLINE
,NULL
,0,0,0},
861 {"select",selectCommand
,2,REDIS_CMD_INLINE
,NULL
,0,0,0},
862 {"move",moveCommand
,3,REDIS_CMD_INLINE
,NULL
,1,1,1},
863 {"rename",renameCommand
,3,REDIS_CMD_INLINE
,NULL
,1,1,1},
864 {"renamenx",renamenxCommand
,3,REDIS_CMD_INLINE
,NULL
,1,1,1},
865 {"expire",expireCommand
,3,REDIS_CMD_INLINE
,NULL
,0,0,0},
866 {"expireat",expireatCommand
,3,REDIS_CMD_INLINE
,NULL
,0,0,0},
867 {"keys",keysCommand
,2,REDIS_CMD_INLINE
,NULL
,0,0,0},
868 {"dbsize",dbsizeCommand
,1,REDIS_CMD_INLINE
,NULL
,0,0,0},
869 {"auth",authCommand
,2,REDIS_CMD_INLINE
,NULL
,0,0,0},
870 {"ping",pingCommand
,1,REDIS_CMD_INLINE
,NULL
,0,0,0},
871 {"echo",echoCommand
,2,REDIS_CMD_BULK
,NULL
,0,0,0},
872 {"save",saveCommand
,1,REDIS_CMD_INLINE
,NULL
,0,0,0},
873 {"bgsave",bgsaveCommand
,1,REDIS_CMD_INLINE
,NULL
,0,0,0},
874 {"bgrewriteaof",bgrewriteaofCommand
,1,REDIS_CMD_INLINE
,NULL
,0,0,0},
875 {"shutdown",shutdownCommand
,1,REDIS_CMD_INLINE
,NULL
,0,0,0},
876 {"lastsave",lastsaveCommand
,1,REDIS_CMD_INLINE
,NULL
,0,0,0},
877 {"type",typeCommand
,2,REDIS_CMD_INLINE
,NULL
,1,1,1},
878 {"multi",multiCommand
,1,REDIS_CMD_INLINE
,NULL
,0,0,0},
879 {"exec",execCommand
,1,REDIS_CMD_INLINE
|REDIS_CMD_DENYOOM
,execBlockClientOnSwappedKeys
,0,0,0},
880 {"discard",discardCommand
,1,REDIS_CMD_INLINE
,NULL
,0,0,0},
881 {"sync",syncCommand
,1,REDIS_CMD_INLINE
,NULL
,0,0,0},
882 {"flushdb",flushdbCommand
,1,REDIS_CMD_INLINE
,NULL
,0,0,0},
883 {"flushall",flushallCommand
,1,REDIS_CMD_INLINE
,NULL
,0,0,0},
884 {"sort",sortCommand
,-2,REDIS_CMD_INLINE
|REDIS_CMD_DENYOOM
,NULL
,1,1,1},
885 {"info",infoCommand
,1,REDIS_CMD_INLINE
,NULL
,0,0,0},
886 {"monitor",monitorCommand
,1,REDIS_CMD_INLINE
,NULL
,0,0,0},
887 {"ttl",ttlCommand
,2,REDIS_CMD_INLINE
,NULL
,1,1,1},
888 {"slaveof",slaveofCommand
,3,REDIS_CMD_INLINE
,NULL
,0,0,0},
889 {"debug",debugCommand
,-2,REDIS_CMD_INLINE
,NULL
,0,0,0},
890 {"config",configCommand
,-2,REDIS_CMD_BULK
,NULL
,0,0,0},
891 {"subscribe",subscribeCommand
,-2,REDIS_CMD_INLINE
,NULL
,0,0,0},
892 {"unsubscribe",unsubscribeCommand
,-1,REDIS_CMD_INLINE
,NULL
,0,0,0},
893 {"psubscribe",psubscribeCommand
,-2,REDIS_CMD_INLINE
,NULL
,0,0,0},
894 {"punsubscribe",punsubscribeCommand
,-1,REDIS_CMD_INLINE
,NULL
,0,0,0},
895 {"publish",publishCommand
,3,REDIS_CMD_BULK
|REDIS_CMD_FORCE_REPLICATION
,NULL
,0,0,0},
896 {"watch",watchCommand
,-2,REDIS_CMD_INLINE
,NULL
,0,0,0},
897 {"unwatch",unwatchCommand
,1,REDIS_CMD_INLINE
,NULL
,0,0,0}
900 /*============================ Utility functions ============================ */
902 /* Glob-style pattern matching. */
903 static int stringmatchlen(const char *pattern
, int patternLen
,
904 const char *string
, int stringLen
, int nocase
)
909 while (pattern
[1] == '*') {
914 return 1; /* match */
916 if (stringmatchlen(pattern
+1, patternLen
-1,
917 string
, stringLen
, nocase
))
918 return 1; /* match */
922 return 0; /* no match */
926 return 0; /* no match */
936 not = pattern
[0] == '^';
943 if (pattern
[0] == '\\') {
946 if (pattern
[0] == string
[0])
948 } else if (pattern
[0] == ']') {
950 } else if (patternLen
== 0) {
954 } else if (pattern
[1] == '-' && patternLen
>= 3) {
955 int start
= pattern
[0];
956 int end
= pattern
[2];
964 start
= tolower(start
);
970 if (c
>= start
&& c
<= end
)
974 if (pattern
[0] == string
[0])
977 if (tolower((int)pattern
[0]) == tolower((int)string
[0]))
987 return 0; /* no match */
993 if (patternLen
>= 2) {
1000 if (pattern
[0] != string
[0])
1001 return 0; /* no match */
1003 if (tolower((int)pattern
[0]) != tolower((int)string
[0]))
1004 return 0; /* no match */
1012 if (stringLen
== 0) {
1013 while(*pattern
== '*') {
1020 if (patternLen
== 0 && stringLen
== 0)
/* Glob-style match of two NUL-terminated strings: both lengths are taken
 * with strlen() and the work is delegated to stringmatchlen(), whose
 * result is returned unchanged. A non-zero 'nocase' is forwarded as is. */
static int stringmatch(const char *pattern, const char *string, int nocase) {
    int patternLen = strlen(pattern);
    int stringLen = strlen(string);

    return stringmatchlen(pattern, patternLen, string, stringLen, nocase);
}
1029 /* Convert a string representing an amount of memory into the number of
1030 * bytes, so for instance memtoll("1Gb") will return 1073741824 that is
1033 * On parsing error, if *err is not NULL, it's set to 1, otherwise it's
1035 static long long memtoll(const char *p
, int *err
) {
1038 long mul
; /* unit multiplier */
1040 unsigned int digits
;
1043 /* Search the first non digit character. */
1046 while(*u
&& isdigit(*u
)) u
++;
1047 if (*u
== '\0' || !strcasecmp(u
,"b")) {
1049 } else if (!strcasecmp(u
,"k")) {
1051 } else if (!strcasecmp(u
,"kb")) {
1053 } else if (!strcasecmp(u
,"m")) {
1055 } else if (!strcasecmp(u
,"mb")) {
1057 } else if (!strcasecmp(u
,"g")) {
1058 mul
= 1000L*1000*1000;
1059 } else if (!strcasecmp(u
,"gb")) {
1060 mul
= 1024L*1024*1024;
1066 if (digits
>= sizeof(buf
)) {
1070 memcpy(buf
,p
,digits
);
1072 val
= strtoll(buf
,NULL
,10);
/* Convert a long long into a decimal string stored in 's', a buffer with
 * room for 'len' bytes including the nul terminator.
 *
 * Returns the number of characters written, not counting the terminator.
 * When 'len' is too small for the whole number the output is truncated to
 * its first len-1 characters, so the return value can be shorter than the
 * full representation. If 'len' is 0 nothing is written and 0 is returned. */
static int ll2string(char *s, size_t len, long long value) {
    char buf[32], *p;
    unsigned long long v;
    size_t l;

    if (len == 0) return 0;
    /* Negate in unsigned arithmetic: -value overflows for LLONG_MIN. */
    v = (value < 0) ? 0ULL - (unsigned long long)value
                    : (unsigned long long)value;
    p = buf+31; /* point to the last character */
    /* Emit the digits backwards, least significant first. */
    do {
        *p-- = (char)('0'+(v%10));
        v /= 10;
    } while(v);
    if (value < 0) *p-- = '-';
    p++; /* back onto the first character produced */
    l = 32-(size_t)(p-buf);
    if (l+1 > len) l = len-1; /* Make sure it fits, including the nul term */
    memcpy(s,p,l);
    s[l] = '\0';
    return (int)l;
}
1100 static void redisLog(int level
, const char *fmt
, ...) {
1104 fp
= (server
.logfile
== NULL
) ? stdout
: fopen(server
.logfile
,"a");
1108 if (level
>= server
.verbosity
) {
1114 strftime(buf
,64,"%d %b %H:%M:%S",localtime(&now
));
1115 fprintf(fp
,"[%d] %s %c ",(int)getpid(),buf
,c
[level
]);
1116 vfprintf(fp
, fmt
, ap
);
1122 if (server
.logfile
) fclose(fp
);
1125 /*====================== Hash table type implementation ==================== */
1127 /* This is a hash table type that uses the SDS dynamic strings library as
1128 * keys and redis objects as values (objects can hold SDS strings,
1131 static void dictVanillaFree(void *privdata
, void *val
)
1133 DICT_NOTUSED(privdata
);
1137 static void dictListDestructor(void *privdata
, void *val
)
1139 DICT_NOTUSED(privdata
);
1140 listRelease((list
*)val
);
1143 static int dictSdsKeyCompare(void *privdata
, const void *key1
,
1147 DICT_NOTUSED(privdata
);
1149 l1
= sdslen((sds
)key1
);
1150 l2
= sdslen((sds
)key2
);
1151 if (l1
!= l2
) return 0;
1152 return memcmp(key1
, key2
, l1
) == 0;
1155 static void dictRedisObjectDestructor(void *privdata
, void *val
)
1157 DICT_NOTUSED(privdata
);
1159 if (val
== NULL
) return; /* Values of swapped out keys as set to NULL */
1163 static void dictSdsDestructor(void *privdata
, void *val
)
1165 DICT_NOTUSED(privdata
);
1170 static int dictObjKeyCompare(void *privdata
, const void *key1
,
1173 const robj
*o1
= key1
, *o2
= key2
;
1174 return dictSdsKeyCompare(privdata
,o1
->ptr
,o2
->ptr
);
1177 static unsigned int dictObjHash(const void *key
) {
1178 const robj
*o
= key
;
1179 return dictGenHashFunction(o
->ptr
, sdslen((sds
)o
->ptr
));
1182 static unsigned int dictSdsHash(const void *key
) {
1183 return dictGenHashFunction((unsigned char*)key
, sdslen((char*)key
));
1186 static int dictEncObjKeyCompare(void *privdata
, const void *key1
,
1189 robj
*o1
= (robj
*) key1
, *o2
= (robj
*) key2
;
1192 if (o1
->encoding
== REDIS_ENCODING_INT
&&
1193 o2
->encoding
== REDIS_ENCODING_INT
)
1194 return o1
->ptr
== o2
->ptr
;
1196 o1
= getDecodedObject(o1
);
1197 o2
= getDecodedObject(o2
);
1198 cmp
= dictSdsKeyCompare(privdata
,o1
->ptr
,o2
->ptr
);
1204 static unsigned int dictEncObjHash(const void *key
) {
1205 robj
*o
= (robj
*) key
;
1207 if (o
->encoding
== REDIS_ENCODING_RAW
) {
1208 return dictGenHashFunction(o
->ptr
, sdslen((sds
)o
->ptr
));
1210 if (o
->encoding
== REDIS_ENCODING_INT
) {
1214 len
= ll2string(buf
,32,(long)o
->ptr
);
1215 return dictGenHashFunction((unsigned char*)buf
, len
);
1219 o
= getDecodedObject(o
);
1220 hash
= dictGenHashFunction(o
->ptr
, sdslen((sds
)o
->ptr
));
1228 static dictType setDictType
= {
1229 dictEncObjHash
, /* hash function */
1232 dictEncObjKeyCompare
, /* key compare */
1233 dictRedisObjectDestructor
, /* key destructor */
1234 NULL
/* val destructor */
1237 /* Sorted sets hash (note: a skiplist is used in addition to the hash table) */
1238 static dictType zsetDictType
= {
1239 dictEncObjHash
, /* hash function */
1242 dictEncObjKeyCompare
, /* key compare */
1243 dictRedisObjectDestructor
, /* key destructor */
1244 dictVanillaFree
/* val destructor of malloc(sizeof(double)) */
1247 /* Db->dict, keys are sds strings, vals are Redis objects. */
1248 static dictType dbDictType
= {
1249 dictSdsHash
, /* hash function */
1252 dictSdsKeyCompare
, /* key compare */
1253 dictSdsDestructor
, /* key destructor */
1254 dictRedisObjectDestructor
/* val destructor */
1258 static dictType keyptrDictType
= {
1259 dictSdsHash
, /* hash function */
1262 dictSdsKeyCompare
, /* key compare */
1263 NULL
, /* key destructor */
1264 NULL
/* val destructor */
1267 /* Hash type hash table (note that small hashes are represented with zipmaps) */
1268 static dictType hashDictType
= {
1269 dictEncObjHash
, /* hash function */
1272 dictEncObjKeyCompare
, /* key compare */
1273 dictRedisObjectDestructor
, /* key destructor */
1274 dictRedisObjectDestructor
/* val destructor */
1277 /* Keylist hash table type has unencoded redis objects as keys and
1278 * lists as values. It's used for blocking operations (BLPOP) and to
1279 * map swapped keys to a list of clients waiting for these keys to be loaded. */
1280 static dictType keylistDictType
= {
1281 dictObjHash
, /* hash function */
1284 dictObjKeyCompare
, /* key compare */
1285 dictRedisObjectDestructor
, /* key destructor */
1286 dictListDestructor
/* val destructor */
/* Forward declaration; (void) makes this a real prototype so calls with
 * arguments are rejected at compile time. */
static void version(void);
1291 /* ========================= Random utility functions ======================= */
1293 /* Redis generally does not try to recover from out of memory conditions
1294 * when allocating objects or strings, it is not clear if it will be possible
1295 * to report this condition to the client since the networking layer itself
1296 * is based on heap allocation for send buffers, so we simply abort.
1297 * At least the code will be simpler to read... */
1298 static void oom(const char *msg
) {
1299 redisLog(REDIS_WARNING
, "%s: Out of memory\n",msg
);
1304 /* ====================== Redis server networking stuff ===================== */
1305 static void closeTimedoutClients(void) {
1308 time_t now
= time(NULL
);
1311 listRewind(server
.clients
,&li
);
1312 while ((ln
= listNext(&li
)) != NULL
) {
1313 c
= listNodeValue(ln
);
1314 if (server
.maxidletime
&&
1315 !(c
->flags
& REDIS_SLAVE
) && /* no timeout for slaves */
1316 !(c
->flags
& REDIS_MASTER
) && /* no timeout for masters */
1317 dictSize(c
->pubsub_channels
) == 0 && /* no timeout for pubsub */
1318 listLength(c
->pubsub_patterns
) == 0 &&
1319 (now
- c
->lastinteraction
> server
.maxidletime
))
1321 redisLog(REDIS_VERBOSE
,"Closing idle client");
1323 } else if (c
->flags
& REDIS_BLOCKED
) {
1324 if (c
->blockingto
!= 0 && c
->blockingto
< now
) {
1325 addReply(c
,shared
.nullmultibulk
);
1326 unblockClientWaitingData(c
);
1332 static int htNeedsResize(dict
*dict
) {
1333 long long size
, used
;
1335 size
= dictSlots(dict
);
1336 used
= dictSize(dict
);
1337 return (size
&& used
&& size
> DICT_HT_INITIAL_SIZE
&&
1338 (used
*100/size
< REDIS_HT_MINFILL
));
1341 /* If the percentage of used slots in the HT reaches REDIS_HT_MINFILL
1342 * we resize the hash table to save memory */
1343 static void tryResizeHashTables(void) {
1346 for (j
= 0; j
< server
.dbnum
; j
++) {
1347 if (htNeedsResize(server
.db
[j
].dict
))
1348 dictResize(server
.db
[j
].dict
);
1349 if (htNeedsResize(server
.db
[j
].expires
))
1350 dictResize(server
.db
[j
].expires
);
1354 /* Our hash table implementation performs rehashing incrementally while
1355 * we write/read from the hash table. Still if the server is idle, the hash
1356 * table will use two tables for a long time. So we try to use 1 millisecond
1357 * of CPU time at every serverCron() loop in order to rehash some key. */
1358 static void incrementallyRehash(void) {
1361 for (j
= 0; j
< server
.dbnum
; j
++) {
1362 if (dictIsRehashing(server
.db
[j
].dict
)) {
1363 dictRehashMilliseconds(server
.db
[j
].dict
,1);
1364 break; /* already used our millisecond for this loop... */
1369 /* A background saving child (BGSAVE) terminated its work. Handle this. */
1370 void backgroundSaveDoneHandler(int statloc
) {
1371 int exitcode
= WEXITSTATUS(statloc
);
1372 int bysignal
= WIFSIGNALED(statloc
);
1374 if (!bysignal
&& exitcode
== 0) {
1375 redisLog(REDIS_NOTICE
,
1376 "Background saving terminated with success");
1378 server
.lastsave
= time(NULL
);
1379 } else if (!bysignal
&& exitcode
!= 0) {
1380 redisLog(REDIS_WARNING
, "Background saving error");
1382 redisLog(REDIS_WARNING
,
1383 "Background saving terminated by signal %d", WTERMSIG(statloc
));
1384 rdbRemoveTempFile(server
.bgsavechildpid
);
1386 server
.bgsavechildpid
= -1;
1387 /* Possibly there are slaves waiting for a BGSAVE in order to be served
1388 * (the first stage of SYNC is a bulk transfer of dump.rdb) */
1389 updateSlavesWaitingBgsave(exitcode
== 0 ? REDIS_OK
: REDIS_ERR
);
1392 /* A background append only file rewriting (BGREWRITEAOF) terminated its work.
1394 void backgroundRewriteDoneHandler(int statloc
) {
1395 int exitcode
= WEXITSTATUS(statloc
);
1396 int bysignal
= WIFSIGNALED(statloc
);
1398 if (!bysignal
&& exitcode
== 0) {
1402 redisLog(REDIS_NOTICE
,
1403 "Background append only file rewriting terminated with success");
1404 /* Now it's time to flush the differences accumulated by the parent */
1405 snprintf(tmpfile
,256,"temp-rewriteaof-bg-%d.aof", (int) server
.bgrewritechildpid
);
1406 fd
= open(tmpfile
,O_WRONLY
|O_APPEND
);
1408 redisLog(REDIS_WARNING
, "Not able to open the temp append only file produced by the child: %s", strerror(errno
));
1411 /* Flush our data... */
1412 if (write(fd
,server
.bgrewritebuf
,sdslen(server
.bgrewritebuf
)) !=
1413 (signed) sdslen(server
.bgrewritebuf
)) {
1414 redisLog(REDIS_WARNING
, "Error or short write trying to flush the parent diff of the append log file in the child temp file: %s", strerror(errno
));
1418 redisLog(REDIS_NOTICE
,"Parent diff flushed into the new append log file with success (%lu bytes)",sdslen(server
.bgrewritebuf
));
1419 /* Now our work is to rename the temp file into the stable file. And
1420 * switch the file descriptor used by the server for append only. */
1421 if (rename(tmpfile
,server
.appendfilename
) == -1) {
1422 redisLog(REDIS_WARNING
,"Can't rename the temp append only file into the stable one: %s", strerror(errno
));
1426 /* Mission completed... almost */
1427 redisLog(REDIS_NOTICE
,"Append only file successfully rewritten.");
1428 if (server
.appendfd
!= -1) {
1429 /* If append only is actually enabled... */
1430 close(server
.appendfd
);
1431 server
.appendfd
= fd
;
1432 if (server
.appendfsync
!= APPENDFSYNC_NO
) aof_fsync(fd
);
1433 server
.appendseldb
= -1; /* Make sure it will issue SELECT */
1434 redisLog(REDIS_NOTICE
,"The new append only file was selected for future appends.");
1436 /* If append only is disabled we just generate a dump in this
1437 * format. Why not? */
1440 } else if (!bysignal
&& exitcode
!= 0) {
1441 redisLog(REDIS_WARNING
, "Background append only file rewriting error");
1443 redisLog(REDIS_WARNING
,
1444 "Background append only file rewriting terminated by signal %d",
1448 sdsfree(server
.bgrewritebuf
);
1449 server
.bgrewritebuf
= sdsempty();
1450 aofRemoveTempFile(server
.bgrewritechildpid
);
1451 server
.bgrewritechildpid
= -1;
1454 /* This function is called once a background process of some kind terminates,
1455 * as we want to avoid resizing the hash tables when there is a child in order
1456 * to play well with copy-on-write (otherwise when a resize happens lots of
1457 * memory pages are copied). The goal of this function is to update the ability
1458 * for dict.c to resize the hash tables according to whether or not we have
1459 * running children. */
1460 static void updateDictResizePolicy(void) {
1461 if (server
.bgsavechildpid
== -1 && server
.bgrewritechildpid
== -1)
1464 dictDisableResize();
1467 static int serverCron(struct aeEventLoop
*eventLoop
, long long id
, void *clientData
) {
1468 int j
, loops
= server
.cronloops
++;
1469 REDIS_NOTUSED(eventLoop
);
1471 REDIS_NOTUSED(clientData
);
1473 /* We take a cached value of the unix time in the global state because
1474 * with virtual memory and aging there is the need to store the current time
1475 * in objects at every object access, and accuracy is not needed.
1476 * To access a global var is faster than calling time(NULL) */
1477 server
.unixtime
= time(NULL
);
1478 /* We have just 21 bits per object for LRU information.
1479 * So we use an (eventually wrapping) LRU clock with minutes resolution.
1481 * When we need to select what object to swap, we compute the minimum
1482 * time distance between the current lruclock and the object last access
1483 * lruclock info. Even if clocks will wrap on overflow, there is
1484 * the interesting property that we are sure that at least
1485 * ABS(A-B) minutes passed between current time and timestamp B.
1487 * This is not precise but we don't need at all precision, but just
1488 * something statistically reasonable.
1490 server
.lruclock
= (time(NULL
)/60)&((1<<21)-1);
1492 /* We received a SIGTERM, shutting down here in a safe way, as it is
1493 * not ok doing so inside the signal handler. */
1494 if (server
.shutdown_asap
) {
1495 if (prepareForShutdown() == REDIS_OK
) exit(0);
1496 redisLog(REDIS_WARNING
,"SIGTERM received but errors trying to shut down the server, check the logs for more information");
1499 /* Show some info about non-empty databases */
1500 for (j
= 0; j
< server
.dbnum
; j
++) {
1501 long long size
, used
, vkeys
;
1503 size
= dictSlots(server
.db
[j
].dict
);
1504 used
= dictSize(server
.db
[j
].dict
);
1505 vkeys
= dictSize(server
.db
[j
].expires
);
1506 if (!(loops
% 50) && (used
|| vkeys
)) {
1507 redisLog(REDIS_VERBOSE
,"DB %d: %lld keys (%lld volatile) in %lld slots HT.",j
,used
,vkeys
,size
);
1508 /* dictPrintStats(server.dict); */
1512 /* We don't want to resize the hash tables while a background saving
1513 * is in progress: the saving child is created using fork() that is
1514 * implemented with a copy-on-write semantic in most modern systems, so
1515 * if we resize the HT while there is the saving child at work actually
1516 * a lot of memory movements in the parent will cause a lot of pages
1518 if (server
.bgsavechildpid
== -1 && server
.bgrewritechildpid
== -1) {
1519 if (!(loops
% 10)) tryResizeHashTables();
1520 if (server
.activerehashing
) incrementallyRehash();
1523 /* Show information about connected clients */
1524 if (!(loops
% 50)) {
1525 redisLog(REDIS_VERBOSE
,"%d clients connected (%d slaves), %zu bytes in use",
1526 listLength(server
.clients
)-listLength(server
.slaves
),
1527 listLength(server
.slaves
),
1528 zmalloc_used_memory());
1531 /* Close connections of timedout clients */
1532 if ((server
.maxidletime
&& !(loops
% 100)) || server
.blpop_blocked_clients
)
1533 closeTimedoutClients();
1535 /* Check if a background saving or AOF rewrite in progress terminated */
1536 if (server
.bgsavechildpid
!= -1 || server
.bgrewritechildpid
!= -1) {
1540 if ((pid
= wait3(&statloc
,WNOHANG
,NULL
)) != 0) {
1541 if (pid
== server
.bgsavechildpid
) {
1542 backgroundSaveDoneHandler(statloc
);
1544 backgroundRewriteDoneHandler(statloc
);
1546 updateDictResizePolicy();
1549 /* If there is not a background saving in progress check if
1550 * we have to save now */
1551 time_t now
= time(NULL
);
1552 for (j
= 0; j
< server
.saveparamslen
; j
++) {
1553 struct saveparam
*sp
= server
.saveparams
+j
;
1555 if (server
.dirty
>= sp
->changes
&&
1556 now
-server
.lastsave
> sp
->seconds
) {
1557 redisLog(REDIS_NOTICE
,"%d changes in %d seconds. Saving...",
1558 sp
->changes
, sp
->seconds
);
1559 rdbSaveBackground(server
.dbfilename
);
1565 /* Try to expire a few timed out keys. The algorithm used is adaptive and
1566 * will use few CPU cycles if there are few expiring keys, otherwise
1567 * it will get more aggressive to avoid that too much memory is used by
1568 * keys that can be removed from the keyspace. */
1569 for (j
= 0; j
< server
.dbnum
; j
++) {
1571 redisDb
*db
= server
.db
+j
;
1573 /* Continue to expire if at the end of the cycle more than 25%
1574 * of the keys were expired. */
1576 long num
= dictSize(db
->expires
);
1577 time_t now
= time(NULL
);
1580 if (num
> REDIS_EXPIRELOOKUPS_PER_CRON
)
1581 num
= REDIS_EXPIRELOOKUPS_PER_CRON
;
1586 if ((de
= dictGetRandomKey(db
->expires
)) == NULL
) break;
1587 t
= (time_t) dictGetEntryVal(de
);
1589 sds key
= dictGetEntryKey(de
);
1590 robj
*keyobj
= createStringObject(key
,sdslen(key
));
1592 dbDelete(db
,keyobj
);
1593 decrRefCount(keyobj
);
1595 server
.stat_expiredkeys
++;
1598 } while (expired
> REDIS_EXPIRELOOKUPS_PER_CRON
/4);
1601 /* Swap a few keys on disk if we are over the memory limit and VM
1602 * is enabled. Try to free objects from the free list first. */
1603 if (vmCanSwapOut()) {
1604 while (server
.vm_enabled
&& zmalloc_used_memory() >
1605 server
.vm_max_memory
)
1609 if (tryFreeOneObjectFromFreelist() == REDIS_OK
) continue;
1610 retval
= (server
.vm_max_threads
== 0) ?
1611 vmSwapOneObjectBlocking() :
1612 vmSwapOneObjectThreaded();
1613 if (retval
== REDIS_ERR
&& !(loops
% 300) &&
1614 zmalloc_used_memory() >
1615 (server
.vm_max_memory
+server
.vm_max_memory
/10))
1617 redisLog(REDIS_WARNING
,"WARNING: vm-max-memory limit exceeded by more than 10%% but unable to swap more objects out!");
1619 /* Note that when using threaded I/O we free just one object,
1620 * because anyway when the I/O thread in charge to swap this
1621 * object out will finish, the handler of completed jobs
1622 * will try to swap more objects if we are still out of memory. */
1623 if (retval
== REDIS_ERR
|| server
.vm_max_threads
> 0) break;
1627 /* Check if we should connect to a MASTER */
1628 if (server
.replstate
== REDIS_REPL_CONNECT
&& !(loops
% 10)) {
1629 redisLog(REDIS_NOTICE
,"Connecting to MASTER...");
1630 if (syncWithMaster() == REDIS_OK
) {
1631 redisLog(REDIS_NOTICE
,"MASTER <-> SLAVE sync succeeded");
1632 if (server
.appendonly
) rewriteAppendOnlyFileBackground();
1638 /* This function gets called every time Redis is entering the
1639 * main loop of the event driven library, that is, before to sleep
1640 * for ready file descriptors. */
1641 static void beforeSleep(struct aeEventLoop
*eventLoop
) {
1642 REDIS_NOTUSED(eventLoop
);
1644 /* Awake clients that got all the swapped keys they requested */
1645 if (server
.vm_enabled
&& listLength(server
.io_ready_clients
)) {
1649 listRewind(server
.io_ready_clients
,&li
);
1650 while((ln
= listNext(&li
))) {
1651 redisClient
*c
= ln
->value
;
1652 struct redisCommand
*cmd
;
1654 /* Resume the client. */
1655 listDelNode(server
.io_ready_clients
,ln
);
1656 c
->flags
&= (~REDIS_IO_WAIT
);
1657 server
.vm_blocked_clients
--;
1658 aeCreateFileEvent(server
.el
, c
->fd
, AE_READABLE
,
1659 readQueryFromClient
, c
);
1660 cmd
= lookupCommand(c
->argv
[0]->ptr
);
1661 assert(cmd
!= NULL
);
1664 /* There may be more data to process in the input buffer. */
1665 if (c
->querybuf
&& sdslen(c
->querybuf
) > 0)
1666 processInputBuffer(c
);
1669 /* Write the AOF buffer on disk */
1670 flushAppendOnlyFile();
1673 static void createSharedObjects(void) {
1676 shared
.crlf
= createObject(REDIS_STRING
,sdsnew("\r\n"));
1677 shared
.ok
= createObject(REDIS_STRING
,sdsnew("+OK\r\n"));
1678 shared
.err
= createObject(REDIS_STRING
,sdsnew("-ERR\r\n"));
1679 shared
.emptybulk
= createObject(REDIS_STRING
,sdsnew("$0\r\n\r\n"));
1680 shared
.czero
= createObject(REDIS_STRING
,sdsnew(":0\r\n"));
1681 shared
.cone
= createObject(REDIS_STRING
,sdsnew(":1\r\n"));
1682 shared
.cnegone
= createObject(REDIS_STRING
,sdsnew(":-1\r\n"));
1683 shared
.nullbulk
= createObject(REDIS_STRING
,sdsnew("$-1\r\n"));
1684 shared
.nullmultibulk
= createObject(REDIS_STRING
,sdsnew("*-1\r\n"));
1685 shared
.emptymultibulk
= createObject(REDIS_STRING
,sdsnew("*0\r\n"));
1686 shared
.pong
= createObject(REDIS_STRING
,sdsnew("+PONG\r\n"));
1687 shared
.queued
= createObject(REDIS_STRING
,sdsnew("+QUEUED\r\n"));
1688 shared
.wrongtypeerr
= createObject(REDIS_STRING
,sdsnew(
1689 "-ERR Operation against a key holding the wrong kind of value\r\n"));
1690 shared
.nokeyerr
= createObject(REDIS_STRING
,sdsnew(
1691 "-ERR no such key\r\n"));
1692 shared
.syntaxerr
= createObject(REDIS_STRING
,sdsnew(
1693 "-ERR syntax error\r\n"));
1694 shared
.sameobjecterr
= createObject(REDIS_STRING
,sdsnew(
1695 "-ERR source and destination objects are the same\r\n"));
1696 shared
.outofrangeerr
= createObject(REDIS_STRING
,sdsnew(
1697 "-ERR index out of range\r\n"));
1698 shared
.space
= createObject(REDIS_STRING
,sdsnew(" "));
1699 shared
.colon
= createObject(REDIS_STRING
,sdsnew(":"));
1700 shared
.plus
= createObject(REDIS_STRING
,sdsnew("+"));
1701 shared
.select0
= createStringObject("select 0\r\n",10);
1702 shared
.select1
= createStringObject("select 1\r\n",10);
1703 shared
.select2
= createStringObject("select 2\r\n",10);
1704 shared
.select3
= createStringObject("select 3\r\n",10);
1705 shared
.select4
= createStringObject("select 4\r\n",10);
1706 shared
.select5
= createStringObject("select 5\r\n",10);
1707 shared
.select6
= createStringObject("select 6\r\n",10);
1708 shared
.select7
= createStringObject("select 7\r\n",10);
1709 shared
.select8
= createStringObject("select 8\r\n",10);
1710 shared
.select9
= createStringObject("select 9\r\n",10);
1711 shared
.messagebulk
= createStringObject("$7\r\nmessage\r\n",13);
1712 shared
.pmessagebulk
= createStringObject("$8\r\npmessage\r\n",14);
1713 shared
.subscribebulk
= createStringObject("$9\r\nsubscribe\r\n",15);
1714 shared
.unsubscribebulk
= createStringObject("$11\r\nunsubscribe\r\n",18);
1715 shared
.psubscribebulk
= createStringObject("$10\r\npsubscribe\r\n",17);
1716 shared
.punsubscribebulk
= createStringObject("$12\r\npunsubscribe\r\n",19);
1717 shared
.mbulk3
= createStringObject("*3\r\n",4);
1718 shared
.mbulk4
= createStringObject("*4\r\n",4);
1719 for (j
= 0; j
< REDIS_SHARED_INTEGERS
; j
++) {
1720 shared
.integers
[j
] = createObject(REDIS_STRING
,(void*)(long)j
);
1721 shared
.integers
[j
]->encoding
= REDIS_ENCODING_INT
;
1725 static void appendServerSaveParams(time_t seconds
, int changes
) {
1726 server
.saveparams
= zrealloc(server
.saveparams
,sizeof(struct saveparam
)*(server
.saveparamslen
+1));
1727 server
.saveparams
[server
.saveparamslen
].seconds
= seconds
;
1728 server
.saveparams
[server
.saveparamslen
].changes
= changes
;
1729 server
.saveparamslen
++;
1732 static void resetServerSaveParams() {
1733 zfree(server
.saveparams
);
1734 server
.saveparams
= NULL
;
1735 server
.saveparamslen
= 0;
1738 static void initServerConfig() {
1739 server
.dbnum
= REDIS_DEFAULT_DBNUM
;
1740 server
.port
= REDIS_SERVERPORT
;
1741 server
.verbosity
= REDIS_VERBOSE
;
1742 server
.maxidletime
= REDIS_MAXIDLETIME
;
1743 server
.saveparams
= NULL
;
1744 server
.logfile
= NULL
; /* NULL = log on standard output */
1745 server
.bindaddr
= NULL
;
1746 server
.glueoutputbuf
= 1;
1747 server
.daemonize
= 0;
1748 server
.appendonly
= 0;
1749 server
.appendfsync
= APPENDFSYNC_EVERYSEC
;
1750 server
.no_appendfsync_on_rewrite
= 0;
1751 server
.lastfsync
= time(NULL
);
1752 server
.appendfd
= -1;
1753 server
.appendseldb
= -1; /* Make sure the first time will not match */
1754 server
.pidfile
= zstrdup("/var/run/redis.pid");
1755 server
.dbfilename
= zstrdup("dump.rdb");
1756 server
.appendfilename
= zstrdup("appendonly.aof");
1757 server
.requirepass
= NULL
;
1758 server
.rdbcompression
= 1;
1759 server
.activerehashing
= 1;
1760 server
.maxclients
= 0;
1761 server
.blpop_blocked_clients
= 0;
1762 server
.maxmemory
= 0;
1763 server
.vm_enabled
= 0;
1764 server
.vm_swap_file
= zstrdup("/tmp/redis-%p.vm");
1765 server
.vm_page_size
= 256; /* 256 bytes per page */
1766 server
.vm_pages
= 1024*1024*100; /* 104 millions of pages */
1767 server
.vm_max_memory
= 1024LL*1024*1024*1; /* 1 GB of RAM */
1768 server
.vm_max_threads
= 4;
1769 server
.vm_blocked_clients
= 0;
1770 server
.hash_max_zipmap_entries
= REDIS_HASH_MAX_ZIPMAP_ENTRIES
;
1771 server
.hash_max_zipmap_value
= REDIS_HASH_MAX_ZIPMAP_VALUE
;
1772 server
.list_max_ziplist_entries
= REDIS_LIST_MAX_ZIPLIST_ENTRIES
;
1773 server
.list_max_ziplist_value
= REDIS_LIST_MAX_ZIPLIST_VALUE
;
1774 server
.shutdown_asap
= 0;
1776 resetServerSaveParams();
1778 appendServerSaveParams(60*60,1); /* save after 1 hour and 1 change */
1779 appendServerSaveParams(300,100); /* save after 5 minutes and 100 changes */
1780 appendServerSaveParams(60,10000); /* save after 1 minute and 10000 changes */
1781 /* Replication related */
1783 server
.masterauth
= NULL
;
1784 server
.masterhost
= NULL
;
1785 server
.masterport
= 6379;
1786 server
.master
= NULL
;
1787 server
.replstate
= REDIS_REPL_NONE
;
1789 /* Double constants initialization */
1791 R_PosInf
= 1.0/R_Zero
;
1792 R_NegInf
= -1.0/R_Zero
;
1793 R_Nan
= R_Zero
/R_Zero
;
1796 static void initServer() {
1799 signal(SIGHUP
, SIG_IGN
);
1800 signal(SIGPIPE
, SIG_IGN
);
1801 setupSigSegvAction();
1803 server
.devnull
= fopen("/dev/null","w");
1804 if (server
.devnull
== NULL
) {
1805 redisLog(REDIS_WARNING
, "Can't open /dev/null: %s", server
.neterr
);
1808 server
.clients
= listCreate();
1809 server
.slaves
= listCreate();
1810 server
.monitors
= listCreate();
1811 server
.objfreelist
= listCreate();
1812 createSharedObjects();
1813 server
.el
= aeCreateEventLoop();
1814 server
.db
= zmalloc(sizeof(redisDb
)*server
.dbnum
);
1815 server
.fd
= anetTcpServer(server
.neterr
, server
.port
, server
.bindaddr
);
1816 if (server
.fd
== -1) {
1817 redisLog(REDIS_WARNING
, "Opening TCP port: %s", server
.neterr
);
1820 for (j
= 0; j
< server
.dbnum
; j
++) {
1821 server
.db
[j
].dict
= dictCreate(&dbDictType
,NULL
);
1822 server
.db
[j
].expires
= dictCreate(&keyptrDictType
,NULL
);
1823 server
.db
[j
].blocking_keys
= dictCreate(&keylistDictType
,NULL
);
1824 server
.db
[j
].watched_keys
= dictCreate(&keylistDictType
,NULL
);
1825 if (server
.vm_enabled
)
1826 server
.db
[j
].io_keys
= dictCreate(&keylistDictType
,NULL
);
1827 server
.db
[j
].id
= j
;
1829 server
.pubsub_channels
= dictCreate(&keylistDictType
,NULL
);
1830 server
.pubsub_patterns
= listCreate();
1831 listSetFreeMethod(server
.pubsub_patterns
,freePubsubPattern
);
1832 listSetMatchMethod(server
.pubsub_patterns
,listMatchPubsubPattern
);
1833 server
.cronloops
= 0;
1834 server
.bgsavechildpid
= -1;
1835 server
.bgrewritechildpid
= -1;
1836 server
.bgrewritebuf
= sdsempty();
1837 server
.aofbuf
= sdsempty();
1838 server
.lastsave
= time(NULL
);
1840 server
.stat_numcommands
= 0;
1841 server
.stat_numconnections
= 0;
1842 server
.stat_expiredkeys
= 0;
1843 server
.stat_starttime
= time(NULL
);
1844 server
.unixtime
= time(NULL
);
1845 aeCreateTimeEvent(server
.el
, 1, serverCron
, NULL
, NULL
);
1846 if (aeCreateFileEvent(server
.el
, server
.fd
, AE_READABLE
,
1847 acceptHandler
, NULL
) == AE_ERR
) oom("creating file event");
1849 if (server
.appendonly
) {
1850 server
.appendfd
= open(server
.appendfilename
,O_WRONLY
|O_APPEND
|O_CREAT
,0644);
1851 if (server
.appendfd
== -1) {
1852 redisLog(REDIS_WARNING
, "Can't open the append-only file: %s",
1858 if (server
.vm_enabled
) vmInit();
1861 /* Empty the whole database */
1862 static long long emptyDb() {
1864 long long removed
= 0;
1866 for (j
= 0; j
< server
.dbnum
; j
++) {
1867 removed
+= dictSize(server
.db
[j
].dict
);
1868 dictEmpty(server
.db
[j
].dict
);
1869 dictEmpty(server
.db
[j
].expires
);
/* Convert a "yes"/"no" configuration value (case insensitive) to an int.
 *
 * Returns 1 for "yes", 0 for "no" and -1 for anything else. The -1 result
 * is what the configuration loader tests to reject a malformed directive,
 * so every path of this function must return a value. */
static int yesnotoi(char *s) {
    if (!strcasecmp(s,"yes")) return 1;
    if (!strcasecmp(s,"no")) return 0;
    return -1;
}
1880 /* I agree, this is a very rudimental way to load a configuration...
1881 will improve later if the config gets more complex */
1882 static void loadServerConfig(char *filename
) {
1884 char buf
[REDIS_CONFIGLINE_MAX
+1], *err
= NULL
;
1888 if (filename
[0] == '-' && filename
[1] == '\0')
1891 if ((fp
= fopen(filename
,"r")) == NULL
) {
1892 redisLog(REDIS_WARNING
, "Fatal error, can't open config file '%s'", filename
);
1897 while(fgets(buf
,REDIS_CONFIGLINE_MAX
+1,fp
) != NULL
) {
1903 line
= sdstrim(line
," \t\r\n");
1905 /* Skip comments and blank lines*/
1906 if (line
[0] == '#' || line
[0] == '\0') {
1911 /* Split into arguments */
1912 argv
= sdssplitlen(line
,sdslen(line
)," ",1,&argc
);
1913 sdstolower(argv
[0]);
1915 /* Execute config directives */
1916 if (!strcasecmp(argv
[0],"timeout") && argc
== 2) {
1917 server
.maxidletime
= atoi(argv
[1]);
1918 if (server
.maxidletime
< 0) {
1919 err
= "Invalid timeout value"; goto loaderr
;
1921 } else if (!strcasecmp(argv
[0],"port") && argc
== 2) {
1922 server
.port
= atoi(argv
[1]);
1923 if (server
.port
< 1 || server
.port
> 65535) {
1924 err
= "Invalid port"; goto loaderr
;
1926 } else if (!strcasecmp(argv
[0],"bind") && argc
== 2) {
1927 server
.bindaddr
= zstrdup(argv
[1]);
1928 } else if (!strcasecmp(argv
[0],"save") && argc
== 3) {
1929 int seconds
= atoi(argv
[1]);
1930 int changes
= atoi(argv
[2]);
1931 if (seconds
< 1 || changes
< 0) {
1932 err
= "Invalid save parameters"; goto loaderr
;
1934 appendServerSaveParams(seconds
,changes
);
1935 } else if (!strcasecmp(argv
[0],"dir") && argc
== 2) {
1936 if (chdir(argv
[1]) == -1) {
1937 redisLog(REDIS_WARNING
,"Can't chdir to '%s': %s",
1938 argv
[1], strerror(errno
));
1941 } else if (!strcasecmp(argv
[0],"loglevel") && argc
== 2) {
1942 if (!strcasecmp(argv
[1],"debug")) server
.verbosity
= REDIS_DEBUG
;
1943 else if (!strcasecmp(argv
[1],"verbose")) server
.verbosity
= REDIS_VERBOSE
;
1944 else if (!strcasecmp(argv
[1],"notice")) server
.verbosity
= REDIS_NOTICE
;
1945 else if (!strcasecmp(argv
[1],"warning")) server
.verbosity
= REDIS_WARNING
;
1947 err
= "Invalid log level. Must be one of debug, notice, warning";
1950 } else if (!strcasecmp(argv
[0],"logfile") && argc
== 2) {
1953 server
.logfile
= zstrdup(argv
[1]);
1954 if (!strcasecmp(server
.logfile
,"stdout")) {
1955 zfree(server
.logfile
);
1956 server
.logfile
= NULL
;
1958 if (server
.logfile
) {
1959 /* Test if we are able to open the file. The server will not
1960 * be able to abort just for this problem later... */
1961 logfp
= fopen(server
.logfile
,"a");
1962 if (logfp
== NULL
) {
1963 err
= sdscatprintf(sdsempty(),
1964 "Can't open the log file: %s", strerror(errno
));
1969 } else if (!strcasecmp(argv
[0],"databases") && argc
== 2) {
1970 server
.dbnum
= atoi(argv
[1]);
1971 if (server
.dbnum
< 1) {
1972 err
= "Invalid number of databases"; goto loaderr
;
1974 } else if (!strcasecmp(argv
[0],"include") && argc
== 2) {
1975 loadServerConfig(argv
[1]);
1976 } else if (!strcasecmp(argv
[0],"maxclients") && argc
== 2) {
1977 server
.maxclients
= atoi(argv
[1]);
1978 } else if (!strcasecmp(argv
[0],"maxmemory") && argc
== 2) {
1979 server
.maxmemory
= memtoll(argv
[1],NULL
);
1980 } else if (!strcasecmp(argv
[0],"slaveof") && argc
== 3) {
1981 server
.masterhost
= sdsnew(argv
[1]);
1982 server
.masterport
= atoi(argv
[2]);
1983 server
.replstate
= REDIS_REPL_CONNECT
;
1984 } else if (!strcasecmp(argv
[0],"masterauth") && argc
== 2) {
1985 server
.masterauth
= zstrdup(argv
[1]);
1986 } else if (!strcasecmp(argv
[0],"glueoutputbuf") && argc
== 2) {
1987 if ((server
.glueoutputbuf
= yesnotoi(argv
[1])) == -1) {
1988 err
= "argument must be 'yes' or 'no'"; goto loaderr
;
1990 } else if (!strcasecmp(argv
[0],"rdbcompression") && argc
== 2) {
1991 if ((server
.rdbcompression
= yesnotoi(argv
[1])) == -1) {
1992 err
= "argument must be 'yes' or 'no'"; goto loaderr
;
1994 } else if (!strcasecmp(argv
[0],"activerehashing") && argc
== 2) {
1995 if ((server
.activerehashing
= yesnotoi(argv
[1])) == -1) {
1996 err
= "argument must be 'yes' or 'no'"; goto loaderr
;
1998 } else if (!strcasecmp(argv
[0],"daemonize") && argc
== 2) {
1999 if ((server
.daemonize
= yesnotoi(argv
[1])) == -1) {
2000 err
= "argument must be 'yes' or 'no'"; goto loaderr
;
2002 } else if (!strcasecmp(argv
[0],"appendonly") && argc
== 2) {
2003 if ((server
.appendonly
= yesnotoi(argv
[1])) == -1) {
2004 err
= "argument must be 'yes' or 'no'"; goto loaderr
;
2006 } else if (!strcasecmp(argv
[0],"appendfilename") && argc
== 2) {
2007 zfree(server
.appendfilename
);
2008 server
.appendfilename
= zstrdup(argv
[1]);
2009 } else if (!strcasecmp(argv
[0],"no-appendfsync-on-rewrite")
2011 if ((server
.no_appendfsync_on_rewrite
= yesnotoi(argv
[1])) == -1) {
2012 err
= "argument must be 'yes' or 'no'"; goto loaderr
;
2014 } else if (!strcasecmp(argv
[0],"appendfsync") && argc
== 2) {
2015 if (!strcasecmp(argv
[1],"no")) {
2016 server
.appendfsync
= APPENDFSYNC_NO
;
2017 } else if (!strcasecmp(argv
[1],"always")) {
2018 server
.appendfsync
= APPENDFSYNC_ALWAYS
;
2019 } else if (!strcasecmp(argv
[1],"everysec")) {
2020 server
.appendfsync
= APPENDFSYNC_EVERYSEC
;
2022 err
= "argument must be 'no', 'always' or 'everysec'";
2025 } else if (!strcasecmp(argv
[0],"requirepass") && argc
== 2) {
2026 server
.requirepass
= zstrdup(argv
[1]);
2027 } else if (!strcasecmp(argv
[0],"pidfile") && argc
== 2) {
2028 zfree(server
.pidfile
);
2029 server
.pidfile
= zstrdup(argv
[1]);
2030 } else if (!strcasecmp(argv
[0],"dbfilename") && argc
== 2) {
2031 zfree(server
.dbfilename
);
2032 server
.dbfilename
= zstrdup(argv
[1]);
2033 } else if (!strcasecmp(argv
[0],"vm-enabled") && argc
== 2) {
2034 if ((server
.vm_enabled
= yesnotoi(argv
[1])) == -1) {
2035 err
= "argument must be 'yes' or 'no'"; goto loaderr
;
2037 } else if (!strcasecmp(argv
[0],"vm-swap-file") && argc
== 2) {
2038 zfree(server
.vm_swap_file
);
2039 server
.vm_swap_file
= zstrdup(argv
[1]);
2040 } else if (!strcasecmp(argv
[0],"vm-max-memory") && argc
== 2) {
2041 server
.vm_max_memory
= memtoll(argv
[1],NULL
);
2042 } else if (!strcasecmp(argv
[0],"vm-page-size") && argc
== 2) {
2043 server
.vm_page_size
= memtoll(argv
[1], NULL
);
2044 } else if (!strcasecmp(argv
[0],"vm-pages") && argc
== 2) {
2045 server
.vm_pages
= memtoll(argv
[1], NULL
);
2046 } else if (!strcasecmp(argv
[0],"vm-max-threads") && argc
== 2) {
2047 server
.vm_max_threads
= strtoll(argv
[1], NULL
, 10);
2048 } else if (!strcasecmp(argv
[0],"hash-max-zipmap-entries") && argc
== 2){
2049 server
.hash_max_zipmap_entries
= memtoll(argv
[1], NULL
);
2050 } else if (!strcasecmp(argv
[0],"hash-max-zipmap-value") && argc
== 2){
2051 server
.hash_max_zipmap_value
= memtoll(argv
[1], NULL
);
2052 } else if (!strcasecmp(argv
[0],"list-max-ziplist-entries") && argc
== 2){
2053 server
.list_max_ziplist_entries
= memtoll(argv
[1], NULL
);
2054 } else if (!strcasecmp(argv
[0],"list-max-ziplist-value") && argc
== 2){
2055 server
.list_max_ziplist_value
= memtoll(argv
[1], NULL
);
2057 err
= "Bad directive or wrong number of arguments"; goto loaderr
;
2059 for (j
= 0; j
< argc
; j
++)
2064 if (fp
!= stdin
) fclose(fp
);
2068 fprintf(stderr
, "\n*** FATAL CONFIG FILE ERROR ***\n");
2069 fprintf(stderr
, "Reading the configuration file, at line %d\n", linenum
);
2070 fprintf(stderr
, ">>> '%s'\n", line
);
2071 fprintf(stderr
, "%s\n", err
);
2075 static void freeClientArgv(redisClient
*c
) {
2078 for (j
= 0; j
< c
->argc
; j
++)
2079 decrRefCount(c
->argv
[j
]);
2080 for (j
= 0; j
< c
->mbargc
; j
++)
2081 decrRefCount(c
->mbargv
[j
]);
2086 static void freeClient(redisClient
*c
) {
2089 /* Note that if the client we are freeing is blocked into a blocking
2090 * call, we have to set querybuf to NULL *before* to call
2091 * unblockClientWaitingData() to avoid processInputBuffer() will get
2092 * called. Also it is important to remove the file events after
2093 * this, because this call adds the READABLE event. */
2094 sdsfree(c
->querybuf
);
2096 if (c
->flags
& REDIS_BLOCKED
)
2097 unblockClientWaitingData(c
);
2099 /* UNWATCH all the keys */
2101 listRelease(c
->watched_keys
);
2102 /* Unsubscribe from all the pubsub channels */
2103 pubsubUnsubscribeAllChannels(c
,0);
2104 pubsubUnsubscribeAllPatterns(c
,0);
2105 dictRelease(c
->pubsub_channels
);
2106 listRelease(c
->pubsub_patterns
);
2107 /* Obvious cleanup */
2108 aeDeleteFileEvent(server
.el
,c
->fd
,AE_READABLE
);
2109 aeDeleteFileEvent(server
.el
,c
->fd
,AE_WRITABLE
);
2110 listRelease(c
->reply
);
2113 /* Remove from the list of clients */
2114 ln
= listSearchKey(server
.clients
,c
);
2115 redisAssert(ln
!= NULL
);
2116 listDelNode(server
.clients
,ln
);
2117 /* Remove from the list of clients that are now ready to be restarted
2118 * after waiting for swapped keys */
2119 if (c
->flags
& REDIS_IO_WAIT
&& listLength(c
->io_keys
) == 0) {
2120 ln
= listSearchKey(server
.io_ready_clients
,c
);
2122 listDelNode(server
.io_ready_clients
,ln
);
2123 server
.vm_blocked_clients
--;
2126 /* Remove from the list of clients waiting for swapped keys */
2127 while (server
.vm_enabled
&& listLength(c
->io_keys
)) {
2128 ln
= listFirst(c
->io_keys
);
2129 dontWaitForSwappedKey(c
,ln
->value
);
2131 listRelease(c
->io_keys
);
2132 /* Master/slave cleanup */
2133 if (c
->flags
& REDIS_SLAVE
) {
2134 if (c
->replstate
== REDIS_REPL_SEND_BULK
&& c
->repldbfd
!= -1)
2136 list
*l
= (c
->flags
& REDIS_MONITOR
) ? server
.monitors
: server
.slaves
;
2137 ln
= listSearchKey(l
,c
);
2138 redisAssert(ln
!= NULL
);
2141 if (c
->flags
& REDIS_MASTER
) {
2142 server
.master
= NULL
;
2143 server
.replstate
= REDIS_REPL_CONNECT
;
2145 /* Release memory */
2148 freeClientMultiState(c
);
2152 #define GLUEREPLY_UP_TO (1024)
2153 static void glueReplyBuffersIfNeeded(redisClient
*c
) {
2155 char buf
[GLUEREPLY_UP_TO
];
2160 listRewind(c
->reply
,&li
);
2161 while((ln
= listNext(&li
))) {
2165 objlen
= sdslen(o
->ptr
);
2166 if (copylen
+ objlen
<= GLUEREPLY_UP_TO
) {
2167 memcpy(buf
+copylen
,o
->ptr
,objlen
);
2169 listDelNode(c
->reply
,ln
);
2171 if (copylen
== 0) return;
2175 /* Now the output buffer is empty, add the new single element */
2176 o
= createObject(REDIS_STRING
,sdsnewlen(buf
,copylen
));
2177 listAddNodeHead(c
->reply
,o
);
2180 static void sendReplyToClient(aeEventLoop
*el
, int fd
, void *privdata
, int mask
) {
2181 redisClient
*c
= privdata
;
2182 int nwritten
= 0, totwritten
= 0, objlen
;
2185 REDIS_NOTUSED(mask
);
2187 /* Use writev() if we have enough buffers to send */
2188 if (!server
.glueoutputbuf
&&
2189 listLength(c
->reply
) > REDIS_WRITEV_THRESHOLD
&&
2190 !(c
->flags
& REDIS_MASTER
))
2192 sendReplyToClientWritev(el
, fd
, privdata
, mask
);
2196 while(listLength(c
->reply
)) {
2197 if (server
.glueoutputbuf
&& listLength(c
->reply
) > 1)
2198 glueReplyBuffersIfNeeded(c
);
2200 o
= listNodeValue(listFirst(c
->reply
));
2201 objlen
= sdslen(o
->ptr
);
2204 listDelNode(c
->reply
,listFirst(c
->reply
));
2208 if (c
->flags
& REDIS_MASTER
) {
2209 /* Don't reply to a master */
2210 nwritten
= objlen
- c
->sentlen
;
2212 nwritten
= write(fd
, ((char*)o
->ptr
)+c
->sentlen
, objlen
- c
->sentlen
);
2213 if (nwritten
<= 0) break;
2215 c
->sentlen
+= nwritten
;
2216 totwritten
+= nwritten
;
2217 /* If we fully sent the object on head go to the next one */
2218 if (c
->sentlen
== objlen
) {
2219 listDelNode(c
->reply
,listFirst(c
->reply
));
2222 /* Note that we avoid to send more thank REDIS_MAX_WRITE_PER_EVENT
2223 * bytes, in a single threaded server it's a good idea to serve
2224 * other clients as well, even if a very large request comes from
2225 * super fast link that is always able to accept data (in real world
2226 * scenario think about 'KEYS *' against the loopback interfae) */
2227 if (totwritten
> REDIS_MAX_WRITE_PER_EVENT
) break;
2229 if (nwritten
== -1) {
2230 if (errno
== EAGAIN
) {
2233 redisLog(REDIS_VERBOSE
,
2234 "Error writing to client: %s", strerror(errno
));
2239 if (totwritten
> 0) c
->lastinteraction
= time(NULL
);
2240 if (listLength(c
->reply
) == 0) {
2242 aeDeleteFileEvent(server
.el
,c
->fd
,AE_WRITABLE
);
2246 static void sendReplyToClientWritev(aeEventLoop
*el
, int fd
, void *privdata
, int mask
)
2248 redisClient
*c
= privdata
;
2249 int nwritten
= 0, totwritten
= 0, objlen
, willwrite
;
2251 struct iovec iov
[REDIS_WRITEV_IOVEC_COUNT
];
2252 int offset
, ion
= 0;
2254 REDIS_NOTUSED(mask
);
2257 while (listLength(c
->reply
)) {
2258 offset
= c
->sentlen
;
2262 /* fill-in the iov[] array */
2263 for(node
= listFirst(c
->reply
); node
; node
= listNextNode(node
)) {
2264 o
= listNodeValue(node
);
2265 objlen
= sdslen(o
->ptr
);
2267 if (totwritten
+ objlen
- offset
> REDIS_MAX_WRITE_PER_EVENT
)
2270 if(ion
== REDIS_WRITEV_IOVEC_COUNT
)
2271 break; /* no more iovecs */
2273 iov
[ion
].iov_base
= ((char*)o
->ptr
) + offset
;
2274 iov
[ion
].iov_len
= objlen
- offset
;
2275 willwrite
+= objlen
- offset
;
2276 offset
= 0; /* just for the first item */
2283 /* write all collected blocks at once */
2284 if((nwritten
= writev(fd
, iov
, ion
)) < 0) {
2285 if (errno
!= EAGAIN
) {
2286 redisLog(REDIS_VERBOSE
,
2287 "Error writing to client: %s", strerror(errno
));
2294 totwritten
+= nwritten
;
2295 offset
= c
->sentlen
;
2297 /* remove written robjs from c->reply */
2298 while (nwritten
&& listLength(c
->reply
)) {
2299 o
= listNodeValue(listFirst(c
->reply
));
2300 objlen
= sdslen(o
->ptr
);
2302 if(nwritten
>= objlen
- offset
) {
2303 listDelNode(c
->reply
, listFirst(c
->reply
));
2304 nwritten
-= objlen
- offset
;
2308 c
->sentlen
+= nwritten
;
2316 c
->lastinteraction
= time(NULL
);
2318 if (listLength(c
->reply
) == 0) {
2320 aeDeleteFileEvent(server
.el
,c
->fd
,AE_WRITABLE
);
2324 static int qsortRedisCommands(const void *r1
, const void *r2
) {
2326 ((struct redisCommand
*)r1
)->name
,
2327 ((struct redisCommand
*)r2
)->name
);
2330 static void sortCommandTable() {
2331 /* Copy and sort the read-only version of the command table */
2332 commandTable
= (struct redisCommand
*)malloc(sizeof(readonlyCommandTable
));
2333 memcpy(commandTable
,readonlyCommandTable
,sizeof(readonlyCommandTable
));
2335 sizeof(readonlyCommandTable
)/sizeof(struct redisCommand
),
2336 sizeof(struct redisCommand
),qsortRedisCommands
);
2339 static struct redisCommand
*lookupCommand(char *name
) {
2340 struct redisCommand tmp
= {name
,NULL
,0,0,NULL
,0,0,0};
2344 sizeof(readonlyCommandTable
)/sizeof(struct redisCommand
),
2345 sizeof(struct redisCommand
),
2346 qsortRedisCommands
);
2349 /* resetClient prepares the client to process the next command.
 * NOTE(review): the statements of this function are not visible in this
 * excerpt, so exactly which client fields are reset has to be confirmed
 * against the full file. */
2350 static void resetClient(redisClient
*c
) {
2356 /* Call() is the core of Redis execution of a command */
2357 static void call(redisClient
*c
, struct redisCommand
*cmd
) {
2360 dirty
= server
.dirty
;
2362 dirty
= server
.dirty
-dirty
;
2364 if (server
.appendonly
&& dirty
)
2365 feedAppendOnlyFile(cmd
,c
->db
->id
,c
->argv
,c
->argc
);
2366 if ((dirty
|| cmd
->flags
& REDIS_CMD_FORCE_REPLICATION
) &&
2367 listLength(server
.slaves
))
2368 replicationFeedSlaves(server
.slaves
,c
->db
->id
,c
->argv
,c
->argc
);
2369 if (listLength(server
.monitors
))
2370 replicationFeedMonitors(server
.monitors
,c
->db
->id
,c
->argv
,c
->argc
);
2371 server
.stat_numcommands
++;
2374 /* If this function gets called we already read a whole
2375 * command, argments are in the client argv/argc fields.
2376 * processCommand() execute the command or prepare the
2377 * server for a bulk read from the client.
2379 * If 1 is returned the client is still alive and valid and
2380 * and other operations can be performed by the caller. Otherwise
2381 * if 0 is returned the client was destroied (i.e. after QUIT). */
2382 static int processCommand(redisClient
*c
) {
2383 struct redisCommand
*cmd
;
2385 /* Free some memory if needed (maxmemory setting) */
2386 if (server
.maxmemory
) freeMemoryIfNeeded();
2388 /* Handle the multi bulk command type. This is an alternative protocol
2389 * supported by Redis in order to receive commands that are composed of
2390 * multiple binary-safe "bulk" arguments. The latency of processing is
2391 * a bit higher but this allows things like multi-sets, so if this
2392 * protocol is used only for MSET and similar commands this is a big win. */
2393 if (c
->multibulk
== 0 && c
->argc
== 1 && ((char*)(c
->argv
[0]->ptr
))[0] == '*') {
2394 c
->multibulk
= atoi(((char*)c
->argv
[0]->ptr
)+1);
2395 if (c
->multibulk
<= 0) {
2399 decrRefCount(c
->argv
[c
->argc
-1]);
2403 } else if (c
->multibulk
) {
2404 if (c
->bulklen
== -1) {
2405 if (((char*)c
->argv
[0]->ptr
)[0] != '$') {
2406 addReplySds(c
,sdsnew("-ERR multi bulk protocol error\r\n"));
2410 int bulklen
= atoi(((char*)c
->argv
[0]->ptr
)+1);
2411 decrRefCount(c
->argv
[0]);
2412 if (bulklen
< 0 || bulklen
> 1024*1024*1024) {
2414 addReplySds(c
,sdsnew("-ERR invalid bulk write count\r\n"));
2419 c
->bulklen
= bulklen
+2; /* add two bytes for CR+LF */
2423 c
->mbargv
= zrealloc(c
->mbargv
,(sizeof(robj
*))*(c
->mbargc
+1));
2424 c
->mbargv
[c
->mbargc
] = c
->argv
[0];
2428 if (c
->multibulk
== 0) {
2432 /* Here we need to swap the multi-bulk argc/argv with the
2433 * normal argc/argv of the client structure. */
2435 c
->argv
= c
->mbargv
;
2436 c
->mbargv
= auxargv
;
2439 c
->argc
= c
->mbargc
;
2440 c
->mbargc
= auxargc
;
2442 /* We need to set bulklen to something different than -1
2443 * in order for the code below to process the command without
2444 * to try to read the last argument of a bulk command as
2445 * a special argument. */
2447 /* continue below and process the command */
2454 /* -- end of multi bulk commands processing -- */
2456 /* The QUIT command is handled as a special case. Normal command
2457 * procs are unable to close the client connection safely */
2458 if (!strcasecmp(c
->argv
[0]->ptr
,"quit")) {
2463 /* Now lookup the command and check ASAP about trivial error conditions
2464 * such wrong arity, bad command name and so forth. */
2465 cmd
= lookupCommand(c
->argv
[0]->ptr
);
2468 sdscatprintf(sdsempty(), "-ERR unknown command '%s'\r\n",
2469 (char*)c
->argv
[0]->ptr
));
2472 } else if ((cmd
->arity
> 0 && cmd
->arity
!= c
->argc
) ||
2473 (c
->argc
< -cmd
->arity
)) {
2475 sdscatprintf(sdsempty(),
2476 "-ERR wrong number of arguments for '%s' command\r\n",
2480 } else if (cmd
->flags
& REDIS_CMD_BULK
&& c
->bulklen
== -1) {
2481 /* This is a bulk command, we have to read the last argument yet. */
2482 int bulklen
= atoi(c
->argv
[c
->argc
-1]->ptr
);
2484 decrRefCount(c
->argv
[c
->argc
-1]);
2485 if (bulklen
< 0 || bulklen
> 1024*1024*1024) {
2487 addReplySds(c
,sdsnew("-ERR invalid bulk write count\r\n"));
2492 c
->bulklen
= bulklen
+2; /* add two bytes for CR+LF */
2493 /* It is possible that the bulk read is already in the
2494 * buffer. Check this condition and handle it accordingly.
2495 * This is just a fast path, alternative to call processInputBuffer().
2496 * It's a good idea since the code is small and this condition
2497 * happens most of the times. */
2498 if ((signed)sdslen(c
->querybuf
) >= c
->bulklen
) {
2499 c
->argv
[c
->argc
] = createStringObject(c
->querybuf
,c
->bulklen
-2);
2501 c
->querybuf
= sdsrange(c
->querybuf
,c
->bulklen
,-1);
2503 /* Otherwise return... there is to read the last argument
2504 * from the socket. */
2508 /* Let's try to encode the bulk object to save space. */
2509 if (cmd
->flags
& REDIS_CMD_BULK
)
2510 c
->argv
[c
->argc
-1] = tryObjectEncoding(c
->argv
[c
->argc
-1]);
2512 /* Check if the user is authenticated */
2513 if (server
.requirepass
&& !c
->authenticated
&& cmd
->proc
!= authCommand
) {
2514 addReplySds(c
,sdsnew("-ERR operation not permitted\r\n"));
2519 /* Handle the maxmemory directive */
2520 if (server
.maxmemory
&& (cmd
->flags
& REDIS_CMD_DENYOOM
) &&
2521 zmalloc_used_memory() > server
.maxmemory
)
2523 addReplySds(c
,sdsnew("-ERR command not allowed when used memory > 'maxmemory'\r\n"));
2528 /* Only allow SUBSCRIBE and UNSUBSCRIBE in the context of Pub/Sub */
2529 if ((dictSize(c
->pubsub_channels
) > 0 || listLength(c
->pubsub_patterns
) > 0)
2531 cmd
->proc
!= subscribeCommand
&& cmd
->proc
!= unsubscribeCommand
&&
2532 cmd
->proc
!= psubscribeCommand
&& cmd
->proc
!= punsubscribeCommand
) {
2533 addReplySds(c
,sdsnew("-ERR only (P)SUBSCRIBE / (P)UNSUBSCRIBE / QUIT allowed in this context\r\n"));
2538 /* Exec the command */
2539 if (c
->flags
& REDIS_MULTI
&&
2540 cmd
->proc
!= execCommand
&& cmd
->proc
!= discardCommand
&&
2541 cmd
->proc
!= multiCommand
&& cmd
->proc
!= watchCommand
)
2543 queueMultiCommand(c
,cmd
);
2544 addReply(c
,shared
.queued
);
2546 if (server
.vm_enabled
&& server
.vm_max_threads
> 0 &&
2547 blockClientOnSwappedKeys(c
,cmd
)) return 1;
2551 /* Prepare the client for the next command */
2556 static void replicationFeedSlaves(list
*slaves
, int dictid
, robj
**argv
, int argc
) {
2561 /* We need 1+(ARGS*3) objects since commands are using the new protocol
2562 * and we one 1 object for the first "*<count>\r\n" multibulk count, then
2563 * for every additional object we have "$<count>\r\n" + object + "\r\n". */
2564 robj
*static_outv
[REDIS_STATIC_ARGS
*3+1];
2567 if (argc
<= REDIS_STATIC_ARGS
) {
2570 outv
= zmalloc(sizeof(robj
*)*(argc
*3+1));
2573 lenobj
= createObject(REDIS_STRING
,
2574 sdscatprintf(sdsempty(), "*%d\r\n", argc
));
2575 lenobj
->refcount
= 0;
2576 outv
[outc
++] = lenobj
;
2577 for (j
= 0; j
< argc
; j
++) {
2578 lenobj
= createObject(REDIS_STRING
,
2579 sdscatprintf(sdsempty(),"$%lu\r\n",
2580 (unsigned long) stringObjectLen(argv
[j
])));
2581 lenobj
->refcount
= 0;
2582 outv
[outc
++] = lenobj
;
2583 outv
[outc
++] = argv
[j
];
2584 outv
[outc
++] = shared
.crlf
;
2587 /* Increment all the refcounts at start and decrement at end in order to
2588 * be sure to free objects if there is no slave in a replication state
2589 * able to be feed with commands */
2590 for (j
= 0; j
< outc
; j
++) incrRefCount(outv
[j
]);
2591 listRewind(slaves
,&li
);
2592 while((ln
= listNext(&li
))) {
2593 redisClient
*slave
= ln
->value
;
2595 /* Don't feed slaves that are still waiting for BGSAVE to start */
2596 if (slave
->replstate
== REDIS_REPL_WAIT_BGSAVE_START
) continue;
2598 /* Feed all the other slaves, MONITORs and so on */
2599 if (slave
->slaveseldb
!= dictid
) {
2603 case 0: selectcmd
= shared
.select0
; break;
2604 case 1: selectcmd
= shared
.select1
; break;
2605 case 2: selectcmd
= shared
.select2
; break;
2606 case 3: selectcmd
= shared
.select3
; break;
2607 case 4: selectcmd
= shared
.select4
; break;
2608 case 5: selectcmd
= shared
.select5
; break;
2609 case 6: selectcmd
= shared
.select6
; break;
2610 case 7: selectcmd
= shared
.select7
; break;
2611 case 8: selectcmd
= shared
.select8
; break;
2612 case 9: selectcmd
= shared
.select9
; break;
2614 selectcmd
= createObject(REDIS_STRING
,
2615 sdscatprintf(sdsempty(),"select %d\r\n",dictid
));
2616 selectcmd
->refcount
= 0;
2619 addReply(slave
,selectcmd
);
2620 slave
->slaveseldb
= dictid
;
2622 for (j
= 0; j
< outc
; j
++) addReply(slave
,outv
[j
]);
2624 for (j
= 0; j
< outc
; j
++) decrRefCount(outv
[j
]);
2625 if (outv
!= static_outv
) zfree(outv
);
2628 static sds
sdscatrepr(sds s
, char *p
, size_t len
) {
2629 s
= sdscatlen(s
,"\"",1);
2634 s
= sdscatprintf(s
,"\\%c",*p
);
2636 case '\n': s
= sdscatlen(s
,"\\n",1); break;
2637 case '\r': s
= sdscatlen(s
,"\\r",1); break;
2638 case '\t': s
= sdscatlen(s
,"\\t",1); break;
2639 case '\a': s
= sdscatlen(s
,"\\a",1); break;
2640 case '\b': s
= sdscatlen(s
,"\\b",1); break;
2643 s
= sdscatprintf(s
,"%c",*p
);
2645 s
= sdscatprintf(s
,"\\x%02x",(unsigned char)*p
);
2650 return sdscatlen(s
,"\"",1);
2653 static void replicationFeedMonitors(list
*monitors
, int dictid
, robj
**argv
, int argc
) {
2657 sds cmdrepr
= sdsnew("+");
2661 gettimeofday(&tv
,NULL
);
2662 cmdrepr
= sdscatprintf(cmdrepr
,"%ld.%ld ",(long)tv
.tv_sec
,(long)tv
.tv_usec
);
2663 if (dictid
!= 0) cmdrepr
= sdscatprintf(cmdrepr
,"(db %d) ", dictid
);
2665 for (j
= 0; j
< argc
; j
++) {
2666 if (argv
[j
]->encoding
== REDIS_ENCODING_INT
) {
2667 cmdrepr
= sdscatprintf(cmdrepr
, "%ld", (long)argv
[j
]->ptr
);
2669 cmdrepr
= sdscatrepr(cmdrepr
,(char*)argv
[j
]->ptr
,
2670 sdslen(argv
[j
]->ptr
));
2673 cmdrepr
= sdscatlen(cmdrepr
," ",1);
2675 cmdrepr
= sdscatlen(cmdrepr
,"\r\n",2);
2676 cmdobj
= createObject(REDIS_STRING
,cmdrepr
);
2678 listRewind(monitors
,&li
);
2679 while((ln
= listNext(&li
))) {
2680 redisClient
*monitor
= ln
->value
;
2681 addReply(monitor
,cmdobj
);
2683 decrRefCount(cmdobj
);
2686 static void processInputBuffer(redisClient
*c
) {
2688 /* Before to process the input buffer, make sure the client is not
2689 * waitig for a blocking operation such as BLPOP. Note that the first
2690 * iteration the client is never blocked, otherwise the processInputBuffer
2691 * would not be called at all, but after the execution of the first commands
2692 * in the input buffer the client may be blocked, and the "goto again"
2693 * will try to reiterate. The following line will make it return asap. */
2694 if (c
->flags
& REDIS_BLOCKED
|| c
->flags
& REDIS_IO_WAIT
) return;
2695 if (c
->bulklen
== -1) {
2696 /* Read the first line of the query */
2697 char *p
= strchr(c
->querybuf
,'\n');
2704 query
= c
->querybuf
;
2705 c
->querybuf
= sdsempty();
2706 querylen
= 1+(p
-(query
));
2707 if (sdslen(query
) > querylen
) {
2708 /* leave data after the first line of the query in the buffer */
2709 c
->querybuf
= sdscatlen(c
->querybuf
,query
+querylen
,sdslen(query
)-querylen
);
2711 *p
= '\0'; /* remove "\n" */
2712 if (*(p
-1) == '\r') *(p
-1) = '\0'; /* and "\r" if any */
2713 sdsupdatelen(query
);
2715 /* Now we can split the query in arguments */
2716 argv
= sdssplitlen(query
,sdslen(query
)," ",1,&argc
);
2719 if (c
->argv
) zfree(c
->argv
);
2720 c
->argv
= zmalloc(sizeof(robj
*)*argc
);
2722 for (j
= 0; j
< argc
; j
++) {
2723 if (sdslen(argv
[j
])) {
2724 c
->argv
[c
->argc
] = createObject(REDIS_STRING
,argv
[j
]);
2732 /* Execute the command. If the client is still valid
2733 * after processCommand() return and there is something
2734 * on the query buffer try to process the next command. */
2735 if (processCommand(c
) && sdslen(c
->querybuf
)) goto again
;
2737 /* Nothing to process, argc == 0. Just process the query
2738 * buffer if it's not empty or return to the caller */
2739 if (sdslen(c
->querybuf
)) goto again
;
2742 } else if (sdslen(c
->querybuf
) >= REDIS_REQUEST_MAX_SIZE
) {
2743 redisLog(REDIS_VERBOSE
, "Client protocol error");
2748 /* Bulk read handling. Note that if we are at this point
2749 the client already sent a command terminated with a newline,
2750 we are reading the bulk data that is actually the last
2751 argument of the command. */
2752 int qbl
= sdslen(c
->querybuf
);
2754 if (c
->bulklen
<= qbl
) {
2755 /* Copy everything but the final CRLF as final argument */
2756 c
->argv
[c
->argc
] = createStringObject(c
->querybuf
,c
->bulklen
-2);
2758 c
->querybuf
= sdsrange(c
->querybuf
,c
->bulklen
,-1);
2759 /* Process the command. If the client is still valid after
2760 * the processing and there is more data in the buffer
2761 * try to parse it. */
2762 if (processCommand(c
) && sdslen(c
->querybuf
)) goto again
;
2768 static void readQueryFromClient(aeEventLoop
*el
, int fd
, void *privdata
, int mask
) {
2769 redisClient
*c
= (redisClient
*) privdata
;
2770 char buf
[REDIS_IOBUF_LEN
];
2773 REDIS_NOTUSED(mask
);
2775 nread
= read(fd
, buf
, REDIS_IOBUF_LEN
);
2777 if (errno
== EAGAIN
) {
2780 redisLog(REDIS_VERBOSE
, "Reading from client: %s",strerror(errno
));
2784 } else if (nread
== 0) {
2785 redisLog(REDIS_VERBOSE
, "Client closed connection");
2790 c
->querybuf
= sdscatlen(c
->querybuf
, buf
, nread
);
2791 c
->lastinteraction
= time(NULL
);
2795 processInputBuffer(c
);
2798 static int selectDb(redisClient
*c
, int id
) {
2799 if (id
< 0 || id
>= server
.dbnum
)
2801 c
->db
= &server
.db
[id
];
2805 static void *dupClientReplyValue(void *o
) {
2806 incrRefCount((robj
*)o
);
2810 static int listMatchObjects(void *a
, void *b
) {
2811 return equalStringObjects(a
,b
);
2814 static redisClient
*createClient(int fd
) {
2815 redisClient
*c
= zmalloc(sizeof(*c
));
2817 anetNonBlock(NULL
,fd
);
2818 anetTcpNoDelay(NULL
,fd
);
2819 if (!c
) return NULL
;
2822 c
->querybuf
= sdsempty();
2831 c
->lastinteraction
= time(NULL
);
2832 c
->authenticated
= 0;
2833 c
->replstate
= REDIS_REPL_NONE
;
2834 c
->reply
= listCreate();
2835 listSetFreeMethod(c
->reply
,decrRefCount
);
2836 listSetDupMethod(c
->reply
,dupClientReplyValue
);
2837 c
->blocking_keys
= NULL
;
2838 c
->blocking_keys_num
= 0;
2839 c
->io_keys
= listCreate();
2840 c
->watched_keys
= listCreate();
2841 listSetFreeMethod(c
->io_keys
,decrRefCount
);
2842 c
->pubsub_channels
= dictCreate(&setDictType
,NULL
);
2843 c
->pubsub_patterns
= listCreate();
2844 listSetFreeMethod(c
->pubsub_patterns
,decrRefCount
);
2845 listSetMatchMethod(c
->pubsub_patterns
,listMatchObjects
);
2846 if (aeCreateFileEvent(server
.el
, c
->fd
, AE_READABLE
,
2847 readQueryFromClient
, c
) == AE_ERR
) {
2851 listAddNodeTail(server
.clients
,c
);
2852 initClientMultiState(c
);
2856 static void addReply(redisClient
*c
, robj
*obj
) {
2857 if (listLength(c
->reply
) == 0 &&
2858 (c
->replstate
== REDIS_REPL_NONE
||
2859 c
->replstate
== REDIS_REPL_ONLINE
) &&
2860 aeCreateFileEvent(server
.el
, c
->fd
, AE_WRITABLE
,
2861 sendReplyToClient
, c
) == AE_ERR
) return;
2863 if (server
.vm_enabled
&& obj
->storage
!= REDIS_VM_MEMORY
) {
2864 obj
= dupStringObject(obj
);
2865 obj
->refcount
= 0; /* getDecodedObject() will increment the refcount */
2867 listAddNodeTail(c
->reply
,getDecodedObject(obj
));
2870 static void addReplySds(redisClient
*c
, sds s
) {
2871 robj
*o
= createObject(REDIS_STRING
,s
);
2876 static void addReplyDouble(redisClient
*c
, double d
) {
2879 snprintf(buf
,sizeof(buf
),"%.17g",d
);
2880 addReplySds(c
,sdscatprintf(sdsempty(),"$%lu\r\n%s\r\n",
2881 (unsigned long) strlen(buf
),buf
));
2884 static void addReplyLongLong(redisClient
*c
, long long ll
) {
2889 addReply(c
,shared
.czero
);
2891 } else if (ll
== 1) {
2892 addReply(c
,shared
.cone
);
2896 len
= ll2string(buf
+1,sizeof(buf
)-1,ll
);
2899 addReplySds(c
,sdsnewlen(buf
,len
+3));
2902 static void addReplyUlong(redisClient
*c
, unsigned long ul
) {
2907 addReply(c
,shared
.czero
);
2909 } else if (ul
== 1) {
2910 addReply(c
,shared
.cone
);
2913 len
= snprintf(buf
,sizeof(buf
),":%lu\r\n",ul
);
2914 addReplySds(c
,sdsnewlen(buf
,len
));
2917 static void addReplyBulkLen(redisClient
*c
, robj
*obj
) {
2921 if (obj
->encoding
== REDIS_ENCODING_RAW
) {
2922 len
= sdslen(obj
->ptr
);
2924 long n
= (long)obj
->ptr
;
2926 /* Compute how many bytes will take this integer as a radix 10 string */
2932 while((n
= n
/10) != 0) {
2937 intlen
= ll2string(buf
+1,sizeof(buf
)-1,(long long)len
);
2938 buf
[intlen
+1] = '\r';
2939 buf
[intlen
+2] = '\n';
2940 addReplySds(c
,sdsnewlen(buf
,intlen
+3));
2943 static void addReplyBulk(redisClient
*c
, robj
*obj
) {
2944 addReplyBulkLen(c
,obj
);
2946 addReply(c
,shared
.crlf
);
2949 static void addReplyBulkSds(redisClient
*c
, sds s
) {
2950 robj
*o
= createStringObject(s
, sdslen(s
));
2955 /* In the CONFIG command we need to add vanilla C string as bulk replies */
2956 static void addReplyBulkCString(redisClient
*c
, char *s
) {
2958 addReply(c
,shared
.nullbulk
);
2960 robj
*o
= createStringObject(s
,strlen(s
));
2966 static void acceptHandler(aeEventLoop
*el
, int fd
, void *privdata
, int mask
) {
2971 REDIS_NOTUSED(mask
);
2972 REDIS_NOTUSED(privdata
);
2974 cfd
= anetAccept(server
.neterr
, fd
, cip
, &cport
);
2975 if (cfd
== AE_ERR
) {
2976 redisLog(REDIS_VERBOSE
,"Accepting client connection: %s", server
.neterr
);
2979 redisLog(REDIS_VERBOSE
,"Accepted %s:%d", cip
, cport
);
2980 if ((c
= createClient(cfd
)) == NULL
) {
2981 redisLog(REDIS_WARNING
,"Error allocating resoures for the client");
2982 close(cfd
); /* May be already closed, just ingore errors */
2985 /* If maxclient directive is set and this is one client more... close the
2986 * connection. Note that we create the client instead to check before
2987 * for this condition, since now the socket is already set in nonblocking
2988 * mode and we can send an error for free using the Kernel I/O */
2989 if (server
.maxclients
&& listLength(server
.clients
) > server
.maxclients
) {
2990 char *err
= "-ERR max number of clients reached\r\n";
2992 /* That's a best effort error message, don't check write errors */
2993 if (write(c
->fd
,err
,strlen(err
)) == -1) {
2994 /* Nothing to do, Just to avoid the warning... */
2999 server
.stat_numconnections
++;
3002 /* ======================= Redis objects implementation ===================== */
3004 static robj
*createObject(int type
, void *ptr
) {
3007 if (server
.vm_enabled
) pthread_mutex_lock(&server
.obj_freelist_mutex
);
3008 if (listLength(server
.objfreelist
)) {
3009 listNode
*head
= listFirst(server
.objfreelist
);
3010 o
= listNodeValue(head
);
3011 listDelNode(server
.objfreelist
,head
);
3012 if (server
.vm_enabled
) pthread_mutex_unlock(&server
.obj_freelist_mutex
);
3014 if (server
.vm_enabled
)
3015 pthread_mutex_unlock(&server
.obj_freelist_mutex
);
3016 o
= zmalloc(sizeof(*o
));
3019 o
->encoding
= REDIS_ENCODING_RAW
;
3022 if (server
.vm_enabled
) {
3023 /* Note that this code may run in the context of an I/O thread
3024 * and accessing server.lruclock in theory is an error
3025 * (no locks). But in practice this is safe, and even if we read
3026 * garbage Redis will not fail. */
3027 o
->lru
= server
.lruclock
;
3028 o
->storage
= REDIS_VM_MEMORY
;
3033 static robj
*createStringObject(char *ptr
, size_t len
) {
3034 return createObject(REDIS_STRING
,sdsnewlen(ptr
,len
));
3037 static robj
*createStringObjectFromLongLong(long long value
) {
3039 if (value
>= 0 && value
< REDIS_SHARED_INTEGERS
) {
3040 incrRefCount(shared
.integers
[value
]);
3041 o
= shared
.integers
[value
];
3043 if (value
>= LONG_MIN
&& value
<= LONG_MAX
) {
3044 o
= createObject(REDIS_STRING
, NULL
);
3045 o
->encoding
= REDIS_ENCODING_INT
;
3046 o
->ptr
= (void*)((long)value
);
3048 o
= createObject(REDIS_STRING
,sdsfromlonglong(value
));
3054 static robj
*dupStringObject(robj
*o
) {
3055 assert(o
->encoding
== REDIS_ENCODING_RAW
);
3056 return createStringObject(o
->ptr
,sdslen(o
->ptr
));
3059 static robj
*createListObject(void) {
3060 list
*l
= listCreate();
3061 robj
*o
= createObject(REDIS_LIST
,l
);
3062 listSetFreeMethod(l
,decrRefCount
);
3063 o
->encoding
= REDIS_ENCODING_LIST
;
3067 static robj
*createZiplistObject(void) {
3068 unsigned char *zl
= ziplistNew();
3069 robj
*o
= createObject(REDIS_LIST
,zl
);
3070 o
->encoding
= REDIS_ENCODING_ZIPLIST
;
3074 static robj
*createSetObject(void) {
3075 dict
*d
= dictCreate(&setDictType
,NULL
);
3076 return createObject(REDIS_SET
,d
);
3079 static robj
*createHashObject(void) {
3080 /* All the Hashes start as zipmaps. Will be automatically converted
3081 * into hash tables if there are enough elements or big elements
3083 unsigned char *zm
= zipmapNew();
3084 robj
*o
= createObject(REDIS_HASH
,zm
);
3085 o
->encoding
= REDIS_ENCODING_ZIPMAP
;
3089 static robj
*createZsetObject(void) {
3090 zset
*zs
= zmalloc(sizeof(*zs
));
3092 zs
->dict
= dictCreate(&zsetDictType
,NULL
);
3093 zs
->zsl
= zslCreate();
3094 return createObject(REDIS_ZSET
,zs
);
3097 static void freeStringObject(robj
*o
) {
3098 if (o
->encoding
== REDIS_ENCODING_RAW
) {
3103 static void freeListObject(robj
*o
) {
3104 switch (o
->encoding
) {
3105 case REDIS_ENCODING_LIST
:
3106 listRelease((list
*) o
->ptr
);
3108 case REDIS_ENCODING_ZIPLIST
:
3112 redisPanic("Unknown list encoding type");
3116 static void freeSetObject(robj
*o
) {
3117 dictRelease((dict
*) o
->ptr
);
3120 static void freeZsetObject(robj
*o
) {
3123 dictRelease(zs
->dict
);
3128 static void freeHashObject(robj
*o
) {
3129 switch (o
->encoding
) {
3130 case REDIS_ENCODING_HT
:
3131 dictRelease((dict
*) o
->ptr
);
3133 case REDIS_ENCODING_ZIPMAP
:
3137 redisPanic("Unknown hash encoding type");
3142 static void incrRefCount(robj
*o
) {
3146 static void decrRefCount(void *obj
) {
3149 /* Object is a swapped out value, or in the process of being loaded. */
3150 if (server
.vm_enabled
&&
3151 (o
->storage
== REDIS_VM_SWAPPED
|| o
->storage
== REDIS_VM_LOADING
))
3153 vmpointer
*vp
= obj
;
3154 if (o
->storage
== REDIS_VM_LOADING
) vmCancelThreadedIOJob(o
);
3155 vmMarkPagesFree(vp
->page
,vp
->usedpages
);
3156 server
.vm_stats_swapped_objects
--;
3161 if (o
->refcount
<= 0) redisPanic("decrRefCount against refcount <= 0");
3162 /* Object is in memory, or in the process of being swapped out.
3164 * If the object is being swapped out, abort the operation on
3165 * decrRefCount even if the refcount does not drop to 0: the object
3166 * is referenced at least two times, as value of the key AND as
3167 * job->val in the iojob. So if we don't invalidate the iojob, when it is
3168 * done but the relevant key was removed in the meantime, the
3169 * complete jobs handler will not find the key about the job and the
3170 * assert will fail. */
3171 if (server
.vm_enabled
&& o
->storage
== REDIS_VM_SWAPPING
)
3172 vmCancelThreadedIOJob(o
);
3173 if (--(o
->refcount
) == 0) {
3175 case REDIS_STRING
: freeStringObject(o
); break;
3176 case REDIS_LIST
: freeListObject(o
); break;
3177 case REDIS_SET
: freeSetObject(o
); break;
3178 case REDIS_ZSET
: freeZsetObject(o
); break;
3179 case REDIS_HASH
: freeHashObject(o
); break;
3180 default: redisPanic("Unknown object type"); break;
3182 if (server
.vm_enabled
) pthread_mutex_lock(&server
.obj_freelist_mutex
);
3183 if (listLength(server
.objfreelist
) > REDIS_OBJFREELIST_MAX
||
3184 !listAddNodeHead(server
.objfreelist
,o
))
3186 if (server
.vm_enabled
) pthread_mutex_unlock(&server
.obj_freelist_mutex
);
3190 static int checkType(redisClient
*c
, robj
*o
, int type
) {
3191 if (o
->type
!= type
) {
3192 addReply(c
,shared
.wrongtypeerr
);
3198 /* Check if the nul-terminated string 's' can be represented by a long
3199 * (that is, is a number that fits into long without any other space or
3200 * character before or after the digits).
3202 * If so, the function returns REDIS_OK and *longval is set to the value
3203 * of the number. Otherwise REDIS_ERR is returned */
3204 static int isStringRepresentableAsLong(sds s
, long *longval
) {
3205 char buf
[32], *endptr
;
3209 value
= strtol(s
, &endptr
, 10);
3210 if (endptr
[0] != '\0') return REDIS_ERR
;
3211 slen
= ll2string(buf
,32,value
);
3213 /* If the number converted back into a string is not identical
3214 * then it's not possible to encode the string as integer */
3215 if (sdslen(s
) != (unsigned)slen
|| memcmp(buf
,s
,slen
)) return REDIS_ERR
;
3216 if (longval
) *longval
= value
;
3220 /* Try to encode a string object in order to save space */
3221 static robj
*tryObjectEncoding(robj
*o
) {
3225 if (o
->encoding
!= REDIS_ENCODING_RAW
)
3226 return o
; /* Already encoded */
3228 /* It's not safe to encode shared objects: shared objects can be shared
3229 * everywhere in the "object space" of Redis. Encoded objects can only
3230 * appear as "values" (and not, for instance, as keys) */
3231 if (o
->refcount
> 1) return o
;
3233 /* Currently we try to encode only strings */
3234 redisAssert(o
->type
== REDIS_STRING
);
3236 /* Check if we can represent this string as a long integer */
3237 if (isStringRepresentableAsLong(s
,&value
) == REDIS_ERR
) return o
;
3239 /* Ok, this object can be encoded */
3240 if (value
>= 0 && value
< REDIS_SHARED_INTEGERS
) {
3242 incrRefCount(shared
.integers
[value
]);
3243 return shared
.integers
[value
];
3245 o
->encoding
= REDIS_ENCODING_INT
;
3247 o
->ptr
= (void*) value
;
3252 /* Get a decoded version of an encoded object (returned as a new object).
3253 * If the object is already raw-encoded just increment the ref count. */
3254 static robj
*getDecodedObject(robj
*o
) {
3257 if (o
->encoding
== REDIS_ENCODING_RAW
) {
3261 if (o
->type
== REDIS_STRING
&& o
->encoding
== REDIS_ENCODING_INT
) {
3264 ll2string(buf
,32,(long)o
->ptr
);
3265 dec
= createStringObject(buf
,strlen(buf
));
3268 redisPanic("Unknown encoding type");
3272 /* Compare two string objects via strcmp() or alike.
3273 * Note that the objects may be integer-encoded. In such a case we
3274 * use ll2string() to get a string representation of the numbers on the stack
3275 * and compare the strings, it's much faster than calling getDecodedObject().
3277 * Important note: if objects are not integer encoded, but binary-safe strings,
3278 * sdscmp() from sds.c will apply memcmp() so this function can be considered
3279 * binary safe. */
3280 static int compareStringObjects(robj
*a
, robj
*b
) {
3281 redisAssert(a
->type
== REDIS_STRING
&& b
->type
== REDIS_STRING
);
3282 char bufa
[128], bufb
[128], *astr
, *bstr
;
3285 if (a
== b
) return 0;
3286 if (a
->encoding
!= REDIS_ENCODING_RAW
) {
3287 ll2string(bufa
,sizeof(bufa
),(long) a
->ptr
);
3293 if (b
->encoding
!= REDIS_ENCODING_RAW
) {
3294 ll2string(bufb
,sizeof(bufb
),(long) b
->ptr
);
3300 return bothsds
? sdscmp(astr
,bstr
) : strcmp(astr
,bstr
);
3303 /* Equal string objects return 1 if the two objects are the same from the
3304 * point of view of a string comparison, otherwise 0 is returned. Note that
3305 * this function is faster than checking for (compareStringObjects(a,b) == 0)
3306 * because it can perform some more optimization. */
3307 static int equalStringObjects(robj
*a
, robj
*b
) {
3308 if (a
->encoding
!= REDIS_ENCODING_RAW
&& b
->encoding
!= REDIS_ENCODING_RAW
){
3309 return a
->ptr
== b
->ptr
;
3311 return compareStringObjects(a
,b
) == 0;
3315 static size_t stringObjectLen(robj
*o
) {
3316 redisAssert(o
->type
== REDIS_STRING
);
3317 if (o
->encoding
== REDIS_ENCODING_RAW
) {
3318 return sdslen(o
->ptr
);
3322 return ll2string(buf
,32,(long)o
->ptr
);
3326 static int getDoubleFromObject(robj
*o
, double *target
) {
3333 redisAssert(o
->type
== REDIS_STRING
);
3334 if (o
->encoding
== REDIS_ENCODING_RAW
) {
3335 value
= strtod(o
->ptr
, &eptr
);
3336 if (eptr
[0] != '\0') return REDIS_ERR
;
3337 } else if (o
->encoding
== REDIS_ENCODING_INT
) {
3338 value
= (long)o
->ptr
;
3340 redisPanic("Unknown string encoding");
3348 static int getDoubleFromObjectOrReply(redisClient
*c
, robj
*o
, double *target
, const char *msg
) {
3350 if (getDoubleFromObject(o
, &value
) != REDIS_OK
) {
3352 addReplySds(c
, sdscatprintf(sdsempty(), "-ERR %s\r\n", msg
));
3354 addReplySds(c
, sdsnew("-ERR value is not a double\r\n"));
3363 static int getLongLongFromObject(robj
*o
, long long *target
) {
3370 redisAssert(o
->type
== REDIS_STRING
);
3371 if (o
->encoding
== REDIS_ENCODING_RAW
) {
3372 value
= strtoll(o
->ptr
, &eptr
, 10);
3373 if (eptr
[0] != '\0') return REDIS_ERR
;
3374 } else if (o
->encoding
== REDIS_ENCODING_INT
) {
3375 value
= (long)o
->ptr
;
3377 redisPanic("Unknown string encoding");
3385 static int getLongLongFromObjectOrReply(redisClient
*c
, robj
*o
, long long *target
, const char *msg
) {
3387 if (getLongLongFromObject(o
, &value
) != REDIS_OK
) {
3389 addReplySds(c
, sdscatprintf(sdsempty(), "-ERR %s\r\n", msg
));
3391 addReplySds(c
, sdsnew("-ERR value is not an integer\r\n"));
3400 static int getLongFromObjectOrReply(redisClient
*c
, robj
*o
, long *target
, const char *msg
) {
3403 if (getLongLongFromObjectOrReply(c
, o
, &value
, msg
) != REDIS_OK
) return REDIS_ERR
;
3404 if (value
< LONG_MIN
|| value
> LONG_MAX
) {
3406 addReplySds(c
, sdscatprintf(sdsempty(), "-ERR %s\r\n", msg
));
3408 addReplySds(c
, sdsnew("-ERR value is out of range\r\n"));
3417 /* =========================== Keyspace access API ========================== */
3419 static robj
*lookupKey(redisDb
*db
, robj
*key
) {
3420 dictEntry
*de
= dictFind(db
->dict
,key
->ptr
);
3422 robj
*val
= dictGetEntryVal(de
);
3424 if (server
.vm_enabled
) {
3425 if (val
->storage
== REDIS_VM_MEMORY
||
3426 val
->storage
== REDIS_VM_SWAPPING
)
3428 /* If we were swapping the object out, cancel the operation */
3429 if (val
->storage
== REDIS_VM_SWAPPING
)
3430 vmCancelThreadedIOJob(val
);
3431 /* Update the access time for the aging algorithm. */
3432 val
->lru
= server
.lruclock
;
3434 int notify
= (val
->storage
== REDIS_VM_LOADING
);
3436 /* Our value was swapped on disk. Bring it at home. */
3437 redisAssert(val
->type
== REDIS_VMPOINTER
);
3438 val
= vmLoadObject(val
);
3439 dictGetEntryVal(de
) = val
;
3441 /* Clients blocked by the VM subsystem may be waiting for
3443 if (notify
) handleClientsBlockedOnSwappedKey(db
,key
);
3452 static robj
*lookupKeyRead(redisDb
*db
, robj
*key
) {
3453 expireIfNeeded(db
,key
);
3454 return lookupKey(db
,key
);
3457 static robj
*lookupKeyWrite(redisDb
*db
, robj
*key
) {
3458 deleteIfVolatile(db
,key
);
3459 touchWatchedKey(db
,key
);
3460 return lookupKey(db
,key
);
3463 static robj
*lookupKeyReadOrReply(redisClient
*c
, robj
*key
, robj
*reply
) {
3464 robj
*o
= lookupKeyRead(c
->db
, key
);
3465 if (!o
) addReply(c
,reply
);
3469 static robj
*lookupKeyWriteOrReply(redisClient
*c
, robj
*key
, robj
*reply
) {
3470 robj
*o
= lookupKeyWrite(c
->db
, key
);
3471 if (!o
) addReply(c
,reply
);
3475 /* Add the key to the DB. If the key already exists REDIS_ERR is returned,
3476 * otherwise REDIS_OK is returned, and the caller should increment the
3477 * refcount of 'val'. */
3478 static int dbAdd(redisDb
*db
, robj
*key
, robj
*val
) {
3479 /* Perform a lookup before adding the key, as we need to copy the
3481 if (dictFind(db
->dict
, key
->ptr
) != NULL
) {
3484 sds copy
= sdsdup(key
->ptr
);
3485 dictAdd(db
->dict
, copy
, val
);
3490 /* If the key does not exist, this is just like dbAdd(). Otherwise
3491 * the value associated to the key is replaced with the new one.
3493 * On update (key already existed) 0 is returned. Otherwise 1. */
3494 static int dbReplace(redisDb
*db
, robj
*key
, robj
*val
) {
3495 if (dictFind(db
->dict
,key
->ptr
) == NULL
) {
3496 sds copy
= sdsdup(key
->ptr
);
3497 dictAdd(db
->dict
, copy
, val
);
3500 dictReplace(db
->dict
, key
->ptr
, val
);
3505 static int dbExists(redisDb
*db
, robj
*key
) {
3506 return dictFind(db
->dict
,key
->ptr
) != NULL
;
3509 /* Return a random key, in form of a Redis object.
3510 * If there are no keys, NULL is returned.
3512 * The function makes sure to return keys not already expired. */
3513 static robj
*dbRandomKey(redisDb
*db
) {
3514 struct dictEntry
*de
;
3520 de
= dictGetRandomKey(db
->dict
);
3521 if (de
== NULL
) return NULL
;
3523 key
= dictGetEntryKey(de
);
3524 keyobj
= createStringObject(key
,sdslen(key
));
3525 if (dictFind(db
->expires
,key
)) {
3526 if (expireIfNeeded(db
,keyobj
)) {
3527 decrRefCount(keyobj
);
3528 continue; /* search for another key. This expired. */
3535 /* Delete a key, value, and associated expiration entry if any, from the DB */
3536 static int dbDelete(redisDb
*db
, robj
*key
) {
3537 /* Deleting an entry from the expires dict will not free the sds of
3538 * the key, because it is shared with the main dictionary. */
3539 if (dictSize(db
->expires
) > 0) dictDelete(db
->expires
,key
->ptr
);
3540 return dictDelete(db
->dict
,key
->ptr
) == DICT_OK
;
3543 /*============================ RDB saving/loading =========================== */
/* Write a single type/opcode byte to 'fp'.
 *
 * Returns 0 on success, -1 if the byte could not be written. */
static int rdbSaveType(FILE *fp, unsigned char type) {
    /* Exactly one 1-byte item is requested, so anything other than 1
     * means the write failed. */
    if (fwrite(&type,1,1,fp) != 1) return -1;
    return 0;
}
/* Write the time 't' to 'fp' as a 4 byte signed integer in host byte order.
 *
 * The value is narrowed to int32_t before being written, so times that do
 * not fit in 32 bits are not preserved.
 *
 * Returns 0 on success, -1 if the value could not be written. */
static int rdbSaveTime(FILE *fp, time_t t) {
    int32_t t32 = (int32_t) t;

    if (fwrite(&t32,sizeof(t32),1,fp) != 1) return -1;
    return 0;
}
3556 /* check rdbLoadLen() comments for more info */
3557 static int rdbSaveLen(FILE *fp
, uint32_t len
) {
3558 unsigned char buf
[2];
3561 /* Save a 6 bit len */
3562 buf
[0] = (len
&0xFF)|(REDIS_RDB_6BITLEN
<<6);
3563 if (fwrite(buf
,1,1,fp
) == 0) return -1;
3564 } else if (len
< (1<<14)) {
3565 /* Save a 14 bit len */
3566 buf
[0] = ((len
>>8)&0xFF)|(REDIS_RDB_14BITLEN
<<6);
3568 if (fwrite(buf
,2,1,fp
) == 0) return -1;
3570 /* Save a 32 bit len */
3571 buf
[0] = (REDIS_RDB_32BITLEN
<<6);
3572 if (fwrite(buf
,1,1,fp
) == 0) return -1;
3574 if (fwrite(&len
,4,1,fp
) == 0) return -1;
3579 /* Encode 'value' as an integer if possible (if integer will fit the
3580 * supported range). If the function sucessful encoded the integer
3581 * then the (up to 5 bytes) encoded representation is written in the
3582 * string pointed by 'enc' and the length is returned. Otherwise
3584 static int rdbEncodeInteger(long long value
, unsigned char *enc
) {
3585 /* Finally check if it fits in our ranges */
3586 if (value
>= -(1<<7) && value
<= (1<<7)-1) {
3587 enc
[0] = (REDIS_RDB_ENCVAL
<<6)|REDIS_RDB_ENC_INT8
;
3588 enc
[1] = value
&0xFF;
3590 } else if (value
>= -(1<<15) && value
<= (1<<15)-1) {
3591 enc
[0] = (REDIS_RDB_ENCVAL
<<6)|REDIS_RDB_ENC_INT16
;
3592 enc
[1] = value
&0xFF;
3593 enc
[2] = (value
>>8)&0xFF;
3595 } else if (value
>= -((long long)1<<31) && value
<= ((long long)1<<31)-1) {
3596 enc
[0] = (REDIS_RDB_ENCVAL
<<6)|REDIS_RDB_ENC_INT32
;
3597 enc
[1] = value
&0xFF;
3598 enc
[2] = (value
>>8)&0xFF;
3599 enc
[3] = (value
>>16)&0xFF;
3600 enc
[4] = (value
>>24)&0xFF;
3607 /* String objects in the form "2391" "-100" without any space and with a
3608 * range of values that can fit in an 8, 16 or 32 bit signed value can be
3609 * encoded as integers to save space */
3610 static int rdbTryIntegerEncoding(char *s
, size_t len
, unsigned char *enc
) {
3612 char *endptr
, buf
[32];
3614 /* Check if it's possible to encode this value as a number */
3615 value
= strtoll(s
, &endptr
, 10);
3616 if (endptr
[0] != '\0') return 0;
3617 ll2string(buf
,32,value
);
3619 /* If the number converted back into a string is not identical
3620 * then it's not possible to encode the string as integer */
3621 if (strlen(buf
) != len
|| memcmp(buf
,s
,len
)) return 0;
3623 return rdbEncodeInteger(value
,enc
);
3626 static int rdbSaveLzfStringObject(FILE *fp
, unsigned char *s
, size_t len
) {
3627 size_t comprlen
, outlen
;
3631 /* We require at least four bytes compression for this to be worth it */
3632 if (len
<= 4) return 0;
3634 if ((out
= zmalloc(outlen
+1)) == NULL
) return 0;
3635 comprlen
= lzf_compress(s
, len
, out
, outlen
);
3636 if (comprlen
== 0) {
3640 /* Data compressed! Let's save it on disk */
3641 byte
= (REDIS_RDB_ENCVAL
<<6)|REDIS_RDB_ENC_LZF
;
3642 if (fwrite(&byte
,1,1,fp
) == 0) goto writeerr
;
3643 if (rdbSaveLen(fp
,comprlen
) == -1) goto writeerr
;
3644 if (rdbSaveLen(fp
,len
) == -1) goto writeerr
;
3645 if (fwrite(out
,comprlen
,1,fp
) == 0) goto writeerr
;
3654 /* Save a string object as [len][data] on disk. If the object is a string
3655 * representation of an integer value we try to save it in a special form */
3656 static int rdbSaveRawString(FILE *fp
, unsigned char *s
, size_t len
) {
3659 /* Try integer encoding */
3661 unsigned char buf
[5];
3662 if ((enclen
= rdbTryIntegerEncoding((char*)s
,len
,buf
)) > 0) {
3663 if (fwrite(buf
,enclen
,1,fp
) == 0) return -1;
3668 /* Try LZF compression - under 20 bytes it's unable to compress even
3669 * aaaaaaaaaaaaaaaaaa so skip it */
3670 if (server
.rdbcompression
&& len
> 20) {
3673 retval
= rdbSaveLzfStringObject(fp
,s
,len
);
3674 if (retval
== -1) return -1;
3675 if (retval
> 0) return 0;
3676 /* retval == 0 means data can't be compressed, save the old way */
3679 /* Store verbatim */
3680 if (rdbSaveLen(fp
,len
) == -1) return -1;
3681 if (len
&& fwrite(s
,len
,1,fp
) == 0) return -1;
3685 /* Save a long long value as either an encoded string or a string. */
3686 static int rdbSaveLongLongAsStringObject(FILE *fp
, long long value
) {
3687 unsigned char buf
[32];
3688 int enclen
= rdbEncodeInteger(value
,buf
);
3690 if (fwrite(buf
,enclen
,1,fp
) == 0) return -1;
3692 /* Encode as string */
3693 enclen
= ll2string((char*)buf
,32,value
);
3694 redisAssert(enclen
< 32);
3695 if (rdbSaveLen(fp
,enclen
) == -1) return -1;
3696 if (fwrite(buf
,enclen
,1,fp
) == 0) return -1;
3701 /* Like rdbSaveStringObjectRaw() but handle encoded objects */
3702 static int rdbSaveStringObject(FILE *fp
, robj
*obj
) {
3703 /* Avoid to decode the object, then encode it again, if the
3704 * object is alrady integer encoded. */
3705 if (obj
->encoding
== REDIS_ENCODING_INT
) {
3706 return rdbSaveLongLongAsStringObject(fp
,(long)obj
->ptr
);
3708 redisAssert(obj
->encoding
== REDIS_ENCODING_RAW
);
3709 return rdbSaveRawString(fp
,obj
->ptr
,sdslen(obj
->ptr
));
/* Save a double value. Doubles are saved as strings prefixed by an unsigned
 * 8 bit integer specifying the length of the representation.
 * This 8 bit integer has special values in order to specify the following
 * conditions:
 * 253: not a number
 * 254: + inf
 * 255: - inf
 *
 * Returns 0 on success, -1 on write error. */
static int rdbSaveDoubleValue(FILE *fp, double val) {
    unsigned char buf[128];
    int len;

    if (isnan(val)) {
        buf[0] = 253;
        len = 1;
    } else if (!isfinite(val)) {
        len = 1;
        buf[0] = (val < 0) ? 255 : 254;
    } else {
#if (DBL_MANT_DIG >= 52) && (LLONG_MAX == 0x7fffffffffffffffLL)
        /* Check if the float is in a safe range to be casted into a
         * long long. We are assuming that long long is 64 bit here.
         * Also we are assuming that there are no implementations around where
         * double has precision < 52 bit.
         *
         * Under this assumptions we test if a double is inside an interval
         * where casting to long long is safe. Then using two castings we
         * make sure the decimal part is zero. If all this is true we use
         * integer printing function that is much faster. */
        double min = -4503599627370495; /* -((2^52)-1) */
        double max = 4503599627370496; /* 2^52 */
        if (val > min && val < max && val == ((double)((long long)val)))
            /* The string starts at buf+1, so one byte less is available. */
            ll2string((char*)buf+1,sizeof(buf)-1,(long long)val);
        else
#endif
            snprintf((char*)buf+1,sizeof(buf)-1,"%.17g",val);
        /* First byte is the length of the textual representation. */
        buf[0] = strlen((char*)buf+1);
        len = buf[0]+1;
    }
    if (fwrite(buf,len,1,fp) == 0) return -1;
    return 0;
}
3756 /* Save a Redis object. */
3757 static int rdbSaveObject(FILE *fp
, robj
*o
) {
3758 if (o
->type
== REDIS_STRING
) {
3759 /* Save a string value */
3760 if (rdbSaveStringObject(fp
,o
) == -1) return -1;
3761 } else if (o
->type
== REDIS_LIST
) {
3762 /* Save a list value */
3763 if (o
->encoding
== REDIS_ENCODING_ZIPLIST
) {
3765 unsigned char *vstr
;
3769 if (rdbSaveLen(fp
,ziplistLen(o
->ptr
)) == -1) return -1;
3770 p
= ziplistIndex(o
->ptr
,0);
3771 while(ziplistGet(p
,&vstr
,&vlen
,&vlong
)) {
3773 if (rdbSaveRawString(fp
,vstr
,vlen
) == -1)
3776 if (rdbSaveLongLongAsStringObject(fp
,vlong
) == -1)
3779 p
= ziplistNext(o
->ptr
,p
);
3781 } else if (o
->encoding
== REDIS_ENCODING_LIST
) {
3782 list
*list
= o
->ptr
;
3786 if (rdbSaveLen(fp
,listLength(list
)) == -1) return -1;
3787 listRewind(list
,&li
);
3788 while((ln
= listNext(&li
))) {
3789 robj
*eleobj
= listNodeValue(ln
);
3790 if (rdbSaveStringObject(fp
,eleobj
) == -1) return -1;
3793 redisPanic("Unknown list encoding");
3795 } else if (o
->type
== REDIS_SET
) {
3796 /* Save a set value */
3798 dictIterator
*di
= dictGetIterator(set
);
3801 if (rdbSaveLen(fp
,dictSize(set
)) == -1) return -1;
3802 while((de
= dictNext(di
)) != NULL
) {
3803 robj
*eleobj
= dictGetEntryKey(de
);
3805 if (rdbSaveStringObject(fp
,eleobj
) == -1) return -1;
3807 dictReleaseIterator(di
);
3808 } else if (o
->type
== REDIS_ZSET
) {
3809 /* Save a set value */
3811 dictIterator
*di
= dictGetIterator(zs
->dict
);
3814 if (rdbSaveLen(fp
,dictSize(zs
->dict
)) == -1) return -1;
3815 while((de
= dictNext(di
)) != NULL
) {
3816 robj
*eleobj
= dictGetEntryKey(de
);
3817 double *score
= dictGetEntryVal(de
);
3819 if (rdbSaveStringObject(fp
,eleobj
) == -1) return -1;
3820 if (rdbSaveDoubleValue(fp
,*score
) == -1) return -1;
3822 dictReleaseIterator(di
);
3823 } else if (o
->type
== REDIS_HASH
) {
3824 /* Save a hash value */
3825 if (o
->encoding
== REDIS_ENCODING_ZIPMAP
) {
3826 unsigned char *p
= zipmapRewind(o
->ptr
);
3827 unsigned int count
= zipmapLen(o
->ptr
);
3828 unsigned char *key
, *val
;
3829 unsigned int klen
, vlen
;
3831 if (rdbSaveLen(fp
,count
) == -1) return -1;
3832 while((p
= zipmapNext(p
,&key
,&klen
,&val
,&vlen
)) != NULL
) {
3833 if (rdbSaveRawString(fp
,key
,klen
) == -1) return -1;
3834 if (rdbSaveRawString(fp
,val
,vlen
) == -1) return -1;
3837 dictIterator
*di
= dictGetIterator(o
->ptr
);
3840 if (rdbSaveLen(fp
,dictSize((dict
*)o
->ptr
)) == -1) return -1;
3841 while((de
= dictNext(di
)) != NULL
) {
3842 robj
*key
= dictGetEntryKey(de
);
3843 robj
*val
= dictGetEntryVal(de
);
3845 if (rdbSaveStringObject(fp
,key
) == -1) return -1;
3846 if (rdbSaveStringObject(fp
,val
) == -1) return -1;
3848 dictReleaseIterator(di
);
3851 redisPanic("Unknown object type");
3856 /* Return the length the object will have on disk if saved with
3857 * the rdbSaveObject() function. Currently we use a trick to get
3858 * this length with very little changes to the code. In the future
3859 * we could switch to a faster solution. */
3860 static off_t
rdbSavedObjectLen(robj
*o
, FILE *fp
) {
3861 if (fp
== NULL
) fp
= server
.devnull
;
3863 assert(rdbSaveObject(fp
,o
) != 1);
3867 /* Return the number of pages required to save this object in the swap file */
3868 static off_t
rdbSavedObjectPages(robj
*o
, FILE *fp
) {
3869 off_t bytes
= rdbSavedObjectLen(o
,fp
);
3871 return (bytes
+(server
.vm_page_size
-1))/server
.vm_page_size
;
3874 /* Save the DB on disk. Return REDIS_ERR on error, REDIS_OK on success */
3875 static int rdbSave(char *filename
) {
3876 dictIterator
*di
= NULL
;
3881 time_t now
= time(NULL
);
3883 /* Wait for I/O therads to terminate, just in case this is a
3884 * foreground-saving, to avoid seeking the swap file descriptor at the
3886 if (server
.vm_enabled
)
3887 waitEmptyIOJobsQueue();
3889 snprintf(tmpfile
,256,"temp-%d.rdb", (int) getpid());
3890 fp
= fopen(tmpfile
,"w");
3892 redisLog(REDIS_WARNING
, "Failed saving the DB: %s", strerror(errno
));
3895 if (fwrite("REDIS0001",9,1,fp
) == 0) goto werr
;
3896 for (j
= 0; j
< server
.dbnum
; j
++) {
3897 redisDb
*db
= server
.db
+j
;
3899 if (dictSize(d
) == 0) continue;
3900 di
= dictGetIterator(d
);
3906 /* Write the SELECT DB opcode */
3907 if (rdbSaveType(fp
,REDIS_SELECTDB
) == -1) goto werr
;
3908 if (rdbSaveLen(fp
,j
) == -1) goto werr
;
3910 /* Iterate this DB writing every entry */
3911 while((de
= dictNext(di
)) != NULL
) {
3912 sds keystr
= dictGetEntryKey(de
);
3913 robj key
, *o
= dictGetEntryVal(de
);
3916 initStaticStringObject(key
,keystr
);
3917 expiretime
= getExpire(db
,&key
);
3919 /* Save the expire time */
3920 if (expiretime
!= -1) {
3921 /* If this key is already expired skip it */
3922 if (expiretime
< now
) continue;
3923 if (rdbSaveType(fp
,REDIS_EXPIRETIME
) == -1) goto werr
;
3924 if (rdbSaveTime(fp
,expiretime
) == -1) goto werr
;
3926 /* Save the key and associated value. This requires special
3927 * handling if the value is swapped out. */
3928 if (!server
.vm_enabled
|| o
->storage
== REDIS_VM_MEMORY
||
3929 o
->storage
== REDIS_VM_SWAPPING
) {
3930 /* Save type, key, value */
3931 if (rdbSaveType(fp
,o
->type
) == -1) goto werr
;
3932 if (rdbSaveStringObject(fp
,&key
) == -1) goto werr
;
3933 if (rdbSaveObject(fp
,o
) == -1) goto werr
;
3935 /* REDIS_VM_SWAPPED or REDIS_VM_LOADING */
3937 /* Get a preview of the object in memory */
3938 po
= vmPreviewObject(o
);
3939 /* Save type, key, value */
3940 if (rdbSaveType(fp
,po
->type
) == -1) goto werr
;
3941 if (rdbSaveStringObject(fp
,&key
) == -1) goto werr
;
3942 if (rdbSaveObject(fp
,po
) == -1) goto werr
;
3943 /* Remove the loaded object from memory */
3947 dictReleaseIterator(di
);
3950 if (rdbSaveType(fp
,REDIS_EOF
) == -1) goto werr
;
3952 /* Make sure data will not remain on the OS's output buffers */
3957 /* Use RENAME to make sure the DB file is changed atomically only
3958 * if the generated DB file is ok. */
3959 if (rename(tmpfile
,filename
) == -1) {
3960 redisLog(REDIS_WARNING
,"Error moving temp DB file on the final destination: %s", strerror(errno
));
3964 redisLog(REDIS_NOTICE
,"DB saved on disk");
3966 server
.lastsave
= time(NULL
);
3972 redisLog(REDIS_WARNING
,"Write error saving DB on disk: %s", strerror(errno
));
3973 if (di
) dictReleaseIterator(di
);
3977 static int rdbSaveBackground(char *filename
) {
3980 if (server
.bgsavechildpid
!= -1) return REDIS_ERR
;
3981 if (server
.vm_enabled
) waitEmptyIOJobsQueue();
3982 if ((childpid
= fork()) == 0) {
3984 if (server
.vm_enabled
) vmReopenSwapFile();
3986 if (rdbSave(filename
) == REDIS_OK
) {
3993 if (childpid
== -1) {
3994 redisLog(REDIS_WARNING
,"Can't save in background: fork: %s",
3998 redisLog(REDIS_NOTICE
,"Background saving started by pid %d",childpid
);
3999 server
.bgsavechildpid
= childpid
;
4000 updateDictResizePolicy();
4003 return REDIS_OK
; /* unreached */
4006 static void rdbRemoveTempFile(pid_t childpid
) {
4009 snprintf(tmpfile
,256,"temp-%d.rdb", (int) childpid
);
4013 static int rdbLoadType(FILE *fp
) {
4015 if (fread(&type
,1,1,fp
) == 0) return -1;
4019 static time_t rdbLoadTime(FILE *fp
) {
4021 if (fread(&t32
,4,1,fp
) == 0) return -1;
4022 return (time_t) t32
;
4025 /* Load an encoded length from the DB, see the REDIS_RDB_* defines on the top
4026 * of this file for a description of how these are stored on disk.
4028 * isencoded is set to 1 if the value read is not actually a length but
4029 * an "encoding type", check the above comments for more info */
4030 static uint32_t rdbLoadLen(FILE *fp
, int *isencoded
) {
4031 unsigned char buf
[2];
4035 if (isencoded
) *isencoded
= 0;
4036 if (fread(buf
,1,1,fp
) == 0) return REDIS_RDB_LENERR
;
4037 type
= (buf
[0]&0xC0)>>6;
4038 if (type
== REDIS_RDB_6BITLEN
) {
4039 /* Read a 6 bit len */
4041 } else if (type
== REDIS_RDB_ENCVAL
) {
4042 /* Read a 6 bit len encoding type */
4043 if (isencoded
) *isencoded
= 1;
4045 } else if (type
== REDIS_RDB_14BITLEN
) {
4046 /* Read a 14 bit len */
4047 if (fread(buf
+1,1,1,fp
) == 0) return REDIS_RDB_LENERR
;
4048 return ((buf
[0]&0x3F)<<8)|buf
[1];
4050 /* Read a 32 bit len */
4051 if (fread(&len
,4,1,fp
) == 0) return REDIS_RDB_LENERR
;
4056 /* Load an integer-encoded object from file 'fp', with the specified
4057 * encoding type 'enctype'. If encode is true the function may return
4058 * an integer-encoded object as reply, otherwise the returned object
4059 * will always be encoded as a raw string. */
4060 static robj
*rdbLoadIntegerObject(FILE *fp
, int enctype
, int encode
) {
4061 unsigned char enc
[4];
4064 if (enctype
== REDIS_RDB_ENC_INT8
) {
4065 if (fread(enc
,1,1,fp
) == 0) return NULL
;
4066 val
= (signed char)enc
[0];
4067 } else if (enctype
== REDIS_RDB_ENC_INT16
) {
4069 if (fread(enc
,2,1,fp
) == 0) return NULL
;
4070 v
= enc
[0]|(enc
[1]<<8);
4072 } else if (enctype
== REDIS_RDB_ENC_INT32
) {
4074 if (fread(enc
,4,1,fp
) == 0) return NULL
;
4075 v
= enc
[0]|(enc
[1]<<8)|(enc
[2]<<16)|(enc
[3]<<24);
4078 val
= 0; /* anti-warning */
4079 redisPanic("Unknown RDB integer encoding type");
4082 return createStringObjectFromLongLong(val
);
4084 return createObject(REDIS_STRING
,sdsfromlonglong(val
));
4087 static robj
*rdbLoadLzfStringObject(FILE*fp
) {
4088 unsigned int len
, clen
;
4089 unsigned char *c
= NULL
;
4092 if ((clen
= rdbLoadLen(fp
,NULL
)) == REDIS_RDB_LENERR
) return NULL
;
4093 if ((len
= rdbLoadLen(fp
,NULL
)) == REDIS_RDB_LENERR
) return NULL
;
4094 if ((c
= zmalloc(clen
)) == NULL
) goto err
;
4095 if ((val
= sdsnewlen(NULL
,len
)) == NULL
) goto err
;
4096 if (fread(c
,clen
,1,fp
) == 0) goto err
;
4097 if (lzf_decompress(c
,clen
,val
,len
) == 0) goto err
;
4099 return createObject(REDIS_STRING
,val
);
4106 static robj
*rdbGenericLoadStringObject(FILE*fp
, int encode
) {
4111 len
= rdbLoadLen(fp
,&isencoded
);
4114 case REDIS_RDB_ENC_INT8
:
4115 case REDIS_RDB_ENC_INT16
:
4116 case REDIS_RDB_ENC_INT32
:
4117 return rdbLoadIntegerObject(fp
,len
,encode
);
4118 case REDIS_RDB_ENC_LZF
:
4119 return rdbLoadLzfStringObject(fp
);
4121 redisPanic("Unknown RDB encoding type");
4125 if (len
== REDIS_RDB_LENERR
) return NULL
;
4126 val
= sdsnewlen(NULL
,len
);
4127 if (len
&& fread(val
,len
,1,fp
) == 0) {
4131 return createObject(REDIS_STRING
,val
);
4134 static robj
*rdbLoadStringObject(FILE *fp
) {
4135 return rdbGenericLoadStringObject(fp
,0);
4138 static robj
*rdbLoadEncodedStringObject(FILE *fp
) {
4139 return rdbGenericLoadStringObject(fp
,1);
4142 /* For information about double serialization check rdbSaveDoubleValue() */
4143 static int rdbLoadDoubleValue(FILE *fp
, double *val
) {
4147 if (fread(&len
,1,1,fp
) == 0) return -1;
4149 case 255: *val
= R_NegInf
; return 0;
4150 case 254: *val
= R_PosInf
; return 0;
4151 case 253: *val
= R_Nan
; return 0;
4153 if (fread(buf
,len
,1,fp
) == 0) return -1;
4155 sscanf(buf
, "%lg", val
);
4160 /* Load a Redis object of the specified type from the specified file.
4161 * On success a newly allocated object is returned, otherwise NULL. */
4162 static robj
*rdbLoadObject(int type
, FILE *fp
) {
4163 robj
*o
, *ele
, *dec
;
4166 redisLog(REDIS_DEBUG
,"LOADING OBJECT %d (at %d)\n",type
,ftell(fp
));
4167 if (type
== REDIS_STRING
) {
4168 /* Read string value */
4169 if ((o
= rdbLoadEncodedStringObject(fp
)) == NULL
) return NULL
;
4170 o
= tryObjectEncoding(o
);
4171 } else if (type
== REDIS_LIST
) {
4172 /* Read list value */
4173 if ((len
= rdbLoadLen(fp
,NULL
)) == REDIS_RDB_LENERR
) return NULL
;
4175 /* Use a real list when there are too many entries */
4176 if (len
> server
.list_max_ziplist_entries
) {
4177 o
= createListObject();
4179 o
= createZiplistObject();
4182 /* Load every single element of the list */
4184 if ((ele
= rdbLoadEncodedStringObject(fp
)) == NULL
) return NULL
;
4186 /* If we are using a ziplist and the value is too big, convert
4187 * the object to a real list. */
4188 if (o
->encoding
== REDIS_ENCODING_ZIPLIST
&&
4189 ele
->encoding
== REDIS_ENCODING_RAW
&&
4190 sdslen(ele
->ptr
) > server
.list_max_ziplist_value
)
4191 listTypeConvert(o
,REDIS_ENCODING_LIST
);
4193 if (o
->encoding
== REDIS_ENCODING_ZIPLIST
) {
4194 dec
= getDecodedObject(ele
);
4195 o
->ptr
= ziplistPush(o
->ptr
,dec
->ptr
,sdslen(dec
->ptr
),REDIS_TAIL
);
4199 ele
= tryObjectEncoding(ele
);
4200 listAddNodeTail(o
->ptr
,ele
);
4203 } else if (type
== REDIS_SET
) {
4204 /* Read list/set value */
4205 if ((len
= rdbLoadLen(fp
,NULL
)) == REDIS_RDB_LENERR
) return NULL
;
4206 o
= createSetObject();
4207 /* It's faster to expand the dict to the right size asap in order
4208 * to avoid rehashing */
4209 if (len
> DICT_HT_INITIAL_SIZE
)
4210 dictExpand(o
->ptr
,len
);
4211 /* Load every single element of the list/set */
4213 if ((ele
= rdbLoadEncodedStringObject(fp
)) == NULL
) return NULL
;
4214 ele
= tryObjectEncoding(ele
);
4215 dictAdd((dict
*)o
->ptr
,ele
,NULL
);
4217 } else if (type
== REDIS_ZSET
) {
4218 /* Read list/set value */
4222 if ((zsetlen
= rdbLoadLen(fp
,NULL
)) == REDIS_RDB_LENERR
) return NULL
;
4223 o
= createZsetObject();
4225 /* Load every single element of the list/set */
4228 double *score
= zmalloc(sizeof(double));
4230 if ((ele
= rdbLoadEncodedStringObject(fp
)) == NULL
) return NULL
;
4231 ele
= tryObjectEncoding(ele
);
4232 if (rdbLoadDoubleValue(fp
,score
) == -1) return NULL
;
4233 dictAdd(zs
->dict
,ele
,score
);
4234 zslInsert(zs
->zsl
,*score
,ele
);
4235 incrRefCount(ele
); /* added to skiplist */
4237 } else if (type
== REDIS_HASH
) {
4240 if ((hashlen
= rdbLoadLen(fp
,NULL
)) == REDIS_RDB_LENERR
) return NULL
;
4241 o
= createHashObject();
4242 /* Too many entries? Use a hash table. */
4243 if (hashlen
> server
.hash_max_zipmap_entries
)
4244 convertToRealHash(o
);
4245 /* Load every key/value, then set it into the zipmap or hash
4246 * table, as needed. */
4250 if ((key
= rdbLoadEncodedStringObject(fp
)) == NULL
) return NULL
;
4251 if ((val
= rdbLoadEncodedStringObject(fp
)) == NULL
) return NULL
;
4252 /* If we are using a zipmap and there are too big values
4253 * the object is converted to real hash table encoding. */
4254 if (o
->encoding
!= REDIS_ENCODING_HT
&&
4255 ((key
->encoding
== REDIS_ENCODING_RAW
&&
4256 sdslen(key
->ptr
) > server
.hash_max_zipmap_value
) ||
4257 (val
->encoding
== REDIS_ENCODING_RAW
&&
4258 sdslen(val
->ptr
) > server
.hash_max_zipmap_value
)))
4260 convertToRealHash(o
);
4263 if (o
->encoding
== REDIS_ENCODING_ZIPMAP
) {
4264 unsigned char *zm
= o
->ptr
;
4265 robj
*deckey
, *decval
;
4267 /* We need raw string objects to add them to the zipmap */
4268 deckey
= getDecodedObject(key
);
4269 decval
= getDecodedObject(val
);
4270 zm
= zipmapSet(zm
,deckey
->ptr
,sdslen(deckey
->ptr
),
4271 decval
->ptr
,sdslen(decval
->ptr
),NULL
);
4273 decrRefCount(deckey
);
4274 decrRefCount(decval
);
4278 key
= tryObjectEncoding(key
);
4279 val
= tryObjectEncoding(val
);
4280 dictAdd((dict
*)o
->ptr
,key
,val
);
4284 redisPanic("Unknown object type");
4289 static int rdbLoad(char *filename
) {
4292 int type
, retval
, rdbver
;
4293 int swap_all_values
= 0;
4294 redisDb
*db
= server
.db
+0;
4296 time_t expiretime
, now
= time(NULL
);
4298 fp
= fopen(filename
,"r");
4299 if (!fp
) return REDIS_ERR
;
4300 if (fread(buf
,9,1,fp
) == 0) goto eoferr
;
4302 if (memcmp(buf
,"REDIS",5) != 0) {
4304 redisLog(REDIS_WARNING
,"Wrong signature trying to load DB from file");
4307 rdbver
= atoi(buf
+5);
4310 redisLog(REDIS_WARNING
,"Can't handle RDB format version %d",rdbver
);
4319 if ((type
= rdbLoadType(fp
)) == -1) goto eoferr
;
4320 if (type
== REDIS_EXPIRETIME
) {
4321 if ((expiretime
= rdbLoadTime(fp
)) == -1) goto eoferr
;
4322 /* We read the time so we need to read the object type again */
4323 if ((type
= rdbLoadType(fp
)) == -1) goto eoferr
;
4325 if (type
== REDIS_EOF
) break;
4326 /* Handle SELECT DB opcode as a special case */
4327 if (type
== REDIS_SELECTDB
) {
4328 if ((dbid
= rdbLoadLen(fp
,NULL
)) == REDIS_RDB_LENERR
)
4330 if (dbid
>= (unsigned)server
.dbnum
) {
4331 redisLog(REDIS_WARNING
,"FATAL: Data file was created with a Redis server configured to handle more than %d databases. Exiting\n", server
.dbnum
);
4334 db
= server
.db
+dbid
;
4338 if ((key
= rdbLoadStringObject(fp
)) == NULL
) goto eoferr
;
4340 if ((val
= rdbLoadObject(type
,fp
)) == NULL
) goto eoferr
;
4341 /* Check if the key already expired */
4342 if (expiretime
!= -1 && expiretime
< now
) {
4347 /* Add the new object in the hash table */
4348 retval
= dbAdd(db
,key
,val
);
4349 if (retval
== REDIS_ERR
) {
4350 redisLog(REDIS_WARNING
,"Loading DB, duplicated key (%s) found! Unrecoverable error, exiting now.", key
->ptr
);
4353 /* Set the expire time if needed */
4354 if (expiretime
!= -1) setExpire(db
,key
,expiretime
);
4356 /* Handle swapping while loading big datasets when VM is on */
4358 /* If we detect we are hopeless about fitting something in memory
4359 * we just swap every new key on disk. Directly...
4360 * Note that it's important to check for this condition before resorting
4361 * to random sampling, otherwise we may try to swap already
4363 if (swap_all_values
) {
4364 dictEntry
*de
= dictFind(db
->dict
,key
->ptr
);
4366 /* de may be NULL since the key already expired */
4369 val
= dictGetEntryVal(de
);
4371 if (val
->refcount
== 1 &&
4372 (vp
= vmSwapObjectBlocking(val
)) != NULL
)
4373 dictGetEntryVal(de
) = vp
;
4380 /* Flush data on disk once 32 MB of additional RAM are used... */
4382 if ((zmalloc_used_memory() - server
.vm_max_memory
) > 1024*1024*32)
4385 /* If we have still some hope of having some value fitting memory
4386 * then we try random sampling. */
4387 if (!swap_all_values
&& server
.vm_enabled
&& force_swapout
) {
4388 while (zmalloc_used_memory() > server
.vm_max_memory
) {
4389 if (vmSwapOneObjectBlocking() == REDIS_ERR
) break;
4391 if (zmalloc_used_memory() > server
.vm_max_memory
)
4392 swap_all_values
= 1; /* We are already using too much mem */
4398 eoferr
: /* unexpected end of file is handled here with a fatal exit */
4399 redisLog(REDIS_WARNING
,"Short read or OOM loading DB. Unrecoverable error, aborting now.");
4401 return REDIS_ERR
; /* Just to avoid warning */
4404 /*================================== Shutdown =============================== */
4405 static int prepareForShutdown() {
4406 redisLog(REDIS_WARNING
,"User requested shutdown, saving DB...");
4407 /* Kill the saving child if there is a background saving in progress.
4408 We want to avoid race conditions, for instance our saving child may
4409 overwrite the synchronous saving done by SHUTDOWN. */
4410 if (server
.bgsavechildpid
!= -1) {
4411 redisLog(REDIS_WARNING
,"There is a live saving child. Killing it!");
4412 kill(server
.bgsavechildpid
,SIGKILL
);
4413 rdbRemoveTempFile(server
.bgsavechildpid
);
4415 if (server
.appendonly
) {
4416 /* Append only file: fsync() the AOF and exit */
4417 aof_fsync(server
.appendfd
);
4418 if (server
.vm_enabled
) unlink(server
.vm_swap_file
);
4420 /* Snapshotting. Perform a SYNC SAVE and exit */
4421 if (rdbSave(server
.dbfilename
) == REDIS_OK
) {
4422 if (server
.daemonize
)
4423 unlink(server
.pidfile
);
4424 redisLog(REDIS_WARNING
,"%zu bytes used at exit",zmalloc_used_memory());
4426 /* Ooops.. error saving! The best we can do is to continue
4427 * operating. Note that if there was a background saving process,
4428 * in the next cron() Redis will be notified that the background
4429 * saving aborted, handling special stuff like slaves pending for
4430 * synchronization... */
4431 redisLog(REDIS_WARNING
,"Error trying to save the DB, can't exit");
4435 redisLog(REDIS_WARNING
,"Server exit now, bye bye...");
4439 /*================================== Commands =============================== */
4441 static void authCommand(redisClient
*c
) {
4442 if (!server
.requirepass
|| !strcmp(c
->argv
[1]->ptr
, server
.requirepass
)) {
4443 c
->authenticated
= 1;
4444 addReply(c
,shared
.ok
);
4446 c
->authenticated
= 0;
4447 addReplySds(c
,sdscatprintf(sdsempty(),"-ERR invalid password\r\n"));
4451 static void pingCommand(redisClient
*c
) {
4452 addReply(c
,shared
.pong
);
4455 static void echoCommand(redisClient
*c
) {
4456 addReplyBulk(c
,c
->argv
[1]);
4459 /*=================================== Strings =============================== */
4461 static void setGenericCommand(redisClient
*c
, int nx
, robj
*key
, robj
*val
, robj
*expire
) {
4463 long seconds
= 0; /* initialized to avoid an harmness warning */
4466 if (getLongFromObjectOrReply(c
, expire
, &seconds
, NULL
) != REDIS_OK
)
4469 addReplySds(c
,sdsnew("-ERR invalid expire time in SETEX\r\n"));
4474 touchWatchedKey(c
->db
,key
);
4475 if (nx
) deleteIfVolatile(c
->db
,key
);
4476 retval
= dbAdd(c
->db
,key
,val
);
4477 if (retval
== REDIS_ERR
) {
4479 dbReplace(c
->db
,key
,val
);
4482 addReply(c
,shared
.czero
);
4489 removeExpire(c
->db
,key
);
4490 if (expire
) setExpire(c
->db
,key
,time(NULL
)+seconds
);
4491 addReply(c
, nx
? shared
.cone
: shared
.ok
);
4494 static void setCommand(redisClient
*c
) {
4495 setGenericCommand(c
,0,c
->argv
[1],c
->argv
[2],NULL
);
4498 static void setnxCommand(redisClient
*c
) {
4499 setGenericCommand(c
,1,c
->argv
[1],c
->argv
[2],NULL
);
4502 static void setexCommand(redisClient
*c
) {
4503 setGenericCommand(c
,0,c
->argv
[1],c
->argv
[3],c
->argv
[2]);
4506 static int getGenericCommand(redisClient
*c
) {
4509 if ((o
= lookupKeyReadOrReply(c
,c
->argv
[1],shared
.nullbulk
)) == NULL
)
4512 if (o
->type
!= REDIS_STRING
) {
4513 addReply(c
,shared
.wrongtypeerr
);
4521 static void getCommand(redisClient
*c
) {
4522 getGenericCommand(c
);
4525 static void getsetCommand(redisClient
*c
) {
4526 if (getGenericCommand(c
) == REDIS_ERR
) return;
4527 dbReplace(c
->db
,c
->argv
[1],c
->argv
[2]);
4528 incrRefCount(c
->argv
[2]);
4530 removeExpire(c
->db
,c
->argv
[1]);
4533 static void mgetCommand(redisClient
*c
) {
4536 addReplySds(c
,sdscatprintf(sdsempty(),"*%d\r\n",c
->argc
-1));
4537 for (j
= 1; j
< c
->argc
; j
++) {
4538 robj
*o
= lookupKeyRead(c
->db
,c
->argv
[j
]);
4540 addReply(c
,shared
.nullbulk
);
4542 if (o
->type
!= REDIS_STRING
) {
4543 addReply(c
,shared
.nullbulk
);
4551 static void msetGenericCommand(redisClient
*c
, int nx
) {
4552 int j
, busykeys
= 0;
4554 if ((c
->argc
% 2) == 0) {
4555 addReplySds(c
,sdsnew("-ERR wrong number of arguments for MSET\r\n"));
4558 /* Handle the NX flag. The MSETNX semantic is to return zero and not
4559 * set anything at all if at least one key already exists. */
4561 for (j
= 1; j
< c
->argc
; j
+= 2) {
4562 if (lookupKeyWrite(c
->db
,c
->argv
[j
]) != NULL
) {
4568 addReply(c
, shared
.czero
);
4572 for (j
= 1; j
< c
->argc
; j
+= 2) {
4573 c
->argv
[j
+1] = tryObjectEncoding(c
->argv
[j
+1]);
4574 dbReplace(c
->db
,c
->argv
[j
],c
->argv
[j
+1]);
4575 incrRefCount(c
->argv
[j
+1]);
4576 removeExpire(c
->db
,c
->argv
[j
]);
4578 server
.dirty
+= (c
->argc
-1)/2;
4579 addReply(c
, nx
? shared
.cone
: shared
.ok
);
4582 static void msetCommand(redisClient
*c
) {
4583 msetGenericCommand(c
,0);
4586 static void msetnxCommand(redisClient
*c
) {
4587 msetGenericCommand(c
,1);
4590 static void incrDecrCommand(redisClient
*c
, long long incr
) {
4594 o
= lookupKeyWrite(c
->db
,c
->argv
[1]);
4595 if (o
!= NULL
&& checkType(c
,o
,REDIS_STRING
)) return;
4596 if (getLongLongFromObjectOrReply(c
,o
,&value
,NULL
) != REDIS_OK
) return;
4599 o
= createStringObjectFromLongLong(value
);
4600 dbReplace(c
->db
,c
->argv
[1],o
);
4602 addReply(c
,shared
.colon
);
4604 addReply(c
,shared
.crlf
);
4607 static void incrCommand(redisClient
*c
) {
4608 incrDecrCommand(c
,1);
4611 static void decrCommand(redisClient
*c
) {
4612 incrDecrCommand(c
,-1);
4615 static void incrbyCommand(redisClient
*c
) {
4618 if (getLongLongFromObjectOrReply(c
, c
->argv
[2], &incr
, NULL
) != REDIS_OK
) return;
4619 incrDecrCommand(c
,incr
);
4622 static void decrbyCommand(redisClient
*c
) {
4625 if (getLongLongFromObjectOrReply(c
, c
->argv
[2], &incr
, NULL
) != REDIS_OK
) return;
4626 incrDecrCommand(c
,-incr
);
4629 static void appendCommand(redisClient
*c
) {
4634 o
= lookupKeyWrite(c
->db
,c
->argv
[1]);
4636 /* Create the key */
4637 retval
= dbAdd(c
->db
,c
->argv
[1],c
->argv
[2]);
4638 incrRefCount(c
->argv
[2]);
4639 totlen
= stringObjectLen(c
->argv
[2]);
4641 if (o
->type
!= REDIS_STRING
) {
4642 addReply(c
,shared
.wrongtypeerr
);
4645 /* If the object is specially encoded or shared we have to make
4647 if (o
->refcount
!= 1 || o
->encoding
!= REDIS_ENCODING_RAW
) {
4648 robj
*decoded
= getDecodedObject(o
);
4650 o
= createStringObject(decoded
->ptr
, sdslen(decoded
->ptr
));
4651 decrRefCount(decoded
);
4652 dbReplace(c
->db
,c
->argv
[1],o
);
4655 if (c
->argv
[2]->encoding
== REDIS_ENCODING_RAW
) {
4656 o
->ptr
= sdscatlen(o
->ptr
,
4657 c
->argv
[2]->ptr
, sdslen(c
->argv
[2]->ptr
));
4659 o
->ptr
= sdscatprintf(o
->ptr
, "%ld",
4660 (unsigned long) c
->argv
[2]->ptr
);
4662 totlen
= sdslen(o
->ptr
);
4665 addReplySds(c
,sdscatprintf(sdsempty(),":%lu\r\n",(unsigned long)totlen
));
4668 static void substrCommand(redisClient
*c
) {
4670 long start
= atoi(c
->argv
[2]->ptr
);
4671 long end
= atoi(c
->argv
[3]->ptr
);
4672 size_t rangelen
, strlen
;
4675 if ((o
= lookupKeyReadOrReply(c
,c
->argv
[1],shared
.nullbulk
)) == NULL
||
4676 checkType(c
,o
,REDIS_STRING
)) return;
4678 o
= getDecodedObject(o
);
4679 strlen
= sdslen(o
->ptr
);
4681 /* convert negative indexes */
4682 if (start
< 0) start
= strlen
+start
;
4683 if (end
< 0) end
= strlen
+end
;
4684 if (start
< 0) start
= 0;
4685 if (end
< 0) end
= 0;
4687 /* indexes sanity checks */
4688 if (start
> end
|| (size_t)start
>= strlen
) {
4689 /* Out of range start or start > end result in null reply */
4690 addReply(c
,shared
.nullbulk
);
4694 if ((size_t)end
>= strlen
) end
= strlen
-1;
4695 rangelen
= (end
-start
)+1;
4697 /* Return the result */
4698 addReplySds(c
,sdscatprintf(sdsempty(),"$%zu\r\n",rangelen
));
4699 range
= sdsnewlen((char*)o
->ptr
+start
,rangelen
);
4700 addReplySds(c
,range
);
4701 addReply(c
,shared
.crlf
);
4705 /* ========================= Type agnostic commands ========================= */
4707 static void delCommand(redisClient
*c
) {
4710 for (j
= 1; j
< c
->argc
; j
++) {
4711 if (dbDelete(c
->db
,c
->argv
[j
])) {
4712 touchWatchedKey(c
->db
,c
->argv
[j
]);
4717 addReplyLongLong(c
,deleted
);
4720 static void existsCommand(redisClient
*c
) {
4721 expireIfNeeded(c
->db
,c
->argv
[1]);
4722 if (dbExists(c
->db
,c
->argv
[1])) {
4723 addReply(c
, shared
.cone
);
4725 addReply(c
, shared
.czero
);
4729 static void selectCommand(redisClient
*c
) {
4730 int id
= atoi(c
->argv
[1]->ptr
);
4732 if (selectDb(c
,id
) == REDIS_ERR
) {
4733 addReplySds(c
,sdsnew("-ERR invalid DB index\r\n"));
4735 addReply(c
,shared
.ok
);
4739 static void randomkeyCommand(redisClient
*c
) {
4742 if ((key
= dbRandomKey(c
->db
)) == NULL
) {
4743 addReply(c
,shared
.nullbulk
);
4747 addReplyBulk(c
,key
);
4751 static void keysCommand(redisClient
*c
) {
4754 sds pattern
= c
->argv
[1]->ptr
;
4755 int plen
= sdslen(pattern
);
4756 unsigned long numkeys
= 0;
4757 robj
*lenobj
= createObject(REDIS_STRING
,NULL
);
4759 di
= dictGetIterator(c
->db
->dict
);
4761 decrRefCount(lenobj
);
4762 while((de
= dictNext(di
)) != NULL
) {
4763 sds key
= dictGetEntryKey(de
);
4766 if ((pattern
[0] == '*' && pattern
[1] == '\0') ||
4767 stringmatchlen(pattern
,plen
,key
,sdslen(key
),0)) {
4768 keyobj
= createStringObject(key
,sdslen(key
));
4769 if (expireIfNeeded(c
->db
,keyobj
) == 0) {
4770 addReplyBulk(c
,keyobj
);
4773 decrRefCount(keyobj
);
4776 dictReleaseIterator(di
);
4777 lenobj
->ptr
= sdscatprintf(sdsempty(),"*%lu\r\n",numkeys
);
4780 static void dbsizeCommand(redisClient
*c
) {
4782 sdscatprintf(sdsempty(),":%lu\r\n",dictSize(c
->db
->dict
)));
4785 static void lastsaveCommand(redisClient
*c
) {
4787 sdscatprintf(sdsempty(),":%lu\r\n",server
.lastsave
));
4790 static void typeCommand(redisClient
*c
) {
4794 o
= lookupKeyRead(c
->db
,c
->argv
[1]);
4799 case REDIS_STRING
: type
= "+string"; break;
4800 case REDIS_LIST
: type
= "+list"; break;
4801 case REDIS_SET
: type
= "+set"; break;
4802 case REDIS_ZSET
: type
= "+zset"; break;
4803 case REDIS_HASH
: type
= "+hash"; break;
4804 default: type
= "+unknown"; break;
4807 addReplySds(c
,sdsnew(type
));
4808 addReply(c
,shared
.crlf
);
4811 static void saveCommand(redisClient
*c
) {
4812 if (server
.bgsavechildpid
!= -1) {
4813 addReplySds(c
,sdsnew("-ERR background save in progress\r\n"));
4816 if (rdbSave(server
.dbfilename
) == REDIS_OK
) {
4817 addReply(c
,shared
.ok
);
4819 addReply(c
,shared
.err
);
4823 static void bgsaveCommand(redisClient
*c
) {
4824 if (server
.bgsavechildpid
!= -1) {
4825 addReplySds(c
,sdsnew("-ERR background save already in progress\r\n"));
4828 if (rdbSaveBackground(server
.dbfilename
) == REDIS_OK
) {
4829 char *status
= "+Background saving started\r\n";
4830 addReplySds(c
,sdsnew(status
));
4832 addReply(c
,shared
.err
);
4836 static void shutdownCommand(redisClient
*c
) {
4837 if (prepareForShutdown() == REDIS_OK
)
4839 addReplySds(c
, sdsnew("-ERR Errors trying to SHUTDOWN. Check logs.\r\n"));
4842 static void renameGenericCommand(redisClient
*c
, int nx
) {
4845 /* To use the same key as src and dst is probably an error */
4846 if (sdscmp(c
->argv
[1]->ptr
,c
->argv
[2]->ptr
) == 0) {
4847 addReply(c
,shared
.sameobjecterr
);
4851 if ((o
= lookupKeyWriteOrReply(c
,c
->argv
[1],shared
.nokeyerr
)) == NULL
)
4855 deleteIfVolatile(c
->db
,c
->argv
[2]);
4856 if (dbAdd(c
->db
,c
->argv
[2],o
) == REDIS_ERR
) {
4859 addReply(c
,shared
.czero
);
4862 dbReplace(c
->db
,c
->argv
[2],o
);
4864 dbDelete(c
->db
,c
->argv
[1]);
4865 touchWatchedKey(c
->db
,c
->argv
[2]);
4867 addReply(c
,nx
? shared
.cone
: shared
.ok
);
4870 static void renameCommand(redisClient
*c
) {
4871 renameGenericCommand(c
,0);
4874 static void renamenxCommand(redisClient
*c
) {
4875 renameGenericCommand(c
,1);
4878 static void moveCommand(redisClient
*c
) {
4883 /* Obtain source and target DB pointers */
4886 if (selectDb(c
,atoi(c
->argv
[2]->ptr
)) == REDIS_ERR
) {
4887 addReply(c
,shared
.outofrangeerr
);
4891 selectDb(c
,srcid
); /* Back to the source DB */
4893 /* If the user is moving using as target the same
4894 * DB as the source DB it is probably an error. */
4896 addReply(c
,shared
.sameobjecterr
);
4900 /* Check if the element exists and get a reference */
4901 o
= lookupKeyWrite(c
->db
,c
->argv
[1]);
4903 addReply(c
,shared
.czero
);
4907 /* Try to add the element to the target DB */
4908 deleteIfVolatile(dst
,c
->argv
[1]);
4909 if (dbAdd(dst
,c
->argv
[1],o
) == REDIS_ERR
) {
4910 addReply(c
,shared
.czero
);
4915 /* OK! key moved, free the entry in the source DB */
4916 dbDelete(src
,c
->argv
[1]);
4918 addReply(c
,shared
.cone
);
4921 /* =================================== Lists ================================ */
4924 /* Check the argument length to see if it requires us to convert the ziplist
4925 * to a real list. Only check raw-encoded objects because integer encoded
4926 * objects are never too long. */
4927 static void listTypeTryConversion(robj
*subject
, robj
*value
) {
4928 if (subject
->encoding
!= REDIS_ENCODING_ZIPLIST
) return;
4929 if (value
->encoding
== REDIS_ENCODING_RAW
&&
4930 sdslen(value
->ptr
) > server
.list_max_ziplist_value
)
4931 listTypeConvert(subject
,REDIS_ENCODING_LIST
);
4934 static void listTypePush(robj
*subject
, robj
*value
, int where
) {
4935 /* Check if we need to convert the ziplist */
4936 listTypeTryConversion(subject
,value
);
4937 if (subject
->encoding
== REDIS_ENCODING_ZIPLIST
&&
4938 ziplistLen(subject
->ptr
) >= server
.list_max_ziplist_entries
)
4939 listTypeConvert(subject
,REDIS_ENCODING_LIST
);
4941 if (subject
->encoding
== REDIS_ENCODING_ZIPLIST
) {
4942 int pos
= (where
== REDIS_HEAD
) ? ZIPLIST_HEAD
: ZIPLIST_TAIL
;
4943 value
= getDecodedObject(value
);
4944 subject
->ptr
= ziplistPush(subject
->ptr
,value
->ptr
,sdslen(value
->ptr
),pos
);
4945 decrRefCount(value
);
4946 } else if (subject
->encoding
== REDIS_ENCODING_LIST
) {
4947 if (where
== REDIS_HEAD
) {
4948 listAddNodeHead(subject
->ptr
,value
);
4950 listAddNodeTail(subject
->ptr
,value
);
4952 incrRefCount(value
);
4954 redisPanic("Unknown list encoding");
4958 static robj
*listTypePop(robj
*subject
, int where
) {
4960 if (subject
->encoding
== REDIS_ENCODING_ZIPLIST
) {
4962 unsigned char *vstr
;
4965 int pos
= (where
== REDIS_HEAD
) ? 0 : -1;
4966 p
= ziplistIndex(subject
->ptr
,pos
);
4967 if (ziplistGet(p
,&vstr
,&vlen
,&vlong
)) {
4969 value
= createStringObject((char*)vstr
,vlen
);
4971 value
= createStringObjectFromLongLong(vlong
);
4973 /* We only need to delete an element when it exists */
4974 subject
->ptr
= ziplistDelete(subject
->ptr
,&p
);
4976 } else if (subject
->encoding
== REDIS_ENCODING_LIST
) {
4977 list
*list
= subject
->ptr
;
4979 if (where
== REDIS_HEAD
) {
4980 ln
= listFirst(list
);
4982 ln
= listLast(list
);
4985 value
= listNodeValue(ln
);
4986 incrRefCount(value
);
4987 listDelNode(list
,ln
);
4990 redisPanic("Unknown list encoding");
4995 static unsigned long listTypeLength(robj
*subject
) {
4996 if (subject
->encoding
== REDIS_ENCODING_ZIPLIST
) {
4997 return ziplistLen(subject
->ptr
);
4998 } else if (subject
->encoding
== REDIS_ENCODING_LIST
) {
4999 return listLength((list
*)subject
->ptr
);
5001 redisPanic("Unknown list encoding");
5005 /* Structure to hold list iteration abstraction. */
5008 unsigned char encoding
;
5009 unsigned char direction
; /* Iteration direction */
5014 /* Structure for an entry while iterating over a list. */
5016 listTypeIterator
*li
;
5017 unsigned char *zi
; /* Entry in ziplist */
5018 listNode
*ln
; /* Entry in linked list */
5021 /* Initialize an iterator at the specified index. */
5022 static listTypeIterator
*listTypeInitIterator(robj
*subject
, int index
, unsigned char direction
) {
5023 listTypeIterator
*li
= zmalloc(sizeof(listTypeIterator
));
5024 li
->subject
= subject
;
5025 li
->encoding
= subject
->encoding
;
5026 li
->direction
= direction
;
5027 if (li
->encoding
== REDIS_ENCODING_ZIPLIST
) {
5028 li
->zi
= ziplistIndex(subject
->ptr
,index
);
5029 } else if (li
->encoding
== REDIS_ENCODING_LIST
) {
5030 li
->ln
= listIndex(subject
->ptr
,index
);
5032 redisPanic("Unknown list encoding");
5037 /* Clean up the iterator. */
5038 static void listTypeReleaseIterator(listTypeIterator
*li
) {
5042 /* Stores a pointer to the current entry in the provided entry structure
5043 * and advances the position of the iterator. Returns 1 when the current
5044 * entry is in fact an entry, 0 otherwise. */
5045 static int listTypeNext(listTypeIterator
*li
, listTypeEntry
*entry
) {
5046 /* Protect from converting when iterating */
5047 redisAssert(li
->subject
->encoding
== li
->encoding
);
5050 if (li
->encoding
== REDIS_ENCODING_ZIPLIST
) {
5052 if (entry
->zi
!= NULL
) {
5053 if (li
->direction
== REDIS_TAIL
)
5054 li
->zi
= ziplistNext(li
->subject
->ptr
,li
->zi
);
5056 li
->zi
= ziplistPrev(li
->subject
->ptr
,li
->zi
);
5059 } else if (li
->encoding
== REDIS_ENCODING_LIST
) {
5061 if (entry
->ln
!= NULL
) {
5062 if (li
->direction
== REDIS_TAIL
)
5063 li
->ln
= li
->ln
->next
;
5065 li
->ln
= li
->ln
->prev
;
5069 redisPanic("Unknown list encoding");
5074 /* Return entry or NULL at the current position of the iterator. */
5075 static robj
*listTypeGet(listTypeEntry
*entry
) {
5076 listTypeIterator
*li
= entry
->li
;
5078 if (li
->encoding
== REDIS_ENCODING_ZIPLIST
) {
5079 unsigned char *vstr
;
5082 redisAssert(entry
->zi
!= NULL
);
5083 if (ziplistGet(entry
->zi
,&vstr
,&vlen
,&vlong
)) {
5085 value
= createStringObject((char*)vstr
,vlen
);
5087 value
= createStringObjectFromLongLong(vlong
);
5090 } else if (li
->encoding
== REDIS_ENCODING_LIST
) {
5091 redisAssert(entry
->ln
!= NULL
);
5092 value
= listNodeValue(entry
->ln
);
5093 incrRefCount(value
);
5095 redisPanic("Unknown list encoding");
5100 static void listTypeInsert(listTypeEntry
*entry
, robj
*value
, int where
) {
5101 robj
*subject
= entry
->li
->subject
;
5102 if (entry
->li
->encoding
== REDIS_ENCODING_ZIPLIST
) {
5103 value
= getDecodedObject(value
);
5104 if (where
== REDIS_TAIL
) {
5105 unsigned char *next
= ziplistNext(subject
->ptr
,entry
->zi
);
5107 /* When we insert after the current element, but the current element
5108 * is the tail of the list, we need to do a push. */
5110 subject
->ptr
= ziplistPush(subject
->ptr
,value
->ptr
,sdslen(value
->ptr
),REDIS_TAIL
);
5112 subject
->ptr
= ziplistInsert(subject
->ptr
,next
,value
->ptr
,sdslen(value
->ptr
));
5115 subject
->ptr
= ziplistInsert(subject
->ptr
,entry
->zi
,value
->ptr
,sdslen(value
->ptr
));
5117 decrRefCount(value
);
5118 } else if (entry
->li
->encoding
== REDIS_ENCODING_LIST
) {
5119 if (where
== REDIS_TAIL
) {
5120 listInsertNode(subject
->ptr
,entry
->ln
,value
,AL_START_TAIL
);
5122 listInsertNode(subject
->ptr
,entry
->ln
,value
,AL_START_HEAD
);
5124 incrRefCount(value
);
5126 redisPanic("Unknown list encoding");
5130 /* Compare the given object with the entry at the current position. */
5131 static int listTypeEqual(listTypeEntry
*entry
, robj
*o
) {
5132 listTypeIterator
*li
= entry
->li
;
5133 if (li
->encoding
== REDIS_ENCODING_ZIPLIST
) {
5134 redisAssert(o
->encoding
== REDIS_ENCODING_RAW
);
5135 return ziplistCompare(entry
->zi
,o
->ptr
,sdslen(o
->ptr
));
5136 } else if (li
->encoding
== REDIS_ENCODING_LIST
) {
5137 return equalStringObjects(o
,listNodeValue(entry
->ln
));
5139 redisPanic("Unknown list encoding");
5143 /* Delete the element pointed to. */
5144 static void listTypeDelete(listTypeEntry
*entry
) {
5145 listTypeIterator
*li
= entry
->li
;
5146 if (li
->encoding
== REDIS_ENCODING_ZIPLIST
) {
5147 unsigned char *p
= entry
->zi
;
5148 li
->subject
->ptr
= ziplistDelete(li
->subject
->ptr
,&p
);
5150 /* Update position of the iterator depending on the direction */
5151 if (li
->direction
== REDIS_TAIL
)
5154 li
->zi
= ziplistPrev(li
->subject
->ptr
,p
);
5155 } else if (entry
->li
->encoding
== REDIS_ENCODING_LIST
) {
5157 if (li
->direction
== REDIS_TAIL
)
5158 next
= entry
->ln
->next
;
5160 next
= entry
->ln
->prev
;
5161 listDelNode(li
->subject
->ptr
,entry
->ln
);
5164 redisPanic("Unknown list encoding");
5168 static void listTypeConvert(robj
*subject
, int enc
) {
5169 listTypeIterator
*li
;
5170 listTypeEntry entry
;
5171 redisAssert(subject
->type
== REDIS_LIST
);
5173 if (enc
== REDIS_ENCODING_LIST
) {
5174 list
*l
= listCreate();
5175 listSetFreeMethod(l
,decrRefCount
);
5177 /* listTypeGet returns a robj with incremented refcount */
5178 li
= listTypeInitIterator(subject
,0,REDIS_TAIL
);
5179 while (listTypeNext(li
,&entry
)) listAddNodeTail(l
,listTypeGet(&entry
));
5180 listTypeReleaseIterator(li
);
5182 subject
->encoding
= REDIS_ENCODING_LIST
;
5183 zfree(subject
->ptr
);
5186 redisPanic("Unsupported list conversion");
5190 static void pushGenericCommand(redisClient
*c
, int where
) {
5191 robj
*lobj
= lookupKeyWrite(c
->db
,c
->argv
[1]);
5193 if (handleClientsWaitingListPush(c
,c
->argv
[1],c
->argv
[2])) {
5194 addReply(c
,shared
.cone
);
5197 lobj
= createZiplistObject();
5198 dbAdd(c
->db
,c
->argv
[1],lobj
);
5200 if (lobj
->type
!= REDIS_LIST
) {
5201 addReply(c
,shared
.wrongtypeerr
);
5204 if (handleClientsWaitingListPush(c
,c
->argv
[1],c
->argv
[2])) {
5205 addReply(c
,shared
.cone
);
5209 listTypePush(lobj
,c
->argv
[2],where
);
5210 addReplyLongLong(c
,listTypeLength(lobj
));
5214 static void lpushCommand(redisClient
*c
) {
5215 pushGenericCommand(c
,REDIS_HEAD
);
5218 static void rpushCommand(redisClient
*c
) {
5219 pushGenericCommand(c
,REDIS_TAIL
);
5222 static void pushxGenericCommand(redisClient
*c
, robj
*refval
, robj
*val
, int where
) {
5224 listTypeIterator
*iter
;
5225 listTypeEntry entry
;
5228 if ((subject
= lookupKeyReadOrReply(c
,c
->argv
[1],shared
.czero
)) == NULL
||
5229 checkType(c
,subject
,REDIS_LIST
)) return;
5231 if (refval
!= NULL
) {
5232 /* Note: we expect refval to be string-encoded because it is *not* the
5233 * last argument of the multi-bulk LINSERT. */
5234 redisAssert(refval
->encoding
== REDIS_ENCODING_RAW
);
5236 /* We're not sure if this value can be inserted yet, but we cannot
5237 * convert the list inside the iterator. We don't want to loop over
5238 * the list twice (once to see if the value can be inserted and once
5239 * to do the actual insert), so we assume this value can be inserted
5240 * and convert the ziplist to a regular list if necessary. */
5241 listTypeTryConversion(subject
,val
);
5243 /* Seek refval from head to tail */
5244 iter
= listTypeInitIterator(subject
,0,REDIS_TAIL
);
5245 while (listTypeNext(iter
,&entry
)) {
5246 if (listTypeEqual(&entry
,refval
)) {
5247 listTypeInsert(&entry
,val
,where
);
5252 listTypeReleaseIterator(iter
);
5255 /* Check if the length exceeds the ziplist length threshold. */
5256 if (subject
->encoding
== REDIS_ENCODING_ZIPLIST
&&
5257 ziplistLen(subject
->ptr
) > server
.list_max_ziplist_entries
)
5258 listTypeConvert(subject
,REDIS_ENCODING_LIST
);
5261 /* Notify client of a failed insert */
5262 addReply(c
,shared
.cnegone
);
5266 listTypePush(subject
,val
,where
);
5270 addReplyUlong(c
,listTypeLength(subject
));
5273 static void lpushxCommand(redisClient
*c
) {
5274 pushxGenericCommand(c
,NULL
,c
->argv
[2],REDIS_HEAD
);
5277 static void rpushxCommand(redisClient
*c
) {
5278 pushxGenericCommand(c
,NULL
,c
->argv
[2],REDIS_TAIL
);
5281 static void linsertCommand(redisClient
*c
) {
5282 if (strcasecmp(c
->argv
[2]->ptr
,"after") == 0) {
5283 pushxGenericCommand(c
,c
->argv
[3],c
->argv
[4],REDIS_TAIL
);
5284 } else if (strcasecmp(c
->argv
[2]->ptr
,"before") == 0) {
5285 pushxGenericCommand(c
,c
->argv
[3],c
->argv
[4],REDIS_HEAD
);
5287 addReply(c
,shared
.syntaxerr
);
5291 static void llenCommand(redisClient
*c
) {
5292 robj
*o
= lookupKeyReadOrReply(c
,c
->argv
[1],shared
.czero
);
5293 if (o
== NULL
|| checkType(c
,o
,REDIS_LIST
)) return;
5294 addReplyUlong(c
,listTypeLength(o
));
5297 static void lindexCommand(redisClient
*c
) {
5298 robj
*o
= lookupKeyReadOrReply(c
,c
->argv
[1],shared
.nullbulk
);
5299 if (o
== NULL
|| checkType(c
,o
,REDIS_LIST
)) return;
5300 int index
= atoi(c
->argv
[2]->ptr
);
5303 if (o
->encoding
== REDIS_ENCODING_ZIPLIST
) {
5305 unsigned char *vstr
;
5308 p
= ziplistIndex(o
->ptr
,index
);
5309 if (ziplistGet(p
,&vstr
,&vlen
,&vlong
)) {
5311 value
= createStringObject((char*)vstr
,vlen
);
5313 value
= createStringObjectFromLongLong(vlong
);
5315 addReplyBulk(c
,value
);
5316 decrRefCount(value
);
5318 addReply(c
,shared
.nullbulk
);
5320 } else if (o
->encoding
== REDIS_ENCODING_LIST
) {
5321 listNode
*ln
= listIndex(o
->ptr
,index
);
5323 value
= listNodeValue(ln
);
5324 addReplyBulk(c
,value
);
5326 addReply(c
,shared
.nullbulk
);
5329 redisPanic("Unknown list encoding");
5333 static void lsetCommand(redisClient
*c
) {
5334 robj
*o
= lookupKeyWriteOrReply(c
,c
->argv
[1],shared
.nokeyerr
);
5335 if (o
== NULL
|| checkType(c
,o
,REDIS_LIST
)) return;
5336 int index
= atoi(c
->argv
[2]->ptr
);
5337 robj
*value
= c
->argv
[3];
5339 listTypeTryConversion(o
,value
);
5340 if (o
->encoding
== REDIS_ENCODING_ZIPLIST
) {
5341 unsigned char *p
, *zl
= o
->ptr
;
5342 p
= ziplistIndex(zl
,index
);
5344 addReply(c
,shared
.outofrangeerr
);
5346 o
->ptr
= ziplistDelete(o
->ptr
,&p
);
5347 value
= getDecodedObject(value
);
5348 o
->ptr
= ziplistInsert(o
->ptr
,p
,value
->ptr
,sdslen(value
->ptr
));
5349 decrRefCount(value
);
5350 addReply(c
,shared
.ok
);
5353 } else if (o
->encoding
== REDIS_ENCODING_LIST
) {
5354 listNode
*ln
= listIndex(o
->ptr
,index
);
5356 addReply(c
,shared
.outofrangeerr
);
5358 decrRefCount((robj
*)listNodeValue(ln
));
5359 listNodeValue(ln
) = value
;
5360 incrRefCount(value
);
5361 addReply(c
,shared
.ok
);
5365 redisPanic("Unknown list encoding");
5369 static void popGenericCommand(redisClient
*c
, int where
) {
5370 robj
*o
= lookupKeyWriteOrReply(c
,c
->argv
[1],shared
.nullbulk
);
5371 if (o
== NULL
|| checkType(c
,o
,REDIS_LIST
)) return;
5373 robj
*value
= listTypePop(o
,where
);
5374 if (value
== NULL
) {
5375 addReply(c
,shared
.nullbulk
);
5377 addReplyBulk(c
,value
);
5378 decrRefCount(value
);
5379 if (listTypeLength(o
) == 0) dbDelete(c
->db
,c
->argv
[1]);
5384 static void lpopCommand(redisClient
*c
) {
5385 popGenericCommand(c
,REDIS_HEAD
);
5388 static void rpopCommand(redisClient
*c
) {
5389 popGenericCommand(c
,REDIS_TAIL
);
5392 static void lrangeCommand(redisClient
*c
) {
5394 int start
= atoi(c
->argv
[2]->ptr
);
5395 int end
= atoi(c
->argv
[3]->ptr
);
5398 listTypeEntry entry
;
5400 if ((o
= lookupKeyReadOrReply(c
,c
->argv
[1],shared
.emptymultibulk
)) == NULL
5401 || checkType(c
,o
,REDIS_LIST
)) return;
5402 llen
= listTypeLength(o
);
5404 /* convert negative indexes */
5405 if (start
< 0) start
= llen
+start
;
5406 if (end
< 0) end
= llen
+end
;
5407 if (start
< 0) start
= 0;
5408 if (end
< 0) end
= 0;
5410 /* indexes sanity checks */
5411 if (start
> end
|| start
>= llen
) {
5412 /* Out of range start or start > end result in empty list */
5413 addReply(c
,shared
.emptymultibulk
);
5416 if (end
>= llen
) end
= llen
-1;
5417 rangelen
= (end
-start
)+1;
5419 /* Return the result in form of a multi-bulk reply */
5420 addReplySds(c
,sdscatprintf(sdsempty(),"*%d\r\n",rangelen
));
5421 listTypeIterator
*li
= listTypeInitIterator(o
,start
,REDIS_TAIL
);
5422 for (j
= 0; j
< rangelen
; j
++) {
5423 redisAssert(listTypeNext(li
,&entry
));
5424 value
= listTypeGet(&entry
);
5425 addReplyBulk(c
,value
);
5426 decrRefCount(value
);
5428 listTypeReleaseIterator(li
);
5431 static void ltrimCommand(redisClient
*c
) {
5433 int start
= atoi(c
->argv
[2]->ptr
);
5434 int end
= atoi(c
->argv
[3]->ptr
);
5436 int j
, ltrim
, rtrim
;
5440 if ((o
= lookupKeyWriteOrReply(c
,c
->argv
[1],shared
.ok
)) == NULL
||
5441 checkType(c
,o
,REDIS_LIST
)) return;
5442 llen
= listTypeLength(o
);
5444 /* convert negative indexes */
5445 if (start
< 0) start
= llen
+start
;
5446 if (end
< 0) end
= llen
+end
;
5447 if (start
< 0) start
= 0;
5448 if (end
< 0) end
= 0;
5450 /* indexes sanity checks */
5451 if (start
> end
|| start
>= llen
) {
5452 /* Out of range start or start > end result in empty list */
5456 if (end
>= llen
) end
= llen
-1;
5461 /* Remove list elements to perform the trim */
5462 if (o
->encoding
== REDIS_ENCODING_ZIPLIST
) {
5463 o
->ptr
= ziplistDeleteRange(o
->ptr
,0,ltrim
);
5464 o
->ptr
= ziplistDeleteRange(o
->ptr
,-rtrim
,rtrim
);
5465 } else if (o
->encoding
== REDIS_ENCODING_LIST
) {
5467 for (j
= 0; j
< ltrim
; j
++) {
5468 ln
= listFirst(list
);
5469 listDelNode(list
,ln
);
5471 for (j
= 0; j
< rtrim
; j
++) {
5472 ln
= listLast(list
);
5473 listDelNode(list
,ln
);
5476 redisPanic("Unknown list encoding");
5478 if (listTypeLength(o
) == 0) dbDelete(c
->db
,c
->argv
[1]);
5480 addReply(c
,shared
.ok
);
5483 static void lremCommand(redisClient
*c
) {
5484 robj
*subject
, *obj
= c
->argv
[3];
5485 int toremove
= atoi(c
->argv
[2]->ptr
);
5487 listTypeEntry entry
;
5489 subject
= lookupKeyWriteOrReply(c
,c
->argv
[1],shared
.czero
);
5490 if (subject
== NULL
|| checkType(c
,subject
,REDIS_LIST
)) return;
5492 /* Make sure obj is raw when we're dealing with a ziplist */
5493 if (subject
->encoding
== REDIS_ENCODING_ZIPLIST
)
5494 obj
= getDecodedObject(obj
);
5496 listTypeIterator
*li
;
5498 toremove
= -toremove
;
5499 li
= listTypeInitIterator(subject
,-1,REDIS_HEAD
);
5501 li
= listTypeInitIterator(subject
,0,REDIS_TAIL
);
5504 while (listTypeNext(li
,&entry
)) {
5505 if (listTypeEqual(&entry
,obj
)) {
5506 listTypeDelete(&entry
);
5509 if (toremove
&& removed
== toremove
) break;
5512 listTypeReleaseIterator(li
);
5514 /* Clean up raw encoded object */
5515 if (subject
->encoding
== REDIS_ENCODING_ZIPLIST
)
5518 if (listTypeLength(subject
) == 0) dbDelete(c
->db
,c
->argv
[1]);
5519 addReplySds(c
,sdscatprintf(sdsempty(),":%d\r\n",removed
));
5522 /* This is the semantic of this command:
5523 * RPOPLPUSH srclist dstlist:
5524 * IF LLEN(srclist) > 0
5525 * element = RPOP srclist
5526 * LPUSH dstlist element
5533 * The idea is to be able to get an element from a list in a reliable way
5534 * since the element is not just returned but pushed against another list
5535 * as well. This command was originally proposed by Ezra Zygmuntowicz.
5537 static void rpoplpushcommand(redisClient
*c
) {
5539 if ((sobj
= lookupKeyWriteOrReply(c
,c
->argv
[1],shared
.nullbulk
)) == NULL
||
5540 checkType(c
,sobj
,REDIS_LIST
)) return;
5542 if (listTypeLength(sobj
) == 0) {
5543 addReply(c
,shared
.nullbulk
);
5545 robj
*dobj
= lookupKeyWrite(c
->db
,c
->argv
[2]);
5546 if (dobj
&& checkType(c
,dobj
,REDIS_LIST
)) return;
5547 value
= listTypePop(sobj
,REDIS_TAIL
);
5549 /* Add the element to the target list (unless it's directly
5550 * passed to some BLPOP-ing client */
5551 if (!handleClientsWaitingListPush(c
,c
->argv
[2],value
)) {
5552 /* Create the list if the key does not exist */
5554 dobj
= createZiplistObject();
5555 dbAdd(c
->db
,c
->argv
[2],dobj
);
5557 listTypePush(dobj
,value
,REDIS_HEAD
);
5560 /* Send the element to the client as reply as well */
5561 addReplyBulk(c
,value
);
5563 /* listTypePop returns an object with its refcount incremented */
5564 decrRefCount(value
);
5566 /* Delete the source list when it is empty */
5567 if (listTypeLength(sobj
) == 0) dbDelete(c
->db
,c
->argv
[1]);
5572 /* ==================================== Sets ================================ */
5574 static void saddCommand(redisClient
*c
) {
5577 set
= lookupKeyWrite(c
->db
,c
->argv
[1]);
5579 set
= createSetObject();
5580 dbAdd(c
->db
,c
->argv
[1],set
);
5582 if (set
->type
!= REDIS_SET
) {
5583 addReply(c
,shared
.wrongtypeerr
);
5587 if (dictAdd(set
->ptr
,c
->argv
[2],NULL
) == DICT_OK
) {
5588 incrRefCount(c
->argv
[2]);
5590 addReply(c
,shared
.cone
);
5592 addReply(c
,shared
.czero
);
5596 static void sremCommand(redisClient
*c
) {
5599 if ((set
= lookupKeyWriteOrReply(c
,c
->argv
[1],shared
.czero
)) == NULL
||
5600 checkType(c
,set
,REDIS_SET
)) return;
5602 if (dictDelete(set
->ptr
,c
->argv
[2]) == DICT_OK
) {
5604 if (htNeedsResize(set
->ptr
)) dictResize(set
->ptr
);
5605 if (dictSize((dict
*)set
->ptr
) == 0) dbDelete(c
->db
,c
->argv
[1]);
5606 addReply(c
,shared
.cone
);
5608 addReply(c
,shared
.czero
);
5612 static void smoveCommand(redisClient
*c
) {
5613 robj
*srcset
, *dstset
;
5615 srcset
= lookupKeyWrite(c
->db
,c
->argv
[1]);
5616 dstset
= lookupKeyWrite(c
->db
,c
->argv
[2]);
5618 /* If the source key does not exist return 0, if it's of the wrong type
5620 if (srcset
== NULL
|| srcset
->type
!= REDIS_SET
) {
5621 addReply(c
, srcset
? shared
.wrongtypeerr
: shared
.czero
);
5624 /* Error if the destination key is not a set as well */
5625 if (dstset
&& dstset
->type
!= REDIS_SET
) {
5626 addReply(c
,shared
.wrongtypeerr
);
5629 /* Remove the element from the source set */
5630 if (dictDelete(srcset
->ptr
,c
->argv
[3]) == DICT_ERR
) {
5631 /* Key not found in the src set! return zero */
5632 addReply(c
,shared
.czero
);
5635 if (dictSize((dict
*)srcset
->ptr
) == 0 && srcset
!= dstset
)
5636 dbDelete(c
->db
,c
->argv
[1]);
5638 /* Add the element to the destination set */
5640 dstset
= createSetObject();
5641 dbAdd(c
->db
,c
->argv
[2],dstset
);
5643 if (dictAdd(dstset
->ptr
,c
->argv
[3],NULL
) == DICT_OK
)
5644 incrRefCount(c
->argv
[3]);
5645 addReply(c
,shared
.cone
);
5648 static void sismemberCommand(redisClient
*c
) {
5651 if ((set
= lookupKeyReadOrReply(c
,c
->argv
[1],shared
.czero
)) == NULL
||
5652 checkType(c
,set
,REDIS_SET
)) return;
5654 if (dictFind(set
->ptr
,c
->argv
[2]))
5655 addReply(c
,shared
.cone
);
5657 addReply(c
,shared
.czero
);
5660 static void scardCommand(redisClient
*c
) {
5664 if ((o
= lookupKeyReadOrReply(c
,c
->argv
[1],shared
.czero
)) == NULL
||
5665 checkType(c
,o
,REDIS_SET
)) return;
5668 addReplyUlong(c
,dictSize(s
));
5671 static void spopCommand(redisClient
*c
) {
5675 if ((set
= lookupKeyWriteOrReply(c
,c
->argv
[1],shared
.nullbulk
)) == NULL
||
5676 checkType(c
,set
,REDIS_SET
)) return;
5678 de
= dictGetRandomKey(set
->ptr
);
5680 addReply(c
,shared
.nullbulk
);
5682 robj
*ele
= dictGetEntryKey(de
);
5684 addReplyBulk(c
,ele
);
5685 dictDelete(set
->ptr
,ele
);
5686 if (htNeedsResize(set
->ptr
)) dictResize(set
->ptr
);
5687 if (dictSize((dict
*)set
->ptr
) == 0) dbDelete(c
->db
,c
->argv
[1]);
5692 static void srandmemberCommand(redisClient
*c
) {
5696 if ((set
= lookupKeyReadOrReply(c
,c
->argv
[1],shared
.nullbulk
)) == NULL
||
5697 checkType(c
,set
,REDIS_SET
)) return;
5699 de
= dictGetRandomKey(set
->ptr
);
5701 addReply(c
,shared
.nullbulk
);
5703 robj
*ele
= dictGetEntryKey(de
);
5705 addReplyBulk(c
,ele
);
5709 static int qsortCompareSetsByCardinality(const void *s1
, const void *s2
) {
5710 dict
**d1
= (void*) s1
, **d2
= (void*) s2
;
5712 return dictSize(*d1
)-dictSize(*d2
);
5715 static void sinterGenericCommand(redisClient
*c
, robj
**setskeys
, unsigned long setsnum
, robj
*dstkey
) {
5716 dict
**dv
= zmalloc(sizeof(dict
*)*setsnum
);
5719 robj
*lenobj
= NULL
, *dstset
= NULL
;
5720 unsigned long j
, cardinality
= 0;
5722 for (j
= 0; j
< setsnum
; j
++) {
5726 lookupKeyWrite(c
->db
,setskeys
[j
]) :
5727 lookupKeyRead(c
->db
,setskeys
[j
]);
5731 if (dbDelete(c
->db
,dstkey
))
5733 addReply(c
,shared
.czero
);
5735 addReply(c
,shared
.emptymultibulk
);
5739 if (setobj
->type
!= REDIS_SET
) {
5741 addReply(c
,shared
.wrongtypeerr
);
5744 dv
[j
] = setobj
->ptr
;
5746 /* Sort sets from the smallest to largest, this will improve our
5747 * algorithm's performace */
5748 qsort(dv
,setsnum
,sizeof(dict
*),qsortCompareSetsByCardinality
);
5750 /* The first thing we should output is the total number of elements...
5751 * since this is a multi-bulk write, but at this stage we don't know
5752 * the intersection set size, so we use a trick, append an empty object
5753 * to the output list and save the pointer to later modify it with the
5756 lenobj
= createObject(REDIS_STRING
,NULL
);
5758 decrRefCount(lenobj
);
5760 /* If we have a target key where to store the resulting set
5761 * create this key with an empty set inside */
5762 dstset
= createSetObject();
5765 /* Iterate all the elements of the first (smallest) set, and test
5766 * the element against all the other sets, if at least one set does
5767 * not include the element it is discarded */
5768 di
= dictGetIterator(dv
[0]);
5770 while((de
= dictNext(di
)) != NULL
) {
5773 for (j
= 1; j
< setsnum
; j
++)
5774 if (dictFind(dv
[j
],dictGetEntryKey(de
)) == NULL
) break;
5776 continue; /* at least one set does not contain the member */
5777 ele
= dictGetEntryKey(de
);
5779 addReplyBulk(c
,ele
);
5782 dictAdd(dstset
->ptr
,ele
,NULL
);
5786 dictReleaseIterator(di
);
5789 /* Store the resulting set into the target, if the intersection
5790 * is not an empty set. */
5791 dbDelete(c
->db
,dstkey
);
5792 if (dictSize((dict
*)dstset
->ptr
) > 0) {
5793 dbAdd(c
->db
,dstkey
,dstset
);
5794 addReplyLongLong(c
,dictSize((dict
*)dstset
->ptr
));
5796 decrRefCount(dstset
);
5797 addReply(c
,shared
.czero
);
5801 lenobj
->ptr
= sdscatprintf(sdsempty(),"*%lu\r\n",cardinality
);
5806 static void sinterCommand(redisClient
*c
) {
5807 sinterGenericCommand(c
,c
->argv
+1,c
->argc
-1,NULL
);
5810 static void sinterstoreCommand(redisClient
*c
) {
5811 sinterGenericCommand(c
,c
->argv
+2,c
->argc
-2,c
->argv
[1]);
5814 #define REDIS_OP_UNION 0
5815 #define REDIS_OP_DIFF 1
5816 #define REDIS_OP_INTER 2
5818 static void sunionDiffGenericCommand(redisClient
*c
, robj
**setskeys
, int setsnum
, robj
*dstkey
, int op
) {
5819 dict
**dv
= zmalloc(sizeof(dict
*)*setsnum
);
5822 robj
*dstset
= NULL
;
5823 int j
, cardinality
= 0;
5825 for (j
= 0; j
< setsnum
; j
++) {
5829 lookupKeyWrite(c
->db
,setskeys
[j
]) :
5830 lookupKeyRead(c
->db
,setskeys
[j
]);
5835 if (setobj
->type
!= REDIS_SET
) {
5837 addReply(c
,shared
.wrongtypeerr
);
5840 dv
[j
] = setobj
->ptr
;
5843 /* We need a temp set object to store our union. If the dstkey
5844 * is not NULL (that is, we are inside an SUNIONSTORE operation) then
5845 * this set object will be the resulting object to set into the target key*/
5846 dstset
= createSetObject();
5848 /* Iterate all the elements of all the sets, add every element a single
5849 * time to the result set */
5850 for (j
= 0; j
< setsnum
; j
++) {
5851 if (op
== REDIS_OP_DIFF
&& j
== 0 && !dv
[j
]) break; /* result set is empty */
5852 if (!dv
[j
]) continue; /* non existing keys are like empty sets */
5854 di
= dictGetIterator(dv
[j
]);
5856 while((de
= dictNext(di
)) != NULL
) {
5859 /* dictAdd will not add the same element multiple times */
5860 ele
= dictGetEntryKey(de
);
5861 if (op
== REDIS_OP_UNION
|| j
== 0) {
5862 if (dictAdd(dstset
->ptr
,ele
,NULL
) == DICT_OK
) {
5866 } else if (op
== REDIS_OP_DIFF
) {
5867 if (dictDelete(dstset
->ptr
,ele
) == DICT_OK
) {
5872 dictReleaseIterator(di
);
5874 /* result set is empty? Exit asap. */
5875 if (op
== REDIS_OP_DIFF
&& cardinality
== 0) break;
5878 /* Output the content of the resulting set, if not in STORE mode */
5880 addReplySds(c
,sdscatprintf(sdsempty(),"*%d\r\n",cardinality
));
5881 di
= dictGetIterator(dstset
->ptr
);
5882 while((de
= dictNext(di
)) != NULL
) {
5885 ele
= dictGetEntryKey(de
);
5886 addReplyBulk(c
,ele
);
5888 dictReleaseIterator(di
);
5889 decrRefCount(dstset
);
5891 /* If we have a target key where to store the resulting set
5892 * create this key with the result set inside */
5893 dbDelete(c
->db
,dstkey
);
5894 if (dictSize((dict
*)dstset
->ptr
) > 0) {
5895 dbAdd(c
->db
,dstkey
,dstset
);
5896 addReplyLongLong(c
,dictSize((dict
*)dstset
->ptr
));
5898 decrRefCount(dstset
);
5899 addReply(c
,shared
.czero
);
5906 static void sunionCommand(redisClient
*c
) {
5907 sunionDiffGenericCommand(c
,c
->argv
+1,c
->argc
-1,NULL
,REDIS_OP_UNION
);
5910 static void sunionstoreCommand(redisClient
*c
) {
5911 sunionDiffGenericCommand(c
,c
->argv
+2,c
->argc
-2,c
->argv
[1],REDIS_OP_UNION
);
5914 static void sdiffCommand(redisClient
*c
) {
5915 sunionDiffGenericCommand(c
,c
->argv
+1,c
->argc
-1,NULL
,REDIS_OP_DIFF
);
5918 static void sdiffstoreCommand(redisClient
*c
) {
5919 sunionDiffGenericCommand(c
,c
->argv
+2,c
->argc
-2,c
->argv
[1],REDIS_OP_DIFF
);
5922 /* ==================================== ZSets =============================== */
/* ZSETs are ordered sets using two data structures to hold the same elements
 * in order to get O(log(N)) INSERT and REMOVE operations into a sorted
 * data structure.
 *
 * The elements are added to a hash table mapping Redis objects to scores.
 * At the same time the elements are added to a skip list mapping scores
 * to Redis objects (so objects are sorted by scores in this "view"). */
5932 /* This skiplist implementation is almost a C translation of the original
5933 * algorithm described by William Pugh in "Skip Lists: A Probabilistic
5934 * Alternative to Balanced Trees", modified in three ways:
5935 * a) this implementation allows for repeated values.
5936 * b) the comparison is not just by key (our 'score') but by satellite data.
5937 * c) there is a back pointer, so it's a doubly linked list with the back
5938 * pointers being only at "level 1". This allows to traverse the list
5939 * from tail to head, useful for ZREVRANGE. */
5941 static zskiplistNode
*zslCreateNode(int level
, double score
, robj
*obj
) {
5942 zskiplistNode
*zn
= zmalloc(sizeof(*zn
));
5944 zn
->forward
= zmalloc(sizeof(zskiplistNode
*) * level
);
5946 zn
->span
= zmalloc(sizeof(unsigned int) * (level
- 1));
5954 static zskiplist
*zslCreate(void) {
5958 zsl
= zmalloc(sizeof(*zsl
));
5961 zsl
->header
= zslCreateNode(ZSKIPLIST_MAXLEVEL
,0,NULL
);
5962 for (j
= 0; j
< ZSKIPLIST_MAXLEVEL
; j
++) {
5963 zsl
->header
->forward
[j
] = NULL
;
5965 /* span has space for ZSKIPLIST_MAXLEVEL-1 elements */
5966 if (j
< ZSKIPLIST_MAXLEVEL
-1)
5967 zsl
->header
->span
[j
] = 0;
5969 zsl
->header
->backward
= NULL
;
5974 static void zslFreeNode(zskiplistNode
*node
) {
5975 decrRefCount(node
->obj
);
5976 zfree(node
->forward
);
5981 static void zslFree(zskiplist
*zsl
) {
5982 zskiplistNode
*node
= zsl
->header
->forward
[0], *next
;
5984 zfree(zsl
->header
->forward
);
5985 zfree(zsl
->header
->span
);
5988 next
= node
->forward
[0];
5995 static int zslRandomLevel(void) {
5997 while ((random()&0xFFFF) < (ZSKIPLIST_P
* 0xFFFF))
5999 return (level
<ZSKIPLIST_MAXLEVEL
) ? level
: ZSKIPLIST_MAXLEVEL
;
6002 static void zslInsert(zskiplist
*zsl
, double score
, robj
*obj
) {
6003 zskiplistNode
*update
[ZSKIPLIST_MAXLEVEL
], *x
;
6004 unsigned int rank
[ZSKIPLIST_MAXLEVEL
];
6008 for (i
= zsl
->level
-1; i
>= 0; i
--) {
6009 /* store rank that is crossed to reach the insert position */
6010 rank
[i
] = i
== (zsl
->level
-1) ? 0 : rank
[i
+1];
6012 while (x
->forward
[i
] &&
6013 (x
->forward
[i
]->score
< score
||
6014 (x
->forward
[i
]->score
== score
&&
6015 compareStringObjects(x
->forward
[i
]->obj
,obj
) < 0))) {
6016 rank
[i
] += i
> 0 ? x
->span
[i
-1] : 1;
6021 /* we assume the key is not already inside, since we allow duplicated
6022 * scores, and the re-insertion of score and redis object should never
6023 * happpen since the caller of zslInsert() should test in the hash table
6024 * if the element is already inside or not. */
6025 level
= zslRandomLevel();
6026 if (level
> zsl
->level
) {
6027 for (i
= zsl
->level
; i
< level
; i
++) {
6029 update
[i
] = zsl
->header
;
6030 update
[i
]->span
[i
-1] = zsl
->length
;
6034 x
= zslCreateNode(level
,score
,obj
);
6035 for (i
= 0; i
< level
; i
++) {
6036 x
->forward
[i
] = update
[i
]->forward
[i
];
6037 update
[i
]->forward
[i
] = x
;
6039 /* update span covered by update[i] as x is inserted here */
6041 x
->span
[i
-1] = update
[i
]->span
[i
-1] - (rank
[0] - rank
[i
]);
6042 update
[i
]->span
[i
-1] = (rank
[0] - rank
[i
]) + 1;
6046 /* increment span for untouched levels */
6047 for (i
= level
; i
< zsl
->level
; i
++) {
6048 update
[i
]->span
[i
-1]++;
6051 x
->backward
= (update
[0] == zsl
->header
) ? NULL
: update
[0];
6053 x
->forward
[0]->backward
= x
;
6059 /* Internal function used by zslDelete, zslDeleteByScore and zslDeleteByRank */
6060 void zslDeleteNode(zskiplist
*zsl
, zskiplistNode
*x
, zskiplistNode
**update
) {
6062 for (i
= 0; i
< zsl
->level
; i
++) {
6063 if (update
[i
]->forward
[i
] == x
) {
6065 update
[i
]->span
[i
-1] += x
->span
[i
-1] - 1;
6067 update
[i
]->forward
[i
] = x
->forward
[i
];
6069 /* invariant: i > 0, because update[0]->forward[0]
6070 * is always equal to x */
6071 update
[i
]->span
[i
-1] -= 1;
6074 if (x
->forward
[0]) {
6075 x
->forward
[0]->backward
= x
->backward
;
6077 zsl
->tail
= x
->backward
;
6079 while(zsl
->level
> 1 && zsl
->header
->forward
[zsl
->level
-1] == NULL
)
6084 /* Delete an element with matching score/object from the skiplist. */
6085 static int zslDelete(zskiplist
*zsl
, double score
, robj
*obj
) {
6086 zskiplistNode
*update
[ZSKIPLIST_MAXLEVEL
], *x
;
6090 for (i
= zsl
->level
-1; i
>= 0; i
--) {
6091 while (x
->forward
[i
] &&
6092 (x
->forward
[i
]->score
< score
||
6093 (x
->forward
[i
]->score
== score
&&
6094 compareStringObjects(x
->forward
[i
]->obj
,obj
) < 0)))
6098 /* We may have multiple elements with the same score, what we need
6099 * is to find the element with both the right score and object. */
6101 if (x
&& score
== x
->score
&& equalStringObjects(x
->obj
,obj
)) {
6102 zslDeleteNode(zsl
, x
, update
);
6106 return 0; /* not found */
6108 return 0; /* not found */
6111 /* Delete all the elements with score between min and max from the skiplist.
6112 * Min and mx are inclusive, so a score >= min || score <= max is deleted.
6113 * Note that this function takes the reference to the hash table view of the
6114 * sorted set, in order to remove the elements from the hash table too. */
6115 static unsigned long zslDeleteRangeByScore(zskiplist
*zsl
, double min
, double max
, dict
*dict
) {
6116 zskiplistNode
*update
[ZSKIPLIST_MAXLEVEL
], *x
;
6117 unsigned long removed
= 0;
6121 for (i
= zsl
->level
-1; i
>= 0; i
--) {
6122 while (x
->forward
[i
] && x
->forward
[i
]->score
< min
)
6126 /* We may have multiple elements with the same score, what we need
6127 * is to find the element with both the right score and object. */
6129 while (x
&& x
->score
<= max
) {
6130 zskiplistNode
*next
= x
->forward
[0];
6131 zslDeleteNode(zsl
, x
, update
);
6132 dictDelete(dict
,x
->obj
);
6137 return removed
; /* not found */
6140 /* Delete all the elements with rank between start and end from the skiplist.
6141 * Start and end are inclusive. Note that start and end need to be 1-based */
6142 static unsigned long zslDeleteRangeByRank(zskiplist
*zsl
, unsigned int start
, unsigned int end
, dict
*dict
) {
6143 zskiplistNode
*update
[ZSKIPLIST_MAXLEVEL
], *x
;
6144 unsigned long traversed
= 0, removed
= 0;
6148 for (i
= zsl
->level
-1; i
>= 0; i
--) {
6149 while (x
->forward
[i
] && (traversed
+ (i
> 0 ? x
->span
[i
-1] : 1)) < start
) {
6150 traversed
+= i
> 0 ? x
->span
[i
-1] : 1;
6158 while (x
&& traversed
<= end
) {
6159 zskiplistNode
*next
= x
->forward
[0];
6160 zslDeleteNode(zsl
, x
, update
);
6161 dictDelete(dict
,x
->obj
);
6170 /* Find the first node having a score equal or greater than the specified one.
6171 * Returns NULL if there is no match. */
6172 static zskiplistNode
*zslFirstWithScore(zskiplist
*zsl
, double score
) {
6177 for (i
= zsl
->level
-1; i
>= 0; i
--) {
6178 while (x
->forward
[i
] && x
->forward
[i
]->score
< score
)
6181 /* We may have multiple elements with the same score, what we need
6182 * is to find the element with both the right score and object. */
6183 return x
->forward
[0];
6186 /* Find the rank for an element by both score and key.
6187 * Returns 0 when the element cannot be found, rank otherwise.
6188 * Note that the rank is 1-based due to the span of zsl->header to the
6190 static unsigned long zslistTypeGetRank(zskiplist
*zsl
, double score
, robj
*o
) {
6192 unsigned long rank
= 0;
6196 for (i
= zsl
->level
-1; i
>= 0; i
--) {
6197 while (x
->forward
[i
] &&
6198 (x
->forward
[i
]->score
< score
||
6199 (x
->forward
[i
]->score
== score
&&
6200 compareStringObjects(x
->forward
[i
]->obj
,o
) <= 0))) {
6201 rank
+= i
> 0 ? x
->span
[i
-1] : 1;
6205 /* x might be equal to zsl->header, so test if obj is non-NULL */
6206 if (x
->obj
&& equalStringObjects(x
->obj
,o
)) {
6213 /* Finds an element by its rank. The rank argument needs to be 1-based. */
6214 zskiplistNode
* zslistTypeGetElementByRank(zskiplist
*zsl
, unsigned long rank
) {
6216 unsigned long traversed
= 0;
6220 for (i
= zsl
->level
-1; i
>= 0; i
--) {
6221 while (x
->forward
[i
] && (traversed
+ (i
>0 ? x
->span
[i
-1] : 1)) <= rank
)
6223 traversed
+= i
> 0 ? x
->span
[i
-1] : 1;
6226 if (traversed
== rank
) {
6233 /* The actual Z-commands implementations */
6235 /* This generic command implements both ZADD and ZINCRBY.
6236 * scoreval is the score if the operation is a ZADD (doincrement == 0) or
6237 * the increment if the operation is a ZINCRBY (doincrement == 1). */
6238 static void zaddGenericCommand(redisClient
*c
, robj
*key
, robj
*ele
, double scoreval
, int doincrement
) {
6243 if (isnan(scoreval
)) {
6244 addReplySds(c
,sdsnew("-ERR provide score is Not A Number (nan)\r\n"));
6248 zsetobj
= lookupKeyWrite(c
->db
,key
);
6249 if (zsetobj
== NULL
) {
6250 zsetobj
= createZsetObject();
6251 dbAdd(c
->db
,key
,zsetobj
);
6253 if (zsetobj
->type
!= REDIS_ZSET
) {
6254 addReply(c
,shared
.wrongtypeerr
);
6260 /* Ok now since we implement both ZADD and ZINCRBY here the code
6261 * needs to handle the two different conditions. It's all about setting
6262 * '*score', that is, the new score to set, to the right value. */
6263 score
= zmalloc(sizeof(double));
6267 /* Read the old score. If the element was not present starts from 0 */
6268 de
= dictFind(zs
->dict
,ele
);
6270 double *oldscore
= dictGetEntryVal(de
);
6271 *score
= *oldscore
+ scoreval
;
6275 if (isnan(*score
)) {
6277 sdsnew("-ERR resulting score is Not A Number (nan)\r\n"));
6279 /* Note that we don't need to check if the zset may be empty and
6280 * should be removed here, as we can only obtain Nan as score if
6281 * there was already an element in the sorted set. */
6288 /* What follows is a simple remove and re-insert operation that is common
6289 * to both ZADD and ZINCRBY... */
6290 if (dictAdd(zs
->dict
,ele
,score
) == DICT_OK
) {
6291 /* case 1: New element */
6292 incrRefCount(ele
); /* added to hash */
6293 zslInsert(zs
->zsl
,*score
,ele
);
6294 incrRefCount(ele
); /* added to skiplist */
6297 addReplyDouble(c
,*score
);
6299 addReply(c
,shared
.cone
);
6304 /* case 2: Score update operation */
6305 de
= dictFind(zs
->dict
,ele
);
6306 redisAssert(de
!= NULL
);
6307 oldscore
= dictGetEntryVal(de
);
6308 if (*score
!= *oldscore
) {
6311 /* Remove and insert the element in the skip list with new score */
6312 deleted
= zslDelete(zs
->zsl
,*oldscore
,ele
);
6313 redisAssert(deleted
!= 0);
6314 zslInsert(zs
->zsl
,*score
,ele
);
6316 /* Update the score in the hash table */
6317 dictReplace(zs
->dict
,ele
,score
);
6323 addReplyDouble(c
,*score
);
6325 addReply(c
,shared
.czero
);
6329 static void zaddCommand(redisClient
*c
) {
6332 if (getDoubleFromObjectOrReply(c
, c
->argv
[2], &scoreval
, NULL
) != REDIS_OK
) return;
6333 zaddGenericCommand(c
,c
->argv
[1],c
->argv
[3],scoreval
,0);
6336 static void zincrbyCommand(redisClient
*c
) {
6339 if (getDoubleFromObjectOrReply(c
, c
->argv
[2], &scoreval
, NULL
) != REDIS_OK
) return;
6340 zaddGenericCommand(c
,c
->argv
[1],c
->argv
[3],scoreval
,1);
6343 static void zremCommand(redisClient
*c
) {
6350 if ((zsetobj
= lookupKeyWriteOrReply(c
,c
->argv
[1],shared
.czero
)) == NULL
||
6351 checkType(c
,zsetobj
,REDIS_ZSET
)) return;
6354 de
= dictFind(zs
->dict
,c
->argv
[2]);
6356 addReply(c
,shared
.czero
);
6359 /* Delete from the skiplist */
6360 oldscore
= dictGetEntryVal(de
);
6361 deleted
= zslDelete(zs
->zsl
,*oldscore
,c
->argv
[2]);
6362 redisAssert(deleted
!= 0);
6364 /* Delete from the hash table */
6365 dictDelete(zs
->dict
,c
->argv
[2]);
6366 if (htNeedsResize(zs
->dict
)) dictResize(zs
->dict
);
6367 if (dictSize(zs
->dict
) == 0) dbDelete(c
->db
,c
->argv
[1]);
6369 addReply(c
,shared
.cone
);
6372 static void zremrangebyscoreCommand(redisClient
*c
) {
6379 if ((getDoubleFromObjectOrReply(c
, c
->argv
[2], &min
, NULL
) != REDIS_OK
) ||
6380 (getDoubleFromObjectOrReply(c
, c
->argv
[3], &max
, NULL
) != REDIS_OK
)) return;
6382 if ((zsetobj
= lookupKeyWriteOrReply(c
,c
->argv
[1],shared
.czero
)) == NULL
||
6383 checkType(c
,zsetobj
,REDIS_ZSET
)) return;
6386 deleted
= zslDeleteRangeByScore(zs
->zsl
,min
,max
,zs
->dict
);
6387 if (htNeedsResize(zs
->dict
)) dictResize(zs
->dict
);
6388 if (dictSize(zs
->dict
) == 0) dbDelete(c
->db
,c
->argv
[1]);
6389 server
.dirty
+= deleted
;
6390 addReplyLongLong(c
,deleted
);
6393 static void zremrangebyrankCommand(redisClient
*c
) {
6401 if ((getLongFromObjectOrReply(c
, c
->argv
[2], &start
, NULL
) != REDIS_OK
) ||
6402 (getLongFromObjectOrReply(c
, c
->argv
[3], &end
, NULL
) != REDIS_OK
)) return;
6404 if ((zsetobj
= lookupKeyWriteOrReply(c
,c
->argv
[1],shared
.czero
)) == NULL
||
6405 checkType(c
,zsetobj
,REDIS_ZSET
)) return;
6407 llen
= zs
->zsl
->length
;
6409 /* convert negative indexes */
6410 if (start
< 0) start
= llen
+start
;
6411 if (end
< 0) end
= llen
+end
;
6412 if (start
< 0) start
= 0;
6413 if (end
< 0) end
= 0;
6415 /* indexes sanity checks */
6416 if (start
> end
|| start
>= llen
) {
6417 addReply(c
,shared
.czero
);
6420 if (end
>= llen
) end
= llen
-1;
6422 /* increment start and end because zsl*Rank functions
6423 * use 1-based rank */
6424 deleted
= zslDeleteRangeByRank(zs
->zsl
,start
+1,end
+1,zs
->dict
);
6425 if (htNeedsResize(zs
->dict
)) dictResize(zs
->dict
);
6426 if (dictSize(zs
->dict
) == 0) dbDelete(c
->db
,c
->argv
[1]);
6427 server
.dirty
+= deleted
;
6428 addReplyLongLong(c
, deleted
);
6436 static int qsortCompareZsetopsrcByCardinality(const void *s1
, const void *s2
) {
6437 zsetopsrc
*d1
= (void*) s1
, *d2
= (void*) s2
;
6438 unsigned long size1
, size2
;
6439 size1
= d1
->dict
? dictSize(d1
->dict
) : 0;
6440 size2
= d2
->dict
? dictSize(d2
->dict
) : 0;
6441 return size1
- size2
;
6444 #define REDIS_AGGR_SUM 1
6445 #define REDIS_AGGR_MIN 2
6446 #define REDIS_AGGR_MAX 3
6447 #define zunionInterDictValue(_e) (dictGetEntryVal(_e) == NULL ? 1.0 : *(double*)dictGetEntryVal(_e))
6449 inline static void zunionInterAggregate(double *target
, double val
, int aggregate
) {
6450 if (aggregate
== REDIS_AGGR_SUM
) {
6451 *target
= *target
+ val
;
6452 } else if (aggregate
== REDIS_AGGR_MIN
) {
6453 *target
= val
< *target
? val
: *target
;
6454 } else if (aggregate
== REDIS_AGGR_MAX
) {
6455 *target
= val
> *target
? val
: *target
;
6458 redisPanic("Unknown ZUNION/INTER aggregate type");
6462 static void zunionInterGenericCommand(redisClient
*c
, robj
*dstkey
, int op
) {
6464 int aggregate
= REDIS_AGGR_SUM
;
6471 /* expect setnum input keys to be given */
6472 setnum
= atoi(c
->argv
[2]->ptr
);
6474 addReplySds(c
,sdsnew("-ERR at least 1 input key is needed for ZUNIONSTORE/ZINTERSTORE\r\n"));
6478 /* test if the expected number of keys would overflow */
6479 if (3+setnum
> c
->argc
) {
6480 addReply(c
,shared
.syntaxerr
);
6484 /* read keys to be used for input */
6485 src
= zmalloc(sizeof(zsetopsrc
) * setnum
);
6486 for (i
= 0, j
= 3; i
< setnum
; i
++, j
++) {
6487 robj
*obj
= lookupKeyWrite(c
->db
,c
->argv
[j
]);
6491 if (obj
->type
== REDIS_ZSET
) {
6492 src
[i
].dict
= ((zset
*)obj
->ptr
)->dict
;
6493 } else if (obj
->type
== REDIS_SET
) {
6494 src
[i
].dict
= (obj
->ptr
);
6497 addReply(c
,shared
.wrongtypeerr
);
6502 /* default all weights to 1 */
6503 src
[i
].weight
= 1.0;
6506 /* parse optional extra arguments */
6508 int remaining
= c
->argc
- j
;
6511 if (remaining
>= (setnum
+ 1) && !strcasecmp(c
->argv
[j
]->ptr
,"weights")) {
6513 for (i
= 0; i
< setnum
; i
++, j
++, remaining
--) {
6514 if (getDoubleFromObjectOrReply(c
, c
->argv
[j
], &src
[i
].weight
, NULL
) != REDIS_OK
)
6517 } else if (remaining
>= 2 && !strcasecmp(c
->argv
[j
]->ptr
,"aggregate")) {
6519 if (!strcasecmp(c
->argv
[j
]->ptr
,"sum")) {
6520 aggregate
= REDIS_AGGR_SUM
;
6521 } else if (!strcasecmp(c
->argv
[j
]->ptr
,"min")) {
6522 aggregate
= REDIS_AGGR_MIN
;
6523 } else if (!strcasecmp(c
->argv
[j
]->ptr
,"max")) {
6524 aggregate
= REDIS_AGGR_MAX
;
6527 addReply(c
,shared
.syntaxerr
);
6533 addReply(c
,shared
.syntaxerr
);
6539 /* sort sets from the smallest to largest, this will improve our
6540 * algorithm's performance */
6541 qsort(src
,setnum
,sizeof(zsetopsrc
),qsortCompareZsetopsrcByCardinality
);
6543 dstobj
= createZsetObject();
6544 dstzset
= dstobj
->ptr
;
6546 if (op
== REDIS_OP_INTER
) {
6547 /* skip going over all entries if the smallest zset is NULL or empty */
6548 if (src
[0].dict
&& dictSize(src
[0].dict
) > 0) {
6549 /* precondition: as src[0].dict is non-empty and the zsets are ordered
6550 * from small to large, all src[i > 0].dict are non-empty too */
6551 di
= dictGetIterator(src
[0].dict
);
6552 while((de
= dictNext(di
)) != NULL
) {
6553 double *score
= zmalloc(sizeof(double)), value
;
6554 *score
= src
[0].weight
* zunionInterDictValue(de
);
6556 for (j
= 1; j
< setnum
; j
++) {
6557 dictEntry
*other
= dictFind(src
[j
].dict
,dictGetEntryKey(de
));
6559 value
= src
[j
].weight
* zunionInterDictValue(other
);
6560 zunionInterAggregate(score
, value
, aggregate
);
6566 /* skip entry when not present in every source dict */
6570 robj
*o
= dictGetEntryKey(de
);
6571 dictAdd(dstzset
->dict
,o
,score
);
6572 incrRefCount(o
); /* added to dictionary */
6573 zslInsert(dstzset
->zsl
,*score
,o
);
6574 incrRefCount(o
); /* added to skiplist */
6577 dictReleaseIterator(di
);
6579 } else if (op
== REDIS_OP_UNION
) {
6580 for (i
= 0; i
< setnum
; i
++) {
6581 if (!src
[i
].dict
) continue;
6583 di
= dictGetIterator(src
[i
].dict
);
6584 while((de
= dictNext(di
)) != NULL
) {
6585 /* skip key when already processed */
6586 if (dictFind(dstzset
->dict
,dictGetEntryKey(de
)) != NULL
) continue;
6588 double *score
= zmalloc(sizeof(double)), value
;
6589 *score
= src
[i
].weight
* zunionInterDictValue(de
);
6591 /* because the zsets are sorted by size, it's only possible
6592 * for sets at larger indices to hold this entry */
6593 for (j
= (i
+1); j
< setnum
; j
++) {
6594 dictEntry
*other
= dictFind(src
[j
].dict
,dictGetEntryKey(de
));
6596 value
= src
[j
].weight
* zunionInterDictValue(other
);
6597 zunionInterAggregate(score
, value
, aggregate
);
6601 robj
*o
= dictGetEntryKey(de
);
6602 dictAdd(dstzset
->dict
,o
,score
);
6603 incrRefCount(o
); /* added to dictionary */
6604 zslInsert(dstzset
->zsl
,*score
,o
);
6605 incrRefCount(o
); /* added to skiplist */
6607 dictReleaseIterator(di
);
6610 /* unknown operator */
6611 redisAssert(op
== REDIS_OP_INTER
|| op
== REDIS_OP_UNION
);
6614 dbDelete(c
->db
,dstkey
);
6615 if (dstzset
->zsl
->length
) {
6616 dbAdd(c
->db
,dstkey
,dstobj
);
6617 addReplyLongLong(c
, dstzset
->zsl
->length
);
6620 decrRefCount(dstobj
);
6621 addReply(c
, shared
.czero
);
6626 static void zunionstoreCommand(redisClient
*c
) {
6627 zunionInterGenericCommand(c
,c
->argv
[1], REDIS_OP_UNION
);
6630 static void zinterstoreCommand(redisClient
*c
) {
6631 zunionInterGenericCommand(c
,c
->argv
[1], REDIS_OP_INTER
);
6634 static void zrangeGenericCommand(redisClient
*c
, int reverse
) {
6646 if ((getLongFromObjectOrReply(c
, c
->argv
[2], &start
, NULL
) != REDIS_OK
) ||
6647 (getLongFromObjectOrReply(c
, c
->argv
[3], &end
, NULL
) != REDIS_OK
)) return;
6649 if (c
->argc
== 5 && !strcasecmp(c
->argv
[4]->ptr
,"withscores")) {
6651 } else if (c
->argc
>= 5) {
6652 addReply(c
,shared
.syntaxerr
);
6656 if ((o
= lookupKeyReadOrReply(c
,c
->argv
[1],shared
.emptymultibulk
)) == NULL
6657 || checkType(c
,o
,REDIS_ZSET
)) return;
6662 /* convert negative indexes */
6663 if (start
< 0) start
= llen
+start
;
6664 if (end
< 0) end
= llen
+end
;
6665 if (start
< 0) start
= 0;
6666 if (end
< 0) end
= 0;
6668 /* indexes sanity checks */
6669 if (start
> end
|| start
>= llen
) {
6670 /* Out of range start or start > end result in empty list */
6671 addReply(c
,shared
.emptymultibulk
);
6674 if (end
>= llen
) end
= llen
-1;
6675 rangelen
= (end
-start
)+1;
6677 /* check if starting point is trivial, before searching
6678 * the element in log(N) time */
6680 ln
= start
== 0 ? zsl
->tail
: zslistTypeGetElementByRank(zsl
, llen
-start
);
6683 zsl
->header
->forward
[0] : zslistTypeGetElementByRank(zsl
, start
+1);
6686 /* Return the result in form of a multi-bulk reply */
6687 addReplySds(c
,sdscatprintf(sdsempty(),"*%d\r\n",
6688 withscores
? (rangelen
*2) : rangelen
));
6689 for (j
= 0; j
< rangelen
; j
++) {
6691 addReplyBulk(c
,ele
);
6693 addReplyDouble(c
,ln
->score
);
6694 ln
= reverse
? ln
->backward
: ln
->forward
[0];
6698 static void zrangeCommand(redisClient
*c
) {
6699 zrangeGenericCommand(c
,0);
6702 static void zrevrangeCommand(redisClient
*c
) {
6703 zrangeGenericCommand(c
,1);
6706 /* This command implements both ZRANGEBYSCORE and ZCOUNT.
6707 * If justcount is non-zero, just the count is returned. */
6708 static void genericZrangebyscoreCommand(redisClient
*c
, int justcount
) {
6711 int minex
= 0, maxex
= 0; /* are min or max exclusive? */
6712 int offset
= 0, limit
= -1;
6716 /* Parse the min-max interval. If one of the values is prefixed
6717 * by the "(" character, it's considered "open". For instance
6718 * ZRANGEBYSCORE zset (1.5 (2.5 will match min < x < max
6719 * ZRANGEBYSCORE zset 1.5 2.5 will instead match min <= x <= max */
6720 if (((char*)c
->argv
[2]->ptr
)[0] == '(') {
6721 min
= strtod((char*)c
->argv
[2]->ptr
+1,NULL
);
6724 min
= strtod(c
->argv
[2]->ptr
,NULL
);
6726 if (((char*)c
->argv
[3]->ptr
)[0] == '(') {
6727 max
= strtod((char*)c
->argv
[3]->ptr
+1,NULL
);
6730 max
= strtod(c
->argv
[3]->ptr
,NULL
);
6733 /* Parse "WITHSCORES": note that if the command was called with
6734 * the name ZCOUNT then we are sure that c->argc == 4, so we'll never
6735 * enter the following paths to parse WITHSCORES and LIMIT. */
6736 if (c
->argc
== 5 || c
->argc
== 8) {
6737 if (strcasecmp(c
->argv
[c
->argc
-1]->ptr
,"withscores") == 0)
6742 if (c
->argc
!= (4 + withscores
) && c
->argc
!= (7 + withscores
))
6746 sdsnew("-ERR wrong number of arguments for ZRANGEBYSCORE\r\n"));
6751 if (c
->argc
== (7 + withscores
) && strcasecmp(c
->argv
[4]->ptr
,"limit")) {
6752 addReply(c
,shared
.syntaxerr
);
6754 } else if (c
->argc
== (7 + withscores
)) {
6755 offset
= atoi(c
->argv
[5]->ptr
);
6756 limit
= atoi(c
->argv
[6]->ptr
);
6757 if (offset
< 0) offset
= 0;
6760 /* Ok, lookup the key and get the range */
6761 o
= lookupKeyRead(c
->db
,c
->argv
[1]);
6763 addReply(c
,justcount
? shared
.czero
: shared
.emptymultibulk
);
6765 if (o
->type
!= REDIS_ZSET
) {
6766 addReply(c
,shared
.wrongtypeerr
);
6768 zset
*zsetobj
= o
->ptr
;
6769 zskiplist
*zsl
= zsetobj
->zsl
;
6771 robj
*ele
, *lenobj
= NULL
;
6772 unsigned long rangelen
= 0;
6774 /* Get the first node with the score >= min, or with
6775 * score > min if 'minex' is true. */
6776 ln
= zslFirstWithScore(zsl
,min
);
6777 while (minex
&& ln
&& ln
->score
== min
) ln
= ln
->forward
[0];
6780 /* No element matching the specified interval */
6781 addReply(c
,justcount
? shared
.czero
: shared
.emptymultibulk
);
6785 /* We don't know in advance how many matching elements there
6786 * are in the list, so we push this object that will represent
6787 * the multi-bulk length in the output buffer, and will "fix"
6790 lenobj
= createObject(REDIS_STRING
,NULL
);
6792 decrRefCount(lenobj
);
6795 while(ln
&& (maxex
? (ln
->score
< max
) : (ln
->score
<= max
))) {
6798 ln
= ln
->forward
[0];
6801 if (limit
== 0) break;
6804 addReplyBulk(c
,ele
);
6806 addReplyDouble(c
,ln
->score
);
6808 ln
= ln
->forward
[0];
6810 if (limit
> 0) limit
--;
6813 addReplyLongLong(c
,(long)rangelen
);
6815 lenobj
->ptr
= sdscatprintf(sdsempty(),"*%lu\r\n",
6816 withscores
? (rangelen
*2) : rangelen
);
6822 static void zrangebyscoreCommand(redisClient
*c
) {
6823 genericZrangebyscoreCommand(c
,0);
6826 static void zcountCommand(redisClient
*c
) {
6827 genericZrangebyscoreCommand(c
,1);
6830 static void zcardCommand(redisClient
*c
) {
6834 if ((o
= lookupKeyReadOrReply(c
,c
->argv
[1],shared
.czero
)) == NULL
||
6835 checkType(c
,o
,REDIS_ZSET
)) return;
6838 addReplyUlong(c
,zs
->zsl
->length
);
6841 static void zscoreCommand(redisClient
*c
) {
6846 if ((o
= lookupKeyReadOrReply(c
,c
->argv
[1],shared
.nullbulk
)) == NULL
||
6847 checkType(c
,o
,REDIS_ZSET
)) return;
6850 de
= dictFind(zs
->dict
,c
->argv
[2]);
6852 addReply(c
,shared
.nullbulk
);
6854 double *score
= dictGetEntryVal(de
);
6856 addReplyDouble(c
,*score
);
6860 static void zrankGenericCommand(redisClient
*c
, int reverse
) {
6868 if ((o
= lookupKeyReadOrReply(c
,c
->argv
[1],shared
.nullbulk
)) == NULL
||
6869 checkType(c
,o
,REDIS_ZSET
)) return;
6873 de
= dictFind(zs
->dict
,c
->argv
[2]);
6875 addReply(c
,shared
.nullbulk
);
6879 score
= dictGetEntryVal(de
);
6880 rank
= zslistTypeGetRank(zsl
, *score
, c
->argv
[2]);
6883 addReplyLongLong(c
, zsl
->length
- rank
);
6885 addReplyLongLong(c
, rank
-1);
6888 addReply(c
,shared
.nullbulk
);
6892 static void zrankCommand(redisClient
*c
) {
6893 zrankGenericCommand(c
, 0);
6896 static void zrevrankCommand(redisClient
*c
) {
6897 zrankGenericCommand(c
, 1);
6900 /* ========================= Hashes utility functions ======================= */
6901 #define REDIS_HASH_KEY 1
6902 #define REDIS_HASH_VALUE 2
6904 /* Check the length of a number of objects to see if we need to convert a
6905 * zipmap to a real hash. Note that we only check string encoded objects
6906 * as their string length can be queried in constant time. */
6907 static void hashTypeTryConversion(robj
*subject
, robj
**argv
, int start
, int end
) {
6909 if (subject
->encoding
!= REDIS_ENCODING_ZIPMAP
) return;
6911 for (i
= start
; i
<= end
; i
++) {
6912 if (argv
[i
]->encoding
== REDIS_ENCODING_RAW
&&
6913 sdslen(argv
[i
]->ptr
) > server
.hash_max_zipmap_value
)
6915 convertToRealHash(subject
);
6921 /* Encode given objects in-place when the hash uses a dict. */
6922 static void hashTypeTryObjectEncoding(robj
*subject
, robj
**o1
, robj
**o2
) {
6923 if (subject
->encoding
== REDIS_ENCODING_HT
) {
6924 if (o1
) *o1
= tryObjectEncoding(*o1
);
6925 if (o2
) *o2
= tryObjectEncoding(*o2
);
6929 /* Get the value from a hash identified by key. Returns either a string
6930 * object or NULL if the value cannot be found. The refcount of the object
6931 * is always increased by 1 when the value was found. */
6932 static robj
*hashTypeGet(robj
*o
, robj
*key
) {
6934 if (o
->encoding
== REDIS_ENCODING_ZIPMAP
) {
6937 key
= getDecodedObject(key
);
6938 if (zipmapGet(o
->ptr
,key
->ptr
,sdslen(key
->ptr
),&v
,&vlen
)) {
6939 value
= createStringObject((char*)v
,vlen
);
6943 dictEntry
*de
= dictFind(o
->ptr
,key
);
6945 value
= dictGetEntryVal(de
);
6946 incrRefCount(value
);
6952 /* Test if the key exists in the given hash. Returns 1 if the key
6953 * exists and 0 when it doesn't. */
6954 static int hashTypeExists(robj
*o
, robj
*key
) {
6955 if (o
->encoding
== REDIS_ENCODING_ZIPMAP
) {
6956 key
= getDecodedObject(key
);
6957 if (zipmapExists(o
->ptr
,key
->ptr
,sdslen(key
->ptr
))) {
6963 if (dictFind(o
->ptr
,key
) != NULL
) {
6970 /* Add an element, discard the old if the key already exists.
6971 * Return 0 on insert and 1 on update. */
6972 static int hashTypeSet(robj
*o
, robj
*key
, robj
*value
) {
6974 if (o
->encoding
== REDIS_ENCODING_ZIPMAP
) {
6975 key
= getDecodedObject(key
);
6976 value
= getDecodedObject(value
);
6977 o
->ptr
= zipmapSet(o
->ptr
,
6978 key
->ptr
,sdslen(key
->ptr
),
6979 value
->ptr
,sdslen(value
->ptr
), &update
);
6981 decrRefCount(value
);
6983 /* Check if the zipmap needs to be upgraded to a real hash table */
6984 if (zipmapLen(o
->ptr
) > server
.hash_max_zipmap_entries
)
6985 convertToRealHash(o
);
6987 if (dictReplace(o
->ptr
,key
,value
)) {
6994 incrRefCount(value
);
6999 /* Delete an element from a hash.
7000 * Return 1 on deleted and 0 on not found. */
7001 static int hashTypeDelete(robj
*o
, robj
*key
) {
7003 if (o
->encoding
== REDIS_ENCODING_ZIPMAP
) {
7004 key
= getDecodedObject(key
);
7005 o
->ptr
= zipmapDel(o
->ptr
,key
->ptr
,sdslen(key
->ptr
), &deleted
);
7008 deleted
= dictDelete((dict
*)o
->ptr
,key
) == DICT_OK
;
7009 /* Always check if the dictionary needs a resize after a delete. */
7010 if (deleted
&& htNeedsResize(o
->ptr
)) dictResize(o
->ptr
);
7015 /* Return the number of elements in a hash. */
7016 static unsigned long hashTypeLength(robj
*o
) {
7017 return (o
->encoding
== REDIS_ENCODING_ZIPMAP
) ?
7018 zipmapLen((unsigned char*)o
->ptr
) : dictSize((dict
*)o
->ptr
);
7021 /* Structure to hold hash iteration abstraction. Note that iteration over
7022 * hashes involves both fields and values. Because it is possible that
7023 * not both are required, store pointers in the iterator to avoid
7024 * unnecessary memory allocation for fields/values. */
7028 unsigned char *zk
, *zv
;
7029 unsigned int zklen
, zvlen
;
7035 static hashTypeIterator
*hashTypeInitIterator(robj
*subject
) {
7036 hashTypeIterator
*hi
= zmalloc(sizeof(hashTypeIterator
));
7037 hi
->encoding
= subject
->encoding
;
7038 if (hi
->encoding
== REDIS_ENCODING_ZIPMAP
) {
7039 hi
->zi
= zipmapRewind(subject
->ptr
);
7040 } else if (hi
->encoding
== REDIS_ENCODING_HT
) {
7041 hi
->di
= dictGetIterator(subject
->ptr
);
7048 static void hashTypeReleaseIterator(hashTypeIterator
*hi
) {
7049 if (hi
->encoding
== REDIS_ENCODING_HT
) {
7050 dictReleaseIterator(hi
->di
);
7055 /* Move to the next entry in the hash. Return REDIS_OK when the next entry
7056 * could be found and REDIS_ERR when the iterator reaches the end. */
7057 static int hashTypeNext(hashTypeIterator
*hi
) {
7058 if (hi
->encoding
== REDIS_ENCODING_ZIPMAP
) {
7059 if ((hi
->zi
= zipmapNext(hi
->zi
, &hi
->zk
, &hi
->zklen
,
7060 &hi
->zv
, &hi
->zvlen
)) == NULL
) return REDIS_ERR
;
7062 if ((hi
->de
= dictNext(hi
->di
)) == NULL
) return REDIS_ERR
;
7067 /* Get key or value object at current iteration position.
7068 * This increases the refcount of the field object by 1. */
7069 static robj
*hashTypeCurrent(hashTypeIterator
*hi
, int what
) {
7071 if (hi
->encoding
== REDIS_ENCODING_ZIPMAP
) {
7072 if (what
& REDIS_HASH_KEY
) {
7073 o
= createStringObject((char*)hi
->zk
,hi
->zklen
);
7075 o
= createStringObject((char*)hi
->zv
,hi
->zvlen
);
7078 if (what
& REDIS_HASH_KEY
) {
7079 o
= dictGetEntryKey(hi
->de
);
7081 o
= dictGetEntryVal(hi
->de
);
7088 static robj
*hashTypeLookupWriteOrCreate(redisClient
*c
, robj
*key
) {
7089 robj
*o
= lookupKeyWrite(c
->db
,key
);
7091 o
= createHashObject();
7094 if (o
->type
!= REDIS_HASH
) {
7095 addReply(c
,shared
.wrongtypeerr
);
7102 /* ============================= Hash commands ============================== */
7103 static void hsetCommand(redisClient
*c
) {
7107 if ((o
= hashTypeLookupWriteOrCreate(c
,c
->argv
[1])) == NULL
) return;
7108 hashTypeTryConversion(o
,c
->argv
,2,3);
7109 hashTypeTryObjectEncoding(o
,&c
->argv
[2], &c
->argv
[3]);
7110 update
= hashTypeSet(o
,c
->argv
[2],c
->argv
[3]);
7111 addReply(c
, update
? shared
.czero
: shared
.cone
);
7115 static void hsetnxCommand(redisClient
*c
) {
7117 if ((o
= hashTypeLookupWriteOrCreate(c
,c
->argv
[1])) == NULL
) return;
7118 hashTypeTryConversion(o
,c
->argv
,2,3);
7120 if (hashTypeExists(o
, c
->argv
[2])) {
7121 addReply(c
, shared
.czero
);
7123 hashTypeTryObjectEncoding(o
,&c
->argv
[2], &c
->argv
[3]);
7124 hashTypeSet(o
,c
->argv
[2],c
->argv
[3]);
7125 addReply(c
, shared
.cone
);
7130 static void hmsetCommand(redisClient
*c
) {
7134 if ((c
->argc
% 2) == 1) {
7135 addReplySds(c
,sdsnew("-ERR wrong number of arguments for HMSET\r\n"));
7139 if ((o
= hashTypeLookupWriteOrCreate(c
,c
->argv
[1])) == NULL
) return;
7140 hashTypeTryConversion(o
,c
->argv
,2,c
->argc
-1);
7141 for (i
= 2; i
< c
->argc
; i
+= 2) {
7142 hashTypeTryObjectEncoding(o
,&c
->argv
[i
], &c
->argv
[i
+1]);
7143 hashTypeSet(o
,c
->argv
[i
],c
->argv
[i
+1]);
7145 addReply(c
, shared
.ok
);
7149 static void hincrbyCommand(redisClient
*c
) {
7150 long long value
, incr
;
7151 robj
*o
, *current
, *new;
7153 if (getLongLongFromObjectOrReply(c
,c
->argv
[3],&incr
,NULL
) != REDIS_OK
) return;
7154 if ((o
= hashTypeLookupWriteOrCreate(c
,c
->argv
[1])) == NULL
) return;
7155 if ((current
= hashTypeGet(o
,c
->argv
[2])) != NULL
) {
7156 if (getLongLongFromObjectOrReply(c
,current
,&value
,
7157 "hash value is not an integer") != REDIS_OK
) {
7158 decrRefCount(current
);
7161 decrRefCount(current
);
7167 new = createStringObjectFromLongLong(value
);
7168 hashTypeTryObjectEncoding(o
,&c
->argv
[2],NULL
);
7169 hashTypeSet(o
,c
->argv
[2],new);
7171 addReplyLongLong(c
,value
);
7175 static void hgetCommand(redisClient
*c
) {
7177 if ((o
= lookupKeyReadOrReply(c
,c
->argv
[1],shared
.nullbulk
)) == NULL
||
7178 checkType(c
,o
,REDIS_HASH
)) return;
7180 if ((value
= hashTypeGet(o
,c
->argv
[2])) != NULL
) {
7181 addReplyBulk(c
,value
);
7182 decrRefCount(value
);
7184 addReply(c
,shared
.nullbulk
);
7188 static void hmgetCommand(redisClient
*c
) {
7191 o
= lookupKeyRead(c
->db
,c
->argv
[1]);
7192 if (o
!= NULL
&& o
->type
!= REDIS_HASH
) {
7193 addReply(c
,shared
.wrongtypeerr
);
7196 /* Note the check for o != NULL happens inside the loop. This is
7197 * done because objects that cannot be found are considered to be
7198 * an empty hash. The reply should then be a series of NULLs. */
7199 addReplySds(c
,sdscatprintf(sdsempty(),"*%d\r\n",c
->argc
-2));
7200 for (i
= 2; i
< c
->argc
; i
++) {
7201 if (o
!= NULL
&& (value
= hashTypeGet(o
,c
->argv
[i
])) != NULL
) {
7202 addReplyBulk(c
,value
);
7203 decrRefCount(value
);
7205 addReply(c
,shared
.nullbulk
);
7210 static void hdelCommand(redisClient
*c
) {
7212 if ((o
= lookupKeyWriteOrReply(c
,c
->argv
[1],shared
.czero
)) == NULL
||
7213 checkType(c
,o
,REDIS_HASH
)) return;
7215 if (hashTypeDelete(o
,c
->argv
[2])) {
7216 if (hashTypeLength(o
) == 0) dbDelete(c
->db
,c
->argv
[1]);
7217 addReply(c
,shared
.cone
);
7220 addReply(c
,shared
.czero
);
7224 static void hlenCommand(redisClient
*c
) {
7226 if ((o
= lookupKeyReadOrReply(c
,c
->argv
[1],shared
.czero
)) == NULL
||
7227 checkType(c
,o
,REDIS_HASH
)) return;
7229 addReplyUlong(c
,hashTypeLength(o
));
7232 static void genericHgetallCommand(redisClient
*c
, int flags
) {
7233 robj
*o
, *lenobj
, *obj
;
7234 unsigned long count
= 0;
7235 hashTypeIterator
*hi
;
7237 if ((o
= lookupKeyReadOrReply(c
,c
->argv
[1],shared
.emptymultibulk
)) == NULL
7238 || checkType(c
,o
,REDIS_HASH
)) return;
7240 lenobj
= createObject(REDIS_STRING
,NULL
);
7242 decrRefCount(lenobj
);
7244 hi
= hashTypeInitIterator(o
);
7245 while (hashTypeNext(hi
) != REDIS_ERR
) {
7246 if (flags
& REDIS_HASH_KEY
) {
7247 obj
= hashTypeCurrent(hi
,REDIS_HASH_KEY
);
7248 addReplyBulk(c
,obj
);
7252 if (flags
& REDIS_HASH_VALUE
) {
7253 obj
= hashTypeCurrent(hi
,REDIS_HASH_VALUE
);
7254 addReplyBulk(c
,obj
);
7259 hashTypeReleaseIterator(hi
);
7261 lenobj
->ptr
= sdscatprintf(sdsempty(),"*%lu\r\n",count
);
7264 static void hkeysCommand(redisClient
*c
) {
7265 genericHgetallCommand(c
,REDIS_HASH_KEY
);
7268 static void hvalsCommand(redisClient
*c
) {
7269 genericHgetallCommand(c
,REDIS_HASH_VALUE
);
7272 static void hgetallCommand(redisClient
*c
) {
7273 genericHgetallCommand(c
,REDIS_HASH_KEY
|REDIS_HASH_VALUE
);
7276 static void hexistsCommand(redisClient
*c
) {
7278 if ((o
= lookupKeyReadOrReply(c
,c
->argv
[1],shared
.czero
)) == NULL
||
7279 checkType(c
,o
,REDIS_HASH
)) return;
7281 addReply(c
, hashTypeExists(o
,c
->argv
[2]) ? shared
.cone
: shared
.czero
);
7284 static void convertToRealHash(robj
*o
) {
7285 unsigned char *key
, *val
, *p
, *zm
= o
->ptr
;
7286 unsigned int klen
, vlen
;
7287 dict
*dict
= dictCreate(&hashDictType
,NULL
);
7289 assert(o
->type
== REDIS_HASH
&& o
->encoding
!= REDIS_ENCODING_HT
);
7290 p
= zipmapRewind(zm
);
7291 while((p
= zipmapNext(p
,&key
,&klen
,&val
,&vlen
)) != NULL
) {
7292 robj
*keyobj
, *valobj
;
7294 keyobj
= createStringObject((char*)key
,klen
);
7295 valobj
= createStringObject((char*)val
,vlen
);
7296 keyobj
= tryObjectEncoding(keyobj
);
7297 valobj
= tryObjectEncoding(valobj
);
7298 dictAdd(dict
,keyobj
,valobj
);
7300 o
->encoding
= REDIS_ENCODING_HT
;
7305 /* ========================= Non type-specific commands ==================== */
7307 static void flushdbCommand(redisClient
*c
) {
7308 server
.dirty
+= dictSize(c
->db
->dict
);
7309 touchWatchedKeysOnFlush(c
->db
->id
);
7310 dictEmpty(c
->db
->dict
);
7311 dictEmpty(c
->db
->expires
);
7312 addReply(c
,shared
.ok
);
7315 static void flushallCommand(redisClient
*c
) {
7316 touchWatchedKeysOnFlush(-1);
7317 server
.dirty
+= emptyDb();
7318 addReply(c
,shared
.ok
);
7319 if (server
.bgsavechildpid
!= -1) {
7320 kill(server
.bgsavechildpid
,SIGKILL
);
7321 rdbRemoveTempFile(server
.bgsavechildpid
);
7323 rdbSave(server
.dbfilename
);
7327 static redisSortOperation
*createSortOperation(int type
, robj
*pattern
) {
7328 redisSortOperation
*so
= zmalloc(sizeof(*so
));
7330 so
->pattern
= pattern
;
7334 /* Return the value associated to the key with a name obtained
7335 * substituting the first occurrence of '*' in 'pattern' with 'subst'.
7336 * The returned object will always have its refcount increased by 1
7337 * when it is non-NULL. */
7338 static robj
*lookupKeyByPattern(redisDb
*db
, robj
*pattern
, robj
*subst
) {
7341 robj keyobj
, fieldobj
, *o
;
7342 int prefixlen
, sublen
, postfixlen
, fieldlen
;
7343 /* Exploit the internal sds representation to create a sds string allocated on the stack in order to make this function faster */
7347 char buf
[REDIS_SORTKEY_MAX
+1];
7348 } keyname
, fieldname
;
7350 /* If the pattern is "#" return the substitution object itself in order
7351 * to implement the "SORT ... GET #" feature. */
7352 spat
= pattern
->ptr
;
7353 if (spat
[0] == '#' && spat
[1] == '\0') {
7354 incrRefCount(subst
);
7358 /* The substitution object may be specially encoded. If so we create
7359 * a decoded object on the fly. Otherwise getDecodedObject will just
7360 * increment the ref count, that we'll decrement later. */
7361 subst
= getDecodedObject(subst
);
7364 if (sdslen(spat
)+sdslen(ssub
)-1 > REDIS_SORTKEY_MAX
) return NULL
;
7365 p
= strchr(spat
,'*');
7367 decrRefCount(subst
);
7371 /* Find out if we're dealing with a hash dereference. */
7372 if ((f
= strstr(p
+1, "->")) != NULL
) {
7373 fieldlen
= sdslen(spat
)-(f
-spat
);
7374 /* this also copies \0 character */
7375 memcpy(fieldname
.buf
,f
+2,fieldlen
-1);
7376 fieldname
.len
= fieldlen
-2;
7382 sublen
= sdslen(ssub
);
7383 postfixlen
= sdslen(spat
)-(prefixlen
+1)-fieldlen
;
7384 memcpy(keyname
.buf
,spat
,prefixlen
);
7385 memcpy(keyname
.buf
+prefixlen
,ssub
,sublen
);
7386 memcpy(keyname
.buf
+prefixlen
+sublen
,p
+1,postfixlen
);
7387 keyname
.buf
[prefixlen
+sublen
+postfixlen
] = '\0';
7388 keyname
.len
= prefixlen
+sublen
+postfixlen
;
7389 decrRefCount(subst
);
7391 /* Lookup substituted key */
7392 initStaticStringObject(keyobj
,((char*)&keyname
)+(sizeof(long)*2));
7393 o
= lookupKeyRead(db
,&keyobj
);
7394 if (o
== NULL
) return NULL
;
7397 if (o
->type
!= REDIS_HASH
|| fieldname
.len
< 1) return NULL
;
7399 /* Retrieve value from hash by the field name. This operation
7400 * already increases the refcount of the returned object. */
7401 initStaticStringObject(fieldobj
,((char*)&fieldname
)+(sizeof(long)*2));
7402 o
= hashTypeGet(o
, &fieldobj
);
7404 if (o
->type
!= REDIS_STRING
) return NULL
;
7406 /* Every object that this function returns needs to have its refcount
7407 * increased. sortCommand decreases it again. */
7414 /* sortCompare() is used by qsort in sortCommand(). Given that qsort_r with
7415 * the additional parameter is not standard but a BSD-specific we have to
7416 * pass sorting parameters via the global 'server' structure */
7417 static int sortCompare(const void *s1
, const void *s2
) {
7418 const redisSortObject
*so1
= s1
, *so2
= s2
;
7421 if (!server
.sort_alpha
) {
7422 /* Numeric sorting. Here it's trivial as we precomputed scores */
7423 if (so1
->u
.score
> so2
->u
.score
) {
7425 } else if (so1
->u
.score
< so2
->u
.score
) {
7431 /* Alphanumeric sorting */
7432 if (server
.sort_bypattern
) {
7433 if (!so1
->u
.cmpobj
|| !so2
->u
.cmpobj
) {
7434 /* At least one compare object is NULL */
7435 if (so1
->u
.cmpobj
== so2
->u
.cmpobj
)
7437 else if (so1
->u
.cmpobj
== NULL
)
7442 /* We have both the objects, use strcoll */
7443 cmp
= strcoll(so1
->u
.cmpobj
->ptr
,so2
->u
.cmpobj
->ptr
);
7446 /* Compare elements directly. */
7447 cmp
= compareStringObjects(so1
->obj
,so2
->obj
);
7450 return server
.sort_desc
? -cmp
: cmp
;
7453 /* The SORT command is the most complex command in Redis. Warning: this code
7454 * is optimized for speed and a bit less for readability */
7455 static void sortCommand(redisClient
*c
) {
7457 unsigned int outputlen
= 0;
7458 int desc
= 0, alpha
= 0;
7459 int limit_start
= 0, limit_count
= -1, start
, end
;
7460 int j
, dontsort
= 0, vectorlen
;
7461 int getop
= 0; /* GET operation counter */
7462 robj
*sortval
, *sortby
= NULL
, *storekey
= NULL
;
7463 redisSortObject
*vector
; /* Resulting vector to sort */
7465 /* Lookup the key to sort. It must be of the right types */
7466 sortval
= lookupKeyRead(c
->db
,c
->argv
[1]);
7467 if (sortval
== NULL
) {
7468 addReply(c
,shared
.emptymultibulk
);
7471 if (sortval
->type
!= REDIS_SET
&& sortval
->type
!= REDIS_LIST
&&
7472 sortval
->type
!= REDIS_ZSET
)
7474 addReply(c
,shared
.wrongtypeerr
);
7478 /* Create a list of operations to perform for every sorted element.
7479 * Operations can be GET/DEL/INCR/DECR */
7480 operations
= listCreate();
7481 listSetFreeMethod(operations
,zfree
);
7484 /* Now we need to protect sortval incrementing its count, in the future
7485 * SORT may have options able to overwrite/delete keys during the sorting
7486 * and the sorted key itself may get destroyed */
7487 incrRefCount(sortval
);
7489 /* The SORT command has an SQL-alike syntax, parse it */
7490 while(j
< c
->argc
) {
7491 int leftargs
= c
->argc
-j
-1;
7492 if (!strcasecmp(c
->argv
[j
]->ptr
,"asc")) {
7494 } else if (!strcasecmp(c
->argv
[j
]->ptr
,"desc")) {
7496 } else if (!strcasecmp(c
->argv
[j
]->ptr
,"alpha")) {
7498 } else if (!strcasecmp(c
->argv
[j
]->ptr
,"limit") && leftargs
>= 2) {
7499 limit_start
= atoi(c
->argv
[j
+1]->ptr
);
7500 limit_count
= atoi(c
->argv
[j
+2]->ptr
);
7502 } else if (!strcasecmp(c
->argv
[j
]->ptr
,"store") && leftargs
>= 1) {
7503 storekey
= c
->argv
[j
+1];
7505 } else if (!strcasecmp(c
->argv
[j
]->ptr
,"by") && leftargs
>= 1) {
7506 sortby
= c
->argv
[j
+1];
7507 /* If the BY pattern does not contain '*', i.e. it is constant,
7508 * we don't need to sort nor to lookup the weight keys. */
7509 if (strchr(c
->argv
[j
+1]->ptr
,'*') == NULL
) dontsort
= 1;
7511 } else if (!strcasecmp(c
->argv
[j
]->ptr
,"get") && leftargs
>= 1) {
7512 listAddNodeTail(operations
,createSortOperation(
7513 REDIS_SORT_GET
,c
->argv
[j
+1]));
7517 decrRefCount(sortval
);
7518 listRelease(operations
);
7519 addReply(c
,shared
.syntaxerr
);
7525 /* Load the sorting vector with all the objects to sort */
7526 switch(sortval
->type
) {
7527 case REDIS_LIST
: vectorlen
= listTypeLength(sortval
); break;
7528 case REDIS_SET
: vectorlen
= dictSize((dict
*)sortval
->ptr
); break;
7529 case REDIS_ZSET
: vectorlen
= dictSize(((zset
*)sortval
->ptr
)->dict
); break;
7530 default: vectorlen
= 0; redisPanic("Bad SORT type"); /* Avoid GCC warning */
7532 vector
= zmalloc(sizeof(redisSortObject
)*vectorlen
);
7535 if (sortval
->type
== REDIS_LIST
) {
7536 listTypeIterator
*li
= listTypeInitIterator(sortval
,0,REDIS_TAIL
);
7537 listTypeEntry entry
;
7538 while(listTypeNext(li
,&entry
)) {
7539 vector
[j
].obj
= listTypeGet(&entry
);
7540 vector
[j
].u
.score
= 0;
7541 vector
[j
].u
.cmpobj
= NULL
;
7544 listTypeReleaseIterator(li
);
7550 if (sortval
->type
== REDIS_SET
) {
7553 zset
*zs
= sortval
->ptr
;
7557 di
= dictGetIterator(set
);
7558 while((setele
= dictNext(di
)) != NULL
) {
7559 vector
[j
].obj
= dictGetEntryKey(setele
);
7560 vector
[j
].u
.score
= 0;
7561 vector
[j
].u
.cmpobj
= NULL
;
7564 dictReleaseIterator(di
);
7566 redisAssert(j
== vectorlen
);
7568 /* Now it's time to load the right scores in the sorting vector */
7569 if (dontsort
== 0) {
7570 for (j
= 0; j
< vectorlen
; j
++) {
7573 /* lookup value to sort by */
7574 byval
= lookupKeyByPattern(c
->db
,sortby
,vector
[j
].obj
);
7575 if (!byval
) continue;
7577 /* use object itself to sort by */
7578 byval
= vector
[j
].obj
;
7582 if (sortby
) vector
[j
].u
.cmpobj
= getDecodedObject(byval
);
7584 if (byval
->encoding
== REDIS_ENCODING_RAW
) {
7585 vector
[j
].u
.score
= strtod(byval
->ptr
,NULL
);
7586 } else if (byval
->encoding
== REDIS_ENCODING_INT
) {
7587 /* Don't need to decode the object if it's
7588 * integer-encoded (the only encoding supported) so
7589 * far. We can just cast it */
7590 vector
[j
].u
.score
= (long)byval
->ptr
;
7592 redisAssert(1 != 1);
7596 /* when the object was retrieved using lookupKeyByPattern,
7597 * its refcount needs to be decreased. */
7599 decrRefCount(byval
);
7604 /* We are ready to sort the vector... perform a bit of sanity check
7605 * on the LIMIT option too. We'll use a partial version of quicksort. */
7606 start
= (limit_start
< 0) ? 0 : limit_start
;
7607 end
= (limit_count
< 0) ? vectorlen
-1 : start
+limit_count
-1;
7608 if (start
>= vectorlen
) {
7609 start
= vectorlen
-1;
7612 if (end
>= vectorlen
) end
= vectorlen
-1;
7614 if (dontsort
== 0) {
7615 server
.sort_desc
= desc
;
7616 server
.sort_alpha
= alpha
;
7617 server
.sort_bypattern
= sortby
? 1 : 0;
7618 if (sortby
&& (start
!= 0 || end
!= vectorlen
-1))
7619 pqsort(vector
,vectorlen
,sizeof(redisSortObject
),sortCompare
, start
,end
);
7621 qsort(vector
,vectorlen
,sizeof(redisSortObject
),sortCompare
);
7624 /* Send command output to the output buffer, performing the specified
7625 * GET/DEL/INCR/DECR operations if any. */
7626 outputlen
= getop
? getop
*(end
-start
+1) : end
-start
+1;
7627 if (storekey
== NULL
) {
7628 /* STORE option not specified, sent the sorting result to client */
7629 addReplySds(c
,sdscatprintf(sdsempty(),"*%d\r\n",outputlen
));
7630 for (j
= start
; j
<= end
; j
++) {
7634 if (!getop
) addReplyBulk(c
,vector
[j
].obj
);
7635 listRewind(operations
,&li
);
7636 while((ln
= listNext(&li
))) {
7637 redisSortOperation
*sop
= ln
->value
;
7638 robj
*val
= lookupKeyByPattern(c
->db
,sop
->pattern
,
7641 if (sop
->type
== REDIS_SORT_GET
) {
7643 addReply(c
,shared
.nullbulk
);
7645 addReplyBulk(c
,val
);
7649 redisAssert(sop
->type
== REDIS_SORT_GET
); /* always fails */
7654 robj
*sobj
= createZiplistObject();
7656 /* STORE option specified, set the sorting result as a List object */
7657 for (j
= start
; j
<= end
; j
++) {
7662 listTypePush(sobj
,vector
[j
].obj
,REDIS_TAIL
);
7664 listRewind(operations
,&li
);
7665 while((ln
= listNext(&li
))) {
7666 redisSortOperation
*sop
= ln
->value
;
7667 robj
*val
= lookupKeyByPattern(c
->db
,sop
->pattern
,
7670 if (sop
->type
== REDIS_SORT_GET
) {
7671 if (!val
) val
= createStringObject("",0);
7673 /* listTypePush does an incrRefCount, so we should take care
7674 * care of the incremented refcount caused by either
7675 * lookupKeyByPattern or createStringObject("",0) */
7676 listTypePush(sobj
,val
,REDIS_TAIL
);
7680 redisAssert(sop
->type
== REDIS_SORT_GET
);
7685 dbReplace(c
->db
,storekey
,sobj
);
7686 /* Note: we add 1 because the DB is dirty anyway since even if the
7687 * SORT result is empty a new key is set and maybe the old content
7689 server
.dirty
+= 1+outputlen
;
7690 addReplySds(c
,sdscatprintf(sdsempty(),":%d\r\n",outputlen
));
7694 if (sortval
->type
== REDIS_LIST
)
7695 for (j
= 0; j
< vectorlen
; j
++)
7696 decrRefCount(vector
[j
].obj
);
7697 decrRefCount(sortval
);
7698 listRelease(operations
);
7699 for (j
= 0; j
< vectorlen
; j
++) {
7700 if (alpha
&& vector
[j
].u
.cmpobj
)
7701 decrRefCount(vector
[j
].u
.cmpobj
);
/* Convert an amount of bytes into a human readable string in the form
 * of 100B, 2G, 100M, 4K, and so forth.
 *
 * 's' is the caller-provided destination buffer: no size is passed, so the
 * caller must make it large enough for the formatted value plus the
 * terminating NUL. 'n' is the byte count to format.
 *
 * Values below 1024 are printed as an integer followed by 'B'. Everything
 * else is printed with two decimals and a K/M/G/T/P/E suffix (powers of
 * 1024). Every possible value of 'n' produces output, so 's' is never left
 * unwritten (values of 1 TiB and above used to match no branch at all). */
static void bytesToHuman(char *s, unsigned long long n) {
    static const char suffix[] = { 'K', 'M', 'G', 'T', 'P', 'E' };
    const size_t nsuffix = sizeof(suffix)/sizeof(suffix[0]);
    unsigned long long unit = 1024;
    size_t idx = 0;

    if (n < 1024) {
        /* Bytes */
        sprintf(s,"%lluB",n);
        return;
    }

    /* Pick the largest unit such that n/unit < 1024. The comparison is done
     * on integers, and 'unit' tops out at 1024^6 (2^60), so it cannot
     * overflow. */
    while (idx+1 < nsuffix && n/unit >= 1024) {
        unit *= 1024;
        idx++;
    }
    sprintf(s,"%.2f%c",(double)n/(double)unit,suffix[idx]);
}
7727 /* Create the string returned by the INFO command. This is decoupled
7728 * by the INFO command itself as we need to report the same information
7729 * on memory corruption problems. */
7730 static sds
genRedisInfoString(void) {
7732 time_t uptime
= time(NULL
)-server
.stat_starttime
;
7736 bytesToHuman(hmem
,zmalloc_used_memory());
7737 info
= sdscatprintf(sdsempty(),
7738 "redis_version:%s\r\n"
7739 "redis_git_sha1:%s\r\n"
7740 "redis_git_dirty:%d\r\n"
7742 "multiplexing_api:%s\r\n"
7743 "process_id:%ld\r\n"
7744 "uptime_in_seconds:%ld\r\n"
7745 "uptime_in_days:%ld\r\n"
7746 "connected_clients:%d\r\n"
7747 "connected_slaves:%d\r\n"
7748 "blocked_clients:%d\r\n"
7749 "used_memory:%zu\r\n"
7750 "used_memory_human:%s\r\n"
7751 "changes_since_last_save:%lld\r\n"
7752 "bgsave_in_progress:%d\r\n"
7753 "last_save_time:%ld\r\n"
7754 "bgrewriteaof_in_progress:%d\r\n"
7755 "total_connections_received:%lld\r\n"
7756 "total_commands_processed:%lld\r\n"
7757 "expired_keys:%lld\r\n"
7758 "hash_max_zipmap_entries:%zu\r\n"
7759 "hash_max_zipmap_value:%zu\r\n"
7760 "pubsub_channels:%ld\r\n"
7761 "pubsub_patterns:%u\r\n"
7766 strtol(redisGitDirty(),NULL
,10) > 0,
7767 (sizeof(long) == 8) ? "64" : "32",
7772 listLength(server
.clients
)-listLength(server
.slaves
),
7773 listLength(server
.slaves
),
7774 server
.blpop_blocked_clients
,
7775 zmalloc_used_memory(),
7778 server
.bgsavechildpid
!= -1,
7780 server
.bgrewritechildpid
!= -1,
7781 server
.stat_numconnections
,
7782 server
.stat_numcommands
,
7783 server
.stat_expiredkeys
,
7784 server
.hash_max_zipmap_entries
,
7785 server
.hash_max_zipmap_value
,
7786 dictSize(server
.pubsub_channels
),
7787 listLength(server
.pubsub_patterns
),
7788 server
.vm_enabled
!= 0,
7789 server
.masterhost
== NULL
? "master" : "slave"
7791 if (server
.masterhost
) {
7792 info
= sdscatprintf(info
,
7793 "master_host:%s\r\n"
7794 "master_port:%d\r\n"
7795 "master_link_status:%s\r\n"
7796 "master_last_io_seconds_ago:%d\r\n"
7799 (server
.replstate
== REDIS_REPL_CONNECTED
) ?
7801 server
.master
? ((int)(time(NULL
)-server
.master
->lastinteraction
)) : -1
7804 if (server
.vm_enabled
) {
7806 info
= sdscatprintf(info
,
7807 "vm_conf_max_memory:%llu\r\n"
7808 "vm_conf_page_size:%llu\r\n"
7809 "vm_conf_pages:%llu\r\n"
7810 "vm_stats_used_pages:%llu\r\n"
7811 "vm_stats_swapped_objects:%llu\r\n"
7812 "vm_stats_swappin_count:%llu\r\n"
7813 "vm_stats_swappout_count:%llu\r\n"
7814 "vm_stats_io_newjobs_len:%lu\r\n"
7815 "vm_stats_io_processing_len:%lu\r\n"
7816 "vm_stats_io_processed_len:%lu\r\n"
7817 "vm_stats_io_active_threads:%lu\r\n"
7818 "vm_stats_blocked_clients:%lu\r\n"
7819 ,(unsigned long long) server
.vm_max_memory
,
7820 (unsigned long long) server
.vm_page_size
,
7821 (unsigned long long) server
.vm_pages
,
7822 (unsigned long long) server
.vm_stats_used_pages
,
7823 (unsigned long long) server
.vm_stats_swapped_objects
,
7824 (unsigned long long) server
.vm_stats_swapins
,
7825 (unsigned long long) server
.vm_stats_swapouts
,
7826 (unsigned long) listLength(server
.io_newjobs
),
7827 (unsigned long) listLength(server
.io_processing
),
7828 (unsigned long) listLength(server
.io_processed
),
7829 (unsigned long) server
.io_active_threads
,
7830 (unsigned long) server
.vm_blocked_clients
7834 for (j
= 0; j
< server
.dbnum
; j
++) {
7835 long long keys
, vkeys
;
7837 keys
= dictSize(server
.db
[j
].dict
);
7838 vkeys
= dictSize(server
.db
[j
].expires
);
7839 if (keys
|| vkeys
) {
7840 info
= sdscatprintf(info
, "db%d:keys=%lld,expires=%lld\r\n",
7847 static void infoCommand(redisClient
*c
) {
7848 sds info
= genRedisInfoString();
7849 addReplySds(c
,sdscatprintf(sdsempty(),"$%lu\r\n",
7850 (unsigned long)sdslen(info
)));
7851 addReplySds(c
,info
);
7852 addReply(c
,shared
.crlf
);
7855 static void monitorCommand(redisClient
*c
) {
7856 /* ignore MONITOR if aleady slave or in monitor mode */
7857 if (c
->flags
& REDIS_SLAVE
) return;
7859 c
->flags
|= (REDIS_SLAVE
|REDIS_MONITOR
);
7861 listAddNodeTail(server
.monitors
,c
);
7862 addReply(c
,shared
.ok
);
7865 /* ================================= Expire ================================= */
7866 static int removeExpire(redisDb
*db
, robj
*key
) {
7867 /* An expire may only be removed if there is a corresponding entry in the
7868 * main dict. Otherwise, the key will never be freed. */
7869 redisAssert(dictFind(db
->dict
,key
->ptr
) != NULL
);
7870 if (dictDelete(db
->expires
,key
->ptr
) == DICT_OK
) {
7877 static int setExpire(redisDb
*db
, robj
*key
, time_t when
) {
7880 /* Reuse the sds from the main dict in the expire dict */
7881 redisAssert((de
= dictFind(db
->dict
,key
->ptr
)) != NULL
);
7882 if (dictAdd(db
->expires
,dictGetEntryKey(de
),(void*)when
) == DICT_ERR
) {
7889 /* Return the expire time of the specified key, or -1 if no expire
7890 * is associated with this key (i.e. the key is non volatile) */
7891 static time_t getExpire(redisDb
*db
, robj
*key
) {
7894 /* No expire? return ASAP */
7895 if (dictSize(db
->expires
) == 0 ||
7896 (de
= dictFind(db
->expires
,key
->ptr
)) == NULL
) return -1;
7898 /* The entry was found in the expire dict, this means it should also
7899 * be present in the main dict (safety check). */
7900 redisAssert(dictFind(db
->dict
,key
->ptr
) != NULL
);
7901 return (time_t) dictGetEntryVal(de
);
7904 static int expireIfNeeded(redisDb
*db
, robj
*key
) {
7905 time_t when
= getExpire(db
,key
);
7906 if (when
< 0) return 0;
7908 /* Return when this key has not expired */
7909 if (time(NULL
) <= when
) return 0;
7911 /* Delete the key */
7912 server
.stat_expiredkeys
++;
7914 return dbDelete(db
,key
);
7917 static int deleteIfVolatile(redisDb
*db
, robj
*key
) {
7918 if (getExpire(db
,key
) < 0) return 0;
7920 /* Delete the key */
7921 server
.stat_expiredkeys
++;
7923 return dbDelete(db
,key
);
7926 static void expireGenericCommand(redisClient
*c
, robj
*key
, robj
*param
, long offset
) {
7930 if (getLongFromObjectOrReply(c
, param
, &seconds
, NULL
) != REDIS_OK
) return;
7934 de
= dictFind(c
->db
->dict
,key
->ptr
);
7936 addReply(c
,shared
.czero
);
7940 if (dbDelete(c
->db
,key
)) server
.dirty
++;
7941 addReply(c
, shared
.cone
);
7944 time_t when
= time(NULL
)+seconds
;
7945 if (setExpire(c
->db
,key
,when
)) {
7946 addReply(c
,shared
.cone
);
7949 addReply(c
,shared
.czero
);
7955 static void expireCommand(redisClient
*c
) {
7956 expireGenericCommand(c
,c
->argv
[1],c
->argv
[2],0);
7959 static void expireatCommand(redisClient
*c
) {
7960 expireGenericCommand(c
,c
->argv
[1],c
->argv
[2],time(NULL
));
7963 static void ttlCommand(redisClient
*c
) {
7967 expire
= getExpire(c
->db
,c
->argv
[1]);
7969 ttl
= (int) (expire
-time(NULL
));
7970 if (ttl
< 0) ttl
= -1;
7972 addReplySds(c
,sdscatprintf(sdsempty(),":%d\r\n",ttl
));
7975 /* ================================ MULTI/EXEC ============================== */
7977 /* Client state initialization for MULTI/EXEC */
7978 static void initClientMultiState(redisClient
*c
) {
7979 c
->mstate
.commands
= NULL
;
7980 c
->mstate
.count
= 0;
7983 /* Release all the resources associated with MULTI/EXEC state */
7984 static void freeClientMultiState(redisClient
*c
) {
7987 for (j
= 0; j
< c
->mstate
.count
; j
++) {
7989 multiCmd
*mc
= c
->mstate
.commands
+j
;
7991 for (i
= 0; i
< mc
->argc
; i
++)
7992 decrRefCount(mc
->argv
[i
]);
7995 zfree(c
->mstate
.commands
);
7998 /* Add a new command into the MULTI commands queue */
7999 static void queueMultiCommand(redisClient
*c
, struct redisCommand
*cmd
) {
8003 c
->mstate
.commands
= zrealloc(c
->mstate
.commands
,
8004 sizeof(multiCmd
)*(c
->mstate
.count
+1));
8005 mc
= c
->mstate
.commands
+c
->mstate
.count
;
8008 mc
->argv
= zmalloc(sizeof(robj
*)*c
->argc
);
8009 memcpy(mc
->argv
,c
->argv
,sizeof(robj
*)*c
->argc
);
8010 for (j
= 0; j
< c
->argc
; j
++)
8011 incrRefCount(mc
->argv
[j
]);
8015 static void multiCommand(redisClient
*c
) {
8016 if (c
->flags
& REDIS_MULTI
) {
8017 addReplySds(c
,sdsnew("-ERR MULTI calls can not be nested\r\n"));
8020 c
->flags
|= REDIS_MULTI
;
8021 addReply(c
,shared
.ok
);
8024 static void discardCommand(redisClient
*c
) {
8025 if (!(c
->flags
& REDIS_MULTI
)) {
8026 addReplySds(c
,sdsnew("-ERR DISCARD without MULTI\r\n"));
8030 freeClientMultiState(c
);
8031 initClientMultiState(c
);
8032 c
->flags
&= (~REDIS_MULTI
);
8034 addReply(c
,shared
.ok
);
8037 /* Send a MULTI command to all the slaves and AOF file. Check the execCommand
8038 * implememntation for more information. */
8039 static void execCommandReplicateMulti(redisClient
*c
) {
8040 struct redisCommand
*cmd
;
8041 robj
*multistring
= createStringObject("MULTI",5);
8043 cmd
= lookupCommand("multi");
8044 if (server
.appendonly
)
8045 feedAppendOnlyFile(cmd
,c
->db
->id
,&multistring
,1);
8046 if (listLength(server
.slaves
))
8047 replicationFeedSlaves(server
.slaves
,c
->db
->id
,&multistring
,1);
8048 decrRefCount(multistring
);
8051 static void execCommand(redisClient
*c
) {
8056 if (!(c
->flags
& REDIS_MULTI
)) {
8057 addReplySds(c
,sdsnew("-ERR EXEC without MULTI\r\n"));
8061 /* Check if we need to abort the EXEC if some WATCHed key was touched.
8062 * A failed EXEC will return a multi bulk nil object. */
8063 if (c
->flags
& REDIS_DIRTY_CAS
) {
8064 freeClientMultiState(c
);
8065 initClientMultiState(c
);
8066 c
->flags
&= ~(REDIS_MULTI
|REDIS_DIRTY_CAS
);
8068 addReply(c
,shared
.nullmultibulk
);
8072 /* Replicate a MULTI request now that we are sure the block is executed.
8073 * This way we'll deliver the MULTI/..../EXEC block as a whole and
8074 * both the AOF and the replication link will have the same consistency
8075 * and atomicity guarantees. */
8076 execCommandReplicateMulti(c
);
8078 /* Exec all the queued commands */
8079 unwatchAllKeys(c
); /* Unwatch ASAP otherwise we'll waste CPU cycles */
8080 orig_argv
= c
->argv
;
8081 orig_argc
= c
->argc
;
8082 addReplySds(c
,sdscatprintf(sdsempty(),"*%d\r\n",c
->mstate
.count
));
8083 for (j
= 0; j
< c
->mstate
.count
; j
++) {
8084 c
->argc
= c
->mstate
.commands
[j
].argc
;
8085 c
->argv
= c
->mstate
.commands
[j
].argv
;
8086 call(c
,c
->mstate
.commands
[j
].cmd
);
8088 c
->argv
= orig_argv
;
8089 c
->argc
= orig_argc
;
8090 freeClientMultiState(c
);
8091 initClientMultiState(c
);
8092 c
->flags
&= ~(REDIS_MULTI
|REDIS_DIRTY_CAS
);
8093 /* Make sure the EXEC command is always replicated / AOF, since we
8094 * always send the MULTI command (we can't know beforehand if the
8095 * next operations will contain at least a modification to the DB). */
8099 /* =========================== Blocking Operations ========================= */
8101 /* Currently Redis blocking operations support is limited to list POP ops,
8102 * so the current implementation is not fully generic, but it is also not
8103 * completely specific so it will not require a rewrite to support new
8104 * kind of blocking operations in the future.
8106 * Still it's important to note that list blocking operations can be already
8107 * used as a notification mechanism in order to implement other blocking
8108 * operations at application level, so there must be a very strong evidence
8109 * of usefulness and generality before new blocking operations are implemented.
8111 * This is how the current blocking POP works, we use BLPOP as example:
8112 * - If the user calls BLPOP and the key exists and contains a non empty list
8113 * then LPOP is called instead. So BLPOP is semantically the same as LPOP
8114 * if there is no need to block.
8115 * - If instead BLPOP is called and the key does not exist or the list is
8116 * empty we need to block. In order to do so we remove the notification for
8117 * new data to read in the client socket (so that we'll not serve new
8118 * requests if the blocking request is not served). Also we put the client
8119 * in a dictionary (db->blocking_keys) mapping keys to a list of clients
8120 * blocked on these keys.
8121 * - If a PUSH operation against a key with blocked clients waiting is
8122 * performed, we serve the first in the list: basically instead of pushing
8123 * the new element inside the list we return it to the (first / oldest)
8124 * blocking client, unblock the client, and remove it from the list.
8126 * The above comment and the source code should be enough in order to understand
8127 * the implementation and modify / fix it later.
8130 /* Set a client in blocking mode for the specified key, with the specified
8132 static void blockForKeys(redisClient
*c
, robj
**keys
, int numkeys
, time_t timeout
) {
8137 c
->blocking_keys
= zmalloc(sizeof(robj
*)*numkeys
);
8138 c
->blocking_keys_num
= numkeys
;
8139 c
->blockingto
= timeout
;
8140 for (j
= 0; j
< numkeys
; j
++) {
8141 /* Add the key in the client structure, to map clients -> keys */
8142 c
->blocking_keys
[j
] = keys
[j
];
8143 incrRefCount(keys
[j
]);
8145 /* And in the other "side", to map keys -> clients */
8146 de
= dictFind(c
->db
->blocking_keys
,keys
[j
]);
8150 /* For every key we take a list of clients blocked for it */
8152 retval
= dictAdd(c
->db
->blocking_keys
,keys
[j
],l
);
8153 incrRefCount(keys
[j
]);
8154 assert(retval
== DICT_OK
);
8156 l
= dictGetEntryVal(de
);
8158 listAddNodeTail(l
,c
);
8160 /* Mark the client as a blocked client */
8161 c
->flags
|= REDIS_BLOCKED
;
8162 server
.blpop_blocked_clients
++;
8165 /* Unblock a client that's waiting in a blocking operation such as BLPOP */
8166 static void unblockClientWaitingData(redisClient
*c
) {
8171 assert(c
->blocking_keys
!= NULL
);
8172 /* The client may wait for multiple keys, so unblock it for every key. */
8173 for (j
= 0; j
< c
->blocking_keys_num
; j
++) {
8174 /* Remove this client from the list of clients waiting for this key. */
8175 de
= dictFind(c
->db
->blocking_keys
,c
->blocking_keys
[j
]);
8177 l
= dictGetEntryVal(de
);
8178 listDelNode(l
,listSearchKey(l
,c
));
8179 /* If the list is empty we need to remove it to avoid wasting memory */
8180 if (listLength(l
) == 0)
8181 dictDelete(c
->db
->blocking_keys
,c
->blocking_keys
[j
]);
8182 decrRefCount(c
->blocking_keys
[j
]);
8184 /* Cleanup the client structure */
8185 zfree(c
->blocking_keys
);
8186 c
->blocking_keys
= NULL
;
8187 c
->flags
&= (~REDIS_BLOCKED
);
8188 server
.blpop_blocked_clients
--;
8189 /* We want to process data if there is some command waiting
8190 * in the input buffer. Note that this is safe even if
8191 * unblockClientWaitingData() gets called from freeClient() because
8192 * freeClient() will be smart enough to call this function
8193 * *after* c->querybuf was set to NULL. */
8194 if (c
->querybuf
&& sdslen(c
->querybuf
) > 0) processInputBuffer(c
);
8197 /* This should be called from any function PUSHing into lists.
8198 * 'c' is the "pushing client", 'key' is the key it is pushing data against,
8199 * 'ele' is the element pushed.
8201 * If the function returns 0 there was no client waiting for a list push
8204 * If the function returns 1 there was a client waiting for a list push
8205 * against this key, the element was passed to this client thus it's not
8206 * needed to actually add it to the list and the caller should return asap. */
8207 static int handleClientsWaitingListPush(redisClient
*c
, robj
*key
, robj
*ele
) {
8208 struct dictEntry
*de
;
8209 redisClient
*receiver
;
8213 de
= dictFind(c
->db
->blocking_keys
,key
);
8214 if (de
== NULL
) return 0;
8215 l
= dictGetEntryVal(de
);
8218 receiver
= ln
->value
;
8220 addReplySds(receiver
,sdsnew("*2\r\n"));
8221 addReplyBulk(receiver
,key
);
8222 addReplyBulk(receiver
,ele
);
8223 unblockClientWaitingData(receiver
);
8227 /* Blocking RPOP/LPOP */
8228 static void blockingPopGenericCommand(redisClient
*c
, int where
) {
8233 for (j
= 1; j
< c
->argc
-1; j
++) {
8234 o
= lookupKeyWrite(c
->db
,c
->argv
[j
]);
8236 if (o
->type
!= REDIS_LIST
) {
8237 addReply(c
,shared
.wrongtypeerr
);
8240 list
*list
= o
->ptr
;
8241 if (listLength(list
) != 0) {
8242 /* If the list contains elements fall back to the usual
8243 * non-blocking POP operation */
8244 robj
*argv
[2], **orig_argv
;
8247 /* We need to alter the command arguments before to call
8248 * popGenericCommand() as the command takes a single key. */
8249 orig_argv
= c
->argv
;
8250 orig_argc
= c
->argc
;
8251 argv
[1] = c
->argv
[j
];
8255 /* Also the return value is different, we need to output
8256 * the multi bulk reply header and the key name. The
8257 * "real" command will add the last element (the value)
8258 * for us. If this souds like an hack to you it's just
8259 * because it is... */
8260 addReplySds(c
,sdsnew("*2\r\n"));
8261 addReplyBulk(c
,argv
[1]);
8262 popGenericCommand(c
,where
);
8264 /* Fix the client structure with the original stuff */
8265 c
->argv
= orig_argv
;
8266 c
->argc
= orig_argc
;
8272 /* If the list is empty or the key does not exists we must block */
8273 timeout
= strtol(c
->argv
[c
->argc
-1]->ptr
,NULL
,10);
8274 if (timeout
> 0) timeout
+= time(NULL
);
8275 blockForKeys(c
,c
->argv
+1,c
->argc
-2,timeout
);
8278 static void blpopCommand(redisClient
*c
) {
8279 blockingPopGenericCommand(c
,REDIS_HEAD
);
8282 static void brpopCommand(redisClient
*c
) {
8283 blockingPopGenericCommand(c
,REDIS_TAIL
);
8286 /* =============================== Replication ============================= */
8288 static int syncWrite(int fd
, char *ptr
, ssize_t size
, int timeout
) {
8289 ssize_t nwritten
, ret
= size
;
8290 time_t start
= time(NULL
);
8294 if (aeWait(fd
,AE_WRITABLE
,1000) & AE_WRITABLE
) {
8295 nwritten
= write(fd
,ptr
,size
);
8296 if (nwritten
== -1) return -1;
8300 if ((time(NULL
)-start
) > timeout
) {
8308 static int syncRead(int fd
, char *ptr
, ssize_t size
, int timeout
) {
8309 ssize_t nread
, totread
= 0;
8310 time_t start
= time(NULL
);
8314 if (aeWait(fd
,AE_READABLE
,1000) & AE_READABLE
) {
8315 nread
= read(fd
,ptr
,size
);
8316 if (nread
== -1) return -1;
8321 if ((time(NULL
)-start
) > timeout
) {
/* Read a line from 'fd' into 'ptr', one byte at a time through syncRead(),
 * each byte with the given timeout.
 *
 * Reading stops at the first '\n', which is not stored; a '\r' right before
 * it is stripped as well. 'size' is the capacity of 'ptr' including the
 * terminating NUL: at most size-1 characters are stored and the buffer is
 * always left NUL terminated, so a line longer than that is returned
 * truncated instead of being written past the end of the buffer.
 *
 * Returns the number of characters stored (counted before a trailing '\r'
 * is stripped), or -1 if syncRead() fails. */
static int syncReadLine(int fd, char *ptr, ssize_t size, int timeout) {
    ssize_t nread = 0;

    size--; /* Reserve room for the NUL terminator. */
    while(size) {
        char c;

        if (syncRead(fd,&c,1,timeout) == -1) return -1;
        if (c == '\n') {
            *ptr = '\0';
            if (nread && *(ptr-1) == '\r') *(ptr-1) = '\0';
            return nread;
        } else {
            *ptr++ = c;
            *ptr = '\0';
            nread++;
        }
        /* One slot consumed: without this the loop is unbounded. */
        size--;
    }
    return nread;
}
8350 static void syncCommand(redisClient
*c
) {
8351 /* ignore SYNC if aleady slave or in monitor mode */
8352 if (c
->flags
& REDIS_SLAVE
) return;
8354 /* SYNC can't be issued when the server has pending data to send to
8355 * the client about already issued commands. We need a fresh reply
8356 * buffer registering the differences between the BGSAVE and the current
8357 * dataset, so that we can copy to other slaves if needed. */
8358 if (listLength(c
->reply
) != 0) {
8359 addReplySds(c
,sdsnew("-ERR SYNC is invalid with pending input\r\n"));
8363 redisLog(REDIS_NOTICE
,"Slave ask for synchronization");
8364 /* Here we need to check if there is a background saving operation
8365 * in progress, or if it is required to start one */
8366 if (server
.bgsavechildpid
!= -1) {
8367 /* Ok a background save is in progress. Let's check if it is a good
8368 * one for replication, i.e. if there is another slave that is
8369 * registering differences since the server forked to save */
8374 listRewind(server
.slaves
,&li
);
8375 while((ln
= listNext(&li
))) {
8377 if (slave
->replstate
== REDIS_REPL_WAIT_BGSAVE_END
) break;
8380 /* Perfect, the server is already registering differences for
8381 * another slave. Set the right state, and copy the buffer. */
8382 listRelease(c
->reply
);
8383 c
->reply
= listDup(slave
->reply
);
8384 c
->replstate
= REDIS_REPL_WAIT_BGSAVE_END
;
8385 redisLog(REDIS_NOTICE
,"Waiting for end of BGSAVE for SYNC");
8387 /* No way, we need to wait for the next BGSAVE in order to
8388 * register differences */
8389 c
->replstate
= REDIS_REPL_WAIT_BGSAVE_START
;
8390 redisLog(REDIS_NOTICE
,"Waiting for next BGSAVE for SYNC");
8393 /* Ok we don't have a BGSAVE in progress, let's start one */
8394 redisLog(REDIS_NOTICE
,"Starting BGSAVE for SYNC");
8395 if (rdbSaveBackground(server
.dbfilename
) != REDIS_OK
) {
8396 redisLog(REDIS_NOTICE
,"Replication failed, can't BGSAVE");
8397 addReplySds(c
,sdsnew("-ERR Unalbe to perform background save\r\n"));
8400 c
->replstate
= REDIS_REPL_WAIT_BGSAVE_END
;
8403 c
->flags
|= REDIS_SLAVE
;
8405 listAddNodeTail(server
.slaves
,c
);
8409 static void sendBulkToSlave(aeEventLoop
*el
, int fd
, void *privdata
, int mask
) {
8410 redisClient
*slave
= privdata
;
8412 REDIS_NOTUSED(mask
);
8413 char buf
[REDIS_IOBUF_LEN
];
8414 ssize_t nwritten
, buflen
;
8416 if (slave
->repldboff
== 0) {
8417 /* Write the bulk write count before to transfer the DB. In theory here
8418 * we don't know how much room there is in the output buffer of the
8419 * socket, but in pratice SO_SNDLOWAT (the minimum count for output
8420 * operations) will never be smaller than the few bytes we need. */
8423 bulkcount
= sdscatprintf(sdsempty(),"$%lld\r\n",(unsigned long long)
8425 if (write(fd
,bulkcount
,sdslen(bulkcount
)) != (signed)sdslen(bulkcount
))
8433 lseek(slave
->repldbfd
,slave
->repldboff
,SEEK_SET
);
8434 buflen
= read(slave
->repldbfd
,buf
,REDIS_IOBUF_LEN
);
8436 redisLog(REDIS_WARNING
,"Read error sending DB to slave: %s",
8437 (buflen
== 0) ? "premature EOF" : strerror(errno
));
8441 if ((nwritten
= write(fd
,buf
,buflen
)) == -1) {
8442 redisLog(REDIS_VERBOSE
,"Write error sending DB to slave: %s",
8447 slave
->repldboff
+= nwritten
;
8448 if (slave
->repldboff
== slave
->repldbsize
) {
8449 close(slave
->repldbfd
);
8450 slave
->repldbfd
= -1;
8451 aeDeleteFileEvent(server
.el
,slave
->fd
,AE_WRITABLE
);
8452 slave
->replstate
= REDIS_REPL_ONLINE
;
8453 if (aeCreateFileEvent(server
.el
, slave
->fd
, AE_WRITABLE
,
8454 sendReplyToClient
, slave
) == AE_ERR
) {
8458 addReplySds(slave
,sdsempty());
8459 redisLog(REDIS_NOTICE
,"Synchronization with slave succeeded");
8463 /* This function is called at the end of every backgrond saving.
8464 * The argument bgsaveerr is REDIS_OK if the background saving succeeded
8465 * otherwise REDIS_ERR is passed to the function.
8467 * The goal of this function is to handle slaves waiting for a successful
8468 * background saving in order to perform non-blocking synchronization. */
8469 static void updateSlavesWaitingBgsave(int bgsaveerr
) {
8471 int startbgsave
= 0;
8474 listRewind(server
.slaves
,&li
);
8475 while((ln
= listNext(&li
))) {
8476 redisClient
*slave
= ln
->value
;
8478 if (slave
->replstate
== REDIS_REPL_WAIT_BGSAVE_START
) {
8480 slave
->replstate
= REDIS_REPL_WAIT_BGSAVE_END
;
8481 } else if (slave
->replstate
== REDIS_REPL_WAIT_BGSAVE_END
) {
8482 struct redis_stat buf
;
8484 if (bgsaveerr
!= REDIS_OK
) {
8486 redisLog(REDIS_WARNING
,"SYNC failed. BGSAVE child returned an error");
8489 if ((slave
->repldbfd
= open(server
.dbfilename
,O_RDONLY
)) == -1 ||
8490 redis_fstat(slave
->repldbfd
,&buf
) == -1) {
8492 redisLog(REDIS_WARNING
,"SYNC failed. Can't open/stat DB after BGSAVE: %s", strerror(errno
));
8495 slave
->repldboff
= 0;
8496 slave
->repldbsize
= buf
.st_size
;
8497 slave
->replstate
= REDIS_REPL_SEND_BULK
;
8498 aeDeleteFileEvent(server
.el
,slave
->fd
,AE_WRITABLE
);
8499 if (aeCreateFileEvent(server
.el
, slave
->fd
, AE_WRITABLE
, sendBulkToSlave
, slave
) == AE_ERR
) {
8506 if (rdbSaveBackground(server
.dbfilename
) != REDIS_OK
) {
8509 listRewind(server
.slaves
,&li
);
8510 redisLog(REDIS_WARNING
,"SYNC failed. BGSAVE failed");
8511 while((ln
= listNext(&li
))) {
8512 redisClient
*slave
= ln
->value
;
8514 if (slave
->replstate
== REDIS_REPL_WAIT_BGSAVE_START
)
8521 static int syncWithMaster(void) {
8522 char buf
[1024], tmpfile
[256], authcmd
[1024];
8524 int fd
= anetTcpConnect(NULL
,server
.masterhost
,server
.masterport
);
8525 int dfd
, maxtries
= 5;
8528 redisLog(REDIS_WARNING
,"Unable to connect to MASTER: %s",
8533 /* AUTH with the master if required. */
8534 if(server
.masterauth
) {
8535 snprintf(authcmd
, 1024, "AUTH %s\r\n", server
.masterauth
);
8536 if (syncWrite(fd
, authcmd
, strlen(server
.masterauth
)+7, 5) == -1) {
8538 redisLog(REDIS_WARNING
,"Unable to AUTH to MASTER: %s",
8542 /* Read the AUTH result. */
8543 if (syncReadLine(fd
,buf
,1024,3600) == -1) {
8545 redisLog(REDIS_WARNING
,"I/O error reading auth result from MASTER: %s",
8549 if (buf
[0] != '+') {
8551 redisLog(REDIS_WARNING
,"Cannot AUTH to MASTER, is the masterauth password correct?");
8556 /* Issue the SYNC command */
8557 if (syncWrite(fd
,"SYNC \r\n",7,5) == -1) {
8559 redisLog(REDIS_WARNING
,"I/O error writing to MASTER: %s",
8563 /* Read the bulk write count */
8564 if (syncReadLine(fd
,buf
,1024,3600) == -1) {
8566 redisLog(REDIS_WARNING
,"I/O error reading bulk count from MASTER: %s",
8570 if (buf
[0] != '$') {
8572 redisLog(REDIS_WARNING
,"Bad protocol from MASTER, the first byte is not '$', are you sure the host and port are right?");
8575 dumpsize
= strtol(buf
+1,NULL
,10);
8576 redisLog(REDIS_NOTICE
,"Receiving %ld bytes data dump from MASTER",dumpsize
);
8577 /* Read the bulk write data on a temp file */
8579 snprintf(tmpfile
,256,
8580 "temp-%d.%ld.rdb",(int)time(NULL
),(long int)getpid());
8581 dfd
= open(tmpfile
,O_CREAT
|O_WRONLY
|O_EXCL
,0644);
8582 if (dfd
!= -1) break;
8587 redisLog(REDIS_WARNING
,"Opening the temp file needed for MASTER <-> SLAVE synchronization: %s",strerror(errno
));
8591 int nread
, nwritten
;
8593 nread
= read(fd
,buf
,(dumpsize
< 1024)?dumpsize
:1024);
8595 redisLog(REDIS_WARNING
,"I/O error trying to sync with MASTER: %s",
8601 nwritten
= write(dfd
,buf
,nread
);
8602 if (nwritten
== -1) {
8603 redisLog(REDIS_WARNING
,"Write error writing to the DB dump file needed for MASTER <-> SLAVE synchrnonization: %s", strerror(errno
));
8611 if (rename(tmpfile
,server
.dbfilename
) == -1) {
8612 redisLog(REDIS_WARNING
,"Failed trying to rename the temp DB into dump.rdb in MASTER <-> SLAVE synchronization: %s", strerror(errno
));
8618 if (rdbLoad(server
.dbfilename
) != REDIS_OK
) {
8619 redisLog(REDIS_WARNING
,"Failed trying to load the MASTER synchronization DB from disk");
8623 server
.master
= createClient(fd
);
8624 server
.master
->flags
|= REDIS_MASTER
;
8625 server
.master
->authenticated
= 1;
8626 server
.replstate
= REDIS_REPL_CONNECTED
;
8630 static void slaveofCommand(redisClient
*c
) {
8631 if (!strcasecmp(c
->argv
[1]->ptr
,"no") &&
8632 !strcasecmp(c
->argv
[2]->ptr
,"one")) {
8633 if (server
.masterhost
) {
8634 sdsfree(server
.masterhost
);
8635 server
.masterhost
= NULL
;
8636 if (server
.master
) freeClient(server
.master
);
8637 server
.replstate
= REDIS_REPL_NONE
;
8638 redisLog(REDIS_NOTICE
,"MASTER MODE enabled (user request)");
8641 sdsfree(server
.masterhost
);
8642 server
.masterhost
= sdsdup(c
->argv
[1]->ptr
);
8643 server
.masterport
= atoi(c
->argv
[2]->ptr
);
8644 if (server
.master
) freeClient(server
.master
);
8645 server
.replstate
= REDIS_REPL_CONNECT
;
8646 redisLog(REDIS_NOTICE
,"SLAVE OF %s:%d enabled (user request)",
8647 server
.masterhost
, server
.masterport
);
8649 addReply(c
,shared
.ok
);
8652 /* ============================ Maxmemory directive ======================== */
8654 /* Try to free one object form the pre-allocated objects free list.
8655 * This is useful under low mem conditions as by default we take 1 million
8656 * free objects allocated. On success REDIS_OK is returned, otherwise
8658 static int tryFreeOneObjectFromFreelist(void) {
8661 if (server
.vm_enabled
) pthread_mutex_lock(&server
.obj_freelist_mutex
);
8662 if (listLength(server
.objfreelist
)) {
8663 listNode
*head
= listFirst(server
.objfreelist
);
8664 o
= listNodeValue(head
);
8665 listDelNode(server
.objfreelist
,head
);
8666 if (server
.vm_enabled
) pthread_mutex_unlock(&server
.obj_freelist_mutex
);
8670 if (server
.vm_enabled
) pthread_mutex_unlock(&server
.obj_freelist_mutex
);
8675 /* This function gets called when 'maxmemory' is set on the config file to limit
8676 * the max memory used by the server, and we are out of memory.
8677 * This function will try to, in order:
8679 * - Free objects from the free list
8680 * - Try to remove keys with an EXPIRE set
8682 * It is not possible to free enough memory to reach used-memory < maxmemory
8683 * the server will start refusing commands that will enlarge even more the
8686 static void freeMemoryIfNeeded(void) {
8687 while (server
.maxmemory
&& zmalloc_used_memory() > server
.maxmemory
) {
8688 int j
, k
, freed
= 0;
8690 if (tryFreeOneObjectFromFreelist() == REDIS_OK
) continue;
8691 for (j
= 0; j
< server
.dbnum
; j
++) {
8693 robj
*minkey
= NULL
;
8694 struct dictEntry
*de
;
8696 if (dictSize(server
.db
[j
].expires
)) {
8698 /* From a sample of three keys drop the one nearest to
8699 * the natural expire */
8700 for (k
= 0; k
< 3; k
++) {
8703 de
= dictGetRandomKey(server
.db
[j
].expires
);
8704 t
= (time_t) dictGetEntryVal(de
);
8705 if (minttl
== -1 || t
< minttl
) {
8706 minkey
= dictGetEntryKey(de
);
8710 dbDelete(server
.db
+j
,minkey
);
8713 if (!freed
) return; /* nothing to free... */
8717 /* ============================== Append Only file ========================== */
8719 /* Called when the user switches from "appendonly yes" to "appendonly no"
8720 * at runtime using the CONFIG command. */
8721 static void stopAppendOnly(void) {
8722 flushAppendOnlyFile();
8723 aof_fsync(server
.appendfd
);
8724 close(server
.appendfd
);
8726 server
.appendfd
= -1;
8727 server
.appendseldb
= -1;
8728 server
.appendonly
= 0;
8729 /* rewrite operation in progress? kill it, wait child exit */
8730 if (server
.bgsavechildpid
!= -1) {
8733 if (kill(server
.bgsavechildpid
,SIGKILL
) != -1)
8734 wait3(&statloc
,0,NULL
);
8735 /* reset the buffer accumulating changes while the child saves */
8736 sdsfree(server
.bgrewritebuf
);
8737 server
.bgrewritebuf
= sdsempty();
8738 server
.bgsavechildpid
= -1;
8742 /* Called when the user switches from "appendonly no" to "appendonly yes"
8743 * at runtime using the CONFIG command. */
8744 static int startAppendOnly(void) {
8745 server
.appendonly
= 1;
8746 server
.lastfsync
= time(NULL
);
8747 server
.appendfd
= open(server
.appendfilename
,O_WRONLY
|O_APPEND
|O_CREAT
,0644);
8748 if (server
.appendfd
== -1) {
8749 redisLog(REDIS_WARNING
,"Used tried to switch on AOF via CONFIG, but I can't open the AOF file: %s",strerror(errno
));
8752 if (rewriteAppendOnlyFileBackground() == REDIS_ERR
) {
8753 server
.appendonly
= 0;
8754 close(server
.appendfd
);
8755 redisLog(REDIS_WARNING
,"Used tried to switch on AOF via CONFIG, I can't trigger a background AOF rewrite operation. Check the above logs for more info about the error.",strerror(errno
));
8761 /* Write the append only file buffer on disk.
8763 * Since we are required to write the AOF before replying to the client,
8764 * and the only way the client socket can get a write is entering when the
8765 * the event loop, we accumulate all the AOF writes in a memory
8766 * buffer and write it on disk using this function just before entering
8767 * the event loop again. */
8768 static void flushAppendOnlyFile(void) {
8772 if (sdslen(server
.aofbuf
) == 0) return;
8774 /* We want to perform a single write. This should be guaranteed atomic
8775 * at least if the filesystem we are writing is a real physical one.
8776 * While this will save us against the server being killed I don't think
8777 * there is much to do about the whole server stopping for power problems
8779 nwritten
= write(server
.appendfd
,server
.aofbuf
,sdslen(server
.aofbuf
));
8780 if (nwritten
!= (signed)sdslen(server
.aofbuf
)) {
8781 /* Ooops, we are in troubles. The best thing to do for now is
8782 * aborting instead of giving the illusion that everything is
8783 * working as expected. */
8784 if (nwritten
== -1) {
8785 redisLog(REDIS_WARNING
,"Exiting on error writing to the append-only file: %s",strerror(errno
));
8787 redisLog(REDIS_WARNING
,"Exiting on short write while writing to the append-only file: %s",strerror(errno
));
8791 sdsfree(server
.aofbuf
);
8792 server
.aofbuf
= sdsempty();
8794 /* Don't Fsync if no-appendfsync-on-rewrite is set to yes and we have
8795 * childs performing heavy I/O on disk. */
8796 if (server
.no_appendfsync_on_rewrite
&&
8797 (server
.bgrewritechildpid
!= -1 || server
.bgsavechildpid
!= -1))
8799 /* Fsync if needed */
8801 if (server
.appendfsync
== APPENDFSYNC_ALWAYS
||
8802 (server
.appendfsync
== APPENDFSYNC_EVERYSEC
&&
8803 now
-server
.lastfsync
> 1))
8805 /* aof_fsync is defined as fdatasync() for Linux in order to avoid
8806 * flushing metadata. */
8807 aof_fsync(server
.appendfd
); /* Let's try to get this data on the disk */
8808 server
.lastfsync
= now
;
8812 static sds
catAppendOnlyGenericCommand(sds buf
, int argc
, robj
**argv
) {
8814 buf
= sdscatprintf(buf
,"*%d\r\n",argc
);
8815 for (j
= 0; j
< argc
; j
++) {
8816 robj
*o
= getDecodedObject(argv
[j
]);
8817 buf
= sdscatprintf(buf
,"$%lu\r\n",(unsigned long)sdslen(o
->ptr
));
8818 buf
= sdscatlen(buf
,o
->ptr
,sdslen(o
->ptr
));
8819 buf
= sdscatlen(buf
,"\r\n",2);
8825 static sds
catAppendOnlyExpireAtCommand(sds buf
, robj
*key
, robj
*seconds
) {
8830 /* Make sure we can use strtol */
8831 seconds
= getDecodedObject(seconds
);
8832 when
= time(NULL
)+strtol(seconds
->ptr
,NULL
,10);
8833 decrRefCount(seconds
);
8835 argv
[0] = createStringObject("EXPIREAT",8);
8837 argv
[2] = createObject(REDIS_STRING
,
8838 sdscatprintf(sdsempty(),"%ld",when
));
8839 buf
= catAppendOnlyGenericCommand(buf
, argc
, argv
);
8840 decrRefCount(argv
[0]);
8841 decrRefCount(argv
[2]);
8845 static void feedAppendOnlyFile(struct redisCommand
*cmd
, int dictid
, robj
**argv
, int argc
) {
8846 sds buf
= sdsempty();
8849 /* The DB this command was targetting is not the same as the last command
8850 * we appendend. To issue a SELECT command is needed. */
8851 if (dictid
!= server
.appendseldb
) {
8854 snprintf(seldb
,sizeof(seldb
),"%d",dictid
);
8855 buf
= sdscatprintf(buf
,"*2\r\n$6\r\nSELECT\r\n$%lu\r\n%s\r\n",
8856 (unsigned long)strlen(seldb
),seldb
);
8857 server
.appendseldb
= dictid
;
8860 if (cmd
->proc
== expireCommand
) {
8861 /* Translate EXPIRE into EXPIREAT */
8862 buf
= catAppendOnlyExpireAtCommand(buf
,argv
[1],argv
[2]);
8863 } else if (cmd
->proc
== setexCommand
) {
8864 /* Translate SETEX to SET and EXPIREAT */
8865 tmpargv
[0] = createStringObject("SET",3);
8866 tmpargv
[1] = argv
[1];
8867 tmpargv
[2] = argv
[3];
8868 buf
= catAppendOnlyGenericCommand(buf
,3,tmpargv
);
8869 decrRefCount(tmpargv
[0]);
8870 buf
= catAppendOnlyExpireAtCommand(buf
,argv
[1],argv
[2]);
8872 buf
= catAppendOnlyGenericCommand(buf
,argc
,argv
);
8875 /* Append to the AOF buffer. This will be flushed on disk just before
8876 * of re-entering the event loop, so before the client will get a
8877 * positive reply about the operation performed. */
8878 server
.aofbuf
= sdscatlen(server
.aofbuf
,buf
,sdslen(buf
));
8880 /* If a background append only file rewriting is in progress we want to
8881 * accumulate the differences between the child DB and the current one
8882 * in a buffer, so that when the child process will do its work we
8883 * can append the differences to the new append only file. */
8884 if (server
.bgrewritechildpid
!= -1)
8885 server
.bgrewritebuf
= sdscatlen(server
.bgrewritebuf
,buf
,sdslen(buf
));
8890 /* In Redis commands are always executed in the context of a client, so in
8891 * order to load the append only file we need to create a fake client. */
8892 static struct redisClient
*createFakeClient(void) {
8893 struct redisClient
*c
= zmalloc(sizeof(*c
));
8897 c
->querybuf
= sdsempty();
8901 /* We set the fake client as a slave waiting for the synchronization
8902 * so that Redis will not try to send replies to this client. */
8903 c
->replstate
= REDIS_REPL_WAIT_BGSAVE_START
;
8904 c
->reply
= listCreate();
8905 listSetFreeMethod(c
->reply
,decrRefCount
);
8906 listSetDupMethod(c
->reply
,dupClientReplyValue
);
8907 initClientMultiState(c
);
8911 static void freeFakeClient(struct redisClient
*c
) {
8912 sdsfree(c
->querybuf
);
8913 listRelease(c
->reply
);
8914 freeClientMultiState(c
);
8918 /* Replay the append log file. On error REDIS_OK is returned. On non fatal
8919 * error (the append only file is zero-length) REDIS_ERR is returned. On
8920 * fatal error an error message is logged and the program exists. */
8921 int loadAppendOnlyFile(char *filename
) {
8922 struct redisClient
*fakeClient
;
8923 FILE *fp
= fopen(filename
,"r");
8924 struct redis_stat sb
;
8925 int appendonly
= server
.appendonly
;
8927 if (redis_fstat(fileno(fp
),&sb
) != -1 && sb
.st_size
== 0)
8931 redisLog(REDIS_WARNING
,"Fatal error: can't open the append log file for reading: %s",strerror(errno
));
8935 /* Temporarily disable AOF, to prevent EXEC from feeding a MULTI
8936 * to the same file we're about to read. */
8937 server
.appendonly
= 0;
8939 fakeClient
= createFakeClient();
8946 struct redisCommand
*cmd
;
8949 if (fgets(buf
,sizeof(buf
),fp
) == NULL
) {
8955 if (buf
[0] != '*') goto fmterr
;
8957 argv
= zmalloc(sizeof(robj
*)*argc
);
8958 for (j
= 0; j
< argc
; j
++) {
8959 if (fgets(buf
,sizeof(buf
),fp
) == NULL
) goto readerr
;
8960 if (buf
[0] != '$') goto fmterr
;
8961 len
= strtol(buf
+1,NULL
,10);
8962 argsds
= sdsnewlen(NULL
,len
);
8963 if (len
&& fread(argsds
,len
,1,fp
) == 0) goto fmterr
;
8964 argv
[j
] = createObject(REDIS_STRING
,argsds
);
8965 if (fread(buf
,2,1,fp
) == 0) goto fmterr
; /* discard CRLF */
8968 /* Command lookup */
8969 cmd
= lookupCommand(argv
[0]->ptr
);
8971 redisLog(REDIS_WARNING
,"Unknown command '%s' reading the append only file", argv
[0]->ptr
);
8974 /* Try object encoding */
8975 if (cmd
->flags
& REDIS_CMD_BULK
)
8976 argv
[argc
-1] = tryObjectEncoding(argv
[argc
-1]);
8977 /* Run the command in the context of a fake client */
8978 fakeClient
->argc
= argc
;
8979 fakeClient
->argv
= argv
;
8980 cmd
->proc(fakeClient
);
8981 /* Discard the reply objects list from the fake client */
8982 while(listLength(fakeClient
->reply
))
8983 listDelNode(fakeClient
->reply
,listFirst(fakeClient
->reply
));
8984 /* Clean up, ready for the next command */
8985 for (j
= 0; j
< argc
; j
++) decrRefCount(argv
[j
]);
8987 /* Handle swapping while loading big datasets when VM is on */
8989 if ((zmalloc_used_memory() - server
.vm_max_memory
) > 1024*1024*32)
8992 if (server
.vm_enabled
&& force_swapout
) {
8993 while (zmalloc_used_memory() > server
.vm_max_memory
) {
8994 if (vmSwapOneObjectBlocking() == REDIS_ERR
) break;
8999 /* This point can only be reached when EOF is reached without errors.
9000 * If the client is in the middle of a MULTI/EXEC, log error and quit. */
9001 if (fakeClient
->flags
& REDIS_MULTI
) goto readerr
;
9004 freeFakeClient(fakeClient
);
9005 server
.appendonly
= appendonly
;
9010 redisLog(REDIS_WARNING
,"Unexpected end of file reading the append only file");
9012 redisLog(REDIS_WARNING
,"Unrecoverable error reading the append only file: %s", strerror(errno
));
9016 redisLog(REDIS_WARNING
,"Bad file format reading the append only file");
/* Write binary-safe string into a file in the bulk format
 * $<count>\r\n<payload>\r\n
 *
 * Returns 1 on success, 0 if any of the writes fails. */
static int fwriteBulkString(FILE *fp, char *s, unsigned long len) {
    char header[128];
    int hlen;

    /* Header: '$', the decimal payload length, CRLF. */
    header[0] = '$';
    hlen = 1+ll2string(header+1,sizeof(header)-1,len);
    header[hlen++] = '\r';
    header[hlen++] = '\n';

    if (fwrite(header,hlen,1,fp) == 0) return 0;
    /* An empty payload is not written at all. */
    if (len > 0 && fwrite(s,len,1,fp) == 0) return 0;
    if (fwrite("\r\n",2,1,fp) == 0) return 0;
    return 1;
}
/* Write a double value in bulk format $<count>\r\n<payload>\r\n
 *
 * The value is formatted with "%.17g". Returns 1 on success, 0 if one of
 * the writes fails. */
static int fwriteBulkDouble(FILE *fp, double d) {
    char header[128], payload[128];
    size_t paylen;

    /* The payload buffer already carries its trailing CRLF, which is not
     * part of the advertised length. */
    snprintf(payload,sizeof(payload),"%.17g\r\n",d);
    paylen = strlen(payload);
    snprintf(header,sizeof(header),"$%lu\r\n",(unsigned long)paylen-2);

    if (fwrite(header,strlen(header),1,fp) == 0) return 0;
    if (fwrite(payload,paylen,1,fp) == 0) return 0;
    return 1;
}
/* Write a long long value in bulk format $<count>\r\n<payload>\r\n
 *
 * Returns 1 on success, 0 if the write fails. */
static int fwriteBulkLongLong(FILE *fp, long long l) {
    char digits[128], bulk[128];
    unsigned int ndigits, bulklen;

    ndigits = ll2string(digits,32,l);
    /* Header, payload and trailing CRLF are emitted with a single write. */
    bulklen = snprintf(bulk,sizeof(bulk),"$%u\r\n%s\r\n",ndigits,digits);
    if (fwrite(bulk,bulklen,1,fp) == 0) return 0;
    return 1;
}
9056 /* Delegate writing an object to writing a bulk string or bulk long long. */
9057 static int fwriteBulkObject(FILE *fp
, robj
*obj
) {
9058 /* Avoid using getDecodedObject to help copy-on-write (we are often
9059 * in a child process when this function is called). */
9060 if (obj
->encoding
== REDIS_ENCODING_INT
) {
9061 return fwriteBulkLongLong(fp
,(long)obj
->ptr
);
9062 } else if (obj
->encoding
== REDIS_ENCODING_RAW
) {
9063 return fwriteBulkString(fp
,obj
->ptr
,sdslen(obj
->ptr
));
9065 redisPanic("Unknown string encoding");
9069 /* Write a sequence of commands able to fully rebuild the dataset into
9070 * "filename". Used both by REWRITEAOF and BGREWRITEAOF. */
9071 static int rewriteAppendOnlyFile(char *filename
) {
9072 dictIterator
*di
= NULL
;
9077 time_t now
= time(NULL
);
9079 /* Note that we have to use a different temp name here compared to the
9080 * one used by rewriteAppendOnlyFileBackground() function. */
9081 snprintf(tmpfile
,256,"temp-rewriteaof-%d.aof", (int) getpid());
9082 fp
= fopen(tmpfile
,"w");
9084 redisLog(REDIS_WARNING
, "Failed rewriting the append only file: %s", strerror(errno
));
9087 for (j
= 0; j
< server
.dbnum
; j
++) {
9088 char selectcmd
[] = "*2\r\n$6\r\nSELECT\r\n";
9089 redisDb
*db
= server
.db
+j
;
9091 if (dictSize(d
) == 0) continue;
9092 di
= dictGetIterator(d
);
9098 /* SELECT the new DB */
9099 if (fwrite(selectcmd
,sizeof(selectcmd
)-1,1,fp
) == 0) goto werr
;
9100 if (fwriteBulkLongLong(fp
,j
) == 0) goto werr
;
9102 /* Iterate this DB writing every entry */
9103 while((de
= dictNext(di
)) != NULL
) {
9104 sds keystr
= dictGetEntryKey(de
);
9109 keystr
= dictGetEntryKey(de
);
9110 o
= dictGetEntryVal(de
);
9111 initStaticStringObject(key
,keystr
);
9112 /* If the value for this key is swapped, load a preview in memory.
9113 * We use a "swapped" flag to remember if we need to free the
9114 * value object instead to just increment the ref count anyway
9115 * in order to avoid copy-on-write of pages if we are forked() */
9116 if (!server
.vm_enabled
|| o
->storage
== REDIS_VM_MEMORY
||
9117 o
->storage
== REDIS_VM_SWAPPING
) {
9120 o
= vmPreviewObject(o
);
9123 expiretime
= getExpire(db
,&key
);
9125 /* Save the key and associated value */
9126 if (o
->type
== REDIS_STRING
) {
9127 /* Emit a SET command */
9128 char cmd
[]="*3\r\n$3\r\nSET\r\n";
9129 if (fwrite(cmd
,sizeof(cmd
)-1,1,fp
) == 0) goto werr
;
9131 if (fwriteBulkObject(fp
,&key
) == 0) goto werr
;
9132 if (fwriteBulkObject(fp
,o
) == 0) goto werr
;
9133 } else if (o
->type
== REDIS_LIST
) {
9134 /* Emit the RPUSHes needed to rebuild the list */
9135 char cmd
[]="*3\r\n$5\r\nRPUSH\r\n";
9136 if (o
->encoding
== REDIS_ENCODING_ZIPLIST
) {
9137 unsigned char *zl
= o
->ptr
;
9138 unsigned char *p
= ziplistIndex(zl
,0);
9139 unsigned char *vstr
;
9143 while(ziplistGet(p
,&vstr
,&vlen
,&vlong
)) {
9144 if (fwrite(cmd
,sizeof(cmd
)-1,1,fp
) == 0) goto werr
;
9145 if (fwriteBulkObject(fp
,&key
) == 0) goto werr
;
9147 if (fwriteBulkString(fp
,(char*)vstr
,vlen
) == 0)
9150 if (fwriteBulkLongLong(fp
,vlong
) == 0)
9153 p
= ziplistNext(zl
,p
);
9155 } else if (o
->encoding
== REDIS_ENCODING_LIST
) {
9156 list
*list
= o
->ptr
;
9160 listRewind(list
,&li
);
9161 while((ln
= listNext(&li
))) {
9162 robj
*eleobj
= listNodeValue(ln
);
9164 if (fwrite(cmd
,sizeof(cmd
)-1,1,fp
) == 0) goto werr
;
9165 if (fwriteBulkObject(fp
,&key
) == 0) goto werr
;
9166 if (fwriteBulkObject(fp
,eleobj
) == 0) goto werr
;
9169 redisPanic("Unknown list encoding");
9171 } else if (o
->type
== REDIS_SET
) {
9172 /* Emit the SADDs needed to rebuild the set */
9174 dictIterator
*di
= dictGetIterator(set
);
9177 while((de
= dictNext(di
)) != NULL
) {
9178 char cmd
[]="*3\r\n$4\r\nSADD\r\n";
9179 robj
*eleobj
= dictGetEntryKey(de
);
9181 if (fwrite(cmd
,sizeof(cmd
)-1,1,fp
) == 0) goto werr
;
9182 if (fwriteBulkObject(fp
,&key
) == 0) goto werr
;
9183 if (fwriteBulkObject(fp
,eleobj
) == 0) goto werr
;
9185 dictReleaseIterator(di
);
9186 } else if (o
->type
== REDIS_ZSET
) {
9187 /* Emit the ZADDs needed to rebuild the sorted set */
9189 dictIterator
*di
= dictGetIterator(zs
->dict
);
9192 while((de
= dictNext(di
)) != NULL
) {
9193 char cmd
[]="*4\r\n$4\r\nZADD\r\n";
9194 robj
*eleobj
= dictGetEntryKey(de
);
9195 double *score
= dictGetEntryVal(de
);
9197 if (fwrite(cmd
,sizeof(cmd
)-1,1,fp
) == 0) goto werr
;
9198 if (fwriteBulkObject(fp
,&key
) == 0) goto werr
;
9199 if (fwriteBulkDouble(fp
,*score
) == 0) goto werr
;
9200 if (fwriteBulkObject(fp
,eleobj
) == 0) goto werr
;
9202 dictReleaseIterator(di
);
9203 } else if (o
->type
== REDIS_HASH
) {
9204 char cmd
[]="*4\r\n$4\r\nHSET\r\n";
9206 /* Emit the HSETs needed to rebuild the hash */
9207 if (o
->encoding
== REDIS_ENCODING_ZIPMAP
) {
9208 unsigned char *p
= zipmapRewind(o
->ptr
);
9209 unsigned char *field
, *val
;
9210 unsigned int flen
, vlen
;
9212 while((p
= zipmapNext(p
,&field
,&flen
,&val
,&vlen
)) != NULL
) {
9213 if (fwrite(cmd
,sizeof(cmd
)-1,1,fp
) == 0) goto werr
;
9214 if (fwriteBulkObject(fp
,&key
) == 0) goto werr
;
9215 if (fwriteBulkString(fp
,(char*)field
,flen
) == -1)
9217 if (fwriteBulkString(fp
,(char*)val
,vlen
) == -1)
9221 dictIterator
*di
= dictGetIterator(o
->ptr
);
9224 while((de
= dictNext(di
)) != NULL
) {
9225 robj
*field
= dictGetEntryKey(de
);
9226 robj
*val
= dictGetEntryVal(de
);
9228 if (fwrite(cmd
,sizeof(cmd
)-1,1,fp
) == 0) goto werr
;
9229 if (fwriteBulkObject(fp
,&key
) == 0) goto werr
;
9230 if (fwriteBulkObject(fp
,field
) == -1) return -1;
9231 if (fwriteBulkObject(fp
,val
) == -1) return -1;
9233 dictReleaseIterator(di
);
9236 redisPanic("Unknown object type");
9238 /* Save the expire time */
9239 if (expiretime
!= -1) {
9240 char cmd
[]="*3\r\n$8\r\nEXPIREAT\r\n";
9241 /* If this key is already expired skip it */
9242 if (expiretime
< now
) continue;
9243 if (fwrite(cmd
,sizeof(cmd
)-1,1,fp
) == 0) goto werr
;
9244 if (fwriteBulkObject(fp
,&key
) == 0) goto werr
;
9245 if (fwriteBulkLongLong(fp
,expiretime
) == 0) goto werr
;
9247 if (swapped
) decrRefCount(o
);
9249 dictReleaseIterator(di
);
9252 /* Make sure data will not remain on the OS's output buffers */
9254 aof_fsync(fileno(fp
));
9257 /* Use RENAME to make sure the DB file is changed atomically only
9258 * if the generate DB file is ok. */
9259 if (rename(tmpfile
,filename
) == -1) {
9260 redisLog(REDIS_WARNING
,"Error moving temp append only file on the final destination: %s", strerror(errno
));
9264 redisLog(REDIS_NOTICE
,"SYNC append only file rewrite performed");
9270 redisLog(REDIS_WARNING
,"Write error writing append only file on disk: %s", strerror(errno
));
9271 if (di
) dictReleaseIterator(di
);
9275 /* This is how rewriting of the append only file in background works:
9277 * 1) The user calls BGREWRITEAOF
9278 * 2) Redis calls this function, that forks():
9279 * 2a) the child rewrite the append only file in a temp file.
9280 * 2b) the parent accumulates differences in server.bgrewritebuf.
9281 * 3) When the child finished '2a' exists.
9282 * 4) The parent will trap the exit code, if it's OK, will append the
9283 * data accumulated into server.bgrewritebuf into the temp file, and
9284 * finally will rename(2) the temp file in the actual file name.
9285 * The the new file is reopened as the new append only file. Profit!
9287 static int rewriteAppendOnlyFileBackground(void) {
9290 if (server
.bgrewritechildpid
!= -1) return REDIS_ERR
;
9291 if (server
.vm_enabled
) waitEmptyIOJobsQueue();
9292 if ((childpid
= fork()) == 0) {
9296 if (server
.vm_enabled
) vmReopenSwapFile();
9298 snprintf(tmpfile
,256,"temp-rewriteaof-bg-%d.aof", (int) getpid());
9299 if (rewriteAppendOnlyFile(tmpfile
) == REDIS_OK
) {
9306 if (childpid
== -1) {
9307 redisLog(REDIS_WARNING
,
9308 "Can't rewrite append only file in background: fork: %s",
9312 redisLog(REDIS_NOTICE
,
9313 "Background append only file rewriting started by pid %d",childpid
);
9314 server
.bgrewritechildpid
= childpid
;
9315 updateDictResizePolicy();
9316 /* We set appendseldb to -1 in order to force the next call to the
9317 * feedAppendOnlyFile() to issue a SELECT command, so the differences
9318 * accumulated by the parent into server.bgrewritebuf will start
9319 * with a SELECT statement and it will be safe to merge. */
9320 server
.appendseldb
= -1;
9323 return REDIS_OK
; /* unreached */
9326 static void bgrewriteaofCommand(redisClient
*c
) {
9327 if (server
.bgrewritechildpid
!= -1) {
9328 addReplySds(c
,sdsnew("-ERR background append only file rewriting already in progress\r\n"));
9331 if (rewriteAppendOnlyFileBackground() == REDIS_OK
) {
9332 char *status
= "+Background append only file rewriting started\r\n";
9333 addReplySds(c
,sdsnew(status
));
9335 addReply(c
,shared
.err
);
/* Remove the temp file used by the background AOF rewrite child whose pid
 * is 'childpid'. The result of unlink(2) is ignored. */
static void aofRemoveTempFile(pid_t childpid) {
    char path[256];

    snprintf(path,256,"temp-rewriteaof-bg-%d.aof", (int) childpid);
    unlink(path);
}
9346 /* Virtual Memory is composed mainly of two subsystems:
 * - Blocking Virtual Memory
9348 * - Threaded Virtual Memory I/O
9349 * The two parts are not fully decoupled, but functions are split among two
9350 * different sections of the source code (delimited by comments) in order to
9351 * make more clear what functionality is about the blocking VM and what about
9352 * the threaded (not blocking) VM.
9356 * Redis VM is a blocking VM (one that blocks reading swapped values from
9357 * disk into memory when a value swapped out is needed in memory) that is made
9358 * unblocking by trying to examine the command argument vector in order to
9359 * load in background values that will likely be needed in order to exec
9360 * the command. The command is executed only once all the relevant keys
9361 * are loaded into memory.
9363 * This basically is almost as simple of a blocking VM, but almost as parallel
9364 * as a fully non-blocking VM.
9367 /* =================== Virtual Memory - Blocking Side ====================== */
9369 /* Create a VM pointer object. This kind of objects are used in place of
9370 * values in the key -> value hash table, for swapped out objects. */
9371 static vmpointer
*createVmPointer(int vtype
) {
9372 vmpointer
*vp
= zmalloc(sizeof(vmpointer
));
9374 vp
->type
= REDIS_VMPOINTER
;
9375 vp
->storage
= REDIS_VM_SWAPPED
;
9380 static void vmInit(void) {
9386 if (server
.vm_max_threads
!= 0)
9387 zmalloc_enable_thread_safeness(); /* we need thread safe zmalloc() */
9389 redisLog(REDIS_NOTICE
,"Using '%s' as swap file",server
.vm_swap_file
);
9390 /* Try to open the old swap file, otherwise create it */
9391 if ((server
.vm_fp
= fopen(server
.vm_swap_file
,"r+b")) == NULL
) {
9392 server
.vm_fp
= fopen(server
.vm_swap_file
,"w+b");
9394 if (server
.vm_fp
== NULL
) {
9395 redisLog(REDIS_WARNING
,
9396 "Can't open the swap file: %s. Exiting.",
9400 server
.vm_fd
= fileno(server
.vm_fp
);
9401 /* Lock the swap file for writing, this is useful in order to avoid
9402 * another instance to use the same swap file for a config error. */
9403 fl
.l_type
= F_WRLCK
;
9404 fl
.l_whence
= SEEK_SET
;
9405 fl
.l_start
= fl
.l_len
= 0;
9406 if (fcntl(server
.vm_fd
,F_SETLK
,&fl
) == -1) {
9407 redisLog(REDIS_WARNING
,
9408 "Can't lock the swap file at '%s': %s. Make sure it is not used by another Redis instance.", server
.vm_swap_file
, strerror(errno
));
9412 server
.vm_next_page
= 0;
9413 server
.vm_near_pages
= 0;
9414 server
.vm_stats_used_pages
= 0;
9415 server
.vm_stats_swapped_objects
= 0;
9416 server
.vm_stats_swapouts
= 0;
9417 server
.vm_stats_swapins
= 0;
9418 totsize
= server
.vm_pages
*server
.vm_page_size
;
9419 redisLog(REDIS_NOTICE
,"Allocating %lld bytes of swap file",totsize
);
9420 if (ftruncate(server
.vm_fd
,totsize
) == -1) {
9421 redisLog(REDIS_WARNING
,"Can't ftruncate swap file: %s. Exiting.",
9425 redisLog(REDIS_NOTICE
,"Swap file allocated with success");
9427 server
.vm_bitmap
= zmalloc((server
.vm_pages
+7)/8);
9428 redisLog(REDIS_VERBOSE
,"Allocated %lld bytes page table for %lld pages",
9429 (long long) (server
.vm_pages
+7)/8, server
.vm_pages
);
9430 memset(server
.vm_bitmap
,0,(server
.vm_pages
+7)/8);
9432 /* Initialize threaded I/O (used by Virtual Memory) */
9433 server
.io_newjobs
= listCreate();
9434 server
.io_processing
= listCreate();
9435 server
.io_processed
= listCreate();
9436 server
.io_ready_clients
= listCreate();
9437 pthread_mutex_init(&server
.io_mutex
,NULL
);
9438 pthread_mutex_init(&server
.obj_freelist_mutex
,NULL
);
9439 pthread_mutex_init(&server
.io_swapfile_mutex
,NULL
);
9440 server
.io_active_threads
= 0;
9441 if (pipe(pipefds
) == -1) {
9442 redisLog(REDIS_WARNING
,"Unable to intialized VM: pipe(2): %s. Exiting."
9446 server
.io_ready_pipe_read
= pipefds
[0];
9447 server
.io_ready_pipe_write
= pipefds
[1];
9448 redisAssert(anetNonBlock(NULL
,server
.io_ready_pipe_read
) != ANET_ERR
);
9449 /* LZF requires a lot of stack */
9450 pthread_attr_init(&server
.io_threads_attr
);
9451 pthread_attr_getstacksize(&server
.io_threads_attr
, &stacksize
);
9452 while (stacksize
< REDIS_THREAD_STACK_SIZE
) stacksize
*= 2;
9453 pthread_attr_setstacksize(&server
.io_threads_attr
, stacksize
);
9454 /* Listen for events in the threaded I/O pipe */
9455 if (aeCreateFileEvent(server
.el
, server
.io_ready_pipe_read
, AE_READABLE
,
9456 vmThreadedIOCompletedJob
, NULL
) == AE_ERR
)
9457 oom("creating file event");
9460 /* Mark the page as used */
9461 static void vmMarkPageUsed(off_t page
) {
9462 off_t byte
= page
/8;
9464 redisAssert(vmFreePage(page
) == 1);
9465 server
.vm_bitmap
[byte
] |= 1<<bit
;
9468 /* Mark N contiguous pages as used, with 'page' being the first. */
9469 static void vmMarkPagesUsed(off_t page
, off_t count
) {
9472 for (j
= 0; j
< count
; j
++)
9473 vmMarkPageUsed(page
+j
);
9474 server
.vm_stats_used_pages
+= count
;
9475 redisLog(REDIS_DEBUG
,"Mark USED pages: %lld pages at %lld\n",
9476 (long long)count
, (long long)page
);
9479 /* Mark the page as free */
9480 static void vmMarkPageFree(off_t page
) {
9481 off_t byte
= page
/8;
9483 redisAssert(vmFreePage(page
) == 0);
9484 server
.vm_bitmap
[byte
] &= ~(1<<bit
);
9487 /* Mark N contiguous pages as free, with 'page' being the first. */
9488 static void vmMarkPagesFree(off_t page
, off_t count
) {
9491 for (j
= 0; j
< count
; j
++)
9492 vmMarkPageFree(page
+j
);
9493 server
.vm_stats_used_pages
-= count
;
9494 redisLog(REDIS_DEBUG
,"Mark FREE pages: %lld pages at %lld\n",
9495 (long long)count
, (long long)page
);
9498 /* Test if the page is free */
9499 static int vmFreePage(off_t page
) {
9500 off_t byte
= page
/8;
9502 return (server
.vm_bitmap
[byte
] & (1<<bit
)) == 0;
9505 /* Find N contiguous free pages storing the first page of the cluster in *first.
9506 * Returns REDIS_OK if it was able to find N contiguous pages, otherwise
9507 * REDIS_ERR is returned.
9509 * This function uses a simple algorithm: we try to allocate
9510 * REDIS_VM_MAX_NEAR_PAGES sequentially, when we reach this limit we start
9511 * again from the start of the swap file searching for free spaces.
9513 * If it looks pretty clear that there are no free pages near our offset
9514 * we try to find less populated places doing a forward jump of
9515 * REDIS_VM_MAX_RANDOM_JUMP, then we start scanning again a few pages
9516 * without hurry, and then we jump again and so forth...
9518 * This function can be improved using a free list to avoid to guess
9519 * too much, since we could collect data about freed pages.
9521 * note: I implemented this function just after watching an episode of
9522 * Battlestar Galactica, where the hybrid was continuing to say "JUMP!"
9524 static int vmFindContiguousPages(off_t
*first
, off_t n
) {
9525 off_t base
, offset
= 0, since_jump
= 0, numfree
= 0;
9527 if (server
.vm_near_pages
== REDIS_VM_MAX_NEAR_PAGES
) {
9528 server
.vm_near_pages
= 0;
9529 server
.vm_next_page
= 0;
9531 server
.vm_near_pages
++; /* Yet another try for pages near to the old ones */
9532 base
= server
.vm_next_page
;
9534 while(offset
< server
.vm_pages
) {
9535 off_t
this = base
+offset
;
9537 /* If we overflow, restart from page zero */
9538 if (this >= server
.vm_pages
) {
9539 this -= server
.vm_pages
;
9541 /* Just overflowed, what we found on tail is no longer
9542 * interesting, as it's no longer contiguous. */
9546 if (vmFreePage(this)) {
9547 /* This is a free page */
9549 /* Already got N free pages? Return to the caller, with success */
9551 *first
= this-(n
-1);
9552 server
.vm_next_page
= this+1;
9553 redisLog(REDIS_DEBUG
, "FOUND CONTIGUOUS PAGES: %lld pages at %lld\n", (long long) n
, (long long) *first
);
9557 /* The current one is not a free page */
9561 /* Fast-forward if the current page is not free and we already
9562 * searched enough near this place. */
9564 if (!numfree
&& since_jump
>= REDIS_VM_MAX_RANDOM_JUMP
/4) {
9565 offset
+= random() % REDIS_VM_MAX_RANDOM_JUMP
;
9567 /* Note that even if we rewind after the jump, we are don't need
9568 * to make sure numfree is set to zero as we only jump *if* it
9569 * is set to zero. */
9571 /* Otherwise just check the next page */
9578 /* Write the specified object at the specified page of the swap file */
9579 static int vmWriteObjectOnSwap(robj
*o
, off_t page
) {
9580 if (server
.vm_enabled
) pthread_mutex_lock(&server
.io_swapfile_mutex
);
9581 if (fseeko(server
.vm_fp
,page
*server
.vm_page_size
,SEEK_SET
) == -1) {
9582 if (server
.vm_enabled
) pthread_mutex_unlock(&server
.io_swapfile_mutex
);
9583 redisLog(REDIS_WARNING
,
9584 "Critical VM problem in vmWriteObjectOnSwap(): can't seek: %s",
9588 rdbSaveObject(server
.vm_fp
,o
);
9589 fflush(server
.vm_fp
);
9590 if (server
.vm_enabled
) pthread_mutex_unlock(&server
.io_swapfile_mutex
);
9594 /* Transfers the 'val' object to disk. Store all the information
9595 * a 'vmpointer' object containing all the information needed to load the
9596 * object back later is returned.
9598 * If we can't find enough contiguous empty pages to swap the object on disk
9599 * NULL is returned. */
9600 static vmpointer
*vmSwapObjectBlocking(robj
*val
) {
9601 off_t pages
= rdbSavedObjectPages(val
,NULL
);
9605 assert(val
->storage
== REDIS_VM_MEMORY
);
9606 assert(val
->refcount
== 1);
9607 if (vmFindContiguousPages(&page
,pages
) == REDIS_ERR
) return NULL
;
9608 if (vmWriteObjectOnSwap(val
,page
) == REDIS_ERR
) return NULL
;
9610 vp
= createVmPointer(val
->type
);
9612 vp
->usedpages
= pages
;
9613 decrRefCount(val
); /* Deallocate the object from memory. */
9614 vmMarkPagesUsed(page
,pages
);
9615 redisLog(REDIS_DEBUG
,"VM: object %p swapped out at %lld (%lld pages)",
9617 (unsigned long long) page
, (unsigned long long) pages
);
9618 server
.vm_stats_swapped_objects
++;
9619 server
.vm_stats_swapouts
++;
9623 static robj
*vmReadObjectFromSwap(off_t page
, int type
) {
9626 if (server
.vm_enabled
) pthread_mutex_lock(&server
.io_swapfile_mutex
);
9627 if (fseeko(server
.vm_fp
,page
*server
.vm_page_size
,SEEK_SET
) == -1) {
9628 redisLog(REDIS_WARNING
,
9629 "Unrecoverable VM problem in vmReadObjectFromSwap(): can't seek: %s",
9633 o
= rdbLoadObject(type
,server
.vm_fp
);
9635 redisLog(REDIS_WARNING
, "Unrecoverable VM problem in vmReadObjectFromSwap(): can't load object from swap file: %s", strerror(errno
));
9638 if (server
.vm_enabled
) pthread_mutex_unlock(&server
.io_swapfile_mutex
);
9642 /* Load the specified object from swap to memory.
9643 * The newly allocated object is returned.
9645 * If preview is true the unserialized object is returned to the caller but
9646 * the pages are not marked as freed, nor the vp object is freed. */
9647 static robj
*vmGenericLoadObject(vmpointer
*vp
, int preview
) {
9650 redisAssert(vp
->type
== REDIS_VMPOINTER
&&
9651 (vp
->storage
== REDIS_VM_SWAPPED
|| vp
->storage
== REDIS_VM_LOADING
));
9652 val
= vmReadObjectFromSwap(vp
->page
,vp
->vtype
);
9654 redisLog(REDIS_DEBUG
, "VM: object %p loaded from disk", (void*)vp
);
9655 vmMarkPagesFree(vp
->page
,vp
->usedpages
);
9657 server
.vm_stats_swapped_objects
--;
9659 redisLog(REDIS_DEBUG
, "VM: object %p previewed from disk", (void*)vp
);
9661 server
.vm_stats_swapins
++;
9665 /* Plain object loading, from swap to memory.
9667 * 'o' is actually a redisVmPointer structure that will be freed by the call.
9668 * The return value is the loaded object. */
9669 static robj
*vmLoadObject(robj
*o
) {
9670 /* If we are loading the object in background, stop it, we
9671 * need to load this object synchronously ASAP. */
9672 if (o
->storage
== REDIS_VM_LOADING
)
9673 vmCancelThreadedIOJob(o
);
9674 return vmGenericLoadObject((vmpointer
*)o
,0);
9677 /* Just load the value on disk, without to modify the key.
9678 * This is useful when we want to perform some operation on the value
9679 * without to really bring it from swap to memory, like while saving the
9680 * dataset or rewriting the append only log. */
9681 static robj
*vmPreviewObject(robj
*o
) {
9682 return vmGenericLoadObject((vmpointer
*)o
,1);
9685 /* How a good candidate is this object for swapping?
9686 * The better candidate it is, the greater the returned value.
9688 * Currently we try to perform a fast estimation of the object size in
9689 * memory, and combine it with aging informations.
9691 * Basically swappability = idle-time * log(estimated size)
9693 * Bigger objects are preferred over smaller objects, but not
9694 * proportionally, this is why we use the logarithm. This algorithm is
9695 * just a first try and will probably be tuned later. */
9696 static double computeObjectSwappability(robj
*o
) {
9697 /* actual age can be >= minage, but not < minage. As we use wrapping
9698 * 21 bit clocks with minutes resolution for the LRU. */
9699 time_t minage
= abs(server
.lruclock
- o
->lru
);
9700 long asize
= 0, elesize
;
9705 struct dictEntry
*de
;
9708 if (minage
<= 0) return 0;
9711 if (o
->encoding
!= REDIS_ENCODING_RAW
) {
9714 asize
= sdslen(o
->ptr
)+sizeof(*o
)+sizeof(long)*2;
9718 if (o
->encoding
== REDIS_ENCODING_ZIPLIST
) {
9719 asize
= sizeof(*o
)+ziplistSize(o
->ptr
);
9723 asize
= sizeof(list
);
9726 elesize
= (ele
->encoding
== REDIS_ENCODING_RAW
) ?
9727 (sizeof(*o
)+sdslen(ele
->ptr
)) : sizeof(*o
);
9728 asize
+= (sizeof(listNode
)+elesize
)*listLength(l
);
9734 z
= (o
->type
== REDIS_ZSET
);
9735 d
= z
? ((zset
*)o
->ptr
)->dict
: o
->ptr
;
9737 asize
= sizeof(dict
)+(sizeof(struct dictEntry
*)*dictSlots(d
));
9738 if (z
) asize
+= sizeof(zset
)-sizeof(dict
);
9740 de
= dictGetRandomKey(d
);
9741 ele
= dictGetEntryKey(de
);
9742 elesize
= (ele
->encoding
== REDIS_ENCODING_RAW
) ?
9743 (sizeof(*o
)+sdslen(ele
->ptr
)) : sizeof(*o
);
9744 asize
+= (sizeof(struct dictEntry
)+elesize
)*dictSize(d
);
9745 if (z
) asize
+= sizeof(zskiplistNode
)*dictSize(d
);
9749 if (o
->encoding
== REDIS_ENCODING_ZIPMAP
) {
9750 unsigned char *p
= zipmapRewind((unsigned char*)o
->ptr
);
9751 unsigned int len
= zipmapLen((unsigned char*)o
->ptr
);
9752 unsigned int klen
, vlen
;
9753 unsigned char *key
, *val
;
9755 if ((p
= zipmapNext(p
,&key
,&klen
,&val
,&vlen
)) == NULL
) {
9759 asize
= len
*(klen
+vlen
+3);
9760 } else if (o
->encoding
== REDIS_ENCODING_HT
) {
9762 asize
= sizeof(dict
)+(sizeof(struct dictEntry
*)*dictSlots(d
));
9764 de
= dictGetRandomKey(d
);
9765 ele
= dictGetEntryKey(de
);
9766 elesize
= (ele
->encoding
== REDIS_ENCODING_RAW
) ?
9767 (sizeof(*o
)+sdslen(ele
->ptr
)) : sizeof(*o
);
9768 ele
= dictGetEntryVal(de
);
9769 elesize
= (ele
->encoding
== REDIS_ENCODING_RAW
) ?
9770 (sizeof(*o
)+sdslen(ele
->ptr
)) : sizeof(*o
);
9771 asize
+= (sizeof(struct dictEntry
)+elesize
)*dictSize(d
);
9776 return (double)minage
*log(1+asize
);
9779 /* Try to swap an object that's a good candidate for swapping.
9780 * Returns REDIS_OK if the object was swapped, REDIS_ERR if it's not possible
9781 * to swap any object at all.
9783 * If 'usethreaded' is true, Redis will try to swap the object in background
9784 * using I/O threads. */
9785 static int vmSwapOneObject(int usethreads
) {
9787 struct dictEntry
*best
= NULL
;
9788 double best_swappability
= 0;
9789 redisDb
*best_db
= NULL
;
9793 for (j
= 0; j
< server
.dbnum
; j
++) {
9794 redisDb
*db
= server
.db
+j
;
9795 /* Why maxtries is set to 100?
9796 * Because this way (usually) we'll find 1 object even if just 1% - 2%
9797 * are swappable objects */
9800 if (dictSize(db
->dict
) == 0) continue;
9801 for (i
= 0; i
< 5; i
++) {
9803 double swappability
;
9805 if (maxtries
) maxtries
--;
9806 de
= dictGetRandomKey(db
->dict
);
9807 val
= dictGetEntryVal(de
);
9808 /* Only swap objects that are currently in memory.
9810 * Also don't swap shared objects: not a good idea in general and
9811 * we need to ensure that the main thread does not touch the
9812 * object while the I/O thread is using it, but we can't
9813 * control other keys without adding additional mutex. */
9814 if (val
->storage
!= REDIS_VM_MEMORY
|| val
->refcount
!= 1) {
9815 if (maxtries
) i
--; /* don't count this try */
9818 swappability
= computeObjectSwappability(val
);
9819 if (!best
|| swappability
> best_swappability
) {
9821 best_swappability
= swappability
;
9826 if (best
== NULL
) return REDIS_ERR
;
9827 key
= dictGetEntryKey(best
);
9828 val
= dictGetEntryVal(best
);
9830 redisLog(REDIS_DEBUG
,"Key with best swappability: %s, %f",
9831 key
, best_swappability
);
9835 robj
*keyobj
= createStringObject(key
,sdslen(key
));
9836 vmSwapObjectThreaded(keyobj
,val
,best_db
);
9837 decrRefCount(keyobj
);
9842 if ((vp
= vmSwapObjectBlocking(val
)) != NULL
) {
9843 dictGetEntryVal(best
) = vp
;
/* Swap one object out synchronously.
 * Returns whatever vmSwapOneObject() returns in non threaded mode. */
static int vmSwapOneObjectBlocking(void) {
    return vmSwapOneObject(0);
}
/* Swap one object out using the I/O threads.
 * Returns whatever vmSwapOneObject() returns in threaded mode. */
static int vmSwapOneObjectThreaded(void) {
    return vmSwapOneObject(1);
}
9859 /* Return true if it's safe to swap out objects in a given moment.
9860 * Basically we don't want to swap objects out while there is a BGSAVE
9861 * or a BGAEOREWRITE running in backgroud. */
9862 static int vmCanSwapOut(void) {
9863 return (server
.bgsavechildpid
== -1 && server
.bgrewritechildpid
== -1);
9866 /* =================== Virtual Memory - Threaded I/O ======================= */
9868 static void freeIOJob(iojob
*j
) {
9869 if ((j
->type
== REDIS_IOJOB_PREPARE_SWAP
||
9870 j
->type
== REDIS_IOJOB_DO_SWAP
||
9871 j
->type
== REDIS_IOJOB_LOAD
) && j
->val
!= NULL
)
9873 /* we fix the storage type, otherwise decrRefCount() will try to
9874 * kill the I/O thread Job (that does no longer exists). */
9875 if (j
->val
->storage
== REDIS_VM_SWAPPING
)
9876 j
->val
->storage
= REDIS_VM_MEMORY
;
9877 decrRefCount(j
->val
);
9879 decrRefCount(j
->key
);
9883 /* Every time a thread finished a Job, it writes a byte into the write side
9884 * of an unix pipe in order to "awake" the main thread, and this function
9886 static void vmThreadedIOCompletedJob(aeEventLoop
*el
, int fd
, void *privdata
,
9890 int retval
, processed
= 0, toprocess
= -1, trytoswap
= 1;
9892 REDIS_NOTUSED(mask
);
9893 REDIS_NOTUSED(privdata
);
9895 /* For every byte we read in the read side of the pipe, there is one
9896 * I/O job completed to process. */
9897 while((retval
= read(fd
,buf
,1)) == 1) {
9900 struct dictEntry
*de
;
9902 redisLog(REDIS_DEBUG
,"Processing I/O completed job");
9904 /* Get the processed element (the oldest one) */
9906 assert(listLength(server
.io_processed
) != 0);
9907 if (toprocess
== -1) {
9908 toprocess
= (listLength(server
.io_processed
)*REDIS_MAX_COMPLETED_JOBS_PROCESSED
)/100;
9909 if (toprocess
<= 0) toprocess
= 1;
9911 ln
= listFirst(server
.io_processed
);
9913 listDelNode(server
.io_processed
,ln
);
9915 /* If this job is marked as canceled, just ignore it */
9920 /* Post process it in the main thread, as there are things we
9921 * can do just here to avoid race conditions and/or invasive locks */
9922 redisLog(REDIS_DEBUG
,"COMPLETED Job type: %d, ID %p, key: %s", j
->type
, (void*)j
->id
, (unsigned char*)j
->key
->ptr
);
9923 de
= dictFind(j
->db
->dict
,j
->key
->ptr
);
9924 redisAssert(de
!= NULL
);
9925 if (j
->type
== REDIS_IOJOB_LOAD
) {
9927 vmpointer
*vp
= dictGetEntryVal(de
);
9929 /* Key loaded, bring it at home */
9930 vmMarkPagesFree(vp
->page
,vp
->usedpages
);
9931 redisLog(REDIS_DEBUG
, "VM: object %s loaded from disk (threaded)",
9932 (unsigned char*) j
->key
->ptr
);
9933 server
.vm_stats_swapped_objects
--;
9934 server
.vm_stats_swapins
++;
9935 dictGetEntryVal(de
) = j
->val
;
9936 incrRefCount(j
->val
);
9938 /* Handle clients waiting for this key to be loaded. */
9939 handleClientsBlockedOnSwappedKey(db
,j
->key
);
9942 } else if (j
->type
== REDIS_IOJOB_PREPARE_SWAP
) {
9943 /* Now we know the amount of pages required to swap this object.
9944 * Let's find some space for it, and queue this task again
9945 * rebranded as REDIS_IOJOB_DO_SWAP. */
9946 if (!vmCanSwapOut() ||
9947 vmFindContiguousPages(&j
->page
,j
->pages
) == REDIS_ERR
)
9949 /* Ooops... no space or we can't swap as there is
9950 * a fork()ed Redis trying to save stuff on disk. */
9951 j
->val
->storage
= REDIS_VM_MEMORY
; /* undo operation */
9954 /* Note that we need to mark this pages as used now,
9955 * if the job will be canceled, we'll mark them as freed
9957 vmMarkPagesUsed(j
->page
,j
->pages
);
9958 j
->type
= REDIS_IOJOB_DO_SWAP
;
9963 } else if (j
->type
== REDIS_IOJOB_DO_SWAP
) {
9966 /* Key swapped. We can finally free some memory. */
9967 if (j
->val
->storage
!= REDIS_VM_SWAPPING
) {
9968 vmpointer
*vp
= (vmpointer
*) j
->id
;
9969 printf("storage: %d\n",vp
->storage
);
9970 printf("key->name: %s\n",(char*)j
->key
->ptr
);
9971 printf("val: %p\n",(void*)j
->val
);
9972 printf("val->type: %d\n",j
->val
->type
);
9973 printf("val->ptr: %s\n",(char*)j
->val
->ptr
);
9975 redisAssert(j
->val
->storage
== REDIS_VM_SWAPPING
);
9976 vp
= createVmPointer(j
->val
->type
);
9978 vp
->usedpages
= j
->pages
;
9979 dictGetEntryVal(de
) = vp
;
9980 /* Fix the storage otherwise decrRefCount will attempt to
9981 * remove the associated I/O job */
9982 j
->val
->storage
= REDIS_VM_MEMORY
;
9983 decrRefCount(j
->val
);
9984 redisLog(REDIS_DEBUG
,
9985 "VM: object %s swapped out at %lld (%lld pages) (threaded)",
9986 (unsigned char*) j
->key
->ptr
,
9987 (unsigned long long) j
->page
, (unsigned long long) j
->pages
);
9988 server
.vm_stats_swapped_objects
++;
9989 server
.vm_stats_swapouts
++;
9991 /* Put a few more swap requests in queue if we are still
9993 if (trytoswap
&& vmCanSwapOut() &&
9994 zmalloc_used_memory() > server
.vm_max_memory
)
9999 more
= listLength(server
.io_newjobs
) <
10000 (unsigned) server
.vm_max_threads
;
10001 unlockThreadedIO();
10002 /* Don't waste CPU time if swappable objects are rare. */
10003 if (vmSwapOneObjectThreaded() == REDIS_ERR
) {
10011 if (processed
== toprocess
) return;
10013 if (retval
< 0 && errno
!= EAGAIN
) {
10014 redisLog(REDIS_WARNING
,
10015 "WARNING: read(2) error in vmThreadedIOCompletedJob() %s",
10020 static void lockThreadedIO(void) {
10021 pthread_mutex_lock(&server
.io_mutex
);
10024 static void unlockThreadedIO(void) {
10025 pthread_mutex_unlock(&server
.io_mutex
);
10028 /* Remove the specified object from the threaded I/O queue if still not
10029 * processed, otherwise make sure to flag it as canceled. */
10030 static void vmCancelThreadedIOJob(robj
*o
) {
10032 server
.io_newjobs
, /* 0 */
10033 server
.io_processing
, /* 1 */
10034 server
.io_processed
/* 2 */
10038 assert(o
->storage
== REDIS_VM_LOADING
|| o
->storage
== REDIS_VM_SWAPPING
);
10041 /* Search for a matching object in one of the queues */
10042 for (i
= 0; i
< 3; i
++) {
10046 listRewind(lists
[i
],&li
);
10047 while ((ln
= listNext(&li
)) != NULL
) {
10048 iojob
*job
= ln
->value
;
10050 if (job
->canceled
) continue; /* Skip this, already canceled. */
10051 if (job
->id
== o
) {
10052 redisLog(REDIS_DEBUG
,"*** CANCELED %p (key %s) (type %d) (LIST ID %d)\n",
10053 (void*)job
, (char*)job
->key
->ptr
, job
->type
, i
);
10054 /* Mark the pages as free since the swap didn't happened
10055 * or happened but is now discarded. */
10056 if (i
!= 1 && job
->type
== REDIS_IOJOB_DO_SWAP
)
10057 vmMarkPagesFree(job
->page
,job
->pages
);
10058 /* Cancel the job. It depends on the list the job is
10061 case 0: /* io_newjobs */
10062 /* If the job was yet not processed the best thing to do
10063 * is to remove it from the queue at all */
10065 listDelNode(lists
[i
],ln
);
10067 case 1: /* io_processing */
10068 /* Oh Shi- the thread is messing with the Job:
10070 * Probably it's accessing the object if this is a
10071 * PREPARE_SWAP or DO_SWAP job.
10072 * If it's a LOAD job it may be reading from disk and
10073 * if we don't wait for the job to terminate before to
10074 * cancel it, maybe in a few microseconds data can be
10075 * corrupted in this pages. So the short story is:
10077 * Better to wait for the job to move into the
10078 * next queue (processed)... */
10080 /* We try again and again until the job is completed. */
10081 unlockThreadedIO();
10082 /* But let's wait some time for the I/O thread
10083 * to finish with this job. After all this condition
10084 * should be very rare. */
10087 case 2: /* io_processed */
10088 /* The job was already processed, that's easy...
10089 * just mark it as canceled so that we'll ignore it
10090 * when processing completed jobs. */
10094 /* Finally we have to adjust the storage type of the object
10095 * in order to "UNDO" the operaiton. */
10096 if (o
->storage
== REDIS_VM_LOADING
)
10097 o
->storage
= REDIS_VM_SWAPPED
;
10098 else if (o
->storage
== REDIS_VM_SWAPPING
)
10099 o
->storage
= REDIS_VM_MEMORY
;
10100 unlockThreadedIO();
10101 redisLog(REDIS_DEBUG
,"*** DONE");
10106 unlockThreadedIO();
10107 printf("Not found: %p\n", (void*)o
);
10108 redisAssert(1 != 1); /* We should never reach this */
10111 static void *IOThreadEntryPoint(void *arg
) {
10114 REDIS_NOTUSED(arg
);
10116 pthread_detach(pthread_self());
10118 /* Get a new job to process */
10120 if (listLength(server
.io_newjobs
) == 0) {
10121 /* No new jobs in queue, exit. */
10122 redisLog(REDIS_DEBUG
,"Thread %ld exiting, nothing to do",
10123 (long) pthread_self());
10124 server
.io_active_threads
--;
10125 unlockThreadedIO();
10128 ln
= listFirst(server
.io_newjobs
);
10130 listDelNode(server
.io_newjobs
,ln
);
10131 /* Add the job in the processing queue */
10132 j
->thread
= pthread_self();
10133 listAddNodeTail(server
.io_processing
,j
);
10134 ln
= listLast(server
.io_processing
); /* We use ln later to remove it */
10135 unlockThreadedIO();
10136 redisLog(REDIS_DEBUG
,"Thread %ld got a new job (type %d): %p about key '%s'",
10137 (long) pthread_self(), j
->type
, (void*)j
, (char*)j
->key
->ptr
);
10139 /* Process the Job */
10140 if (j
->type
== REDIS_IOJOB_LOAD
) {
10141 vmpointer
*vp
= (vmpointer
*)j
->id
;
10142 j
->val
= vmReadObjectFromSwap(j
->page
,vp
->vtype
);
10143 } else if (j
->type
== REDIS_IOJOB_PREPARE_SWAP
) {
10144 FILE *fp
= fopen("/dev/null","w+");
10145 j
->pages
= rdbSavedObjectPages(j
->val
,fp
);
10147 } else if (j
->type
== REDIS_IOJOB_DO_SWAP
) {
10148 if (vmWriteObjectOnSwap(j
->val
,j
->page
) == REDIS_ERR
)
10152 /* Done: insert the job into the processed queue */
10153 redisLog(REDIS_DEBUG
,"Thread %ld completed the job: %p (key %s)",
10154 (long) pthread_self(), (void*)j
, (char*)j
->key
->ptr
);
10156 listDelNode(server
.io_processing
,ln
);
10157 listAddNodeTail(server
.io_processed
,j
);
10158 unlockThreadedIO();
10160 /* Signal the main thread there is new stuff to process */
10161 assert(write(server
.io_ready_pipe_write
,"x",1) == 1);
10163 return NULL
; /* never reached */
10166 static void spawnIOThread(void) {
10168 sigset_t mask
, omask
;
10171 sigemptyset(&mask
);
10172 sigaddset(&mask
,SIGCHLD
);
10173 sigaddset(&mask
,SIGHUP
);
10174 sigaddset(&mask
,SIGPIPE
);
10175 pthread_sigmask(SIG_SETMASK
, &mask
, &omask
);
10176 while ((err
= pthread_create(&thread
,&server
.io_threads_attr
,IOThreadEntryPoint
,NULL
)) != 0) {
10177 redisLog(REDIS_WARNING
,"Unable to spawn an I/O thread: %s",
10181 pthread_sigmask(SIG_SETMASK
, &omask
, NULL
);
10182 server
.io_active_threads
++;
10185 /* We need to wait for the last thread to exit before we are able to
10186 * fork() in order to BGSAVE or BGREWRITEAOF. */
10187 static void waitEmptyIOJobsQueue(void) {
10189 int io_processed_len
;
10192 if (listLength(server
.io_newjobs
) == 0 &&
10193 listLength(server
.io_processing
) == 0 &&
10194 server
.io_active_threads
== 0)
10196 unlockThreadedIO();
10199 /* While waiting for empty jobs queue condition we post-process some
10200 * finshed job, as I/O threads may be hanging trying to write against
10201 * the io_ready_pipe_write FD but there are so much pending jobs that
10202 * it's blocking. */
10203 io_processed_len
= listLength(server
.io_processed
);
10204 unlockThreadedIO();
10205 if (io_processed_len
) {
10206 vmThreadedIOCompletedJob(NULL
,server
.io_ready_pipe_read
,NULL
,0);
10207 usleep(1000); /* 1 millisecond */
10209 usleep(10000); /* 10 milliseconds */
10214 static void vmReopenSwapFile(void) {
10215 /* Note: we don't close the old one as we are in the child process
10216 * and don't want to mess at all with the original file object. */
10217 server
.vm_fp
= fopen(server
.vm_swap_file
,"r+b");
10218 if (server
.vm_fp
== NULL
) {
10219 redisLog(REDIS_WARNING
,"Can't re-open the VM swap file: %s. Exiting.",
10220 server
.vm_swap_file
);
10223 server
.vm_fd
= fileno(server
.vm_fp
);
10226 /* This function must be called while with threaded IO locked */
10227 static void queueIOJob(iojob
*j
) {
10228 redisLog(REDIS_DEBUG
,"Queued IO Job %p type %d about key '%s'\n",
10229 (void*)j
, j
->type
, (char*)j
->key
->ptr
);
10230 listAddNodeTail(server
.io_newjobs
,j
);
10231 if (server
.io_active_threads
< server
.vm_max_threads
)
10235 static int vmSwapObjectThreaded(robj
*key
, robj
*val
, redisDb
*db
) {
10238 j
= zmalloc(sizeof(*j
));
10239 j
->type
= REDIS_IOJOB_PREPARE_SWAP
;
10243 j
->id
= j
->val
= val
;
10246 j
->thread
= (pthread_t
) -1;
10247 val
->storage
= REDIS_VM_SWAPPING
;
10251 unlockThreadedIO();
10255 /* ============ Virtual Memory - Blocking clients on missing keys =========== */
10257 /* This function makes the clinet 'c' waiting for the key 'key' to be loaded.
10258 * If there is not already a job loading the key, it is craeted.
10259 * The key is added to the io_keys list in the client structure, and also
10260 * in the hash table mapping swapped keys to waiting clients, that is,
10261 * server.io_waited_keys. */
10262 static int waitForSwappedKey(redisClient
*c
, robj
*key
) {
10263 struct dictEntry
*de
;
10267 /* If the key does not exist or is already in RAM we don't need to
10268 * block the client at all. */
10269 de
= dictFind(c
->db
->dict
,key
->ptr
);
10270 if (de
== NULL
) return 0;
10271 o
= dictGetEntryVal(de
);
10272 if (o
->storage
== REDIS_VM_MEMORY
) {
10274 } else if (o
->storage
== REDIS_VM_SWAPPING
) {
10275 /* We were swapping the key, undo it! */
10276 vmCancelThreadedIOJob(o
);
10280 /* OK: the key is either swapped, or being loaded just now. */
10282 /* Add the key to the list of keys this client is waiting for.
10283 * This maps clients to keys they are waiting for. */
10284 listAddNodeTail(c
->io_keys
,key
);
10287 /* Add the client to the swapped keys => clients waiting map. */
10288 de
= dictFind(c
->db
->io_keys
,key
);
10292 /* For every key we take a list of clients blocked for it */
10294 retval
= dictAdd(c
->db
->io_keys
,key
,l
);
10296 assert(retval
== DICT_OK
);
10298 l
= dictGetEntryVal(de
);
10300 listAddNodeTail(l
,c
);
10302 /* Are we already loading the key from disk? If not create a job */
10303 if (o
->storage
== REDIS_VM_SWAPPED
) {
10305 vmpointer
*vp
= (vmpointer
*)o
;
10307 o
->storage
= REDIS_VM_LOADING
;
10308 j
= zmalloc(sizeof(*j
));
10309 j
->type
= REDIS_IOJOB_LOAD
;
10314 j
->page
= vp
->page
;
10317 j
->thread
= (pthread_t
) -1;
10320 unlockThreadedIO();
10325 /* Preload keys for any command with first, last and step values for
10326 * the command keys prototype, as defined in the command table. */
10327 static void waitForMultipleSwappedKeys(redisClient
*c
, struct redisCommand
*cmd
, int argc
, robj
**argv
) {
10329 if (cmd
->vm_firstkey
== 0) return;
10330 last
= cmd
->vm_lastkey
;
10331 if (last
< 0) last
= argc
+last
;
10332 for (j
= cmd
->vm_firstkey
; j
<= last
; j
+= cmd
->vm_keystep
) {
10333 redisAssert(j
< argc
);
10334 waitForSwappedKey(c
,argv
[j
]);
10338 /* Preload keys needed for the ZUNIONSTORE and ZINTERSTORE commands.
10339 * Note that the number of keys to preload is user-defined, so we need to
10340 * apply a sanity check against argc. */
10341 static void zunionInterBlockClientOnSwappedKeys(redisClient
*c
, struct redisCommand
*cmd
, int argc
, robj
**argv
) {
10343 REDIS_NOTUSED(cmd
);
10345 num
= atoi(argv
[2]->ptr
);
10346 if (num
> (argc
-3)) return;
10347 for (i
= 0; i
< num
; i
++) {
10348 waitForSwappedKey(c
,argv
[3+i
]);
10352 /* Preload keys needed to execute the entire MULTI/EXEC block.
10354 * This function is called by blockClientOnSwappedKeys when EXEC is issued,
10355 * and will block the client when any command requires a swapped out value. */
10356 static void execBlockClientOnSwappedKeys(redisClient
*c
, struct redisCommand
*cmd
, int argc
, robj
**argv
) {
10358 struct redisCommand
*mcmd
;
10360 REDIS_NOTUSED(cmd
);
10361 REDIS_NOTUSED(argc
);
10362 REDIS_NOTUSED(argv
);
10364 if (!(c
->flags
& REDIS_MULTI
)) return;
10365 for (i
= 0; i
< c
->mstate
.count
; i
++) {
10366 mcmd
= c
->mstate
.commands
[i
].cmd
;
10367 margc
= c
->mstate
.commands
[i
].argc
;
10368 margv
= c
->mstate
.commands
[i
].argv
;
10370 if (mcmd
->vm_preload_proc
!= NULL
) {
10371 mcmd
->vm_preload_proc(c
,mcmd
,margc
,margv
);
10373 waitForMultipleSwappedKeys(c
,mcmd
,margc
,margv
);
10378 /* Is this client attempting to run a command against swapped keys?
10379 * If so, block it ASAP, load the keys in background, then resume it.
10381 * The important idea about this function is that it can fail! If keys will
10382 * still be swapped when the client is resumed, this key lookups will
10383 * just block loading keys from disk. In practical terms this should only
10384 * happen with SORT BY command or if there is a bug in this function.
10386 * Return 1 if the client is marked as blocked, 0 if the client can
10387 * continue as the keys it is going to access appear to be in memory. */
10388 static int blockClientOnSwappedKeys(redisClient
*c
, struct redisCommand
*cmd
) {
10389 if (cmd
->vm_preload_proc
!= NULL
) {
10390 cmd
->vm_preload_proc(c
,cmd
,c
->argc
,c
->argv
);
10392 waitForMultipleSwappedKeys(c
,cmd
,c
->argc
,c
->argv
);
10395 /* If the client was blocked for at least one key, mark it as blocked. */
10396 if (listLength(c
->io_keys
)) {
10397 c
->flags
|= REDIS_IO_WAIT
;
10398 aeDeleteFileEvent(server
.el
,c
->fd
,AE_READABLE
);
10399 server
.vm_blocked_clients
++;
10406 /* Remove the 'key' from the list of blocked keys for a given client.
10408 * The function returns 1 when there are no longer blocking keys after
10409 * the current one was removed (and the client can be unblocked). */
10410 static int dontWaitForSwappedKey(redisClient
*c
, robj
*key
) {
10414 struct dictEntry
*de
;
10416 /* Remove the key from the list of keys this client is waiting for. */
10417 listRewind(c
->io_keys
,&li
);
10418 while ((ln
= listNext(&li
)) != NULL
) {
10419 if (equalStringObjects(ln
->value
,key
)) {
10420 listDelNode(c
->io_keys
,ln
);
10424 assert(ln
!= NULL
);
10426 /* Remove the client form the key => waiting clients map. */
10427 de
= dictFind(c
->db
->io_keys
,key
);
10428 assert(de
!= NULL
);
10429 l
= dictGetEntryVal(de
);
10430 ln
= listSearchKey(l
,c
);
10431 assert(ln
!= NULL
);
10433 if (listLength(l
) == 0)
10434 dictDelete(c
->db
->io_keys
,key
);
10436 return listLength(c
->io_keys
) == 0;
10439 /* Every time we now a key was loaded back in memory, we handle clients
10440 * waiting for this key if any. */
10441 static void handleClientsBlockedOnSwappedKey(redisDb
*db
, robj
*key
) {
10442 struct dictEntry
*de
;
10447 de
= dictFind(db
->io_keys
,key
);
10450 l
= dictGetEntryVal(de
);
10451 len
= listLength(l
);
10452 /* Note: we can't use something like while(listLength(l)) as the list
10453 * can be freed by the calling function when we remove the last element. */
10456 redisClient
*c
= ln
->value
;
10458 if (dontWaitForSwappedKey(c
,key
)) {
10459 /* Put the client in the list of clients ready to go as we
10460 * loaded all the keys about it. */
10461 listAddNodeTail(server
.io_ready_clients
,c
);
10466 /* =========================== Remote Configuration ========================= */
10468 static void configSetCommand(redisClient
*c
) {
10469 robj
*o
= getDecodedObject(c
->argv
[3]);
10472 if (!strcasecmp(c
->argv
[2]->ptr
,"dbfilename")) {
10473 zfree(server
.dbfilename
);
10474 server
.dbfilename
= zstrdup(o
->ptr
);
10475 } else if (!strcasecmp(c
->argv
[2]->ptr
,"requirepass")) {
10476 zfree(server
.requirepass
);
10477 server
.requirepass
= zstrdup(o
->ptr
);
10478 } else if (!strcasecmp(c
->argv
[2]->ptr
,"masterauth")) {
10479 zfree(server
.masterauth
);
10480 server
.masterauth
= zstrdup(o
->ptr
);
10481 } else if (!strcasecmp(c
->argv
[2]->ptr
,"maxmemory")) {
10482 if (getLongLongFromObject(o
,&ll
) == REDIS_ERR
||
10483 ll
< 0) goto badfmt
;
10484 server
.maxmemory
= ll
;
10485 } else if (!strcasecmp(c
->argv
[2]->ptr
,"timeout")) {
10486 if (getLongLongFromObject(o
,&ll
) == REDIS_ERR
||
10487 ll
< 0 || ll
> LONG_MAX
) goto badfmt
;
10488 server
.maxidletime
= ll
;
10489 } else if (!strcasecmp(c
->argv
[2]->ptr
,"appendfsync")) {
10490 if (!strcasecmp(o
->ptr
,"no")) {
10491 server
.appendfsync
= APPENDFSYNC_NO
;
10492 } else if (!strcasecmp(o
->ptr
,"everysec")) {
10493 server
.appendfsync
= APPENDFSYNC_EVERYSEC
;
10494 } else if (!strcasecmp(o
->ptr
,"always")) {
10495 server
.appendfsync
= APPENDFSYNC_ALWAYS
;
10499 } else if (!strcasecmp(c
->argv
[2]->ptr
,"no-appendfsync-on-rewrite")) {
10500 int yn
= yesnotoi(o
->ptr
);
10502 if (yn
== -1) goto badfmt
;
10503 server
.no_appendfsync_on_rewrite
= yn
;
10504 } else if (!strcasecmp(c
->argv
[2]->ptr
,"appendonly")) {
10505 int old
= server
.appendonly
;
10506 int new = yesnotoi(o
->ptr
);
10508 if (new == -1) goto badfmt
;
10513 if (startAppendOnly() == REDIS_ERR
) {
10514 addReplySds(c
,sdscatprintf(sdsempty(),
10515 "-ERR Unable to turn on AOF. Check server logs.\r\n"));
10521 } else if (!strcasecmp(c
->argv
[2]->ptr
,"save")) {
10523 sds
*v
= sdssplitlen(o
->ptr
,sdslen(o
->ptr
)," ",1,&vlen
);
10525 /* Perform sanity check before setting the new config:
10526 * - Even number of args
10527 * - Seconds >= 1, changes >= 0 */
10529 sdsfreesplitres(v
,vlen
);
10532 for (j
= 0; j
< vlen
; j
++) {
10536 val
= strtoll(v
[j
], &eptr
, 10);
10537 if (eptr
[0] != '\0' ||
10538 ((j
& 1) == 0 && val
< 1) ||
10539 ((j
& 1) == 1 && val
< 0)) {
10540 sdsfreesplitres(v
,vlen
);
10544 /* Finally set the new config */
10545 resetServerSaveParams();
10546 for (j
= 0; j
< vlen
; j
+= 2) {
10550 seconds
= strtoll(v
[j
],NULL
,10);
10551 changes
= strtoll(v
[j
+1],NULL
,10);
10552 appendServerSaveParams(seconds
, changes
);
10554 sdsfreesplitres(v
,vlen
);
10556 addReplySds(c
,sdscatprintf(sdsempty(),
10557 "-ERR not supported CONFIG parameter %s\r\n",
10558 (char*)c
->argv
[2]->ptr
));
10563 addReply(c
,shared
.ok
);
10566 badfmt
: /* Bad format errors */
10567 addReplySds(c
,sdscatprintf(sdsempty(),
10568 "-ERR invalid argument '%s' for CONFIG SET '%s'\r\n",
10570 (char*)c
->argv
[2]->ptr
));
10574 static void configGetCommand(redisClient
*c
) {
10575 robj
*o
= getDecodedObject(c
->argv
[2]);
10576 robj
*lenobj
= createObject(REDIS_STRING
,NULL
);
10577 char *pattern
= o
->ptr
;
10580 addReply(c
,lenobj
);
10581 decrRefCount(lenobj
);
10583 if (stringmatch(pattern
,"dbfilename",0)) {
10584 addReplyBulkCString(c
,"dbfilename");
10585 addReplyBulkCString(c
,server
.dbfilename
);
10588 if (stringmatch(pattern
,"requirepass",0)) {
10589 addReplyBulkCString(c
,"requirepass");
10590 addReplyBulkCString(c
,server
.requirepass
);
10593 if (stringmatch(pattern
,"masterauth",0)) {
10594 addReplyBulkCString(c
,"masterauth");
10595 addReplyBulkCString(c
,server
.masterauth
);
10598 if (stringmatch(pattern
,"maxmemory",0)) {
10601 ll2string(buf
,128,server
.maxmemory
);
10602 addReplyBulkCString(c
,"maxmemory");
10603 addReplyBulkCString(c
,buf
);
10606 if (stringmatch(pattern
,"timeout",0)) {
10609 ll2string(buf
,128,server
.maxidletime
);
10610 addReplyBulkCString(c
,"timeout");
10611 addReplyBulkCString(c
,buf
);
10614 if (stringmatch(pattern
,"appendonly",0)) {
10615 addReplyBulkCString(c
,"appendonly");
10616 addReplyBulkCString(c
,server
.appendonly
? "yes" : "no");
10619 if (stringmatch(pattern
,"no-appendfsync-on-rewrite",0)) {
10620 addReplyBulkCString(c
,"no-appendfsync-on-rewrite");
10621 addReplyBulkCString(c
,server
.no_appendfsync_on_rewrite
? "yes" : "no");
10624 if (stringmatch(pattern
,"appendfsync",0)) {
10627 switch(server
.appendfsync
) {
10628 case APPENDFSYNC_NO
: policy
= "no"; break;
10629 case APPENDFSYNC_EVERYSEC
: policy
= "everysec"; break;
10630 case APPENDFSYNC_ALWAYS
: policy
= "always"; break;
10631 default: policy
= "unknown"; break; /* too harmless to panic */
10633 addReplyBulkCString(c
,"appendfsync");
10634 addReplyBulkCString(c
,policy
);
10637 if (stringmatch(pattern
,"save",0)) {
10638 sds buf
= sdsempty();
10641 for (j
= 0; j
< server
.saveparamslen
; j
++) {
10642 buf
= sdscatprintf(buf
,"%ld %d",
10643 server
.saveparams
[j
].seconds
,
10644 server
.saveparams
[j
].changes
);
10645 if (j
!= server
.saveparamslen
-1)
10646 buf
= sdscatlen(buf
," ",1);
10648 addReplyBulkCString(c
,"save");
10649 addReplyBulkCString(c
,buf
);
10654 lenobj
->ptr
= sdscatprintf(sdsempty(),"*%d\r\n",matches
*2);
10657 static void configCommand(redisClient
*c
) {
10658 if (!strcasecmp(c
->argv
[1]->ptr
,"set")) {
10659 if (c
->argc
!= 4) goto badarity
;
10660 configSetCommand(c
);
10661 } else if (!strcasecmp(c
->argv
[1]->ptr
,"get")) {
10662 if (c
->argc
!= 3) goto badarity
;
10663 configGetCommand(c
);
10664 } else if (!strcasecmp(c
->argv
[1]->ptr
,"resetstat")) {
10665 if (c
->argc
!= 2) goto badarity
;
10666 server
.stat_numcommands
= 0;
10667 server
.stat_numconnections
= 0;
10668 server
.stat_expiredkeys
= 0;
10669 server
.stat_starttime
= time(NULL
);
10670 addReply(c
,shared
.ok
);
10672 addReplySds(c
,sdscatprintf(sdsempty(),
10673 "-ERR CONFIG subcommand must be one of GET, SET, RESETSTAT\r\n"));
10678 addReplySds(c
,sdscatprintf(sdsempty(),
10679 "-ERR Wrong number of arguments for CONFIG %s\r\n",
10680 (char*) c
->argv
[1]->ptr
));
10683 /* =========================== Pubsub implementation ======================== */
10685 static void freePubsubPattern(void *p
) {
10686 pubsubPattern
*pat
= p
;
10688 decrRefCount(pat
->pattern
);
10692 static int listMatchPubsubPattern(void *a
, void *b
) {
10693 pubsubPattern
*pa
= a
, *pb
= b
;
10695 return (pa
->client
== pb
->client
) &&
10696 (equalStringObjects(pa
->pattern
,pb
->pattern
));
10699 /* Subscribe a client to a channel. Returns 1 if the operation succeeded, or
10700 * 0 if the client was already subscribed to that channel. */
10701 static int pubsubSubscribeChannel(redisClient
*c
, robj
*channel
) {
10702 struct dictEntry
*de
;
10703 list
*clients
= NULL
;
10706 /* Add the channel to the client -> channels hash table */
10707 if (dictAdd(c
->pubsub_channels
,channel
,NULL
) == DICT_OK
) {
10709 incrRefCount(channel
);
10710 /* Add the client to the channel -> list of clients hash table */
10711 de
= dictFind(server
.pubsub_channels
,channel
);
10713 clients
= listCreate();
10714 dictAdd(server
.pubsub_channels
,channel
,clients
);
10715 incrRefCount(channel
);
10717 clients
= dictGetEntryVal(de
);
10719 listAddNodeTail(clients
,c
);
10721 /* Notify the client */
10722 addReply(c
,shared
.mbulk3
);
10723 addReply(c
,shared
.subscribebulk
);
10724 addReplyBulk(c
,channel
);
10725 addReplyLongLong(c
,dictSize(c
->pubsub_channels
)+listLength(c
->pubsub_patterns
));
10729 /* Unsubscribe a client from a channel. Returns 1 if the operation succeeded, or
10730 * 0 if the client was not subscribed to the specified channel. */
10731 static int pubsubUnsubscribeChannel(redisClient
*c
, robj
*channel
, int notify
) {
10732 struct dictEntry
*de
;
10737 /* Remove the channel from the client -> channels hash table */
10738 incrRefCount(channel
); /* channel may be just a pointer to the same object
10739 we have in the hash tables. Protect it... */
10740 if (dictDelete(c
->pubsub_channels
,channel
) == DICT_OK
) {
10742 /* Remove the client from the channel -> clients list hash table */
10743 de
= dictFind(server
.pubsub_channels
,channel
);
10744 assert(de
!= NULL
);
10745 clients
= dictGetEntryVal(de
);
10746 ln
= listSearchKey(clients
,c
);
10747 assert(ln
!= NULL
);
10748 listDelNode(clients
,ln
);
10749 if (listLength(clients
) == 0) {
10750 /* Free the list and associated hash entry at all if this was
10751 * the latest client, so that it will be possible to abuse
10752 * Redis PUBSUB creating millions of channels. */
10753 dictDelete(server
.pubsub_channels
,channel
);
10756 /* Notify the client */
10758 addReply(c
,shared
.mbulk3
);
10759 addReply(c
,shared
.unsubscribebulk
);
10760 addReplyBulk(c
,channel
);
10761 addReplyLongLong(c
,dictSize(c
->pubsub_channels
)+
10762 listLength(c
->pubsub_patterns
));
10765 decrRefCount(channel
); /* it is finally safe to release it */
10769 /* Subscribe a client to a pattern. Returns 1 if the operation succeeded, or 0 if the clinet was already subscribed to that pattern. */
10770 static int pubsubSubscribePattern(redisClient
*c
, robj
*pattern
) {
10773 if (listSearchKey(c
->pubsub_patterns
,pattern
) == NULL
) {
10775 pubsubPattern
*pat
;
10776 listAddNodeTail(c
->pubsub_patterns
,pattern
);
10777 incrRefCount(pattern
);
10778 pat
= zmalloc(sizeof(*pat
));
10779 pat
->pattern
= getDecodedObject(pattern
);
10781 listAddNodeTail(server
.pubsub_patterns
,pat
);
10783 /* Notify the client */
10784 addReply(c
,shared
.mbulk3
);
10785 addReply(c
,shared
.psubscribebulk
);
10786 addReplyBulk(c
,pattern
);
10787 addReplyLongLong(c
,dictSize(c
->pubsub_channels
)+listLength(c
->pubsub_patterns
));
10791 /* Unsubscribe a client from a channel. Returns 1 if the operation succeeded, or
10792 * 0 if the client was not subscribed to the specified channel. */
10793 static int pubsubUnsubscribePattern(redisClient
*c
, robj
*pattern
, int notify
) {
10798 incrRefCount(pattern
); /* Protect the object. May be the same we remove */
10799 if ((ln
= listSearchKey(c
->pubsub_patterns
,pattern
)) != NULL
) {
10801 listDelNode(c
->pubsub_patterns
,ln
);
10803 pat
.pattern
= pattern
;
10804 ln
= listSearchKey(server
.pubsub_patterns
,&pat
);
10805 listDelNode(server
.pubsub_patterns
,ln
);
10807 /* Notify the client */
10809 addReply(c
,shared
.mbulk3
);
10810 addReply(c
,shared
.punsubscribebulk
);
10811 addReplyBulk(c
,pattern
);
10812 addReplyLongLong(c
,dictSize(c
->pubsub_channels
)+
10813 listLength(c
->pubsub_patterns
));
10815 decrRefCount(pattern
);
10819 /* Unsubscribe from all the channels. Return the number of channels the
10820 * client was subscribed from. */
10821 static int pubsubUnsubscribeAllChannels(redisClient
*c
, int notify
) {
10822 dictIterator
*di
= dictGetIterator(c
->pubsub_channels
);
10826 while((de
= dictNext(di
)) != NULL
) {
10827 robj
*channel
= dictGetEntryKey(de
);
10829 count
+= pubsubUnsubscribeChannel(c
,channel
,notify
);
10831 dictReleaseIterator(di
);
10835 /* Unsubscribe from all the patterns. Return the number of patterns the
10836 * client was subscribed from. */
10837 static int pubsubUnsubscribeAllPatterns(redisClient
*c
, int notify
) {
10842 listRewind(c
->pubsub_patterns
,&li
);
10843 while ((ln
= listNext(&li
)) != NULL
) {
10844 robj
*pattern
= ln
->value
;
10846 count
+= pubsubUnsubscribePattern(c
,pattern
,notify
);
10851 /* Publish a message */
10852 static int pubsubPublishMessage(robj
*channel
, robj
*message
) {
10854 struct dictEntry
*de
;
10858 /* Send to clients listening for that channel */
10859 de
= dictFind(server
.pubsub_channels
,channel
);
10861 list
*list
= dictGetEntryVal(de
);
10865 listRewind(list
,&li
);
10866 while ((ln
= listNext(&li
)) != NULL
) {
10867 redisClient
*c
= ln
->value
;
10869 addReply(c
,shared
.mbulk3
);
10870 addReply(c
,shared
.messagebulk
);
10871 addReplyBulk(c
,channel
);
10872 addReplyBulk(c
,message
);
10876 /* Send to clients listening to matching channels */
10877 if (listLength(server
.pubsub_patterns
)) {
10878 listRewind(server
.pubsub_patterns
,&li
);
10879 channel
= getDecodedObject(channel
);
10880 while ((ln
= listNext(&li
)) != NULL
) {
10881 pubsubPattern
*pat
= ln
->value
;
10883 if (stringmatchlen((char*)pat
->pattern
->ptr
,
10884 sdslen(pat
->pattern
->ptr
),
10885 (char*)channel
->ptr
,
10886 sdslen(channel
->ptr
),0)) {
10887 addReply(pat
->client
,shared
.mbulk4
);
10888 addReply(pat
->client
,shared
.pmessagebulk
);
10889 addReplyBulk(pat
->client
,pat
->pattern
);
10890 addReplyBulk(pat
->client
,channel
);
10891 addReplyBulk(pat
->client
,message
);
10895 decrRefCount(channel
);
10900 static void subscribeCommand(redisClient
*c
) {
10903 for (j
= 1; j
< c
->argc
; j
++)
10904 pubsubSubscribeChannel(c
,c
->argv
[j
]);
10907 static void unsubscribeCommand(redisClient
*c
) {
10908 if (c
->argc
== 1) {
10909 pubsubUnsubscribeAllChannels(c
,1);
10914 for (j
= 1; j
< c
->argc
; j
++)
10915 pubsubUnsubscribeChannel(c
,c
->argv
[j
],1);
10919 static void psubscribeCommand(redisClient
*c
) {
10922 for (j
= 1; j
< c
->argc
; j
++)
10923 pubsubSubscribePattern(c
,c
->argv
[j
]);
10926 static void punsubscribeCommand(redisClient
*c
) {
10927 if (c
->argc
== 1) {
10928 pubsubUnsubscribeAllPatterns(c
,1);
10933 for (j
= 1; j
< c
->argc
; j
++)
10934 pubsubUnsubscribePattern(c
,c
->argv
[j
],1);
10938 static void publishCommand(redisClient
*c
) {
10939 int receivers
= pubsubPublishMessage(c
->argv
[1],c
->argv
[2]);
10940 addReplyLongLong(c
,receivers
);
10943 /* ===================== WATCH (CAS alike for MULTI/EXEC) ===================
10945 * The implementation uses a per-DB hash table mapping keys to list of clients
10946 * WATCHing those keys, so that given a key that is going to be modified
10947 * we can mark all the associated clients as dirty.
10949 * Also every client contains a list of WATCHed keys so that's possible to
10950 * un-watch such keys when the client is freed or when UNWATCH is called. */
10952 /* In the client->watched_keys list we need to use watchedKey structures
10953 * as in order to identify a key in Redis we need both the key name and the
10955 typedef struct watchedKey
{
10960 /* Watch for the specified key */
10961 static void watchForKey(redisClient
*c
, robj
*key
) {
10962 list
*clients
= NULL
;
10967 /* Check if we are already watching for this key */
10968 listRewind(c
->watched_keys
,&li
);
10969 while((ln
= listNext(&li
))) {
10970 wk
= listNodeValue(ln
);
10971 if (wk
->db
== c
->db
&& equalStringObjects(key
,wk
->key
))
10972 return; /* Key already watched */
10974 /* This key is not already watched in this DB. Let's add it */
10975 clients
= dictFetchValue(c
->db
->watched_keys
,key
);
10977 clients
= listCreate();
10978 dictAdd(c
->db
->watched_keys
,key
,clients
);
10981 listAddNodeTail(clients
,c
);
10982 /* Add the new key to the lits of keys watched by this client */
10983 wk
= zmalloc(sizeof(*wk
));
10987 listAddNodeTail(c
->watched_keys
,wk
);
10990 /* Unwatch all the keys watched by this client. To clean the EXEC dirty
10991 * flag is up to the caller. */
10992 static void unwatchAllKeys(redisClient
*c
) {
10996 if (listLength(c
->watched_keys
) == 0) return;
10997 listRewind(c
->watched_keys
,&li
);
10998 while((ln
= listNext(&li
))) {
11002 /* Lookup the watched key -> clients list and remove the client
11004 wk
= listNodeValue(ln
);
11005 clients
= dictFetchValue(wk
->db
->watched_keys
, wk
->key
);
11006 assert(clients
!= NULL
);
11007 listDelNode(clients
,listSearchKey(clients
,c
));
11008 /* Kill the entry at all if this was the only client */
11009 if (listLength(clients
) == 0)
11010 dictDelete(wk
->db
->watched_keys
, wk
->key
);
11011 /* Remove this watched key from the client->watched list */
11012 listDelNode(c
->watched_keys
,ln
);
11013 decrRefCount(wk
->key
);
11018 /* "Touch" a key, so that if this key is being WATCHed by some client the
11019 * next EXEC will fail. */
11020 static void touchWatchedKey(redisDb
*db
, robj
*key
) {
11025 if (dictSize(db
->watched_keys
) == 0) return;
11026 clients
= dictFetchValue(db
->watched_keys
, key
);
11027 if (!clients
) return;
11029 /* Mark all the clients watching this key as REDIS_DIRTY_CAS */
11030 /* Check if we are already watching for this key */
11031 listRewind(clients
,&li
);
11032 while((ln
= listNext(&li
))) {
11033 redisClient
*c
= listNodeValue(ln
);
11035 c
->flags
|= REDIS_DIRTY_CAS
;
11039 /* On FLUSHDB or FLUSHALL all the watched keys that are present before the
11040 * flush but will be deleted as effect of the flushing operation should
11041 * be touched. "dbid" is the DB that's getting the flush. -1 if it is
11042 * a FLUSHALL operation (all the DBs flushed). */
11043 static void touchWatchedKeysOnFlush(int dbid
) {
11047 /* For every client, check all the waited keys */
11048 listRewind(server
.clients
,&li1
);
11049 while((ln
= listNext(&li1
))) {
11050 redisClient
*c
= listNodeValue(ln
);
11051 listRewind(c
->watched_keys
,&li2
);
11052 while((ln
= listNext(&li2
))) {
11053 watchedKey
*wk
= listNodeValue(ln
);
11055 /* For every watched key matching the specified DB, if the
11056 * key exists, mark the client as dirty, as the key will be
11058 if (dbid
== -1 || wk
->db
->id
== dbid
) {
11059 if (dictFind(wk
->db
->dict
, wk
->key
->ptr
) != NULL
)
11060 c
->flags
|= REDIS_DIRTY_CAS
;
11066 static void watchCommand(redisClient
*c
) {
11069 if (c
->flags
& REDIS_MULTI
) {
11070 addReplySds(c
,sdsnew("-ERR WATCH inside MULTI is not allowed\r\n"));
11073 for (j
= 1; j
< c
->argc
; j
++)
11074 watchForKey(c
,c
->argv
[j
]);
11075 addReply(c
,shared
.ok
);
11078 static void unwatchCommand(redisClient
*c
) {
11080 c
->flags
&= (~REDIS_DIRTY_CAS
);
11081 addReply(c
,shared
.ok
);
11084 /* ================================= Debugging ============================== */
11086 /* Compute the sha1 of string at 's' with 'len' bytes long.
11087 * The SHA1 is then xored againt the string pointed by digest.
11088 * Since xor is commutative, this operation is used in order to
11089 * "add" digests relative to unordered elements.
11091 * So digest(a,b,c,d) will be the same of digest(b,a,c,d) */
11092 static void xorDigest(unsigned char *digest
, void *ptr
, size_t len
) {
11094 unsigned char hash
[20], *s
= ptr
;
11098 SHA1Update(&ctx
,s
,len
);
11099 SHA1Final(hash
,&ctx
);
11101 for (j
= 0; j
< 20; j
++)
11102 digest
[j
] ^= hash
[j
];
11105 static void xorObjectDigest(unsigned char *digest
, robj
*o
) {
11106 o
= getDecodedObject(o
);
11107 xorDigest(digest
,o
->ptr
,sdslen(o
->ptr
));
11111 /* This function instead of just computing the SHA1 and xoring it
11112 * against diget, also perform the digest of "digest" itself and
11113 * replace the old value with the new one.
11115 * So the final digest will be:
11117 * digest = SHA1(digest xor SHA1(data))
11119 * This function is used every time we want to preserve the order so
11120 * that digest(a,b,c,d) will be different than digest(b,c,d,a)
11122 * Also note that mixdigest("foo") followed by mixdigest("bar")
11123 * will lead to a different digest compared to "fo", "obar".
11125 static void mixDigest(unsigned char *digest
, void *ptr
, size_t len
) {
11129 xorDigest(digest
,s
,len
);
11131 SHA1Update(&ctx
,digest
,20);
11132 SHA1Final(digest
,&ctx
);
11135 static void mixObjectDigest(unsigned char *digest
, robj
*o
) {
11136 o
= getDecodedObject(o
);
11137 mixDigest(digest
,o
->ptr
,sdslen(o
->ptr
));
11141 /* Compute the dataset digest. Since keys, sets elements, hashes elements
11142 * are not ordered, we use a trick: every aggregate digest is the xor
11143 * of the digests of their elements. This way the order will not change
11144 * the result. For list instead we use a feedback entering the output digest
11145 * as input in order to ensure that a different ordered list will result in
11146 * a different digest. */
11147 static void computeDatasetDigest(unsigned char *final
) {
11148 unsigned char digest
[20];
11150 dictIterator
*di
= NULL
;
11155 memset(final
,0,20); /* Start with a clean result */
11157 for (j
= 0; j
< server
.dbnum
; j
++) {
11158 redisDb
*db
= server
.db
+j
;
11160 if (dictSize(db
->dict
) == 0) continue;
11161 di
= dictGetIterator(db
->dict
);
11163 /* hash the DB id, so the same dataset moved in a different
11164 * DB will lead to a different digest */
11166 mixDigest(final
,&aux
,sizeof(aux
));
11168 /* Iterate this DB writing every entry */
11169 while((de
= dictNext(di
)) != NULL
) {
11174 memset(digest
,0,20); /* This key-val digest */
11175 key
= dictGetEntryKey(de
);
11176 keyobj
= createStringObject(key
,sdslen(key
));
11178 mixDigest(digest
,key
,sdslen(key
));
11180 /* Make sure the key is loaded if VM is active */
11181 o
= lookupKeyRead(db
,keyobj
);
11183 aux
= htonl(o
->type
);
11184 mixDigest(digest
,&aux
,sizeof(aux
));
11185 expiretime
= getExpire(db
,keyobj
);
11187 /* Save the key and associated value */
11188 if (o
->type
== REDIS_STRING
) {
11189 mixObjectDigest(digest
,o
);
11190 } else if (o
->type
== REDIS_LIST
) {
11191 listTypeIterator
*li
= listTypeInitIterator(o
,0,REDIS_TAIL
);
11192 listTypeEntry entry
;
11193 while(listTypeNext(li
,&entry
)) {
11194 robj
*eleobj
= listTypeGet(&entry
);
11195 mixObjectDigest(digest
,eleobj
);
11196 decrRefCount(eleobj
);
11198 listTypeReleaseIterator(li
);
11199 } else if (o
->type
== REDIS_SET
) {
11200 dict
*set
= o
->ptr
;
11201 dictIterator
*di
= dictGetIterator(set
);
11204 while((de
= dictNext(di
)) != NULL
) {
11205 robj
*eleobj
= dictGetEntryKey(de
);
11207 xorObjectDigest(digest
,eleobj
);
11209 dictReleaseIterator(di
);
11210 } else if (o
->type
== REDIS_ZSET
) {
11212 dictIterator
*di
= dictGetIterator(zs
->dict
);
11215 while((de
= dictNext(di
)) != NULL
) {
11216 robj
*eleobj
= dictGetEntryKey(de
);
11217 double *score
= dictGetEntryVal(de
);
11218 unsigned char eledigest
[20];
11220 snprintf(buf
,sizeof(buf
),"%.17g",*score
);
11221 memset(eledigest
,0,20);
11222 mixObjectDigest(eledigest
,eleobj
);
11223 mixDigest(eledigest
,buf
,strlen(buf
));
11224 xorDigest(digest
,eledigest
,20);
11226 dictReleaseIterator(di
);
11227 } else if (o
->type
== REDIS_HASH
) {
11228 hashTypeIterator
*hi
;
11231 hi
= hashTypeInitIterator(o
);
11232 while (hashTypeNext(hi
) != REDIS_ERR
) {
11233 unsigned char eledigest
[20];
11235 memset(eledigest
,0,20);
11236 obj
= hashTypeCurrent(hi
,REDIS_HASH_KEY
);
11237 mixObjectDigest(eledigest
,obj
);
11239 obj
= hashTypeCurrent(hi
,REDIS_HASH_VALUE
);
11240 mixObjectDigest(eledigest
,obj
);
11242 xorDigest(digest
,eledigest
,20);
11244 hashTypeReleaseIterator(hi
);
11246 redisPanic("Unknown object type");
11248 /* If the key has an expire, add it to the mix */
11249 if (expiretime
!= -1) xorDigest(digest
,"!!expire!!",10);
11250 /* We can finally xor the key-val digest to the final digest */
11251 xorDigest(final
,digest
,20);
11252 decrRefCount(keyobj
);
11254 dictReleaseIterator(di
);
11258 static void debugCommand(redisClient
*c
) {
11259 if (!strcasecmp(c
->argv
[1]->ptr
,"segfault")) {
11260 *((char*)-1) = 'x';
11261 } else if (!strcasecmp(c
->argv
[1]->ptr
,"reload")) {
11262 if (rdbSave(server
.dbfilename
) != REDIS_OK
) {
11263 addReply(c
,shared
.err
);
11267 if (rdbLoad(server
.dbfilename
) != REDIS_OK
) {
11268 addReply(c
,shared
.err
);
11271 redisLog(REDIS_WARNING
,"DB reloaded by DEBUG RELOAD");
11272 addReply(c
,shared
.ok
);
11273 } else if (!strcasecmp(c
->argv
[1]->ptr
,"loadaof")) {
11275 if (loadAppendOnlyFile(server
.appendfilename
) != REDIS_OK
) {
11276 addReply(c
,shared
.err
);
11279 redisLog(REDIS_WARNING
,"Append Only File loaded by DEBUG LOADAOF");
11280 addReply(c
,shared
.ok
);
11281 } else if (!strcasecmp(c
->argv
[1]->ptr
,"object") && c
->argc
== 3) {
11282 dictEntry
*de
= dictFind(c
->db
->dict
,c
->argv
[2]->ptr
);
11286 addReply(c
,shared
.nokeyerr
);
11289 val
= dictGetEntryVal(de
);
11290 if (!server
.vm_enabled
|| (val
->storage
== REDIS_VM_MEMORY
||
11291 val
->storage
== REDIS_VM_SWAPPING
)) {
11295 if (val
->encoding
< (sizeof(strencoding
)/sizeof(char*))) {
11296 strenc
= strencoding
[val
->encoding
];
11298 snprintf(buf
,64,"unknown encoding %d\n", val
->encoding
);
11301 addReplySds(c
,sdscatprintf(sdsempty(),
11302 "+Value at:%p refcount:%d "
11303 "encoding:%s serializedlength:%lld\r\n",
11304 (void*)val
, val
->refcount
,
11305 strenc
, (long long) rdbSavedObjectLen(val
,NULL
)));
11307 vmpointer
*vp
= (vmpointer
*) val
;
11308 addReplySds(c
,sdscatprintf(sdsempty(),
11309 "+Value swapped at: page %llu "
11310 "using %llu pages\r\n",
11311 (unsigned long long) vp
->page
,
11312 (unsigned long long) vp
->usedpages
));
11314 } else if (!strcasecmp(c
->argv
[1]->ptr
,"swapin") && c
->argc
== 3) {
11315 lookupKeyRead(c
->db
,c
->argv
[2]);
11316 addReply(c
,shared
.ok
);
11317 } else if (!strcasecmp(c
->argv
[1]->ptr
,"swapout") && c
->argc
== 3) {
11318 dictEntry
*de
= dictFind(c
->db
->dict
,c
->argv
[2]->ptr
);
11322 if (!server
.vm_enabled
) {
11323 addReplySds(c
,sdsnew("-ERR Virtual Memory is disabled\r\n"));
11327 addReply(c
,shared
.nokeyerr
);
11330 val
= dictGetEntryVal(de
);
11332 if (val
->storage
!= REDIS_VM_MEMORY
) {
11333 addReplySds(c
,sdsnew("-ERR This key is not in memory\r\n"));
11334 } else if (val
->refcount
!= 1) {
11335 addReplySds(c
,sdsnew("-ERR Object is shared\r\n"));
11336 } else if ((vp
= vmSwapObjectBlocking(val
)) != NULL
) {
11337 dictGetEntryVal(de
) = vp
;
11338 addReply(c
,shared
.ok
);
11340 addReply(c
,shared
.err
);
11342 } else if (!strcasecmp(c
->argv
[1]->ptr
,"populate") && c
->argc
== 3) {
11347 if (getLongFromObjectOrReply(c
, c
->argv
[2], &keys
, NULL
) != REDIS_OK
)
11349 for (j
= 0; j
< keys
; j
++) {
11350 snprintf(buf
,sizeof(buf
),"key:%lu",j
);
11351 key
= createStringObject(buf
,strlen(buf
));
11352 if (lookupKeyRead(c
->db
,key
) != NULL
) {
11356 snprintf(buf
,sizeof(buf
),"value:%lu",j
);
11357 val
= createStringObject(buf
,strlen(buf
));
11358 dbAdd(c
->db
,key
,val
);
11361 addReply(c
,shared
.ok
);
11362 } else if (!strcasecmp(c
->argv
[1]->ptr
,"digest") && c
->argc
== 2) {
11363 unsigned char digest
[20];
11364 sds d
= sdsnew("+");
11367 computeDatasetDigest(digest
);
11368 for (j
= 0; j
< 20; j
++)
11369 d
= sdscatprintf(d
, "%02x",digest
[j
]);
11371 d
= sdscatlen(d
,"\r\n",2);
11374 addReplySds(c
,sdsnew(
11375 "-ERR Syntax error, try DEBUG [SEGFAULT|OBJECT <key>|SWAPIN <key>|SWAPOUT <key>|RELOAD]\r\n"));
11379 static void _redisAssert(char *estr
, char *file
, int line
) {
11380 redisLog(REDIS_WARNING
,"=== ASSERTION FAILED ===");
11381 redisLog(REDIS_WARNING
,"==> %s:%d '%s' is not true",file
,line
,estr
);
11382 #ifdef HAVE_BACKTRACE
11383 redisLog(REDIS_WARNING
,"(forcing SIGSEGV in order to print the stack trace)");
11384 *((char*)-1) = 'x';
11388 static void _redisPanic(char *msg
, char *file
, int line
) {
11389 redisLog(REDIS_WARNING
,"!!! Software Failure. Press left mouse button to continue");
11390 redisLog(REDIS_WARNING
,"Guru Meditation: %s #%s:%d",msg
,file
,line
);
11391 #ifdef HAVE_BACKTRACE
11392 redisLog(REDIS_WARNING
,"(forcing SIGSEGV in order to print the stack trace)");
11393 *((char*)-1) = 'x';
11397 /* =================================== Main! ================================ */
/* Read /proc/sys/vm/overcommit_memory and return its content converted
 * with atoi(). Returns -1 if the file cannot be opened or read. */
int linuxOvercommitMemoryValue(void) {
    char line[64];
    int value = -1;
    FILE *procfile = fopen("/proc/sys/vm/overcommit_memory","r");

    if (procfile == NULL) return -1;
    if (fgets(line,64,procfile) != NULL)
        value = atoi(line);
    fclose(procfile);
    return value;
}
11414 void linuxOvercommitMemoryWarning(void) {
11415 if (linuxOvercommitMemoryValue() == 0) {
11416 redisLog(REDIS_WARNING
,"WARNING overcommit_memory is set to 0! Background save may fail under low memory condition. To fix this issue add 'vm.overcommit_memory = 1' to /etc/sysctl.conf and then reboot or run the command 'sysctl vm.overcommit_memory=1' for this to take effect.");
11419 #endif /* __linux__ */
11421 static void daemonize(void) {
11425 if (fork() != 0) exit(0); /* parent exits */
11426 setsid(); /* create a new session */
11428 /* Every output goes to /dev/null. If Redis is daemonized but
11429 * the 'logfile' is set to 'stdout' in the configuration file
11430 * it will not log at all. */
11431 if ((fd
= open("/dev/null", O_RDWR
, 0)) != -1) {
11432 dup2(fd
, STDIN_FILENO
);
11433 dup2(fd
, STDOUT_FILENO
);
11434 dup2(fd
, STDERR_FILENO
);
11435 if (fd
> STDERR_FILENO
) close(fd
);
11437 /* Try to write the pid file */
11438 fp
= fopen(server
.pidfile
,"w");
11440 fprintf(fp
,"%d\n",getpid());
11445 static void version() {
11446 printf("Redis server version %s (%s:%d)\n", REDIS_VERSION
,
11447 redisGitSHA1(), atoi(redisGitDirty()) > 0);
/* Print the command line synopsis on standard error and exit with
 * status 1. */
static void usage() {
    fputs("Usage: ./redis-server [/path/to/redis.conf]\n",stderr);
    fputs(" ./redis-server - (read config from stdin)\n",stderr);
    exit(1);
}
11457 int main(int argc
, char **argv
) {
11460 initServerConfig();
11461 sortCommandTable();
11463 if (strcmp(argv
[1], "-v") == 0 ||
11464 strcmp(argv
[1], "--version") == 0) version();
11465 if (strcmp(argv
[1], "--help") == 0) usage();
11466 resetServerSaveParams();
11467 loadServerConfig(argv
[1]);
11468 } else if ((argc
> 2)) {
11471 redisLog(REDIS_WARNING
,"Warning: no config file specified, using the default config. In order to specify a config file use 'redis-server /path/to/redis.conf'");
11473 if (server
.daemonize
) daemonize();
11475 redisLog(REDIS_NOTICE
,"Server started, Redis version " REDIS_VERSION
);
11477 linuxOvercommitMemoryWarning();
11479 start
= time(NULL
);
11480 if (server
.appendonly
) {
11481 if (loadAppendOnlyFile(server
.appendfilename
) == REDIS_OK
)
11482 redisLog(REDIS_NOTICE
,"DB loaded from append only file: %ld seconds",time(NULL
)-start
);
11484 if (rdbLoad(server
.dbfilename
) == REDIS_OK
)
11485 redisLog(REDIS_NOTICE
,"DB loaded from disk: %ld seconds",time(NULL
)-start
);
11487 redisLog(REDIS_NOTICE
,"The server is now ready to accept connections on port %d", server
.port
);
11488 aeSetBeforeSleepProc(server
.el
,beforeSleep
);
11490 aeDeleteEventLoop(server
.el
);
11494 /* ============================= Backtrace support ========================= */
11496 #ifdef HAVE_BACKTRACE
11497 static char *findFuncName(void *pointer
, unsigned long *offset
);
11499 static void *getMcontextEip(ucontext_t
*uc
) {
11500 #if defined(__FreeBSD__)
11501 return (void*) uc
->uc_mcontext
.mc_eip
;
11502 #elif defined(__dietlibc__)
11503 return (void*) uc
->uc_mcontext
.eip
;
11504 #elif defined(__APPLE__) && !defined(MAC_OS_X_VERSION_10_6)
11506 return (void*) uc
->uc_mcontext
->__ss
.__rip
;
11508 return (void*) uc
->uc_mcontext
->__ss
.__eip
;
11510 #elif defined(__APPLE__) && defined(MAC_OS_X_VERSION_10_6)
11511 #if defined(_STRUCT_X86_THREAD_STATE64) && !defined(__i386__)
11512 return (void*) uc
->uc_mcontext
->__ss
.__rip
;
11514 return (void*) uc
->uc_mcontext
->__ss
.__eip
;
11516 #elif defined(__i386__) || defined(__X86_64__) || defined(__x86_64__)
11517 return (void*) uc
->uc_mcontext
.gregs
[REG_EIP
]; /* Linux 32/64 bit */
11518 #elif defined(__ia64__) /* Linux IA64 */
11519 return (void*) uc
->uc_mcontext
.sc_ip
;
11525 static void segvHandler(int sig
, siginfo_t
*info
, void *secret
) {
11527 char **messages
= NULL
;
11528 int i
, trace_size
= 0;
11529 unsigned long offset
=0;
11530 ucontext_t
*uc
= (ucontext_t
*) secret
;
11532 REDIS_NOTUSED(info
);
11534 redisLog(REDIS_WARNING
,
11535 "======= Ooops! Redis %s got signal: -%d- =======", REDIS_VERSION
, sig
);
11536 infostring
= genRedisInfoString();
11537 redisLog(REDIS_WARNING
, "%s",infostring
);
11538 /* It's not safe to sdsfree() the returned string under memory
11539 * corruption conditions. Let it leak as we are going to abort */
11541 trace_size
= backtrace(trace
, 100);
11542 /* overwrite sigaction with caller's address */
11543 if (getMcontextEip(uc
) != NULL
) {
11544 trace
[1] = getMcontextEip(uc
);
11546 messages
= backtrace_symbols(trace
, trace_size
);
11548 for (i
=1; i
<trace_size
; ++i
) {
11549 char *fn
= findFuncName(trace
[i
], &offset
), *p
;
11551 p
= strchr(messages
[i
],'+');
11552 if (!fn
|| (p
&& ((unsigned long)strtol(p
+1,NULL
,10)) < offset
)) {
11553 redisLog(REDIS_WARNING
,"%s", messages
[i
]);
11555 redisLog(REDIS_WARNING
,"%d redis-server %p %s + %d", i
, trace
[i
], fn
, (unsigned int)offset
);
11558 /* free(messages); Don't call free() with possibly corrupted memory. */
11562 static void sigtermHandler(int sig
) {
11563 REDIS_NOTUSED(sig
);
11565 redisLog(REDIS_WARNING
,"SIGTERM received, scheduling shutting down...");
11566 server
.shutdown_asap
= 1;
11569 static void setupSigSegvAction(void) {
11570 struct sigaction act
;
11572 sigemptyset (&act
.sa_mask
);
11573 /* When the SA_SIGINFO flag is set in sa_flags then sa_sigaction
11574 * is used. Otherwise, sa_handler is used */
11575 act
.sa_flags
= SA_NODEFER
| SA_ONSTACK
| SA_RESETHAND
| SA_SIGINFO
;
11576 act
.sa_sigaction
= segvHandler
;
11577 sigaction (SIGSEGV
, &act
, NULL
);
11578 sigaction (SIGBUS
, &act
, NULL
);
11579 sigaction (SIGFPE
, &act
, NULL
);
11580 sigaction (SIGILL
, &act
, NULL
);
11581 sigaction (SIGBUS
, &act
, NULL
);
11583 act
.sa_flags
= SA_NODEFER
| SA_ONSTACK
| SA_RESETHAND
;
11584 act
.sa_handler
= sigtermHandler
;
11585 sigaction (SIGTERM
, &act
, NULL
);
11589 #include "staticsymbols.h"
11590 /* This function try to convert a pointer into a function name. It's used in
11591 * oreder to provide a backtrace under segmentation fault that's able to
11592 * display functions declared as static (otherwise the backtrace is useless). */
11593 static char *findFuncName(void *pointer
, unsigned long *offset
){
11595 unsigned long off
, minoff
= 0;
11597 /* Try to match against the Symbol with the smallest offset */
11598 for (i
=0; symsTable
[i
].pointer
; i
++) {
11599 unsigned long lp
= (unsigned long) pointer
;
11601 if (lp
!= (unsigned long)-1 && lp
>= symsTable
[i
].pointer
) {
11602 off
=lp
-symsTable
[i
].pointer
;
11603 if (ret
< 0 || off
< minoff
) {
11609 if (ret
== -1) return NULL
;
11611 return symsTable
[ret
].name
;
11613 #else /* HAVE_BACKTRACE */
/* Variant used when HAVE_BACKTRACE is not defined: does nothing. */
static void setupSigSegvAction(void) {
    /* intentionally empty */
}
11616 #endif /* HAVE_BACKTRACE */