2 * Copyright (c) 2009-2010, Salvatore Sanfilippo <antirez at gmail dot com>
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions are met:
8 * * Redistributions of source code must retain the above copyright notice,
9 * this list of conditions and the following disclaimer.
10 * * Redistributions in binary form must reproduce the above copyright
11 * notice, this list of conditions and the following disclaimer in the
12 * documentation and/or other materials provided with the distribution.
13 * * Neither the name of Redis nor the names of its contributors may be used
14 * to endorse or promote products derived from this software without
15 * specific prior written permission.
17 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
18 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
21 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
22 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
23 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
24 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
25 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
26 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
27 * POSSIBILITY OF SUCH DAMAGE.
30 #define REDIS_VERSION "2.1.1"
45 #endif /* HAVE_BACKTRACE */
53 #include <arpa/inet.h>
57 #include <sys/resource.h>
65 #include "solarisfixes.h"
69 #include "ae.h" /* Event driven programming library */
70 #include "sds.h" /* Dynamic safe strings */
71 #include "anet.h" /* Networking the easy way */
72 #include "dict.h" /* Hash tables */
73 #include "adlist.h" /* Linked lists */
74 #include "zmalloc.h" /* total memory usage aware version of malloc/free */
75 #include "lzf.h" /* LZF compression library */
76 #include "pqsort.h" /* Partial qsort for SORT+LIMIT */
77 #include "zipmap.h" /* Compact dictionary-alike data structure */
78 #include "ziplist.h" /* Compact list data structure */
79 #include "intset.h" /* Compact integer set structure */
80 #include "sha1.h" /* SHA1 is used for DEBUG DIGEST */
81 #include "release.h" /* Release and/or git repository information */
87 /* Static server configuration */
88 #define REDIS_SERVERPORT 6379 /* TCP port */
89 #define REDIS_MAXIDLETIME (60*5) /* default client timeout */
90 #define REDIS_IOBUF_LEN 1024
91 #define REDIS_LOADBUF_LEN 1024
92 #define REDIS_STATIC_ARGS 8
93 #define REDIS_DEFAULT_DBNUM 16
94 #define REDIS_CONFIGLINE_MAX 1024
95 #define REDIS_OBJFREELIST_MAX 1000000 /* Max number of objects to cache */
96 #define REDIS_MAX_SYNC_TIME 60 /* Slave can't take more to sync */
97 #define REDIS_EXPIRELOOKUPS_PER_CRON 10 /* lookup 10 expires per loop */
98 #define REDIS_MAX_WRITE_PER_EVENT (1024*64)
99 #define REDIS_REQUEST_MAX_SIZE (1024*1024*256) /* max bytes in inline command */
101 /* If more than REDIS_WRITEV_THRESHOLD write packets are pending use writev */
102 #define REDIS_WRITEV_THRESHOLD 3
103 /* Max number of iovecs used for each writev call */
104 #define REDIS_WRITEV_IOVEC_COUNT 256
106 /* Hash table parameters */
107 #define REDIS_HT_MINFILL 10 /* Minimal hash table fill 10% */
110 #define REDIS_CMD_BULK 1 /* Bulk write command */
111 #define REDIS_CMD_INLINE 2 /* Inline command */
112 /* REDIS_CMD_DENYOOM deserves a longer comment: all the commands marked with
113 this flag will return an error when the 'maxmemory' option is set in the
114 config file and the server is using more than maxmemory bytes of memory.
115 In short, these commands are denied on low memory conditions. */
116 #define REDIS_CMD_DENYOOM 4
117 #define REDIS_CMD_FORCE_REPLICATION 8 /* Force replication even if dirty is 0 */
120 #define REDIS_STRING 0
125 #define REDIS_VMPOINTER 8
127 /* Object encodings. Some kinds of objects, like Strings and Hashes, can be
128 * internally represented in multiple ways. The 'encoding' field of the object
129 * is set to one of these values (same order as the names in strencoding[]). */
130 #define REDIS_ENCODING_RAW 0 /* Raw representation */
131 #define REDIS_ENCODING_INT 1 /* Encoded as integer */
132 #define REDIS_ENCODING_HT 2 /* Encoded as hash table */
133 #define REDIS_ENCODING_ZIPMAP 3 /* Encoded as zipmap */
134 #define REDIS_ENCODING_LIST 4 /* Encoded as linked list */
135 #define REDIS_ENCODING_ZIPLIST 5 /* Encoded as ziplist */
136 #define REDIS_ENCODING_INTSET 6 /* Encoded as intset */
138 static char* strencoding
[] = {
139 "raw", "int", "hashtable", "zipmap", "list", "ziplist", "intset"
142 /* Object types only used for dumping to disk */
143 #define REDIS_EXPIRETIME 253
144 #define REDIS_SELECTDB 254
145 #define REDIS_EOF 255
147 /* Defines related to the dump file format. Storing 32 bit lengths for short
148 * keys requires a lot of space, so we check the most significant 2 bits of
149 * the first byte to interpret the length:
151 * 00|000000 => if the two MSB are 00 the len is the 6 bits of this byte
152 * 01|000000 00000000 => 01, the len is 14 bits, 6 bits + 8 bits of next byte
153 * 10|000000 [32 bit integer] => if it's 10, a full 32 bit len will follow
154 * 11|000000 this means: specially encoded object will follow. The six bits
155 * number specify the kind of object that follows.
156 * See the REDIS_RDB_ENC_* defines.
158 * Lengths up to 63 are stored using a single byte, most DB keys, and many
159 * values, will fit inside. */
160 #define REDIS_RDB_6BITLEN 0
161 #define REDIS_RDB_14BITLEN 1
162 #define REDIS_RDB_32BITLEN 2
163 #define REDIS_RDB_ENCVAL 3
164 #define REDIS_RDB_LENERR UINT_MAX
166 /* When a length of a string object stored on disk has the first two bits
167 * set, the remaining six bits specify a special encoding for the object
168 * according to the following defines: */
169 #define REDIS_RDB_ENC_INT8 0 /* 8 bit signed integer */
170 #define REDIS_RDB_ENC_INT16 1 /* 16 bit signed integer */
171 #define REDIS_RDB_ENC_INT32 2 /* 32 bit signed integer */
172 #define REDIS_RDB_ENC_LZF 3 /* string compressed with FASTLZ */
174 /* Virtual memory object->where field. */
175 #define REDIS_VM_MEMORY 0 /* The object is on memory */
176 #define REDIS_VM_SWAPPED 1 /* The object is on disk */
177 #define REDIS_VM_SWAPPING 2 /* Redis is swapping this object on disk */
178 #define REDIS_VM_LOADING 3 /* Redis is loading this object from disk */
180 /* Virtual memory static configuration stuff.
181 * Check vmFindContiguousPages() to know more about these magic numbers. */
182 #define REDIS_VM_MAX_NEAR_PAGES 65536
183 #define REDIS_VM_MAX_RANDOM_JUMP 4096
184 #define REDIS_VM_MAX_THREADS 32
185 #define REDIS_THREAD_STACK_SIZE (1024*1024*4)
186 /* The following is the *percentage* of completed I/O jobs to process when the
187 * handler is called. While Virtual Memory I/O operations are performed by
188 * threads, these operations must be processed by the main thread when completed
189 * in order to take effect. */
190 #define REDIS_MAX_COMPLETED_JOBS_PROCESSED 1
193 #define REDIS_SLAVE 1 /* This client is a slave server */
194 #define REDIS_MASTER 2 /* This client is a master server */
195 #define REDIS_MONITOR 4 /* This client is a slave monitor, see MONITOR */
196 #define REDIS_MULTI 8 /* This client is in a MULTI context */
197 #define REDIS_BLOCKED 16 /* The client is waiting in a blocking operation */
198 #define REDIS_IO_WAIT 32 /* The client is waiting for Virtual Memory I/O */
199 #define REDIS_DIRTY_CAS 64 /* Watched keys modified. EXEC will fail. */
201 /* Slave replication state - slave side */
202 #define REDIS_REPL_NONE 0 /* No active replication */
203 #define REDIS_REPL_CONNECT 1 /* Must connect to master */
204 #define REDIS_REPL_CONNECTED 2 /* Connected to master */
206 /* Slave replication state - from the point of view of master
207 * Note that in SEND_BULK and ONLINE state the slave receives new updates
208 * in its output queue. In the WAIT_BGSAVE state instead the server is waiting
209 * to start the next background saving in order to send updates to it. */
210 #define REDIS_REPL_WAIT_BGSAVE_START 3 /* master waits bgsave to start feeding it */
211 #define REDIS_REPL_WAIT_BGSAVE_END 4 /* master waits bgsave to start bulk DB transmission */
212 #define REDIS_REPL_SEND_BULK 5 /* master is sending the bulk DB */
213 #define REDIS_REPL_ONLINE 6 /* bulk DB already transmitted, receive updates */
215 /* List related stuff */
219 /* Sort operations */
220 #define REDIS_SORT_GET 0
221 #define REDIS_SORT_ASC 1
222 #define REDIS_SORT_DESC 2
223 #define REDIS_SORTKEY_MAX 1024
226 #define REDIS_DEBUG 0
227 #define REDIS_VERBOSE 1
228 #define REDIS_NOTICE 2
229 #define REDIS_WARNING 3
/* Anti-warning macro: evaluates V and discards the result, which silences
 * "unused parameter/variable" diagnostics. The argument is parenthesized so
 * that any expression can be passed, not only a plain identifier. */
#define REDIS_NOTUSED(V) ((void)(V))
234 #define ZSKIPLIST_MAXLEVEL 32 /* Should be enough for 2^32 elements */
235 #define ZSKIPLIST_P 0.25 /* Skiplist P = 1/4 */
237 /* Append only defines */
238 #define APPENDFSYNC_NO 0
239 #define APPENDFSYNC_ALWAYS 1
240 #define APPENDFSYNC_EVERYSEC 2
242 /* Zip structure related defaults */
243 #define REDIS_HASH_MAX_ZIPMAP_ENTRIES 64
244 #define REDIS_HASH_MAX_ZIPMAP_VALUE 512
245 #define REDIS_LIST_MAX_ZIPLIST_ENTRIES 1024
246 #define REDIS_LIST_MAX_ZIPLIST_VALUE 32
247 #define REDIS_SET_MAX_INTSET_ENTRIES 4096
/* Assertion helpers. Both hand the stringified expression/message plus the
 * source file and line to _redisAssert()/_redisPanic() (which can print the
 * stack trace) and then terminate the process with _exit(1).
 *
 * Each expansion is one fully parenthesized expression, so it behaves like a
 * single call wherever it is used. Without the outer parentheses,
 * "cond ? (void)0 : redisPanic(x)" would parse as
 * "(cond ? (void)0 : _redisPanic(...)), _exit(1)" and always exit. */
#define redisAssert(_e) ((_e)?(void)0 : (_redisAssert(#_e,__FILE__,__LINE__),_exit(1)))
#define redisPanic(_e) (_redisPanic(#_e,__FILE__,__LINE__),_exit(1))
252 static void _redisAssert(char *estr
, char *file
, int line
);
253 static void _redisPanic(char *msg
, char *file
, int line
);
255 /*================================= Data types ============================== */
257 /* A redis object, that is a type able to hold a string / list / set */
259 /* The actual Redis Object */
260 typedef struct redisObject
{
262 unsigned storage
:2; /* REDIS_VM_MEMORY or REDIS_VM_SWAPPING */
264 unsigned lru
:22; /* lru time (relative to server.lruclock) */
267 /* VM fields, these are only allocated if VM is active, otherwise the
268 * object allocation function will just allocate
269 * sizeof(redisObject) minus sizeof(redisObjectVM), so using
270 * Redis without VM active will not have any overhead. */
273 /* The VM pointer structure - identifies an object in the swap file.
275 * This object is stored in place of the value
276 * object in the main key->value hash table representing a database.
277 * Note that the first fields (type, storage) are the same as the redisObject
278 * structure so that vmPointer structures can be accessed even when cast
279 * to redisObject structures.
281 * This is useful as we don't know whether a value object is on disk or not, but
282 * we are always able to read obj->storage to check this. For vmPointer
283 * structures "type" is set to REDIS_VMPOINTER (even though without this field
284 * it is still possible to check the kind of object from the value of 'storage').*/
285 typedef struct vmPointer
{
287 unsigned storage
:2; /* REDIS_VM_SWAPPED or REDIS_VM_LOADING */
289 unsigned int vtype
; /* type of the object stored in the swap file */
290 off_t page
; /* the page at witch the object is stored on disk */
291 off_t usedpages
; /* number of pages used on disk */
294 /* Macro used to initialize a Redis object allocated on the stack.
295 * Note that this macro is kept near the structure definition to make sure
296 * we'll update it when the structure is changed, to avoid bugs like
297 * bug #85 introduced exactly in this way. */
298 #define initStaticStringObject(_var,_ptr) do { \
300 _var.type = REDIS_STRING; \
301 _var.encoding = REDIS_ENCODING_RAW; \
303 _var.storage = REDIS_VM_MEMORY; \
306 typedef struct redisDb
{
307 dict
*dict
; /* The keyspace for this DB */
308 dict
*expires
; /* Timeout of keys with a timeout set */
309 dict
*blocking_keys
; /* Keys with clients waiting for data (BLPOP) */
310 dict
*io_keys
; /* Keys with clients waiting for VM I/O */
311 dict
*watched_keys
; /* WATCHED keys for MULTI/EXEC CAS */
315 /* Client MULTI/EXEC state */
316 typedef struct multiCmd
{
319 struct redisCommand
*cmd
;
322 typedef struct multiState
{
323 multiCmd
*commands
; /* Array of MULTI commands */
324 int count
; /* Total number of MULTI commands */
327 /* With multiplexing we need to keep per-client state.
328 * Clients are kept in a linked list. */
329 typedef struct redisClient
{
334 robj
**argv
, **mbargv
;
336 int bulklen
; /* bulk read len. -1 if not in bulk read mode */
337 int multibulk
; /* multi bulk command format active */
340 time_t lastinteraction
; /* time of the last interaction, used for timeout */
341 int flags
; /* REDIS_SLAVE | REDIS_MONITOR | REDIS_MULTI ... */
342 int slaveseldb
; /* slave selected db, if this client is a slave */
343 int authenticated
; /* when requirepass is non-NULL */
344 int replstate
; /* replication state if this is a slave */
345 int repldbfd
; /* replication DB file descriptor */
346 long repldboff
; /* replication DB file offset */
347 off_t repldbsize
; /* replication DB file size */
348 multiState mstate
; /* MULTI/EXEC state */
349 robj
**blocking_keys
; /* The key we are waiting to terminate a blocking
350 * operation such as BLPOP. Otherwise NULL. */
351 int blocking_keys_num
; /* Number of blocking keys */
352 time_t blockingto
; /* Blocking operation timeout. If UNIX current time
353 * is >= blockingto then the operation timed out. */
354 list
*io_keys
; /* Keys this client is waiting to be loaded from the
355 * swap file in order to continue. */
356 list
*watched_keys
; /* Keys WATCHED for MULTI/EXEC CAS */
357 dict
*pubsub_channels
; /* channels a client is interested in (SUBSCRIBE) */
358 list
*pubsub_patterns
; /* patterns a client is interested in (SUBSCRIBE) */
366 /* Global server state structure */
371 long long dirty
; /* changes to DB from the last save */
373 list
*slaves
, *monitors
;
374 char neterr
[ANET_ERR_LEN
];
376 int cronloops
; /* number of times the cron function run */
377 list
*objfreelist
; /* A list of freed objects to avoid malloc() */
378 time_t lastsave
; /* Unix time of last save succeeede */
379 /* Fields used only for stats */
380 time_t stat_starttime
; /* server start time */
381 long long stat_numcommands
; /* number of processed commands */
382 long long stat_numconnections
; /* number of connections received */
383 long long stat_expiredkeys
; /* number of expired keys */
392 int no_appendfsync_on_rewrite
;
398 pid_t bgsavechildpid
;
399 pid_t bgrewritechildpid
;
400 sds bgrewritebuf
; /* buffer taken by parent during oppend only rewrite */
401 sds aofbuf
; /* AOF buffer, written before entering the event loop */
402 struct saveparam
*saveparams
;
407 char *appendfilename
;
411 /* Replication related */
416 redisClient
*master
; /* client that is master for this slave */
418 unsigned int maxclients
;
419 unsigned long long maxmemory
;
420 unsigned int blpop_blocked_clients
;
421 unsigned int vm_blocked_clients
;
422 /* Sort parameters - qsort_r() is only available under BSD so we
423 * have to take this state global, in order to pass it to sortCompare() */
427 /* Virtual memory configuration */
432 unsigned long long vm_max_memory
;
433 /* Zip structure config */
434 size_t hash_max_zipmap_entries
;
435 size_t hash_max_zipmap_value
;
436 size_t list_max_ziplist_entries
;
437 size_t list_max_ziplist_value
;
438 size_t set_max_intset_entries
;
439 /* Virtual memory state */
442 off_t vm_next_page
; /* Next probably empty page */
443 off_t vm_near_pages
; /* Number of pages allocated sequentially */
444 unsigned char *vm_bitmap
; /* Bitmap of free/used pages */
445 time_t unixtime
; /* Unix time sampled every second. */
446 /* Virtual memory I/O threads stuff */
447 /* An I/O thread process an element taken from the io_jobs queue and
448 * put the result of the operation in the io_done list. While the
449 * job is being processed, it's put on io_processing queue. */
450 list
*io_newjobs
; /* List of VM I/O jobs yet to be processed */
451 list
*io_processing
; /* List of VM I/O jobs being processed */
452 list
*io_processed
; /* List of VM I/O jobs already processed */
453 list
*io_ready_clients
; /* Clients ready to be unblocked. All keys loaded */
454 pthread_mutex_t io_mutex
; /* lock to access io_jobs/io_done/io_thread_job */
455 pthread_mutex_t obj_freelist_mutex
; /* safe redis objects creation/free */
456 pthread_mutex_t io_swapfile_mutex
; /* So we can lseek + write */
457 pthread_attr_t io_threads_attr
; /* attributes for threads creation */
458 int io_active_threads
; /* Number of running I/O threads */
459 int vm_max_threads
; /* Max number of I/O threads running at the same time */
460 /* Our main thread is blocked on the event loop, looking for sockets ready
461 * to be read or written, so when a threaded I/O operation is ready to be
462 * processed by the main thread, the I/O thread will use a unix pipe to
463 * awake the main thread. The following are the two pipe FDs. */
464 int io_ready_pipe_read
;
465 int io_ready_pipe_write
;
466 /* Virtual memory stats */
467 unsigned long long vm_stats_used_pages
;
468 unsigned long long vm_stats_swapped_objects
;
469 unsigned long long vm_stats_swapouts
;
470 unsigned long long vm_stats_swapins
;
472 dict
*pubsub_channels
; /* Map channels to list of subscribed clients */
473 list
*pubsub_patterns
; /* A list of pubsub_patterns */
476 unsigned lruclock
:22; /* clock incrementing every minute, for LRU */
477 unsigned lruclock_padding
:10;
480 typedef struct pubsubPattern
{
/* Signature of a command implementation: takes the client issuing the
 * command and returns nothing. Stored in the 'proc' field of struct
 * redisCommand; the *Command() prototypes in this file have this shape. */
485 typedef void redisCommandProc(redisClient
*c
);
/* Signature of the optional per-command hook stored in the 'vm_preload_proc'
 * field of struct redisCommand: it receives the client, the command entry and
 * the argument vector (argc/argv), and is used to determine which keys need
 * to be loaded in the background before the command is executed. */
486 typedef void redisVmPreloadProc(redisClient
*c
, struct redisCommand
*cmd
, int argc
, robj
**argv
);
487 struct redisCommand
{
489 redisCommandProc
*proc
;
492 /* Use a function to determine which keys need to be loaded
493 * in the background prior to executing this command. Takes precedence
494 * over vm_firstkey and others, ignored when NULL */
495 redisVmPreloadProc
*vm_preload_proc
;
496 /* What keys should be loaded in background when calling this command? */
497 int vm_firstkey
; /* The first argument that's a key (0 = no keys) */
498 int vm_lastkey
; /* THe last argument that's a key */
499 int vm_keystep
; /* The step between first and last key */
502 struct redisFunctionSym
{
504 unsigned long pointer
;
507 typedef struct _redisSortObject
{
515 typedef struct _redisSortOperation
{
518 } redisSortOperation
;
520 /* ZSETs use a specialized version of Skiplists */
522 typedef struct zskiplistNode
{
523 struct zskiplistNode
**forward
;
524 struct zskiplistNode
*backward
;
530 typedef struct zskiplist
{
531 struct zskiplistNode
*header
, *tail
;
532 unsigned long length
;
536 typedef struct zset
{
541 /* Our shared "common" objects */
543 #define REDIS_SHARED_INTEGERS 10000
544 struct sharedObjectsStruct
{
545 robj
*crlf
, *ok
, *err
, *emptybulk
, *czero
, *cone
, *pong
, *space
,
546 *colon
, *nullbulk
, *nullmultibulk
, *queued
,
547 *emptymultibulk
, *wrongtypeerr
, *nokeyerr
, *syntaxerr
, *sameobjecterr
,
548 *outofrangeerr
, *plus
,
549 *select0
, *select1
, *select2
, *select3
, *select4
,
550 *select5
, *select6
, *select7
, *select8
, *select9
,
551 *messagebulk
, *pmessagebulk
, *subscribebulk
, *unsubscribebulk
, *mbulk3
,
552 *mbulk4
, *psubscribebulk
, *punsubscribebulk
,
553 *integers
[REDIS_SHARED_INTEGERS
];
556 /* Global vars that are actually used as constants. The following double
557 * values are used for double on-disk serialization, and are initialized
558 * at runtime to avoid strange compiler optimizations. */
560 static double R_Zero
, R_PosInf
, R_NegInf
, R_Nan
;
562 /* VM threaded I/O request message */
563 #define REDIS_IOJOB_LOAD 0 /* Load from disk to memory */
564 #define REDIS_IOJOB_PREPARE_SWAP 1 /* Compute needed pages */
565 #define REDIS_IOJOB_DO_SWAP 2 /* Swap from memory to disk */
566 typedef struct iojob
{
567 int type
; /* Request type, REDIS_IOJOB_* */
568 redisDb
*db
;/* Redis database */
569 robj
*key
; /* This I/O request is about swapping this key */
570 robj
*id
; /* Unique identifier of this job:
571 this is the object to swap for REDIS_IOREQ_*_SWAP, or the
572 vmpointer objct for REDIS_IOREQ_LOAD. */
573 robj
*val
; /* the value to swap for REDIS_IOREQ_*_SWAP, otherwise this
574 * field is populated by the I/O thread for REDIS_IOREQ_LOAD. */
575 off_t page
; /* Swap page where to read/write the object */
576 off_t pages
; /* Swap pages needed to save object. PREPARE_SWAP return val */
577 int canceled
; /* True if this command was canceled by blocking side of VM */
578 pthread_t thread
; /* ID of the thread processing this entry */
581 /*================================ Prototypes =============================== */
583 static void freeStringObject(robj
*o
);
584 static void freeListObject(robj
*o
);
585 static void freeSetObject(robj
*o
);
586 static void decrRefCount(void *o
);
587 static robj
*createObject(int type
, void *ptr
);
588 static void freeClient(redisClient
*c
);
589 static int rdbLoad(char *filename
);
590 static void addReply(redisClient
*c
, robj
*obj
);
591 static void addReplySds(redisClient
*c
, sds s
);
592 static void incrRefCount(robj
*o
);
593 static int rdbSaveBackground(char *filename
);
594 static robj
*createStringObject(char *ptr
, size_t len
);
595 static robj
*dupStringObject(robj
*o
);
596 static void replicationFeedSlaves(list
*slaves
, int dictid
, robj
**argv
, int argc
);
597 static void replicationFeedMonitors(list
*monitors
, int dictid
, robj
**argv
, int argc
);
598 static void flushAppendOnlyFile(void);
599 static void feedAppendOnlyFile(struct redisCommand
*cmd
, int dictid
, robj
**argv
, int argc
);
600 static int syncWithMaster(void);
601 static robj
*tryObjectEncoding(robj
*o
);
602 static robj
*getDecodedObject(robj
*o
);
603 static int removeExpire(redisDb
*db
, robj
*key
);
604 static int expireIfNeeded(redisDb
*db
, robj
*key
);
605 static int deleteIfVolatile(redisDb
*db
, robj
*key
);
606 static int dbDelete(redisDb
*db
, robj
*key
);
607 static time_t getExpire(redisDb
*db
, robj
*key
);
608 static int setExpire(redisDb
*db
, robj
*key
, time_t when
);
609 static void updateSlavesWaitingBgsave(int bgsaveerr
);
610 static void freeMemoryIfNeeded(void);
611 static int processCommand(redisClient
*c
);
612 static void setupSigSegvAction(void);
613 static void rdbRemoveTempFile(pid_t childpid
);
614 static void aofRemoveTempFile(pid_t childpid
);
615 static size_t stringObjectLen(robj
*o
);
616 static void processInputBuffer(redisClient
*c
);
617 static zskiplist
*zslCreate(void);
618 static void zslFree(zskiplist
*zsl
);
619 static void zslInsert(zskiplist
*zsl
, double score
, robj
*obj
);
620 static void sendReplyToClientWritev(aeEventLoop
*el
, int fd
, void *privdata
, int mask
);
621 static void initClientMultiState(redisClient
*c
);
622 static void freeClientMultiState(redisClient
*c
);
623 static void queueMultiCommand(redisClient
*c
, struct redisCommand
*cmd
);
624 static void unblockClientWaitingData(redisClient
*c
);
625 static int handleClientsWaitingListPush(redisClient
*c
, robj
*key
, robj
*ele
);
626 static void vmInit(void);
627 static void vmMarkPagesFree(off_t page
, off_t count
);
628 static robj
*vmLoadObject(robj
*o
);
629 static robj
*vmPreviewObject(robj
*o
);
630 static int vmSwapOneObjectBlocking(void);
631 static int vmSwapOneObjectThreaded(void);
632 static int vmCanSwapOut(void);
633 static int tryFreeOneObjectFromFreelist(void);
634 static void acceptHandler(aeEventLoop
*el
, int fd
, void *privdata
, int mask
);
635 static void vmThreadedIOCompletedJob(aeEventLoop
*el
, int fd
, void *privdata
, int mask
);
636 static void vmCancelThreadedIOJob(robj
*o
);
637 static void lockThreadedIO(void);
638 static void unlockThreadedIO(void);
639 static int vmSwapObjectThreaded(robj
*key
, robj
*val
, redisDb
*db
);
640 static void freeIOJob(iojob
*j
);
641 static void queueIOJob(iojob
*j
);
642 static int vmWriteObjectOnSwap(robj
*o
, off_t page
);
643 static robj
*vmReadObjectFromSwap(off_t page
, int type
);
644 static void waitEmptyIOJobsQueue(void);
645 static void vmReopenSwapFile(void);
646 static int vmFreePage(off_t page
);
647 static void zunionInterBlockClientOnSwappedKeys(redisClient
*c
, struct redisCommand
*cmd
, int argc
, robj
**argv
);
648 static void execBlockClientOnSwappedKeys(redisClient
*c
, struct redisCommand
*cmd
, int argc
, robj
**argv
);
649 static int blockClientOnSwappedKeys(redisClient
*c
, struct redisCommand
*cmd
);
650 static int dontWaitForSwappedKey(redisClient
*c
, robj
*key
);
651 static void handleClientsBlockedOnSwappedKey(redisDb
*db
, robj
*key
);
652 static void readQueryFromClient(aeEventLoop
*el
, int fd
, void *privdata
, int mask
);
653 static struct redisCommand
*lookupCommand(char *name
);
654 static void call(redisClient
*c
, struct redisCommand
*cmd
);
655 static void resetClient(redisClient
*c
);
656 static void convertToRealHash(robj
*o
);
657 static void listTypeConvert(robj
*o
, int enc
);
658 static void setTypeConvert(robj
*o
, int enc
);
659 static int pubsubUnsubscribeAllChannels(redisClient
*c
, int notify
);
660 static int pubsubUnsubscribeAllPatterns(redisClient
*c
, int notify
);
661 static void freePubsubPattern(void *p
);
662 static int listMatchPubsubPattern(void *a
, void *b
);
663 static int compareStringObjects(robj
*a
, robj
*b
);
664 static int equalStringObjects(robj
*a
, robj
*b
);
666 static int rewriteAppendOnlyFileBackground(void);
667 static vmpointer
*vmSwapObjectBlocking(robj
*val
);
/* Declared with an explicit (void) parameter list, like the other no-argument
 * prototypes in this file: an empty () leaves the parameters unspecified and
 * disables argument checking at call sites. */
static int prepareForShutdown(void);
669 static void touchWatchedKey(redisDb
*db
, robj
*key
);
670 static void touchWatchedKeysOnFlush(int dbid
);
671 static void unwatchAllKeys(redisClient
*c
);
673 static void authCommand(redisClient
*c
);
674 static void pingCommand(redisClient
*c
);
675 static void echoCommand(redisClient
*c
);
676 static void setCommand(redisClient
*c
);
677 static void setnxCommand(redisClient
*c
);
678 static void setexCommand(redisClient
*c
);
679 static void getCommand(redisClient
*c
);
680 static void delCommand(redisClient
*c
);
681 static void existsCommand(redisClient
*c
);
682 static void incrCommand(redisClient
*c
);
683 static void decrCommand(redisClient
*c
);
684 static void incrbyCommand(redisClient
*c
);
685 static void decrbyCommand(redisClient
*c
);
686 static void selectCommand(redisClient
*c
);
687 static void randomkeyCommand(redisClient
*c
);
688 static void keysCommand(redisClient
*c
);
689 static void dbsizeCommand(redisClient
*c
);
690 static void lastsaveCommand(redisClient
*c
);
691 static void saveCommand(redisClient
*c
);
692 static void bgsaveCommand(redisClient
*c
);
693 static void bgrewriteaofCommand(redisClient
*c
);
694 static void shutdownCommand(redisClient
*c
);
695 static void moveCommand(redisClient
*c
);
696 static void renameCommand(redisClient
*c
);
697 static void renamenxCommand(redisClient
*c
);
698 static void lpushCommand(redisClient
*c
);
699 static void rpushCommand(redisClient
*c
);
700 static void lpopCommand(redisClient
*c
);
701 static void rpopCommand(redisClient
*c
);
702 static void llenCommand(redisClient
*c
);
703 static void lindexCommand(redisClient
*c
);
704 static void lrangeCommand(redisClient
*c
);
705 static void ltrimCommand(redisClient
*c
);
706 static void typeCommand(redisClient
*c
);
707 static void lsetCommand(redisClient
*c
);
708 static void saddCommand(redisClient
*c
);
709 static void sremCommand(redisClient
*c
);
710 static void smoveCommand(redisClient
*c
);
711 static void sismemberCommand(redisClient
*c
);
712 static void scardCommand(redisClient
*c
);
713 static void spopCommand(redisClient
*c
);
714 static void srandmemberCommand(redisClient
*c
);
715 static void sinterCommand(redisClient
*c
);
716 static void sinterstoreCommand(redisClient
*c
);
717 static void sunionCommand(redisClient
*c
);
718 static void sunionstoreCommand(redisClient
*c
);
719 static void sdiffCommand(redisClient
*c
);
720 static void sdiffstoreCommand(redisClient
*c
);
721 static void syncCommand(redisClient
*c
);
722 static void flushdbCommand(redisClient
*c
);
723 static void flushallCommand(redisClient
*c
);
724 static void sortCommand(redisClient
*c
);
725 static void lremCommand(redisClient
*c
);
726 static void rpoplpushcommand(redisClient
*c
);
727 static void infoCommand(redisClient
*c
);
728 static void mgetCommand(redisClient
*c
);
729 static void monitorCommand(redisClient
*c
);
730 static void expireCommand(redisClient
*c
);
731 static void expireatCommand(redisClient
*c
);
732 static void getsetCommand(redisClient
*c
);
733 static void ttlCommand(redisClient
*c
);
734 static void slaveofCommand(redisClient
*c
);
735 static void debugCommand(redisClient
*c
);
736 static void msetCommand(redisClient
*c
);
737 static void msetnxCommand(redisClient
*c
);
738 static void zaddCommand(redisClient
*c
);
739 static void zincrbyCommand(redisClient
*c
);
740 static void zrangeCommand(redisClient
*c
);
741 static void zrangebyscoreCommand(redisClient
*c
);
742 static void zcountCommand(redisClient
*c
);
743 static void zrevrangeCommand(redisClient
*c
);
744 static void zcardCommand(redisClient
*c
);
745 static void zremCommand(redisClient
*c
);
746 static void zscoreCommand(redisClient
*c
);
747 static void zremrangebyscoreCommand(redisClient
*c
);
748 static void multiCommand(redisClient
*c
);
749 static void execCommand(redisClient
*c
);
750 static void discardCommand(redisClient
*c
);
751 static void blpopCommand(redisClient
*c
);
752 static void brpopCommand(redisClient
*c
);
753 static void appendCommand(redisClient
*c
);
754 static void substrCommand(redisClient
*c
);
755 static void zrankCommand(redisClient
*c
);
756 static void zrevrankCommand(redisClient
*c
);
757 static void hsetCommand(redisClient
*c
);
758 static void hsetnxCommand(redisClient
*c
);
759 static void hgetCommand(redisClient
*c
);
760 static void hmsetCommand(redisClient
*c
);
761 static void hmgetCommand(redisClient
*c
);
762 static void hdelCommand(redisClient
*c
);
763 static void hlenCommand(redisClient
*c
);
764 static void zremrangebyrankCommand(redisClient
*c
);
765 static void zunionstoreCommand(redisClient
*c
);
766 static void zinterstoreCommand(redisClient
*c
);
767 static void hkeysCommand(redisClient
*c
);
768 static void hvalsCommand(redisClient
*c
);
769 static void hgetallCommand(redisClient
*c
);
770 static void hexistsCommand(redisClient
*c
);
771 static void configCommand(redisClient
*c
);
772 static void hincrbyCommand(redisClient
*c
);
773 static void subscribeCommand(redisClient
*c
);
774 static void unsubscribeCommand(redisClient
*c
);
775 static void psubscribeCommand(redisClient
*c
);
776 static void punsubscribeCommand(redisClient
*c
);
777 static void publishCommand(redisClient
*c
);
778 static void watchCommand(redisClient
*c
);
779 static void unwatchCommand(redisClient
*c
);
781 /*================================= Globals ================================= */
784 static struct redisServer server
; /* server global state */
785 static struct redisCommand
*commandTable
;
786 static struct redisCommand readonlyCommandTable
[] = {
787 {"get",getCommand
,2,REDIS_CMD_INLINE
,NULL
,1,1,1},
788 {"set",setCommand
,3,REDIS_CMD_BULK
|REDIS_CMD_DENYOOM
,NULL
,0,0,0},
789 {"setnx",setnxCommand
,3,REDIS_CMD_BULK
|REDIS_CMD_DENYOOM
,NULL
,0,0,0},
790 {"setex",setexCommand
,4,REDIS_CMD_BULK
|REDIS_CMD_DENYOOM
,NULL
,0,0,0},
791 {"append",appendCommand
,3,REDIS_CMD_BULK
|REDIS_CMD_DENYOOM
,NULL
,1,1,1},
792 {"substr",substrCommand
,4,REDIS_CMD_INLINE
,NULL
,1,1,1},
793 {"del",delCommand
,-2,REDIS_CMD_INLINE
,NULL
,0,0,0},
794 {"exists",existsCommand
,2,REDIS_CMD_INLINE
,NULL
,1,1,1},
795 {"incr",incrCommand
,2,REDIS_CMD_INLINE
|REDIS_CMD_DENYOOM
,NULL
,1,1,1},
796 {"decr",decrCommand
,2,REDIS_CMD_INLINE
|REDIS_CMD_DENYOOM
,NULL
,1,1,1},
797 {"mget",mgetCommand
,-2,REDIS_CMD_INLINE
,NULL
,1,-1,1},
798 {"rpush",rpushCommand
,3,REDIS_CMD_BULK
|REDIS_CMD_DENYOOM
,NULL
,1,1,1},
799 {"lpush",lpushCommand
,3,REDIS_CMD_BULK
|REDIS_CMD_DENYOOM
,NULL
,1,1,1},
800 {"rpop",rpopCommand
,2,REDIS_CMD_INLINE
,NULL
,1,1,1},
801 {"lpop",lpopCommand
,2,REDIS_CMD_INLINE
,NULL
,1,1,1},
802 {"brpop",brpopCommand
,-3,REDIS_CMD_INLINE
,NULL
,1,1,1},
803 {"blpop",blpopCommand
,-3,REDIS_CMD_INLINE
,NULL
,1,1,1},
804 {"llen",llenCommand
,2,REDIS_CMD_INLINE
,NULL
,1,1,1},
805 {"lindex",lindexCommand
,3,REDIS_CMD_INLINE
,NULL
,1,1,1},
806 {"lset",lsetCommand
,4,REDIS_CMD_BULK
|REDIS_CMD_DENYOOM
,NULL
,1,1,1},
807 {"lrange",lrangeCommand
,4,REDIS_CMD_INLINE
,NULL
,1,1,1},
808 {"ltrim",ltrimCommand
,4,REDIS_CMD_INLINE
,NULL
,1,1,1},
809 {"lrem",lremCommand
,4,REDIS_CMD_BULK
,NULL
,1,1,1},
810 {"rpoplpush",rpoplpushcommand
,3,REDIS_CMD_INLINE
|REDIS_CMD_DENYOOM
,NULL
,1,2,1},
811 {"sadd",saddCommand
,3,REDIS_CMD_BULK
|REDIS_CMD_DENYOOM
,NULL
,1,1,1},
812 {"srem",sremCommand
,3,REDIS_CMD_BULK
,NULL
,1,1,1},
813 {"smove",smoveCommand
,4,REDIS_CMD_BULK
,NULL
,1,2,1},
814 {"sismember",sismemberCommand
,3,REDIS_CMD_BULK
,NULL
,1,1,1},
815 {"scard",scardCommand
,2,REDIS_CMD_INLINE
,NULL
,1,1,1},
816 {"spop",spopCommand
,2,REDIS_CMD_INLINE
,NULL
,1,1,1},
817 {"srandmember",srandmemberCommand
,2,REDIS_CMD_INLINE
,NULL
,1,1,1},
818 {"sinter",sinterCommand
,-2,REDIS_CMD_INLINE
|REDIS_CMD_DENYOOM
,NULL
,1,-1,1},
819 {"sinterstore",sinterstoreCommand
,-3,REDIS_CMD_INLINE
|REDIS_CMD_DENYOOM
,NULL
,2,-1,1},
820 {"sunion",sunionCommand
,-2,REDIS_CMD_INLINE
|REDIS_CMD_DENYOOM
,NULL
,1,-1,1},
821 {"sunionstore",sunionstoreCommand
,-3,REDIS_CMD_INLINE
|REDIS_CMD_DENYOOM
,NULL
,2,-1,1},
822 {"sdiff",sdiffCommand
,-2,REDIS_CMD_INLINE
|REDIS_CMD_DENYOOM
,NULL
,1,-1,1},
823 {"sdiffstore",sdiffstoreCommand
,-3,REDIS_CMD_INLINE
|REDIS_CMD_DENYOOM
,NULL
,2,-1,1},
824 {"smembers",sinterCommand
,2,REDIS_CMD_INLINE
,NULL
,1,1,1},
825 {"zadd",zaddCommand
,4,REDIS_CMD_BULK
|REDIS_CMD_DENYOOM
,NULL
,1,1,1},
826 {"zincrby",zincrbyCommand
,4,REDIS_CMD_BULK
|REDIS_CMD_DENYOOM
,NULL
,1,1,1},
827 {"zrem",zremCommand
,3,REDIS_CMD_BULK
,NULL
,1,1,1},
828 {"zremrangebyscore",zremrangebyscoreCommand
,4,REDIS_CMD_INLINE
,NULL
,1,1,1},
829 {"zremrangebyrank",zremrangebyrankCommand
,4,REDIS_CMD_INLINE
,NULL
,1,1,1},
830 {"zunionstore",zunionstoreCommand
,-4,REDIS_CMD_INLINE
|REDIS_CMD_DENYOOM
,zunionInterBlockClientOnSwappedKeys
,0,0,0},
831 {"zinterstore",zinterstoreCommand
,-4,REDIS_CMD_INLINE
|REDIS_CMD_DENYOOM
,zunionInterBlockClientOnSwappedKeys
,0,0,0},
832 {"zrange",zrangeCommand
,-4,REDIS_CMD_INLINE
,NULL
,1,1,1},
833 {"zrangebyscore",zrangebyscoreCommand
,-4,REDIS_CMD_INLINE
,NULL
,1,1,1},
834 {"zcount",zcountCommand
,4,REDIS_CMD_INLINE
,NULL
,1,1,1},
835 {"zrevrange",zrevrangeCommand
,-4,REDIS_CMD_INLINE
,NULL
,1,1,1},
836 {"zcard",zcardCommand
,2,REDIS_CMD_INLINE
,NULL
,1,1,1},
837 {"zscore",zscoreCommand
,3,REDIS_CMD_BULK
|REDIS_CMD_DENYOOM
,NULL
,1,1,1},
838 {"zrank",zrankCommand
,3,REDIS_CMD_BULK
,NULL
,1,1,1},
839 {"zrevrank",zrevrankCommand
,3,REDIS_CMD_BULK
,NULL
,1,1,1},
840 {"hset",hsetCommand
,4,REDIS_CMD_BULK
|REDIS_CMD_DENYOOM
,NULL
,1,1,1},
841 {"hsetnx",hsetnxCommand
,4,REDIS_CMD_BULK
|REDIS_CMD_DENYOOM
,NULL
,1,1,1},
842 {"hget",hgetCommand
,3,REDIS_CMD_BULK
,NULL
,1,1,1},
843 {"hmset",hmsetCommand
,-4,REDIS_CMD_BULK
|REDIS_CMD_DENYOOM
,NULL
,1,1,1},
844 {"hmget",hmgetCommand
,-3,REDIS_CMD_BULK
,NULL
,1,1,1},
845 {"hincrby",hincrbyCommand
,4,REDIS_CMD_INLINE
|REDIS_CMD_DENYOOM
,NULL
,1,1,1},
846 {"hdel",hdelCommand
,3,REDIS_CMD_BULK
,NULL
,1,1,1},
847 {"hlen",hlenCommand
,2,REDIS_CMD_INLINE
,NULL
,1,1,1},
848 {"hkeys",hkeysCommand
,2,REDIS_CMD_INLINE
,NULL
,1,1,1},
849 {"hvals",hvalsCommand
,2,REDIS_CMD_INLINE
,NULL
,1,1,1},
850 {"hgetall",hgetallCommand
,2,REDIS_CMD_INLINE
,NULL
,1,1,1},
851 {"hexists",hexistsCommand
,3,REDIS_CMD_BULK
,NULL
,1,1,1},
852 {"incrby",incrbyCommand
,3,REDIS_CMD_INLINE
|REDIS_CMD_DENYOOM
,NULL
,1,1,1},
853 {"decrby",decrbyCommand
,3,REDIS_CMD_INLINE
|REDIS_CMD_DENYOOM
,NULL
,1,1,1},
854 {"getset",getsetCommand
,3,REDIS_CMD_BULK
|REDIS_CMD_DENYOOM
,NULL
,1,1,1},
855 {"mset",msetCommand
,-3,REDIS_CMD_BULK
|REDIS_CMD_DENYOOM
,NULL
,1,-1,2},
856 {"msetnx",msetnxCommand
,-3,REDIS_CMD_BULK
|REDIS_CMD_DENYOOM
,NULL
,1,-1,2},
857 {"randomkey",randomkeyCommand
,1,REDIS_CMD_INLINE
,NULL
,0,0,0},
858 {"select",selectCommand
,2,REDIS_CMD_INLINE
,NULL
,0,0,0},
859 {"move",moveCommand
,3,REDIS_CMD_INLINE
,NULL
,1,1,1},
860 {"rename",renameCommand
,3,REDIS_CMD_INLINE
,NULL
,1,1,1},
861 {"renamenx",renamenxCommand
,3,REDIS_CMD_INLINE
,NULL
,1,1,1},
862 {"expire",expireCommand
,3,REDIS_CMD_INLINE
,NULL
,0,0,0},
863 {"expireat",expireatCommand
,3,REDIS_CMD_INLINE
,NULL
,0,0,0},
864 {"keys",keysCommand
,2,REDIS_CMD_INLINE
,NULL
,0,0,0},
865 {"dbsize",dbsizeCommand
,1,REDIS_CMD_INLINE
,NULL
,0,0,0},
866 {"auth",authCommand
,2,REDIS_CMD_INLINE
,NULL
,0,0,0},
867 {"ping",pingCommand
,1,REDIS_CMD_INLINE
,NULL
,0,0,0},
868 {"echo",echoCommand
,2,REDIS_CMD_BULK
,NULL
,0,0,0},
869 {"save",saveCommand
,1,REDIS_CMD_INLINE
,NULL
,0,0,0},
870 {"bgsave",bgsaveCommand
,1,REDIS_CMD_INLINE
,NULL
,0,0,0},
871 {"bgrewriteaof",bgrewriteaofCommand
,1,REDIS_CMD_INLINE
,NULL
,0,0,0},
872 {"shutdown",shutdownCommand
,1,REDIS_CMD_INLINE
,NULL
,0,0,0},
873 {"lastsave",lastsaveCommand
,1,REDIS_CMD_INLINE
,NULL
,0,0,0},
874 {"type",typeCommand
,2,REDIS_CMD_INLINE
,NULL
,1,1,1},
875 {"multi",multiCommand
,1,REDIS_CMD_INLINE
,NULL
,0,0,0},
876 {"exec",execCommand
,1,REDIS_CMD_INLINE
|REDIS_CMD_DENYOOM
,execBlockClientOnSwappedKeys
,0,0,0},
877 {"discard",discardCommand
,1,REDIS_CMD_INLINE
,NULL
,0,0,0},
878 {"sync",syncCommand
,1,REDIS_CMD_INLINE
,NULL
,0,0,0},
879 {"flushdb",flushdbCommand
,1,REDIS_CMD_INLINE
,NULL
,0,0,0},
880 {"flushall",flushallCommand
,1,REDIS_CMD_INLINE
,NULL
,0,0,0},
881 {"sort",sortCommand
,-2,REDIS_CMD_INLINE
|REDIS_CMD_DENYOOM
,NULL
,1,1,1},
882 {"info",infoCommand
,1,REDIS_CMD_INLINE
,NULL
,0,0,0},
883 {"monitor",monitorCommand
,1,REDIS_CMD_INLINE
,NULL
,0,0,0},
884 {"ttl",ttlCommand
,2,REDIS_CMD_INLINE
,NULL
,1,1,1},
885 {"slaveof",slaveofCommand
,3,REDIS_CMD_INLINE
,NULL
,0,0,0},
886 {"debug",debugCommand
,-2,REDIS_CMD_INLINE
,NULL
,0,0,0},
887 {"config",configCommand
,-2,REDIS_CMD_BULK
,NULL
,0,0,0},
888 {"subscribe",subscribeCommand
,-2,REDIS_CMD_INLINE
,NULL
,0,0,0},
889 {"unsubscribe",unsubscribeCommand
,-1,REDIS_CMD_INLINE
,NULL
,0,0,0},
890 {"psubscribe",psubscribeCommand
,-2,REDIS_CMD_INLINE
,NULL
,0,0,0},
891 {"punsubscribe",punsubscribeCommand
,-1,REDIS_CMD_INLINE
,NULL
,0,0,0},
892 {"publish",publishCommand
,3,REDIS_CMD_BULK
|REDIS_CMD_FORCE_REPLICATION
,NULL
,0,0,0},
893 {"watch",watchCommand
,-2,REDIS_CMD_INLINE
,NULL
,0,0,0},
894 {"unwatch",unwatchCommand
,1,REDIS_CMD_INLINE
,NULL
,0,0,0}
897 /*============================ Utility functions ============================ */
899 /* Glob-style pattern matching. */
900 static int stringmatchlen(const char *pattern
, int patternLen
,
901 const char *string
, int stringLen
, int nocase
)
906 while (pattern
[1] == '*') {
911 return 1; /* match */
913 if (stringmatchlen(pattern
+1, patternLen
-1,
914 string
, stringLen
, nocase
))
915 return 1; /* match */
919 return 0; /* no match */
923 return 0; /* no match */
933 not = pattern
[0] == '^';
940 if (pattern
[0] == '\\') {
943 if (pattern
[0] == string
[0])
945 } else if (pattern
[0] == ']') {
947 } else if (patternLen
== 0) {
951 } else if (pattern
[1] == '-' && patternLen
>= 3) {
952 int start
= pattern
[0];
953 int end
= pattern
[2];
961 start
= tolower(start
);
967 if (c
>= start
&& c
<= end
)
971 if (pattern
[0] == string
[0])
974 if (tolower((int)pattern
[0]) == tolower((int)string
[0]))
984 return 0; /* no match */
990 if (patternLen
>= 2) {
997 if (pattern
[0] != string
[0])
998 return 0; /* no match */
1000 if (tolower((int)pattern
[0]) != tolower((int)string
[0]))
1001 return 0; /* no match */
1009 if (stringLen
== 0) {
1010 while(*pattern
== '*') {
1017 if (patternLen
== 0 && stringLen
== 0)
/* Glob-style match of two NUL-terminated strings: both lengths are taken
 * with strlen() and the work is delegated to stringmatchlen(), with the
 * nocase flag forwarded unchanged. Returns whatever stringmatchlen()
 * returns. */
static int stringmatch(const char *pattern, const char *string, int nocase) {
    int patternLen = (int)strlen(pattern);
    int stringLen = (int)strlen(string);

    return stringmatchlen(pattern, patternLen, string, stringLen, nocase);
}
1026 /* Convert a string representing an amount of memory into the number of
1027 * bytes, so for instance memtoll("1Gb") will return 1073741824 that is
1030 * On parsing error, if *err is not NULL, it's set to 1, otherwise it's
1032 static long long memtoll(const char *p
, int *err
) {
1035 long mul
; /* unit multiplier */
1037 unsigned int digits
;
1040 /* Search the first non digit character. */
1043 while(*u
&& isdigit(*u
)) u
++;
1044 if (*u
== '\0' || !strcasecmp(u
,"b")) {
1046 } else if (!strcasecmp(u
,"k")) {
1048 } else if (!strcasecmp(u
,"kb")) {
1050 } else if (!strcasecmp(u
,"m")) {
1052 } else if (!strcasecmp(u
,"mb")) {
1054 } else if (!strcasecmp(u
,"g")) {
1055 mul
= 1000L*1000*1000;
1056 } else if (!strcasecmp(u
,"gb")) {
1057 mul
= 1024L*1024*1024;
1063 if (digits
>= sizeof(buf
)) {
1067 memcpy(buf
,p
,digits
);
1069 val
= strtoll(buf
,NULL
,10);
/* Convert a long long into a string.
 *
 * s     - destination buffer.
 * len   - size of the destination buffer in bytes; when it is 0 nothing is
 *         written and 0 is returned.
 * value - number to convert.
 *
 * Returns the number of characters stored in s, not counting the nul
 * terminator. This can be shorter than the full representation when the
 * passed buffer is not large enough: the output is then truncated to
 * len-1 characters. */
static int ll2string(char *s, size_t len, long long value) {
    char buf[32], *p;
    unsigned long long v;
    size_t l;

    if (len == 0) return 0;
    /* Negate in the unsigned domain: -value on a signed long long overflows
     * (undefined behaviour) when value is the most negative number. */
    v = (value < 0) ? -(unsigned long long)value : (unsigned long long)value;
    p = buf+31; /* point to the last character */
    /* Emit the digits backwards, least significant first. */
    do {
        *p-- = (char)('0'+(v%10));
        v /= 10;
    } while(v);
    if (value < 0) *p-- = '-';
    p++;
    l = 32-(size_t)(p-buf);
    if (l+1 > len) l = len-1; /* Make sure it fits, including the nul term */
    memcpy(s,p,l);
    s[l] = '\0';
    return (int)l;
}
1097 static void redisLog(int level
, const char *fmt
, ...) {
1101 fp
= (server
.logfile
== NULL
) ? stdout
: fopen(server
.logfile
,"a");
1105 if (level
>= server
.verbosity
) {
1111 strftime(buf
,64,"%d %b %H:%M:%S",localtime(&now
));
1112 fprintf(fp
,"[%d] %s %c ",(int)getpid(),buf
,c
[level
]);
1113 vfprintf(fp
, fmt
, ap
);
1119 if (server
.logfile
) fclose(fp
);
1122 /*====================== Hash table type implementation ==================== */
1124 /* This is a hash table type that uses the SDS dynamic strings library as
1125 * keys and Redis objects as values (objects can hold SDS strings,
1128 static void dictVanillaFree(void *privdata
, void *val
)
1130 DICT_NOTUSED(privdata
);
/* Dict value destructor for values that are lists: the value is released
 * with listRelease(). privdata is unused. */
1134 static void dictListDestructor(void *privdata
, void *val
)
1136 DICT_NOTUSED(privdata
);
1137 listRelease((list
*)val
);
/* Key comparison callback for dicts whose keys are sds strings.
 * Returns 1 when both keys have the same sdslen() and identical bytes,
 * 0 otherwise. privdata is unused. */
1140 static int dictSdsKeyCompare(void *privdata
, const void *key1
,
1144 DICT_NOTUSED(privdata
);
1146 l1
= sdslen((sds
)key1
);
1147 l2
= sdslen((sds
)key2
);
/* Keys of different length can never match: skip the byte comparison. */
1148 if (l1
!= l2
) return 0;
1149 return memcmp(key1
, key2
, l1
) == 0;
/* Dict destructor callback for Redis objects (used below both as key and
 * as value destructor). privdata is unused and a NULL val is ignored. */
1152 static void dictRedisObjectDestructor(void *privdata
, void *val
)
1154 DICT_NOTUSED(privdata
);
1156 if (val
== NULL
) return; /* Values of swapped out keys are set to NULL */
1160 static void dictSdsDestructor(void *privdata
, void *val
)
1162 DICT_NOTUSED(privdata
);
/* Key comparison callback for dicts keyed by robj: both keys are read as
 * robj and their ptr fields are compared with dictSdsKeyCompare(), so the
 * objects are expected to hold sds strings (keylistDictType documents its
 * keys as unencoded objects). */
1167 static int dictObjKeyCompare(void *privdata
, const void *key1
,
1170 const robj
*o1
= key1
, *o2
= key2
;
1171 return dictSdsKeyCompare(privdata
,o1
->ptr
,o2
->ptr
);
1174 static unsigned int dictObjHash(const void *key
) {
1175 const robj
*o
= key
;
1176 return dictGenHashFunction(o
->ptr
, sdslen((sds
)o
->ptr
));
/* Hash callback for dicts keyed directly by sds strings: hashes the
 * sdslen() bytes of the key with dictGenHashFunction(). */
static unsigned int dictSdsHash(const void *key) {
    size_t keylen = sdslen((char*)key);

    return dictGenHashFunction((unsigned char*)key, keylen);
}
/* Key comparison callback for dicts keyed by robj that may be encoded.
 * Integer-encoded pairs are compared directly; any other combination is
 * decoded with getDecodedObject() and compared as sds strings through
 * dictSdsKeyCompare(). */
1183 static int dictEncObjKeyCompare(void *privdata
, const void *key1
,
1186 robj
*o1
= (robj
*) key1
, *o2
= (robj
*) key2
;
/* Fast path: two integer-encoded objects are equal exactly when their
 * ptr fields are equal, no decoding needed. */
1189 if (o1
->encoding
== REDIS_ENCODING_INT
&&
1190 o2
->encoding
== REDIS_ENCODING_INT
)
1191 return o1
->ptr
== o2
->ptr
;
/* Slow path: compare the decoded (string) forms. */
1193 o1
= getDecodedObject(o1
);
1194 o2
= getDecodedObject(o2
);
1195 cmp
= dictSdsKeyCompare(privdata
,o1
->ptr
,o2
->ptr
);
/* Hash callback for dicts keyed by robj that may be encoded. The hash is
 * always computed with dictGenHashFunction() over a string form of the
 * key; how that string is obtained depends on the object encoding. */
1201 static unsigned int dictEncObjHash(const void *key
) {
1202 robj
*o
= (robj
*) key
;
/* Raw encoding: ptr already is the sds string, hash its bytes. */
1204 if (o
->encoding
== REDIS_ENCODING_RAW
) {
1205 return dictGenHashFunction(o
->ptr
, sdslen((sds
)o
->ptr
));
/* Integer encoding: format the integer held in ptr as decimal text with
 * ll2string() and hash that text.
 * NOTE(review): presumably so that an integer-encoded key hashes like
 * its string form - confirm against getDecodedObject(). */
1207 if (o
->encoding
== REDIS_ENCODING_INT
) {
1211 len
= ll2string(buf
,32,(long)o
->ptr
);
1212 return dictGenHashFunction((unsigned char*)buf
, len
);
/* Any other case: hash the decoded object's string. */
1216 o
= getDecodedObject(o
);
1217 hash
= dictGenHashFunction(o
->ptr
, sdslen((sds
)o
->ptr
));
1225 static dictType setDictType
= {
1226 dictEncObjHash
, /* hash function */
1229 dictEncObjKeyCompare
, /* key compare */
1230 dictRedisObjectDestructor
, /* key destructor */
1231 NULL
/* val destructor */
1234 /* Sorted sets hash (note: a skiplist is used in addition to the hash table) */
1235 static dictType zsetDictType
= {
1236 dictEncObjHash
, /* hash function */
1239 dictEncObjKeyCompare
, /* key compare */
1240 dictRedisObjectDestructor
, /* key destructor */
1241 dictVanillaFree
/* val destructor of malloc(sizeof(double)) */
1244 /* Db->dict, keys are sds strings, vals are Redis objects. */
1245 static dictType dbDictType
= {
1246 dictSdsHash
, /* hash function */
1249 dictSdsKeyCompare
, /* key compare */
1250 dictSdsDestructor
, /* key destructor */
1251 dictRedisObjectDestructor
/* val destructor */
1255 static dictType keyptrDictType
= {
1256 dictSdsHash
, /* hash function */
1259 dictSdsKeyCompare
, /* key compare */
1260 dictSdsDestructor
, /* key destructor */
1261 NULL
/* val destructor */
1264 /* Hash type hash table (note that small hashes are represented with zipmaps) */
1265 static dictType hashDictType
= {
1266 dictEncObjHash
, /* hash function */
1269 dictEncObjKeyCompare
, /* key compare */
1270 dictRedisObjectDestructor
, /* key destructor */
1271 dictRedisObjectDestructor
/* val destructor */
1274 /* Keylist hash table type has unencoded redis objects as keys and
1275 * lists as values. It's used for blocking operations (BLPOP) and to
1276 * map swapped keys to a list of clients waiting for these keys to be loaded. */
1277 static dictType keylistDictType
= {
1278 dictObjHash
, /* hash function */
1281 dictObjKeyCompare
, /* key compare */
1282 dictRedisObjectDestructor
, /* key destructor */
1283 dictListDestructor
/* val destructor */
1286 static void version();
1288 /* ========================= Random utility functions ======================= */
1290 /* Redis generally does not try to recover from out of memory conditions
1291 * when allocating objects or strings, it is not clear if it will be possible
1292 * to report this condition to the client since the networking layer itself
1293 * is based on heap allocation for send buffers, so we simply abort.
1294 * At least the code will be simpler to read... */
1295 static void oom(const char *msg
) {
1296 redisLog(REDIS_WARNING
, "%s: Out of memory\n",msg
);
1301 /* ====================== Redis server networking stuff ===================== */
/* Walk server.clients and handle two kinds of timeout against the current
 * time():
 *  - idle clients, only when server.maxidletime is non-zero; slaves,
 *    masters and clients with pub/sub subscriptions are exempt;
 *  - clients flagged REDIS_BLOCKED whose blockingto deadline is set and
 *    has passed: they get shared.nullmultibulk as reply and are unblocked
 *    with unblockClientWaitingData(). */
1302 static void closeTimedoutClients(void) {
1305 time_t now
= time(NULL
);
1308 listRewind(server
.clients
,&li
);
1309 while ((ln
= listNext(&li
)) != NULL
) {
1310 c
= listNodeValue(ln
);
1311 if (server
.maxidletime
&&
1312 !(c
->flags
& REDIS_SLAVE
) && /* no timeout for slaves */
1313 !(c
->flags
& REDIS_MASTER
) && /* no timeout for masters */
1314 dictSize(c
->pubsub_channels
) == 0 && /* no timeout for pubsub */
1315 listLength(c
->pubsub_patterns
) == 0 &&
1316 (now
- c
->lastinteraction
> server
.maxidletime
))
1318 redisLog(REDIS_VERBOSE
,"Closing idle client");
1320 } else if (c
->flags
& REDIS_BLOCKED
) {
/* A blockingto of 0 means the blocked client has no deadline. */
1321 if (c
->blockingto
!= 0 && c
->blockingto
< now
) {
1322 addReply(c
,shared
.nullmultibulk
);
1323 unblockClientWaitingData(c
);
1329 static int htNeedsResize(dict
*dict
) {
1330 long long size
, used
;
1332 size
= dictSlots(dict
);
1333 used
= dictSize(dict
);
1334 return (size
&& used
&& size
> DICT_HT_INITIAL_SIZE
&&
1335 (used
*100/size
< REDIS_HT_MINFILL
));
1338 /* If the percentage of used slots in the HT reaches REDIS_HT_MINFILL
1339 * we resize the hash table to save memory */
1340 static void tryResizeHashTables(void) {
1343 for (j
= 0; j
< server
.dbnum
; j
++) {
1344 if (htNeedsResize(server
.db
[j
].dict
))
1345 dictResize(server
.db
[j
].dict
);
1346 if (htNeedsResize(server
.db
[j
].expires
))
1347 dictResize(server
.db
[j
].expires
);
1351 /* Our hash table implementation performs rehashing incrementally while
1352 * we write/read from the hash table. Still if the server is idle, the hash
1353 * table will use two tables for a long time. So we try to use 1 millisecond
1354 * of CPU time at every serverCron() loop in order to rehash some key. */
1355 static void incrementallyRehash(void) {
1358 for (j
= 0; j
< server
.dbnum
; j
++) {
1359 if (dictIsRehashing(server
.db
[j
].dict
)) {
1360 dictRehashMilliseconds(server
.db
[j
].dict
,1);
1361 break; /* already used our millisecond for this loop... */
1366 /* A background saving child (BGSAVE) terminated its work. Handle this.
 *
 * statloc is the wait status collected for the child. The outcome is
 * logged, server.lastsave is refreshed on success, the temp file is
 * removed with rdbRemoveTempFile() after a termination by signal,
 * server.bgsavechildpid is reset to -1 and the slaves waiting for this
 * BGSAVE are told whether it succeeded. */
1367 void backgroundSaveDoneHandler(int statloc
) {
1368 int exitcode
= WEXITSTATUS(statloc
);
1369 int bysignal
= WIFSIGNALED(statloc
);
1371 if (!bysignal
&& exitcode
== 0) {
1372 redisLog(REDIS_NOTICE
,
1373 "Background saving terminated with success");
1375 server
.lastsave
= time(NULL
);
1376 } else if (!bysignal
&& exitcode
!= 0) {
1377 redisLog(REDIS_WARNING
, "Background saving error");
1379 redisLog(REDIS_WARNING
,
1380 "Background saving terminated by signal %d", WTERMSIG(statloc
));
1381 rdbRemoveTempFile(server
.bgsavechildpid
);
1383 server
.bgsavechildpid
= -1;
1384 /* Possibly there are slaves waiting for a BGSAVE in order to be served
1385 * (the first stage of SYNC is a bulk transfer of dump.rdb) */
/* WEXITSTATUS() is only meaningful for a child that exited normally: a
 * child killed by a signal must be reported as a failure even when the
 * exit-status bits happen to read 0. */
1386 updateSlavesWaitingBgsave((!bysignal
&& exitcode
== 0) ? REDIS_OK
: REDIS_ERR
);
1389 /* A background append only file rewriting (BGREWRITEAOF) terminated its work.
1391 void backgroundRewriteDoneHandler(int statloc
) {
1392 int exitcode
= WEXITSTATUS(statloc
);
1393 int bysignal
= WIFSIGNALED(statloc
);
1395 if (!bysignal
&& exitcode
== 0) {
1399 redisLog(REDIS_NOTICE
,
1400 "Background append only file rewriting terminated with success");
1401 /* Now it's time to flush the differences accumulated by the parent */
1402 snprintf(tmpfile
,256,"temp-rewriteaof-bg-%d.aof", (int) server
.bgrewritechildpid
);
1403 fd
= open(tmpfile
,O_WRONLY
|O_APPEND
);
1405 redisLog(REDIS_WARNING
, "Not able to open the temp append only file produced by the child: %s", strerror(errno
));
1408 /* Flush our data... */
1409 if (write(fd
,server
.bgrewritebuf
,sdslen(server
.bgrewritebuf
)) !=
1410 (signed) sdslen(server
.bgrewritebuf
)) {
1411 redisLog(REDIS_WARNING
, "Error or short write trying to flush the parent diff of the append log file in the child temp file: %s", strerror(errno
));
1415 redisLog(REDIS_NOTICE
,"Parent diff flushed into the new append log file with success (%lu bytes)",sdslen(server
.bgrewritebuf
));
1416 /* Now our work is to rename the temp file into the stable file. And
1417 * switch the file descriptor used by the server for append only. */
1418 if (rename(tmpfile
,server
.appendfilename
) == -1) {
1419 redisLog(REDIS_WARNING
,"Can't rename the temp append only file into the stable one: %s", strerror(errno
));
1423 /* Mission completed... almost */
1424 redisLog(REDIS_NOTICE
,"Append only file successfully rewritten.");
1425 if (server
.appendfd
!= -1) {
1426 /* If append only is actually enabled... */
1427 close(server
.appendfd
);
1428 server
.appendfd
= fd
;
1429 if (server
.appendfsync
!= APPENDFSYNC_NO
) aof_fsync(fd
);
1430 server
.appendseldb
= -1; /* Make sure it will issue SELECT */
1431 redisLog(REDIS_NOTICE
,"The new append only file was selected for future appends.");
1433 /* If append only is disabled we just generate a dump in this
1434 * format. Why not? */
1437 } else if (!bysignal
&& exitcode
!= 0) {
1438 redisLog(REDIS_WARNING
, "Background append only file rewriting error");
1440 redisLog(REDIS_WARNING
,
1441 "Background append only file rewriting terminated by signal %d",
1445 sdsfree(server
.bgrewritebuf
);
1446 server
.bgrewritebuf
= sdsempty();
1447 aofRemoveTempFile(server
.bgrewritechildpid
);
1448 server
.bgrewritechildpid
= -1;
1451 /* This function is called once a background process of some kind terminates,
1452 * as we want to avoid resizing the hash tables when there is a child in order
1453 * to play well with copy-on-write (otherwise when a resize happens lots of
1454 * memory pages are copied). The goal of this function is to update the ability
1455 * for dict.c to resize the hash tables according to whether or not we have
1456 * running children. */
1457 static void updateDictResizePolicy(void) {
1458 if (server
.bgsavechildpid
== -1 && server
.bgrewritechildpid
== -1)
1461 dictDisableResize();
1464 static int serverCron(struct aeEventLoop
*eventLoop
, long long id
, void *clientData
) {
1465 int j
, loops
= server
.cronloops
++;
1466 REDIS_NOTUSED(eventLoop
);
1468 REDIS_NOTUSED(clientData
);
1470 /* We take a cached value of the unix time in the global state because
1471 * with virtual memory and aging we need to store the current time
1472 * in objects at every object access, and accuracy is not needed.
1473 * Accessing a global var is faster than calling time(NULL). */
1474 server
.unixtime
= time(NULL
);
1475 /* We have just 21 bits per object for LRU information.
1476 * So we use an (eventually wrapping) LRU clock with minutes resolution.
1478 * When we need to select what object to swap, we compute the minimum
1479 * time distance between the current lruclock and the object last access
1480 * lruclock info. Even if clocks will wrap on overflow, there is
1481 * the interesting property that we are sure that at least
1482 * ABS(A-B) minutes passed between current time and timestamp B.
1484 * This is not precise, but we don't need precision at all, just
1485 * something statistically reasonable.
1487 server
.lruclock
= (time(NULL
)/60)&((1<<21)-1);
1489 /* We received a SIGTERM, shutting down here in a safe way, as it is
1490 * not ok doing so inside the signal handler. */
1491 if (server
.shutdown_asap
) {
1492 if (prepareForShutdown() == REDIS_OK
) exit(0);
1493 redisLog(REDIS_WARNING
,"SIGTERM received but errors trying to shut down the server, check the logs for more information");
1496 /* Show some info about non-empty databases */
1497 for (j
= 0; j
< server
.dbnum
; j
++) {
1498 long long size
, used
, vkeys
;
1500 size
= dictSlots(server
.db
[j
].dict
);
1501 used
= dictSize(server
.db
[j
].dict
);
1502 vkeys
= dictSize(server
.db
[j
].expires
);
1503 if (!(loops
% 50) && (used
|| vkeys
)) {
1504 redisLog(REDIS_VERBOSE
,"DB %d: %lld keys (%lld volatile) in %lld slots HT.",j
,used
,vkeys
,size
);
1505 /* dictPrintStats(server.dict); */
1509 /* We don't want to resize the hash tables while a background saving
1510 * is in progress: the saving child is created using fork() that is
1511 * implemented with a copy-on-write semantic in most modern systems, so
1512 * if we resize the HT while there is the saving child at work actually
1513 * a lot of memory movements in the parent will cause a lot of pages
1515 if (server
.bgsavechildpid
== -1 && server
.bgrewritechildpid
== -1) {
1516 if (!(loops
% 10)) tryResizeHashTables();
1517 if (server
.activerehashing
) incrementallyRehash();
1520 /* Show information about connected clients */
1521 if (!(loops
% 50)) {
1522 redisLog(REDIS_VERBOSE
,"%d clients connected (%d slaves), %zu bytes in use",
1523 listLength(server
.clients
)-listLength(server
.slaves
),
1524 listLength(server
.slaves
),
1525 zmalloc_used_memory());
1528 /* Close connections of timedout clients */
1529 if ((server
.maxidletime
&& !(loops
% 100)) || server
.blpop_blocked_clients
)
1530 closeTimedoutClients();
1532 /* Check if a background saving or AOF rewrite in progress terminated */
1533 if (server
.bgsavechildpid
!= -1 || server
.bgrewritechildpid
!= -1) {
1537 if ((pid
= wait3(&statloc
,WNOHANG
,NULL
)) != 0) {
1538 if (pid
== server
.bgsavechildpid
) {
1539 backgroundSaveDoneHandler(statloc
);
1541 backgroundRewriteDoneHandler(statloc
);
1543 updateDictResizePolicy();
1546 /* If there is not a background saving in progress check if
1547 * we have to save now */
1548 time_t now
= time(NULL
);
1549 for (j
= 0; j
< server
.saveparamslen
; j
++) {
1550 struct saveparam
*sp
= server
.saveparams
+j
;
1552 if (server
.dirty
>= sp
->changes
&&
1553 now
-server
.lastsave
> sp
->seconds
) {
1554 redisLog(REDIS_NOTICE
,"%d changes in %d seconds. Saving...",
1555 sp
->changes
, sp
->seconds
);
1556 rdbSaveBackground(server
.dbfilename
);
1562 /* Try to expire a few timed out keys. The algorithm used is adaptive and
1563 * will use few CPU cycles if there are few expiring keys, otherwise
1564 * it will get more aggressive to avoid that too much memory is used by
1565 * keys that can be removed from the keyspace. */
1566 for (j
= 0; j
< server
.dbnum
; j
++) {
1568 redisDb
*db
= server
.db
+j
;
1570 /* Continue to expire if at the end of the cycle more than 25%
1571 * of the keys were expired. */
1573 long num
= dictSize(db
->expires
);
1574 time_t now
= time(NULL
);
1577 if (num
> REDIS_EXPIRELOOKUPS_PER_CRON
)
1578 num
= REDIS_EXPIRELOOKUPS_PER_CRON
;
1583 if ((de
= dictGetRandomKey(db
->expires
)) == NULL
) break;
1584 t
= (time_t) dictGetEntryVal(de
);
1586 sds key
= dictGetEntryKey(de
);
1587 robj
*keyobj
= createStringObject(key
,sdslen(key
));
1589 dbDelete(db
,keyobj
);
1590 decrRefCount(keyobj
);
1592 server
.stat_expiredkeys
++;
1595 } while (expired
> REDIS_EXPIRELOOKUPS_PER_CRON
/4);
1598 /* Swap a few keys on disk if we are over the memory limit and VM
1599 * is enabled. Try to free objects from the free list first. */
1600 if (vmCanSwapOut()) {
1601 while (server
.vm_enabled
&& zmalloc_used_memory() >
1602 server
.vm_max_memory
)
1606 if (tryFreeOneObjectFromFreelist() == REDIS_OK
) continue;
1607 retval
= (server
.vm_max_threads
== 0) ?
1608 vmSwapOneObjectBlocking() :
1609 vmSwapOneObjectThreaded();
1610 if (retval
== REDIS_ERR
&& !(loops
% 300) &&
1611 zmalloc_used_memory() >
1612 (server
.vm_max_memory
+server
.vm_max_memory
/10))
1614 redisLog(REDIS_WARNING
,"WARNING: vm-max-memory limit exceeded by more than 10%% but unable to swap more objects out!");
1616 /* Note that when using threaded I/O we free just one object,
1617 * because when the I/O thread in charge of swapping this
1618 * object out finishes, the handler of completed jobs
1619 * will try to swap more objects if we are still out of memory. */
1620 if (retval
== REDIS_ERR
|| server
.vm_max_threads
> 0) break;
1624 /* Check if we should connect to a MASTER */
1625 if (server
.replstate
== REDIS_REPL_CONNECT
&& !(loops
% 10)) {
1626 redisLog(REDIS_NOTICE
,"Connecting to MASTER...");
1627 if (syncWithMaster() == REDIS_OK
) {
1628 redisLog(REDIS_NOTICE
,"MASTER <-> SLAVE sync succeeded");
1629 if (server
.appendonly
) rewriteAppendOnlyFileBackground();
1635 /* This function gets called every time Redis is entering the
1636 * main loop of the event driven library, that is, before sleeping
1637 * while waiting for ready file descriptors. */
1638 static void beforeSleep(struct aeEventLoop
*eventLoop
) {
1639 REDIS_NOTUSED(eventLoop
);
1641 /* Awake clients that got all the swapped keys they requested */
1642 if (server
.vm_enabled
&& listLength(server
.io_ready_clients
)) {
1646 listRewind(server
.io_ready_clients
,&li
);
1647 while((ln
= listNext(&li
))) {
1648 redisClient
*c
= ln
->value
;
1649 struct redisCommand
*cmd
;
1651 /* Resume the client. */
1652 listDelNode(server
.io_ready_clients
,ln
);
1653 c
->flags
&= (~REDIS_IO_WAIT
);
1654 server
.vm_blocked_clients
--;
1655 aeCreateFileEvent(server
.el
, c
->fd
, AE_READABLE
,
1656 readQueryFromClient
, c
);
1657 cmd
= lookupCommand(c
->argv
[0]->ptr
);
1658 assert(cmd
!= NULL
);
1661 /* There may be more data to process in the input buffer. */
1662 if (c
->querybuf
&& sdslen(c
->querybuf
) > 0)
1663 processInputBuffer(c
);
1666 /* Write the AOF buffer on disk */
1667 flushAppendOnlyFile();
1670 static void createSharedObjects(void) {
1673 shared
.crlf
= createObject(REDIS_STRING
,sdsnew("\r\n"));
1674 shared
.ok
= createObject(REDIS_STRING
,sdsnew("+OK\r\n"));
1675 shared
.err
= createObject(REDIS_STRING
,sdsnew("-ERR\r\n"));
1676 shared
.emptybulk
= createObject(REDIS_STRING
,sdsnew("$0\r\n\r\n"));
1677 shared
.czero
= createObject(REDIS_STRING
,sdsnew(":0\r\n"));
1678 shared
.cone
= createObject(REDIS_STRING
,sdsnew(":1\r\n"));
1679 shared
.nullbulk
= createObject(REDIS_STRING
,sdsnew("$-1\r\n"));
1680 shared
.nullmultibulk
= createObject(REDIS_STRING
,sdsnew("*-1\r\n"));
1681 shared
.emptymultibulk
= createObject(REDIS_STRING
,sdsnew("*0\r\n"));
1682 shared
.pong
= createObject(REDIS_STRING
,sdsnew("+PONG\r\n"));
1683 shared
.queued
= createObject(REDIS_STRING
,sdsnew("+QUEUED\r\n"));
1684 shared
.wrongtypeerr
= createObject(REDIS_STRING
,sdsnew(
1685 "-ERR Operation against a key holding the wrong kind of value\r\n"));
1686 shared
.nokeyerr
= createObject(REDIS_STRING
,sdsnew(
1687 "-ERR no such key\r\n"));
1688 shared
.syntaxerr
= createObject(REDIS_STRING
,sdsnew(
1689 "-ERR syntax error\r\n"));
1690 shared
.sameobjecterr
= createObject(REDIS_STRING
,sdsnew(
1691 "-ERR source and destination objects are the same\r\n"));
1692 shared
.outofrangeerr
= createObject(REDIS_STRING
,sdsnew(
1693 "-ERR index out of range\r\n"));
1694 shared
.space
= createObject(REDIS_STRING
,sdsnew(" "));
1695 shared
.colon
= createObject(REDIS_STRING
,sdsnew(":"));
1696 shared
.plus
= createObject(REDIS_STRING
,sdsnew("+"));
1697 shared
.select0
= createStringObject("select 0\r\n",10);
1698 shared
.select1
= createStringObject("select 1\r\n",10);
1699 shared
.select2
= createStringObject("select 2\r\n",10);
1700 shared
.select3
= createStringObject("select 3\r\n",10);
1701 shared
.select4
= createStringObject("select 4\r\n",10);
1702 shared
.select5
= createStringObject("select 5\r\n",10);
1703 shared
.select6
= createStringObject("select 6\r\n",10);
1704 shared
.select7
= createStringObject("select 7\r\n",10);
1705 shared
.select8
= createStringObject("select 8\r\n",10);
1706 shared
.select9
= createStringObject("select 9\r\n",10);
1707 shared
.messagebulk
= createStringObject("$7\r\nmessage\r\n",13);
1708 shared
.pmessagebulk
= createStringObject("$8\r\npmessage\r\n",14);
1709 shared
.subscribebulk
= createStringObject("$9\r\nsubscribe\r\n",15);
1710 shared
.unsubscribebulk
= createStringObject("$11\r\nunsubscribe\r\n",18);
1711 shared
.psubscribebulk
= createStringObject("$10\r\npsubscribe\r\n",17);
1712 shared
.punsubscribebulk
= createStringObject("$12\r\npunsubscribe\r\n",19);
1713 shared
.mbulk3
= createStringObject("*3\r\n",4);
1714 shared
.mbulk4
= createStringObject("*4\r\n",4);
1715 for (j
= 0; j
< REDIS_SHARED_INTEGERS
; j
++) {
1716 shared
.integers
[j
] = createObject(REDIS_STRING
,(void*)(long)j
);
1717 shared
.integers
[j
]->encoding
= REDIS_ENCODING_INT
;
1721 static void appendServerSaveParams(time_t seconds
, int changes
) {
1722 server
.saveparams
= zrealloc(server
.saveparams
,sizeof(struct saveparam
)*(server
.saveparamslen
+1));
1723 server
.saveparams
[server
.saveparamslen
].seconds
= seconds
;
1724 server
.saveparams
[server
.saveparamslen
].changes
= changes
;
1725 server
.saveparamslen
++;
1728 static void resetServerSaveParams() {
1729 zfree(server
.saveparams
);
1730 server
.saveparams
= NULL
;
1731 server
.saveparamslen
= 0;
1734 static void initServerConfig() {
1735 server
.dbnum
= REDIS_DEFAULT_DBNUM
;
1736 server
.port
= REDIS_SERVERPORT
;
1737 server
.verbosity
= REDIS_VERBOSE
;
1738 server
.maxidletime
= REDIS_MAXIDLETIME
;
1739 server
.saveparams
= NULL
;
1740 server
.logfile
= NULL
; /* NULL = log on standard output */
1741 server
.bindaddr
= NULL
;
1742 server
.glueoutputbuf
= 1;
1743 server
.daemonize
= 0;
1744 server
.appendonly
= 0;
1745 server
.appendfsync
= APPENDFSYNC_EVERYSEC
;
1746 server
.no_appendfsync_on_rewrite
= 0;
1747 server
.lastfsync
= time(NULL
);
1748 server
.appendfd
= -1;
1749 server
.appendseldb
= -1; /* Make sure the first time will not match */
1750 server
.pidfile
= zstrdup("/var/run/redis.pid");
1751 server
.dbfilename
= zstrdup("dump.rdb");
1752 server
.appendfilename
= zstrdup("appendonly.aof");
1753 server
.requirepass
= NULL
;
1754 server
.rdbcompression
= 1;
1755 server
.activerehashing
= 1;
1756 server
.maxclients
= 0;
1757 server
.blpop_blocked_clients
= 0;
1758 server
.maxmemory
= 0;
1759 server
.vm_enabled
= 0;
1760 server
.vm_swap_file
= zstrdup("/tmp/redis-%p.vm");
1761 server
.vm_page_size
= 256; /* 256 bytes per page */
1762 server
.vm_pages
= 1024*1024*100; /* 104 millions of pages */
1763 server
.vm_max_memory
= 1024LL*1024*1024*1; /* 1 GB of RAM */
1764 server
.vm_max_threads
= 4;
1765 server
.vm_blocked_clients
= 0;
1766 server
.hash_max_zipmap_entries
= REDIS_HASH_MAX_ZIPMAP_ENTRIES
;
1767 server
.hash_max_zipmap_value
= REDIS_HASH_MAX_ZIPMAP_VALUE
;
1768 server
.list_max_ziplist_entries
= REDIS_LIST_MAX_ZIPLIST_ENTRIES
;
1769 server
.list_max_ziplist_value
= REDIS_LIST_MAX_ZIPLIST_VALUE
;
1770 server
.set_max_intset_entries
= REDIS_SET_MAX_INTSET_ENTRIES
;
1771 server
.shutdown_asap
= 0;
1773 resetServerSaveParams();
1775 appendServerSaveParams(60*60,1); /* save after 1 hour and 1 change */
1776 appendServerSaveParams(300,100); /* save after 5 minutes and 100 changes */
1777 appendServerSaveParams(60,10000); /* save after 1 minute and 10000 changes */
1778 /* Replication related */
1780 server
.masterauth
= NULL
;
1781 server
.masterhost
= NULL
;
1782 server
.masterport
= 6379;
1783 server
.master
= NULL
;
1784 server
.replstate
= REDIS_REPL_NONE
;
1786 /* Double constants initialization */
1788 R_PosInf
= 1.0/R_Zero
;
1789 R_NegInf
= -1.0/R_Zero
;
1790 R_Nan
= R_Zero
/R_Zero
;
1793 static void initServer() {
1796 signal(SIGHUP
, SIG_IGN
);
1797 signal(SIGPIPE
, SIG_IGN
);
1798 setupSigSegvAction();
1800 server
.devnull
= fopen("/dev/null","w");
1801 if (server
.devnull
== NULL
) {
1802 redisLog(REDIS_WARNING
, "Can't open /dev/null: %s", server
.neterr
);
1805 server
.clients
= listCreate();
1806 server
.slaves
= listCreate();
1807 server
.monitors
= listCreate();
1808 server
.objfreelist
= listCreate();
1809 createSharedObjects();
1810 server
.el
= aeCreateEventLoop();
1811 server
.db
= zmalloc(sizeof(redisDb
)*server
.dbnum
);
1812 server
.fd
= anetTcpServer(server
.neterr
, server
.port
, server
.bindaddr
);
1813 if (server
.fd
== -1) {
1814 redisLog(REDIS_WARNING
, "Opening TCP port: %s", server
.neterr
);
1817 for (j
= 0; j
< server
.dbnum
; j
++) {
1818 server
.db
[j
].dict
= dictCreate(&dbDictType
,NULL
);
1819 server
.db
[j
].expires
= dictCreate(&keyptrDictType
,NULL
);
1820 server
.db
[j
].blocking_keys
= dictCreate(&keylistDictType
,NULL
);
1821 server
.db
[j
].watched_keys
= dictCreate(&keylistDictType
,NULL
);
1822 if (server
.vm_enabled
)
1823 server
.db
[j
].io_keys
= dictCreate(&keylistDictType
,NULL
);
1824 server
.db
[j
].id
= j
;
1826 server
.pubsub_channels
= dictCreate(&keylistDictType
,NULL
);
1827 server
.pubsub_patterns
= listCreate();
1828 listSetFreeMethod(server
.pubsub_patterns
,freePubsubPattern
);
1829 listSetMatchMethod(server
.pubsub_patterns
,listMatchPubsubPattern
);
1830 server
.cronloops
= 0;
1831 server
.bgsavechildpid
= -1;
1832 server
.bgrewritechildpid
= -1;
1833 server
.bgrewritebuf
= sdsempty();
1834 server
.aofbuf
= sdsempty();
1835 server
.lastsave
= time(NULL
);
1837 server
.stat_numcommands
= 0;
1838 server
.stat_numconnections
= 0;
1839 server
.stat_expiredkeys
= 0;
1840 server
.stat_starttime
= time(NULL
);
1841 server
.unixtime
= time(NULL
);
1842 aeCreateTimeEvent(server
.el
, 1, serverCron
, NULL
, NULL
);
1843 if (aeCreateFileEvent(server
.el
, server
.fd
, AE_READABLE
,
1844 acceptHandler
, NULL
) == AE_ERR
) oom("creating file event");
1846 if (server
.appendonly
) {
1847 server
.appendfd
= open(server
.appendfilename
,O_WRONLY
|O_APPEND
|O_CREAT
,0644);
1848 if (server
.appendfd
== -1) {
1849 redisLog(REDIS_WARNING
, "Can't open the append-only file: %s",
1855 if (server
.vm_enabled
) vmInit();
1858 /* Empty the whole database */
1859 static long long emptyDb() {
1861 long long removed
= 0;
1863 for (j
= 0; j
< server
.dbnum
; j
++) {
1864 removed
+= dictSize(server
.db
[j
].dict
);
1865 dictEmpty(server
.db
[j
].dict
);
1866 dictEmpty(server
.db
[j
].expires
);
/* Convert a "yes" / "no" configuration argument into an integer.
 *
 * The comparison is case-insensitive. Returns 1 for "yes", 0 for "no" and
 * -1 for any other string; -1 is the value the configuration loader tests
 * for in order to reject the directive. */
static int yesnotoi(char *s) {
    if (!strcasecmp(s,"yes")) return 1;
    else if (!strcasecmp(s,"no")) return 0;
    else return -1;
}
1877 /* I agree, this is a very rudimentary way to load a configuration...
1878 it will be improved later if the config gets more complex */
1879 static void loadServerConfig(char *filename
) {
1881 char buf
[REDIS_CONFIGLINE_MAX
+1], *err
= NULL
;
1885 if (filename
[0] == '-' && filename
[1] == '\0')
1888 if ((fp
= fopen(filename
,"r")) == NULL
) {
1889 redisLog(REDIS_WARNING
, "Fatal error, can't open config file '%s'", filename
);
1894 while(fgets(buf
,REDIS_CONFIGLINE_MAX
+1,fp
) != NULL
) {
1900 line
= sdstrim(line
," \t\r\n");
1902 /* Skip comments and blank lines*/
1903 if (line
[0] == '#' || line
[0] == '\0') {
1908 /* Split into arguments */
1909 argv
= sdssplitlen(line
,sdslen(line
)," ",1,&argc
);
1910 sdstolower(argv
[0]);
1912 /* Execute config directives */
1913 if (!strcasecmp(argv
[0],"timeout") && argc
== 2) {
1914 server
.maxidletime
= atoi(argv
[1]);
1915 if (server
.maxidletime
< 0) {
1916 err
= "Invalid timeout value"; goto loaderr
;
1918 } else if (!strcasecmp(argv
[0],"port") && argc
== 2) {
1919 server
.port
= atoi(argv
[1]);
1920 if (server
.port
< 1 || server
.port
> 65535) {
1921 err
= "Invalid port"; goto loaderr
;
1923 } else if (!strcasecmp(argv
[0],"bind") && argc
== 2) {
1924 server
.bindaddr
= zstrdup(argv
[1]);
1925 } else if (!strcasecmp(argv
[0],"save") && argc
== 3) {
1926 int seconds
= atoi(argv
[1]);
1927 int changes
= atoi(argv
[2]);
1928 if (seconds
< 1 || changes
< 0) {
1929 err
= "Invalid save parameters"; goto loaderr
;
1931 appendServerSaveParams(seconds
,changes
);
1932 } else if (!strcasecmp(argv
[0],"dir") && argc
== 2) {
1933 if (chdir(argv
[1]) == -1) {
1934 redisLog(REDIS_WARNING
,"Can't chdir to '%s': %s",
1935 argv
[1], strerror(errno
));
1938 } else if (!strcasecmp(argv
[0],"loglevel") && argc
== 2) {
1939 if (!strcasecmp(argv
[1],"debug")) server
.verbosity
= REDIS_DEBUG
;
1940 else if (!strcasecmp(argv
[1],"verbose")) server
.verbosity
= REDIS_VERBOSE
;
1941 else if (!strcasecmp(argv
[1],"notice")) server
.verbosity
= REDIS_NOTICE
;
1942 else if (!strcasecmp(argv
[1],"warning")) server
.verbosity
= REDIS_WARNING
;
1944 err
= "Invalid log level. Must be one of debug, notice, warning";
1947 } else if (!strcasecmp(argv
[0],"logfile") && argc
== 2) {
1950 server
.logfile
= zstrdup(argv
[1]);
1951 if (!strcasecmp(server
.logfile
,"stdout")) {
1952 zfree(server
.logfile
);
1953 server
.logfile
= NULL
;
1955 if (server
.logfile
) {
1956 /* Test if we are able to open the file. The server will not
1957 * be able to abort just for this problem later... */
1958 logfp
= fopen(server
.logfile
,"a");
1959 if (logfp
== NULL
) {
1960 err
= sdscatprintf(sdsempty(),
1961 "Can't open the log file: %s", strerror(errno
));
1966 } else if (!strcasecmp(argv
[0],"databases") && argc
== 2) {
1967 server
.dbnum
= atoi(argv
[1]);
1968 if (server
.dbnum
< 1) {
1969 err
= "Invalid number of databases"; goto loaderr
;
1971 } else if (!strcasecmp(argv
[0],"include") && argc
== 2) {
1972 loadServerConfig(argv
[1]);
1973 } else if (!strcasecmp(argv
[0],"maxclients") && argc
== 2) {
1974 server
.maxclients
= atoi(argv
[1]);
1975 } else if (!strcasecmp(argv
[0],"maxmemory") && argc
== 2) {
1976 server
.maxmemory
= memtoll(argv
[1],NULL
);
1977 } else if (!strcasecmp(argv
[0],"slaveof") && argc
== 3) {
1978 server
.masterhost
= sdsnew(argv
[1]);
1979 server
.masterport
= atoi(argv
[2]);
1980 server
.replstate
= REDIS_REPL_CONNECT
;
1981 } else if (!strcasecmp(argv
[0],"masterauth") && argc
== 2) {
1982 server
.masterauth
= zstrdup(argv
[1]);
1983 } else if (!strcasecmp(argv
[0],"glueoutputbuf") && argc
== 2) {
1984 if ((server
.glueoutputbuf
= yesnotoi(argv
[1])) == -1) {
1985 err
= "argument must be 'yes' or 'no'"; goto loaderr
;
1987 } else if (!strcasecmp(argv
[0],"rdbcompression") && argc
== 2) {
1988 if ((server
.rdbcompression
= yesnotoi(argv
[1])) == -1) {
1989 err
= "argument must be 'yes' or 'no'"; goto loaderr
;
1991 } else if (!strcasecmp(argv
[0],"activerehashing") && argc
== 2) {
1992 if ((server
.activerehashing
= yesnotoi(argv
[1])) == -1) {
1993 err
= "argument must be 'yes' or 'no'"; goto loaderr
;
1995 } else if (!strcasecmp(argv
[0],"daemonize") && argc
== 2) {
1996 if ((server
.daemonize
= yesnotoi(argv
[1])) == -1) {
1997 err
= "argument must be 'yes' or 'no'"; goto loaderr
;
1999 } else if (!strcasecmp(argv
[0],"appendonly") && argc
== 2) {
2000 if ((server
.appendonly
= yesnotoi(argv
[1])) == -1) {
2001 err
= "argument must be 'yes' or 'no'"; goto loaderr
;
2003 } else if (!strcasecmp(argv
[0],"appendfilename") && argc
== 2) {
2004 zfree(server
.appendfilename
);
2005 server
.appendfilename
= zstrdup(argv
[1]);
2006 } else if (!strcasecmp(argv
[0],"no-appendfsync-on-rewrite")
2008 if ((server
.no_appendfsync_on_rewrite
= yesnotoi(argv
[1])) == -1) {
2009 err
= "argument must be 'yes' or 'no'"; goto loaderr
;
2011 } else if (!strcasecmp(argv
[0],"appendfsync") && argc
== 2) {
2012 if (!strcasecmp(argv
[1],"no")) {
2013 server
.appendfsync
= APPENDFSYNC_NO
;
2014 } else if (!strcasecmp(argv
[1],"always")) {
2015 server
.appendfsync
= APPENDFSYNC_ALWAYS
;
2016 } else if (!strcasecmp(argv
[1],"everysec")) {
2017 server
.appendfsync
= APPENDFSYNC_EVERYSEC
;
2019 err
= "argument must be 'no', 'always' or 'everysec'";
2022 } else if (!strcasecmp(argv
[0],"requirepass") && argc
== 2) {
2023 server
.requirepass
= zstrdup(argv
[1]);
2024 } else if (!strcasecmp(argv
[0],"pidfile") && argc
== 2) {
2025 zfree(server
.pidfile
);
2026 server
.pidfile
= zstrdup(argv
[1]);
2027 } else if (!strcasecmp(argv
[0],"dbfilename") && argc
== 2) {
2028 zfree(server
.dbfilename
);
2029 server
.dbfilename
= zstrdup(argv
[1]);
2030 } else if (!strcasecmp(argv
[0],"vm-enabled") && argc
== 2) {
2031 if ((server
.vm_enabled
= yesnotoi(argv
[1])) == -1) {
2032 err
= "argument must be 'yes' or 'no'"; goto loaderr
;
2034 } else if (!strcasecmp(argv
[0],"vm-swap-file") && argc
== 2) {
2035 zfree(server
.vm_swap_file
);
2036 server
.vm_swap_file
= zstrdup(argv
[1]);
2037 } else if (!strcasecmp(argv
[0],"vm-max-memory") && argc
== 2) {
2038 server
.vm_max_memory
= memtoll(argv
[1],NULL
);
2039 } else if (!strcasecmp(argv
[0],"vm-page-size") && argc
== 2) {
2040 server
.vm_page_size
= memtoll(argv
[1], NULL
);
2041 } else if (!strcasecmp(argv
[0],"vm-pages") && argc
== 2) {
2042 server
.vm_pages
= memtoll(argv
[1], NULL
);
2043 } else if (!strcasecmp(argv
[0],"vm-max-threads") && argc
== 2) {
2044 server
.vm_max_threads
= strtoll(argv
[1], NULL
, 10);
2045 } else if (!strcasecmp(argv
[0],"hash-max-zipmap-entries") && argc
== 2){
2046 server
.hash_max_zipmap_entries
= memtoll(argv
[1], NULL
);
2047 } else if (!strcasecmp(argv
[0],"hash-max-zipmap-value") && argc
== 2){
2048 server
.hash_max_zipmap_value
= memtoll(argv
[1], NULL
);
2049 } else if (!strcasecmp(argv
[0],"list-max-ziplist-entries") && argc
== 2){
2050 server
.list_max_ziplist_entries
= memtoll(argv
[1], NULL
);
2051 } else if (!strcasecmp(argv
[0],"list-max-ziplist-value") && argc
== 2){
2052 server
.list_max_ziplist_value
= memtoll(argv
[1], NULL
);
2053 } else if (!strcasecmp(argv
[0],"set-max-intset-entries") && argc
== 2){
2054 server
.set_max_intset_entries
= memtoll(argv
[1], NULL
);
2056 err
= "Bad directive or wrong number of arguments"; goto loaderr
;
2058 for (j
= 0; j
< argc
; j
++)
2063 if (fp
!= stdin
) fclose(fp
);
2067 fprintf(stderr
, "\n*** FATAL CONFIG FILE ERROR ***\n");
2068 fprintf(stderr
, "Reading the configuration file, at line %d\n", linenum
);
2069 fprintf(stderr
, ">>> '%s'\n", line
);
2070 fprintf(stderr
, "%s\n", err
);
2074 static void freeClientArgv(redisClient
*c
) {
2077 for (j
= 0; j
< c
->argc
; j
++)
2078 decrRefCount(c
->argv
[j
]);
2079 for (j
= 0; j
< c
->mbargc
; j
++)
2080 decrRefCount(c
->mbargv
[j
]);
2085 static void freeClient(redisClient
*c
) {
2088 /* Note that if the client we are freeing is blocked in a blocking
2089 * call, we have to set querybuf to NULL *before* calling
2090 * unblockClientWaitingData() so that processInputBuffer() does not get
2091 * called. Also it is important to remove the file events after
2092 * this, because this call adds the READABLE event. */
2093 sdsfree(c
->querybuf
);
2095 if (c
->flags
& REDIS_BLOCKED
)
2096 unblockClientWaitingData(c
);
2098 /* UNWATCH all the keys */
2100 listRelease(c
->watched_keys
);
2101 /* Unsubscribe from all the pubsub channels */
2102 pubsubUnsubscribeAllChannels(c
,0);
2103 pubsubUnsubscribeAllPatterns(c
,0);
2104 dictRelease(c
->pubsub_channels
);
2105 listRelease(c
->pubsub_patterns
);
2106 /* Obvious cleanup */
2107 aeDeleteFileEvent(server
.el
,c
->fd
,AE_READABLE
);
2108 aeDeleteFileEvent(server
.el
,c
->fd
,AE_WRITABLE
);
2109 listRelease(c
->reply
);
2112 /* Remove from the list of clients */
2113 ln
= listSearchKey(server
.clients
,c
);
2114 redisAssert(ln
!= NULL
);
2115 listDelNode(server
.clients
,ln
);
2116 /* Remove from the list of clients that are now ready to be restarted
2117 * after waiting for swapped keys */
2118 if (c
->flags
& REDIS_IO_WAIT
&& listLength(c
->io_keys
) == 0) {
2119 ln
= listSearchKey(server
.io_ready_clients
,c
);
2121 listDelNode(server
.io_ready_clients
,ln
);
2122 server
.vm_blocked_clients
--;
2125 /* Remove from the list of clients waiting for swapped keys */
2126 while (server
.vm_enabled
&& listLength(c
->io_keys
)) {
2127 ln
= listFirst(c
->io_keys
);
2128 dontWaitForSwappedKey(c
,ln
->value
);
2130 listRelease(c
->io_keys
);
2131 /* Master/slave cleanup */
2132 if (c
->flags
& REDIS_SLAVE
) {
2133 if (c
->replstate
== REDIS_REPL_SEND_BULK
&& c
->repldbfd
!= -1)
2135 list
*l
= (c
->flags
& REDIS_MONITOR
) ? server
.monitors
: server
.slaves
;
2136 ln
= listSearchKey(l
,c
);
2137 redisAssert(ln
!= NULL
);
2140 if (c
->flags
& REDIS_MASTER
) {
2141 server
.master
= NULL
;
2142 server
.replstate
= REDIS_REPL_CONNECT
;
2144 /* Release memory */
2147 freeClientMultiState(c
);
2151 #define GLUEREPLY_UP_TO (1024)
2152 static void glueReplyBuffersIfNeeded(redisClient
*c
) {
2154 char buf
[GLUEREPLY_UP_TO
];
2159 listRewind(c
->reply
,&li
);
2160 while((ln
= listNext(&li
))) {
2164 objlen
= sdslen(o
->ptr
);
2165 if (copylen
+ objlen
<= GLUEREPLY_UP_TO
) {
2166 memcpy(buf
+copylen
,o
->ptr
,objlen
);
2168 listDelNode(c
->reply
,ln
);
2170 if (copylen
== 0) return;
2174 /* Now the output buffer is empty, add the new single element */
2175 o
= createObject(REDIS_STRING
,sdsnewlen(buf
,copylen
));
2176 listAddNodeHead(c
->reply
,o
);
2179 static void sendReplyToClient(aeEventLoop
*el
, int fd
, void *privdata
, int mask
) {
2180 redisClient
*c
= privdata
;
2181 int nwritten
= 0, totwritten
= 0, objlen
;
2184 REDIS_NOTUSED(mask
);
2186 /* Use writev() if we have enough buffers to send */
2187 if (!server
.glueoutputbuf
&&
2188 listLength(c
->reply
) > REDIS_WRITEV_THRESHOLD
&&
2189 !(c
->flags
& REDIS_MASTER
))
2191 sendReplyToClientWritev(el
, fd
, privdata
, mask
);
2195 while(listLength(c
->reply
)) {
2196 if (server
.glueoutputbuf
&& listLength(c
->reply
) > 1)
2197 glueReplyBuffersIfNeeded(c
);
2199 o
= listNodeValue(listFirst(c
->reply
));
2200 objlen
= sdslen(o
->ptr
);
2203 listDelNode(c
->reply
,listFirst(c
->reply
));
2207 if (c
->flags
& REDIS_MASTER
) {
2208 /* Don't reply to a master */
2209 nwritten
= objlen
- c
->sentlen
;
2211 nwritten
= write(fd
, ((char*)o
->ptr
)+c
->sentlen
, objlen
- c
->sentlen
);
2212 if (nwritten
<= 0) break;
2214 c
->sentlen
+= nwritten
;
2215 totwritten
+= nwritten
;
2216 /* If we fully sent the object on head go to the next one */
2217 if (c
->sentlen
== objlen
) {
2218 listDelNode(c
->reply
,listFirst(c
->reply
));
2221 /* Note that we avoid sending more than REDIS_MAX_WRITE_PER_EVENT
2222 * bytes: in a single threaded server it's a good idea to serve
2223 * other clients as well, even if a very large request comes from a
2224 * super fast link that is always able to accept data (in a real world
2225 * scenario think about 'KEYS *' against the loopback interface) */
2226 if (totwritten
> REDIS_MAX_WRITE_PER_EVENT
) break;
2228 if (nwritten
== -1) {
2229 if (errno
== EAGAIN
) {
2232 redisLog(REDIS_VERBOSE
,
2233 "Error writing to client: %s", strerror(errno
));
2238 if (totwritten
> 0) c
->lastinteraction
= time(NULL
);
2239 if (listLength(c
->reply
) == 0) {
2241 aeDeleteFileEvent(server
.el
,c
->fd
,AE_WRITABLE
);
2245 static void sendReplyToClientWritev(aeEventLoop
*el
, int fd
, void *privdata
, int mask
)
2247 redisClient
*c
= privdata
;
2248 int nwritten
= 0, totwritten
= 0, objlen
, willwrite
;
2250 struct iovec iov
[REDIS_WRITEV_IOVEC_COUNT
];
2251 int offset
, ion
= 0;
2253 REDIS_NOTUSED(mask
);
2256 while (listLength(c
->reply
)) {
2257 offset
= c
->sentlen
;
2261 /* fill-in the iov[] array */
2262 for(node
= listFirst(c
->reply
); node
; node
= listNextNode(node
)) {
2263 o
= listNodeValue(node
);
2264 objlen
= sdslen(o
->ptr
);
2266 if (totwritten
+ objlen
- offset
> REDIS_MAX_WRITE_PER_EVENT
)
2269 if(ion
== REDIS_WRITEV_IOVEC_COUNT
)
2270 break; /* no more iovecs */
2272 iov
[ion
].iov_base
= ((char*)o
->ptr
) + offset
;
2273 iov
[ion
].iov_len
= objlen
- offset
;
2274 willwrite
+= objlen
- offset
;
2275 offset
= 0; /* just for the first item */
2282 /* write all collected blocks at once */
2283 if((nwritten
= writev(fd
, iov
, ion
)) < 0) {
2284 if (errno
!= EAGAIN
) {
2285 redisLog(REDIS_VERBOSE
,
2286 "Error writing to client: %s", strerror(errno
));
2293 totwritten
+= nwritten
;
2294 offset
= c
->sentlen
;
2296 /* remove written robjs from c->reply */
2297 while (nwritten
&& listLength(c
->reply
)) {
2298 o
= listNodeValue(listFirst(c
->reply
));
2299 objlen
= sdslen(o
->ptr
);
2301 if(nwritten
>= objlen
- offset
) {
2302 listDelNode(c
->reply
, listFirst(c
->reply
));
2303 nwritten
-= objlen
- offset
;
2307 c
->sentlen
+= nwritten
;
2315 c
->lastinteraction
= time(NULL
);
2317 if (listLength(c
->reply
) == 0) {
2319 aeDeleteFileEvent(server
.el
,c
->fd
,AE_WRITABLE
);
2323 static int qsortRedisCommands(const void *r1
, const void *r2
) {
2325 ((struct redisCommand
*)r1
)->name
,
2326 ((struct redisCommand
*)r2
)->name
);
2329 static void sortCommandTable() {
2330 /* Copy and sort the read-only version of the command table */
2331 commandTable
= (struct redisCommand
*)malloc(sizeof(readonlyCommandTable
));
2332 memcpy(commandTable
,readonlyCommandTable
,sizeof(readonlyCommandTable
));
2334 sizeof(readonlyCommandTable
)/sizeof(struct redisCommand
),
2335 sizeof(struct redisCommand
),qsortRedisCommands
);
2338 static struct redisCommand
*lookupCommand(char *name
) {
2339 struct redisCommand tmp
= {name
,NULL
,0,0,NULL
,0,0,0};
2343 sizeof(readonlyCommandTable
)/sizeof(struct redisCommand
),
2344 sizeof(struct redisCommand
),
2345 qsortRedisCommands
);
2348 /* resetClient prepare the client to process the next command */
2349 static void resetClient(redisClient
*c
) {
2355 /* Call() is the core of Redis execution of a command */
2356 static void call(redisClient
*c
, struct redisCommand
*cmd
) {
2359 dirty
= server
.dirty
;
2361 dirty
= server
.dirty
-dirty
;
2363 if (server
.appendonly
&& dirty
)
2364 feedAppendOnlyFile(cmd
,c
->db
->id
,c
->argv
,c
->argc
);
2365 if ((dirty
|| cmd
->flags
& REDIS_CMD_FORCE_REPLICATION
) &&
2366 listLength(server
.slaves
))
2367 replicationFeedSlaves(server
.slaves
,c
->db
->id
,c
->argv
,c
->argc
);
2368 if (listLength(server
.monitors
))
2369 replicationFeedMonitors(server
.monitors
,c
->db
->id
,c
->argv
,c
->argc
);
2370 server
.stat_numcommands
++;
2373 /* If this function gets called we already read a whole
2374 * command, arguments are in the client argv/argc fields.
2375 * processCommand() executes the command or prepares the
2376 * server for a bulk read from the client.
2378 * If 1 is returned the client is still alive and valid, and
2379 * other operations can be performed by the caller. Otherwise
2380 * if 0 is returned the client was destroyed (e.g. after QUIT). */
2381 static int processCommand(redisClient
*c
) {
2382 struct redisCommand
*cmd
;
2384 /* Free some memory if needed (maxmemory setting) */
2385 if (server
.maxmemory
) freeMemoryIfNeeded();
2387 /* Handle the multi bulk command type. This is an alternative protocol
2388 * supported by Redis in order to receive commands that are composed of
2389 * multiple binary-safe "bulk" arguments. The latency of processing is
2390 * a bit higher but this allows things like multi-sets, so if this
2391 * protocol is used only for MSET and similar commands this is a big win. */
2392 if (c
->multibulk
== 0 && c
->argc
== 1 && ((char*)(c
->argv
[0]->ptr
))[0] == '*') {
2393 c
->multibulk
= atoi(((char*)c
->argv
[0]->ptr
)+1);
2394 if (c
->multibulk
<= 0) {
2398 decrRefCount(c
->argv
[c
->argc
-1]);
2402 } else if (c
->multibulk
) {
2403 if (c
->bulklen
== -1) {
2404 if (((char*)c
->argv
[0]->ptr
)[0] != '$') {
2405 addReplySds(c
,sdsnew("-ERR multi bulk protocol error\r\n"));
2409 int bulklen
= atoi(((char*)c
->argv
[0]->ptr
)+1);
2410 decrRefCount(c
->argv
[0]);
2411 if (bulklen
< 0 || bulklen
> 1024*1024*1024) {
2413 addReplySds(c
,sdsnew("-ERR invalid bulk write count\r\n"));
2418 c
->bulklen
= bulklen
+2; /* add two bytes for CR+LF */
2422 c
->mbargv
= zrealloc(c
->mbargv
,(sizeof(robj
*))*(c
->mbargc
+1));
2423 c
->mbargv
[c
->mbargc
] = c
->argv
[0];
2427 if (c
->multibulk
== 0) {
2431 /* Here we need to swap the multi-bulk argc/argv with the
2432 * normal argc/argv of the client structure. */
2434 c
->argv
= c
->mbargv
;
2435 c
->mbargv
= auxargv
;
2438 c
->argc
= c
->mbargc
;
2439 c
->mbargc
= auxargc
;
2441 /* We need to set bulklen to something different than -1
2442 * in order for the code below to process the command without
2443 * to try to read the last argument of a bulk command as
2444 * a special argument. */
2446 /* continue below and process the command */
2453 /* -- end of multi bulk commands processing -- */
2455 /* The QUIT command is handled as a special case. Normal command
2456 * procs are unable to close the client connection safely */
2457 if (!strcasecmp(c
->argv
[0]->ptr
,"quit")) {
2462 /* Now lookup the command and check ASAP about trivial error conditions
2463 * such wrong arity, bad command name and so forth. */
2464 cmd
= lookupCommand(c
->argv
[0]->ptr
);
2467 sdscatprintf(sdsempty(), "-ERR unknown command '%s'\r\n",
2468 (char*)c
->argv
[0]->ptr
));
2471 } else if ((cmd
->arity
> 0 && cmd
->arity
!= c
->argc
) ||
2472 (c
->argc
< -cmd
->arity
)) {
2474 sdscatprintf(sdsempty(),
2475 "-ERR wrong number of arguments for '%s' command\r\n",
2479 } else if (cmd
->flags
& REDIS_CMD_BULK
&& c
->bulklen
== -1) {
2480 /* This is a bulk command, we have to read the last argument yet. */
2481 int bulklen
= atoi(c
->argv
[c
->argc
-1]->ptr
);
2483 decrRefCount(c
->argv
[c
->argc
-1]);
2484 if (bulklen
< 0 || bulklen
> 1024*1024*1024) {
2486 addReplySds(c
,sdsnew("-ERR invalid bulk write count\r\n"));
2491 c
->bulklen
= bulklen
+2; /* add two bytes for CR+LF */
2492 /* It is possible that the bulk read is already in the
2493 * buffer. Check this condition and handle it accordingly.
2494 * This is just a fast path, alternative to call processInputBuffer().
2495 * It's a good idea since the code is small and this condition
2496 * happens most of the times. */
2497 if ((signed)sdslen(c
->querybuf
) >= c
->bulklen
) {
2498 c
->argv
[c
->argc
] = createStringObject(c
->querybuf
,c
->bulklen
-2);
2500 c
->querybuf
= sdsrange(c
->querybuf
,c
->bulklen
,-1);
2502 /* Otherwise return... there is to read the last argument
2503 * from the socket. */
2507 /* Let's try to encode the bulk object to save space. */
2508 if (cmd
->flags
& REDIS_CMD_BULK
)
2509 c
->argv
[c
->argc
-1] = tryObjectEncoding(c
->argv
[c
->argc
-1]);
2511 /* Check if the user is authenticated */
2512 if (server
.requirepass
&& !c
->authenticated
&& cmd
->proc
!= authCommand
) {
2513 addReplySds(c
,sdsnew("-ERR operation not permitted\r\n"));
2518 /* Handle the maxmemory directive */
2519 if (server
.maxmemory
&& (cmd
->flags
& REDIS_CMD_DENYOOM
) &&
2520 zmalloc_used_memory() > server
.maxmemory
)
2522 addReplySds(c
,sdsnew("-ERR command not allowed when used memory > 'maxmemory'\r\n"));
2527 /* Only allow SUBSCRIBE and UNSUBSCRIBE in the context of Pub/Sub */
2528 if ((dictSize(c
->pubsub_channels
) > 0 || listLength(c
->pubsub_patterns
) > 0)
2530 cmd
->proc
!= subscribeCommand
&& cmd
->proc
!= unsubscribeCommand
&&
2531 cmd
->proc
!= psubscribeCommand
&& cmd
->proc
!= punsubscribeCommand
) {
2532 addReplySds(c
,sdsnew("-ERR only (P)SUBSCRIBE / (P)UNSUBSCRIBE / QUIT allowed in this context\r\n"));
2537 /* Exec the command */
2538 if (c
->flags
& REDIS_MULTI
&&
2539 cmd
->proc
!= execCommand
&& cmd
->proc
!= discardCommand
&&
2540 cmd
->proc
!= multiCommand
&& cmd
->proc
!= watchCommand
)
2542 queueMultiCommand(c
,cmd
);
2543 addReply(c
,shared
.queued
);
2545 if (server
.vm_enabled
&& server
.vm_max_threads
> 0 &&
2546 blockClientOnSwappedKeys(c
,cmd
)) return 1;
2550 /* Prepare the client for the next command */
2555 static void replicationFeedSlaves(list
*slaves
, int dictid
, robj
**argv
, int argc
) {
2560 /* We need 1+(ARGS*3) objects since commands are using the new protocol
2561 * and we need 1 object for the first "*<count>\r\n" multibulk count, then
2562 * for every additional object we have "$<count>\r\n" + object + "\r\n". */
2563 robj
*static_outv
[REDIS_STATIC_ARGS
*3+1];
2566 if (argc
<= REDIS_STATIC_ARGS
) {
2569 outv
= zmalloc(sizeof(robj
*)*(argc
*3+1));
2572 lenobj
= createObject(REDIS_STRING
,
2573 sdscatprintf(sdsempty(), "*%d\r\n", argc
));
2574 lenobj
->refcount
= 0;
2575 outv
[outc
++] = lenobj
;
2576 for (j
= 0; j
< argc
; j
++) {
2577 lenobj
= createObject(REDIS_STRING
,
2578 sdscatprintf(sdsempty(),"$%lu\r\n",
2579 (unsigned long) stringObjectLen(argv
[j
])));
2580 lenobj
->refcount
= 0;
2581 outv
[outc
++] = lenobj
;
2582 outv
[outc
++] = argv
[j
];
2583 outv
[outc
++] = shared
.crlf
;
2586 /* Increment all the refcounts at start and decrement at end in order to
2587 * be sure to free objects if there is no slave in a replication state
2588 * able to be feed with commands */
2589 for (j
= 0; j
< outc
; j
++) incrRefCount(outv
[j
]);
2590 listRewind(slaves
,&li
);
2591 while((ln
= listNext(&li
))) {
2592 redisClient
*slave
= ln
->value
;
2594 /* Don't feed slaves that are still waiting for BGSAVE to start */
2595 if (slave
->replstate
== REDIS_REPL_WAIT_BGSAVE_START
) continue;
2597 /* Feed all the other slaves, MONITORs and so on */
2598 if (slave
->slaveseldb
!= dictid
) {
2602 case 0: selectcmd
= shared
.select0
; break;
2603 case 1: selectcmd
= shared
.select1
; break;
2604 case 2: selectcmd
= shared
.select2
; break;
2605 case 3: selectcmd
= shared
.select3
; break;
2606 case 4: selectcmd
= shared
.select4
; break;
2607 case 5: selectcmd
= shared
.select5
; break;
2608 case 6: selectcmd
= shared
.select6
; break;
2609 case 7: selectcmd
= shared
.select7
; break;
2610 case 8: selectcmd
= shared
.select8
; break;
2611 case 9: selectcmd
= shared
.select9
; break;
2613 selectcmd
= createObject(REDIS_STRING
,
2614 sdscatprintf(sdsempty(),"select %d\r\n",dictid
));
2615 selectcmd
->refcount
= 0;
2618 addReply(slave
,selectcmd
);
2619 slave
->slaveseldb
= dictid
;
2621 for (j
= 0; j
< outc
; j
++) addReply(slave
,outv
[j
]);
2623 for (j
= 0; j
< outc
; j
++) decrRefCount(outv
[j
]);
2624 if (outv
!= static_outv
) zfree(outv
);
2627 static sds
sdscatrepr(sds s
, char *p
, size_t len
) {
2628 s
= sdscatlen(s
,"\"",1);
2633 s
= sdscatprintf(s
,"\\%c",*p
);
2635 case '\n': s
= sdscatlen(s
,"\\n",1); break;
2636 case '\r': s
= sdscatlen(s
,"\\r",1); break;
2637 case '\t': s
= sdscatlen(s
,"\\t",1); break;
2638 case '\a': s
= sdscatlen(s
,"\\a",1); break;
2639 case '\b': s
= sdscatlen(s
,"\\b",1); break;
2642 s
= sdscatprintf(s
,"%c",*p
);
2644 s
= sdscatprintf(s
,"\\x%02x",(unsigned char)*p
);
2649 return sdscatlen(s
,"\"",1);
2652 static void replicationFeedMonitors(list
*monitors
, int dictid
, robj
**argv
, int argc
) {
2656 sds cmdrepr
= sdsnew("+");
2660 gettimeofday(&tv
,NULL
);
2661 cmdrepr
= sdscatprintf(cmdrepr
,"%ld.%ld ",(long)tv
.tv_sec
,(long)tv
.tv_usec
);
2662 if (dictid
!= 0) cmdrepr
= sdscatprintf(cmdrepr
,"(db %d) ", dictid
);
2664 for (j
= 0; j
< argc
; j
++) {
2665 if (argv
[j
]->encoding
== REDIS_ENCODING_INT
) {
2666 cmdrepr
= sdscatprintf(cmdrepr
, "%ld", (long)argv
[j
]->ptr
);
2668 cmdrepr
= sdscatrepr(cmdrepr
,(char*)argv
[j
]->ptr
,
2669 sdslen(argv
[j
]->ptr
));
2672 cmdrepr
= sdscatlen(cmdrepr
," ",1);
2674 cmdrepr
= sdscatlen(cmdrepr
,"\r\n",2);
2675 cmdobj
= createObject(REDIS_STRING
,cmdrepr
);
2677 listRewind(monitors
,&li
);
2678 while((ln
= listNext(&li
))) {
2679 redisClient
*monitor
= ln
->value
;
2680 addReply(monitor
,cmdobj
);
2682 decrRefCount(cmdobj
);
2685 static void processInputBuffer(redisClient
*c
) {
2687 /* Before processing the input buffer, make sure the client is not
2688 * waiting for a blocking operation such as BLPOP. Note that on the first
2689 * iteration the client is never blocked, otherwise processInputBuffer()
2690 * would not be called at all, but after the execution of the first commands
2691 * in the input buffer the client may be blocked, and the "goto again"
2692 * will try to reiterate. The following line will make it return asap. */
2693 if (c
->flags
& REDIS_BLOCKED
|| c
->flags
& REDIS_IO_WAIT
) return;
2694 if (c
->bulklen
== -1) {
2695 /* Read the first line of the query */
2696 char *p
= strchr(c
->querybuf
,'\n');
2703 query
= c
->querybuf
;
2704 c
->querybuf
= sdsempty();
2705 querylen
= 1+(p
-(query
));
2706 if (sdslen(query
) > querylen
) {
2707 /* leave data after the first line of the query in the buffer */
2708 c
->querybuf
= sdscatlen(c
->querybuf
,query
+querylen
,sdslen(query
)-querylen
);
2710 *p
= '\0'; /* remove "\n" */
2711 if (*(p
-1) == '\r') *(p
-1) = '\0'; /* and "\r" if any */
2712 sdsupdatelen(query
);
2714 /* Now we can split the query in arguments */
2715 argv
= sdssplitlen(query
,sdslen(query
)," ",1,&argc
);
2718 if (c
->argv
) zfree(c
->argv
);
2719 c
->argv
= zmalloc(sizeof(robj
*)*argc
);
2721 for (j
= 0; j
< argc
; j
++) {
2722 if (sdslen(argv
[j
])) {
2723 c
->argv
[c
->argc
] = createObject(REDIS_STRING
,argv
[j
]);
2731 /* Execute the command. If the client is still valid
2732 * after processCommand() return and there is something
2733 * on the query buffer try to process the next command. */
2734 if (processCommand(c
) && sdslen(c
->querybuf
)) goto again
;
2736 /* Nothing to process, argc == 0. Just process the query
2737 * buffer if it's not empty or return to the caller */
2738 if (sdslen(c
->querybuf
)) goto again
;
2741 } else if (sdslen(c
->querybuf
) >= REDIS_REQUEST_MAX_SIZE
) {
2742 redisLog(REDIS_VERBOSE
, "Client protocol error");
2747 /* Bulk read handling. Note that if we are at this point
2748 the client already sent a command terminated with a newline,
2749 we are reading the bulk data that is actually the last
2750 argument of the command. */
2751 int qbl
= sdslen(c
->querybuf
);
2753 if (c
->bulklen
<= qbl
) {
2754 /* Copy everything but the final CRLF as final argument */
2755 c
->argv
[c
->argc
] = createStringObject(c
->querybuf
,c
->bulklen
-2);
2757 c
->querybuf
= sdsrange(c
->querybuf
,c
->bulklen
,-1);
2758 /* Process the command. If the client is still valid after
2759 * the processing and there is more data in the buffer
2760 * try to parse it. */
2761 if (processCommand(c
) && sdslen(c
->querybuf
)) goto again
;
2767 static void readQueryFromClient(aeEventLoop
*el
, int fd
, void *privdata
, int mask
) {
2768 redisClient
*c
= (redisClient
*) privdata
;
2769 char buf
[REDIS_IOBUF_LEN
];
2772 REDIS_NOTUSED(mask
);
2774 nread
= read(fd
, buf
, REDIS_IOBUF_LEN
);
2776 if (errno
== EAGAIN
) {
2779 redisLog(REDIS_VERBOSE
, "Reading from client: %s",strerror(errno
));
2783 } else if (nread
== 0) {
2784 redisLog(REDIS_VERBOSE
, "Client closed connection");
2789 c
->querybuf
= sdscatlen(c
->querybuf
, buf
, nread
);
2790 c
->lastinteraction
= time(NULL
);
2794 processInputBuffer(c
);
2797 static int selectDb(redisClient
*c
, int id
) {
2798 if (id
< 0 || id
>= server
.dbnum
)
2800 c
->db
= &server
.db
[id
];
2804 static void *dupClientReplyValue(void *o
) {
2805 incrRefCount((robj
*)o
);
2809 static int listMatchObjects(void *a
, void *b
) {
2810 return equalStringObjects(a
,b
);
2813 static redisClient
*createClient(int fd
) {
2814 redisClient
*c
= zmalloc(sizeof(*c
));
2816 anetNonBlock(NULL
,fd
);
2817 anetTcpNoDelay(NULL
,fd
);
2818 if (!c
) return NULL
;
2821 c
->querybuf
= sdsempty();
2830 c
->lastinteraction
= time(NULL
);
2831 c
->authenticated
= 0;
2832 c
->replstate
= REDIS_REPL_NONE
;
2833 c
->reply
= listCreate();
2834 listSetFreeMethod(c
->reply
,decrRefCount
);
2835 listSetDupMethod(c
->reply
,dupClientReplyValue
);
2836 c
->blocking_keys
= NULL
;
2837 c
->blocking_keys_num
= 0;
2838 c
->io_keys
= listCreate();
2839 c
->watched_keys
= listCreate();
2840 listSetFreeMethod(c
->io_keys
,decrRefCount
);
2841 c
->pubsub_channels
= dictCreate(&setDictType
,NULL
);
2842 c
->pubsub_patterns
= listCreate();
2843 listSetFreeMethod(c
->pubsub_patterns
,decrRefCount
);
2844 listSetMatchMethod(c
->pubsub_patterns
,listMatchObjects
);
2845 if (aeCreateFileEvent(server
.el
, c
->fd
, AE_READABLE
,
2846 readQueryFromClient
, c
) == AE_ERR
) {
2850 listAddNodeTail(server
.clients
,c
);
2851 initClientMultiState(c
);
2855 static void addReply(redisClient
*c
, robj
*obj
) {
2856 if (listLength(c
->reply
) == 0 &&
2857 (c
->replstate
== REDIS_REPL_NONE
||
2858 c
->replstate
== REDIS_REPL_ONLINE
) &&
2859 aeCreateFileEvent(server
.el
, c
->fd
, AE_WRITABLE
,
2860 sendReplyToClient
, c
) == AE_ERR
) return;
2862 if (server
.vm_enabled
&& obj
->storage
!= REDIS_VM_MEMORY
) {
2863 obj
= dupStringObject(obj
);
2864 obj
->refcount
= 0; /* getDecodedObject() will increment the refcount */
2866 listAddNodeTail(c
->reply
,getDecodedObject(obj
));
2869 static void addReplySds(redisClient
*c
, sds s
) {
2870 robj
*o
= createObject(REDIS_STRING
,s
);
2875 static void addReplyDouble(redisClient
*c
, double d
) {
2878 snprintf(buf
,sizeof(buf
),"%.17g",d
);
2879 addReplySds(c
,sdscatprintf(sdsempty(),"$%lu\r\n%s\r\n",
2880 (unsigned long) strlen(buf
),buf
));
2883 static void addReplyLongLong(redisClient
*c
, long long ll
) {
2888 addReply(c
,shared
.czero
);
2890 } else if (ll
== 1) {
2891 addReply(c
,shared
.cone
);
2895 len
= ll2string(buf
+1,sizeof(buf
)-1,ll
);
2898 addReplySds(c
,sdsnewlen(buf
,len
+3));
2901 static void addReplyUlong(redisClient
*c
, unsigned long ul
) {
2906 addReply(c
,shared
.czero
);
2908 } else if (ul
== 1) {
2909 addReply(c
,shared
.cone
);
2912 len
= snprintf(buf
,sizeof(buf
),":%lu\r\n",ul
);
2913 addReplySds(c
,sdsnewlen(buf
,len
));
2916 static void addReplyBulkLen(redisClient
*c
, robj
*obj
) {
2920 if (obj
->encoding
== REDIS_ENCODING_RAW
) {
2921 len
= sdslen(obj
->ptr
);
2923 long n
= (long)obj
->ptr
;
2925 /* Compute how many bytes this integer will take as a radix 10 string */
2931 while((n
= n
/10) != 0) {
2936 intlen
= ll2string(buf
+1,sizeof(buf
)-1,(long long)len
);
2937 buf
[intlen
+1] = '\r';
2938 buf
[intlen
+2] = '\n';
2939 addReplySds(c
,sdsnewlen(buf
,intlen
+3));
2942 static void addReplyBulk(redisClient
*c
, robj
*obj
) {
2943 addReplyBulkLen(c
,obj
);
2945 addReply(c
,shared
.crlf
);
2948 static void addReplyBulkSds(redisClient
*c
, sds s
) {
2949 robj
*o
= createStringObject(s
, sdslen(s
));
2954 /* In the CONFIG command we need to add vanilla C string as bulk replies */
2955 static void addReplyBulkCString(redisClient
*c
, char *s
) {
2957 addReply(c
,shared
.nullbulk
);
2959 robj
*o
= createStringObject(s
,strlen(s
));
2965 static void acceptHandler(aeEventLoop
*el
, int fd
, void *privdata
, int mask
) {
2970 REDIS_NOTUSED(mask
);
2971 REDIS_NOTUSED(privdata
);
2973 cfd
= anetAccept(server
.neterr
, fd
, cip
, &cport
);
2974 if (cfd
== AE_ERR
) {
2975 redisLog(REDIS_VERBOSE
,"Accepting client connection: %s", server
.neterr
);
2978 redisLog(REDIS_VERBOSE
,"Accepted %s:%d", cip
, cport
);
2979 if ((c
= createClient(cfd
)) == NULL
) {
2980 redisLog(REDIS_WARNING
,"Error allocating resoures for the client");
2981 close(cfd
); /* May be already closed, just ignore errors */
2984 /* If the maxclients directive is set and this is one client too many, close
2985 * the connection. Note that we create the client first instead of checking
2986 * for this condition beforehand, since now the socket is already set in
2987 * nonblocking mode and we can send an error for free using the kernel I/O */
2988 if (server
.maxclients
&& listLength(server
.clients
) > server
.maxclients
) {
2989 char *err
= "-ERR max number of clients reached\r\n";
2991 /* That's a best effort error message, don't check write errors */
2992 if (write(c
->fd
,err
,strlen(err
)) == -1) {
2993 /* Nothing to do, Just to avoid the warning... */
2998 server
.stat_numconnections
++;
3001 /* ======================= Redis objects implementation ===================== */
3003 static robj
*createObject(int type
, void *ptr
) {
3006 if (server
.vm_enabled
) pthread_mutex_lock(&server
.obj_freelist_mutex
);
3007 if (listLength(server
.objfreelist
)) {
3008 listNode
*head
= listFirst(server
.objfreelist
);
3009 o
= listNodeValue(head
);
3010 listDelNode(server
.objfreelist
,head
);
3011 if (server
.vm_enabled
) pthread_mutex_unlock(&server
.obj_freelist_mutex
);
3013 if (server
.vm_enabled
)
3014 pthread_mutex_unlock(&server
.obj_freelist_mutex
);
3015 o
= zmalloc(sizeof(*o
));
3018 o
->encoding
= REDIS_ENCODING_RAW
;
3021 if (server
.vm_enabled
) {
3022 /* Note that this code may run in the context of an I/O thread
3023 * and accessing server.lruclock in theory is an error
3024 * (no locks). But in practice this is safe, and even if we read
3025 * garbage Redis will not fail. */
3026 o
->lru
= server
.lruclock
;
3027 o
->storage
= REDIS_VM_MEMORY
;
3032 static robj
*createStringObject(char *ptr
, size_t len
) {
3033 return createObject(REDIS_STRING
,sdsnewlen(ptr
,len
));
3036 static robj
*createStringObjectFromLongLong(long long value
) {
3038 if (value
>= 0 && value
< REDIS_SHARED_INTEGERS
) {
3039 incrRefCount(shared
.integers
[value
]);
3040 o
= shared
.integers
[value
];
3042 if (value
>= LONG_MIN
&& value
<= LONG_MAX
) {
3043 o
= createObject(REDIS_STRING
, NULL
);
3044 o
->encoding
= REDIS_ENCODING_INT
;
3045 o
->ptr
= (void*)((long)value
);
3047 o
= createObject(REDIS_STRING
,sdsfromlonglong(value
));
3053 static robj
*dupStringObject(robj
*o
) {
3054 assert(o
->encoding
== REDIS_ENCODING_RAW
);
3055 return createStringObject(o
->ptr
,sdslen(o
->ptr
));
3058 static robj
*createListObject(void) {
3059 list
*l
= listCreate();
3060 robj
*o
= createObject(REDIS_LIST
,l
);
3061 listSetFreeMethod(l
,decrRefCount
);
3062 o
->encoding
= REDIS_ENCODING_LIST
;
3066 static robj
*createZiplistObject(void) {
3067 unsigned char *zl
= ziplistNew();
3068 robj
*o
= createObject(REDIS_LIST
,zl
);
3069 o
->encoding
= REDIS_ENCODING_ZIPLIST
;
3073 static robj
*createSetObject(void) {
3074 dict
*d
= dictCreate(&setDictType
,NULL
);
3075 robj
*o
= createObject(REDIS_SET
,d
);
3076 o
->encoding
= REDIS_ENCODING_HT
;
3080 static robj
*createIntsetObject(void) {
3081 intset
*is
= intsetNew();
3082 robj
*o
= createObject(REDIS_SET
,is
);
3083 o
->encoding
= REDIS_ENCODING_INTSET
;
3087 static robj
*createHashObject(void) {
3088 /* All the Hashes start as zipmaps. Will be automatically converted
3089 * into hash tables if there are enough elements or big elements
3091 unsigned char *zm
= zipmapNew();
3092 robj
*o
= createObject(REDIS_HASH
,zm
);
3093 o
->encoding
= REDIS_ENCODING_ZIPMAP
;
3097 static robj
*createZsetObject(void) {
3098 zset
*zs
= zmalloc(sizeof(*zs
));
3100 zs
->dict
= dictCreate(&zsetDictType
,NULL
);
3101 zs
->zsl
= zslCreate();
3102 return createObject(REDIS_ZSET
,zs
);
3105 static void freeStringObject(robj
*o
) {
3106 if (o
->encoding
== REDIS_ENCODING_RAW
) {
3111 static void freeListObject(robj
*o
) {
3112 switch (o
->encoding
) {
3113 case REDIS_ENCODING_LIST
:
3114 listRelease((list
*) o
->ptr
);
3116 case REDIS_ENCODING_ZIPLIST
:
3120 redisPanic("Unknown list encoding type");
3124 static void freeSetObject(robj
*o
) {
3125 switch (o
->encoding
) {
3126 case REDIS_ENCODING_HT
:
3127 dictRelease((dict
*) o
->ptr
);
3129 case REDIS_ENCODING_INTSET
:
3133 redisPanic("Unknown set encoding type");
3137 static void freeZsetObject(robj
*o
) {
3140 dictRelease(zs
->dict
);
3145 static void freeHashObject(robj
*o
) {
3146 switch (o
->encoding
) {
3147 case REDIS_ENCODING_HT
:
3148 dictRelease((dict
*) o
->ptr
);
3150 case REDIS_ENCODING_ZIPMAP
:
3154 redisPanic("Unknown hash encoding type");
3159 static void incrRefCount(robj
*o
) {
3163 static void decrRefCount(void *obj
) {
3166 /* Object is a swapped out value, or in the process of being loaded. */
3167 if (server
.vm_enabled
&&
3168 (o
->storage
== REDIS_VM_SWAPPED
|| o
->storage
== REDIS_VM_LOADING
))
3170 vmpointer
*vp
= obj
;
3171 if (o
->storage
== REDIS_VM_LOADING
) vmCancelThreadedIOJob(o
);
3172 vmMarkPagesFree(vp
->page
,vp
->usedpages
);
3173 server
.vm_stats_swapped_objects
--;
3178 if (o
->refcount
<= 0) redisPanic("decrRefCount against refcount <= 0");
3179 /* Object is in memory, or in the process of being swapped out.
3181 * If the object is being swapped out, abort the operation on
3182 * decrRefCount even if the refcount does not drop to 0: the object
3183 * is referenced at least two times, as value of the key AND as
3184 * job->val in the iojob. So if we don't invalidate the iojob, when it is
3185 * done but the relevant key was removed in the meantime, the
3186 * complete jobs handler will not find the key about the job and the
3187 * assert will fail. */
3188 if (server
.vm_enabled
&& o
->storage
== REDIS_VM_SWAPPING
)
3189 vmCancelThreadedIOJob(o
);
3190 if (--(o
->refcount
) == 0) {
3192 case REDIS_STRING
: freeStringObject(o
); break;
3193 case REDIS_LIST
: freeListObject(o
); break;
3194 case REDIS_SET
: freeSetObject(o
); break;
3195 case REDIS_ZSET
: freeZsetObject(o
); break;
3196 case REDIS_HASH
: freeHashObject(o
); break;
3197 default: redisPanic("Unknown object type"); break;
3199 if (server
.vm_enabled
) pthread_mutex_lock(&server
.obj_freelist_mutex
);
3200 if (listLength(server
.objfreelist
) > REDIS_OBJFREELIST_MAX
||
3201 !listAddNodeHead(server
.objfreelist
,o
))
3203 if (server
.vm_enabled
) pthread_mutex_unlock(&server
.obj_freelist_mutex
);
3207 static int checkType(redisClient
*c
, robj
*o
, int type
) {
3208 if (o
->type
!= type
) {
3209 addReply(c
,shared
.wrongtypeerr
);
3215 /* Check if the nul-terminated string 's' can be represented by a long
3216 * (that is, is a number that fits into long without any other space or
3217 * character before or after the digits).
3219 * If so, the function returns REDIS_OK and *longval is set to the value
3220 * of the number. Otherwise REDIS_ERR is returned */
3221 static int isStringRepresentableAsLong(sds s
, long *longval
) {
3222 char buf
[32], *endptr
;
3226 value
= strtol(s
, &endptr
, 10);
3227 if (endptr
[0] != '\0') return REDIS_ERR
;
3228 slen
= ll2string(buf
,32,value
);
3230 /* If the number converted back into a string is not identical
3231 * then it's not possible to encode the string as integer */
3232 if (sdslen(s
) != (unsigned)slen
|| memcmp(buf
,s
,slen
)) return REDIS_ERR
;
3233 if (longval
) *longval
= value
;
3237 /* Try to encode a string object in order to save space */
3238 static robj
*tryObjectEncoding(robj
*o
) {
3242 if (o
->encoding
!= REDIS_ENCODING_RAW
)
3243 return o
; /* Already encoded */
3245 /* It's not safe to encode shared objects: shared objects can be shared
3246 * everywhere in the "object space" of Redis. Encoded objects can only
3247 * appear as "values" (and not, for instance, as keys) */
3248 if (o
->refcount
> 1) return o
;
3250 /* Currently we try to encode only strings */
3251 redisAssert(o
->type
== REDIS_STRING
);
3253 /* Check if we can represent this string as a long integer */
3254 if (isStringRepresentableAsLong(s
,&value
) == REDIS_ERR
) return o
;
3256 /* Ok, this object can be encoded */
3257 if (value
>= 0 && value
< REDIS_SHARED_INTEGERS
) {
3259 incrRefCount(shared
.integers
[value
]);
3260 return shared
.integers
[value
];
3262 o
->encoding
= REDIS_ENCODING_INT
;
3264 o
->ptr
= (void*) value
;
3269 /* Get a decoded version of an encoded object (returned as a new object).
3270 * If the object is already raw-encoded just increment the ref count. */
3271 static robj
*getDecodedObject(robj
*o
) {
3274 if (o
->encoding
== REDIS_ENCODING_RAW
) {
3278 if (o
->type
== REDIS_STRING
&& o
->encoding
== REDIS_ENCODING_INT
) {
3281 ll2string(buf
,32,(long)o
->ptr
);
3282 dec
= createStringObject(buf
,strlen(buf
));
3285 redisPanic("Unknown encoding type");
3289 /* Compare two string objects via strcmp() or alike.
3290 * Note that the objects may be integer-encoded. In such a case we
3291 * use ll2string() to get a string representation of the numbers on the stack
3292 * and compare the strings, it's much faster than calling getDecodedObject().
3294 * Important note: if objects are not integer encoded, but binary-safe strings,
3295 * sdscmp() from sds.c will apply memcmp() so this function can be considered
3297 static int compareStringObjects(robj
*a
, robj
*b
) {
3298 redisAssert(a
->type
== REDIS_STRING
&& b
->type
== REDIS_STRING
);
3299 char bufa
[128], bufb
[128], *astr
, *bstr
;
3302 if (a
== b
) return 0;
3303 if (a
->encoding
!= REDIS_ENCODING_RAW
) {
3304 ll2string(bufa
,sizeof(bufa
),(long) a
->ptr
);
3310 if (b
->encoding
!= REDIS_ENCODING_RAW
) {
3311 ll2string(bufb
,sizeof(bufb
),(long) b
->ptr
);
3317 return bothsds
? sdscmp(astr
,bstr
) : strcmp(astr
,bstr
);
3320 /* Equal string objects return 1 if the two objects are the same from the
3321 * point of view of a string comparison, otherwise 0 is returned. Note that
3322 * this function is faster than checking for (compareStringObjects(a,b) == 0)
3323 * because it can perform some more optimization. */
3324 static int equalStringObjects(robj
*a
, robj
*b
) {
3325 if (a
->encoding
!= REDIS_ENCODING_RAW
&& b
->encoding
!= REDIS_ENCODING_RAW
){
3326 return a
->ptr
== b
->ptr
;
3328 return compareStringObjects(a
,b
) == 0;
3332 static size_t stringObjectLen(robj
*o
) {
3333 redisAssert(o
->type
== REDIS_STRING
);
3334 if (o
->encoding
== REDIS_ENCODING_RAW
) {
3335 return sdslen(o
->ptr
);
3339 return ll2string(buf
,32,(long)o
->ptr
);
3343 static int getDoubleFromObject(robj
*o
, double *target
) {
3350 redisAssert(o
->type
== REDIS_STRING
);
3351 if (o
->encoding
== REDIS_ENCODING_RAW
) {
3352 value
= strtod(o
->ptr
, &eptr
);
3353 if (eptr
[0] != '\0') return REDIS_ERR
;
3354 } else if (o
->encoding
== REDIS_ENCODING_INT
) {
3355 value
= (long)o
->ptr
;
3357 redisPanic("Unknown string encoding");
3365 static int getDoubleFromObjectOrReply(redisClient
*c
, robj
*o
, double *target
, const char *msg
) {
3367 if (getDoubleFromObject(o
, &value
) != REDIS_OK
) {
3369 addReplySds(c
, sdscatprintf(sdsempty(), "-ERR %s\r\n", msg
));
3371 addReplySds(c
, sdsnew("-ERR value is not a double\r\n"));
3380 static int getLongLongFromObject(robj
*o
, long long *target
) {
3387 redisAssert(o
->type
== REDIS_STRING
);
3388 if (o
->encoding
== REDIS_ENCODING_RAW
) {
3389 value
= strtoll(o
->ptr
, &eptr
, 10);
3390 if (eptr
[0] != '\0') return REDIS_ERR
;
3391 } else if (o
->encoding
== REDIS_ENCODING_INT
) {
3392 value
= (long)o
->ptr
;
3394 redisPanic("Unknown string encoding");
3398 if (target
) *target
= value
;
3402 static int getLongLongFromObjectOrReply(redisClient
*c
, robj
*o
, long long *target
, const char *msg
) {
3404 if (getLongLongFromObject(o
, &value
) != REDIS_OK
) {
3406 addReplySds(c
, sdscatprintf(sdsempty(), "-ERR %s\r\n", msg
));
3408 addReplySds(c
, sdsnew("-ERR value is not an integer\r\n"));
3417 static int getLongFromObjectOrReply(redisClient
*c
, robj
*o
, long *target
, const char *msg
) {
3420 if (getLongLongFromObjectOrReply(c
, o
, &value
, msg
) != REDIS_OK
) return REDIS_ERR
;
3421 if (value
< LONG_MIN
|| value
> LONG_MAX
) {
3423 addReplySds(c
, sdscatprintf(sdsempty(), "-ERR %s\r\n", msg
));
3425 addReplySds(c
, sdsnew("-ERR value is out of range\r\n"));
3434 /* =========================== Keyspace access API ========================== */
3436 static robj
*lookupKey(redisDb
*db
, robj
*key
) {
3437 dictEntry
*de
= dictFind(db
->dict
,key
->ptr
);
3439 robj
*val
= dictGetEntryVal(de
);
3441 if (server
.vm_enabled
) {
3442 if (val
->storage
== REDIS_VM_MEMORY
||
3443 val
->storage
== REDIS_VM_SWAPPING
)
3445 /* If we were swapping the object out, cancel the operation */
3446 if (val
->storage
== REDIS_VM_SWAPPING
)
3447 vmCancelThreadedIOJob(val
);
3448 /* Update the access time for the aging algorithm. */
3449 val
->lru
= server
.lruclock
;
3451 int notify
= (val
->storage
== REDIS_VM_LOADING
);
3453 /* Our value was swapped out to disk. Load it back into memory. */
3454 redisAssert(val
->type
== REDIS_VMPOINTER
);
3455 val
= vmLoadObject(val
);
3456 dictGetEntryVal(de
) = val
;
3458 /* Clients blocked by the VM subsystem may be waiting for
3460 if (notify
) handleClientsBlockedOnSwappedKey(db
,key
);
3469 static robj
*lookupKeyRead(redisDb
*db
, robj
*key
) {
3470 expireIfNeeded(db
,key
);
3471 return lookupKey(db
,key
);
3474 static robj
*lookupKeyWrite(redisDb
*db
, robj
*key
) {
3475 deleteIfVolatile(db
,key
);
3476 touchWatchedKey(db
,key
);
3477 return lookupKey(db
,key
);
3480 static robj
*lookupKeyReadOrReply(redisClient
*c
, robj
*key
, robj
*reply
) {
3481 robj
*o
= lookupKeyRead(c
->db
, key
);
3482 if (!o
) addReply(c
,reply
);
3486 static robj
*lookupKeyWriteOrReply(redisClient
*c
, robj
*key
, robj
*reply
) {
3487 robj
*o
= lookupKeyWrite(c
->db
, key
);
3488 if (!o
) addReply(c
,reply
);
3492 /* Add the key to the DB. If the key already exists REDIS_ERR is returned,
3493 * otherwise REDIS_OK is returned, and the caller should increment the
3494 * refcount of 'val'. */
3495 static int dbAdd(redisDb
*db
, robj
*key
, robj
*val
) {
3496 /* Perform a lookup before adding the key, as we need to copy the
3498 if (dictFind(db
->dict
, key
->ptr
) != NULL
) {
3501 sds copy
= sdsdup(key
->ptr
);
3502 dictAdd(db
->dict
, copy
, val
);
3507 /* If the key does not exist, this is just like dbAdd(). Otherwise
3508 * the value associated to the key is replaced with the new one.
3510 * On update (key already existed) 0 is returned. Otherwise 1. */
3511 static int dbReplace(redisDb
*db
, robj
*key
, robj
*val
) {
3512 if (dictFind(db
->dict
,key
->ptr
) == NULL
) {
3513 sds copy
= sdsdup(key
->ptr
);
3514 dictAdd(db
->dict
, copy
, val
);
3517 dictReplace(db
->dict
, key
->ptr
, val
);
3522 static int dbExists(redisDb
*db
, robj
*key
) {
3523 return dictFind(db
->dict
,key
->ptr
) != NULL
;
3526 /* Return a random key, in form of a Redis object.
3527 * If there are no keys, NULL is returned.
3529 * The function makes sure to return keys not already expired. */
3530 static robj
*dbRandomKey(redisDb
*db
) {
3531 struct dictEntry
*de
;
3537 de
= dictGetRandomKey(db
->dict
);
3538 if (de
== NULL
) return NULL
;
3540 key
= dictGetEntryKey(de
);
3541 keyobj
= createStringObject(key
,sdslen(key
));
3542 if (dictFind(db
->expires
,key
)) {
3543 if (expireIfNeeded(db
,keyobj
)) {
3544 decrRefCount(keyobj
);
3545 continue; /* search for another key. This expired. */
3552 /* Delete a key, value, and associated expiration entry if any, from the DB */
3553 static int dbDelete(redisDb
*db
, robj
*key
) {
3556 if (dictSize(db
->expires
)) dictDelete(db
->expires
,key
->ptr
);
3557 retval
= dictDelete(db
->dict
,key
->ptr
);
3559 return retval
== DICT_OK
;
3562 /*============================ RDB saving/loading =========================== */
3564 static int rdbSaveType(FILE *fp
, unsigned char type
) {
3565 if (fwrite(&type
,1,1,fp
) == 0) return -1;
3569 static int rdbSaveTime(FILE *fp
, time_t t
) {
3570 int32_t t32
= (int32_t) t
;
3571 if (fwrite(&t32
,4,1,fp
) == 0) return -1;
3575 /* check rdbLoadLen() comments for more info */
3576 static int rdbSaveLen(FILE *fp
, uint32_t len
) {
3577 unsigned char buf
[2];
3580 /* Save a 6 bit len */
3581 buf
[0] = (len
&0xFF)|(REDIS_RDB_6BITLEN
<<6);
3582 if (fwrite(buf
,1,1,fp
) == 0) return -1;
3583 } else if (len
< (1<<14)) {
3584 /* Save a 14 bit len */
3585 buf
[0] = ((len
>>8)&0xFF)|(REDIS_RDB_14BITLEN
<<6);
3587 if (fwrite(buf
,2,1,fp
) == 0) return -1;
3589 /* Save a 32 bit len */
3590 buf
[0] = (REDIS_RDB_32BITLEN
<<6);
3591 if (fwrite(buf
,1,1,fp
) == 0) return -1;
3593 if (fwrite(&len
,4,1,fp
) == 0) return -1;
3598 /* Encode 'value' as an integer if possible (if integer will fit the
3599 * supported range). If the function successfully encoded the integer
3600 * then the (up to 5 bytes) encoded representation is written in the
3601 * string pointed by 'enc' and the length is returned. Otherwise
3603 static int rdbEncodeInteger(long long value
, unsigned char *enc
) {
3604 /* Finally check if it fits in our ranges */
3605 if (value
>= -(1<<7) && value
<= (1<<7)-1) {
3606 enc
[0] = (REDIS_RDB_ENCVAL
<<6)|REDIS_RDB_ENC_INT8
;
3607 enc
[1] = value
&0xFF;
3609 } else if (value
>= -(1<<15) && value
<= (1<<15)-1) {
3610 enc
[0] = (REDIS_RDB_ENCVAL
<<6)|REDIS_RDB_ENC_INT16
;
3611 enc
[1] = value
&0xFF;
3612 enc
[2] = (value
>>8)&0xFF;
3614 } else if (value
>= -((long long)1<<31) && value
<= ((long long)1<<31)-1) {
3615 enc
[0] = (REDIS_RDB_ENCVAL
<<6)|REDIS_RDB_ENC_INT32
;
3616 enc
[1] = value
&0xFF;
3617 enc
[2] = (value
>>8)&0xFF;
3618 enc
[3] = (value
>>16)&0xFF;
3619 enc
[4] = (value
>>24)&0xFF;
3626 /* String objects in the form "2391" "-100" without any space and with a
3627 * range of values that can fit in an 8, 16 or 32 bit signed value can be
3628 * encoded as integers to save space */
3629 static int rdbTryIntegerEncoding(char *s
, size_t len
, unsigned char *enc
) {
3631 char *endptr
, buf
[32];
3633 /* Check if it's possible to encode this value as a number */
3634 value
= strtoll(s
, &endptr
, 10);
3635 if (endptr
[0] != '\0') return 0;
3636 ll2string(buf
,32,value
);
3638 /* If the number converted back into a string is not identical
3639 * then it's not possible to encode the string as integer */
3640 if (strlen(buf
) != len
|| memcmp(buf
,s
,len
)) return 0;
3642 return rdbEncodeInteger(value
,enc
);
3645 static int rdbSaveLzfStringObject(FILE *fp
, unsigned char *s
, size_t len
) {
3646 size_t comprlen
, outlen
;
3650 /* We require at least four bytes compression for this to be worth it */
3651 if (len
<= 4) return 0;
3653 if ((out
= zmalloc(outlen
+1)) == NULL
) return 0;
3654 comprlen
= lzf_compress(s
, len
, out
, outlen
);
3655 if (comprlen
== 0) {
3659 /* Data compressed! Let's save it on disk */
3660 byte
= (REDIS_RDB_ENCVAL
<<6)|REDIS_RDB_ENC_LZF
;
3661 if (fwrite(&byte
,1,1,fp
) == 0) goto writeerr
;
3662 if (rdbSaveLen(fp
,comprlen
) == -1) goto writeerr
;
3663 if (rdbSaveLen(fp
,len
) == -1) goto writeerr
;
3664 if (fwrite(out
,comprlen
,1,fp
) == 0) goto writeerr
;
3673 /* Save a string object as [len][data] on disk. If the object is a string
3674 * representation of an integer value we try to save it in a special form */
3675 static int rdbSaveRawString(FILE *fp
, unsigned char *s
, size_t len
) {
3678 /* Try integer encoding */
3680 unsigned char buf
[5];
3681 if ((enclen
= rdbTryIntegerEncoding((char*)s
,len
,buf
)) > 0) {
3682 if (fwrite(buf
,enclen
,1,fp
) == 0) return -1;
3687 /* Try LZF compression - under 20 bytes it's unable to compress even
3688 * aaaaaaaaaaaaaaaaaa so skip it */
3689 if (server
.rdbcompression
&& len
> 20) {
3692 retval
= rdbSaveLzfStringObject(fp
,s
,len
);
3693 if (retval
== -1) return -1;
3694 if (retval
> 0) return 0;
3695 /* retval == 0 means data can't be compressed, save the old way */
3698 /* Store verbatim */
3699 if (rdbSaveLen(fp
,len
) == -1) return -1;
3700 if (len
&& fwrite(s
,len
,1,fp
) == 0) return -1;
3704 /* Save a long long value as either an encoded string or a string. */
3705 static int rdbSaveLongLongAsStringObject(FILE *fp
, long long value
) {
3706 unsigned char buf
[32];
3707 int enclen
= rdbEncodeInteger(value
,buf
);
3709 if (fwrite(buf
,enclen
,1,fp
) == 0) return -1;
3711 /* Encode as string */
3712 enclen
= ll2string((char*)buf
,32,value
);
3713 redisAssert(enclen
< 32);
3714 if (rdbSaveLen(fp
,enclen
) == -1) return -1;
3715 if (fwrite(buf
,enclen
,1,fp
) == 0) return -1;
3720 /* Like rdbSaveRawString() but handle encoded objects */
3721 static int rdbSaveStringObject(FILE *fp
, robj
*obj
) {
3722 /* Avoid to decode the object, then encode it again, if the
3723 * object is already integer encoded. */
3724 if (obj
->encoding
== REDIS_ENCODING_INT
) {
3725 return rdbSaveLongLongAsStringObject(fp
,(long)obj
->ptr
);
3727 redisAssert(obj
->encoding
== REDIS_ENCODING_RAW
);
3728 return rdbSaveRawString(fp
,obj
->ptr
,sdslen(obj
->ptr
));
3732 /* Save a double value. Doubles are saved as strings prefixed by an unsigned
3733 * 8 bit integer specifying the length of the representation.
3734 * This 8 bit integer has special values in order to specify the following
3740 static int rdbSaveDoubleValue(FILE *fp
, double val
) {
3741 unsigned char buf
[128];
3747 } else if (!isfinite(val
)) {
3749 buf
[0] = (val
< 0) ? 255 : 254;
3751 #if (DBL_MANT_DIG >= 52) && (LLONG_MAX == 0x7fffffffffffffffLL)
3752 /* Check if the float is in a safe range to be cast into a
3753 * long long. We are assuming that long long is 64 bit here.
3754 * Also we are assuming that there are no implementations around where
3755 * double has precision < 52 bit.
3757 * Under these assumptions we test if a double is inside an interval
3758 * where casting to long long is safe. Then using two castings we
3759 * make sure the decimal part is zero. If all this is true we use
3760 * integer printing function that is much faster. */
3761 double min
= -4503599627370495; /* -(2^52)+1 */
3762 double max
= 4503599627370496; /* 2^52 */
3763 if (val
> min
&& val
< max
&& val
== ((double)((long long)val
)))
3764 ll2string((char*)buf
+1,sizeof(buf
),(long long)val
);
3767 snprintf((char*)buf
+1,sizeof(buf
)-1,"%.17g",val
);
3768 buf
[0] = strlen((char*)buf
+1);
3771 if (fwrite(buf
,len
,1,fp
) == 0) return -1;
3775 /* Save a Redis object. */
3776 static int rdbSaveObject(FILE *fp
, robj
*o
) {
3777 if (o
->type
== REDIS_STRING
) {
3778 /* Save a string value */
3779 if (rdbSaveStringObject(fp
,o
) == -1) return -1;
3780 } else if (o
->type
== REDIS_LIST
) {
3781 /* Save a list value */
3782 if (o
->encoding
== REDIS_ENCODING_ZIPLIST
) {
3784 unsigned char *vstr
;
3788 if (rdbSaveLen(fp
,ziplistLen(o
->ptr
)) == -1) return -1;
3789 p
= ziplistIndex(o
->ptr
,0);
3790 while(ziplistGet(p
,&vstr
,&vlen
,&vlong
)) {
3792 if (rdbSaveRawString(fp
,vstr
,vlen
) == -1)
3795 if (rdbSaveLongLongAsStringObject(fp
,vlong
) == -1)
3798 p
= ziplistNext(o
->ptr
,p
);
3800 } else if (o
->encoding
== REDIS_ENCODING_LIST
) {
3801 list
*list
= o
->ptr
;
3805 if (rdbSaveLen(fp
,listLength(list
)) == -1) return -1;
3806 listRewind(list
,&li
);
3807 while((ln
= listNext(&li
))) {
3808 robj
*eleobj
= listNodeValue(ln
);
3809 if (rdbSaveStringObject(fp
,eleobj
) == -1) return -1;
3812 redisPanic("Unknown list encoding");
3814 } else if (o
->type
== REDIS_SET
) {
3815 /* Save a set value */
3816 if (o
->encoding
== REDIS_ENCODING_HT
) {
3818 dictIterator
*di
= dictGetIterator(set
);
3821 if (rdbSaveLen(fp
,dictSize(set
)) == -1) return -1;
3822 while((de
= dictNext(di
)) != NULL
) {
3823 robj
*eleobj
= dictGetEntryKey(de
);
3824 if (rdbSaveStringObject(fp
,eleobj
) == -1) return -1;
3826 dictReleaseIterator(di
);
3827 } else if (o
->encoding
== REDIS_ENCODING_INTSET
) {
3828 intset
*is
= o
->ptr
;
3832 if (rdbSaveLen(fp
,intsetLen(is
)) == -1) return -1;
3833 while(intsetGet(is
,i
++,&llval
)) {
3834 if (rdbSaveLongLongAsStringObject(fp
,llval
) == -1) return -1;
3837 redisPanic("Unknown set encoding");
3839 } else if (o
->type
== REDIS_ZSET
) {
3840 /* Save a sorted set value */
3842 dictIterator
*di
= dictGetIterator(zs
->dict
);
3845 if (rdbSaveLen(fp
,dictSize(zs
->dict
)) == -1) return -1;
3846 while((de
= dictNext(di
)) != NULL
) {
3847 robj
*eleobj
= dictGetEntryKey(de
);
3848 double *score
= dictGetEntryVal(de
);
3850 if (rdbSaveStringObject(fp
,eleobj
) == -1) return -1;
3851 if (rdbSaveDoubleValue(fp
,*score
) == -1) return -1;
3853 dictReleaseIterator(di
);
3854 } else if (o
->type
== REDIS_HASH
) {
3855 /* Save a hash value */
3856 if (o
->encoding
== REDIS_ENCODING_ZIPMAP
) {
3857 unsigned char *p
= zipmapRewind(o
->ptr
);
3858 unsigned int count
= zipmapLen(o
->ptr
);
3859 unsigned char *key
, *val
;
3860 unsigned int klen
, vlen
;
3862 if (rdbSaveLen(fp
,count
) == -1) return -1;
3863 while((p
= zipmapNext(p
,&key
,&klen
,&val
,&vlen
)) != NULL
) {
3864 if (rdbSaveRawString(fp
,key
,klen
) == -1) return -1;
3865 if (rdbSaveRawString(fp
,val
,vlen
) == -1) return -1;
3868 dictIterator
*di
= dictGetIterator(o
->ptr
);
3871 if (rdbSaveLen(fp
,dictSize((dict
*)o
->ptr
)) == -1) return -1;
3872 while((de
= dictNext(di
)) != NULL
) {
3873 robj
*key
= dictGetEntryKey(de
);
3874 robj
*val
= dictGetEntryVal(de
);
3876 if (rdbSaveStringObject(fp
,key
) == -1) return -1;
3877 if (rdbSaveStringObject(fp
,val
) == -1) return -1;
3879 dictReleaseIterator(di
);
3882 redisPanic("Unknown object type");
3887 /* Return the length the object will have on disk if saved with
3888 * the rdbSaveObject() function. Currently we use a trick to get
3889 * this length with very little changes to the code. In the future
3890 * we could switch to a faster solution. */
3891 static off_t
rdbSavedObjectLen(robj
*o
, FILE *fp
) {
3892 if (fp
== NULL
) fp
= server
.devnull
;
3894 assert(rdbSaveObject(fp
,o
) != 1);
3898 /* Return the number of pages required to save this object in the swap file */
3899 static off_t
rdbSavedObjectPages(robj
*o
, FILE *fp
) {
3900 off_t bytes
= rdbSavedObjectLen(o
,fp
);
3902 return (bytes
+(server
.vm_page_size
-1))/server
.vm_page_size
;
3905 /* Save the DB on disk. Return REDIS_ERR on error, REDIS_OK on success */
3906 static int rdbSave(char *filename
) {
3907 dictIterator
*di
= NULL
;
3912 time_t now
= time(NULL
);
3914 /* Wait for I/O threads to terminate, just in case this is a
3915 * foreground-saving, to avoid seeking the swap file descriptor at the
3917 if (server
.vm_enabled
)
3918 waitEmptyIOJobsQueue();
3920 snprintf(tmpfile
,256,"temp-%d.rdb", (int) getpid());
3921 fp
= fopen(tmpfile
,"w");
3923 redisLog(REDIS_WARNING
, "Failed saving the DB: %s", strerror(errno
));
3926 if (fwrite("REDIS0001",9,1,fp
) == 0) goto werr
;
3927 for (j
= 0; j
< server
.dbnum
; j
++) {
3928 redisDb
*db
= server
.db
+j
;
3930 if (dictSize(d
) == 0) continue;
3931 di
= dictGetIterator(d
);
3937 /* Write the SELECT DB opcode */
3938 if (rdbSaveType(fp
,REDIS_SELECTDB
) == -1) goto werr
;
3939 if (rdbSaveLen(fp
,j
) == -1) goto werr
;
3941 /* Iterate this DB writing every entry */
3942 while((de
= dictNext(di
)) != NULL
) {
3943 sds keystr
= dictGetEntryKey(de
);
3944 robj key
, *o
= dictGetEntryVal(de
);
3947 initStaticStringObject(key
,keystr
);
3948 expiretime
= getExpire(db
,&key
);
3950 /* Save the expire time */
3951 if (expiretime
!= -1) {
3952 /* If this key is already expired skip it */
3953 if (expiretime
< now
) continue;
3954 if (rdbSaveType(fp
,REDIS_EXPIRETIME
) == -1) goto werr
;
3955 if (rdbSaveTime(fp
,expiretime
) == -1) goto werr
;
3957 /* Save the key and associated value. This requires special
3958 * handling if the value is swapped out. */
3959 if (!server
.vm_enabled
|| o
->storage
== REDIS_VM_MEMORY
||
3960 o
->storage
== REDIS_VM_SWAPPING
) {
3961 /* Save type, key, value */
3962 if (rdbSaveType(fp
,o
->type
) == -1) goto werr
;
3963 if (rdbSaveStringObject(fp
,&key
) == -1) goto werr
;
3964 if (rdbSaveObject(fp
,o
) == -1) goto werr
;
3966 /* REDIS_VM_SWAPPED or REDIS_VM_LOADING */
3968 /* Get a preview of the object in memory */
3969 po
= vmPreviewObject(o
);
3970 /* Save type, key, value */
3971 if (rdbSaveType(fp
,po
->type
) == -1) goto werr
;
3972 if (rdbSaveStringObject(fp
,&key
) == -1) goto werr
;
3973 if (rdbSaveObject(fp
,po
) == -1) goto werr
;
3974 /* Remove the loaded object from memory */
3978 dictReleaseIterator(di
);
3981 if (rdbSaveType(fp
,REDIS_EOF
) == -1) goto werr
;
3983 /* Make sure data will not remain on the OS's output buffers */
3988 /* Use RENAME to make sure the DB file is changed atomically only
3989 * if the generated DB file is ok. */
3990 if (rename(tmpfile
,filename
) == -1) {
3991 redisLog(REDIS_WARNING
,"Error moving temp DB file on the final destination: %s", strerror(errno
));
3995 redisLog(REDIS_NOTICE
,"DB saved on disk");
3997 server
.lastsave
= time(NULL
);
4003 redisLog(REDIS_WARNING
,"Write error saving DB on disk: %s", strerror(errno
));
4004 if (di
) dictReleaseIterator(di
);
4008 static int rdbSaveBackground(char *filename
) {
4011 if (server
.bgsavechildpid
!= -1) return REDIS_ERR
;
4012 if (server
.vm_enabled
) waitEmptyIOJobsQueue();
4013 if ((childpid
= fork()) == 0) {
4015 if (server
.vm_enabled
) vmReopenSwapFile();
4017 if (rdbSave(filename
) == REDIS_OK
) {
4024 if (childpid
== -1) {
4025 redisLog(REDIS_WARNING
,"Can't save in background: fork: %s",
4029 redisLog(REDIS_NOTICE
,"Background saving started by pid %d",childpid
);
4030 server
.bgsavechildpid
= childpid
;
4031 updateDictResizePolicy();
4034 return REDIS_OK
; /* unreached */
4037 static void rdbRemoveTempFile(pid_t childpid
) {
4040 snprintf(tmpfile
,256,"temp-%d.rdb", (int) childpid
);
4044 static int rdbLoadType(FILE *fp
) {
4046 if (fread(&type
,1,1,fp
) == 0) return -1;
4050 static time_t rdbLoadTime(FILE *fp
) {
4052 if (fread(&t32
,4,1,fp
) == 0) return -1;
4053 return (time_t) t32
;
4056 /* Load an encoded length from the DB, see the REDIS_RDB_* defines on the top
4057 * of this file for a description of how these are stored on disk.
4059 * isencoded is set to 1 if the value read is not actually a length but
4060 * an "encoding type", check the above comments for more info */
4061 static uint32_t rdbLoadLen(FILE *fp
, int *isencoded
) {
4062 unsigned char buf
[2];
4066 if (isencoded
) *isencoded
= 0;
4067 if (fread(buf
,1,1,fp
) == 0) return REDIS_RDB_LENERR
;
4068 type
= (buf
[0]&0xC0)>>6;
4069 if (type
== REDIS_RDB_6BITLEN
) {
4070 /* Read a 6 bit len */
4072 } else if (type
== REDIS_RDB_ENCVAL
) {
4073 /* Read a 6 bit len encoding type */
4074 if (isencoded
) *isencoded
= 1;
4076 } else if (type
== REDIS_RDB_14BITLEN
) {
4077 /* Read a 14 bit len */
4078 if (fread(buf
+1,1,1,fp
) == 0) return REDIS_RDB_LENERR
;
4079 return ((buf
[0]&0x3F)<<8)|buf
[1];
4081 /* Read a 32 bit len */
4082 if (fread(&len
,4,1,fp
) == 0) return REDIS_RDB_LENERR
;
4087 /* Load an integer-encoded object from file 'fp', with the specified
4088 * encoding type 'enctype'. If encode is true the function may return
4089 * an integer-encoded object as reply, otherwise the returned object
4090 * will always be encoded as a raw string. */
4091 static robj
*rdbLoadIntegerObject(FILE *fp
, int enctype
, int encode
) {
4092 unsigned char enc
[4];
4095 if (enctype
== REDIS_RDB_ENC_INT8
) {
4096 if (fread(enc
,1,1,fp
) == 0) return NULL
;
4097 val
= (signed char)enc
[0];
4098 } else if (enctype
== REDIS_RDB_ENC_INT16
) {
4100 if (fread(enc
,2,1,fp
) == 0) return NULL
;
4101 v
= enc
[0]|(enc
[1]<<8);
4103 } else if (enctype
== REDIS_RDB_ENC_INT32
) {
4105 if (fread(enc
,4,1,fp
) == 0) return NULL
;
4106 v
= enc
[0]|(enc
[1]<<8)|(enc
[2]<<16)|(enc
[3]<<24);
4109 val
= 0; /* anti-warning */
4110 redisPanic("Unknown RDB integer encoding type");
4113 return createStringObjectFromLongLong(val
);
4115 return createObject(REDIS_STRING
,sdsfromlonglong(val
));
4118 static robj
*rdbLoadLzfStringObject(FILE*fp
) {
4119 unsigned int len
, clen
;
4120 unsigned char *c
= NULL
;
4123 if ((clen
= rdbLoadLen(fp
,NULL
)) == REDIS_RDB_LENERR
) return NULL
;
4124 if ((len
= rdbLoadLen(fp
,NULL
)) == REDIS_RDB_LENERR
) return NULL
;
4125 if ((c
= zmalloc(clen
)) == NULL
) goto err
;
4126 if ((val
= sdsnewlen(NULL
,len
)) == NULL
) goto err
;
4127 if (fread(c
,clen
,1,fp
) == 0) goto err
;
4128 if (lzf_decompress(c
,clen
,val
,len
) == 0) goto err
;
4130 return createObject(REDIS_STRING
,val
);
4137 static robj
*rdbGenericLoadStringObject(FILE*fp
, int encode
) {
4142 len
= rdbLoadLen(fp
,&isencoded
);
4145 case REDIS_RDB_ENC_INT8
:
4146 case REDIS_RDB_ENC_INT16
:
4147 case REDIS_RDB_ENC_INT32
:
4148 return rdbLoadIntegerObject(fp
,len
,encode
);
4149 case REDIS_RDB_ENC_LZF
:
4150 return rdbLoadLzfStringObject(fp
);
4152 redisPanic("Unknown RDB encoding type");
4156 if (len
== REDIS_RDB_LENERR
) return NULL
;
4157 val
= sdsnewlen(NULL
,len
);
4158 if (len
&& fread(val
,len
,1,fp
) == 0) {
4162 return createObject(REDIS_STRING
,val
);
4165 static robj
*rdbLoadStringObject(FILE *fp
) {
4166 return rdbGenericLoadStringObject(fp
,0);
4169 static robj
*rdbLoadEncodedStringObject(FILE *fp
) {
4170 return rdbGenericLoadStringObject(fp
,1);
4173 /* For information about double serialization check rdbSaveDoubleValue() */
4174 static int rdbLoadDoubleValue(FILE *fp
, double *val
) {
4178 if (fread(&len
,1,1,fp
) == 0) return -1;
4180 case 255: *val
= R_NegInf
; return 0;
4181 case 254: *val
= R_PosInf
; return 0;
4182 case 253: *val
= R_Nan
; return 0;
4184 if (fread(buf
,len
,1,fp
) == 0) return -1;
4186 sscanf(buf
, "%lg", val
);
4191 /* Load a Redis object of the specified type from the specified file.
4192 * On success a newly allocated object is returned, otherwise NULL. */
4193 static robj
*rdbLoadObject(int type
, FILE *fp
) {
4194 robj
*o
, *ele
, *dec
;
4197 redisLog(REDIS_DEBUG
,"LOADING OBJECT %d (at %d)\n",type
,ftell(fp
));
4198 if (type
== REDIS_STRING
) {
4199 /* Read string value */
4200 if ((o
= rdbLoadEncodedStringObject(fp
)) == NULL
) return NULL
;
4201 o
= tryObjectEncoding(o
);
4202 } else if (type
== REDIS_LIST
) {
4203 /* Read list value */
4204 if ((len
= rdbLoadLen(fp
,NULL
)) == REDIS_RDB_LENERR
) return NULL
;
4206 /* Use a real list when there are too many entries */
4207 if (len
> server
.list_max_ziplist_entries
) {
4208 o
= createListObject();
4210 o
= createZiplistObject();
4213 /* Load every single element of the list */
4215 if ((ele
= rdbLoadEncodedStringObject(fp
)) == NULL
) return NULL
;
4217 /* If we are using a ziplist and the value is too big, convert
4218 * the object to a real list. */
4219 if (o
->encoding
== REDIS_ENCODING_ZIPLIST
&&
4220 ele
->encoding
== REDIS_ENCODING_RAW
&&
4221 sdslen(ele
->ptr
) > server
.list_max_ziplist_value
)
4222 listTypeConvert(o
,REDIS_ENCODING_LIST
);
4224 if (o
->encoding
== REDIS_ENCODING_ZIPLIST
) {
4225 dec
= getDecodedObject(ele
);
4226 o
->ptr
= ziplistPush(o
->ptr
,dec
->ptr
,sdslen(dec
->ptr
),REDIS_TAIL
);
4230 ele
= tryObjectEncoding(ele
);
4231 listAddNodeTail(o
->ptr
,ele
);
4234 } else if (type
== REDIS_SET
) {
4235 /* Read list/set value */
4236 if ((len
= rdbLoadLen(fp
,NULL
)) == REDIS_RDB_LENERR
) return NULL
;
4238 /* Use a regular set when there are too many entries. */
4239 if (len
> server
.set_max_intset_entries
) {
4240 o
= createSetObject();
4241 /* It's faster to expand the dict to the right size asap in order
4242 * to avoid rehashing */
4243 if (len
> DICT_HT_INITIAL_SIZE
)
4244 dictExpand(o
->ptr
,len
);
4246 o
= createIntsetObject();
4249 /* Load every single element of the list/set */
4252 if ((ele
= rdbLoadEncodedStringObject(fp
)) == NULL
) return NULL
;
4253 ele
= tryObjectEncoding(ele
);
4255 if (o
->encoding
== REDIS_ENCODING_INTSET
) {
4256 /* Fetch integer value from element */
4257 if (getLongLongFromObject(ele
,&llval
) == REDIS_OK
) {
4258 o
->ptr
= intsetAdd(o
->ptr
,llval
,NULL
);
4260 setTypeConvert(o
,REDIS_ENCODING_HT
);
4264 /* This will also be called when the set was just converted
4265 * to regular hashtable encoded set */
4266 if (o
->encoding
== REDIS_ENCODING_HT
) {
4267 dictAdd((dict
*)o
->ptr
,ele
,NULL
);
4270 } else if (type
== REDIS_ZSET
) {
4271 /* Read list/set value */
4275 if ((zsetlen
= rdbLoadLen(fp
,NULL
)) == REDIS_RDB_LENERR
) return NULL
;
4276 o
= createZsetObject();
4278 /* Load every single element of the list/set */
4281 double *score
= zmalloc(sizeof(double));
4283 if ((ele
= rdbLoadEncodedStringObject(fp
)) == NULL
) return NULL
;
4284 ele
= tryObjectEncoding(ele
);
4285 if (rdbLoadDoubleValue(fp
,score
) == -1) return NULL
;
4286 dictAdd(zs
->dict
,ele
,score
);
4287 zslInsert(zs
->zsl
,*score
,ele
);
4288 incrRefCount(ele
); /* added to skiplist */
4290 } else if (type
== REDIS_HASH
) {
4293 if ((hashlen
= rdbLoadLen(fp
,NULL
)) == REDIS_RDB_LENERR
) return NULL
;
4294 o
= createHashObject();
4295 /* Too many entries? Use an hash table. */
4296 if (hashlen
> server
.hash_max_zipmap_entries
)
4297 convertToRealHash(o
);
4298 /* Load every key/value, then set it into the zipmap or hash
4299 * table, as needed. */
4303 if ((key
= rdbLoadEncodedStringObject(fp
)) == NULL
) return NULL
;
4304 if ((val
= rdbLoadEncodedStringObject(fp
)) == NULL
) return NULL
;
4305 /* If we are using a zipmap and there are too big values
4306 * the object is converted to real hash table encoding. */
4307 if (o
->encoding
!= REDIS_ENCODING_HT
&&
4308 (sdslen(key
->ptr
) > server
.hash_max_zipmap_value
||
4309 sdslen(val
->ptr
) > server
.hash_max_zipmap_value
))
4311 convertToRealHash(o
);
4314 if (o
->encoding
== REDIS_ENCODING_ZIPMAP
) {
4315 unsigned char *zm
= o
->ptr
;
4317 zm
= zipmapSet(zm
,key
->ptr
,sdslen(key
->ptr
),
4318 val
->ptr
,sdslen(val
->ptr
),NULL
);
4323 key
= tryObjectEncoding(key
);
4324 val
= tryObjectEncoding(val
);
4325 dictAdd((dict
*)o
->ptr
,key
,val
);
4329 redisPanic("Unknown object type");
4334 static int rdbLoad(char *filename
) {
4337 int type
, retval
, rdbver
;
4338 int swap_all_values
= 0;
4339 redisDb
*db
= server
.db
+0;
4341 time_t expiretime
, now
= time(NULL
);
4343 fp
= fopen(filename
,"r");
4344 if (!fp
) return REDIS_ERR
;
4345 if (fread(buf
,9,1,fp
) == 0) goto eoferr
;
4347 if (memcmp(buf
,"REDIS",5) != 0) {
4349 redisLog(REDIS_WARNING
,"Wrong signature trying to load DB from file");
4352 rdbver
= atoi(buf
+5);
4355 redisLog(REDIS_WARNING
,"Can't handle RDB format version %d",rdbver
);
4364 if ((type
= rdbLoadType(fp
)) == -1) goto eoferr
;
4365 if (type
== REDIS_EXPIRETIME
) {
4366 if ((expiretime
= rdbLoadTime(fp
)) == -1) goto eoferr
;
4367 /* We read the time so we need to read the object type again */
4368 if ((type
= rdbLoadType(fp
)) == -1) goto eoferr
;
4370 if (type
== REDIS_EOF
) break;
4371 /* Handle SELECT DB opcode as a special case */
4372 if (type
== REDIS_SELECTDB
) {
4373 if ((dbid
= rdbLoadLen(fp
,NULL
)) == REDIS_RDB_LENERR
)
4375 if (dbid
>= (unsigned)server
.dbnum
) {
4376 redisLog(REDIS_WARNING
,"FATAL: Data file was created with a Redis server configured to handle more than %d databases. Exiting\n", server
.dbnum
);
4379 db
= server
.db
+dbid
;
4383 if ((key
= rdbLoadStringObject(fp
)) == NULL
) goto eoferr
;
4385 if ((val
= rdbLoadObject(type
,fp
)) == NULL
) goto eoferr
;
4386 /* Check if the key already expired */
4387 if (expiretime
!= -1 && expiretime
< now
) {
4392 /* Add the new object in the hash table */
4393 retval
= dbAdd(db
,key
,val
);
4394 if (retval
== REDIS_ERR
) {
4395 redisLog(REDIS_WARNING
,"Loading DB, duplicated key (%s) found! Unrecoverable error, exiting now.", key
->ptr
);
4398 /* Set the expire time if needed */
4399 if (expiretime
!= -1) setExpire(db
,key
,expiretime
);
4401 /* Handle swapping while loading big datasets when VM is on */
4403 /* If we detect that we are hopeless about fitting something in memory
4404 * we just swap every new key on disk. Directly...
4405 * Note that's important to check for this condition before resorting
4406 * to random sampling, otherwise we may try to swap already
4408 if (swap_all_values
) {
4409 dictEntry
*de
= dictFind(db
->dict
,key
->ptr
);
4411 /* de may be NULL since the key already expired */
4414 val
= dictGetEntryVal(de
);
4416 if (val
->refcount
== 1 &&
4417 (vp
= vmSwapObjectBlocking(val
)) != NULL
)
4418 dictGetEntryVal(de
) = vp
;
4425 /* Flush data on disk once 32 MB of additional RAM are used... */
4427 if ((zmalloc_used_memory() - server
.vm_max_memory
) > 1024*1024*32)
4430 /* If we have still some hope of having some value fitting memory
4431 * then we try random sampling. */
4432 if (!swap_all_values
&& server
.vm_enabled
&& force_swapout
) {
4433 while (zmalloc_used_memory() > server
.vm_max_memory
) {
4434 if (vmSwapOneObjectBlocking() == REDIS_ERR
) break;
4436 if (zmalloc_used_memory() > server
.vm_max_memory
)
4437 swap_all_values
= 1; /* We are already using too much mem */
4443 eoferr
: /* unexpected end of file is handled here with a fatal exit */
4444 redisLog(REDIS_WARNING
,"Short read or OOM loading DB. Unrecoverable error, aborting now.");
4446 return REDIS_ERR
; /* Just to avoid warning */
4449 /*================================== Shutdown =============================== */
4450 static int prepareForShutdown() {
4451 redisLog(REDIS_WARNING
,"User requested shutdown, saving DB...");
4452 /* Kill the saving child if there is a background saving in progress.
4453 We want to avoid race conditions, for instance our saving child may
4454 overwrite the synchronous saving did by SHUTDOWN. */
4455 if (server
.bgsavechildpid
!= -1) {
4456 redisLog(REDIS_WARNING
,"There is a live saving child. Killing it!");
4457 kill(server
.bgsavechildpid
,SIGKILL
);
4458 rdbRemoveTempFile(server
.bgsavechildpid
);
4460 if (server
.appendonly
) {
4461 /* Append only file: fsync() the AOF and exit */
4462 aof_fsync(server
.appendfd
);
4463 if (server
.vm_enabled
) unlink(server
.vm_swap_file
);
4465 /* Snapshotting. Perform a SYNC SAVE and exit */
4466 if (rdbSave(server
.dbfilename
) == REDIS_OK
) {
4467 if (server
.daemonize
)
4468 unlink(server
.pidfile
);
4469 redisLog(REDIS_WARNING
,"%zu bytes used at exit",zmalloc_used_memory());
4471 /* Ooops.. error saving! The best we can do is to continue
4472 * operating. Note that if there was a background saving process,
4473 * in the next cron() Redis will be notified that the background
4474 * saving aborted, handling special stuff like slaves pending for
4475 * synchronization... */
4476 redisLog(REDIS_WARNING
,"Error trying to save the DB, can't exit");
4480 redisLog(REDIS_WARNING
,"Server exit now, bye bye...");
4484 /*================================== Commands =============================== */
4486 static void authCommand(redisClient
*c
) {
4487 if (!server
.requirepass
|| !strcmp(c
->argv
[1]->ptr
, server
.requirepass
)) {
4488 c
->authenticated
= 1;
4489 addReply(c
,shared
.ok
);
4491 c
->authenticated
= 0;
4492 addReplySds(c
,sdscatprintf(sdsempty(),"-ERR invalid password\r\n"));
4496 static void pingCommand(redisClient
*c
) {
4497 addReply(c
,shared
.pong
);
4500 static void echoCommand(redisClient
*c
) {
4501 addReplyBulk(c
,c
->argv
[1]);
4504 /*=================================== Strings =============================== */
4506 static void setGenericCommand(redisClient
*c
, int nx
, robj
*key
, robj
*val
, robj
*expire
) {
4508 long seconds
= 0; /* initialized to avoid an harmness warning */
4511 if (getLongFromObjectOrReply(c
, expire
, &seconds
, NULL
) != REDIS_OK
)
4514 addReplySds(c
,sdsnew("-ERR invalid expire time in SETEX\r\n"));
4519 touchWatchedKey(c
->db
,key
);
4520 if (nx
) deleteIfVolatile(c
->db
,key
);
4521 retval
= dbAdd(c
->db
,key
,val
);
4522 if (retval
== REDIS_ERR
) {
4524 dbReplace(c
->db
,key
,val
);
4527 addReply(c
,shared
.czero
);
4534 removeExpire(c
->db
,key
);
4535 if (expire
) setExpire(c
->db
,key
,time(NULL
)+seconds
);
4536 addReply(c
, nx
? shared
.cone
: shared
.ok
);
4539 static void setCommand(redisClient
*c
) {
4540 setGenericCommand(c
,0,c
->argv
[1],c
->argv
[2],NULL
);
4543 static void setnxCommand(redisClient
*c
) {
4544 setGenericCommand(c
,1,c
->argv
[1],c
->argv
[2],NULL
);
4547 static void setexCommand(redisClient
*c
) {
4548 setGenericCommand(c
,0,c
->argv
[1],c
->argv
[3],c
->argv
[2]);
4551 static int getGenericCommand(redisClient
*c
) {
4554 if ((o
= lookupKeyReadOrReply(c
,c
->argv
[1],shared
.nullbulk
)) == NULL
)
4557 if (o
->type
!= REDIS_STRING
) {
4558 addReply(c
,shared
.wrongtypeerr
);
4566 static void getCommand(redisClient
*c
) {
4567 getGenericCommand(c
);
4570 static void getsetCommand(redisClient
*c
) {
4571 if (getGenericCommand(c
) == REDIS_ERR
) return;
4572 dbReplace(c
->db
,c
->argv
[1],c
->argv
[2]);
4573 incrRefCount(c
->argv
[2]);
4575 removeExpire(c
->db
,c
->argv
[1]);
4578 static void mgetCommand(redisClient
*c
) {
4581 addReplySds(c
,sdscatprintf(sdsempty(),"*%d\r\n",c
->argc
-1));
4582 for (j
= 1; j
< c
->argc
; j
++) {
4583 robj
*o
= lookupKeyRead(c
->db
,c
->argv
[j
]);
4585 addReply(c
,shared
.nullbulk
);
4587 if (o
->type
!= REDIS_STRING
) {
4588 addReply(c
,shared
.nullbulk
);
4596 static void msetGenericCommand(redisClient
*c
, int nx
) {
4597 int j
, busykeys
= 0;
4599 if ((c
->argc
% 2) == 0) {
4600 addReplySds(c
,sdsnew("-ERR wrong number of arguments for MSET\r\n"));
4603 /* Handle the NX flag. The MSETNX semantic is to return zero and
4604 * set nothing at all if at least one key already exists. */
4606 for (j
= 1; j
< c
->argc
; j
+= 2) {
4607 if (lookupKeyWrite(c
->db
,c
->argv
[j
]) != NULL
) {
4613 addReply(c
, shared
.czero
);
4617 for (j
= 1; j
< c
->argc
; j
+= 2) {
4618 c
->argv
[j
+1] = tryObjectEncoding(c
->argv
[j
+1]);
4619 dbReplace(c
->db
,c
->argv
[j
],c
->argv
[j
+1]);
4620 incrRefCount(c
->argv
[j
+1]);
4621 removeExpire(c
->db
,c
->argv
[j
]);
4623 server
.dirty
+= (c
->argc
-1)/2;
4624 addReply(c
, nx
? shared
.cone
: shared
.ok
);
4627 static void msetCommand(redisClient
*c
) {
4628 msetGenericCommand(c
,0);
4631 static void msetnxCommand(redisClient
*c
) {
4632 msetGenericCommand(c
,1);
4635 static void incrDecrCommand(redisClient
*c
, long long incr
) {
4639 o
= lookupKeyWrite(c
->db
,c
->argv
[1]);
4640 if (o
!= NULL
&& checkType(c
,o
,REDIS_STRING
)) return;
4641 if (getLongLongFromObjectOrReply(c
,o
,&value
,NULL
) != REDIS_OK
) return;
4644 o
= createStringObjectFromLongLong(value
);
4645 dbReplace(c
->db
,c
->argv
[1],o
);
4647 addReply(c
,shared
.colon
);
4649 addReply(c
,shared
.crlf
);
4652 static void incrCommand(redisClient
*c
) {
4653 incrDecrCommand(c
,1);
4656 static void decrCommand(redisClient
*c
) {
4657 incrDecrCommand(c
,-1);
4660 static void incrbyCommand(redisClient
*c
) {
4663 if (getLongLongFromObjectOrReply(c
, c
->argv
[2], &incr
, NULL
) != REDIS_OK
) return;
4664 incrDecrCommand(c
,incr
);
4667 static void decrbyCommand(redisClient
*c
) {
4670 if (getLongLongFromObjectOrReply(c
, c
->argv
[2], &incr
, NULL
) != REDIS_OK
) return;
4671 incrDecrCommand(c
,-incr
);
4674 static void appendCommand(redisClient
*c
) {
4679 o
= lookupKeyWrite(c
->db
,c
->argv
[1]);
4681 /* Create the key */
4682 retval
= dbAdd(c
->db
,c
->argv
[1],c
->argv
[2]);
4683 incrRefCount(c
->argv
[2]);
4684 totlen
= stringObjectLen(c
->argv
[2]);
4686 if (o
->type
!= REDIS_STRING
) {
4687 addReply(c
,shared
.wrongtypeerr
);
4690 /* If the object is specially encoded or shared we have to make
4692 if (o
->refcount
!= 1 || o
->encoding
!= REDIS_ENCODING_RAW
) {
4693 robj
*decoded
= getDecodedObject(o
);
4695 o
= createStringObject(decoded
->ptr
, sdslen(decoded
->ptr
));
4696 decrRefCount(decoded
);
4697 dbReplace(c
->db
,c
->argv
[1],o
);
4700 if (c
->argv
[2]->encoding
== REDIS_ENCODING_RAW
) {
4701 o
->ptr
= sdscatlen(o
->ptr
,
4702 c
->argv
[2]->ptr
, sdslen(c
->argv
[2]->ptr
));
4704 o
->ptr
= sdscatprintf(o
->ptr
, "%ld",
4705 (unsigned long) c
->argv
[2]->ptr
);
4707 totlen
= sdslen(o
->ptr
);
4710 addReplySds(c
,sdscatprintf(sdsempty(),":%lu\r\n",(unsigned long)totlen
));
4713 static void substrCommand(redisClient
*c
) {
4715 long start
= atoi(c
->argv
[2]->ptr
);
4716 long end
= atoi(c
->argv
[3]->ptr
);
4717 size_t rangelen
, strlen
;
4720 if ((o
= lookupKeyReadOrReply(c
,c
->argv
[1],shared
.nullbulk
)) == NULL
||
4721 checkType(c
,o
,REDIS_STRING
)) return;
4723 o
= getDecodedObject(o
);
4724 strlen
= sdslen(o
->ptr
);
4726 /* convert negative indexes */
4727 if (start
< 0) start
= strlen
+start
;
4728 if (end
< 0) end
= strlen
+end
;
4729 if (start
< 0) start
= 0;
4730 if (end
< 0) end
= 0;
4732 /* indexes sanity checks */
4733 if (start
> end
|| (size_t)start
>= strlen
) {
4734 /* Out of range start or start > end result in null reply */
4735 addReply(c
,shared
.nullbulk
);
4739 if ((size_t)end
>= strlen
) end
= strlen
-1;
4740 rangelen
= (end
-start
)+1;
4742 /* Return the result */
4743 addReplySds(c
,sdscatprintf(sdsempty(),"$%zu\r\n",rangelen
));
4744 range
= sdsnewlen((char*)o
->ptr
+start
,rangelen
);
4745 addReplySds(c
,range
);
4746 addReply(c
,shared
.crlf
);
4750 /* ========================= Type agnostic commands ========================= */
4752 static void delCommand(redisClient
*c
) {
4755 for (j
= 1; j
< c
->argc
; j
++) {
4756 if (dbDelete(c
->db
,c
->argv
[j
])) {
4757 touchWatchedKey(c
->db
,c
->argv
[j
]);
4762 addReplyLongLong(c
,deleted
);
4765 static void existsCommand(redisClient
*c
) {
4766 expireIfNeeded(c
->db
,c
->argv
[1]);
4767 if (dbExists(c
->db
,c
->argv
[1])) {
4768 addReply(c
, shared
.cone
);
4770 addReply(c
, shared
.czero
);
4774 static void selectCommand(redisClient
*c
) {
4775 int id
= atoi(c
->argv
[1]->ptr
);
4777 if (selectDb(c
,id
) == REDIS_ERR
) {
4778 addReplySds(c
,sdsnew("-ERR invalid DB index\r\n"));
4780 addReply(c
,shared
.ok
);
4784 static void randomkeyCommand(redisClient
*c
) {
4787 if ((key
= dbRandomKey(c
->db
)) == NULL
) {
4788 addReply(c
,shared
.nullbulk
);
4792 addReplyBulk(c
,key
);
4796 static void keysCommand(redisClient
*c
) {
4799 sds pattern
= c
->argv
[1]->ptr
;
4800 int plen
= sdslen(pattern
);
4801 unsigned long numkeys
= 0;
4802 robj
*lenobj
= createObject(REDIS_STRING
,NULL
);
4804 di
= dictGetIterator(c
->db
->dict
);
4806 decrRefCount(lenobj
);
4807 while((de
= dictNext(di
)) != NULL
) {
4808 sds key
= dictGetEntryKey(de
);
4811 if ((pattern
[0] == '*' && pattern
[1] == '\0') ||
4812 stringmatchlen(pattern
,plen
,key
,sdslen(key
),0)) {
4813 keyobj
= createStringObject(key
,sdslen(key
));
4814 if (expireIfNeeded(c
->db
,keyobj
) == 0) {
4815 addReplyBulk(c
,keyobj
);
4818 decrRefCount(keyobj
);
4821 dictReleaseIterator(di
);
4822 lenobj
->ptr
= sdscatprintf(sdsempty(),"*%lu\r\n",numkeys
);
4825 static void dbsizeCommand(redisClient
*c
) {
4827 sdscatprintf(sdsempty(),":%lu\r\n",dictSize(c
->db
->dict
)));
4830 static void lastsaveCommand(redisClient
*c
) {
4832 sdscatprintf(sdsempty(),":%lu\r\n",server
.lastsave
));
4835 static void typeCommand(redisClient
*c
) {
4839 o
= lookupKeyRead(c
->db
,c
->argv
[1]);
4844 case REDIS_STRING
: type
= "+string"; break;
4845 case REDIS_LIST
: type
= "+list"; break;
4846 case REDIS_SET
: type
= "+set"; break;
4847 case REDIS_ZSET
: type
= "+zset"; break;
4848 case REDIS_HASH
: type
= "+hash"; break;
4849 default: type
= "+unknown"; break;
4852 addReplySds(c
,sdsnew(type
));
4853 addReply(c
,shared
.crlf
);
4856 static void saveCommand(redisClient
*c
) {
4857 if (server
.bgsavechildpid
!= -1) {
4858 addReplySds(c
,sdsnew("-ERR background save in progress\r\n"));
4861 if (rdbSave(server
.dbfilename
) == REDIS_OK
) {
4862 addReply(c
,shared
.ok
);
4864 addReply(c
,shared
.err
);
4868 static void bgsaveCommand(redisClient
*c
) {
4869 if (server
.bgsavechildpid
!= -1) {
4870 addReplySds(c
,sdsnew("-ERR background save already in progress\r\n"));
4873 if (rdbSaveBackground(server
.dbfilename
) == REDIS_OK
) {
4874 char *status
= "+Background saving started\r\n";
4875 addReplySds(c
,sdsnew(status
));
4877 addReply(c
,shared
.err
);
4881 static void shutdownCommand(redisClient
*c
) {
4882 if (prepareForShutdown() == REDIS_OK
)
4884 addReplySds(c
, sdsnew("-ERR Errors trying to SHUTDOWN. Check logs.\r\n"));
4887 static void renameGenericCommand(redisClient
*c
, int nx
) {
4890 /* To use the same key as src and dst is probably an error */
4891 if (sdscmp(c
->argv
[1]->ptr
,c
->argv
[2]->ptr
) == 0) {
4892 addReply(c
,shared
.sameobjecterr
);
4896 if ((o
= lookupKeyWriteOrReply(c
,c
->argv
[1],shared
.nokeyerr
)) == NULL
)
4900 deleteIfVolatile(c
->db
,c
->argv
[2]);
4901 if (dbAdd(c
->db
,c
->argv
[2],o
) == REDIS_ERR
) {
4904 addReply(c
,shared
.czero
);
4907 dbReplace(c
->db
,c
->argv
[2],o
);
4909 dbDelete(c
->db
,c
->argv
[1]);
4910 touchWatchedKey(c
->db
,c
->argv
[2]);
4912 addReply(c
,nx
? shared
.cone
: shared
.ok
);
4915 static void renameCommand(redisClient
*c
) {
4916 renameGenericCommand(c
,0);
4919 static void renamenxCommand(redisClient
*c
) {
4920 renameGenericCommand(c
,1);
4923 static void moveCommand(redisClient
*c
) {
4928 /* Obtain source and target DB pointers */
4931 if (selectDb(c
,atoi(c
->argv
[2]->ptr
)) == REDIS_ERR
) {
4932 addReply(c
,shared
.outofrangeerr
);
4936 selectDb(c
,srcid
); /* Back to the source DB */
4938 /* If the user is moving using as target the same
4939 * DB as the source DB it is probably an error. */
4941 addReply(c
,shared
.sameobjecterr
);
4945 /* Check if the element exists and get a reference */
4946 o
= lookupKeyWrite(c
->db
,c
->argv
[1]);
4948 addReply(c
,shared
.czero
);
4952 /* Try to add the element to the target DB */
4953 deleteIfVolatile(dst
,c
->argv
[1]);
4954 if (dbAdd(dst
,c
->argv
[1],o
) == REDIS_ERR
) {
4955 addReply(c
,shared
.czero
);
4960 /* OK! key moved, free the entry in the source DB */
4961 dbDelete(src
,c
->argv
[1]);
4963 addReply(c
,shared
.cone
);
4966 /* =================================== Lists ================================ */
4969 /* Check the argument length to see if it requires us to convert the ziplist
4970 * to a real list. Only check raw-encoded objects because integer encoded
4971 * objects are never too long. */
4972 static void listTypeTryConversion(robj
*subject
, robj
*value
) {
4973 if (subject
->encoding
!= REDIS_ENCODING_ZIPLIST
) return;
4974 if (value
->encoding
== REDIS_ENCODING_RAW
&&
4975 sdslen(value
->ptr
) > server
.list_max_ziplist_value
)
4976 listTypeConvert(subject
,REDIS_ENCODING_LIST
);
4979 static void listTypePush(robj
*subject
, robj
*value
, int where
) {
4980 /* Check if we need to convert the ziplist */
4981 listTypeTryConversion(subject
,value
);
4982 if (subject
->encoding
== REDIS_ENCODING_ZIPLIST
&&
4983 ziplistLen(subject
->ptr
) > server
.list_max_ziplist_entries
)
4984 listTypeConvert(subject
,REDIS_ENCODING_LIST
);
4986 if (subject
->encoding
== REDIS_ENCODING_ZIPLIST
) {
4987 int pos
= (where
== REDIS_HEAD
) ? ZIPLIST_HEAD
: ZIPLIST_TAIL
;
4988 value
= getDecodedObject(value
);
4989 subject
->ptr
= ziplistPush(subject
->ptr
,value
->ptr
,sdslen(value
->ptr
),pos
);
4990 decrRefCount(value
);
4991 } else if (subject
->encoding
== REDIS_ENCODING_LIST
) {
4992 if (where
== REDIS_HEAD
) {
4993 listAddNodeHead(subject
->ptr
,value
);
4995 listAddNodeTail(subject
->ptr
,value
);
4997 incrRefCount(value
);
4999 redisPanic("Unknown list encoding");
5003 static robj
*listTypePop(robj
*subject
, int where
) {
5005 if (subject
->encoding
== REDIS_ENCODING_ZIPLIST
) {
5007 unsigned char *vstr
;
5010 int pos
= (where
== REDIS_HEAD
) ? 0 : -1;
5011 p
= ziplistIndex(subject
->ptr
,pos
);
5012 if (ziplistGet(p
,&vstr
,&vlen
,&vlong
)) {
5014 value
= createStringObject((char*)vstr
,vlen
);
5016 value
= createStringObjectFromLongLong(vlong
);
5018 /* We only need to delete an element when it exists */
5019 subject
->ptr
= ziplistDelete(subject
->ptr
,&p
);
5021 } else if (subject
->encoding
== REDIS_ENCODING_LIST
) {
5022 list
*list
= subject
->ptr
;
5024 if (where
== REDIS_HEAD
) {
5025 ln
= listFirst(list
);
5027 ln
= listLast(list
);
5030 value
= listNodeValue(ln
);
5031 incrRefCount(value
);
5032 listDelNode(list
,ln
);
5035 redisPanic("Unknown list encoding");
5040 static unsigned long listTypeLength(robj
*subject
) {
5041 if (subject
->encoding
== REDIS_ENCODING_ZIPLIST
) {
5042 return ziplistLen(subject
->ptr
);
5043 } else if (subject
->encoding
== REDIS_ENCODING_LIST
) {
5044 return listLength((list
*)subject
->ptr
);
5046 redisPanic("Unknown list encoding");
5050 /* Structure to hold set iteration abstraction. */
5053 unsigned char encoding
;
5054 unsigned char direction
; /* Iteration direction */
5059 /* Structure for an entry while iterating over a list. */
5061 listTypeIterator
*li
;
5062 unsigned char *zi
; /* Entry in ziplist */
5063 listNode
*ln
; /* Entry in linked list */
5066 /* Initialize an iterator at the specified index. */
5067 static listTypeIterator
*listTypeInitIterator(robj
*subject
, int index
, unsigned char direction
) {
5068 listTypeIterator
*li
= zmalloc(sizeof(listTypeIterator
));
5069 li
->subject
= subject
;
5070 li
->encoding
= subject
->encoding
;
5071 li
->direction
= direction
;
5072 if (li
->encoding
== REDIS_ENCODING_ZIPLIST
) {
5073 li
->zi
= ziplistIndex(subject
->ptr
,index
);
5074 } else if (li
->encoding
== REDIS_ENCODING_LIST
) {
5075 li
->ln
= listIndex(subject
->ptr
,index
);
5077 redisPanic("Unknown list encoding");
5082 /* Clean up the iterator. */
5083 static void listTypeReleaseIterator(listTypeIterator
*li
) {
5087 /* Stores a pointer to the current entry in the provided entry structure
5088 * and advances the position of the iterator. Returns 1 when the current
5089 * entry is in fact an entry, 0 otherwise. */
5090 static int listTypeNext(listTypeIterator
*li
, listTypeEntry
*entry
) {
5091 /* Protect from converting when iterating */
5092 redisAssert(li
->subject
->encoding
== li
->encoding
);
5095 if (li
->encoding
== REDIS_ENCODING_ZIPLIST
) {
5097 if (entry
->zi
!= NULL
) {
5098 if (li
->direction
== REDIS_TAIL
)
5099 li
->zi
= ziplistNext(li
->subject
->ptr
,li
->zi
);
5101 li
->zi
= ziplistPrev(li
->subject
->ptr
,li
->zi
);
5104 } else if (li
->encoding
== REDIS_ENCODING_LIST
) {
5106 if (entry
->ln
!= NULL
) {
5107 if (li
->direction
== REDIS_TAIL
)
5108 li
->ln
= li
->ln
->next
;
5110 li
->ln
= li
->ln
->prev
;
5114 redisPanic("Unknown list encoding");
5119 /* Return entry or NULL at the current position of the iterator. */
5120 static robj
*listTypeGet(listTypeEntry
*entry
) {
5121 listTypeIterator
*li
= entry
->li
;
5123 if (li
->encoding
== REDIS_ENCODING_ZIPLIST
) {
5124 unsigned char *vstr
;
5127 redisAssert(entry
->zi
!= NULL
);
5128 if (ziplistGet(entry
->zi
,&vstr
,&vlen
,&vlong
)) {
5130 value
= createStringObject((char*)vstr
,vlen
);
5132 value
= createStringObjectFromLongLong(vlong
);
5135 } else if (li
->encoding
== REDIS_ENCODING_LIST
) {
5136 redisAssert(entry
->ln
!= NULL
);
5137 value
= listNodeValue(entry
->ln
);
5138 incrRefCount(value
);
5140 redisPanic("Unknown list encoding");
5145 /* Compare the given object with the entry at the current position. */
5146 static int listTypeEqual(listTypeEntry
*entry
, robj
*o
) {
5147 listTypeIterator
*li
= entry
->li
;
5148 if (li
->encoding
== REDIS_ENCODING_ZIPLIST
) {
5149 redisAssert(o
->encoding
== REDIS_ENCODING_RAW
);
5150 return ziplistCompare(entry
->zi
,o
->ptr
,sdslen(o
->ptr
));
5151 } else if (li
->encoding
== REDIS_ENCODING_LIST
) {
5152 return equalStringObjects(o
,listNodeValue(entry
->ln
));
5154 redisPanic("Unknown list encoding");
5158 /* Delete the element pointed to. */
5159 static void listTypeDelete(listTypeEntry
*entry
) {
5160 listTypeIterator
*li
= entry
->li
;
5161 if (li
->encoding
== REDIS_ENCODING_ZIPLIST
) {
5162 unsigned char *p
= entry
->zi
;
5163 li
->subject
->ptr
= ziplistDelete(li
->subject
->ptr
,&p
);
5165 /* Update position of the iterator depending on the direction */
5166 if (li
->direction
== REDIS_TAIL
)
5169 li
->zi
= ziplistPrev(li
->subject
->ptr
,p
);
5170 } else if (entry
->li
->encoding
== REDIS_ENCODING_LIST
) {
5172 if (li
->direction
== REDIS_TAIL
)
5173 next
= entry
->ln
->next
;
5175 next
= entry
->ln
->prev
;
5176 listDelNode(li
->subject
->ptr
,entry
->ln
);
5179 redisPanic("Unknown list encoding");
5183 static void listTypeConvert(robj
*subject
, int enc
) {
5184 listTypeIterator
*li
;
5185 listTypeEntry entry
;
5186 redisAssert(subject
->type
== REDIS_LIST
);
5188 if (enc
== REDIS_ENCODING_LIST
) {
5189 list
*l
= listCreate();
5190 listSetFreeMethod(l
,decrRefCount
);
5192 /* listTypeGet returns a robj with incremented refcount */
5193 li
= listTypeInitIterator(subject
,0,REDIS_TAIL
);
5194 while (listTypeNext(li
,&entry
)) listAddNodeTail(l
,listTypeGet(&entry
));
5195 listTypeReleaseIterator(li
);
5197 subject
->encoding
= REDIS_ENCODING_LIST
;
5198 zfree(subject
->ptr
);
5201 redisPanic("Unsupported list conversion");
5205 static void pushGenericCommand(redisClient
*c
, int where
) {
5206 robj
*lobj
= lookupKeyWrite(c
->db
,c
->argv
[1]);
5208 if (handleClientsWaitingListPush(c
,c
->argv
[1],c
->argv
[2])) {
5209 addReply(c
,shared
.cone
);
5212 lobj
= createZiplistObject();
5213 dbAdd(c
->db
,c
->argv
[1],lobj
);
5215 if (lobj
->type
!= REDIS_LIST
) {
5216 addReply(c
,shared
.wrongtypeerr
);
5219 if (handleClientsWaitingListPush(c
,c
->argv
[1],c
->argv
[2])) {
5220 addReply(c
,shared
.cone
);
5224 listTypePush(lobj
,c
->argv
[2],where
);
5225 addReplyLongLong(c
,listTypeLength(lobj
));
5229 static void lpushCommand(redisClient
*c
) {
5230 pushGenericCommand(c
,REDIS_HEAD
);
5233 static void rpushCommand(redisClient
*c
) {
5234 pushGenericCommand(c
,REDIS_TAIL
);
5237 static void llenCommand(redisClient
*c
) {
5238 robj
*o
= lookupKeyReadOrReply(c
,c
->argv
[1],shared
.czero
);
5239 if (o
== NULL
|| checkType(c
,o
,REDIS_LIST
)) return;
5240 addReplyUlong(c
,listTypeLength(o
));
5243 static void lindexCommand(redisClient
*c
) {
5244 robj
*o
= lookupKeyReadOrReply(c
,c
->argv
[1],shared
.nullbulk
);
5245 if (o
== NULL
|| checkType(c
,o
,REDIS_LIST
)) return;
5246 int index
= atoi(c
->argv
[2]->ptr
);
5249 if (o
->encoding
== REDIS_ENCODING_ZIPLIST
) {
5251 unsigned char *vstr
;
5254 p
= ziplistIndex(o
->ptr
,index
);
5255 if (ziplistGet(p
,&vstr
,&vlen
,&vlong
)) {
5257 value
= createStringObject((char*)vstr
,vlen
);
5259 value
= createStringObjectFromLongLong(vlong
);
5261 addReplyBulk(c
,value
);
5262 decrRefCount(value
);
5264 addReply(c
,shared
.nullbulk
);
5266 } else if (o
->encoding
== REDIS_ENCODING_LIST
) {
5267 listNode
*ln
= listIndex(o
->ptr
,index
);
5269 value
= listNodeValue(ln
);
5270 addReplyBulk(c
,value
);
5272 addReply(c
,shared
.nullbulk
);
5275 redisPanic("Unknown list encoding");
5279 static void lsetCommand(redisClient
*c
) {
5280 robj
*o
= lookupKeyWriteOrReply(c
,c
->argv
[1],shared
.nokeyerr
);
5281 if (o
== NULL
|| checkType(c
,o
,REDIS_LIST
)) return;
5282 int index
= atoi(c
->argv
[2]->ptr
);
5283 robj
*value
= c
->argv
[3];
5285 listTypeTryConversion(o
,value
);
5286 if (o
->encoding
== REDIS_ENCODING_ZIPLIST
) {
5287 unsigned char *p
, *zl
= o
->ptr
;
5288 p
= ziplistIndex(zl
,index
);
5290 addReply(c
,shared
.outofrangeerr
);
5292 o
->ptr
= ziplistDelete(o
->ptr
,&p
);
5293 value
= getDecodedObject(value
);
5294 o
->ptr
= ziplistInsert(o
->ptr
,p
,value
->ptr
,sdslen(value
->ptr
));
5295 decrRefCount(value
);
5296 addReply(c
,shared
.ok
);
5299 } else if (o
->encoding
== REDIS_ENCODING_LIST
) {
5300 listNode
*ln
= listIndex(o
->ptr
,index
);
5302 addReply(c
,shared
.outofrangeerr
);
5304 decrRefCount((robj
*)listNodeValue(ln
));
5305 listNodeValue(ln
) = value
;
5306 incrRefCount(value
);
5307 addReply(c
,shared
.ok
);
5311 redisPanic("Unknown list encoding");
5315 static void popGenericCommand(redisClient
*c
, int where
) {
5316 robj
*o
= lookupKeyWriteOrReply(c
,c
->argv
[1],shared
.nullbulk
);
5317 if (o
== NULL
|| checkType(c
,o
,REDIS_LIST
)) return;
5319 robj
*value
= listTypePop(o
,where
);
5320 if (value
== NULL
) {
5321 addReply(c
,shared
.nullbulk
);
5323 addReplyBulk(c
,value
);
5324 decrRefCount(value
);
5325 if (listTypeLength(o
) == 0) dbDelete(c
->db
,c
->argv
[1]);
5330 static void lpopCommand(redisClient
*c
) {
5331 popGenericCommand(c
,REDIS_HEAD
);
5334 static void rpopCommand(redisClient
*c
) {
5335 popGenericCommand(c
,REDIS_TAIL
);
5338 static void lrangeCommand(redisClient
*c
) {
5340 int start
= atoi(c
->argv
[2]->ptr
);
5341 int end
= atoi(c
->argv
[3]->ptr
);
5344 listTypeEntry entry
;
5346 if ((o
= lookupKeyReadOrReply(c
,c
->argv
[1],shared
.emptymultibulk
)) == NULL
5347 || checkType(c
,o
,REDIS_LIST
)) return;
5348 llen
= listTypeLength(o
);
5350 /* convert negative indexes */
5351 if (start
< 0) start
= llen
+start
;
5352 if (end
< 0) end
= llen
+end
;
5353 if (start
< 0) start
= 0;
5354 if (end
< 0) end
= 0;
5356 /* indexes sanity checks */
5357 if (start
> end
|| start
>= llen
) {
5358 /* Out of range start or start > end result in empty list */
5359 addReply(c
,shared
.emptymultibulk
);
5362 if (end
>= llen
) end
= llen
-1;
5363 rangelen
= (end
-start
)+1;
5365 /* Return the result in form of a multi-bulk reply */
5366 addReplySds(c
,sdscatprintf(sdsempty(),"*%d\r\n",rangelen
));
5367 listTypeIterator
*li
= listTypeInitIterator(o
,start
,REDIS_TAIL
);
5368 for (j
= 0; j
< rangelen
; j
++) {
5369 redisAssert(listTypeNext(li
,&entry
));
5370 value
= listTypeGet(&entry
);
5371 addReplyBulk(c
,value
);
5372 decrRefCount(value
);
5374 listTypeReleaseIterator(li
);
5377 static void ltrimCommand(redisClient
*c
) {
5379 int start
= atoi(c
->argv
[2]->ptr
);
5380 int end
= atoi(c
->argv
[3]->ptr
);
5382 int j
, ltrim
, rtrim
;
5386 if ((o
= lookupKeyWriteOrReply(c
,c
->argv
[1],shared
.ok
)) == NULL
||
5387 checkType(c
,o
,REDIS_LIST
)) return;
5388 llen
= listTypeLength(o
);
5390 /* convert negative indexes */
5391 if (start
< 0) start
= llen
+start
;
5392 if (end
< 0) end
= llen
+end
;
5393 if (start
< 0) start
= 0;
5394 if (end
< 0) end
= 0;
5396 /* indexes sanity checks */
5397 if (start
> end
|| start
>= llen
) {
5398 /* Out of range start or start > end result in empty list */
5402 if (end
>= llen
) end
= llen
-1;
5407 /* Remove list elements to perform the trim */
5408 if (o
->encoding
== REDIS_ENCODING_ZIPLIST
) {
5409 o
->ptr
= ziplistDeleteRange(o
->ptr
,0,ltrim
);
5410 o
->ptr
= ziplistDeleteRange(o
->ptr
,-rtrim
,rtrim
);
5411 } else if (o
->encoding
== REDIS_ENCODING_LIST
) {
5413 for (j
= 0; j
< ltrim
; j
++) {
5414 ln
= listFirst(list
);
5415 listDelNode(list
,ln
);
5417 for (j
= 0; j
< rtrim
; j
++) {
5418 ln
= listLast(list
);
5419 listDelNode(list
,ln
);
5422 redisPanic("Unknown list encoding");
5424 if (listTypeLength(o
) == 0) dbDelete(c
->db
,c
->argv
[1]);
5426 addReply(c
,shared
.ok
);
5429 static void lremCommand(redisClient
*c
) {
5430 robj
*subject
, *obj
= c
->argv
[3];
5431 int toremove
= atoi(c
->argv
[2]->ptr
);
5433 listTypeEntry entry
;
5435 subject
= lookupKeyWriteOrReply(c
,c
->argv
[1],shared
.czero
);
5436 if (subject
== NULL
|| checkType(c
,subject
,REDIS_LIST
)) return;
5438 /* Make sure obj is raw when we're dealing with a ziplist */
5439 if (subject
->encoding
== REDIS_ENCODING_ZIPLIST
)
5440 obj
= getDecodedObject(obj
);
5442 listTypeIterator
*li
;
5444 toremove
= -toremove
;
5445 li
= listTypeInitIterator(subject
,-1,REDIS_HEAD
);
5447 li
= listTypeInitIterator(subject
,0,REDIS_TAIL
);
5450 while (listTypeNext(li
,&entry
)) {
5451 if (listTypeEqual(&entry
,obj
)) {
5452 listTypeDelete(&entry
);
5455 if (toremove
&& removed
== toremove
) break;
5458 listTypeReleaseIterator(li
);
5460 /* Clean up raw encoded object */
5461 if (subject
->encoding
== REDIS_ENCODING_ZIPLIST
)
5464 if (listTypeLength(subject
) == 0) dbDelete(c
->db
,c
->argv
[1]);
5465 addReplySds(c
,sdscatprintf(sdsempty(),":%d\r\n",removed
));
5468 /* This is the semantic of this command:
5469 * RPOPLPUSH srclist dstlist:
5470 * IF LLEN(srclist) > 0
5471 * element = RPOP srclist
5472 * LPUSH dstlist element
5479 * The idea is to be able to get an element from a list in a reliable way
5480 * since the element is not just returned but pushed against another list
5481 * as well. This command was originally proposed by Ezra Zygmuntowicz.
5483 static void rpoplpushcommand(redisClient
*c
) {
5485 if ((sobj
= lookupKeyWriteOrReply(c
,c
->argv
[1],shared
.nullbulk
)) == NULL
||
5486 checkType(c
,sobj
,REDIS_LIST
)) return;
5488 if (listTypeLength(sobj
) == 0) {
5489 addReply(c
,shared
.nullbulk
);
5491 robj
*dobj
= lookupKeyWrite(c
->db
,c
->argv
[2]);
5492 if (dobj
&& checkType(c
,dobj
,REDIS_LIST
)) return;
5493 value
= listTypePop(sobj
,REDIS_TAIL
);
5495 /* Add the element to the target list (unless it's directly
5496 * passed to some BLPOP-ing client) */
5497 if (!handleClientsWaitingListPush(c
,c
->argv
[2],value
)) {
5498 /* Create the list if the key does not exist */
5500 dobj
= createZiplistObject();
5501 dbAdd(c
->db
,c
->argv
[2],dobj
);
5503 listTypePush(dobj
,value
,REDIS_HEAD
);
5506 /* Send the element to the client as reply as well */
5507 addReplyBulk(c
,value
);
5509 /* listTypePop returns an object with its refcount incremented */
5510 decrRefCount(value
);
5512 /* Delete the source list when it is empty */
5513 if (listTypeLength(sobj
) == 0) dbDelete(c
->db
,c
->argv
[1]);
5518 /* ==================================== Sets ================================ */
5520 /* Factory method to return a set that *can* hold "value". When the object has
5521 * an integer-encodable value, an intset will be returned. Otherwise a regular
5523 static robj
*setTypeCreate(robj
*value
) {
5524 if (getLongLongFromObject(value
,NULL
) == REDIS_OK
)
5525 return createIntsetObject();
5526 return createSetObject();
5529 static int setTypeAdd(robj
*subject
, robj
*value
) {
5531 if (subject
->encoding
== REDIS_ENCODING_HT
) {
5532 if (dictAdd(subject
->ptr
,value
,NULL
) == DICT_OK
) {
5533 incrRefCount(value
);
5536 } else if (subject
->encoding
== REDIS_ENCODING_INTSET
) {
5537 if (getLongLongFromObject(value
,&llval
) == REDIS_OK
) {
5538 uint8_t success
= 0;
5539 subject
->ptr
= intsetAdd(subject
->ptr
,llval
,&success
);
5541 /* Convert to regular set when the intset contains
5542 * too many entries. */
5543 if (intsetLen(subject
->ptr
) > server
.set_max_intset_entries
)
5544 setTypeConvert(subject
,REDIS_ENCODING_HT
);
5548 /* Failed to get integer from object, convert to regular set. */
5549 setTypeConvert(subject
,REDIS_ENCODING_HT
);
5551 /* The set *was* an intset and this value is not integer
5552 * encodable, so dictAdd should always work. */
5553 redisAssert(dictAdd(subject
->ptr
,value
,NULL
) == DICT_OK
);
5554 incrRefCount(value
);
5558 redisPanic("Unknown set encoding");
5563 static int setTypeRemove(robj
*subject
, robj
*value
) {
5565 if (subject
->encoding
== REDIS_ENCODING_HT
) {
5566 if (dictDelete(subject
->ptr
,value
) == DICT_OK
) {
5567 if (htNeedsResize(subject
->ptr
)) dictResize(subject
->ptr
);
5570 } else if (subject
->encoding
== REDIS_ENCODING_INTSET
) {
5571 if (getLongLongFromObject(value
,&llval
) == REDIS_OK
) {
5573 subject
->ptr
= intsetRemove(subject
->ptr
,llval
,&success
);
5574 if (success
) return 1;
5577 redisPanic("Unknown set encoding");
5582 static int setTypeIsMember(robj
*subject
, robj
*value
) {
5584 if (subject
->encoding
== REDIS_ENCODING_HT
) {
5585 return dictFind((dict
*)subject
->ptr
,value
) != NULL
;
5586 } else if (subject
->encoding
== REDIS_ENCODING_INTSET
) {
5587 if (getLongLongFromObject(value
,&llval
) == REDIS_OK
) {
5588 return intsetFind((intset
*)subject
->ptr
,llval
);
5591 redisPanic("Unknown set encoding");
5596 /* Structure to hold set iteration abstraction. */
5600 int ii
; /* intset iterator */
5604 static setIterator
*setTypeInitIterator(robj
*subject
) {
5605 setIterator
*si
= zmalloc(sizeof(setIterator
));
5606 si
->subject
= subject
;
5607 si
->encoding
= subject
->encoding
;
5608 if (si
->encoding
== REDIS_ENCODING_HT
) {
5609 si
->di
= dictGetIterator(subject
->ptr
);
5610 } else if (si
->encoding
== REDIS_ENCODING_INTSET
) {
5613 redisPanic("Unknown set encoding");
5618 static void setTypeReleaseIterator(setIterator
*si
) {
5619 if (si
->encoding
== REDIS_ENCODING_HT
)
5620 dictReleaseIterator(si
->di
);
5624 /* Move to the next entry in the set. Returns the object at the current
5625 * position, or NULL when the end is reached. This object will have its
5626 * refcount incremented, so the caller needs to take care of this. */
5627 static robj
*setTypeNext(setIterator
*si
) {
5629 if (si
->encoding
== REDIS_ENCODING_HT
) {
5630 dictEntry
*de
= dictNext(si
->di
);
5632 ret
= dictGetEntryKey(de
);
5635 } else if (si
->encoding
== REDIS_ENCODING_INTSET
) {
5637 if (intsetGet(si
->subject
->ptr
,si
->ii
++,&llval
))
5638 ret
= createStringObjectFromLongLong(llval
);
5644 /* Return random element from set. The returned object will always have
5645 * an incremented refcount. */
5646 robj
*setTypeRandomElement(robj
*subject
) {
5648 if (subject
->encoding
== REDIS_ENCODING_HT
) {
5649 dictEntry
*de
= dictGetRandomKey(subject
->ptr
);
5650 ret
= dictGetEntryKey(de
);
5652 } else if (subject
->encoding
== REDIS_ENCODING_INTSET
) {
5653 long long llval
= intsetRandom(subject
->ptr
);
5654 ret
= createStringObjectFromLongLong(llval
);
5656 redisPanic("Unknown set encoding");
5661 static unsigned long setTypeSize(robj
*subject
) {
5662 if (subject
->encoding
== REDIS_ENCODING_HT
) {
5663 return dictSize((dict
*)subject
->ptr
);
5664 } else if (subject
->encoding
== REDIS_ENCODING_INTSET
) {
5665 return intsetLen((intset
*)subject
->ptr
);
5667 redisPanic("Unknown set encoding");
5671 static void setTypeConvert(robj
*subject
, int enc
) {
5674 redisAssert(subject
->type
== REDIS_SET
);
5676 if (enc
== REDIS_ENCODING_HT
) {
5677 dict
*d
= dictCreate(&setDictType
,NULL
);
5679 /* setTypeGet returns a robj with incremented refcount */
5680 si
= setTypeInitIterator(subject
);
5681 while ((element
= setTypeNext(si
)) != NULL
)
5682 redisAssert(dictAdd(d
,element
,NULL
) == DICT_OK
);
5683 setTypeReleaseIterator(si
);
5685 subject
->encoding
= REDIS_ENCODING_HT
;
5686 zfree(subject
->ptr
);
5689 redisPanic("Unsupported set conversion");
5693 static void saddCommand(redisClient
*c
) {
5696 set
= lookupKeyWrite(c
->db
,c
->argv
[1]);
5698 set
= setTypeCreate(c
->argv
[2]);
5699 dbAdd(c
->db
,c
->argv
[1],set
);
5701 if (set
->type
!= REDIS_SET
) {
5702 addReply(c
,shared
.wrongtypeerr
);
5706 if (setTypeAdd(set
,c
->argv
[2])) {
5708 addReply(c
,shared
.cone
);
5710 addReply(c
,shared
.czero
);
5714 static void sremCommand(redisClient
*c
) {
5717 if ((set
= lookupKeyWriteOrReply(c
,c
->argv
[1],shared
.czero
)) == NULL
||
5718 checkType(c
,set
,REDIS_SET
)) return;
5720 if (setTypeRemove(set
,c
->argv
[2])) {
5721 if (setTypeSize(set
) == 0) dbDelete(c
->db
,c
->argv
[1]);
5723 addReply(c
,shared
.cone
);
5725 addReply(c
,shared
.czero
);
5729 static void smoveCommand(redisClient
*c
) {
5730 robj
*srcset
, *dstset
, *ele
;
5731 srcset
= lookupKeyWrite(c
->db
,c
->argv
[1]);
5732 dstset
= lookupKeyWrite(c
->db
,c
->argv
[2]);
5735 /* If the source key does not exist return 0 */
5736 if (srcset
== NULL
) {
5737 addReply(c
,shared
.czero
);
5741 /* If the source key has the wrong type, or the destination key
5742 * is set and has the wrong type, return with an error. */
5743 if (checkType(c
,srcset
,REDIS_SET
) ||
5744 (dstset
&& checkType(c
,dstset
,REDIS_SET
))) return;
5746 /* If srcset and dstset are equal, SMOVE is a no-op */
5747 if (srcset
== dstset
) {
5748 addReply(c
,shared
.cone
);
5752 /* If the element cannot be removed from the src set, return 0. */
5753 if (!setTypeRemove(srcset
,ele
)) {
5754 addReply(c
,shared
.czero
);
5758 /* Remove the src set from the database when empty */
5759 if (setTypeSize(srcset
) == 0) dbDelete(c
->db
,c
->argv
[1]);
5762 /* Create the destination set when it doesn't exist */
5764 dstset
= setTypeCreate(ele
);
5765 dbAdd(c
->db
,c
->argv
[2],dstset
);
5768 /* An extra key has changed when ele was successfully added to dstset */
5769 if (setTypeAdd(dstset
,ele
)) server
.dirty
++;
5770 addReply(c
,shared
.cone
);
5773 static void sismemberCommand(redisClient
*c
) {
5776 if ((set
= lookupKeyReadOrReply(c
,c
->argv
[1],shared
.czero
)) == NULL
||
5777 checkType(c
,set
,REDIS_SET
)) return;
5779 if (setTypeIsMember(set
,c
->argv
[2]))
5780 addReply(c
,shared
.cone
);
5782 addReply(c
,shared
.czero
);
5785 static void scardCommand(redisClient
*c
) {
5788 if ((o
= lookupKeyReadOrReply(c
,c
->argv
[1],shared
.czero
)) == NULL
||
5789 checkType(c
,o
,REDIS_SET
)) return;
5791 addReplyUlong(c
,setTypeSize(o
));
5794 static void spopCommand(redisClient
*c
) {
5797 if ((set
= lookupKeyWriteOrReply(c
,c
->argv
[1],shared
.nullbulk
)) == NULL
||
5798 checkType(c
,set
,REDIS_SET
)) return;
5800 ele
= setTypeRandomElement(set
);
5802 addReply(c
,shared
.nullbulk
);
5804 setTypeRemove(set
,ele
);
5805 addReplyBulk(c
,ele
);
5807 if (setTypeSize(set
) == 0) dbDelete(c
->db
,c
->argv
[1]);
5812 static void srandmemberCommand(redisClient
*c
) {
5815 if ((set
= lookupKeyReadOrReply(c
,c
->argv
[1],shared
.nullbulk
)) == NULL
||
5816 checkType(c
,set
,REDIS_SET
)) return;
5818 ele
= setTypeRandomElement(set
);
5820 addReply(c
,shared
.nullbulk
);
5822 addReplyBulk(c
,ele
);
5827 static int qsortCompareSetsByCardinality(const void *s1
, const void *s2
) {
5828 return setTypeSize(*(robj
**)s1
)-setTypeSize(*(robj
**)s2
);
5831 static void sinterGenericCommand(redisClient
*c
, robj
**setkeys
, unsigned long setnum
, robj
*dstkey
) {
5832 robj
**sets
= zmalloc(sizeof(robj
*)*setnum
);
5834 robj
*ele
, *lenobj
= NULL
, *dstset
= NULL
;
5835 unsigned long j
, cardinality
= 0;
5837 for (j
= 0; j
< setnum
; j
++) {
5838 robj
*setobj
= dstkey
?
5839 lookupKeyWrite(c
->db
,setkeys
[j
]) :
5840 lookupKeyRead(c
->db
,setkeys
[j
]);
5844 if (dbDelete(c
->db
,dstkey
))
5846 addReply(c
,shared
.czero
);
5848 addReply(c
,shared
.emptymultibulk
);
5852 if (checkType(c
,setobj
,REDIS_SET
)) {
5858 /* Sort sets from the smallest to largest, this will improve our
5859 * algorithm's performance */
5860 qsort(sets
,setnum
,sizeof(robj
*),qsortCompareSetsByCardinality
);
5862 /* The first thing we should output is the total number of elements...
5863 * since this is a multi-bulk write, but at this stage we don't know
5864 * the intersection set size, so we use a trick, append an empty object
5865 * to the output list and save the pointer to later modify it with the
5868 lenobj
= createObject(REDIS_STRING
,NULL
);
5870 decrRefCount(lenobj
);
5872 /* If we have a target key where to store the resulting set
5873 * create this key with an empty set inside */
5874 dstset
= createIntsetObject();
5877 /* Iterate all the elements of the first (smallest) set, and test
5878 * the element against all the other sets, if at least one set does
5879 * not include the element it is discarded */
5880 si
= setTypeInitIterator(sets
[0]);
5881 while((ele
= setTypeNext(si
)) != NULL
) {
5882 for (j
= 1; j
< setnum
; j
++)
5883 if (!setTypeIsMember(sets
[j
],ele
)) break;
5885 /* Only take action when all sets contain the member */
5888 addReplyBulk(c
,ele
);
5891 setTypeAdd(dstset
,ele
);
5896 setTypeReleaseIterator(si
);
5899 /* Store the resulting set into the target, if the intersection
5900 * is not an empty set. */
5901 dbDelete(c
->db
,dstkey
);
5902 if (setTypeSize(dstset
) > 0) {
5903 dbAdd(c
->db
,dstkey
,dstset
);
5904 addReplyLongLong(c
,setTypeSize(dstset
));
5906 decrRefCount(dstset
);
5907 addReply(c
,shared
.czero
);
5911 lenobj
->ptr
= sdscatprintf(sdsempty(),"*%lu\r\n",cardinality
);
5916 static void sinterCommand(redisClient
*c
) {
5917 sinterGenericCommand(c
,c
->argv
+1,c
->argc
-1,NULL
);
5920 static void sinterstoreCommand(redisClient
*c
) {
5921 sinterGenericCommand(c
,c
->argv
+2,c
->argc
-2,c
->argv
[1]);
5924 #define REDIS_OP_UNION 0
5925 #define REDIS_OP_DIFF 1
5926 #define REDIS_OP_INTER 2
5928 static void sunionDiffGenericCommand(redisClient
*c
, robj
**setkeys
, int setnum
, robj
*dstkey
, int op
) {
5929 robj
**sets
= zmalloc(sizeof(robj
*)*setnum
);
5931 robj
*ele
, *dstset
= NULL
;
5932 int j
, cardinality
= 0;
5934 for (j
= 0; j
< setnum
; j
++) {
5935 robj
*setobj
= dstkey
?
5936 lookupKeyWrite(c
->db
,setkeys
[j
]) :
5937 lookupKeyRead(c
->db
,setkeys
[j
]);
5942 if (checkType(c
,setobj
,REDIS_SET
)) {
5949 /* We need a temp set object to store our union. If the dstkey
5950 * is not NULL (that is, we are inside an SUNIONSTORE operation) then
5951 * this set object will be the resulting object to set into the target key*/
5952 dstset
= createIntsetObject();
5954 /* Iterate all the elements of all the sets, add every element a single
5955 * time to the result set */
5956 for (j
= 0; j
< setnum
; j
++) {
5957 if (op
== REDIS_OP_DIFF
&& j
== 0 && !sets
[j
]) break; /* result set is empty */
5958 if (!sets
[j
]) continue; /* non existing keys are like empty sets */
5960 si
= setTypeInitIterator(sets
[j
]);
5961 while((ele
= setTypeNext(si
)) != NULL
) {
5962 if (op
== REDIS_OP_UNION
|| j
== 0) {
5963 if (setTypeAdd(dstset
,ele
)) {
5966 } else if (op
== REDIS_OP_DIFF
) {
5967 if (setTypeRemove(dstset
,ele
)) {
5973 setTypeReleaseIterator(si
);
5975 /* Exit when result set is empty. */
5976 if (op
== REDIS_OP_DIFF
&& cardinality
== 0) break;
5979 /* Output the content of the resulting set, if not in STORE mode */
5981 addReplySds(c
,sdscatprintf(sdsempty(),"*%d\r\n",cardinality
));
5982 si
= setTypeInitIterator(dstset
);
5983 while((ele
= setTypeNext(si
)) != NULL
) {
5984 addReplyBulk(c
,ele
);
5987 setTypeReleaseIterator(si
);
5988 decrRefCount(dstset
);
5990 /* If we have a target key where to store the resulting set
5991 * create this key with the result set inside */
5992 dbDelete(c
->db
,dstkey
);
5993 if (setTypeSize(dstset
) > 0) {
5994 dbAdd(c
->db
,dstkey
,dstset
);
5995 addReplyLongLong(c
,setTypeSize(dstset
));
5997 decrRefCount(dstset
);
5998 addReply(c
,shared
.czero
);
6005 static void sunionCommand(redisClient
*c
) {
6006 sunionDiffGenericCommand(c
,c
->argv
+1,c
->argc
-1,NULL
,REDIS_OP_UNION
);
6009 static void sunionstoreCommand(redisClient
*c
) {
6010 sunionDiffGenericCommand(c
,c
->argv
+2,c
->argc
-2,c
->argv
[1],REDIS_OP_UNION
);
6013 static void sdiffCommand(redisClient
*c
) {
6014 sunionDiffGenericCommand(c
,c
->argv
+1,c
->argc
-1,NULL
,REDIS_OP_DIFF
);
6017 static void sdiffstoreCommand(redisClient
*c
) {
6018 sunionDiffGenericCommand(c
,c
->argv
+2,c
->argc
-2,c
->argv
[1],REDIS_OP_DIFF
);
6021 /* ==================================== ZSets =============================== */
6023 /* ZSETs are ordered sets using two data structures to hold the same elements
6024 * in order to get O(log(N)) INSERT and REMOVE operations into a sorted
6027 * The elements are added to a hash table mapping Redis objects to scores.
6028 * At the same time the elements are added to a skip list mapping scores
6029 * to Redis objects (so objects are sorted by scores in this "view"). */
6031 /* This skiplist implementation is almost a C translation of the original
6032 * algorithm described by William Pugh in "Skip Lists: A Probabilistic
6033 * Alternative to Balanced Trees", modified in three ways:
6034 * a) this implementation allows for repeated values.
6035 * b) the comparison is not just by key (our 'score') but by satellite data.
6036 * c) there is a back pointer, so it's a doubly linked list with the back
6037 * pointers being only at "level 1". This allows to traverse the list
6038 * from tail to head, useful for ZREVRANGE. */
6040 static zskiplistNode
*zslCreateNode(int level
, double score
, robj
*obj
) {
6041 zskiplistNode
*zn
= zmalloc(sizeof(*zn
));
6043 zn
->forward
= zmalloc(sizeof(zskiplistNode
*) * level
);
6045 zn
->span
= zmalloc(sizeof(unsigned int) * (level
- 1));
6053 static zskiplist
*zslCreate(void) {
6057 zsl
= zmalloc(sizeof(*zsl
));
6060 zsl
->header
= zslCreateNode(ZSKIPLIST_MAXLEVEL
,0,NULL
);
6061 for (j
= 0; j
< ZSKIPLIST_MAXLEVEL
; j
++) {
6062 zsl
->header
->forward
[j
] = NULL
;
6064 /* span has space for ZSKIPLIST_MAXLEVEL-1 elements */
6065 if (j
< ZSKIPLIST_MAXLEVEL
-1)
6066 zsl
->header
->span
[j
] = 0;
6068 zsl
->header
->backward
= NULL
;
6073 static void zslFreeNode(zskiplistNode
*node
) {
6074 decrRefCount(node
->obj
);
6075 zfree(node
->forward
);
6080 static void zslFree(zskiplist
*zsl
) {
6081 zskiplistNode
*node
= zsl
->header
->forward
[0], *next
;
6083 zfree(zsl
->header
->forward
);
6084 zfree(zsl
->header
->span
);
6087 next
= node
->forward
[0];
6094 static int zslRandomLevel(void) {
6096 while ((random()&0xFFFF) < (ZSKIPLIST_P
* 0xFFFF))
6098 return (level
<ZSKIPLIST_MAXLEVEL
) ? level
: ZSKIPLIST_MAXLEVEL
;
6101 static void zslInsert(zskiplist
*zsl
, double score
, robj
*obj
) {
6102 zskiplistNode
*update
[ZSKIPLIST_MAXLEVEL
], *x
;
6103 unsigned int rank
[ZSKIPLIST_MAXLEVEL
];
6107 for (i
= zsl
->level
-1; i
>= 0; i
--) {
6108 /* store rank that is crossed to reach the insert position */
6109 rank
[i
] = i
== (zsl
->level
-1) ? 0 : rank
[i
+1];
6111 while (x
->forward
[i
] &&
6112 (x
->forward
[i
]->score
< score
||
6113 (x
->forward
[i
]->score
== score
&&
6114 compareStringObjects(x
->forward
[i
]->obj
,obj
) < 0))) {
6115 rank
[i
] += i
> 0 ? x
->span
[i
-1] : 1;
6120 /* we assume the key is not already inside, since we allow duplicated
6121 * scores, and the re-insertion of score and redis object should never
6122 * happen since the caller of zslInsert() should test in the hash table
6123 * if the element is already inside or not. */
6124 level
= zslRandomLevel();
6125 if (level
> zsl
->level
) {
6126 for (i
= zsl
->level
; i
< level
; i
++) {
6128 update
[i
] = zsl
->header
;
6129 update
[i
]->span
[i
-1] = zsl
->length
;
6133 x
= zslCreateNode(level
,score
,obj
);
6134 for (i
= 0; i
< level
; i
++) {
6135 x
->forward
[i
] = update
[i
]->forward
[i
];
6136 update
[i
]->forward
[i
] = x
;
6138 /* update span covered by update[i] as x is inserted here */
6140 x
->span
[i
-1] = update
[i
]->span
[i
-1] - (rank
[0] - rank
[i
]);
6141 update
[i
]->span
[i
-1] = (rank
[0] - rank
[i
]) + 1;
6145 /* increment span for untouched levels */
6146 for (i
= level
; i
< zsl
->level
; i
++) {
6147 update
[i
]->span
[i
-1]++;
6150 x
->backward
= (update
[0] == zsl
->header
) ? NULL
: update
[0];
6152 x
->forward
[0]->backward
= x
;
6158 /* Internal function used by zslDelete, zslDeleteByScore and zslDeleteByRank */
6159 void zslDeleteNode(zskiplist
*zsl
, zskiplistNode
*x
, zskiplistNode
**update
) {
6161 for (i
= 0; i
< zsl
->level
; i
++) {
6162 if (update
[i
]->forward
[i
] == x
) {
6164 update
[i
]->span
[i
-1] += x
->span
[i
-1] - 1;
6166 update
[i
]->forward
[i
] = x
->forward
[i
];
6168 /* invariant: i > 0, because update[0]->forward[0]
6169 * is always equal to x */
6170 update
[i
]->span
[i
-1] -= 1;
6173 if (x
->forward
[0]) {
6174 x
->forward
[0]->backward
= x
->backward
;
6176 zsl
->tail
= x
->backward
;
6178 while(zsl
->level
> 1 && zsl
->header
->forward
[zsl
->level
-1] == NULL
)
6183 /* Delete an element with matching score/object from the skiplist. */
6184 static int zslDelete(zskiplist
*zsl
, double score
, robj
*obj
) {
6185 zskiplistNode
*update
[ZSKIPLIST_MAXLEVEL
], *x
;
6189 for (i
= zsl
->level
-1; i
>= 0; i
--) {
6190 while (x
->forward
[i
] &&
6191 (x
->forward
[i
]->score
< score
||
6192 (x
->forward
[i
]->score
== score
&&
6193 compareStringObjects(x
->forward
[i
]->obj
,obj
) < 0)))
6197 /* We may have multiple elements with the same score, what we need
6198 * is to find the element with both the right score and object. */
6200 if (x
&& score
== x
->score
&& equalStringObjects(x
->obj
,obj
)) {
6201 zslDeleteNode(zsl
, x
, update
);
6205 return 0; /* not found */
6207 return 0; /* not found */
6210 /* Delete all the elements with score between min and max from the skiplist.
6211 * Min and max are inclusive, so every element with min <= score <= max is deleted.
6212 * Note that this function takes the reference to the hash table view of the
6213 * sorted set, in order to remove the elements from the hash table too. */
6214 static unsigned long zslDeleteRangeByScore(zskiplist
*zsl
, double min
, double max
, dict
*dict
) {
6215 zskiplistNode
*update
[ZSKIPLIST_MAXLEVEL
], *x
;
6216 unsigned long removed
= 0;
6220 for (i
= zsl
->level
-1; i
>= 0; i
--) {
6221 while (x
->forward
[i
] && x
->forward
[i
]->score
< min
)
6225 /* We may have multiple elements with the same score, what we need
6226 * is to find the element with both the right score and object. */
6228 while (x
&& x
->score
<= max
) {
6229 zskiplistNode
*next
= x
->forward
[0];
6230 zslDeleteNode(zsl
, x
, update
);
6231 dictDelete(dict
,x
->obj
);
6236 return removed
; /* not found */
6239 /* Delete all the elements with rank between start and end from the skiplist.
6240 * Start and end are inclusive. Note that start and end need to be 1-based */
6241 static unsigned long zslDeleteRangeByRank(zskiplist
*zsl
, unsigned int start
, unsigned int end
, dict
*dict
) {
6242 zskiplistNode
*update
[ZSKIPLIST_MAXLEVEL
], *x
;
6243 unsigned long traversed
= 0, removed
= 0;
6247 for (i
= zsl
->level
-1; i
>= 0; i
--) {
6248 while (x
->forward
[i
] && (traversed
+ (i
> 0 ? x
->span
[i
-1] : 1)) < start
) {
6249 traversed
+= i
> 0 ? x
->span
[i
-1] : 1;
6257 while (x
&& traversed
<= end
) {
6258 zskiplistNode
*next
= x
->forward
[0];
6259 zslDeleteNode(zsl
, x
, update
);
6260 dictDelete(dict
,x
->obj
);
6269 /* Find the first node having a score equal or greater than the specified one.
6270 * Returns NULL if there is no match. */
6271 static zskiplistNode
*zslFirstWithScore(zskiplist
*zsl
, double score
) {
6276 for (i
= zsl
->level
-1; i
>= 0; i
--) {
6277 while (x
->forward
[i
] && x
->forward
[i
]->score
< score
)
6280 /* We may have multiple elements with the same score, what we need
6281 * is to find the element with both the right score and object. */
6282 return x
->forward
[0];
6285 /* Find the rank for an element by both score and key.
6286 * Returns 0 when the element cannot be found, rank otherwise.
6287 * Note that the rank is 1-based due to the span of zsl->header to the
6289 static unsigned long zslistTypeGetRank(zskiplist
*zsl
, double score
, robj
*o
) {
6291 unsigned long rank
= 0;
6295 for (i
= zsl
->level
-1; i
>= 0; i
--) {
6296 while (x
->forward
[i
] &&
6297 (x
->forward
[i
]->score
< score
||
6298 (x
->forward
[i
]->score
== score
&&
6299 compareStringObjects(x
->forward
[i
]->obj
,o
) <= 0))) {
6300 rank
+= i
> 0 ? x
->span
[i
-1] : 1;
6304 /* x might be equal to zsl->header, so test if obj is non-NULL */
6305 if (x
->obj
&& equalStringObjects(x
->obj
,o
)) {
6312 /* Finds an element by its rank. The rank argument needs to be 1-based. */
6313 zskiplistNode
* zslistTypeGetElementByRank(zskiplist
*zsl
, unsigned long rank
) {
6315 unsigned long traversed
= 0;
6319 for (i
= zsl
->level
-1; i
>= 0; i
--) {
6320 while (x
->forward
[i
] && (traversed
+ (i
>0 ? x
->span
[i
-1] : 1)) <= rank
)
6322 traversed
+= i
> 0 ? x
->span
[i
-1] : 1;
6325 if (traversed
== rank
) {
6332 /* The actual Z-commands implementations */
6334 /* This generic command implements both ZADD and ZINCRBY.
6335 * scoreval is the score if the operation is a ZADD (doincrement == 0) or
6336 * the increment if the operation is a ZINCRBY (doincrement == 1). */
6337 static void zaddGenericCommand(redisClient
*c
, robj
*key
, robj
*ele
, double scoreval
, int doincrement
) {
6342 if (isnan(scoreval
)) {
6343 addReplySds(c
,sdsnew("-ERR provide score is Not A Number (nan)\r\n"));
6347 zsetobj
= lookupKeyWrite(c
->db
,key
);
6348 if (zsetobj
== NULL
) {
6349 zsetobj
= createZsetObject();
6350 dbAdd(c
->db
,key
,zsetobj
);
6352 if (zsetobj
->type
!= REDIS_ZSET
) {
6353 addReply(c
,shared
.wrongtypeerr
);
6359 /* Ok now since we implement both ZADD and ZINCRBY here the code
6360 * needs to handle the two different conditions. It's all about setting
6361 * '*score', that is, the new score to set, to the right value. */
6362 score
= zmalloc(sizeof(double));
6366 /* Read the old score. If the element was not present starts from 0 */
6367 de
= dictFind(zs
->dict
,ele
);
6369 double *oldscore
= dictGetEntryVal(de
);
6370 *score
= *oldscore
+ scoreval
;
6374 if (isnan(*score
)) {
6376 sdsnew("-ERR resulting score is Not A Number (nan)\r\n"));
6378 /* Note that we don't need to check if the zset may be empty and
6379 * should be removed here, as we can only obtain Nan as score if
6380 * there was already an element in the sorted set. */
6387 /* What follows is a simple remove and re-insert operation that is common
6388 * to both ZADD and ZINCRBY... */
6389 if (dictAdd(zs
->dict
,ele
,score
) == DICT_OK
) {
6390 /* case 1: New element */
6391 incrRefCount(ele
); /* added to hash */
6392 zslInsert(zs
->zsl
,*score
,ele
);
6393 incrRefCount(ele
); /* added to skiplist */
6396 addReplyDouble(c
,*score
);
6398 addReply(c
,shared
.cone
);
6403 /* case 2: Score update operation */
6404 de
= dictFind(zs
->dict
,ele
);
6405 redisAssert(de
!= NULL
);
6406 oldscore
= dictGetEntryVal(de
);
6407 if (*score
!= *oldscore
) {
6410 /* Remove and insert the element in the skip list with new score */
6411 deleted
= zslDelete(zs
->zsl
,*oldscore
,ele
);
6412 redisAssert(deleted
!= 0);
6413 zslInsert(zs
->zsl
,*score
,ele
);
6415 /* Update the score in the hash table */
6416 dictReplace(zs
->dict
,ele
,score
);
6422 addReplyDouble(c
,*score
);
6424 addReply(c
,shared
.czero
);
6428 static void zaddCommand(redisClient
*c
) {
6431 if (getDoubleFromObjectOrReply(c
, c
->argv
[2], &scoreval
, NULL
) != REDIS_OK
) return;
6432 zaddGenericCommand(c
,c
->argv
[1],c
->argv
[3],scoreval
,0);
6435 static void zincrbyCommand(redisClient
*c
) {
6438 if (getDoubleFromObjectOrReply(c
, c
->argv
[2], &scoreval
, NULL
) != REDIS_OK
) return;
6439 zaddGenericCommand(c
,c
->argv
[1],c
->argv
[3],scoreval
,1);
6442 static void zremCommand(redisClient
*c
) {
6449 if ((zsetobj
= lookupKeyWriteOrReply(c
,c
->argv
[1],shared
.czero
)) == NULL
||
6450 checkType(c
,zsetobj
,REDIS_ZSET
)) return;
6453 de
= dictFind(zs
->dict
,c
->argv
[2]);
6455 addReply(c
,shared
.czero
);
6458 /* Delete from the skiplist */
6459 oldscore
= dictGetEntryVal(de
);
6460 deleted
= zslDelete(zs
->zsl
,*oldscore
,c
->argv
[2]);
6461 redisAssert(deleted
!= 0);
6463 /* Delete from the hash table */
6464 dictDelete(zs
->dict
,c
->argv
[2]);
6465 if (htNeedsResize(zs
->dict
)) dictResize(zs
->dict
);
6466 if (dictSize(zs
->dict
) == 0) dbDelete(c
->db
,c
->argv
[1]);
6468 addReply(c
,shared
.cone
);
6471 static void zremrangebyscoreCommand(redisClient
*c
) {
6478 if ((getDoubleFromObjectOrReply(c
, c
->argv
[2], &min
, NULL
) != REDIS_OK
) ||
6479 (getDoubleFromObjectOrReply(c
, c
->argv
[3], &max
, NULL
) != REDIS_OK
)) return;
6481 if ((zsetobj
= lookupKeyWriteOrReply(c
,c
->argv
[1],shared
.czero
)) == NULL
||
6482 checkType(c
,zsetobj
,REDIS_ZSET
)) return;
6485 deleted
= zslDeleteRangeByScore(zs
->zsl
,min
,max
,zs
->dict
);
6486 if (htNeedsResize(zs
->dict
)) dictResize(zs
->dict
);
6487 if (dictSize(zs
->dict
) == 0) dbDelete(c
->db
,c
->argv
[1]);
6488 server
.dirty
+= deleted
;
6489 addReplyLongLong(c
,deleted
);
6492 static void zremrangebyrankCommand(redisClient
*c
) {
6500 if ((getLongFromObjectOrReply(c
, c
->argv
[2], &start
, NULL
) != REDIS_OK
) ||
6501 (getLongFromObjectOrReply(c
, c
->argv
[3], &end
, NULL
) != REDIS_OK
)) return;
6503 if ((zsetobj
= lookupKeyWriteOrReply(c
,c
->argv
[1],shared
.czero
)) == NULL
||
6504 checkType(c
,zsetobj
,REDIS_ZSET
)) return;
6506 llen
= zs
->zsl
->length
;
6508 /* convert negative indexes */
6509 if (start
< 0) start
= llen
+start
;
6510 if (end
< 0) end
= llen
+end
;
6511 if (start
< 0) start
= 0;
6512 if (end
< 0) end
= 0;
6514 /* indexes sanity checks */
6515 if (start
> end
|| start
>= llen
) {
6516 addReply(c
,shared
.czero
);
6519 if (end
>= llen
) end
= llen
-1;
6521 /* increment start and end because zsl*Rank functions
6522 * use 1-based rank */
6523 deleted
= zslDeleteRangeByRank(zs
->zsl
,start
+1,end
+1,zs
->dict
);
6524 if (htNeedsResize(zs
->dict
)) dictResize(zs
->dict
);
6525 if (dictSize(zs
->dict
) == 0) dbDelete(c
->db
,c
->argv
[1]);
6526 server
.dirty
+= deleted
;
6527 addReplyLongLong(c
, deleted
);
6535 static int qsortCompareZsetopsrcByCardinality(const void *s1
, const void *s2
) {
6536 zsetopsrc
*d1
= (void*) s1
, *d2
= (void*) s2
;
6537 unsigned long size1
, size2
;
6538 size1
= d1
->dict
? dictSize(d1
->dict
) : 0;
6539 size2
= d2
->dict
? dictSize(d2
->dict
) : 0;
6540 return size1
- size2
;
/* Aggregation modes accepted by ZUNIONSTORE / ZINTERSTORE. */
#define REDIS_AGGR_SUM 1
#define REDIS_AGGR_MIN 2
#define REDIS_AGGR_MAX 3
/* Score carried by a dict entry: entries with a NULL value count as 1.0. */
#define zunionInterDictValue(_e) (dictGetEntryVal(_e) == NULL ? 1.0 : *(double*)dictGetEntryVal(_e))

/* Fold 'val' into '*target' according to the 'aggregate' mode. Panics on a
 * mode that is not one of the REDIS_AGGR_* constants. */
inline static void zunionInterAggregate(double *target, double val, int aggregate) {
    switch (aggregate) {
    case REDIS_AGGR_SUM:
        *target = *target + val;
        break;
    case REDIS_AGGR_MIN:
        *target = val < *target ? val : *target;
        break;
    case REDIS_AGGR_MAX:
        *target = val > *target ? val : *target;
        break;
    default:
        /* safety net */
        redisPanic("Unknown ZUNION/INTER aggregate type");
    }
}
6561 static void zunionInterGenericCommand(redisClient
*c
, robj
*dstkey
, int op
) {
6563 int aggregate
= REDIS_AGGR_SUM
;
6570 /* expect setnum input keys to be given */
6571 setnum
= atoi(c
->argv
[2]->ptr
);
6573 addReplySds(c
,sdsnew("-ERR at least 1 input key is needed for ZUNIONSTORE/ZINTERSTORE\r\n"));
6577 /* test if the expected number of keys would overflow */
6578 if (3+setnum
> c
->argc
) {
6579 addReply(c
,shared
.syntaxerr
);
6583 /* read keys to be used for input */
6584 src
= zmalloc(sizeof(zsetopsrc
) * setnum
);
6585 for (i
= 0, j
= 3; i
< setnum
; i
++, j
++) {
6586 robj
*obj
= lookupKeyWrite(c
->db
,c
->argv
[j
]);
6590 if (obj
->type
== REDIS_ZSET
) {
6591 src
[i
].dict
= ((zset
*)obj
->ptr
)->dict
;
6592 } else if (obj
->type
== REDIS_SET
) {
6593 src
[i
].dict
= (obj
->ptr
);
6596 addReply(c
,shared
.wrongtypeerr
);
6601 /* default all weights to 1 */
6602 src
[i
].weight
= 1.0;
6605 /* parse optional extra arguments */
6607 int remaining
= c
->argc
- j
;
6610 if (remaining
>= (setnum
+ 1) && !strcasecmp(c
->argv
[j
]->ptr
,"weights")) {
6612 for (i
= 0; i
< setnum
; i
++, j
++, remaining
--) {
6613 if (getDoubleFromObjectOrReply(c
, c
->argv
[j
], &src
[i
].weight
, NULL
) != REDIS_OK
)
6616 } else if (remaining
>= 2 && !strcasecmp(c
->argv
[j
]->ptr
,"aggregate")) {
6618 if (!strcasecmp(c
->argv
[j
]->ptr
,"sum")) {
6619 aggregate
= REDIS_AGGR_SUM
;
6620 } else if (!strcasecmp(c
->argv
[j
]->ptr
,"min")) {
6621 aggregate
= REDIS_AGGR_MIN
;
6622 } else if (!strcasecmp(c
->argv
[j
]->ptr
,"max")) {
6623 aggregate
= REDIS_AGGR_MAX
;
6626 addReply(c
,shared
.syntaxerr
);
6632 addReply(c
,shared
.syntaxerr
);
6638 /* sort sets from the smallest to largest, this will improve our
6639 * algorithm's performance */
6640 qsort(src
,setnum
,sizeof(zsetopsrc
),qsortCompareZsetopsrcByCardinality
);
6642 dstobj
= createZsetObject();
6643 dstzset
= dstobj
->ptr
;
6645 if (op
== REDIS_OP_INTER
) {
6646 /* skip going over all entries if the smallest zset is NULL or empty */
6647 if (src
[0].dict
&& dictSize(src
[0].dict
) > 0) {
6648 /* precondition: as src[0].dict is non-empty and the zsets are ordered
6649 * from small to large, all src[i > 0].dict are non-empty too */
6650 di
= dictGetIterator(src
[0].dict
);
6651 while((de
= dictNext(di
)) != NULL
) {
6652 double *score
= zmalloc(sizeof(double)), value
;
6653 *score
= src
[0].weight
* zunionInterDictValue(de
);
6655 for (j
= 1; j
< setnum
; j
++) {
6656 dictEntry
*other
= dictFind(src
[j
].dict
,dictGetEntryKey(de
));
6658 value
= src
[j
].weight
* zunionInterDictValue(other
);
6659 zunionInterAggregate(score
, value
, aggregate
);
6665 /* skip entry when not present in every source dict */
6669 robj
*o
= dictGetEntryKey(de
);
6670 dictAdd(dstzset
->dict
,o
,score
);
6671 incrRefCount(o
); /* added to dictionary */
6672 zslInsert(dstzset
->zsl
,*score
,o
);
6673 incrRefCount(o
); /* added to skiplist */
6676 dictReleaseIterator(di
);
6678 } else if (op
== REDIS_OP_UNION
) {
6679 for (i
= 0; i
< setnum
; i
++) {
6680 if (!src
[i
].dict
) continue;
6682 di
= dictGetIterator(src
[i
].dict
);
6683 while((de
= dictNext(di
)) != NULL
) {
6684 /* skip key when already processed */
6685 if (dictFind(dstzset
->dict
,dictGetEntryKey(de
)) != NULL
) continue;
6687 double *score
= zmalloc(sizeof(double)), value
;
6688 *score
= src
[i
].weight
* zunionInterDictValue(de
);
6690 /* because the zsets are sorted by size, its only possible
6691 * for sets at larger indices to hold this entry */
6692 for (j
= (i
+1); j
< setnum
; j
++) {
6693 dictEntry
*other
= dictFind(src
[j
].dict
,dictGetEntryKey(de
));
6695 value
= src
[j
].weight
* zunionInterDictValue(other
);
6696 zunionInterAggregate(score
, value
, aggregate
);
6700 robj
*o
= dictGetEntryKey(de
);
6701 dictAdd(dstzset
->dict
,o
,score
);
6702 incrRefCount(o
); /* added to dictionary */
6703 zslInsert(dstzset
->zsl
,*score
,o
);
6704 incrRefCount(o
); /* added to skiplist */
6706 dictReleaseIterator(di
);
6709 /* unknown operator */
6710 redisAssert(op
== REDIS_OP_INTER
|| op
== REDIS_OP_UNION
);
6713 dbDelete(c
->db
,dstkey
);
6714 if (dstzset
->zsl
->length
) {
6715 dbAdd(c
->db
,dstkey
,dstobj
);
6716 addReplyLongLong(c
, dstzset
->zsl
->length
);
6719 decrRefCount(dstobj
);
6720 addReply(c
, shared
.czero
);
6725 static void zunionstoreCommand(redisClient
*c
) {
6726 zunionInterGenericCommand(c
,c
->argv
[1], REDIS_OP_UNION
);
6729 static void zinterstoreCommand(redisClient
*c
) {
6730 zunionInterGenericCommand(c
,c
->argv
[1], REDIS_OP_INTER
);
6733 static void zrangeGenericCommand(redisClient
*c
, int reverse
) {
6745 if ((getLongFromObjectOrReply(c
, c
->argv
[2], &start
, NULL
) != REDIS_OK
) ||
6746 (getLongFromObjectOrReply(c
, c
->argv
[3], &end
, NULL
) != REDIS_OK
)) return;
6748 if (c
->argc
== 5 && !strcasecmp(c
->argv
[4]->ptr
,"withscores")) {
6750 } else if (c
->argc
>= 5) {
6751 addReply(c
,shared
.syntaxerr
);
6755 if ((o
= lookupKeyReadOrReply(c
,c
->argv
[1],shared
.emptymultibulk
)) == NULL
6756 || checkType(c
,o
,REDIS_ZSET
)) return;
6761 /* convert negative indexes */
6762 if (start
< 0) start
= llen
+start
;
6763 if (end
< 0) end
= llen
+end
;
6764 if (start
< 0) start
= 0;
6765 if (end
< 0) end
= 0;
6767 /* indexes sanity checks */
6768 if (start
> end
|| start
>= llen
) {
6769 /* Out of range start or start > end result in empty list */
6770 addReply(c
,shared
.emptymultibulk
);
6773 if (end
>= llen
) end
= llen
-1;
6774 rangelen
= (end
-start
)+1;
6776 /* check if starting point is trivial, before searching
6777 * the element in log(N) time */
6779 ln
= start
== 0 ? zsl
->tail
: zslistTypeGetElementByRank(zsl
, llen
-start
);
6782 zsl
->header
->forward
[0] : zslistTypeGetElementByRank(zsl
, start
+1);
6785 /* Return the result in form of a multi-bulk reply */
6786 addReplySds(c
,sdscatprintf(sdsempty(),"*%d\r\n",
6787 withscores
? (rangelen
*2) : rangelen
));
6788 for (j
= 0; j
< rangelen
; j
++) {
6790 addReplyBulk(c
,ele
);
6792 addReplyDouble(c
,ln
->score
);
6793 ln
= reverse
? ln
->backward
: ln
->forward
[0];
6797 static void zrangeCommand(redisClient
*c
) {
6798 zrangeGenericCommand(c
,0);
6801 static void zrevrangeCommand(redisClient
*c
) {
6802 zrangeGenericCommand(c
,1);
6805 /* This command implements both ZRANGEBYSCORE and ZCOUNT.
6806 * If justcount is non-zero, just the count is returned. */
6807 static void genericZrangebyscoreCommand(redisClient
*c
, int justcount
) {
6810 int minex
= 0, maxex
= 0; /* are min or max exclusive? */
6811 int offset
= 0, limit
= -1;
6815 /* Parse the min-max interval. If one of the values is prefixed
6816 * by the "(" character, it's considered "open". For instance
6817 * ZRANGEBYSCORE zset (1.5 (2.5 will match min < x < max
6818 * ZRANGEBYSCORE zset 1.5 2.5 will instead match min <= x <= max */
6819 if (((char*)c
->argv
[2]->ptr
)[0] == '(') {
6820 min
= strtod((char*)c
->argv
[2]->ptr
+1,NULL
);
6823 min
= strtod(c
->argv
[2]->ptr
,NULL
);
6825 if (((char*)c
->argv
[3]->ptr
)[0] == '(') {
6826 max
= strtod((char*)c
->argv
[3]->ptr
+1,NULL
);
6829 max
= strtod(c
->argv
[3]->ptr
,NULL
);
6832 /* Parse "WITHSCORES": note that if the command was called with
6833 * the name ZCOUNT then we are sure that c->argc == 4, so we'll never
6834 * enter the following paths to parse WITHSCORES and LIMIT. */
6835 if (c
->argc
== 5 || c
->argc
== 8) {
6836 if (strcasecmp(c
->argv
[c
->argc
-1]->ptr
,"withscores") == 0)
6841 if (c
->argc
!= (4 + withscores
) && c
->argc
!= (7 + withscores
))
6845 sdsnew("-ERR wrong number of arguments for ZRANGEBYSCORE\r\n"));
6850 if (c
->argc
== (7 + withscores
) && strcasecmp(c
->argv
[4]->ptr
,"limit")) {
6851 addReply(c
,shared
.syntaxerr
);
6853 } else if (c
->argc
== (7 + withscores
)) {
6854 offset
= atoi(c
->argv
[5]->ptr
);
6855 limit
= atoi(c
->argv
[6]->ptr
);
6856 if (offset
< 0) offset
= 0;
6859 /* Ok, lookup the key and get the range */
6860 o
= lookupKeyRead(c
->db
,c
->argv
[1]);
6862 addReply(c
,justcount
? shared
.czero
: shared
.emptymultibulk
);
6864 if (o
->type
!= REDIS_ZSET
) {
6865 addReply(c
,shared
.wrongtypeerr
);
6867 zset
*zsetobj
= o
->ptr
;
6868 zskiplist
*zsl
= zsetobj
->zsl
;
6870 robj
*ele
, *lenobj
= NULL
;
6871 unsigned long rangelen
= 0;
6873 /* Get the first node with the score >= min, or with
6874 * score > min if 'minex' is true. */
6875 ln
= zslFirstWithScore(zsl
,min
);
6876 while (minex
&& ln
&& ln
->score
== min
) ln
= ln
->forward
[0];
6879 /* No element matching the speciifed interval */
6880 addReply(c
,justcount
? shared
.czero
: shared
.emptymultibulk
);
6884 /* We don't know in advance how many matching elements there
6885 * are in the list, so we push this object that will represent
6886 * the multi-bulk length in the output buffer, and will "fix"
6889 lenobj
= createObject(REDIS_STRING
,NULL
);
6891 decrRefCount(lenobj
);
6894 while(ln
&& (maxex
? (ln
->score
< max
) : (ln
->score
<= max
))) {
6897 ln
= ln
->forward
[0];
6900 if (limit
== 0) break;
6903 addReplyBulk(c
,ele
);
6905 addReplyDouble(c
,ln
->score
);
6907 ln
= ln
->forward
[0];
6909 if (limit
> 0) limit
--;
6912 addReplyLongLong(c
,(long)rangelen
);
6914 lenobj
->ptr
= sdscatprintf(sdsempty(),"*%lu\r\n",
6915 withscores
? (rangelen
*2) : rangelen
);
6921 static void zrangebyscoreCommand(redisClient
*c
) {
6922 genericZrangebyscoreCommand(c
,0);
6925 static void zcountCommand(redisClient
*c
) {
6926 genericZrangebyscoreCommand(c
,1);
6929 static void zcardCommand(redisClient
*c
) {
6933 if ((o
= lookupKeyReadOrReply(c
,c
->argv
[1],shared
.czero
)) == NULL
||
6934 checkType(c
,o
,REDIS_ZSET
)) return;
6937 addReplyUlong(c
,zs
->zsl
->length
);
6940 static void zscoreCommand(redisClient
*c
) {
6945 if ((o
= lookupKeyReadOrReply(c
,c
->argv
[1],shared
.nullbulk
)) == NULL
||
6946 checkType(c
,o
,REDIS_ZSET
)) return;
6949 de
= dictFind(zs
->dict
,c
->argv
[2]);
6951 addReply(c
,shared
.nullbulk
);
6953 double *score
= dictGetEntryVal(de
);
6955 addReplyDouble(c
,*score
);
6959 static void zrankGenericCommand(redisClient
*c
, int reverse
) {
6967 if ((o
= lookupKeyReadOrReply(c
,c
->argv
[1],shared
.nullbulk
)) == NULL
||
6968 checkType(c
,o
,REDIS_ZSET
)) return;
6972 de
= dictFind(zs
->dict
,c
->argv
[2]);
6974 addReply(c
,shared
.nullbulk
);
6978 score
= dictGetEntryVal(de
);
6979 rank
= zslistTypeGetRank(zsl
, *score
, c
->argv
[2]);
6982 addReplyLongLong(c
, zsl
->length
- rank
);
6984 addReplyLongLong(c
, rank
-1);
6987 addReply(c
,shared
.nullbulk
);
6991 static void zrankCommand(redisClient
*c
) {
6992 zrankGenericCommand(c
, 0);
6995 static void zrevrankCommand(redisClient
*c
) {
6996 zrankGenericCommand(c
, 1);
6999 /* ========================= Hashes utility functions ======================= */
7000 #define REDIS_HASH_KEY 1
7001 #define REDIS_HASH_VALUE 2
7003 /* Check the length of a number of objects to see if we need to convert a
7004 * zipmap to a real hash. Note that we only check string encoded objects
7005 * as their string length can be queried in constant time. */
7006 static void hashTypeTryConversion(robj
*subject
, robj
**argv
, int start
, int end
) {
7008 if (subject
->encoding
!= REDIS_ENCODING_ZIPMAP
) return;
7010 for (i
= start
; i
<= end
; i
++) {
7011 if (argv
[i
]->encoding
== REDIS_ENCODING_RAW
&&
7012 sdslen(argv
[i
]->ptr
) > server
.hash_max_zipmap_value
)
7014 convertToRealHash(subject
);
7020 /* Encode given objects in-place when the hash uses a dict. */
7021 static void hashTypeTryObjectEncoding(robj
*subject
, robj
**o1
, robj
**o2
) {
7022 if (subject
->encoding
== REDIS_ENCODING_HT
) {
7023 if (o1
) *o1
= tryObjectEncoding(*o1
);
7024 if (o2
) *o2
= tryObjectEncoding(*o2
);
7028 /* Get the value from a hash identified by key. Returns either a string
7029 * object or NULL if the value cannot be found. The refcount of the object
7030 * is always increased by 1 when the value was found. */
7031 static robj
*hashTypeGet(robj
*o
, robj
*key
) {
7033 if (o
->encoding
== REDIS_ENCODING_ZIPMAP
) {
7036 key
= getDecodedObject(key
);
7037 if (zipmapGet(o
->ptr
,key
->ptr
,sdslen(key
->ptr
),&v
,&vlen
)) {
7038 value
= createStringObject((char*)v
,vlen
);
7042 dictEntry
*de
= dictFind(o
->ptr
,key
);
7044 value
= dictGetEntryVal(de
);
7045 incrRefCount(value
);
7051 /* Test if the key exists in the given hash. Returns 1 if the key
7052 * exists and 0 when it doesn't. */
7053 static int hashTypeExists(robj
*o
, robj
*key
) {
7054 if (o
->encoding
== REDIS_ENCODING_ZIPMAP
) {
7055 key
= getDecodedObject(key
);
7056 if (zipmapExists(o
->ptr
,key
->ptr
,sdslen(key
->ptr
))) {
7062 if (dictFind(o
->ptr
,key
) != NULL
) {
7069 /* Add an element, discard the old if the key already exists.
7070 * Return 0 on insert and 1 on update. */
7071 static int hashTypeSet(robj
*o
, robj
*key
, robj
*value
) {
7073 if (o
->encoding
== REDIS_ENCODING_ZIPMAP
) {
7074 key
= getDecodedObject(key
);
7075 value
= getDecodedObject(value
);
7076 o
->ptr
= zipmapSet(o
->ptr
,
7077 key
->ptr
,sdslen(key
->ptr
),
7078 value
->ptr
,sdslen(value
->ptr
), &update
);
7080 decrRefCount(value
);
7082 /* Check if the zipmap needs to be upgraded to a real hash table */
7083 if (zipmapLen(o
->ptr
) > server
.hash_max_zipmap_entries
)
7084 convertToRealHash(o
);
7086 if (dictReplace(o
->ptr
,key
,value
)) {
7093 incrRefCount(value
);
7098 /* Delete an element from a hash.
7099 * Return 1 on deleted and 0 on not found. */
7100 static int hashTypeDelete(robj
*o
, robj
*key
) {
7102 if (o
->encoding
== REDIS_ENCODING_ZIPMAP
) {
7103 key
= getDecodedObject(key
);
7104 o
->ptr
= zipmapDel(o
->ptr
,key
->ptr
,sdslen(key
->ptr
), &deleted
);
7107 deleted
= dictDelete((dict
*)o
->ptr
,key
) == DICT_OK
;
7108 /* Always check if the dictionary needs a resize after a delete. */
7109 if (deleted
&& htNeedsResize(o
->ptr
)) dictResize(o
->ptr
);
7114 /* Return the number of elements in a hash. */
7115 static unsigned long hashTypeLength(robj
*o
) {
7116 return (o
->encoding
== REDIS_ENCODING_ZIPMAP
) ?
7117 zipmapLen((unsigned char*)o
->ptr
) : dictSize((dict
*)o
->ptr
);
7120 /* Structure to hold hash iteration abstration. Note that iteration over
7121 * hashes involves both fields and values. Because it is possible that
7122 * not both are required, store pointers in the iterator to avoid
7123 * unnecessary memory allocation for fields/values. */
7127 unsigned char *zk
, *zv
;
7128 unsigned int zklen
, zvlen
;
7134 static hashTypeIterator
*hashTypeInitIterator(robj
*subject
) {
7135 hashTypeIterator
*hi
= zmalloc(sizeof(hashTypeIterator
));
7136 hi
->encoding
= subject
->encoding
;
7137 if (hi
->encoding
== REDIS_ENCODING_ZIPMAP
) {
7138 hi
->zi
= zipmapRewind(subject
->ptr
);
7139 } else if (hi
->encoding
== REDIS_ENCODING_HT
) {
7140 hi
->di
= dictGetIterator(subject
->ptr
);
7147 static void hashTypeReleaseIterator(hashTypeIterator
*hi
) {
7148 if (hi
->encoding
== REDIS_ENCODING_HT
) {
7149 dictReleaseIterator(hi
->di
);
7154 /* Move to the next entry in the hash. Return REDIS_OK when the next entry
7155 * could be found and REDIS_ERR when the iterator reaches the end. */
7156 static int hashTypeNext(hashTypeIterator
*hi
) {
7157 if (hi
->encoding
== REDIS_ENCODING_ZIPMAP
) {
7158 if ((hi
->zi
= zipmapNext(hi
->zi
, &hi
->zk
, &hi
->zklen
,
7159 &hi
->zv
, &hi
->zvlen
)) == NULL
) return REDIS_ERR
;
7161 if ((hi
->de
= dictNext(hi
->di
)) == NULL
) return REDIS_ERR
;
7166 /* Get key or value object at current iteration position.
7167 * This increases the refcount of the field object by 1. */
7168 static robj
*hashTypeCurrent(hashTypeIterator
*hi
, int what
) {
7170 if (hi
->encoding
== REDIS_ENCODING_ZIPMAP
) {
7171 if (what
& REDIS_HASH_KEY
) {
7172 o
= createStringObject((char*)hi
->zk
,hi
->zklen
);
7174 o
= createStringObject((char*)hi
->zv
,hi
->zvlen
);
7177 if (what
& REDIS_HASH_KEY
) {
7178 o
= dictGetEntryKey(hi
->de
);
7180 o
= dictGetEntryVal(hi
->de
);
7187 static robj
*hashTypeLookupWriteOrCreate(redisClient
*c
, robj
*key
) {
7188 robj
*o
= lookupKeyWrite(c
->db
,key
);
7190 o
= createHashObject();
7193 if (o
->type
!= REDIS_HASH
) {
7194 addReply(c
,shared
.wrongtypeerr
);
7201 /* ============================= Hash commands ============================== */
7202 static void hsetCommand(redisClient
*c
) {
7206 if ((o
= hashTypeLookupWriteOrCreate(c
,c
->argv
[1])) == NULL
) return;
7207 hashTypeTryConversion(o
,c
->argv
,2,3);
7208 hashTypeTryObjectEncoding(o
,&c
->argv
[2], &c
->argv
[3]);
7209 update
= hashTypeSet(o
,c
->argv
[2],c
->argv
[3]);
7210 addReply(c
, update
? shared
.czero
: shared
.cone
);
7214 static void hsetnxCommand(redisClient
*c
) {
7216 if ((o
= hashTypeLookupWriteOrCreate(c
,c
->argv
[1])) == NULL
) return;
7217 hashTypeTryConversion(o
,c
->argv
,2,3);
7219 if (hashTypeExists(o
, c
->argv
[2])) {
7220 addReply(c
, shared
.czero
);
7222 hashTypeTryObjectEncoding(o
,&c
->argv
[2], &c
->argv
[3]);
7223 hashTypeSet(o
,c
->argv
[2],c
->argv
[3]);
7224 addReply(c
, shared
.cone
);
7229 static void hmsetCommand(redisClient
*c
) {
7233 if ((c
->argc
% 2) == 1) {
7234 addReplySds(c
,sdsnew("-ERR wrong number of arguments for HMSET\r\n"));
7238 if ((o
= hashTypeLookupWriteOrCreate(c
,c
->argv
[1])) == NULL
) return;
7239 hashTypeTryConversion(o
,c
->argv
,2,c
->argc
-1);
7240 for (i
= 2; i
< c
->argc
; i
+= 2) {
7241 hashTypeTryObjectEncoding(o
,&c
->argv
[i
], &c
->argv
[i
+1]);
7242 hashTypeSet(o
,c
->argv
[i
],c
->argv
[i
+1]);
7244 addReply(c
, shared
.ok
);
7248 static void hincrbyCommand(redisClient
*c
) {
7249 long long value
, incr
;
7250 robj
*o
, *current
, *new;
7252 if (getLongLongFromObjectOrReply(c
,c
->argv
[3],&incr
,NULL
) != REDIS_OK
) return;
7253 if ((o
= hashTypeLookupWriteOrCreate(c
,c
->argv
[1])) == NULL
) return;
7254 if ((current
= hashTypeGet(o
,c
->argv
[2])) != NULL
) {
7255 if (getLongLongFromObjectOrReply(c
,current
,&value
,
7256 "hash value is not an integer") != REDIS_OK
) {
7257 decrRefCount(current
);
7260 decrRefCount(current
);
7266 new = createStringObjectFromLongLong(value
);
7267 hashTypeTryObjectEncoding(o
,&c
->argv
[2],NULL
);
7268 hashTypeSet(o
,c
->argv
[2],new);
7270 addReplyLongLong(c
,value
);
7274 static void hgetCommand(redisClient
*c
) {
7276 if ((o
= lookupKeyReadOrReply(c
,c
->argv
[1],shared
.nullbulk
)) == NULL
||
7277 checkType(c
,o
,REDIS_HASH
)) return;
7279 if ((value
= hashTypeGet(o
,c
->argv
[2])) != NULL
) {
7280 addReplyBulk(c
,value
);
7281 decrRefCount(value
);
7283 addReply(c
,shared
.nullbulk
);
7287 static void hmgetCommand(redisClient
*c
) {
7290 o
= lookupKeyRead(c
->db
,c
->argv
[1]);
7291 if (o
!= NULL
&& o
->type
!= REDIS_HASH
) {
7292 addReply(c
,shared
.wrongtypeerr
);
7295 /* Note the check for o != NULL happens inside the loop. This is
7296 * done because objects that cannot be found are considered to be
7297 * an empty hash. The reply should then be a series of NULLs. */
7298 addReplySds(c
,sdscatprintf(sdsempty(),"*%d\r\n",c
->argc
-2));
7299 for (i
= 2; i
< c
->argc
; i
++) {
7300 if (o
!= NULL
&& (value
= hashTypeGet(o
,c
->argv
[i
])) != NULL
) {
7301 addReplyBulk(c
,value
);
7302 decrRefCount(value
);
7304 addReply(c
,shared
.nullbulk
);
7309 static void hdelCommand(redisClient
*c
) {
7311 if ((o
= lookupKeyWriteOrReply(c
,c
->argv
[1],shared
.czero
)) == NULL
||
7312 checkType(c
,o
,REDIS_HASH
)) return;
7314 if (hashTypeDelete(o
,c
->argv
[2])) {
7315 if (hashTypeLength(o
) == 0) dbDelete(c
->db
,c
->argv
[1]);
7316 addReply(c
,shared
.cone
);
7319 addReply(c
,shared
.czero
);
7323 static void hlenCommand(redisClient
*c
) {
7325 if ((o
= lookupKeyReadOrReply(c
,c
->argv
[1],shared
.czero
)) == NULL
||
7326 checkType(c
,o
,REDIS_HASH
)) return;
7328 addReplyUlong(c
,hashTypeLength(o
));
7331 static void genericHgetallCommand(redisClient
*c
, int flags
) {
7332 robj
*o
, *lenobj
, *obj
;
7333 unsigned long count
= 0;
7334 hashTypeIterator
*hi
;
7336 if ((o
= lookupKeyReadOrReply(c
,c
->argv
[1],shared
.emptymultibulk
)) == NULL
7337 || checkType(c
,o
,REDIS_HASH
)) return;
7339 lenobj
= createObject(REDIS_STRING
,NULL
);
7341 decrRefCount(lenobj
);
7343 hi
= hashTypeInitIterator(o
);
7344 while (hashTypeNext(hi
) != REDIS_ERR
) {
7345 if (flags
& REDIS_HASH_KEY
) {
7346 obj
= hashTypeCurrent(hi
,REDIS_HASH_KEY
);
7347 addReplyBulk(c
,obj
);
7351 if (flags
& REDIS_HASH_VALUE
) {
7352 obj
= hashTypeCurrent(hi
,REDIS_HASH_VALUE
);
7353 addReplyBulk(c
,obj
);
7358 hashTypeReleaseIterator(hi
);
7360 lenobj
->ptr
= sdscatprintf(sdsempty(),"*%lu\r\n",count
);
7363 static void hkeysCommand(redisClient
*c
) {
7364 genericHgetallCommand(c
,REDIS_HASH_KEY
);
7367 static void hvalsCommand(redisClient
*c
) {
7368 genericHgetallCommand(c
,REDIS_HASH_VALUE
);
7371 static void hgetallCommand(redisClient
*c
) {
7372 genericHgetallCommand(c
,REDIS_HASH_KEY
|REDIS_HASH_VALUE
);
7375 static void hexistsCommand(redisClient
*c
) {
7377 if ((o
= lookupKeyReadOrReply(c
,c
->argv
[1],shared
.czero
)) == NULL
||
7378 checkType(c
,o
,REDIS_HASH
)) return;
7380 addReply(c
, hashTypeExists(o
,c
->argv
[2]) ? shared
.cone
: shared
.czero
);
7383 static void convertToRealHash(robj
*o
) {
7384 unsigned char *key
, *val
, *p
, *zm
= o
->ptr
;
7385 unsigned int klen
, vlen
;
7386 dict
*dict
= dictCreate(&hashDictType
,NULL
);
7388 assert(o
->type
== REDIS_HASH
&& o
->encoding
!= REDIS_ENCODING_HT
);
7389 p
= zipmapRewind(zm
);
7390 while((p
= zipmapNext(p
,&key
,&klen
,&val
,&vlen
)) != NULL
) {
7391 robj
*keyobj
, *valobj
;
7393 keyobj
= createStringObject((char*)key
,klen
);
7394 valobj
= createStringObject((char*)val
,vlen
);
7395 keyobj
= tryObjectEncoding(keyobj
);
7396 valobj
= tryObjectEncoding(valobj
);
7397 dictAdd(dict
,keyobj
,valobj
);
7399 o
->encoding
= REDIS_ENCODING_HT
;
7404 /* ========================= Non type-specific commands ==================== */
7406 static void flushdbCommand(redisClient
*c
) {
7407 server
.dirty
+= dictSize(c
->db
->dict
);
7408 touchWatchedKeysOnFlush(c
->db
->id
);
7409 dictEmpty(c
->db
->dict
);
7410 dictEmpty(c
->db
->expires
);
7411 addReply(c
,shared
.ok
);
7414 static void flushallCommand(redisClient
*c
) {
7415 touchWatchedKeysOnFlush(-1);
7416 server
.dirty
+= emptyDb();
7417 addReply(c
,shared
.ok
);
7418 if (server
.bgsavechildpid
!= -1) {
7419 kill(server
.bgsavechildpid
,SIGKILL
);
7420 rdbRemoveTempFile(server
.bgsavechildpid
);
7422 rdbSave(server
.dbfilename
);
7426 static redisSortOperation
*createSortOperation(int type
, robj
*pattern
) {
7427 redisSortOperation
*so
= zmalloc(sizeof(*so
));
7429 so
->pattern
= pattern
;
7433 /* Return the value associated to the key with a name obtained
7434 * substituting the first occurence of '*' in 'pattern' with 'subst'.
7435 * The returned object will always have its refcount increased by 1
7436 * when it is non-NULL. */
7437 static robj
*lookupKeyByPattern(redisDb
*db
, robj
*pattern
, robj
*subst
) {
7440 robj keyobj
, fieldobj
, *o
;
7441 int prefixlen
, sublen
, postfixlen
, fieldlen
;
7442 /* Expoit the internal sds representation to create a sds string allocated on the stack in order to make this function faster */
7446 char buf
[REDIS_SORTKEY_MAX
+1];
7447 } keyname
, fieldname
;
7449 /* If the pattern is "#" return the substitution object itself in order
7450 * to implement the "SORT ... GET #" feature. */
7451 spat
= pattern
->ptr
;
7452 if (spat
[0] == '#' && spat
[1] == '\0') {
7453 incrRefCount(subst
);
7457 /* The substitution object may be specially encoded. If so we create
7458 * a decoded object on the fly. Otherwise getDecodedObject will just
7459 * increment the ref count, that we'll decrement later. */
7460 subst
= getDecodedObject(subst
);
7463 if (sdslen(spat
)+sdslen(ssub
)-1 > REDIS_SORTKEY_MAX
) return NULL
;
7464 p
= strchr(spat
,'*');
7466 decrRefCount(subst
);
7470 /* Find out if we're dealing with a hash dereference. */
7471 if ((f
= strstr(p
+1, "->")) != NULL
) {
7472 fieldlen
= sdslen(spat
)-(f
-spat
);
7473 /* this also copies \0 character */
7474 memcpy(fieldname
.buf
,f
+2,fieldlen
-1);
7475 fieldname
.len
= fieldlen
-2;
7481 sublen
= sdslen(ssub
);
7482 postfixlen
= sdslen(spat
)-(prefixlen
+1)-fieldlen
;
7483 memcpy(keyname
.buf
,spat
,prefixlen
);
7484 memcpy(keyname
.buf
+prefixlen
,ssub
,sublen
);
7485 memcpy(keyname
.buf
+prefixlen
+sublen
,p
+1,postfixlen
);
7486 keyname
.buf
[prefixlen
+sublen
+postfixlen
] = '\0';
7487 keyname
.len
= prefixlen
+sublen
+postfixlen
;
7488 decrRefCount(subst
);
7490 /* Lookup substituted key */
7491 initStaticStringObject(keyobj
,((char*)&keyname
)+(sizeof(long)*2));
7492 o
= lookupKeyRead(db
,&keyobj
);
7493 if (o
== NULL
) return NULL
;
7496 if (o
->type
!= REDIS_HASH
|| fieldname
.len
< 1) return NULL
;
7498 /* Retrieve value from hash by the field name. This operation
7499 * already increases the refcount of the returned object. */
7500 initStaticStringObject(fieldobj
,((char*)&fieldname
)+(sizeof(long)*2));
7501 o
= hashTypeGet(o
, &fieldobj
);
7503 if (o
->type
!= REDIS_STRING
) return NULL
;
7505 /* Every object that this function returns needs to have its refcount
7506 * increased. sortCommand decreases it again. */
7513 /* sortCompare() is used by qsort in sortCommand(). Given that qsort_r with
7514 * the additional parameter is not standard but a BSD-specific we have to
7515 * pass sorting parameters via the global 'server' structure */
7516 static int sortCompare(const void *s1
, const void *s2
) {
7517 const redisSortObject
*so1
= s1
, *so2
= s2
;
7520 if (!server
.sort_alpha
) {
7521 /* Numeric sorting. Here it's trivial as we precomputed scores */
7522 if (so1
->u
.score
> so2
->u
.score
) {
7524 } else if (so1
->u
.score
< so2
->u
.score
) {
7530 /* Alphanumeric sorting */
7531 if (server
.sort_bypattern
) {
7532 if (!so1
->u
.cmpobj
|| !so2
->u
.cmpobj
) {
7533 /* At least one compare object is NULL */
7534 if (so1
->u
.cmpobj
== so2
->u
.cmpobj
)
7536 else if (so1
->u
.cmpobj
== NULL
)
7541 /* We have both the objects, use strcoll */
7542 cmp
= strcoll(so1
->u
.cmpobj
->ptr
,so2
->u
.cmpobj
->ptr
);
7545 /* Compare elements directly. */
7546 cmp
= compareStringObjects(so1
->obj
,so2
->obj
);
7549 return server
.sort_desc
? -cmp
: cmp
;
7552 /* The SORT command is the most complex command in Redis. Warning: this code
7553 * is optimized for speed and a bit less for readability */
7554 static void sortCommand(redisClient
*c
) {
7556 unsigned int outputlen
= 0;
7557 int desc
= 0, alpha
= 0;
7558 int limit_start
= 0, limit_count
= -1, start
, end
;
7559 int j
, dontsort
= 0, vectorlen
;
7560 int getop
= 0; /* GET operation counter */
7561 robj
*sortval
, *sortby
= NULL
, *storekey
= NULL
;
7562 redisSortObject
*vector
; /* Resulting vector to sort */
7564 /* Lookup the key to sort. It must be of the right types */
7565 sortval
= lookupKeyRead(c
->db
,c
->argv
[1]);
7566 if (sortval
== NULL
) {
7567 addReply(c
,shared
.emptymultibulk
);
7570 if (sortval
->type
!= REDIS_SET
&& sortval
->type
!= REDIS_LIST
&&
7571 sortval
->type
!= REDIS_ZSET
)
7573 addReply(c
,shared
.wrongtypeerr
);
7577 /* Create a list of operations to perform for every sorted element.
7578 * Operations can be GET/DEL/INCR/DECR */
7579 operations
= listCreate();
7580 listSetFreeMethod(operations
,zfree
);
7583 /* Now we need to protect sortval by incrementing its refcount: in the future
7584 * SORT may have options able to overwrite/delete keys during the sorting
7585 * and the sorted key itself may get destroyed */
7586 incrRefCount(sortval
);
7588 /* The SORT command has an SQL-alike syntax, parse it */
7589 while(j
< c
->argc
) {
7590 int leftargs
= c
->argc
-j
-1;
7591 if (!strcasecmp(c
->argv
[j
]->ptr
,"asc")) {
7593 } else if (!strcasecmp(c
->argv
[j
]->ptr
,"desc")) {
7595 } else if (!strcasecmp(c
->argv
[j
]->ptr
,"alpha")) {
7597 } else if (!strcasecmp(c
->argv
[j
]->ptr
,"limit") && leftargs
>= 2) {
7598 limit_start
= atoi(c
->argv
[j
+1]->ptr
);
7599 limit_count
= atoi(c
->argv
[j
+2]->ptr
);
7601 } else if (!strcasecmp(c
->argv
[j
]->ptr
,"store") && leftargs
>= 1) {
7602 storekey
= c
->argv
[j
+1];
7604 } else if (!strcasecmp(c
->argv
[j
]->ptr
,"by") && leftargs
>= 1) {
7605 sortby
= c
->argv
[j
+1];
7606 /* If the BY pattern does not contain '*', i.e. it is constant,
7607 * we don't need to sort nor to lookup the weight keys. */
7608 if (strchr(c
->argv
[j
+1]->ptr
,'*') == NULL
) dontsort
= 1;
7610 } else if (!strcasecmp(c
->argv
[j
]->ptr
,"get") && leftargs
>= 1) {
7611 listAddNodeTail(operations
,createSortOperation(
7612 REDIS_SORT_GET
,c
->argv
[j
+1]));
7616 decrRefCount(sortval
);
7617 listRelease(operations
);
7618 addReply(c
,shared
.syntaxerr
);
7624 /* Load the sorting vector with all the objects to sort */
7625 switch(sortval
->type
) {
7626 case REDIS_LIST
: vectorlen
= listTypeLength(sortval
); break;
7627 case REDIS_SET
: vectorlen
= dictSize((dict
*)sortval
->ptr
); break;
7628 case REDIS_ZSET
: vectorlen
= dictSize(((zset
*)sortval
->ptr
)->dict
); break;
7629 default: vectorlen
= 0; redisPanic("Bad SORT type"); /* Avoid GCC warning */
7631 vector
= zmalloc(sizeof(redisSortObject
)*vectorlen
);
7634 if (sortval
->type
== REDIS_LIST
) {
7635 listTypeIterator
*li
= listTypeInitIterator(sortval
,0,REDIS_TAIL
);
7636 listTypeEntry entry
;
7637 while(listTypeNext(li
,&entry
)) {
7638 vector
[j
].obj
= listTypeGet(&entry
);
7639 vector
[j
].u
.score
= 0;
7640 vector
[j
].u
.cmpobj
= NULL
;
7643 listTypeReleaseIterator(li
);
7649 if (sortval
->type
== REDIS_SET
) {
7652 zset
*zs
= sortval
->ptr
;
7656 di
= dictGetIterator(set
);
7657 while((setele
= dictNext(di
)) != NULL
) {
7658 vector
[j
].obj
= dictGetEntryKey(setele
);
7659 vector
[j
].u
.score
= 0;
7660 vector
[j
].u
.cmpobj
= NULL
;
7663 dictReleaseIterator(di
);
7665 redisAssert(j
== vectorlen
);
7667 /* Now it's time to load the right scores in the sorting vector */
7668 if (dontsort
== 0) {
7669 for (j
= 0; j
< vectorlen
; j
++) {
7672 /* lookup value to sort by */
7673 byval
= lookupKeyByPattern(c
->db
,sortby
,vector
[j
].obj
);
7674 if (!byval
) continue;
7676 /* use object itself to sort by */
7677 byval
= vector
[j
].obj
;
7681 if (sortby
) vector
[j
].u
.cmpobj
= getDecodedObject(byval
);
7683 if (byval
->encoding
== REDIS_ENCODING_RAW
) {
7684 vector
[j
].u
.score
= strtod(byval
->ptr
,NULL
);
7685 } else if (byval
->encoding
== REDIS_ENCODING_INT
) {
7686 /* Don't need to decode the object if it's
7687 * integer-encoded (the only encoding supported
7688 * so far). We can just cast it */
7689 vector
[j
].u
.score
= (long)byval
->ptr
;
7691 redisAssert(1 != 1);
7695 /* when the object was retrieved using lookupKeyByPattern,
7696 * its refcount needs to be decreased. */
7698 decrRefCount(byval
);
7703 /* We are ready to sort the vector... perform a bit of sanity check
7704 * on the LIMIT option too. We'll use a partial version of quicksort. */
7705 start
= (limit_start
< 0) ? 0 : limit_start
;
7706 end
= (limit_count
< 0) ? vectorlen
-1 : start
+limit_count
-1;
7707 if (start
>= vectorlen
) {
7708 start
= vectorlen
-1;
7711 if (end
>= vectorlen
) end
= vectorlen
-1;
7713 if (dontsort
== 0) {
7714 server
.sort_desc
= desc
;
7715 server
.sort_alpha
= alpha
;
7716 server
.sort_bypattern
= sortby
? 1 : 0;
7717 if (sortby
&& (start
!= 0 || end
!= vectorlen
-1))
7718 pqsort(vector
,vectorlen
,sizeof(redisSortObject
),sortCompare
, start
,end
);
7720 qsort(vector
,vectorlen
,sizeof(redisSortObject
),sortCompare
);
7723 /* Send command output to the output buffer, performing the specified
7724 * GET/DEL/INCR/DECR operations if any. */
7725 outputlen
= getop
? getop
*(end
-start
+1) : end
-start
+1;
7726 if (storekey
== NULL
) {
7727 /* STORE option not specified, send the sorting result to the client */
7728 addReplySds(c
,sdscatprintf(sdsempty(),"*%d\r\n",outputlen
));
7729 for (j
= start
; j
<= end
; j
++) {
7733 if (!getop
) addReplyBulk(c
,vector
[j
].obj
);
7734 listRewind(operations
,&li
);
7735 while((ln
= listNext(&li
))) {
7736 redisSortOperation
*sop
= ln
->value
;
7737 robj
*val
= lookupKeyByPattern(c
->db
,sop
->pattern
,
7740 if (sop
->type
== REDIS_SORT_GET
) {
7742 addReply(c
,shared
.nullbulk
);
7744 addReplyBulk(c
,val
);
7748 redisAssert(sop
->type
== REDIS_SORT_GET
); /* always fails */
7753 robj
*sobj
= createZiplistObject();
7755 /* STORE option specified, set the sorting result as a List object */
7756 for (j
= start
; j
<= end
; j
++) {
7761 listTypePush(sobj
,vector
[j
].obj
,REDIS_TAIL
);
7763 listRewind(operations
,&li
);
7764 while((ln
= listNext(&li
))) {
7765 redisSortOperation
*sop
= ln
->value
;
7766 robj
*val
= lookupKeyByPattern(c
->db
,sop
->pattern
,
7769 if (sop
->type
== REDIS_SORT_GET
) {
7770 if (!val
) val
= createStringObject("",0);
7772 /* listTypePush does an incrRefCount, so we should take care
7773 * of the incremented refcount caused by either
7774 * lookupKeyByPattern or createStringObject("",0) */
7775 listTypePush(sobj
,val
,REDIS_TAIL
);
7779 redisAssert(sop
->type
== REDIS_SORT_GET
);
7784 dbReplace(c
->db
,storekey
,sobj
);
7785 /* Note: we add 1 because the DB is dirty anyway since even if the
7786 * SORT result is empty a new key is set and maybe the old content
7788 server
.dirty
+= 1+outputlen
;
7789 addReplySds(c
,sdscatprintf(sdsempty(),":%d\r\n",outputlen
));
7793 if (sortval
->type
== REDIS_LIST
)
7794 for (j
= 0; j
< vectorlen
; j
++)
7795 decrRefCount(vector
[j
].obj
);
7796 decrRefCount(sortval
);
7797 listRelease(operations
);
7798 for (j
= 0; j
< vectorlen
; j
++) {
7799 if (alpha
&& vector
[j
].u
.cmpobj
)
7800 decrRefCount(vector
[j
].u
.cmpobj
);
7805 /* Convert an amount of bytes into a human readable string in the form
7806 * of 100B, 2G, 100M, 4K, and so forth. */
7807 static void bytesToHuman(char *s
, unsigned long long n
) {
7812 sprintf(s
,"%lluB",n
);
7814 } else if (n
< (1024*1024)) {
7815 d
= (double)n
/(1024);
7816 sprintf(s
,"%.2fK",d
);
7817 } else if (n
< (1024LL*1024*1024)) {
7818 d
= (double)n
/(1024*1024);
7819 sprintf(s
,"%.2fM",d
);
7820 } else if (n
< (1024LL*1024*1024*1024)) {
7821 d
= (double)n
/(1024LL*1024*1024);
7822 sprintf(s
,"%.2fG",d
);
7826 /* Create the string returned by the INFO command. This is decoupled
7827 * from the INFO command itself as we need to report the same information
7828 * on memory corruption problems. */
7829 static sds
genRedisInfoString(void) {
7831 time_t uptime
= time(NULL
)-server
.stat_starttime
;
7835 bytesToHuman(hmem
,zmalloc_used_memory());
7836 info
= sdscatprintf(sdsempty(),
7837 "redis_version:%s\r\n"
7838 "redis_git_sha1:%s\r\n"
7839 "redis_git_dirty:%d\r\n"
7841 "multiplexing_api:%s\r\n"
7842 "process_id:%ld\r\n"
7843 "uptime_in_seconds:%ld\r\n"
7844 "uptime_in_days:%ld\r\n"
7845 "connected_clients:%d\r\n"
7846 "connected_slaves:%d\r\n"
7847 "blocked_clients:%d\r\n"
7848 "used_memory:%zu\r\n"
7849 "used_memory_human:%s\r\n"
7850 "changes_since_last_save:%lld\r\n"
7851 "bgsave_in_progress:%d\r\n"
7852 "last_save_time:%ld\r\n"
7853 "bgrewriteaof_in_progress:%d\r\n"
7854 "total_connections_received:%lld\r\n"
7855 "total_commands_processed:%lld\r\n"
7856 "expired_keys:%lld\r\n"
7857 "hash_max_zipmap_entries:%zu\r\n"
7858 "hash_max_zipmap_value:%zu\r\n"
7859 "pubsub_channels:%ld\r\n"
7860 "pubsub_patterns:%u\r\n"
7865 strtol(REDIS_GIT_DIRTY
,NULL
,10) > 0,
7866 (sizeof(long) == 8) ? "64" : "32",
7871 listLength(server
.clients
)-listLength(server
.slaves
),
7872 listLength(server
.slaves
),
7873 server
.blpop_blocked_clients
,
7874 zmalloc_used_memory(),
7877 server
.bgsavechildpid
!= -1,
7879 server
.bgrewritechildpid
!= -1,
7880 server
.stat_numconnections
,
7881 server
.stat_numcommands
,
7882 server
.stat_expiredkeys
,
7883 server
.hash_max_zipmap_entries
,
7884 server
.hash_max_zipmap_value
,
7885 dictSize(server
.pubsub_channels
),
7886 listLength(server
.pubsub_patterns
),
7887 server
.vm_enabled
!= 0,
7888 server
.masterhost
== NULL
? "master" : "slave"
7890 if (server
.masterhost
) {
7891 info
= sdscatprintf(info
,
7892 "master_host:%s\r\n"
7893 "master_port:%d\r\n"
7894 "master_link_status:%s\r\n"
7895 "master_last_io_seconds_ago:%d\r\n"
7898 (server
.replstate
== REDIS_REPL_CONNECTED
) ?
7900 server
.master
? ((int)(time(NULL
)-server
.master
->lastinteraction
)) : -1
7903 if (server
.vm_enabled
) {
7905 info
= sdscatprintf(info
,
7906 "vm_conf_max_memory:%llu\r\n"
7907 "vm_conf_page_size:%llu\r\n"
7908 "vm_conf_pages:%llu\r\n"
7909 "vm_stats_used_pages:%llu\r\n"
7910 "vm_stats_swapped_objects:%llu\r\n"
7911 "vm_stats_swappin_count:%llu\r\n"
7912 "vm_stats_swappout_count:%llu\r\n"
7913 "vm_stats_io_newjobs_len:%lu\r\n"
7914 "vm_stats_io_processing_len:%lu\r\n"
7915 "vm_stats_io_processed_len:%lu\r\n"
7916 "vm_stats_io_active_threads:%lu\r\n"
7917 "vm_stats_blocked_clients:%lu\r\n"
7918 ,(unsigned long long) server
.vm_max_memory
,
7919 (unsigned long long) server
.vm_page_size
,
7920 (unsigned long long) server
.vm_pages
,
7921 (unsigned long long) server
.vm_stats_used_pages
,
7922 (unsigned long long) server
.vm_stats_swapped_objects
,
7923 (unsigned long long) server
.vm_stats_swapins
,
7924 (unsigned long long) server
.vm_stats_swapouts
,
7925 (unsigned long) listLength(server
.io_newjobs
),
7926 (unsigned long) listLength(server
.io_processing
),
7927 (unsigned long) listLength(server
.io_processed
),
7928 (unsigned long) server
.io_active_threads
,
7929 (unsigned long) server
.vm_blocked_clients
7933 for (j
= 0; j
< server
.dbnum
; j
++) {
7934 long long keys
, vkeys
;
7936 keys
= dictSize(server
.db
[j
].dict
);
7937 vkeys
= dictSize(server
.db
[j
].expires
);
7938 if (keys
|| vkeys
) {
7939 info
= sdscatprintf(info
, "db%d:keys=%lld,expires=%lld\r\n",
7946 static void infoCommand(redisClient
*c
) {
7947 sds info
= genRedisInfoString();
7948 addReplySds(c
,sdscatprintf(sdsempty(),"$%lu\r\n",
7949 (unsigned long)sdslen(info
)));
7950 addReplySds(c
,info
);
7951 addReply(c
,shared
.crlf
);
7954 static void monitorCommand(redisClient
*c
) {
7955 /* ignore MONITOR if already a slave or in monitor mode */
7956 if (c
->flags
& REDIS_SLAVE
) return;
7958 c
->flags
|= (REDIS_SLAVE
|REDIS_MONITOR
);
7960 listAddNodeTail(server
.monitors
,c
);
7961 addReply(c
,shared
.ok
);
7964 /* ================================= Expire ================================= */
7965 static int removeExpire(redisDb
*db
, robj
*key
) {
7966 if (dictDelete(db
->expires
,key
->ptr
) == DICT_OK
) {
7973 static int setExpire(redisDb
*db
, robj
*key
, time_t when
) {
7974 sds copy
= sdsdup(key
->ptr
);
7975 if (dictAdd(db
->expires
,copy
,(void*)when
) == DICT_ERR
) {
7983 /* Return the expire time of the specified key, or -1 if no expire
7984 * is associated with this key (i.e. the key is non volatile) */
7985 static time_t getExpire(redisDb
*db
, robj
*key
) {
7988 /* No expire? return ASAP */
7989 if (dictSize(db
->expires
) == 0 ||
7990 (de
= dictFind(db
->expires
,key
->ptr
)) == NULL
) return -1;
7992 return (time_t) dictGetEntryVal(de
);
7995 static int expireIfNeeded(redisDb
*db
, robj
*key
) {
7999 /* No expire? return ASAP */
8000 if (dictSize(db
->expires
) == 0 ||
8001 (de
= dictFind(db
->expires
,key
->ptr
)) == NULL
) return 0;
8003 /* Lookup the expire */
8004 when
= (time_t) dictGetEntryVal(de
);
8005 if (time(NULL
) <= when
) return 0;
8007 /* Delete the key */
8009 server
.stat_expiredkeys
++;
8013 static int deleteIfVolatile(redisDb
*db
, robj
*key
) {
8016 /* No expire? return ASAP */
8017 if (dictSize(db
->expires
) == 0 ||
8018 (de
= dictFind(db
->expires
,key
->ptr
)) == NULL
) return 0;
8020 /* Delete the key */
8022 server
.stat_expiredkeys
++;
8023 dictDelete(db
->expires
,key
->ptr
);
8024 return dictDelete(db
->dict
,key
->ptr
) == DICT_OK
;
8027 static void expireGenericCommand(redisClient
*c
, robj
*key
, robj
*param
, long offset
) {
8031 if (getLongFromObjectOrReply(c
, param
, &seconds
, NULL
) != REDIS_OK
) return;
8035 de
= dictFind(c
->db
->dict
,key
->ptr
);
8037 addReply(c
,shared
.czero
);
8041 if (dbDelete(c
->db
,key
)) server
.dirty
++;
8042 addReply(c
, shared
.cone
);
8045 time_t when
= time(NULL
)+seconds
;
8046 if (setExpire(c
->db
,key
,when
)) {
8047 addReply(c
,shared
.cone
);
8050 addReply(c
,shared
.czero
);
8056 static void expireCommand(redisClient
*c
) {
8057 expireGenericCommand(c
,c
->argv
[1],c
->argv
[2],0);
8060 static void expireatCommand(redisClient
*c
) {
8061 expireGenericCommand(c
,c
->argv
[1],c
->argv
[2],time(NULL
));
8064 static void ttlCommand(redisClient
*c
) {
8068 expire
= getExpire(c
->db
,c
->argv
[1]);
8070 ttl
= (int) (expire
-time(NULL
));
8071 if (ttl
< 0) ttl
= -1;
8073 addReplySds(c
,sdscatprintf(sdsempty(),":%d\r\n",ttl
));
8076 /* ================================ MULTI/EXEC ============================== */
8078 /* Client state initialization for MULTI/EXEC */
8079 static void initClientMultiState(redisClient
*c
) {
8080 c
->mstate
.commands
= NULL
;
8081 c
->mstate
.count
= 0;
8084 /* Release all the resources associated with MULTI/EXEC state */
8085 static void freeClientMultiState(redisClient
*c
) {
8088 for (j
= 0; j
< c
->mstate
.count
; j
++) {
8090 multiCmd
*mc
= c
->mstate
.commands
+j
;
8092 for (i
= 0; i
< mc
->argc
; i
++)
8093 decrRefCount(mc
->argv
[i
]);
8096 zfree(c
->mstate
.commands
);
8099 /* Add a new command into the MULTI commands queue */
8100 static void queueMultiCommand(redisClient
*c
, struct redisCommand
*cmd
) {
8104 c
->mstate
.commands
= zrealloc(c
->mstate
.commands
,
8105 sizeof(multiCmd
)*(c
->mstate
.count
+1));
8106 mc
= c
->mstate
.commands
+c
->mstate
.count
;
8109 mc
->argv
= zmalloc(sizeof(robj
*)*c
->argc
);
8110 memcpy(mc
->argv
,c
->argv
,sizeof(robj
*)*c
->argc
);
8111 for (j
= 0; j
< c
->argc
; j
++)
8112 incrRefCount(mc
->argv
[j
]);
8116 static void multiCommand(redisClient
*c
) {
8117 if (c
->flags
& REDIS_MULTI
) {
8118 addReplySds(c
,sdsnew("-ERR MULTI calls can not be nested\r\n"));
8121 c
->flags
|= REDIS_MULTI
;
8122 addReply(c
,shared
.ok
);
8125 static void discardCommand(redisClient
*c
) {
8126 if (!(c
->flags
& REDIS_MULTI
)) {
8127 addReplySds(c
,sdsnew("-ERR DISCARD without MULTI\r\n"));
8131 freeClientMultiState(c
);
8132 initClientMultiState(c
);
8133 c
->flags
&= (~REDIS_MULTI
);
8135 addReply(c
,shared
.ok
);
8138 /* Send a MULTI command to all the slaves and AOF file. Check the execCommand
8139 * implementation for more information. */
8140 static void execCommandReplicateMulti(redisClient
*c
) {
8141 struct redisCommand
*cmd
;
8142 robj
*multistring
= createStringObject("MULTI",5);
8144 cmd
= lookupCommand("multi");
8145 if (server
.appendonly
)
8146 feedAppendOnlyFile(cmd
,c
->db
->id
,&multistring
,1);
8147 if (listLength(server
.slaves
))
8148 replicationFeedSlaves(server
.slaves
,c
->db
->id
,&multistring
,1);
8149 decrRefCount(multistring
);
8152 static void execCommand(redisClient
*c
) {
8157 if (!(c
->flags
& REDIS_MULTI
)) {
8158 addReplySds(c
,sdsnew("-ERR EXEC without MULTI\r\n"));
8162 /* Check if we need to abort the EXEC if some WATCHed key was touched.
8163 * A failed EXEC will return a multi bulk nil object. */
8164 if (c
->flags
& REDIS_DIRTY_CAS
) {
8165 freeClientMultiState(c
);
8166 initClientMultiState(c
);
8167 c
->flags
&= ~(REDIS_MULTI
|REDIS_DIRTY_CAS
);
8169 addReply(c
,shared
.nullmultibulk
);
8173 /* Replicate a MULTI request now that we are sure the block is executed.
8174 * This way we'll deliver the MULTI/..../EXEC block as a whole and
8175 * both the AOF and the replication link will have the same consistency
8176 * and atomicity guarantees. */
8177 execCommandReplicateMulti(c
);
8179 /* Exec all the queued commands */
8180 unwatchAllKeys(c
); /* Unwatch ASAP otherwise we'll waste CPU cycles */
8181 orig_argv
= c
->argv
;
8182 orig_argc
= c
->argc
;
8183 addReplySds(c
,sdscatprintf(sdsempty(),"*%d\r\n",c
->mstate
.count
));
8184 for (j
= 0; j
< c
->mstate
.count
; j
++) {
8185 c
->argc
= c
->mstate
.commands
[j
].argc
;
8186 c
->argv
= c
->mstate
.commands
[j
].argv
;
8187 call(c
,c
->mstate
.commands
[j
].cmd
);
8189 c
->argv
= orig_argv
;
8190 c
->argc
= orig_argc
;
8191 freeClientMultiState(c
);
8192 initClientMultiState(c
);
8193 c
->flags
&= ~(REDIS_MULTI
|REDIS_DIRTY_CAS
);
8194 /* Make sure the EXEC command is always replicated / AOF, since we
8195 * always send the MULTI command (we can't know beforehand if the
8196 * next operations will contain at least a modification to the DB). */
8200 /* =========================== Blocking Operations ========================= */
8202 /* Currently Redis blocking operations support is limited to list POP ops,
8203 * so the current implementation is not fully generic, but it is also not
8204 * completely specific so it will not require a rewrite to support new
8205 * kind of blocking operations in the future.
8207 * Still it's important to note that list blocking operations can be already
8208 * used as a notification mechanism in order to implement other blocking
8209 * operations at application level, so there must be very strong evidence
8210 * of usefulness and generality before new blocking operations are implemented.
8212 * This is how the current blocking POP works, we use BLPOP as example:
8213 * - If the user calls BLPOP and the key exists and contains a non empty list
8214 * then LPOP is called instead. So BLPOP is semantically the same as LPOP
8215 * if there is no need to block.
8216 * - If instead BLPOP is called and the key does not exist or the list is
8217 * empty we need to block. In order to do so we remove the notification for
8218 * new data to read in the client socket (so that we'll not serve new
8219 * requests if the blocking request is not served). Also we put the client
8220 * in a dictionary (db->blocking_keys) mapping each key to the list of
8221 * clients blocked on it.
8222 * - If a PUSH operation against a key with blocked clients waiting is
8223 * performed, we serve the first in the list: basically instead of pushing
8224 * the new element inside the list we return it to the (first / oldest)
8225 * blocking client, unblock the client, and remove it from the list.
8227 * The above comment and the source code should be enough in order to understand
8228 * the implementation and modify / fix it later.
8231 /* Set a client in blocking mode for the specified key, with the specified
8233 static void blockForKeys(redisClient
*c
, robj
**keys
, int numkeys
, time_t timeout
) {
8238 c
->blocking_keys
= zmalloc(sizeof(robj
*)*numkeys
);
8239 c
->blocking_keys_num
= numkeys
;
8240 c
->blockingto
= timeout
;
8241 for (j
= 0; j
< numkeys
; j
++) {
8242 /* Add the key in the client structure, to map clients -> keys */
8243 c
->blocking_keys
[j
] = keys
[j
];
8244 incrRefCount(keys
[j
]);
8246 /* And in the other "side", to map keys -> clients */
8247 de
= dictFind(c
->db
->blocking_keys
,keys
[j
]);
8251 /* For every key we take a list of clients blocked for it */
8253 retval
= dictAdd(c
->db
->blocking_keys
,keys
[j
],l
);
8254 incrRefCount(keys
[j
]);
8255 assert(retval
== DICT_OK
);
8257 l
= dictGetEntryVal(de
);
8259 listAddNodeTail(l
,c
);
8261 /* Mark the client as a blocked client */
8262 c
->flags
|= REDIS_BLOCKED
;
8263 server
.blpop_blocked_clients
++;
8266 /* Unblock a client that's waiting in a blocking operation such as BLPOP */
8267 static void unblockClientWaitingData(redisClient
*c
) {
8272 assert(c
->blocking_keys
!= NULL
);
8273 /* The client may wait for multiple keys, so unblock it for every key. */
8274 for (j
= 0; j
< c
->blocking_keys_num
; j
++) {
8275 /* Remove this client from the list of clients waiting for this key. */
8276 de
= dictFind(c
->db
->blocking_keys
,c
->blocking_keys
[j
]);
8278 l
= dictGetEntryVal(de
);
8279 listDelNode(l
,listSearchKey(l
,c
));
8280 /* If the list is empty we need to remove it to avoid wasting memory */
8281 if (listLength(l
) == 0)
8282 dictDelete(c
->db
->blocking_keys
,c
->blocking_keys
[j
]);
8283 decrRefCount(c
->blocking_keys
[j
]);
8285 /* Cleanup the client structure */
8286 zfree(c
->blocking_keys
);
8287 c
->blocking_keys
= NULL
;
8288 c
->flags
&= (~REDIS_BLOCKED
);
8289 server
.blpop_blocked_clients
--;
8290 /* We want to process data if there is some command waiting
8291 * in the input buffer. Note that this is safe even if
8292 * unblockClientWaitingData() gets called from freeClient() because
8293 * freeClient() will be smart enough to call this function
8294 * *after* c->querybuf was set to NULL. */
8295 if (c
->querybuf
&& sdslen(c
->querybuf
) > 0) processInputBuffer(c
);
8298 /* This should be called from any function PUSHing into lists.
8299 * 'c' is the "pushing client", 'key' is the key it is pushing data against,
8300 * 'ele' is the element pushed.
8302 * If the function returns 0 there was no client waiting for a list push
8305 * If the function returns 1 there was a client waiting for a list push
8306 * against this key, the element was passed to this client thus it's not
8307 * needed to actually add it to the list and the caller should return asap. */
8308 static int handleClientsWaitingListPush(redisClient
*c
, robj
*key
, robj
*ele
) {
8309 struct dictEntry
*de
;
8310 redisClient
*receiver
;
8314 de
= dictFind(c
->db
->blocking_keys
,key
);
8315 if (de
== NULL
) return 0;
8316 l
= dictGetEntryVal(de
);
8319 receiver
= ln
->value
;
8321 addReplySds(receiver
,sdsnew("*2\r\n"));
8322 addReplyBulk(receiver
,key
);
8323 addReplyBulk(receiver
,ele
);
8324 unblockClientWaitingData(receiver
);
8328 /* Blocking RPOP/LPOP */
8329 static void blockingPopGenericCommand(redisClient
*c
, int where
) {
8334 for (j
= 1; j
< c
->argc
-1; j
++) {
8335 o
= lookupKeyWrite(c
->db
,c
->argv
[j
]);
8337 if (o
->type
!= REDIS_LIST
) {
8338 addReply(c
,shared
.wrongtypeerr
);
8341 list
*list
= o
->ptr
;
8342 if (listLength(list
) != 0) {
8343 /* If the list contains elements fall back to the usual
8344 * non-blocking POP operation */
8345 robj
*argv
[2], **orig_argv
;
8348 /* We need to alter the command arguments before calling
8349 * popGenericCommand() as the command takes a single key. */
8350 orig_argv
= c
->argv
;
8351 orig_argc
= c
->argc
;
8352 argv
[1] = c
->argv
[j
];
8356 /* Also the return value is different, we need to output
8357 * the multi bulk reply header and the key name. The
8358 * "real" command will add the last element (the value)
8359 * for us. If this sounds like a hack to you it's just
8360 * because it is... */
8361 addReplySds(c
,sdsnew("*2\r\n"));
8362 addReplyBulk(c
,argv
[1]);
8363 popGenericCommand(c
,where
);
8365 /* Fix the client structure with the original stuff */
8366 c
->argv
= orig_argv
;
8367 c
->argc
= orig_argc
;
8373 /* If the list is empty or the key does not exist we must block */
8374 timeout
= strtol(c
->argv
[c
->argc
-1]->ptr
,NULL
,10);
8375 if (timeout
> 0) timeout
+= time(NULL
);
8376 blockForKeys(c
,c
->argv
+1,c
->argc
-2,timeout
);
8379 static void blpopCommand(redisClient
*c
) {
8380 blockingPopGenericCommand(c
,REDIS_HEAD
);
8383 static void brpopCommand(redisClient
*c
) {
8384 blockingPopGenericCommand(c
,REDIS_TAIL
);
8387 /* =============================== Replication ============================= */
8389 static int syncWrite(int fd
, char *ptr
, ssize_t size
, int timeout
) {
8390 ssize_t nwritten
, ret
= size
;
8391 time_t start
= time(NULL
);
8395 if (aeWait(fd
,AE_WRITABLE
,1000) & AE_WRITABLE
) {
8396 nwritten
= write(fd
,ptr
,size
);
8397 if (nwritten
== -1) return -1;
8401 if ((time(NULL
)-start
) > timeout
) {
8409 static int syncRead(int fd
, char *ptr
, ssize_t size
, int timeout
) {
8410 ssize_t nread
, totread
= 0;
8411 time_t start
= time(NULL
);
8415 if (aeWait(fd
,AE_READABLE
,1000) & AE_READABLE
) {
8416 nread
= read(fd
,ptr
,size
);
8417 if (nread
== -1) return -1;
8422 if ((time(NULL
)-start
) > timeout
) {
8430 static int syncReadLine(int fd
, char *ptr
, ssize_t size
, int timeout
) {
8437 if (syncRead(fd
,&c
,1,timeout
) == -1) return -1;
8440 if (nread
&& *(ptr
-1) == '\r') *(ptr
-1) = '\0';
8451 static void syncCommand(redisClient
*c
) {
8452 /* ignore SYNC if already a slave or in monitor mode */
8453 if (c
->flags
& REDIS_SLAVE
) return;
8455 /* SYNC can't be issued when the server has pending data to send to
8456 * the client about already issued commands. We need a fresh reply
8457 * buffer registering the differences between the BGSAVE and the current
8458 * dataset, so that we can copy to other slaves if needed. */
8459 if (listLength(c
->reply
) != 0) {
8460 addReplySds(c
,sdsnew("-ERR SYNC is invalid with pending input\r\n"));
8464 redisLog(REDIS_NOTICE
,"Slave ask for synchronization");
8465 /* Here we need to check if there is a background saving operation
8466 * in progress, or if it is required to start one */
8467 if (server
.bgsavechildpid
!= -1) {
8468 /* Ok a background save is in progress. Let's check if it is a good
8469 * one for replication, i.e. if there is another slave that is
8470 * registering differences since the server forked to save */
8475 listRewind(server
.slaves
,&li
);
8476 while((ln
= listNext(&li
))) {
8478 if (slave
->replstate
== REDIS_REPL_WAIT_BGSAVE_END
) break;
8481 /* Perfect, the server is already registering differences for
8482 * another slave. Set the right state, and copy the buffer. */
8483 listRelease(c
->reply
);
8484 c
->reply
= listDup(slave
->reply
);
8485 c
->replstate
= REDIS_REPL_WAIT_BGSAVE_END
;
8486 redisLog(REDIS_NOTICE
,"Waiting for end of BGSAVE for SYNC");
8488 /* No way, we need to wait for the next BGSAVE in order to
8489 * register differences */
8490 c
->replstate
= REDIS_REPL_WAIT_BGSAVE_START
;
8491 redisLog(REDIS_NOTICE
,"Waiting for next BGSAVE for SYNC");
8494 /* Ok we don't have a BGSAVE in progress, let's start one */
8495 redisLog(REDIS_NOTICE
,"Starting BGSAVE for SYNC");
8496 if (rdbSaveBackground(server
.dbfilename
) != REDIS_OK
) {
8497 redisLog(REDIS_NOTICE
,"Replication failed, can't BGSAVE");
8498 addReplySds(c
,sdsnew("-ERR Unalbe to perform background save\r\n"));
8501 c
->replstate
= REDIS_REPL_WAIT_BGSAVE_END
;
8504 c
->flags
|= REDIS_SLAVE
;
8506 listAddNodeTail(server
.slaves
,c
);
8510 static void sendBulkToSlave(aeEventLoop
*el
, int fd
, void *privdata
, int mask
) {
8511 redisClient
*slave
= privdata
;
8513 REDIS_NOTUSED(mask
);
8514 char buf
[REDIS_IOBUF_LEN
];
8515 ssize_t nwritten
, buflen
;
8517 if (slave
->repldboff
== 0) {
8518 /* Write the bulk write count before transferring the DB. In theory here
8519 * we don't know how much room there is in the output buffer of the
8520 * socket, but in practice SO_SNDLOWAT (the minimum count for output
8521 * operations) will never be smaller than the few bytes we need. */
8524 bulkcount
= sdscatprintf(sdsempty(),"$%lld\r\n",(unsigned long long)
8526 if (write(fd
,bulkcount
,sdslen(bulkcount
)) != (signed)sdslen(bulkcount
))
8534 lseek(slave
->repldbfd
,slave
->repldboff
,SEEK_SET
);
8535 buflen
= read(slave
->repldbfd
,buf
,REDIS_IOBUF_LEN
);
8537 redisLog(REDIS_WARNING
,"Read error sending DB to slave: %s",
8538 (buflen
== 0) ? "premature EOF" : strerror(errno
));
8542 if ((nwritten
= write(fd
,buf
,buflen
)) == -1) {
8543 redisLog(REDIS_VERBOSE
,"Write error sending DB to slave: %s",
8548 slave
->repldboff
+= nwritten
;
8549 if (slave
->repldboff
== slave
->repldbsize
) {
8550 close(slave
->repldbfd
);
8551 slave
->repldbfd
= -1;
8552 aeDeleteFileEvent(server
.el
,slave
->fd
,AE_WRITABLE
);
8553 slave
->replstate
= REDIS_REPL_ONLINE
;
8554 if (aeCreateFileEvent(server
.el
, slave
->fd
, AE_WRITABLE
,
8555 sendReplyToClient
, slave
) == AE_ERR
) {
8559 addReplySds(slave
,sdsempty());
8560 redisLog(REDIS_NOTICE
,"Synchronization with slave succeeded");
8564 /* This function is called at the end of every background saving.
8565 * The argument bgsaveerr is REDIS_OK if the background saving succeeded
8566 * otherwise REDIS_ERR is passed to the function.
8568 * The goal of this function is to handle slaves waiting for a successful
8569 * background saving in order to perform non-blocking synchronization. */
8570 static void updateSlavesWaitingBgsave(int bgsaveerr
) {
8572 int startbgsave
= 0;
8575 listRewind(server
.slaves
,&li
);
8576 while((ln
= listNext(&li
))) {
8577 redisClient
*slave
= ln
->value
;
8579 if (slave
->replstate
== REDIS_REPL_WAIT_BGSAVE_START
) {
8581 slave
->replstate
= REDIS_REPL_WAIT_BGSAVE_END
;
8582 } else if (slave
->replstate
== REDIS_REPL_WAIT_BGSAVE_END
) {
8583 struct redis_stat buf
;
8585 if (bgsaveerr
!= REDIS_OK
) {
8587 redisLog(REDIS_WARNING
,"SYNC failed. BGSAVE child returned an error");
8590 if ((slave
->repldbfd
= open(server
.dbfilename
,O_RDONLY
)) == -1 ||
8591 redis_fstat(slave
->repldbfd
,&buf
) == -1) {
8593 redisLog(REDIS_WARNING
,"SYNC failed. Can't open/stat DB after BGSAVE: %s", strerror(errno
));
8596 slave
->repldboff
= 0;
8597 slave
->repldbsize
= buf
.st_size
;
8598 slave
->replstate
= REDIS_REPL_SEND_BULK
;
8599 aeDeleteFileEvent(server
.el
,slave
->fd
,AE_WRITABLE
);
8600 if (aeCreateFileEvent(server
.el
, slave
->fd
, AE_WRITABLE
, sendBulkToSlave
, slave
) == AE_ERR
) {
8607 if (rdbSaveBackground(server
.dbfilename
) != REDIS_OK
) {
8610 listRewind(server
.slaves
,&li
);
8611 redisLog(REDIS_WARNING
,"SYNC failed. BGSAVE failed");
8612 while((ln
= listNext(&li
))) {
8613 redisClient
*slave
= ln
->value
;
8615 if (slave
->replstate
== REDIS_REPL_WAIT_BGSAVE_START
)
8622 static int syncWithMaster(void) {
8623 char buf
[1024], tmpfile
[256], authcmd
[1024];
8625 int fd
= anetTcpConnect(NULL
,server
.masterhost
,server
.masterport
);
8626 int dfd
, maxtries
= 5;
8629 redisLog(REDIS_WARNING
,"Unable to connect to MASTER: %s",
8634 /* AUTH with the master if required. */
8635 if(server
.masterauth
) {
8636 snprintf(authcmd
, 1024, "AUTH %s\r\n", server
.masterauth
);
8637 if (syncWrite(fd
, authcmd
, strlen(server
.masterauth
)+7, 5) == -1) {
8639 redisLog(REDIS_WARNING
,"Unable to AUTH to MASTER: %s",
8643 /* Read the AUTH result. */
8644 if (syncReadLine(fd
,buf
,1024,3600) == -1) {
8646 redisLog(REDIS_WARNING
,"I/O error reading auth result from MASTER: %s",
8650 if (buf
[0] != '+') {
8652 redisLog(REDIS_WARNING
,"Cannot AUTH to MASTER, is the masterauth password correct?");
8657 /* Issue the SYNC command */
8658 if (syncWrite(fd
,"SYNC \r\n",7,5) == -1) {
8660 redisLog(REDIS_WARNING
,"I/O error writing to MASTER: %s",
8664 /* Read the bulk write count */
8665 if (syncReadLine(fd
,buf
,1024,3600) == -1) {
8667 redisLog(REDIS_WARNING
,"I/O error reading bulk count from MASTER: %s",
8671 if (buf
[0] != '$') {
8673 redisLog(REDIS_WARNING
,"Bad protocol from MASTER, the first byte is not '$', are you sure the host and port are right?");
8676 dumpsize
= strtol(buf
+1,NULL
,10);
8677 redisLog(REDIS_NOTICE
,"Receiving %ld bytes data dump from MASTER",dumpsize
);
8678 /* Read the bulk write data on a temp file */
8680 snprintf(tmpfile
,256,
8681 "temp-%d.%ld.rdb",(int)time(NULL
),(long int)getpid());
8682 dfd
= open(tmpfile
,O_CREAT
|O_WRONLY
|O_EXCL
,0644);
8683 if (dfd
!= -1) break;
8688 redisLog(REDIS_WARNING
,"Opening the temp file needed for MASTER <-> SLAVE synchronization: %s",strerror(errno
));
8692 int nread
, nwritten
;
8694 nread
= read(fd
,buf
,(dumpsize
< 1024)?dumpsize
:1024);
8696 redisLog(REDIS_WARNING
,"I/O error trying to sync with MASTER: %s",
8702 nwritten
= write(dfd
,buf
,nread
);
8703 if (nwritten
== -1) {
8704 redisLog(REDIS_WARNING
,"Write error writing to the DB dump file needed for MASTER <-> SLAVE synchrnonization: %s", strerror(errno
));
8712 if (rename(tmpfile
,server
.dbfilename
) == -1) {
8713 redisLog(REDIS_WARNING
,"Failed trying to rename the temp DB into dump.rdb in MASTER <-> SLAVE synchronization: %s", strerror(errno
));
8719 if (rdbLoad(server
.dbfilename
) != REDIS_OK
) {
8720 redisLog(REDIS_WARNING
,"Failed trying to load the MASTER synchronization DB from disk");
8724 server
.master
= createClient(fd
);
8725 server
.master
->flags
|= REDIS_MASTER
;
8726 server
.master
->authenticated
= 1;
8727 server
.replstate
= REDIS_REPL_CONNECTED
;
8731 static void slaveofCommand(redisClient
*c
) {
8732 if (!strcasecmp(c
->argv
[1]->ptr
,"no") &&
8733 !strcasecmp(c
->argv
[2]->ptr
,"one")) {
8734 if (server
.masterhost
) {
8735 sdsfree(server
.masterhost
);
8736 server
.masterhost
= NULL
;
8737 if (server
.master
) freeClient(server
.master
);
8738 server
.replstate
= REDIS_REPL_NONE
;
8739 redisLog(REDIS_NOTICE
,"MASTER MODE enabled (user request)");
8742 sdsfree(server
.masterhost
);
8743 server
.masterhost
= sdsdup(c
->argv
[1]->ptr
);
8744 server
.masterport
= atoi(c
->argv
[2]->ptr
);
8745 if (server
.master
) freeClient(server
.master
);
8746 server
.replstate
= REDIS_REPL_CONNECT
;
8747 redisLog(REDIS_NOTICE
,"SLAVE OF %s:%d enabled (user request)",
8748 server
.masterhost
, server
.masterport
);
8750 addReply(c
,shared
.ok
);
8753 /* ============================ Maxmemory directive ======================== */
8755 /* Try to free one object form the pre-allocated objects free list.
8756 * This is useful under low mem conditions as by default we take 1 million
8757 * free objects allocated. On success REDIS_OK is returned, otherwise
8759 static int tryFreeOneObjectFromFreelist(void) {
8762 if (server
.vm_enabled
) pthread_mutex_lock(&server
.obj_freelist_mutex
);
8763 if (listLength(server
.objfreelist
)) {
8764 listNode
*head
= listFirst(server
.objfreelist
);
8765 o
= listNodeValue(head
);
8766 listDelNode(server
.objfreelist
,head
);
8767 if (server
.vm_enabled
) pthread_mutex_unlock(&server
.obj_freelist_mutex
);
8771 if (server
.vm_enabled
) pthread_mutex_unlock(&server
.obj_freelist_mutex
);
8776 /* This function gets called when 'maxmemory' is set on the config file to limit
8777 * the max memory used by the server, and we are out of memory.
8778 * This function will try to, in order:
8780 * - Free objects from the free list
8781 * - Try to remove keys with an EXPIRE set
8783 * It is not possible to free enough memory to reach used-memory < maxmemory
8784 * the server will start refusing commands that will enlarge even more the
8787 static void freeMemoryIfNeeded(void) {
8788 while (server
.maxmemory
&& zmalloc_used_memory() > server
.maxmemory
) {
8789 int j
, k
, freed
= 0;
8791 if (tryFreeOneObjectFromFreelist() == REDIS_OK
) continue;
8792 for (j
= 0; j
< server
.dbnum
; j
++) {
8794 robj
*minkey
= NULL
;
8795 struct dictEntry
*de
;
8797 if (dictSize(server
.db
[j
].expires
)) {
8799 /* From a sample of three keys drop the one nearest to
8800 * the natural expire */
8801 for (k
= 0; k
< 3; k
++) {
8804 de
= dictGetRandomKey(server
.db
[j
].expires
);
8805 t
= (time_t) dictGetEntryVal(de
);
8806 if (minttl
== -1 || t
< minttl
) {
8807 minkey
= dictGetEntryKey(de
);
8811 dbDelete(server
.db
+j
,minkey
);
8814 if (!freed
) return; /* nothing to free... */
8818 /* ============================== Append Only file ========================== */
8820 /* Called when the user switches from "appendonly yes" to "appendonly no"
8821 * at runtime using the CONFIG command. */
8822 static void stopAppendOnly(void) {
8823 flushAppendOnlyFile();
8824 aof_fsync(server
.appendfd
);
8825 close(server
.appendfd
);
8827 server
.appendfd
= -1;
8828 server
.appendseldb
= -1;
8829 server
.appendonly
= 0;
8830 /* rewrite operation in progress? kill it, wait child exit */
8831 if (server
.bgsavechildpid
!= -1) {
8834 if (kill(server
.bgsavechildpid
,SIGKILL
) != -1)
8835 wait3(&statloc
,0,NULL
);
8836 /* reset the buffer accumulating changes while the child saves */
8837 sdsfree(server
.bgrewritebuf
);
8838 server
.bgrewritebuf
= sdsempty();
8839 server
.bgsavechildpid
= -1;
8843 /* Called when the user switches from "appendonly no" to "appendonly yes"
8844 * at runtime using the CONFIG command. */
8845 static int startAppendOnly(void) {
8846 server
.appendonly
= 1;
8847 server
.lastfsync
= time(NULL
);
8848 server
.appendfd
= open(server
.appendfilename
,O_WRONLY
|O_APPEND
|O_CREAT
,0644);
8849 if (server
.appendfd
== -1) {
8850 redisLog(REDIS_WARNING
,"Used tried to switch on AOF via CONFIG, but I can't open the AOF file: %s",strerror(errno
));
8853 if (rewriteAppendOnlyFileBackground() == REDIS_ERR
) {
8854 server
.appendonly
= 0;
8855 close(server
.appendfd
);
8856 redisLog(REDIS_WARNING
,"Used tried to switch on AOF via CONFIG, I can't trigger a background AOF rewrite operation. Check the above logs for more info about the error.",strerror(errno
));
8862 /* Write the append only file buffer on disk.
8864 * Since we are required to write the AOF before replying to the client,
8865 * and the only way the client socket can get a write is entering when the
8866 * the event loop, we accumulate all the AOF writes in a memory
8867 * buffer and write it on disk using this function just before entering
8868 * the event loop again. */
8869 static void flushAppendOnlyFile(void) {
8873 if (sdslen(server
.aofbuf
) == 0) return;
8875 /* We want to perform a single write. This should be guaranteed atomic
8876 * at least if the filesystem we are writing is a real physical one.
8877 * While this will save us against the server being killed I don't think
8878 * there is much to do about the whole server stopping for power problems
8880 nwritten
= write(server
.appendfd
,server
.aofbuf
,sdslen(server
.aofbuf
));
8881 if (nwritten
!= (signed)sdslen(server
.aofbuf
)) {
8882 /* Ooops, we are in troubles. The best thing to do for now is
8883 * aborting instead of giving the illusion that everything is
8884 * working as expected. */
8885 if (nwritten
== -1) {
8886 redisLog(REDIS_WARNING
,"Exiting on error writing to the append-only file: %s",strerror(errno
));
8888 redisLog(REDIS_WARNING
,"Exiting on short write while writing to the append-only file: %s",strerror(errno
));
8892 sdsfree(server
.aofbuf
);
8893 server
.aofbuf
= sdsempty();
8895 /* Don't Fsync if no-appendfsync-on-rewrite is set to yes and we have
8896 * childs performing heavy I/O on disk. */
8897 if (server
.no_appendfsync_on_rewrite
&&
8898 (server
.bgrewritechildpid
!= -1 || server
.bgsavechildpid
!= -1))
8900 /* Fsync if needed */
8902 if (server
.appendfsync
== APPENDFSYNC_ALWAYS
||
8903 (server
.appendfsync
== APPENDFSYNC_EVERYSEC
&&
8904 now
-server
.lastfsync
> 1))
8906 /* aof_fsync is defined as fdatasync() for Linux in order to avoid
8907 * flushing metadata. */
8908 aof_fsync(server
.appendfd
); /* Let's try to get this data on the disk */
8909 server
.lastfsync
= now
;
8913 static sds
catAppendOnlyGenericCommand(sds buf
, int argc
, robj
**argv
) {
8915 buf
= sdscatprintf(buf
,"*%d\r\n",argc
);
8916 for (j
= 0; j
< argc
; j
++) {
8917 robj
*o
= getDecodedObject(argv
[j
]);
8918 buf
= sdscatprintf(buf
,"$%lu\r\n",(unsigned long)sdslen(o
->ptr
));
8919 buf
= sdscatlen(buf
,o
->ptr
,sdslen(o
->ptr
));
8920 buf
= sdscatlen(buf
,"\r\n",2);
8926 static sds
catAppendOnlyExpireAtCommand(sds buf
, robj
*key
, robj
*seconds
) {
8931 /* Make sure we can use strtol */
8932 seconds
= getDecodedObject(seconds
);
8933 when
= time(NULL
)+strtol(seconds
->ptr
,NULL
,10);
8934 decrRefCount(seconds
);
8936 argv
[0] = createStringObject("EXPIREAT",8);
8938 argv
[2] = createObject(REDIS_STRING
,
8939 sdscatprintf(sdsempty(),"%ld",when
));
8940 buf
= catAppendOnlyGenericCommand(buf
, argc
, argv
);
8941 decrRefCount(argv
[0]);
8942 decrRefCount(argv
[2]);
8946 static void feedAppendOnlyFile(struct redisCommand
*cmd
, int dictid
, robj
**argv
, int argc
) {
8947 sds buf
= sdsempty();
8950 /* The DB this command was targetting is not the same as the last command
8951 * we appendend. To issue a SELECT command is needed. */
8952 if (dictid
!= server
.appendseldb
) {
8955 snprintf(seldb
,sizeof(seldb
),"%d",dictid
);
8956 buf
= sdscatprintf(buf
,"*2\r\n$6\r\nSELECT\r\n$%lu\r\n%s\r\n",
8957 (unsigned long)strlen(seldb
),seldb
);
8958 server
.appendseldb
= dictid
;
8961 if (cmd
->proc
== expireCommand
) {
8962 /* Translate EXPIRE into EXPIREAT */
8963 buf
= catAppendOnlyExpireAtCommand(buf
,argv
[1],argv
[2]);
8964 } else if (cmd
->proc
== setexCommand
) {
8965 /* Translate SETEX to SET and EXPIREAT */
8966 tmpargv
[0] = createStringObject("SET",3);
8967 tmpargv
[1] = argv
[1];
8968 tmpargv
[2] = argv
[3];
8969 buf
= catAppendOnlyGenericCommand(buf
,3,tmpargv
);
8970 decrRefCount(tmpargv
[0]);
8971 buf
= catAppendOnlyExpireAtCommand(buf
,argv
[1],argv
[2]);
8973 buf
= catAppendOnlyGenericCommand(buf
,argc
,argv
);
8976 /* Append to the AOF buffer. This will be flushed on disk just before
8977 * of re-entering the event loop, so before the client will get a
8978 * positive reply about the operation performed. */
8979 server
.aofbuf
= sdscatlen(server
.aofbuf
,buf
,sdslen(buf
));
8981 /* If a background append only file rewriting is in progress we want to
8982 * accumulate the differences between the child DB and the current one
8983 * in a buffer, so that when the child process will do its work we
8984 * can append the differences to the new append only file. */
8985 if (server
.bgrewritechildpid
!= -1)
8986 server
.bgrewritebuf
= sdscatlen(server
.bgrewritebuf
,buf
,sdslen(buf
));
8991 /* In Redis commands are always executed in the context of a client, so in
8992 * order to load the append only file we need to create a fake client. */
8993 static struct redisClient
*createFakeClient(void) {
8994 struct redisClient
*c
= zmalloc(sizeof(*c
));
8998 c
->querybuf
= sdsempty();
9002 /* We set the fake client as a slave waiting for the synchronization
9003 * so that Redis will not try to send replies to this client. */
9004 c
->replstate
= REDIS_REPL_WAIT_BGSAVE_START
;
9005 c
->reply
= listCreate();
9006 listSetFreeMethod(c
->reply
,decrRefCount
);
9007 listSetDupMethod(c
->reply
,dupClientReplyValue
);
9008 initClientMultiState(c
);
9012 static void freeFakeClient(struct redisClient
*c
) {
9013 sdsfree(c
->querybuf
);
9014 listRelease(c
->reply
);
9015 freeClientMultiState(c
);
9019 /* Replay the append log file. On error REDIS_OK is returned. On non fatal
9020 * error (the append only file is zero-length) REDIS_ERR is returned. On
9021 * fatal error an error message is logged and the program exists. */
9022 int loadAppendOnlyFile(char *filename
) {
9023 struct redisClient
*fakeClient
;
9024 FILE *fp
= fopen(filename
,"r");
9025 struct redis_stat sb
;
9026 int appendonly
= server
.appendonly
;
9028 if (redis_fstat(fileno(fp
),&sb
) != -1 && sb
.st_size
== 0)
9032 redisLog(REDIS_WARNING
,"Fatal error: can't open the append log file for reading: %s",strerror(errno
));
9036 /* Temporarily disable AOF, to prevent EXEC from feeding a MULTI
9037 * to the same file we're about to read. */
9038 server
.appendonly
= 0;
9040 fakeClient
= createFakeClient();
9047 struct redisCommand
*cmd
;
9050 if (fgets(buf
,sizeof(buf
),fp
) == NULL
) {
9056 if (buf
[0] != '*') goto fmterr
;
9058 argv
= zmalloc(sizeof(robj
*)*argc
);
9059 for (j
= 0; j
< argc
; j
++) {
9060 if (fgets(buf
,sizeof(buf
),fp
) == NULL
) goto readerr
;
9061 if (buf
[0] != '$') goto fmterr
;
9062 len
= strtol(buf
+1,NULL
,10);
9063 argsds
= sdsnewlen(NULL
,len
);
9064 if (len
&& fread(argsds
,len
,1,fp
) == 0) goto fmterr
;
9065 argv
[j
] = createObject(REDIS_STRING
,argsds
);
9066 if (fread(buf
,2,1,fp
) == 0) goto fmterr
; /* discard CRLF */
9069 /* Command lookup */
9070 cmd
= lookupCommand(argv
[0]->ptr
);
9072 redisLog(REDIS_WARNING
,"Unknown command '%s' reading the append only file", argv
[0]->ptr
);
9075 /* Try object encoding */
9076 if (cmd
->flags
& REDIS_CMD_BULK
)
9077 argv
[argc
-1] = tryObjectEncoding(argv
[argc
-1]);
9078 /* Run the command in the context of a fake client */
9079 fakeClient
->argc
= argc
;
9080 fakeClient
->argv
= argv
;
9081 cmd
->proc(fakeClient
);
9082 /* Discard the reply objects list from the fake client */
9083 while(listLength(fakeClient
->reply
))
9084 listDelNode(fakeClient
->reply
,listFirst(fakeClient
->reply
));
9085 /* Clean up, ready for the next command */
9086 for (j
= 0; j
< argc
; j
++) decrRefCount(argv
[j
]);
9088 /* Handle swapping while loading big datasets when VM is on */
9090 if ((zmalloc_used_memory() - server
.vm_max_memory
) > 1024*1024*32)
9093 if (server
.vm_enabled
&& force_swapout
) {
9094 while (zmalloc_used_memory() > server
.vm_max_memory
) {
9095 if (vmSwapOneObjectBlocking() == REDIS_ERR
) break;
9100 /* This point can only be reached when EOF is reached without errors.
9101 * If the client is in the middle of a MULTI/EXEC, log error and quit. */
9102 if (fakeClient
->flags
& REDIS_MULTI
) goto readerr
;
9105 freeFakeClient(fakeClient
);
9106 server
.appendonly
= appendonly
;
9111 redisLog(REDIS_WARNING
,"Unexpected end of file reading the append only file");
9113 redisLog(REDIS_WARNING
,"Unrecoverable error reading the append only file: %s", strerror(errno
));
9117 redisLog(REDIS_WARNING
,"Bad file format reading the append only file");
/* Write the binary-safe string 's' of 'len' bytes into 'fp' in the bulk
 * format: $<count>\r\n<payload>\r\n
 * Returns 1 on success, 0 on write error. */
static int fwriteBulkString(FILE *fp, char *s, unsigned long len) {
    char header[128];
    int hlen;

    /* Build the "$<count>\r\n" header. */
    header[0] = '$';
    hlen = 1+ll2string(header+1,sizeof(header)-1,len);
    header[hlen] = '\r';
    header[hlen+1] = '\n';
    hlen += 2;

    if (fwrite(header,hlen,1,fp) == 0) return 0;
    /* An empty payload is legal: nothing to write between the CRLFs. */
    if (len > 0 && fwrite(s,len,1,fp) == 0) return 0;
    if (fwrite("\r\n",2,1,fp) == 0) return 0;
    return 1;
}
/* Write the double 'd' into 'fp' in the bulk format
 * $<count>\r\n<payload>\r\n using the "%.17g" representation.
 * Returns 1 on success, 0 on write error. */
static int fwriteBulkDouble(FILE *fp, double d) {
    char header[128], payload[128];

    /* The payload is rendered together with its trailing CRLF, that is
     * not part of the advertised count: hence the -2. */
    snprintf(payload,sizeof(payload),"%.17g\r\n",d);
    snprintf(header,sizeof(header),"$%lu\r\n",
        (unsigned long)strlen(payload)-2);

    if (fwrite(header,strlen(header),1,fp) == 0 ||
        fwrite(payload,strlen(payload),1,fp) == 0)
        return 0;
    return 1;
}
/* Write the integer 'l' into 'fp' in the bulk format
 * $<count>\r\n<payload>\r\n
 * Returns 1 on success, 0 on write error. */
static int fwriteBulkLongLong(FILE *fp, long long l) {
    char digits[128], bulk[128];
    unsigned int ndigits, bulklen;

    ndigits = ll2string(digits,32,l);
    /* Header and payload are formatted together and written at once. */
    bulklen = snprintf(bulk,sizeof(bulk),"$%u\r\n%s\r\n",ndigits,digits);
    return (fwrite(bulk,bulklen,1,fp) == 0) ? 0 : 1;
}
9157 /* Delegate writing an object to writing a bulk string or bulk long long. */
9158 static int fwriteBulkObject(FILE *fp
, robj
*obj
) {
9159 /* Avoid using getDecodedObject to help copy-on-write (we are often
9160 * in a child process when this function is called). */
9161 if (obj
->encoding
== REDIS_ENCODING_INT
) {
9162 return fwriteBulkLongLong(fp
,(long)obj
->ptr
);
9163 } else if (obj
->encoding
== REDIS_ENCODING_RAW
) {
9164 return fwriteBulkString(fp
,obj
->ptr
,sdslen(obj
->ptr
));
9166 redisPanic("Unknown string encoding");
9170 /* Write a sequence of commands able to fully rebuild the dataset into
9171 * "filename". Used both by REWRITEAOF and BGREWRITEAOF. */
9172 static int rewriteAppendOnlyFile(char *filename
) {
9173 dictIterator
*di
= NULL
;
9178 time_t now
= time(NULL
);
9180 /* Note that we have to use a different temp name here compared to the
9181 * one used by rewriteAppendOnlyFileBackground() function. */
9182 snprintf(tmpfile
,256,"temp-rewriteaof-%d.aof", (int) getpid());
9183 fp
= fopen(tmpfile
,"w");
9185 redisLog(REDIS_WARNING
, "Failed rewriting the append only file: %s", strerror(errno
));
9188 for (j
= 0; j
< server
.dbnum
; j
++) {
9189 char selectcmd
[] = "*2\r\n$6\r\nSELECT\r\n";
9190 redisDb
*db
= server
.db
+j
;
9192 if (dictSize(d
) == 0) continue;
9193 di
= dictGetIterator(d
);
9199 /* SELECT the new DB */
9200 if (fwrite(selectcmd
,sizeof(selectcmd
)-1,1,fp
) == 0) goto werr
;
9201 if (fwriteBulkLongLong(fp
,j
) == 0) goto werr
;
9203 /* Iterate this DB writing every entry */
9204 while((de
= dictNext(di
)) != NULL
) {
9205 sds keystr
= dictGetEntryKey(de
);
9210 keystr
= dictGetEntryKey(de
);
9211 o
= dictGetEntryVal(de
);
9212 initStaticStringObject(key
,keystr
);
9213 /* If the value for this key is swapped, load a preview in memory.
9214 * We use a "swapped" flag to remember if we need to free the
9215 * value object instead to just increment the ref count anyway
9216 * in order to avoid copy-on-write of pages if we are forked() */
9217 if (!server
.vm_enabled
|| o
->storage
== REDIS_VM_MEMORY
||
9218 o
->storage
== REDIS_VM_SWAPPING
) {
9221 o
= vmPreviewObject(o
);
9224 expiretime
= getExpire(db
,&key
);
9226 /* Save the key and associated value */
9227 if (o
->type
== REDIS_STRING
) {
9228 /* Emit a SET command */
9229 char cmd
[]="*3\r\n$3\r\nSET\r\n";
9230 if (fwrite(cmd
,sizeof(cmd
)-1,1,fp
) == 0) goto werr
;
9232 if (fwriteBulkObject(fp
,&key
) == 0) goto werr
;
9233 if (fwriteBulkObject(fp
,o
) == 0) goto werr
;
9234 } else if (o
->type
== REDIS_LIST
) {
9235 /* Emit the RPUSHes needed to rebuild the list */
9236 char cmd
[]="*3\r\n$5\r\nRPUSH\r\n";
9237 if (o
->encoding
== REDIS_ENCODING_ZIPLIST
) {
9238 unsigned char *zl
= o
->ptr
;
9239 unsigned char *p
= ziplistIndex(zl
,0);
9240 unsigned char *vstr
;
9244 while(ziplistGet(p
,&vstr
,&vlen
,&vlong
)) {
9245 if (fwrite(cmd
,sizeof(cmd
)-1,1,fp
) == 0) goto werr
;
9246 if (fwriteBulkObject(fp
,&key
) == 0) goto werr
;
9248 if (fwriteBulkString(fp
,(char*)vstr
,vlen
) == 0)
9251 if (fwriteBulkLongLong(fp
,vlong
) == 0)
9254 p
= ziplistNext(zl
,p
);
9256 } else if (o
->encoding
== REDIS_ENCODING_LIST
) {
9257 list
*list
= o
->ptr
;
9261 listRewind(list
,&li
);
9262 while((ln
= listNext(&li
))) {
9263 robj
*eleobj
= listNodeValue(ln
);
9265 if (fwrite(cmd
,sizeof(cmd
)-1,1,fp
) == 0) goto werr
;
9266 if (fwriteBulkObject(fp
,&key
) == 0) goto werr
;
9267 if (fwriteBulkObject(fp
,eleobj
) == 0) goto werr
;
9270 redisPanic("Unknown list encoding");
9272 } else if (o
->type
== REDIS_SET
) {
9273 /* Emit the SADDs needed to rebuild the set */
9275 dictIterator
*di
= dictGetIterator(set
);
9278 while((de
= dictNext(di
)) != NULL
) {
9279 char cmd
[]="*3\r\n$4\r\nSADD\r\n";
9280 robj
*eleobj
= dictGetEntryKey(de
);
9282 if (fwrite(cmd
,sizeof(cmd
)-1,1,fp
) == 0) goto werr
;
9283 if (fwriteBulkObject(fp
,&key
) == 0) goto werr
;
9284 if (fwriteBulkObject(fp
,eleobj
) == 0) goto werr
;
9286 dictReleaseIterator(di
);
9287 } else if (o
->type
== REDIS_ZSET
) {
9288 /* Emit the ZADDs needed to rebuild the sorted set */
9290 dictIterator
*di
= dictGetIterator(zs
->dict
);
9293 while((de
= dictNext(di
)) != NULL
) {
9294 char cmd
[]="*4\r\n$4\r\nZADD\r\n";
9295 robj
*eleobj
= dictGetEntryKey(de
);
9296 double *score
= dictGetEntryVal(de
);
9298 if (fwrite(cmd
,sizeof(cmd
)-1,1,fp
) == 0) goto werr
;
9299 if (fwriteBulkObject(fp
,&key
) == 0) goto werr
;
9300 if (fwriteBulkDouble(fp
,*score
) == 0) goto werr
;
9301 if (fwriteBulkObject(fp
,eleobj
) == 0) goto werr
;
9303 dictReleaseIterator(di
);
9304 } else if (o
->type
== REDIS_HASH
) {
9305 char cmd
[]="*4\r\n$4\r\nHSET\r\n";
9307 /* Emit the HSETs needed to rebuild the hash */
9308 if (o
->encoding
== REDIS_ENCODING_ZIPMAP
) {
9309 unsigned char *p
= zipmapRewind(o
->ptr
);
9310 unsigned char *field
, *val
;
9311 unsigned int flen
, vlen
;
9313 while((p
= zipmapNext(p
,&field
,&flen
,&val
,&vlen
)) != NULL
) {
9314 if (fwrite(cmd
,sizeof(cmd
)-1,1,fp
) == 0) goto werr
;
9315 if (fwriteBulkObject(fp
,&key
) == 0) goto werr
;
9316 if (fwriteBulkString(fp
,(char*)field
,flen
) == -1)
9318 if (fwriteBulkString(fp
,(char*)val
,vlen
) == -1)
9322 dictIterator
*di
= dictGetIterator(o
->ptr
);
9325 while((de
= dictNext(di
)) != NULL
) {
9326 robj
*field
= dictGetEntryKey(de
);
9327 robj
*val
= dictGetEntryVal(de
);
9329 if (fwrite(cmd
,sizeof(cmd
)-1,1,fp
) == 0) goto werr
;
9330 if (fwriteBulkObject(fp
,&key
) == 0) goto werr
;
9331 if (fwriteBulkObject(fp
,field
) == -1) return -1;
9332 if (fwriteBulkObject(fp
,val
) == -1) return -1;
9334 dictReleaseIterator(di
);
9337 redisPanic("Unknown object type");
9339 /* Save the expire time */
9340 if (expiretime
!= -1) {
9341 char cmd
[]="*3\r\n$8\r\nEXPIREAT\r\n";
9342 /* If this key is already expired skip it */
9343 if (expiretime
< now
) continue;
9344 if (fwrite(cmd
,sizeof(cmd
)-1,1,fp
) == 0) goto werr
;
9345 if (fwriteBulkObject(fp
,&key
) == 0) goto werr
;
9346 if (fwriteBulkLongLong(fp
,expiretime
) == 0) goto werr
;
9348 if (swapped
) decrRefCount(o
);
9350 dictReleaseIterator(di
);
9353 /* Make sure data will not remain on the OS's output buffers */
9355 aof_fsync(fileno(fp
));
9358 /* Use RENAME to make sure the DB file is changed atomically only
9359 * if the generate DB file is ok. */
9360 if (rename(tmpfile
,filename
) == -1) {
9361 redisLog(REDIS_WARNING
,"Error moving temp append only file on the final destination: %s", strerror(errno
));
9365 redisLog(REDIS_NOTICE
,"SYNC append only file rewrite performed");
9371 redisLog(REDIS_WARNING
,"Write error writing append only file on disk: %s", strerror(errno
));
9372 if (di
) dictReleaseIterator(di
);
9376 /* This is how rewriting of the append only file in background works:
9378 * 1) The user calls BGREWRITEAOF
9379 * 2) Redis calls this function, that forks():
9380 * 2a) the child rewrite the append only file in a temp file.
9381 * 2b) the parent accumulates differences in server.bgrewritebuf.
9382 * 3) When the child finished '2a' exists.
9383 * 4) The parent will trap the exit code, if it's OK, will append the
9384 * data accumulated into server.bgrewritebuf into the temp file, and
9385 * finally will rename(2) the temp file in the actual file name.
9386 * The the new file is reopened as the new append only file. Profit!
9388 static int rewriteAppendOnlyFileBackground(void) {
9391 if (server
.bgrewritechildpid
!= -1) return REDIS_ERR
;
9392 if (server
.vm_enabled
) waitEmptyIOJobsQueue();
9393 if ((childpid
= fork()) == 0) {
9397 if (server
.vm_enabled
) vmReopenSwapFile();
9399 snprintf(tmpfile
,256,"temp-rewriteaof-bg-%d.aof", (int) getpid());
9400 if (rewriteAppendOnlyFile(tmpfile
) == REDIS_OK
) {
9407 if (childpid
== -1) {
9408 redisLog(REDIS_WARNING
,
9409 "Can't rewrite append only file in background: fork: %s",
9413 redisLog(REDIS_NOTICE
,
9414 "Background append only file rewriting started by pid %d",childpid
);
9415 server
.bgrewritechildpid
= childpid
;
9416 updateDictResizePolicy();
9417 /* We set appendseldb to -1 in order to force the next call to the
9418 * feedAppendOnlyFile() to issue a SELECT command, so the differences
9419 * accumulated by the parent into server.bgrewritebuf will start
9420 * with a SELECT statement and it will be safe to merge. */
9421 server
.appendseldb
= -1;
9424 return REDIS_OK
; /* unreached */
9427 static void bgrewriteaofCommand(redisClient
*c
) {
9428 if (server
.bgrewritechildpid
!= -1) {
9429 addReplySds(c
,sdsnew("-ERR background append only file rewriting already in progress\r\n"));
9432 if (rewriteAppendOnlyFileBackground() == REDIS_OK
) {
9433 char *status
= "+Background append only file rewriting started\r\n";
9434 addReplySds(c
,sdsnew(status
));
9436 addReply(c
,shared
.err
);
/* Remove the temp file "temp-rewriteaof-bg-<childpid>.aof" from the
 * current directory. The result of unlink() is ignored. */
static void aofRemoveTempFile(pid_t childpid) {
    char path[256];

    snprintf(path,sizeof(path),"temp-rewriteaof-bg-%d.aof", (int) childpid);
    unlink(path);
}
9447 /* Virtual Memory is composed mainly of two subsystems:
9448 * - Blocking Virtual Memory
9449 * - Threaded Virtual Memory I/O
9450 * The two parts are not fully decoupled, but functions are split among two
9451 * different sections of the source code (delimited by comments) in order to
9452 * make more clear what functionality is about the blocking VM and what about
9453 * the threaded (not blocking) VM.
9457 * Redis VM is a blocking VM (one that blocks reading swapped values from
9458 * disk into memory when a value swapped out is needed in memory) that is made
9459 * unblocking by trying to examine the command argument vector in order to
9460 * load in background values that will likely be needed in order to exec
9461 * the command. The command is executed only once all the relevant keys
9462 * are loaded into memory.
9464 * This basically is almost as simple as a blocking VM, but almost as parallel
9465 * as a fully non-blocking VM.
9468 /* =================== Virtual Memory - Blocking Side ====================== */
9470 /* Create a VM pointer object. This kind of objects are used in place of
9471 * values in the key -> value hash table, for swapped out objects. */
9472 static vmpointer
*createVmPointer(int vtype
) {
9473 vmpointer
*vp
= zmalloc(sizeof(vmpointer
));
9475 vp
->type
= REDIS_VMPOINTER
;
9476 vp
->storage
= REDIS_VM_SWAPPED
;
9481 static void vmInit(void) {
9487 if (server
.vm_max_threads
!= 0)
9488 zmalloc_enable_thread_safeness(); /* we need thread safe zmalloc() */
9490 redisLog(REDIS_NOTICE
,"Using '%s' as swap file",server
.vm_swap_file
);
9491 /* Try to open the old swap file, otherwise create it */
9492 if ((server
.vm_fp
= fopen(server
.vm_swap_file
,"r+b")) == NULL
) {
9493 server
.vm_fp
= fopen(server
.vm_swap_file
,"w+b");
9495 if (server
.vm_fp
== NULL
) {
9496 redisLog(REDIS_WARNING
,
9497 "Can't open the swap file: %s. Exiting.",
9501 server
.vm_fd
= fileno(server
.vm_fp
);
9502 /* Lock the swap file for writing, this is useful in order to avoid
9503 * another instance to use the same swap file for a config error. */
9504 fl
.l_type
= F_WRLCK
;
9505 fl
.l_whence
= SEEK_SET
;
9506 fl
.l_start
= fl
.l_len
= 0;
9507 if (fcntl(server
.vm_fd
,F_SETLK
,&fl
) == -1) {
9508 redisLog(REDIS_WARNING
,
9509 "Can't lock the swap file at '%s': %s. Make sure it is not used by another Redis instance.", server
.vm_swap_file
, strerror(errno
));
9513 server
.vm_next_page
= 0;
9514 server
.vm_near_pages
= 0;
9515 server
.vm_stats_used_pages
= 0;
9516 server
.vm_stats_swapped_objects
= 0;
9517 server
.vm_stats_swapouts
= 0;
9518 server
.vm_stats_swapins
= 0;
9519 totsize
= server
.vm_pages
*server
.vm_page_size
;
9520 redisLog(REDIS_NOTICE
,"Allocating %lld bytes of swap file",totsize
);
9521 if (ftruncate(server
.vm_fd
,totsize
) == -1) {
9522 redisLog(REDIS_WARNING
,"Can't ftruncate swap file: %s. Exiting.",
9526 redisLog(REDIS_NOTICE
,"Swap file allocated with success");
9528 server
.vm_bitmap
= zmalloc((server
.vm_pages
+7)/8);
9529 redisLog(REDIS_VERBOSE
,"Allocated %lld bytes page table for %lld pages",
9530 (long long) (server
.vm_pages
+7)/8, server
.vm_pages
);
9531 memset(server
.vm_bitmap
,0,(server
.vm_pages
+7)/8);
9533 /* Initialize threaded I/O (used by Virtual Memory) */
9534 server
.io_newjobs
= listCreate();
9535 server
.io_processing
= listCreate();
9536 server
.io_processed
= listCreate();
9537 server
.io_ready_clients
= listCreate();
9538 pthread_mutex_init(&server
.io_mutex
,NULL
);
9539 pthread_mutex_init(&server
.obj_freelist_mutex
,NULL
);
9540 pthread_mutex_init(&server
.io_swapfile_mutex
,NULL
);
9541 server
.io_active_threads
= 0;
9542 if (pipe(pipefds
) == -1) {
9543 redisLog(REDIS_WARNING
,"Unable to intialized VM: pipe(2): %s. Exiting."
9547 server
.io_ready_pipe_read
= pipefds
[0];
9548 server
.io_ready_pipe_write
= pipefds
[1];
9549 redisAssert(anetNonBlock(NULL
,server
.io_ready_pipe_read
) != ANET_ERR
);
9550 /* LZF requires a lot of stack */
9551 pthread_attr_init(&server
.io_threads_attr
);
9552 pthread_attr_getstacksize(&server
.io_threads_attr
, &stacksize
);
9553 while (stacksize
< REDIS_THREAD_STACK_SIZE
) stacksize
*= 2;
9554 pthread_attr_setstacksize(&server
.io_threads_attr
, stacksize
);
9555 /* Listen for events in the threaded I/O pipe */
9556 if (aeCreateFileEvent(server
.el
, server
.io_ready_pipe_read
, AE_READABLE
,
9557 vmThreadedIOCompletedJob
, NULL
) == AE_ERR
)
9558 oom("creating file event");
9561 /* Mark the page as used */
9562 static void vmMarkPageUsed(off_t page
) {
9563 off_t byte
= page
/8;
9565 redisAssert(vmFreePage(page
) == 1);
9566 server
.vm_bitmap
[byte
] |= 1<<bit
;
9569 /* Mark N contiguous pages as used, with 'page' being the first. */
9570 static void vmMarkPagesUsed(off_t page
, off_t count
) {
9573 for (j
= 0; j
< count
; j
++)
9574 vmMarkPageUsed(page
+j
);
9575 server
.vm_stats_used_pages
+= count
;
9576 redisLog(REDIS_DEBUG
,"Mark USED pages: %lld pages at %lld\n",
9577 (long long)count
, (long long)page
);
9580 /* Mark the page as free */
9581 static void vmMarkPageFree(off_t page
) {
9582 off_t byte
= page
/8;
9584 redisAssert(vmFreePage(page
) == 0);
9585 server
.vm_bitmap
[byte
] &= ~(1<<bit
);
9588 /* Mark N contiguous pages as free, with 'page' being the first. */
9589 static void vmMarkPagesFree(off_t page
, off_t count
) {
9592 for (j
= 0; j
< count
; j
++)
9593 vmMarkPageFree(page
+j
);
9594 server
.vm_stats_used_pages
-= count
;
9595 redisLog(REDIS_DEBUG
,"Mark FREE pages: %lld pages at %lld\n",
9596 (long long)count
, (long long)page
);
9599 /* Test if the page is free */
9600 static int vmFreePage(off_t page
) {
9601 off_t byte
= page
/8;
9603 return (server
.vm_bitmap
[byte
] & (1<<bit
)) == 0;
9606 /* Find N contiguous free pages storing the first page of the cluster in *first.
9607 * Returns REDIS_OK if it was able to find N contiguous pages, otherwise
9608 * REDIS_ERR is returned.
9610 * This function uses a simple algorithm: we try to allocate
9611 * REDIS_VM_MAX_NEAR_PAGES sequentially, when we reach this limit we start
9612 * again from the start of the swap file searching for free spaces.
9614 * If it looks pretty clear that there are no free pages near our offset
9615 * we try to find less populated places doing a forward jump of
9616 * REDIS_VM_MAX_RANDOM_JUMP, then we start scanning again a few pages
9617 * without hurry, and then we jump again and so forth...
9619 * This function can be improved using a free list to avoid to guess
9620 * too much, since we could collect data about freed pages.
9622 * note: I implemented this function just after watching an episode of
9623 * Battlestar Galactica, where the hybrid was continuing to say "JUMP!"
9625 static int vmFindContiguousPages(off_t
*first
, off_t n
) {
9626 off_t base
, offset
= 0, since_jump
= 0, numfree
= 0;
9628 if (server
.vm_near_pages
== REDIS_VM_MAX_NEAR_PAGES
) {
9629 server
.vm_near_pages
= 0;
9630 server
.vm_next_page
= 0;
9632 server
.vm_near_pages
++; /* Yet another try for pages near to the old ones */
9633 base
= server
.vm_next_page
;
9635 while(offset
< server
.vm_pages
) {
9636 off_t
this = base
+offset
;
9638 /* If we overflow, restart from page zero */
9639 if (this >= server
.vm_pages
) {
9640 this -= server
.vm_pages
;
9642 /* Just overflowed, what we found on tail is no longer
9643 * interesting, as it's no longer contiguous. */
9647 if (vmFreePage(this)) {
9648 /* This is a free page */
9650 /* Already got N free pages? Return to the caller, with success */
9652 *first
= this-(n
-1);
9653 server
.vm_next_page
= this+1;
9654 redisLog(REDIS_DEBUG
, "FOUND CONTIGUOUS PAGES: %lld pages at %lld\n", (long long) n
, (long long) *first
);
9658 /* The current one is not a free page */
9662 /* Fast-forward if the current page is not free and we already
9663 * searched enough near this place. */
9665 if (!numfree
&& since_jump
>= REDIS_VM_MAX_RANDOM_JUMP
/4) {
9666 offset
+= random() % REDIS_VM_MAX_RANDOM_JUMP
;
9668 /* Note that even if we rewind after the jump, we are don't need
9669 * to make sure numfree is set to zero as we only jump *if* it
9670 * is set to zero. */
9672 /* Otherwise just check the next page */
9679 /* Write the specified object at the specified page of the swap file */
9680 static int vmWriteObjectOnSwap(robj
*o
, off_t page
) {
9681 if (server
.vm_enabled
) pthread_mutex_lock(&server
.io_swapfile_mutex
);
9682 if (fseeko(server
.vm_fp
,page
*server
.vm_page_size
,SEEK_SET
) == -1) {
9683 if (server
.vm_enabled
) pthread_mutex_unlock(&server
.io_swapfile_mutex
);
9684 redisLog(REDIS_WARNING
,
9685 "Critical VM problem in vmWriteObjectOnSwap(): can't seek: %s",
9689 rdbSaveObject(server
.vm_fp
,o
);
9690 fflush(server
.vm_fp
);
9691 if (server
.vm_enabled
) pthread_mutex_unlock(&server
.io_swapfile_mutex
);
9695 /* Transfers the 'val' object to disk. Store all the information
9696 * a 'vmpointer' object containing all the information needed to load the
9697 * object back later is returned.
9699 * If we can't find enough contiguous empty pages to swap the object on disk
9700 * NULL is returned. */
9701 static vmpointer
*vmSwapObjectBlocking(robj
*val
) {
9702 off_t pages
= rdbSavedObjectPages(val
,NULL
);
9706 assert(val
->storage
== REDIS_VM_MEMORY
);
9707 assert(val
->refcount
== 1);
9708 if (vmFindContiguousPages(&page
,pages
) == REDIS_ERR
) return NULL
;
9709 if (vmWriteObjectOnSwap(val
,page
) == REDIS_ERR
) return NULL
;
9711 vp
= createVmPointer(val
->type
);
9713 vp
->usedpages
= pages
;
9714 decrRefCount(val
); /* Deallocate the object from memory. */
9715 vmMarkPagesUsed(page
,pages
);
9716 redisLog(REDIS_DEBUG
,"VM: object %p swapped out at %lld (%lld pages)",
9718 (unsigned long long) page
, (unsigned long long) pages
);
9719 server
.vm_stats_swapped_objects
++;
9720 server
.vm_stats_swapouts
++;
9724 static robj
*vmReadObjectFromSwap(off_t page
, int type
) {
9727 if (server
.vm_enabled
) pthread_mutex_lock(&server
.io_swapfile_mutex
);
9728 if (fseeko(server
.vm_fp
,page
*server
.vm_page_size
,SEEK_SET
) == -1) {
9729 redisLog(REDIS_WARNING
,
9730 "Unrecoverable VM problem in vmReadObjectFromSwap(): can't seek: %s",
9734 o
= rdbLoadObject(type
,server
.vm_fp
);
9736 redisLog(REDIS_WARNING
, "Unrecoverable VM problem in vmReadObjectFromSwap(): can't load object from swap file: %s", strerror(errno
));
9739 if (server
.vm_enabled
) pthread_mutex_unlock(&server
.io_swapfile_mutex
);
9743 /* Load the specified object from swap to memory.
9744 * The newly allocated object is returned.
9746 * If preview is true the unserialized object is returned to the caller but
9747 * the pages are not marked as freed, nor the vp object is freed. */
9748 static robj
*vmGenericLoadObject(vmpointer
*vp
, int preview
) {
9751 redisAssert(vp
->type
== REDIS_VMPOINTER
&&
9752 (vp
->storage
== REDIS_VM_SWAPPED
|| vp
->storage
== REDIS_VM_LOADING
));
9753 val
= vmReadObjectFromSwap(vp
->page
,vp
->vtype
);
9755 redisLog(REDIS_DEBUG
, "VM: object %p loaded from disk", (void*)vp
);
9756 vmMarkPagesFree(vp
->page
,vp
->usedpages
);
9758 server
.vm_stats_swapped_objects
--;
9760 redisLog(REDIS_DEBUG
, "VM: object %p previewed from disk", (void*)vp
);
9762 server
.vm_stats_swapins
++;
9766 /* Plain object loading, from swap to memory.
9768 * 'o' is actually a redisVmPointer structure that will be freed by the call.
9769 * The return value is the loaded object. */
9770 static robj
*vmLoadObject(robj
*o
) {
9771 /* If we are loading the object in background, stop it, we
9772 * need to load this object synchronously ASAP. */
9773 if (o
->storage
== REDIS_VM_LOADING
)
9774 vmCancelThreadedIOJob(o
);
9775 return vmGenericLoadObject((vmpointer
*)o
,0);
9778 /* Just load the value on disk, without to modify the key.
9779 * This is useful when we want to perform some operation on the value
9780 * without to really bring it from swap to memory, like while saving the
9781 * dataset or rewriting the append only log. */
9782 static robj
*vmPreviewObject(robj
*o
) {
9783 return vmGenericLoadObject((vmpointer
*)o
,1);
9786 /* How a good candidate is this object for swapping?
9787 * The better candidate it is, the greater the returned value.
9789 * Currently we try to perform a fast estimation of the object size in
9790 * memory, and combine it with aging informations.
9792 * Basically swappability = idle-time * log(estimated size)
9794 * Bigger objects are preferred over smaller objects, but not
9795 * proportionally, this is why we use the logarithm. This algorithm is
9796 * just a first try and will probably be tuned later. */
9797 static double computeObjectSwappability(robj
*o
) {
9798 /* actual age can be >= minage, but not < minage. As we use wrapping
9799 * 21 bit clocks with minutes resolution for the LRU. */
9800 time_t minage
= abs(server
.lruclock
- o
->lru
);
9804 struct dictEntry
*de
;
9807 if (minage
<= 0) return 0;
9810 if (o
->encoding
!= REDIS_ENCODING_RAW
) {
9813 asize
= sdslen(o
->ptr
)+sizeof(*o
)+sizeof(long)*2;
9818 listNode
*ln
= listFirst(l
);
9820 asize
= sizeof(list
);
9822 robj
*ele
= ln
->value
;
9825 elesize
= (ele
->encoding
== REDIS_ENCODING_RAW
) ?
9826 (sizeof(*o
)+sdslen(ele
->ptr
)) : sizeof(*o
);
9827 asize
+= (sizeof(listNode
)+elesize
)*listLength(l
);
9832 z
= (o
->type
== REDIS_ZSET
);
9833 d
= z
? ((zset
*)o
->ptr
)->dict
: o
->ptr
;
9835 asize
= sizeof(dict
)+(sizeof(struct dictEntry
*)*dictSlots(d
));
9836 if (z
) asize
+= sizeof(zset
)-sizeof(dict
);
9841 de
= dictGetRandomKey(d
);
9842 ele
= dictGetEntryKey(de
);
9843 elesize
= (ele
->encoding
== REDIS_ENCODING_RAW
) ?
9844 (sizeof(*o
)+sdslen(ele
->ptr
)) : sizeof(*o
);
9845 asize
+= (sizeof(struct dictEntry
)+elesize
)*dictSize(d
);
9846 if (z
) asize
+= sizeof(zskiplistNode
)*dictSize(d
);
9850 if (o
->encoding
== REDIS_ENCODING_ZIPMAP
) {
9851 unsigned char *p
= zipmapRewind((unsigned char*)o
->ptr
);
9852 unsigned int len
= zipmapLen((unsigned char*)o
->ptr
);
9853 unsigned int klen
, vlen
;
9854 unsigned char *key
, *val
;
9856 if ((p
= zipmapNext(p
,&key
,&klen
,&val
,&vlen
)) == NULL
) {
9860 asize
= len
*(klen
+vlen
+3);
9861 } else if (o
->encoding
== REDIS_ENCODING_HT
) {
9863 asize
= sizeof(dict
)+(sizeof(struct dictEntry
*)*dictSlots(d
));
9868 de
= dictGetRandomKey(d
);
9869 ele
= dictGetEntryKey(de
);
9870 elesize
= (ele
->encoding
== REDIS_ENCODING_RAW
) ?
9871 (sizeof(*o
)+sdslen(ele
->ptr
)) : sizeof(*o
);
9872 ele
= dictGetEntryVal(de
);
9873 elesize
= (ele
->encoding
== REDIS_ENCODING_RAW
) ?
9874 (sizeof(*o
)+sdslen(ele
->ptr
)) : sizeof(*o
);
9875 asize
+= (sizeof(struct dictEntry
)+elesize
)*dictSize(d
);
9880 return (double)minage
*log(1+asize
);
9883 /* Try to swap an object that's a good candidate for swapping.
9884 * Returns REDIS_OK if the object was swapped, REDIS_ERR if it's not possible
9885 * to swap any object at all.
9887 * If 'usethreaded' is true, Redis will try to swap the object in background
9888 * using I/O threads. */
9889 static int vmSwapOneObject(int usethreads
) {
9891 struct dictEntry
*best
= NULL
;
9892 double best_swappability
= 0;
9893 redisDb
*best_db
= NULL
;
9897 for (j
= 0; j
< server
.dbnum
; j
++) {
9898 redisDb
*db
= server
.db
+j
;
9899 /* Why maxtries is set to 100?
9900 * Because this way (usually) we'll find 1 object even if just 1% - 2%
9901 * are swappable objects */
9904 if (dictSize(db
->dict
) == 0) continue;
9905 for (i
= 0; i
< 5; i
++) {
9907 double swappability
;
9909 if (maxtries
) maxtries
--;
9910 de
= dictGetRandomKey(db
->dict
);
9911 val
= dictGetEntryVal(de
);
9912 /* Only swap objects that are currently in memory.
9914 * Also don't swap shared objects: not a good idea in general and
9915 * we need to ensure that the main thread does not touch the
9916 * object while the I/O thread is using it, but we can't
9917 * control other keys without adding additional mutex. */
9918 if (val
->storage
!= REDIS_VM_MEMORY
|| val
->refcount
!= 1) {
9919 if (maxtries
) i
--; /* don't count this try */
9922 swappability
= computeObjectSwappability(val
);
9923 if (!best
|| swappability
> best_swappability
) {
9925 best_swappability
= swappability
;
9930 if (best
== NULL
) return REDIS_ERR
;
9931 key
= dictGetEntryKey(best
);
9932 val
= dictGetEntryVal(best
);
9934 redisLog(REDIS_DEBUG
,"Key with best swappability: %s, %f",
9935 key
, best_swappability
);
9939 robj
*keyobj
= createStringObject(key
,sdslen(key
));
9940 vmSwapObjectThreaded(keyobj
,val
,best_db
);
9941 decrRefCount(keyobj
);
9946 if ((vp
= vmSwapObjectBlocking(val
)) != NULL
) {
9947 dictGetEntryVal(best
) = vp
;
/* Swap out one object synchronously. Same return value as
 * vmSwapOneObject(). */
static int vmSwapOneObjectBlocking(void) {
    return vmSwapOneObject(0);
}
/* Queue one object for swapping using the I/O threads. Same return value
 * as vmSwapOneObject(). */
static int vmSwapOneObjectThreaded(void) {
    return vmSwapOneObject(1);
}
9963 /* Return true if it's safe to swap out objects in a given moment.
9964 * Basically we don't want to swap objects out while there is a BGSAVE
9965 * or a BGAEOREWRITE running in backgroud. */
9966 static int vmCanSwapOut(void) {
9967 return (server
.bgsavechildpid
== -1 && server
.bgrewritechildpid
== -1);
9970 /* =================== Virtual Memory - Threaded I/O ======================= */
9972 static void freeIOJob(iojob
*j
) {
9973 if ((j
->type
== REDIS_IOJOB_PREPARE_SWAP
||
9974 j
->type
== REDIS_IOJOB_DO_SWAP
||
9975 j
->type
== REDIS_IOJOB_LOAD
) && j
->val
!= NULL
)
9977 /* we fix the storage type, otherwise decrRefCount() will try to
9978 * kill the I/O thread Job (that does no longer exists). */
9979 if (j
->val
->storage
== REDIS_VM_SWAPPING
)
9980 j
->val
->storage
= REDIS_VM_MEMORY
;
9981 decrRefCount(j
->val
);
9983 decrRefCount(j
->key
);
9987 /* Every time a thread finished a Job, it writes a byte into the write side
9988 * of an unix pipe in order to "awake" the main thread, and this function
9990 static void vmThreadedIOCompletedJob(aeEventLoop
*el
, int fd
, void *privdata
,
9994 int retval
, processed
= 0, toprocess
= -1, trytoswap
= 1;
9996 REDIS_NOTUSED(mask
);
9997 REDIS_NOTUSED(privdata
);
9999 /* For every byte we read in the read side of the pipe, there is one
10000 * I/O job completed to process. */
10001 while((retval
= read(fd
,buf
,1)) == 1) {
10004 struct dictEntry
*de
;
10006 redisLog(REDIS_DEBUG
,"Processing I/O completed job");
10008 /* Get the processed element (the oldest one) */
10010 assert(listLength(server
.io_processed
) != 0);
10011 if (toprocess
== -1) {
10012 toprocess
= (listLength(server
.io_processed
)*REDIS_MAX_COMPLETED_JOBS_PROCESSED
)/100;
10013 if (toprocess
<= 0) toprocess
= 1;
10015 ln
= listFirst(server
.io_processed
);
10017 listDelNode(server
.io_processed
,ln
);
10018 unlockThreadedIO();
10019 /* If this job is marked as canceled, just ignore it */
10024 /* Post process it in the main thread, as there are things we
10025 * can do just here to avoid race conditions and/or invasive locks */
10026 redisLog(REDIS_DEBUG
,"COMPLETED Job type: %d, ID %p, key: %s", j
->type
, (void*)j
->id
, (unsigned char*)j
->key
->ptr
);
10027 de
= dictFind(j
->db
->dict
,j
->key
->ptr
);
10028 redisAssert(de
!= NULL
);
10029 if (j
->type
== REDIS_IOJOB_LOAD
) {
10031 vmpointer
*vp
= dictGetEntryVal(de
);
10033 /* Key loaded, bring it at home */
10034 vmMarkPagesFree(vp
->page
,vp
->usedpages
);
10035 redisLog(REDIS_DEBUG
, "VM: object %s loaded from disk (threaded)",
10036 (unsigned char*) j
->key
->ptr
);
10037 server
.vm_stats_swapped_objects
--;
10038 server
.vm_stats_swapins
++;
10039 dictGetEntryVal(de
) = j
->val
;
10040 incrRefCount(j
->val
);
10042 /* Handle clients waiting for this key to be loaded. */
10043 handleClientsBlockedOnSwappedKey(db
,j
->key
);
10046 } else if (j
->type
== REDIS_IOJOB_PREPARE_SWAP
) {
10047 /* Now we know the amount of pages required to swap this object.
10048 * Let's find some space for it, and queue this task again
10049 * rebranded as REDIS_IOJOB_DO_SWAP. */
10050 if (!vmCanSwapOut() ||
10051 vmFindContiguousPages(&j
->page
,j
->pages
) == REDIS_ERR
)
10053 /* Ooops... no space or we can't swap as there is
10054 * a fork()ed Redis trying to save stuff on disk. */
10055 j
->val
->storage
= REDIS_VM_MEMORY
; /* undo operation */
10058 /* Note that we need to mark this pages as used now,
10059 * if the job will be canceled, we'll mark them as freed
10061 vmMarkPagesUsed(j
->page
,j
->pages
);
10062 j
->type
= REDIS_IOJOB_DO_SWAP
;
10065 unlockThreadedIO();
10067 } else if (j
->type
== REDIS_IOJOB_DO_SWAP
) {
10070 /* Key swapped. We can finally free some memory. */
10071 if (j
->val
->storage
!= REDIS_VM_SWAPPING
) {
10072 vmpointer
*vp
= (vmpointer
*) j
->id
;
10073 printf("storage: %d\n",vp
->storage
);
10074 printf("key->name: %s\n",(char*)j
->key
->ptr
);
10075 printf("val: %p\n",(void*)j
->val
);
10076 printf("val->type: %d\n",j
->val
->type
);
10077 printf("val->ptr: %s\n",(char*)j
->val
->ptr
);
10079 redisAssert(j
->val
->storage
== REDIS_VM_SWAPPING
);
10080 vp
= createVmPointer(j
->val
->type
);
10081 vp
->page
= j
->page
;
10082 vp
->usedpages
= j
->pages
;
10083 dictGetEntryVal(de
) = vp
;
10084 /* Fix the storage otherwise decrRefCount will attempt to
10085 * remove the associated I/O job */
10086 j
->val
->storage
= REDIS_VM_MEMORY
;
10087 decrRefCount(j
->val
);
10088 redisLog(REDIS_DEBUG
,
10089 "VM: object %s swapped out at %lld (%lld pages) (threaded)",
10090 (unsigned char*) j
->key
->ptr
,
10091 (unsigned long long) j
->page
, (unsigned long long) j
->pages
);
10092 server
.vm_stats_swapped_objects
++;
10093 server
.vm_stats_swapouts
++;
10095 /* Put a few more swap requests in queue if we are still
10097 if (trytoswap
&& vmCanSwapOut() &&
10098 zmalloc_used_memory() > server
.vm_max_memory
)
10103 more
= listLength(server
.io_newjobs
) <
10104 (unsigned) server
.vm_max_threads
;
10105 unlockThreadedIO();
10106 /* Don't waste CPU time if swappable objects are rare. */
10107 if (vmSwapOneObjectThreaded() == REDIS_ERR
) {
10115 if (processed
== toprocess
) return;
10117 if (retval
< 0 && errno
!= EAGAIN
) {
10118 redisLog(REDIS_WARNING
,
10119 "WARNING: read(2) error in vmThreadedIOCompletedJob() %s",
10124 static void lockThreadedIO(void) {
10125 pthread_mutex_lock(&server
.io_mutex
);
10128 static void unlockThreadedIO(void) {
10129 pthread_mutex_unlock(&server
.io_mutex
);
10132 /* Remove the specified object from the threaded I/O queue if still not
10133 * processed, otherwise make sure to flag it as canceled. */
10134 static void vmCancelThreadedIOJob(robj
*o
) {
10136 server
.io_newjobs
, /* 0 */
10137 server
.io_processing
, /* 1 */
10138 server
.io_processed
/* 2 */
10142 assert(o
->storage
== REDIS_VM_LOADING
|| o
->storage
== REDIS_VM_SWAPPING
);
10145 /* Search for a matching object in one of the queues */
10146 for (i
= 0; i
< 3; i
++) {
10150 listRewind(lists
[i
],&li
);
10151 while ((ln
= listNext(&li
)) != NULL
) {
10152 iojob
*job
= ln
->value
;
10154 if (job
->canceled
) continue; /* Skip this, already canceled. */
10155 if (job
->id
== o
) {
10156 redisLog(REDIS_DEBUG
,"*** CANCELED %p (key %s) (type %d) (LIST ID %d)\n",
10157 (void*)job
, (char*)job
->key
->ptr
, job
->type
, i
);
10158 /* Mark the pages as free since the swap didn't happened
10159 * or happened but is now discarded. */
10160 if (i
!= 1 && job
->type
== REDIS_IOJOB_DO_SWAP
)
10161 vmMarkPagesFree(job
->page
,job
->pages
);
10162 /* Cancel the job. It depends on the list the job is
10165 case 0: /* io_newjobs */
10166 /* If the job was yet not processed the best thing to do
10167 * is to remove it from the queue at all */
10169 listDelNode(lists
[i
],ln
);
10171 case 1: /* io_processing */
10172 /* Oh Shi- the thread is messing with the Job:
10174 * Probably it's accessing the object if this is a
10175 * PREPARE_SWAP or DO_SWAP job.
10176 * If it's a LOAD job it may be reading from disk and
10177 * if we don't wait for the job to terminate before to
10178 * cancel it, maybe in a few microseconds data can be
10179 * corrupted in this pages. So the short story is:
10181 * Better to wait for the job to move into the
10182 * next queue (processed)... */
10184 /* We try again and again until the job is completed. */
10185 unlockThreadedIO();
10186 /* But let's wait some time for the I/O thread
10187 * to finish with this job. After all this condition
10188 * should be very rare. */
10191 case 2: /* io_processed */
10192 /* The job was already processed, that's easy...
10193 * just mark it as canceled so that we'll ignore it
10194 * when processing completed jobs. */
10198 /* Finally we have to adjust the storage type of the object
10199 * in order to "UNDO" the operaiton. */
10200 if (o
->storage
== REDIS_VM_LOADING
)
10201 o
->storage
= REDIS_VM_SWAPPED
;
10202 else if (o
->storage
== REDIS_VM_SWAPPING
)
10203 o
->storage
= REDIS_VM_MEMORY
;
10204 unlockThreadedIO();
10205 redisLog(REDIS_DEBUG
,"*** DONE");
10210 unlockThreadedIO();
10211 printf("Not found: %p\n", (void*)o
);
10212 redisAssert(1 != 1); /* We should never reach this */
10215 static void *IOThreadEntryPoint(void *arg
) {
10218 REDIS_NOTUSED(arg
);
10220 pthread_detach(pthread_self());
10222 /* Get a new job to process */
10224 if (listLength(server
.io_newjobs
) == 0) {
10225 /* No new jobs in queue, exit. */
10226 redisLog(REDIS_DEBUG
,"Thread %ld exiting, nothing to do",
10227 (long) pthread_self());
10228 server
.io_active_threads
--;
10229 unlockThreadedIO();
10232 ln
= listFirst(server
.io_newjobs
);
10234 listDelNode(server
.io_newjobs
,ln
);
10235 /* Add the job in the processing queue */
10236 j
->thread
= pthread_self();
10237 listAddNodeTail(server
.io_processing
,j
);
10238 ln
= listLast(server
.io_processing
); /* We use ln later to remove it */
10239 unlockThreadedIO();
10240 redisLog(REDIS_DEBUG
,"Thread %ld got a new job (type %d): %p about key '%s'",
10241 (long) pthread_self(), j
->type
, (void*)j
, (char*)j
->key
->ptr
);
10243 /* Process the Job */
10244 if (j
->type
== REDIS_IOJOB_LOAD
) {
10245 vmpointer
*vp
= (vmpointer
*)j
->id
;
10246 j
->val
= vmReadObjectFromSwap(j
->page
,vp
->vtype
);
10247 } else if (j
->type
== REDIS_IOJOB_PREPARE_SWAP
) {
10248 FILE *fp
= fopen("/dev/null","w+");
10249 j
->pages
= rdbSavedObjectPages(j
->val
,fp
);
10251 } else if (j
->type
== REDIS_IOJOB_DO_SWAP
) {
10252 if (vmWriteObjectOnSwap(j
->val
,j
->page
) == REDIS_ERR
)
10256 /* Done: insert the job into the processed queue */
10257 redisLog(REDIS_DEBUG
,"Thread %ld completed the job: %p (key %s)",
10258 (long) pthread_self(), (void*)j
, (char*)j
->key
->ptr
);
10260 listDelNode(server
.io_processing
,ln
);
10261 listAddNodeTail(server
.io_processed
,j
);
10262 unlockThreadedIO();
10264 /* Signal the main thread there is new stuff to process */
10265 assert(write(server
.io_ready_pipe_write
,"x",1) == 1);
10267 return NULL
; /* never reached */
10270 static void spawnIOThread(void) {
10272 sigset_t mask
, omask
;
10275 sigemptyset(&mask
);
10276 sigaddset(&mask
,SIGCHLD
);
10277 sigaddset(&mask
,SIGHUP
);
10278 sigaddset(&mask
,SIGPIPE
);
10279 pthread_sigmask(SIG_SETMASK
, &mask
, &omask
);
10280 while ((err
= pthread_create(&thread
,&server
.io_threads_attr
,IOThreadEntryPoint
,NULL
)) != 0) {
10281 redisLog(REDIS_WARNING
,"Unable to spawn an I/O thread: %s",
10285 pthread_sigmask(SIG_SETMASK
, &omask
, NULL
);
10286 server
.io_active_threads
++;
10289 /* We need to wait for the last thread to exit before we are able to
10290 * fork() in order to BGSAVE or BGREWRITEAOF. */
10291 static void waitEmptyIOJobsQueue(void) {
10293 int io_processed_len
;
10296 if (listLength(server
.io_newjobs
) == 0 &&
10297 listLength(server
.io_processing
) == 0 &&
10298 server
.io_active_threads
== 0)
10300 unlockThreadedIO();
10303 /* While waiting for empty jobs queue condition we post-process some
10304 * finshed job, as I/O threads may be hanging trying to write against
10305 * the io_ready_pipe_write FD but there are so much pending jobs that
10306 * it's blocking. */
10307 io_processed_len
= listLength(server
.io_processed
);
10308 unlockThreadedIO();
10309 if (io_processed_len
) {
10310 vmThreadedIOCompletedJob(NULL
,server
.io_ready_pipe_read
,NULL
,0);
10311 usleep(1000); /* 1 millisecond */
10313 usleep(10000); /* 10 milliseconds */
10318 static void vmReopenSwapFile(void) {
10319 /* Note: we don't close the old one as we are in the child process
10320 * and don't want to mess at all with the original file object. */
10321 server
.vm_fp
= fopen(server
.vm_swap_file
,"r+b");
10322 if (server
.vm_fp
== NULL
) {
10323 redisLog(REDIS_WARNING
,"Can't re-open the VM swap file: %s. Exiting.",
10324 server
.vm_swap_file
);
10327 server
.vm_fd
= fileno(server
.vm_fp
);
10330 /* This function must be called while with threaded IO locked */
10331 static void queueIOJob(iojob
*j
) {
10332 redisLog(REDIS_DEBUG
,"Queued IO Job %p type %d about key '%s'\n",
10333 (void*)j
, j
->type
, (char*)j
->key
->ptr
);
10334 listAddNodeTail(server
.io_newjobs
,j
);
10335 if (server
.io_active_threads
< server
.vm_max_threads
)
10339 static int vmSwapObjectThreaded(robj
*key
, robj
*val
, redisDb
*db
) {
10342 j
= zmalloc(sizeof(*j
));
10343 j
->type
= REDIS_IOJOB_PREPARE_SWAP
;
10347 j
->id
= j
->val
= val
;
10350 j
->thread
= (pthread_t
) -1;
10351 val
->storage
= REDIS_VM_SWAPPING
;
10355 unlockThreadedIO();
10359 /* ============ Virtual Memory - Blocking clients on missing keys =========== */
10361 /* This function makes the clinet 'c' waiting for the key 'key' to be loaded.
10362 * If there is not already a job loading the key, it is craeted.
10363 * The key is added to the io_keys list in the client structure, and also
10364 * in the hash table mapping swapped keys to waiting clients, that is,
10365 * server.io_waited_keys. */
10366 static int waitForSwappedKey(redisClient
*c
, robj
*key
) {
10367 struct dictEntry
*de
;
10371 /* If the key does not exist or is already in RAM we don't need to
10372 * block the client at all. */
10373 de
= dictFind(c
->db
->dict
,key
->ptr
);
10374 if (de
== NULL
) return 0;
10375 o
= dictGetEntryVal(de
);
10376 if (o
->storage
== REDIS_VM_MEMORY
) {
10378 } else if (o
->storage
== REDIS_VM_SWAPPING
) {
10379 /* We were swapping the key, undo it! */
10380 vmCancelThreadedIOJob(o
);
10384 /* OK: the key is either swapped, or being loaded just now. */
10386 /* Add the key to the list of keys this client is waiting for.
10387 * This maps clients to keys they are waiting for. */
10388 listAddNodeTail(c
->io_keys
,key
);
10391 /* Add the client to the swapped keys => clients waiting map. */
10392 de
= dictFind(c
->db
->io_keys
,key
);
10396 /* For every key we take a list of clients blocked for it */
10398 retval
= dictAdd(c
->db
->io_keys
,key
,l
);
10400 assert(retval
== DICT_OK
);
10402 l
= dictGetEntryVal(de
);
10404 listAddNodeTail(l
,c
);
10406 /* Are we already loading the key from disk? If not create a job */
10407 if (o
->storage
== REDIS_VM_SWAPPED
) {
10409 vmpointer
*vp
= (vmpointer
*)o
;
10411 o
->storage
= REDIS_VM_LOADING
;
10412 j
= zmalloc(sizeof(*j
));
10413 j
->type
= REDIS_IOJOB_LOAD
;
10418 j
->page
= vp
->page
;
10421 j
->thread
= (pthread_t
) -1;
10424 unlockThreadedIO();
10429 /* Preload keys for any command with first, last and step values for
10430 * the command keys prototype, as defined in the command table. */
10431 static void waitForMultipleSwappedKeys(redisClient
*c
, struct redisCommand
*cmd
, int argc
, robj
**argv
) {
10433 if (cmd
->vm_firstkey
== 0) return;
10434 last
= cmd
->vm_lastkey
;
10435 if (last
< 0) last
= argc
+last
;
10436 for (j
= cmd
->vm_firstkey
; j
<= last
; j
+= cmd
->vm_keystep
) {
10437 redisAssert(j
< argc
);
10438 waitForSwappedKey(c
,argv
[j
]);
10442 /* Preload keys needed for the ZUNIONSTORE and ZINTERSTORE commands.
10443 * Note that the number of keys to preload is user-defined, so we need to
10444 * apply a sanity check against argc. */
10445 static void zunionInterBlockClientOnSwappedKeys(redisClient
*c
, struct redisCommand
*cmd
, int argc
, robj
**argv
) {
10447 REDIS_NOTUSED(cmd
);
10449 num
= atoi(argv
[2]->ptr
);
10450 if (num
> (argc
-3)) return;
10451 for (i
= 0; i
< num
; i
++) {
10452 waitForSwappedKey(c
,argv
[3+i
]);
10456 /* Preload keys needed to execute the entire MULTI/EXEC block.
10458 * This function is called by blockClientOnSwappedKeys when EXEC is issued,
10459 * and will block the client when any command requires a swapped out value. */
10460 static void execBlockClientOnSwappedKeys(redisClient
*c
, struct redisCommand
*cmd
, int argc
, robj
**argv
) {
10462 struct redisCommand
*mcmd
;
10464 REDIS_NOTUSED(cmd
);
10465 REDIS_NOTUSED(argc
);
10466 REDIS_NOTUSED(argv
);
10468 if (!(c
->flags
& REDIS_MULTI
)) return;
10469 for (i
= 0; i
< c
->mstate
.count
; i
++) {
10470 mcmd
= c
->mstate
.commands
[i
].cmd
;
10471 margc
= c
->mstate
.commands
[i
].argc
;
10472 margv
= c
->mstate
.commands
[i
].argv
;
10474 if (mcmd
->vm_preload_proc
!= NULL
) {
10475 mcmd
->vm_preload_proc(c
,mcmd
,margc
,margv
);
10477 waitForMultipleSwappedKeys(c
,mcmd
,margc
,margv
);
10482 /* Is this client attempting to run a command against swapped keys?
10483 * If so, block it ASAP, load the keys in background, then resume it.
10485 * The important idea about this function is that it can fail! If keys will
10486 * still be swapped when the client is resumed, this key lookups will
10487 * just block loading keys from disk. In practical terms this should only
10488 * happen with SORT BY command or if there is a bug in this function.
10490 * Return 1 if the client is marked as blocked, 0 if the client can
10491 * continue as the keys it is going to access appear to be in memory. */
10492 static int blockClientOnSwappedKeys(redisClient
*c
, struct redisCommand
*cmd
) {
10493 if (cmd
->vm_preload_proc
!= NULL
) {
10494 cmd
->vm_preload_proc(c
,cmd
,c
->argc
,c
->argv
);
10496 waitForMultipleSwappedKeys(c
,cmd
,c
->argc
,c
->argv
);
10499 /* If the client was blocked for at least one key, mark it as blocked. */
10500 if (listLength(c
->io_keys
)) {
10501 c
->flags
|= REDIS_IO_WAIT
;
10502 aeDeleteFileEvent(server
.el
,c
->fd
,AE_READABLE
);
10503 server
.vm_blocked_clients
++;
10510 /* Remove the 'key' from the list of blocked keys for a given client.
10512 * The function returns 1 when there are no longer blocking keys after
10513 * the current one was removed (and the client can be unblocked). */
10514 static int dontWaitForSwappedKey(redisClient
*c
, robj
*key
) {
10518 struct dictEntry
*de
;
10520 /* Remove the key from the list of keys this client is waiting for. */
10521 listRewind(c
->io_keys
,&li
);
10522 while ((ln
= listNext(&li
)) != NULL
) {
10523 if (equalStringObjects(ln
->value
,key
)) {
10524 listDelNode(c
->io_keys
,ln
);
10528 assert(ln
!= NULL
);
10530 /* Remove the client form the key => waiting clients map. */
10531 de
= dictFind(c
->db
->io_keys
,key
);
10532 assert(de
!= NULL
);
10533 l
= dictGetEntryVal(de
);
10534 ln
= listSearchKey(l
,c
);
10535 assert(ln
!= NULL
);
10537 if (listLength(l
) == 0)
10538 dictDelete(c
->db
->io_keys
,key
);
10540 return listLength(c
->io_keys
) == 0;
10543 /* Every time we now a key was loaded back in memory, we handle clients
10544 * waiting for this key if any. */
10545 static void handleClientsBlockedOnSwappedKey(redisDb
*db
, robj
*key
) {
10546 struct dictEntry
*de
;
10551 de
= dictFind(db
->io_keys
,key
);
10554 l
= dictGetEntryVal(de
);
10555 len
= listLength(l
);
10556 /* Note: we can't use something like while(listLength(l)) as the list
10557 * can be freed by the calling function when we remove the last element. */
10560 redisClient
*c
= ln
->value
;
10562 if (dontWaitForSwappedKey(c
,key
)) {
10563 /* Put the client in the list of clients ready to go as we
10564 * loaded all the keys about it. */
10565 listAddNodeTail(server
.io_ready_clients
,c
);
10570 /* =========================== Remote Configuration ========================= */
10572 static void configSetCommand(redisClient
*c
) {
10573 robj
*o
= getDecodedObject(c
->argv
[3]);
10576 if (!strcasecmp(c
->argv
[2]->ptr
,"dbfilename")) {
10577 zfree(server
.dbfilename
);
10578 server
.dbfilename
= zstrdup(o
->ptr
);
10579 } else if (!strcasecmp(c
->argv
[2]->ptr
,"requirepass")) {
10580 zfree(server
.requirepass
);
10581 server
.requirepass
= zstrdup(o
->ptr
);
10582 } else if (!strcasecmp(c
->argv
[2]->ptr
,"masterauth")) {
10583 zfree(server
.masterauth
);
10584 server
.masterauth
= zstrdup(o
->ptr
);
10585 } else if (!strcasecmp(c
->argv
[2]->ptr
,"maxmemory")) {
10586 if (getLongLongFromObject(o
,&ll
) == REDIS_ERR
||
10587 ll
< 0) goto badfmt
;
10588 server
.maxmemory
= ll
;
10589 } else if (!strcasecmp(c
->argv
[2]->ptr
,"timeout")) {
10590 if (getLongLongFromObject(o
,&ll
) == REDIS_ERR
||
10591 ll
< 0 || ll
> LONG_MAX
) goto badfmt
;
10592 server
.maxidletime
= ll
;
10593 } else if (!strcasecmp(c
->argv
[2]->ptr
,"appendfsync")) {
10594 if (!strcasecmp(o
->ptr
,"no")) {
10595 server
.appendfsync
= APPENDFSYNC_NO
;
10596 } else if (!strcasecmp(o
->ptr
,"everysec")) {
10597 server
.appendfsync
= APPENDFSYNC_EVERYSEC
;
10598 } else if (!strcasecmp(o
->ptr
,"always")) {
10599 server
.appendfsync
= APPENDFSYNC_ALWAYS
;
10603 } else if (!strcasecmp(c
->argv
[2]->ptr
,"no-appendfsync-on-rewrite")) {
10604 int yn
= yesnotoi(o
->ptr
);
10606 if (yn
== -1) goto badfmt
;
10607 server
.no_appendfsync_on_rewrite
= yn
;
10608 } else if (!strcasecmp(c
->argv
[2]->ptr
,"appendonly")) {
10609 int old
= server
.appendonly
;
10610 int new = yesnotoi(o
->ptr
);
10612 if (new == -1) goto badfmt
;
10617 if (startAppendOnly() == REDIS_ERR
) {
10618 addReplySds(c
,sdscatprintf(sdsempty(),
10619 "-ERR Unable to turn on AOF. Check server logs.\r\n"));
10625 } else if (!strcasecmp(c
->argv
[2]->ptr
,"save")) {
10627 sds
*v
= sdssplitlen(o
->ptr
,sdslen(o
->ptr
)," ",1,&vlen
);
10629 /* Perform sanity check before setting the new config:
10630 * - Even number of args
10631 * - Seconds >= 1, changes >= 0 */
10633 sdsfreesplitres(v
,vlen
);
10636 for (j
= 0; j
< vlen
; j
++) {
10640 val
= strtoll(v
[j
], &eptr
, 10);
10641 if (eptr
[0] != '\0' ||
10642 ((j
& 1) == 0 && val
< 1) ||
10643 ((j
& 1) == 1 && val
< 0)) {
10644 sdsfreesplitres(v
,vlen
);
10648 /* Finally set the new config */
10649 resetServerSaveParams();
10650 for (j
= 0; j
< vlen
; j
+= 2) {
10654 seconds
= strtoll(v
[j
],NULL
,10);
10655 changes
= strtoll(v
[j
+1],NULL
,10);
10656 appendServerSaveParams(seconds
, changes
);
10658 sdsfreesplitres(v
,vlen
);
10660 addReplySds(c
,sdscatprintf(sdsempty(),
10661 "-ERR not supported CONFIG parameter %s\r\n",
10662 (char*)c
->argv
[2]->ptr
));
10667 addReply(c
,shared
.ok
);
10670 badfmt
: /* Bad format errors */
10671 addReplySds(c
,sdscatprintf(sdsempty(),
10672 "-ERR invalid argument '%s' for CONFIG SET '%s'\r\n",
10674 (char*)c
->argv
[2]->ptr
));
10678 static void configGetCommand(redisClient
*c
) {
10679 robj
*o
= getDecodedObject(c
->argv
[2]);
10680 robj
*lenobj
= createObject(REDIS_STRING
,NULL
);
10681 char *pattern
= o
->ptr
;
10684 addReply(c
,lenobj
);
10685 decrRefCount(lenobj
);
10687 if (stringmatch(pattern
,"dbfilename",0)) {
10688 addReplyBulkCString(c
,"dbfilename");
10689 addReplyBulkCString(c
,server
.dbfilename
);
10692 if (stringmatch(pattern
,"requirepass",0)) {
10693 addReplyBulkCString(c
,"requirepass");
10694 addReplyBulkCString(c
,server
.requirepass
);
10697 if (stringmatch(pattern
,"masterauth",0)) {
10698 addReplyBulkCString(c
,"masterauth");
10699 addReplyBulkCString(c
,server
.masterauth
);
10702 if (stringmatch(pattern
,"maxmemory",0)) {
10705 ll2string(buf
,128,server
.maxmemory
);
10706 addReplyBulkCString(c
,"maxmemory");
10707 addReplyBulkCString(c
,buf
);
10710 if (stringmatch(pattern
,"timeout",0)) {
10713 ll2string(buf
,128,server
.maxidletime
);
10714 addReplyBulkCString(c
,"timeout");
10715 addReplyBulkCString(c
,buf
);
10718 if (stringmatch(pattern
,"appendonly",0)) {
10719 addReplyBulkCString(c
,"appendonly");
10720 addReplyBulkCString(c
,server
.appendonly
? "yes" : "no");
10723 if (stringmatch(pattern
,"no-appendfsync-on-rewrite",0)) {
10724 addReplyBulkCString(c
,"no-appendfsync-on-rewrite");
10725 addReplyBulkCString(c
,server
.no_appendfsync_on_rewrite
? "yes" : "no");
10728 if (stringmatch(pattern
,"appendfsync",0)) {
10731 switch(server
.appendfsync
) {
10732 case APPENDFSYNC_NO
: policy
= "no"; break;
10733 case APPENDFSYNC_EVERYSEC
: policy
= "everysec"; break;
10734 case APPENDFSYNC_ALWAYS
: policy
= "always"; break;
10735 default: policy
= "unknown"; break; /* too harmless to panic */
10737 addReplyBulkCString(c
,"appendfsync");
10738 addReplyBulkCString(c
,policy
);
10741 if (stringmatch(pattern
,"save",0)) {
10742 sds buf
= sdsempty();
10745 for (j
= 0; j
< server
.saveparamslen
; j
++) {
10746 buf
= sdscatprintf(buf
,"%ld %d",
10747 server
.saveparams
[j
].seconds
,
10748 server
.saveparams
[j
].changes
);
10749 if (j
!= server
.saveparamslen
-1)
10750 buf
= sdscatlen(buf
," ",1);
10752 addReplyBulkCString(c
,"save");
10753 addReplyBulkCString(c
,buf
);
10758 lenobj
->ptr
= sdscatprintf(sdsempty(),"*%d\r\n",matches
*2);
10761 static void configCommand(redisClient
*c
) {
10762 if (!strcasecmp(c
->argv
[1]->ptr
,"set")) {
10763 if (c
->argc
!= 4) goto badarity
;
10764 configSetCommand(c
);
10765 } else if (!strcasecmp(c
->argv
[1]->ptr
,"get")) {
10766 if (c
->argc
!= 3) goto badarity
;
10767 configGetCommand(c
);
10768 } else if (!strcasecmp(c
->argv
[1]->ptr
,"resetstat")) {
10769 if (c
->argc
!= 2) goto badarity
;
10770 server
.stat_numcommands
= 0;
10771 server
.stat_numconnections
= 0;
10772 server
.stat_expiredkeys
= 0;
10773 server
.stat_starttime
= time(NULL
);
10774 addReply(c
,shared
.ok
);
10776 addReplySds(c
,sdscatprintf(sdsempty(),
10777 "-ERR CONFIG subcommand must be one of GET, SET, RESETSTAT\r\n"));
10782 addReplySds(c
,sdscatprintf(sdsempty(),
10783 "-ERR Wrong number of arguments for CONFIG %s\r\n",
10784 (char*) c
->argv
[1]->ptr
));
10787 /* =========================== Pubsub implementation ======================== */
10789 static void freePubsubPattern(void *p
) {
10790 pubsubPattern
*pat
= p
;
10792 decrRefCount(pat
->pattern
);
10796 static int listMatchPubsubPattern(void *a
, void *b
) {
10797 pubsubPattern
*pa
= a
, *pb
= b
;
10799 return (pa
->client
== pb
->client
) &&
10800 (equalStringObjects(pa
->pattern
,pb
->pattern
));
10803 /* Subscribe a client to a channel. Returns 1 if the operation succeeded, or
10804 * 0 if the client was already subscribed to that channel. */
10805 static int pubsubSubscribeChannel(redisClient
*c
, robj
*channel
) {
10806 struct dictEntry
*de
;
10807 list
*clients
= NULL
;
10810 /* Add the channel to the client -> channels hash table */
10811 if (dictAdd(c
->pubsub_channels
,channel
,NULL
) == DICT_OK
) {
10813 incrRefCount(channel
);
10814 /* Add the client to the channel -> list of clients hash table */
10815 de
= dictFind(server
.pubsub_channels
,channel
);
10817 clients
= listCreate();
10818 dictAdd(server
.pubsub_channels
,channel
,clients
);
10819 incrRefCount(channel
);
10821 clients
= dictGetEntryVal(de
);
10823 listAddNodeTail(clients
,c
);
10825 /* Notify the client */
10826 addReply(c
,shared
.mbulk3
);
10827 addReply(c
,shared
.subscribebulk
);
10828 addReplyBulk(c
,channel
);
10829 addReplyLongLong(c
,dictSize(c
->pubsub_channels
)+listLength(c
->pubsub_patterns
));
10833 /* Unsubscribe a client from a channel. Returns 1 if the operation succeeded, or
10834 * 0 if the client was not subscribed to the specified channel. */
10835 static int pubsubUnsubscribeChannel(redisClient
*c
, robj
*channel
, int notify
) {
10836 struct dictEntry
*de
;
10841 /* Remove the channel from the client -> channels hash table */
10842 incrRefCount(channel
); /* channel may be just a pointer to the same object
10843 we have in the hash tables. Protect it... */
10844 if (dictDelete(c
->pubsub_channels
,channel
) == DICT_OK
) {
10846 /* Remove the client from the channel -> clients list hash table */
10847 de
= dictFind(server
.pubsub_channels
,channel
);
10848 assert(de
!= NULL
);
10849 clients
= dictGetEntryVal(de
);
10850 ln
= listSearchKey(clients
,c
);
10851 assert(ln
!= NULL
);
10852 listDelNode(clients
,ln
);
10853 if (listLength(clients
) == 0) {
10854 /* Free the list and associated hash entry at all if this was
10855 * the latest client, so that it will be possible to abuse
10856 * Redis PUBSUB creating millions of channels. */
10857 dictDelete(server
.pubsub_channels
,channel
);
10860 /* Notify the client */
10862 addReply(c
,shared
.mbulk3
);
10863 addReply(c
,shared
.unsubscribebulk
);
10864 addReplyBulk(c
,channel
);
10865 addReplyLongLong(c
,dictSize(c
->pubsub_channels
)+
10866 listLength(c
->pubsub_patterns
));
10869 decrRefCount(channel
); /* it is finally safe to release it */
10873 /* Subscribe a client to a pattern. Returns 1 if the operation succeeded, or 0 if the clinet was already subscribed to that pattern. */
10874 static int pubsubSubscribePattern(redisClient
*c
, robj
*pattern
) {
10877 if (listSearchKey(c
->pubsub_patterns
,pattern
) == NULL
) {
10879 pubsubPattern
*pat
;
10880 listAddNodeTail(c
->pubsub_patterns
,pattern
);
10881 incrRefCount(pattern
);
10882 pat
= zmalloc(sizeof(*pat
));
10883 pat
->pattern
= getDecodedObject(pattern
);
10885 listAddNodeTail(server
.pubsub_patterns
,pat
);
10887 /* Notify the client */
10888 addReply(c
,shared
.mbulk3
);
10889 addReply(c
,shared
.psubscribebulk
);
10890 addReplyBulk(c
,pattern
);
10891 addReplyLongLong(c
,dictSize(c
->pubsub_channels
)+listLength(c
->pubsub_patterns
));
10895 /* Unsubscribe a client from a channel. Returns 1 if the operation succeeded, or
10896 * 0 if the client was not subscribed to the specified channel. */
10897 static int pubsubUnsubscribePattern(redisClient
*c
, robj
*pattern
, int notify
) {
10902 incrRefCount(pattern
); /* Protect the object. May be the same we remove */
10903 if ((ln
= listSearchKey(c
->pubsub_patterns
,pattern
)) != NULL
) {
10905 listDelNode(c
->pubsub_patterns
,ln
);
10907 pat
.pattern
= pattern
;
10908 ln
= listSearchKey(server
.pubsub_patterns
,&pat
);
10909 listDelNode(server
.pubsub_patterns
,ln
);
10911 /* Notify the client */
10913 addReply(c
,shared
.mbulk3
);
10914 addReply(c
,shared
.punsubscribebulk
);
10915 addReplyBulk(c
,pattern
);
10916 addReplyLongLong(c
,dictSize(c
->pubsub_channels
)+
10917 listLength(c
->pubsub_patterns
));
10919 decrRefCount(pattern
);
10923 /* Unsubscribe from all the channels. Return the number of channels the
10924 * client was subscribed from. */
10925 static int pubsubUnsubscribeAllChannels(redisClient
*c
, int notify
) {
10926 dictIterator
*di
= dictGetIterator(c
->pubsub_channels
);
10930 while((de
= dictNext(di
)) != NULL
) {
10931 robj
*channel
= dictGetEntryKey(de
);
10933 count
+= pubsubUnsubscribeChannel(c
,channel
,notify
);
10935 dictReleaseIterator(di
);
10939 /* Unsubscribe from all the patterns. Return the number of patterns the
10940 * client was subscribed from. */
10941 static int pubsubUnsubscribeAllPatterns(redisClient
*c
, int notify
) {
10946 listRewind(c
->pubsub_patterns
,&li
);
10947 while ((ln
= listNext(&li
)) != NULL
) {
10948 robj
*pattern
= ln
->value
;
10950 count
+= pubsubUnsubscribePattern(c
,pattern
,notify
);
10955 /* Publish a message */
10956 static int pubsubPublishMessage(robj
*channel
, robj
*message
) {
10958 struct dictEntry
*de
;
10962 /* Send to clients listening for that channel */
10963 de
= dictFind(server
.pubsub_channels
,channel
);
10965 list
*list
= dictGetEntryVal(de
);
10969 listRewind(list
,&li
);
10970 while ((ln
= listNext(&li
)) != NULL
) {
10971 redisClient
*c
= ln
->value
;
10973 addReply(c
,shared
.mbulk3
);
10974 addReply(c
,shared
.messagebulk
);
10975 addReplyBulk(c
,channel
);
10976 addReplyBulk(c
,message
);
10980 /* Send to clients listening to matching channels */
10981 if (listLength(server
.pubsub_patterns
)) {
10982 listRewind(server
.pubsub_patterns
,&li
);
10983 channel
= getDecodedObject(channel
);
10984 while ((ln
= listNext(&li
)) != NULL
) {
10985 pubsubPattern
*pat
= ln
->value
;
10987 if (stringmatchlen((char*)pat
->pattern
->ptr
,
10988 sdslen(pat
->pattern
->ptr
),
10989 (char*)channel
->ptr
,
10990 sdslen(channel
->ptr
),0)) {
10991 addReply(pat
->client
,shared
.mbulk4
);
10992 addReply(pat
->client
,shared
.pmessagebulk
);
10993 addReplyBulk(pat
->client
,pat
->pattern
);
10994 addReplyBulk(pat
->client
,channel
);
10995 addReplyBulk(pat
->client
,message
);
10999 decrRefCount(channel
);
11004 static void subscribeCommand(redisClient
*c
) {
11007 for (j
= 1; j
< c
->argc
; j
++)
11008 pubsubSubscribeChannel(c
,c
->argv
[j
]);
11011 static void unsubscribeCommand(redisClient
*c
) {
11012 if (c
->argc
== 1) {
11013 pubsubUnsubscribeAllChannels(c
,1);
11018 for (j
= 1; j
< c
->argc
; j
++)
11019 pubsubUnsubscribeChannel(c
,c
->argv
[j
],1);
11023 static void psubscribeCommand(redisClient
*c
) {
11026 for (j
= 1; j
< c
->argc
; j
++)
11027 pubsubSubscribePattern(c
,c
->argv
[j
]);
11030 static void punsubscribeCommand(redisClient
*c
) {
11031 if (c
->argc
== 1) {
11032 pubsubUnsubscribeAllPatterns(c
,1);
11037 for (j
= 1; j
< c
->argc
; j
++)
11038 pubsubUnsubscribePattern(c
,c
->argv
[j
],1);
11042 static void publishCommand(redisClient
*c
) {
11043 int receivers
= pubsubPublishMessage(c
->argv
[1],c
->argv
[2]);
11044 addReplyLongLong(c
,receivers
);
11047 /* ===================== WATCH (CAS alike for MULTI/EXEC) ===================
11049 * The implementation uses a per-DB hash table mapping keys to list of clients
11050 * WATCHing those keys, so that given a key that is going to be modified
11051 * we can mark all the associated clients as dirty.
11053 * Also every client contains a list of WATCHed keys so that's possible to
11054 * un-watch such keys when the client is freed or when UNWATCH is called. */
11056 /* In the client->watched_keys list we need to use watchedKey structures
11057 * as in order to identify a key in Redis we need both the key name and the
11059 typedef struct watchedKey
{
11064 /* Watch for the specified key */
11065 static void watchForKey(redisClient
*c
, robj
*key
) {
11066 list
*clients
= NULL
;
11071 /* Check if we are already watching for this key */
11072 listRewind(c
->watched_keys
,&li
);
11073 while((ln
= listNext(&li
))) {
11074 wk
= listNodeValue(ln
);
11075 if (wk
->db
== c
->db
&& equalStringObjects(key
,wk
->key
))
11076 return; /* Key already watched */
11078 /* This key is not already watched in this DB. Let's add it */
11079 clients
= dictFetchValue(c
->db
->watched_keys
,key
);
11081 clients
= listCreate();
11082 dictAdd(c
->db
->watched_keys
,key
,clients
);
11085 listAddNodeTail(clients
,c
);
11086 /* Add the new key to the lits of keys watched by this client */
11087 wk
= zmalloc(sizeof(*wk
));
11091 listAddNodeTail(c
->watched_keys
,wk
);
11094 /* Unwatch all the keys watched by this client. To clean the EXEC dirty
11095 * flag is up to the caller. */
11096 static void unwatchAllKeys(redisClient
*c
) {
11100 if (listLength(c
->watched_keys
) == 0) return;
11101 listRewind(c
->watched_keys
,&li
);
11102 while((ln
= listNext(&li
))) {
11106 /* Lookup the watched key -> clients list and remove the client
11108 wk
= listNodeValue(ln
);
11109 clients
= dictFetchValue(wk
->db
->watched_keys
, wk
->key
);
11110 assert(clients
!= NULL
);
11111 listDelNode(clients
,listSearchKey(clients
,c
));
11112 /* Kill the entry at all if this was the only client */
11113 if (listLength(clients
) == 0)
11114 dictDelete(wk
->db
->watched_keys
, wk
->key
);
11115 /* Remove this watched key from the client->watched list */
11116 listDelNode(c
->watched_keys
,ln
);
11117 decrRefCount(wk
->key
);
11122 /* "Touch" a key, so that if this key is being WATCHed by some client the
11123 * next EXEC will fail. */
11124 static void touchWatchedKey(redisDb
*db
, robj
*key
) {
11129 if (dictSize(db
->watched_keys
) == 0) return;
11130 clients
= dictFetchValue(db
->watched_keys
, key
);
11131 if (!clients
) return;
11133 /* Mark all the clients watching this key as REDIS_DIRTY_CAS */
11134 /* Check if we are already watching for this key */
11135 listRewind(clients
,&li
);
11136 while((ln
= listNext(&li
))) {
11137 redisClient
*c
= listNodeValue(ln
);
11139 c
->flags
|= REDIS_DIRTY_CAS
;
11143 /* On FLUSHDB or FLUSHALL all the watched keys that are present before the
11144 * flush but will be deleted as effect of the flushing operation should
11145 * be touched. "dbid" is the DB that's getting the flush. -1 if it is
11146 * a FLUSHALL operation (all the DBs flushed). */
11147 static void touchWatchedKeysOnFlush(int dbid
) {
11151 /* For every client, check all the waited keys */
11152 listRewind(server
.clients
,&li1
);
11153 while((ln
= listNext(&li1
))) {
11154 redisClient
*c
= listNodeValue(ln
);
11155 listRewind(c
->watched_keys
,&li2
);
11156 while((ln
= listNext(&li2
))) {
11157 watchedKey
*wk
= listNodeValue(ln
);
11159 /* For every watched key matching the specified DB, if the
11160 * key exists, mark the client as dirty, as the key will be
11162 if (dbid
== -1 || wk
->db
->id
== dbid
) {
11163 if (dictFind(wk
->db
->dict
, wk
->key
->ptr
) != NULL
)
11164 c
->flags
|= REDIS_DIRTY_CAS
;
11170 static void watchCommand(redisClient
*c
) {
11173 if (c
->flags
& REDIS_MULTI
) {
11174 addReplySds(c
,sdsnew("-ERR WATCH inside MULTI is not allowed\r\n"));
11177 for (j
= 1; j
< c
->argc
; j
++)
11178 watchForKey(c
,c
->argv
[j
]);
11179 addReply(c
,shared
.ok
);
11182 static void unwatchCommand(redisClient
*c
) {
11184 c
->flags
&= (~REDIS_DIRTY_CAS
);
11185 addReply(c
,shared
.ok
);
11188 /* ================================= Debugging ============================== */
11190 /* Compute the sha1 of string at 's' with 'len' bytes long.
11191 * The SHA1 is then xored againt the string pointed by digest.
11192 * Since xor is commutative, this operation is used in order to
11193 * "add" digests relative to unordered elements.
11195 * So digest(a,b,c,d) will be the same of digest(b,a,c,d) */
11196 static void xorDigest(unsigned char *digest
, void *ptr
, size_t len
) {
11198 unsigned char hash
[20], *s
= ptr
;
11202 SHA1Update(&ctx
,s
,len
);
11203 SHA1Final(hash
,&ctx
);
11205 for (j
= 0; j
< 20; j
++)
11206 digest
[j
] ^= hash
[j
];
11209 static void xorObjectDigest(unsigned char *digest
, robj
*o
) {
11210 o
= getDecodedObject(o
);
11211 xorDigest(digest
,o
->ptr
,sdslen(o
->ptr
));
11215 /* This function instead of just computing the SHA1 and xoring it
11216 * against diget, also perform the digest of "digest" itself and
11217 * replace the old value with the new one.
11219 * So the final digest will be:
11221 * digest = SHA1(digest xor SHA1(data))
11223 * This function is used every time we want to preserve the order so
11224 * that digest(a,b,c,d) will be different than digest(b,c,d,a)
11226 * Also note that mixdigest("foo") followed by mixdigest("bar")
11227 * will lead to a different digest compared to "fo", "obar".
11229 static void mixDigest(unsigned char *digest
, void *ptr
, size_t len
) {
11233 xorDigest(digest
,s
,len
);
11235 SHA1Update(&ctx
,digest
,20);
11236 SHA1Final(digest
,&ctx
);
11239 static void mixObjectDigest(unsigned char *digest
, robj
*o
) {
11240 o
= getDecodedObject(o
);
11241 mixDigest(digest
,o
->ptr
,sdslen(o
->ptr
));
11245 /* Compute the dataset digest. Since keys, sets elements, hashes elements
11246 * are not ordered, we use a trick: every aggregate digest is the xor
11247 * of the digests of their elements. This way the order will not change
11248 * the result. For list instead we use a feedback entering the output digest
11249 * as input in order to ensure that a different ordered list will result in
11250 * a different digest. */
11251 static void computeDatasetDigest(unsigned char *final
) {
11252 unsigned char digest
[20];
11254 dictIterator
*di
= NULL
;
11259 memset(final
,0,20); /* Start with a clean result */
11261 for (j
= 0; j
< server
.dbnum
; j
++) {
11262 redisDb
*db
= server
.db
+j
;
11264 if (dictSize(db
->dict
) == 0) continue;
11265 di
= dictGetIterator(db
->dict
);
11267 /* hash the DB id, so the same dataset moved in a different
11268 * DB will lead to a different digest */
11270 mixDigest(final
,&aux
,sizeof(aux
));
11272 /* Iterate this DB writing every entry */
11273 while((de
= dictNext(di
)) != NULL
) {
11278 memset(digest
,0,20); /* This key-val digest */
11279 key
= dictGetEntryKey(de
);
11280 keyobj
= createStringObject(key
,sdslen(key
));
11282 mixDigest(digest
,key
,sdslen(key
));
11284 /* Make sure the key is loaded if VM is active */
11285 o
= lookupKeyRead(db
,keyobj
);
11287 aux
= htonl(o
->type
);
11288 mixDigest(digest
,&aux
,sizeof(aux
));
11289 expiretime
= getExpire(db
,keyobj
);
11291 /* Save the key and associated value */
11292 if (o
->type
== REDIS_STRING
) {
11293 mixObjectDigest(digest
,o
);
11294 } else if (o
->type
== REDIS_LIST
) {
11295 listTypeIterator
*li
= listTypeInitIterator(o
,0,REDIS_TAIL
);
11296 listTypeEntry entry
;
11297 while(listTypeNext(li
,&entry
)) {
11298 robj
*eleobj
= listTypeGet(&entry
);
11299 mixObjectDigest(digest
,eleobj
);
11300 decrRefCount(eleobj
);
11302 listTypeReleaseIterator(li
);
11303 } else if (o
->type
== REDIS_SET
) {
11304 dict
*set
= o
->ptr
;
11305 dictIterator
*di
= dictGetIterator(set
);
11308 while((de
= dictNext(di
)) != NULL
) {
11309 robj
*eleobj
= dictGetEntryKey(de
);
11311 xorObjectDigest(digest
,eleobj
);
11313 dictReleaseIterator(di
);
11314 } else if (o
->type
== REDIS_ZSET
) {
11316 dictIterator
*di
= dictGetIterator(zs
->dict
);
11319 while((de
= dictNext(di
)) != NULL
) {
11320 robj
*eleobj
= dictGetEntryKey(de
);
11321 double *score
= dictGetEntryVal(de
);
11322 unsigned char eledigest
[20];
11324 snprintf(buf
,sizeof(buf
),"%.17g",*score
);
11325 memset(eledigest
,0,20);
11326 mixObjectDigest(eledigest
,eleobj
);
11327 mixDigest(eledigest
,buf
,strlen(buf
));
11328 xorDigest(digest
,eledigest
,20);
11330 dictReleaseIterator(di
);
11331 } else if (o
->type
== REDIS_HASH
) {
11332 hashTypeIterator
*hi
;
11335 hi
= hashTypeInitIterator(o
);
11336 while (hashTypeNext(hi
) != REDIS_ERR
) {
11337 unsigned char eledigest
[20];
11339 memset(eledigest
,0,20);
11340 obj
= hashTypeCurrent(hi
,REDIS_HASH_KEY
);
11341 mixObjectDigest(eledigest
,obj
);
11343 obj
= hashTypeCurrent(hi
,REDIS_HASH_VALUE
);
11344 mixObjectDigest(eledigest
,obj
);
11346 xorDigest(digest
,eledigest
,20);
11348 hashTypeReleaseIterator(hi
);
11350 redisPanic("Unknown object type");
11352 /* If the key has an expire, add it to the mix */
11353 if (expiretime
!= -1) xorDigest(digest
,"!!expire!!",10);
11354 /* We can finally xor the key-val digest to the final digest */
11355 xorDigest(final
,digest
,20);
11356 decrRefCount(keyobj
);
11358 dictReleaseIterator(di
);
11362 static void debugCommand(redisClient
*c
) {
11363 if (!strcasecmp(c
->argv
[1]->ptr
,"segfault")) {
11364 *((char*)-1) = 'x';
11365 } else if (!strcasecmp(c
->argv
[1]->ptr
,"reload")) {
11366 if (rdbSave(server
.dbfilename
) != REDIS_OK
) {
11367 addReply(c
,shared
.err
);
11371 if (rdbLoad(server
.dbfilename
) != REDIS_OK
) {
11372 addReply(c
,shared
.err
);
11375 redisLog(REDIS_WARNING
,"DB reloaded by DEBUG RELOAD");
11376 addReply(c
,shared
.ok
);
11377 } else if (!strcasecmp(c
->argv
[1]->ptr
,"loadaof")) {
11379 if (loadAppendOnlyFile(server
.appendfilename
) != REDIS_OK
) {
11380 addReply(c
,shared
.err
);
11383 redisLog(REDIS_WARNING
,"Append Only File loaded by DEBUG LOADAOF");
11384 addReply(c
,shared
.ok
);
11385 } else if (!strcasecmp(c
->argv
[1]->ptr
,"object") && c
->argc
== 3) {
11386 dictEntry
*de
= dictFind(c
->db
->dict
,c
->argv
[2]->ptr
);
11390 addReply(c
,shared
.nokeyerr
);
11393 val
= dictGetEntryVal(de
);
11394 if (!server
.vm_enabled
|| (val
->storage
== REDIS_VM_MEMORY
||
11395 val
->storage
== REDIS_VM_SWAPPING
)) {
11399 if (val
->encoding
< (sizeof(strencoding
)/sizeof(char*))) {
11400 strenc
= strencoding
[val
->encoding
];
11402 snprintf(buf
,64,"unknown encoding %d\n", val
->encoding
);
11405 addReplySds(c
,sdscatprintf(sdsempty(),
11406 "+Value at:%p refcount:%d "
11407 "encoding:%s serializedlength:%lld\r\n",
11408 (void*)val
, val
->refcount
,
11409 strenc
, (long long) rdbSavedObjectLen(val
,NULL
)));
11411 vmpointer
*vp
= (vmpointer
*) val
;
11412 addReplySds(c
,sdscatprintf(sdsempty(),
11413 "+Value swapped at: page %llu "
11414 "using %llu pages\r\n",
11415 (unsigned long long) vp
->page
,
11416 (unsigned long long) vp
->usedpages
));
11418 } else if (!strcasecmp(c
->argv
[1]->ptr
,"swapin") && c
->argc
== 3) {
11419 lookupKeyRead(c
->db
,c
->argv
[2]);
11420 addReply(c
,shared
.ok
);
11421 } else if (!strcasecmp(c
->argv
[1]->ptr
,"swapout") && c
->argc
== 3) {
11422 dictEntry
*de
= dictFind(c
->db
->dict
,c
->argv
[2]->ptr
);
11426 if (!server
.vm_enabled
) {
11427 addReplySds(c
,sdsnew("-ERR Virtual Memory is disabled\r\n"));
11431 addReply(c
,shared
.nokeyerr
);
11434 val
= dictGetEntryVal(de
);
11436 if (val
->storage
!= REDIS_VM_MEMORY
) {
11437 addReplySds(c
,sdsnew("-ERR This key is not in memory\r\n"));
11438 } else if (val
->refcount
!= 1) {
11439 addReplySds(c
,sdsnew("-ERR Object is shared\r\n"));
11440 } else if ((vp
= vmSwapObjectBlocking(val
)) != NULL
) {
11441 dictGetEntryVal(de
) = vp
;
11442 addReply(c
,shared
.ok
);
11444 addReply(c
,shared
.err
);
11446 } else if (!strcasecmp(c
->argv
[1]->ptr
,"populate") && c
->argc
== 3) {
11451 if (getLongFromObjectOrReply(c
, c
->argv
[2], &keys
, NULL
) != REDIS_OK
)
11453 for (j
= 0; j
< keys
; j
++) {
11454 snprintf(buf
,sizeof(buf
),"key:%lu",j
);
11455 key
= createStringObject(buf
,strlen(buf
));
11456 if (lookupKeyRead(c
->db
,key
) != NULL
) {
11460 snprintf(buf
,sizeof(buf
),"value:%lu",j
);
11461 val
= createStringObject(buf
,strlen(buf
));
11462 dbAdd(c
->db
,key
,val
);
11465 addReply(c
,shared
.ok
);
11466 } else if (!strcasecmp(c
->argv
[1]->ptr
,"digest") && c
->argc
== 2) {
11467 unsigned char digest
[20];
11468 sds d
= sdsnew("+");
11471 computeDatasetDigest(digest
);
11472 for (j
= 0; j
< 20; j
++)
11473 d
= sdscatprintf(d
, "%02x",digest
[j
]);
11475 d
= sdscatlen(d
,"\r\n",2);
11478 addReplySds(c
,sdsnew(
11479 "-ERR Syntax error, try DEBUG [SEGFAULT|OBJECT <key>|SWAPIN <key>|SWAPOUT <key>|RELOAD]\r\n"));
11483 static void _redisAssert(char *estr
, char *file
, int line
) {
11484 redisLog(REDIS_WARNING
,"=== ASSERTION FAILED ===");
11485 redisLog(REDIS_WARNING
,"==> %s:%d '%s' is not true",file
,line
,estr
);
11486 #ifdef HAVE_BACKTRACE
11487 redisLog(REDIS_WARNING
,"(forcing SIGSEGV in order to print the stack trace)");
11488 *((char*)-1) = 'x';
11492 static void _redisPanic(char *msg
, char *file
, int line
) {
11493 redisLog(REDIS_WARNING
,"!!! Software Failure. Press left mouse button to continue");
11494 redisLog(REDIS_WARNING
,"Guru Meditation: %s #%s:%d",msg
,file
,line
);
11495 #ifdef HAVE_BACKTRACE
11496 redisLog(REDIS_WARNING
,"(forcing SIGSEGV in order to print the stack trace)");
11497 *((char*)-1) = 'x';
11501 /* =================================== Main! ================================ */
/* Read the current value of /proc/sys/vm/overcommit_memory.
 *
 * Returns the integer found in the file, or -1 if the file cannot be
 * opened, cannot be read, or does not start with a number (so that
 * unparsable content is never mistaken for the value 0). */
int linuxOvercommitMemoryValue(void) {
    FILE *fp = fopen("/proc/sys/vm/overcommit_memory","r");
    char buf[64];
    char *end;
    long value;

    if (!fp) return -1;
    if (fgets(buf,sizeof(buf),fp) == NULL) {
        fclose(fp);
        return -1;
    }
    fclose(fp);

    value = strtol(buf,&end,10);
    if (end == buf) return -1; /* no digits at all */
    return (int)value;
}
11518 void linuxOvercommitMemoryWarning(void) {
11519 if (linuxOvercommitMemoryValue() == 0) {
11520 redisLog(REDIS_WARNING
,"WARNING overcommit_memory is set to 0! Background save may fail under low memory condition. To fix this issue add 'vm.overcommit_memory = 1' to /etc/sysctl.conf and then reboot or run the command 'sysctl vm.overcommit_memory=1' for this to take effect.");
11523 #endif /* __linux__ */
11525 static void daemonize(void) {
11529 if (fork() != 0) exit(0); /* parent exits */
11530 setsid(); /* create a new session */
11532 /* Every output goes to /dev/null. If Redis is daemonized but
11533 * the 'logfile' is set to 'stdout' in the configuration file
11534 * it will not log at all. */
11535 if ((fd
= open("/dev/null", O_RDWR
, 0)) != -1) {
11536 dup2(fd
, STDIN_FILENO
);
11537 dup2(fd
, STDOUT_FILENO
);
11538 dup2(fd
, STDERR_FILENO
);
11539 if (fd
> STDERR_FILENO
) close(fd
);
11541 /* Try to write the pid file */
11542 fp
= fopen(server
.pidfile
,"w");
11544 fprintf(fp
,"%d\n",getpid());
11549 static void version() {
11550 printf("Redis server version %s (%s:%d)\n", REDIS_VERSION
,
11551 REDIS_GIT_SHA1
, atoi(REDIS_GIT_DIRTY
) > 0);
/* Print the command line help on standard error and exit with status 1. */
static void usage() {
    fputs("Usage: ./redis-server [/path/to/redis.conf]\n",stderr);
    fputs(" ./redis-server - (read config from stdin)\n",stderr);
    exit(1);
}
11561 int main(int argc
, char **argv
) {
11564 initServerConfig();
11565 sortCommandTable();
11567 if (strcmp(argv
[1], "-v") == 0 ||
11568 strcmp(argv
[1], "--version") == 0) version();
11569 if (strcmp(argv
[1], "--help") == 0) usage();
11570 resetServerSaveParams();
11571 loadServerConfig(argv
[1]);
11572 } else if ((argc
> 2)) {
11575 redisLog(REDIS_WARNING
,"Warning: no config file specified, using the default config. In order to specify a config file use 'redis-server /path/to/redis.conf'");
11577 if (server
.daemonize
) daemonize();
11579 redisLog(REDIS_NOTICE
,"Server started, Redis version " REDIS_VERSION
);
11581 linuxOvercommitMemoryWarning();
11583 start
= time(NULL
);
11584 if (server
.appendonly
) {
11585 if (loadAppendOnlyFile(server
.appendfilename
) == REDIS_OK
)
11586 redisLog(REDIS_NOTICE
,"DB loaded from append only file: %ld seconds",time(NULL
)-start
);
11588 if (rdbLoad(server
.dbfilename
) == REDIS_OK
)
11589 redisLog(REDIS_NOTICE
,"DB loaded from disk: %ld seconds",time(NULL
)-start
);
11591 redisLog(REDIS_NOTICE
,"The server is now ready to accept connections on port %d", server
.port
);
11592 aeSetBeforeSleepProc(server
.el
,beforeSleep
);
11594 aeDeleteEventLoop(server
.el
);
11598 /* ============================= Backtrace support ========================= */
11600 #ifdef HAVE_BACKTRACE
11601 static char *findFuncName(void *pointer
, unsigned long *offset
);
11603 static void *getMcontextEip(ucontext_t
*uc
) {
11604 #if defined(__FreeBSD__)
11605 return (void*) uc
->uc_mcontext
.mc_eip
;
11606 #elif defined(__dietlibc__)
11607 return (void*) uc
->uc_mcontext
.eip
;
11608 #elif defined(__APPLE__) && !defined(MAC_OS_X_VERSION_10_6)
11610 return (void*) uc
->uc_mcontext
->__ss
.__rip
;
11612 return (void*) uc
->uc_mcontext
->__ss
.__eip
;
11614 #elif defined(__APPLE__) && defined(MAC_OS_X_VERSION_10_6)
11615 #if defined(_STRUCT_X86_THREAD_STATE64) && !defined(__i386__)
11616 return (void*) uc
->uc_mcontext
->__ss
.__rip
;
11618 return (void*) uc
->uc_mcontext
->__ss
.__eip
;
11620 #elif defined(__i386__) || defined(__X86_64__) || defined(__x86_64__)
11621 return (void*) uc
->uc_mcontext
.gregs
[REG_EIP
]; /* Linux 32/64 bit */
11622 #elif defined(__ia64__) /* Linux IA64 */
11623 return (void*) uc
->uc_mcontext
.sc_ip
;
11629 static void segvHandler(int sig
, siginfo_t
*info
, void *secret
) {
11631 char **messages
= NULL
;
11632 int i
, trace_size
= 0;
11633 unsigned long offset
=0;
11634 ucontext_t
*uc
= (ucontext_t
*) secret
;
11636 REDIS_NOTUSED(info
);
11638 redisLog(REDIS_WARNING
,
11639 "======= Ooops! Redis %s got signal: -%d- =======", REDIS_VERSION
, sig
);
11640 infostring
= genRedisInfoString();
11641 redisLog(REDIS_WARNING
, "%s",infostring
);
11642 /* It's not safe to sdsfree() the returned string under memory
11643 * corruption conditions. Let it leak as we are going to abort */
11645 trace_size
= backtrace(trace
, 100);
11646 /* overwrite sigaction with caller's address */
11647 if (getMcontextEip(uc
) != NULL
) {
11648 trace
[1] = getMcontextEip(uc
);
11650 messages
= backtrace_symbols(trace
, trace_size
);
11652 for (i
=1; i
<trace_size
; ++i
) {
11653 char *fn
= findFuncName(trace
[i
], &offset
), *p
;
11655 p
= strchr(messages
[i
],'+');
11656 if (!fn
|| (p
&& ((unsigned long)strtol(p
+1,NULL
,10)) < offset
)) {
11657 redisLog(REDIS_WARNING
,"%s", messages
[i
]);
11659 redisLog(REDIS_WARNING
,"%d redis-server %p %s + %d", i
, trace
[i
], fn
, (unsigned int)offset
);
11662 /* free(messages); Don't call free() with possibly corrupted memory. */
11666 static void sigtermHandler(int sig
) {
11667 REDIS_NOTUSED(sig
);
11669 redisLog(REDIS_WARNING
,"SIGTERM received, scheduling shutting down...");
11670 server
.shutdown_asap
= 1;
11673 static void setupSigSegvAction(void) {
11674 struct sigaction act
;
11676 sigemptyset (&act
.sa_mask
);
11677 /* When the SA_SIGINFO flag is set in sa_flags then sa_sigaction
11678 * is used. Otherwise, sa_handler is used */
11679 act
.sa_flags
= SA_NODEFER
| SA_ONSTACK
| SA_RESETHAND
| SA_SIGINFO
;
11680 act
.sa_sigaction
= segvHandler
;
11681 sigaction (SIGSEGV
, &act
, NULL
);
11682 sigaction (SIGBUS
, &act
, NULL
);
11683 sigaction (SIGFPE
, &act
, NULL
);
11684 sigaction (SIGILL
, &act
, NULL
);
11685 sigaction (SIGBUS
, &act
, NULL
);
11687 act
.sa_flags
= SA_NODEFER
| SA_ONSTACK
| SA_RESETHAND
;
11688 act
.sa_handler
= sigtermHandler
;
11689 sigaction (SIGTERM
, &act
, NULL
);
11693 #include "staticsymbols.h"
11694 /* This function try to convert a pointer into a function name. It's used in
11695 * oreder to provide a backtrace under segmentation fault that's able to
11696 * display functions declared as static (otherwise the backtrace is useless). */
11697 static char *findFuncName(void *pointer
, unsigned long *offset
){
11699 unsigned long off
, minoff
= 0;
11701 /* Try to match against the Symbol with the smallest offset */
11702 for (i
=0; symsTable
[i
].pointer
; i
++) {
11703 unsigned long lp
= (unsigned long) pointer
;
11705 if (lp
!= (unsigned long)-1 && lp
>= symsTable
[i
].pointer
) {
11706 off
=lp
-symsTable
[i
].pointer
;
11707 if (ret
< 0 || off
< minoff
) {
11713 if (ret
== -1) return NULL
;
11715 return symsTable
[ret
].name
;
11717 #else /* HAVE_BACKTRACE */
/* Variant compiled when HAVE_BACKTRACE is not defined: nothing is
 * installed. */
static void setupSigSegvAction(void) {
}
11720 #endif /* HAVE_BACKTRACE */