2 * Copyright (c) 2009-2010, Salvatore Sanfilippo <antirez at gmail dot com>
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions are met:
8 * * Redistributions of source code must retain the above copyright notice,
9 * this list of conditions and the following disclaimer.
10 * * Redistributions in binary form must reproduce the above copyright
11 * notice, this list of conditions and the following disclaimer in the
12 * documentation and/or other materials provided with the distribution.
13 * * Neither the name of Redis nor the names of its contributors may be used
14 * to endorse or promote products derived from this software without
15 * specific prior written permission.
17 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
18 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
21 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
22 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
23 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
24 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
25 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
26 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
27 * POSSIBILITY OF SUCH DAMAGE.
30 #define REDIS_VERSION "2.1.1"
45 #endif /* HAVE_BACKTRACE */
53 #include <arpa/inet.h>
57 #include <sys/resource.h>
65 #include "solarisfixes.h"
69 #include "ae.h" /* Event driven programming library */
70 #include "sds.h" /* Dynamic safe strings */
71 #include "anet.h" /* Networking the easy way */
72 #include "dict.h" /* Hash tables */
73 #include "adlist.h" /* Linked lists */
74 #include "zmalloc.h" /* total memory usage aware version of malloc/free */
75 #include "lzf.h" /* LZF compression library */
76 #include "pqsort.h" /* Partial qsort for SORT+LIMIT */
77 #include "zipmap.h" /* Compact dictionary-alike data structure */
78 #include "sha1.h" /* SHA1 is used for DEBUG DIGEST */
79 #include "release.h" /* Release and/or git repository information */
85 /* Static server configuration */
86 #define REDIS_SERVERPORT 6379 /* TCP port */
87 #define REDIS_MAXIDLETIME (60*5) /* default client timeout */
88 #define REDIS_IOBUF_LEN 1024
89 #define REDIS_LOADBUF_LEN 1024
90 #define REDIS_STATIC_ARGS 8
91 #define REDIS_DEFAULT_DBNUM 16
92 #define REDIS_CONFIGLINE_MAX 1024
93 #define REDIS_OBJFREELIST_MAX 1000000 /* Max number of objects to cache */
94 #define REDIS_MAX_SYNC_TIME 60 /* Slave can't take more to sync */
95 #define REDIS_EXPIRELOOKUPS_PER_CRON 10 /* lookup 10 expires per loop */
96 #define REDIS_MAX_WRITE_PER_EVENT (1024*64)
97 #define REDIS_REQUEST_MAX_SIZE (1024*1024*256) /* max bytes in inline command */
99 /* If more than REDIS_WRITEV_THRESHOLD write packets are pending use writev */
100 #define REDIS_WRITEV_THRESHOLD 3
101 /* Max number of iovecs used for each writev call */
102 #define REDIS_WRITEV_IOVEC_COUNT 256
104 /* Hash table parameters */
105 #define REDIS_HT_MINFILL 10 /* Minimal hash table fill 10% */
108 #define REDIS_CMD_BULK 1 /* Bulk write command */
109 #define REDIS_CMD_INLINE 2 /* Inline command */
110 /* REDIS_CMD_DENYOOM deserves a longer comment: all the commands marked with
111 this flag will return an error when the 'maxmemory' option is set in the
112 config file and the server is using more than maxmemory bytes of memory.
113 In short these commands are denied on low memory conditions. */
114 #define REDIS_CMD_DENYOOM 4
115 #define REDIS_CMD_FORCE_REPLICATION 8 /* Force replication even if dirty is 0 */
118 #define REDIS_STRING 0
124 /* Objects encoding. Some kinds of objects like Strings and Hashes can be
125 * internally represented in multiple ways. The 'encoding' field of the object
126 * is set to one of these values for this object. */
127 #define REDIS_ENCODING_RAW 0 /* Raw representation */
128 #define REDIS_ENCODING_INT 1 /* Encoded as integer */
129 #define REDIS_ENCODING_ZIPMAP 2 /* Encoded as zipmap */
130 #define REDIS_ENCODING_HT 3 /* Encoded as an hash table */
132 static char* strencoding
[] = {
133 "raw", "int", "zipmap", "hashtable"
136 /* Object types only used for dumping to disk */
137 #define REDIS_EXPIRETIME 253
138 #define REDIS_SELECTDB 254
139 #define REDIS_EOF 255
141 /* Defines related to the dump file format. To store 32 bits lengths for short
142 * keys requires a lot of space, so we check the most significant 2 bits of
143 * the first byte to interpret the length:
145 * 00|000000 => if the two MSB are 00 the len is the 6 bits of this byte
146 * 01|000000 00000000 => 01, the len is 14 bits, 6 bits + 8 bits of next byte
147 * 10|000000 [32 bit integer] => if it's 10, a full 32 bit len will follow
148 * 11|000000 this means: specially encoded object will follow. The six bits
149 * number specify the kind of object that follows.
150 * See the REDIS_RDB_ENC_* defines.
152 * Lengths up to 63 are stored using a single byte, most DB keys, and many
153 * values, will fit inside. */
154 #define REDIS_RDB_6BITLEN 0
155 #define REDIS_RDB_14BITLEN 1
156 #define REDIS_RDB_32BITLEN 2
157 #define REDIS_RDB_ENCVAL 3
158 #define REDIS_RDB_LENERR UINT_MAX
160 /* When a length of a string object stored on disk has the first two bits
161 * set, the remaining six bits specify a special encoding for the object
162 * according to the following defines: */
163 #define REDIS_RDB_ENC_INT8 0 /* 8 bit signed integer */
164 #define REDIS_RDB_ENC_INT16 1 /* 16 bit signed integer */
165 #define REDIS_RDB_ENC_INT32 2 /* 32 bit signed integer */
166 #define REDIS_RDB_ENC_LZF 3 /* string compressed with FASTLZ */
168 /* Virtual memory object->where field. */
169 #define REDIS_VM_MEMORY 0 /* The object is on memory */
170 #define REDIS_VM_SWAPPED 1 /* The object is on disk */
171 #define REDIS_VM_SWAPPING 2 /* Redis is swapping this object on disk */
172 #define REDIS_VM_LOADING 3 /* Redis is loading this object from disk */
174 /* Virtual memory static configuration stuff.
175 * Check vmFindContiguousPages() to know more about these magic numbers. */
176 #define REDIS_VM_MAX_NEAR_PAGES 65536
177 #define REDIS_VM_MAX_RANDOM_JUMP 4096
178 #define REDIS_VM_MAX_THREADS 32
179 #define REDIS_THREAD_STACK_SIZE (1024*1024*4)
180 /* The following is the *percentage* of completed I/O jobs to process when the
181 * handler is called. While Virtual Memory I/O operations are performed by
182 * threads, these operations must be processed by the main thread when completed
183 * in order to take effect. */
184 #define REDIS_MAX_COMPLETED_JOBS_PROCESSED 1
187 #define REDIS_SLAVE 1 /* This client is a slave server */
188 #define REDIS_MASTER 2 /* This client is a master server */
189 #define REDIS_MONITOR 4 /* This client is a slave monitor, see MONITOR */
190 #define REDIS_MULTI 8 /* This client is in a MULTI context */
191 #define REDIS_BLOCKED 16 /* The client is waiting in a blocking operation */
192 #define REDIS_IO_WAIT 32 /* The client is waiting for Virtual Memory I/O */
193 #define REDIS_DIRTY_CAS 64 /* Watched keys modified. EXEC will fail. */
195 /* Slave replication state - slave side */
196 #define REDIS_REPL_NONE 0 /* No active replication */
197 #define REDIS_REPL_CONNECT 1 /* Must connect to master */
198 #define REDIS_REPL_CONNECTED 2 /* Connected to master */
200 /* Slave replication state - from the point of view of master
201 * Note that in SEND_BULK and ONLINE state the slave receives new updates
202 * in its output queue. In the WAIT_BGSAVE state instead the server is waiting
203 * to start the next background saving in order to send updates to it. */
204 #define REDIS_REPL_WAIT_BGSAVE_START 3 /* master waits bgsave to start feeding it */
205 #define REDIS_REPL_WAIT_BGSAVE_END 4 /* master waits bgsave to start bulk DB transmission */
206 #define REDIS_REPL_SEND_BULK 5 /* master is sending the bulk DB */
207 #define REDIS_REPL_ONLINE 6 /* bulk DB already transmitted, receive updates */
209 /* List related stuff */
213 /* Sort operations */
214 #define REDIS_SORT_GET 0
215 #define REDIS_SORT_ASC 1
216 #define REDIS_SORT_DESC 2
217 #define REDIS_SORTKEY_MAX 1024
220 #define REDIS_DEBUG 0
221 #define REDIS_VERBOSE 1
222 #define REDIS_NOTICE 2
223 #define REDIS_WARNING 3
225 /* Anti-warning macro... */
226 #define REDIS_NOTUSED(V) ((void) V)
228 #define ZSKIPLIST_MAXLEVEL 32 /* Should be enough for 2^32 elements */
229 #define ZSKIPLIST_P 0.25 /* Skiplist P = 1/4 */
231 /* Append only defines */
232 #define APPENDFSYNC_NO 0
233 #define APPENDFSYNC_ALWAYS 1
234 #define APPENDFSYNC_EVERYSEC 2
236 /* Hashes related defaults */
237 #define REDIS_HASH_MAX_ZIPMAP_ENTRIES 64
238 #define REDIS_HASH_MAX_ZIPMAP_VALUE 512
240 /* Assertion helpers. We can print the stacktrace, so our assert is defined
 * this way: when the checked expression is false, redisAssert() passes the
 * stringified expression together with __FILE__ and __LINE__ to
 * _redisAssert() and then calls _exit(1). redisPanic() does the same through
 * _redisPanic(), unconditionally. Note that redisPanic() stringifies its
 * argument with '#', so a string literal is forwarded with its quotes.
 * Both expansions are fully parenthesized so that the comma operator inside
 * them cannot bind to surrounding tokens at the call site. */
241 #define redisAssert(_e) ((_e)?(void)0 : (_redisAssert(#_e,__FILE__,__LINE__),_exit(1)))
242 #define redisPanic(_e) (_redisPanic(#_e,__FILE__,__LINE__),_exit(1))
243 static void _redisAssert(char *estr
, char *file
, int line
);
244 static void _redisPanic(char *msg
, char *file
, int line
);
246 /*================================= Data types ============================== */
248 /* A redis object, that is a type able to hold a string / list / set */
250 /* The VM object structure */
251 struct redisObjectVM
{
252 off_t page
; /* the page at witch the object is stored on disk */
253 off_t usedpages
; /* number of pages used on disk */
254 time_t atime
; /* Last access time */
257 /* The actual Redis Object */
258 typedef struct redisObject
{
261 unsigned char encoding
;
262 unsigned char storage
; /* If this object is a key, where is the value?
263 * REDIS_VM_MEMORY, REDIS_VM_SWAPPED, ... */
264 unsigned char vtype
; /* If this object is a key, and value is swapped out,
265 * this is the type of the swapped out object. */
267 /* VM fields, these are only allocated if VM is active, otherwise the
268 * object allocation function will just allocate
269 * sizeof(redisObject) minus sizeof(redisObjectVM), so using
270 * Redis without VM active will not have any overhead. */
271 struct redisObjectVM vm
;
274 /* Macro used to initialize a Redis object allocated on the stack.
275 * Note that this macro is kept near the structure definition to make sure
276 * we'll update it when the structure is changed, to avoid bugs like
277 * bug #85 introduced exactly in this way. */
278 #define initStaticStringObject(_var,_ptr) do { \
280 _var.type = REDIS_STRING; \
281 _var.encoding = REDIS_ENCODING_RAW; \
283 if (server.vm_enabled) _var.storage = REDIS_VM_MEMORY; \
286 typedef struct redisDb
{
287 dict
*dict
; /* The keyspace for this DB */
288 dict
*expires
; /* Timeout of keys with a timeout set */
289 dict
*blocking_keys
; /* Keys with clients waiting for data (BLPOP) */
290 dict
*io_keys
; /* Keys with clients waiting for VM I/O */
291 dict
*watched_keys
; /* WATCHED keys for MULTI/EXEC CAS */
295 /* Client MULTI/EXEC state */
296 typedef struct multiCmd
{
299 struct redisCommand
*cmd
;
302 typedef struct multiState
{
303 multiCmd
*commands
; /* Array of MULTI commands */
304 int count
; /* Total number of MULTI commands */
307 /* With multiplexing we need to keep per-client state.
308 * Clients are kept in a linked list. */
309 typedef struct redisClient
{
314 robj
**argv
, **mbargv
;
316 int bulklen
; /* bulk read len. -1 if not in bulk read mode */
317 int multibulk
; /* multi bulk command format active */
320 time_t lastinteraction
; /* time of the last interaction, used for timeout */
321 int flags
; /* REDIS_SLAVE | REDIS_MONITOR | REDIS_MULTI ... */
322 int slaveseldb
; /* slave selected db, if this client is a slave */
323 int authenticated
; /* when requirepass is non-NULL */
324 int replstate
; /* replication state if this is a slave */
325 int repldbfd
; /* replication DB file descriptor */
326 long repldboff
; /* replication DB file offset */
327 off_t repldbsize
; /* replication DB file size */
328 multiState mstate
; /* MULTI/EXEC state */
329 robj
**blocking_keys
; /* The key we are waiting to terminate a blocking
330 * operation such as BLPOP. Otherwise NULL. */
331 int blocking_keys_num
; /* Number of blocking keys */
332 time_t blockingto
; /* Blocking operation timeout. If UNIX current time
333 * is >= blockingto then the operation timed out. */
334 list
*io_keys
; /* Keys this client is waiting to be loaded from the
335 * swap file in order to continue. */
336 list
*watched_keys
; /* Keys WATCHED for MULTI/EXEC CAS */
337 dict
*pubsub_channels
; /* channels a client is interested in (SUBSCRIBE) */
338 list
*pubsub_patterns
; /* patterns a client is interested in (SUBSCRIBE) */
346 /* Global server state structure */
351 long long dirty
; /* changes to DB from the last save */
353 list
*slaves
, *monitors
;
354 char neterr
[ANET_ERR_LEN
];
356 int cronloops
; /* number of times the cron function run */
357 list
*objfreelist
; /* A list of freed objects to avoid malloc() */
358 time_t lastsave
; /* Unix time of last save succeeede */
359 /* Fields used only for stats */
360 time_t stat_starttime
; /* server start time */
361 long long stat_numcommands
; /* number of processed commands */
362 long long stat_numconnections
; /* number of connections received */
363 long long stat_expiredkeys
; /* number of expired keys */
372 int no_appendfsync_on_rewrite
;
378 pid_t bgsavechildpid
;
379 pid_t bgrewritechildpid
;
380 sds bgrewritebuf
; /* buffer taken by parent during oppend only rewrite */
381 sds aofbuf
; /* AOF buffer, written before entering the event loop */
382 struct saveparam
*saveparams
;
387 char *appendfilename
;
391 /* Replication related */
396 redisClient
*master
; /* client that is master for this slave */
398 unsigned int maxclients
;
399 unsigned long long maxmemory
;
400 unsigned int blpop_blocked_clients
;
401 unsigned int vm_blocked_clients
;
402 /* Sort parameters - qsort_r() is only available under BSD so we
403 * have to keep this state global, in order to pass it to sortCompare() */
407 /* Virtual memory configuration */
412 unsigned long long vm_max_memory
;
414 size_t hash_max_zipmap_entries
;
415 size_t hash_max_zipmap_value
;
416 /* Virtual memory state */
419 off_t vm_next_page
; /* Next probably empty page */
420 off_t vm_near_pages
; /* Number of pages allocated sequentially */
421 unsigned char *vm_bitmap
; /* Bitmap of free/used pages */
422 time_t unixtime
; /* Unix time sampled every second. */
423 /* Virtual memory I/O threads stuff */
424 /* An I/O thread processes an element taken from the io_newjobs queue and
425 * puts the result of the operation in the io_processed list. While the
426 * job is being processed, it's put on the io_processing queue. */
427 list
*io_newjobs
; /* List of VM I/O jobs yet to be processed */
428 list
*io_processing
; /* List of VM I/O jobs being processed */
429 list
*io_processed
; /* List of VM I/O jobs already processed */
430 list
*io_ready_clients
; /* Clients ready to be unblocked. All keys loaded */
431 pthread_mutex_t io_mutex
; /* lock to access io_jobs/io_done/io_thread_job */
432 pthread_mutex_t obj_freelist_mutex
; /* safe redis objects creation/free */
433 pthread_mutex_t io_swapfile_mutex
; /* So we can lseek + write */
434 pthread_attr_t io_threads_attr
; /* attributes for threads creation */
435 int io_active_threads
; /* Number of running I/O threads */
436 int vm_max_threads
; /* Max number of I/O threads running at the same time */
437 /* Our main thread is blocked on the event loop, looking for sockets ready
438 * to be read or written, so when a threaded I/O operation is ready to be
439 * processed by the main thread, the I/O thread will use a unix pipe to
440 * awake the main thread. The following are the two pipe FDs. */
441 int io_ready_pipe_read
;
442 int io_ready_pipe_write
;
443 /* Virtual memory stats */
444 unsigned long long vm_stats_used_pages
;
445 unsigned long long vm_stats_swapped_objects
;
446 unsigned long long vm_stats_swapouts
;
447 unsigned long long vm_stats_swapins
;
449 dict
*pubsub_channels
; /* Map channels to list of subscribed clients */
450 list
*pubsub_patterns
; /* A list of pubsub_patterns */
455 typedef struct pubsubPattern
{
460 typedef void redisCommandProc(redisClient
*c
);
461 typedef void redisVmPreloadProc(redisClient
*c
, struct redisCommand
*cmd
, int argc
, robj
**argv
);
462 struct redisCommand
{
464 redisCommandProc
*proc
;
467 /* Use a function to determine which keys need to be loaded
468 * in the background prior to executing this command. Takes precedence
469 * over vm_firstkey and others, ignored when NULL */
470 redisVmPreloadProc
*vm_preload_proc
;
471 /* What keys should be loaded in background when calling this command? */
472 int vm_firstkey
; /* The first argument that's a key (0 = no keys) */
473 int vm_lastkey
; /* THe last argument that's a key */
474 int vm_keystep
; /* The step between first and last key */
477 struct redisFunctionSym
{
479 unsigned long pointer
;
482 typedef struct _redisSortObject
{
490 typedef struct _redisSortOperation
{
493 } redisSortOperation
;
495 /* ZSETs use a specialized version of Skiplists */
497 typedef struct zskiplistNode
{
498 struct zskiplistNode
**forward
;
499 struct zskiplistNode
*backward
;
505 typedef struct zskiplist
{
506 struct zskiplistNode
*header
, *tail
;
507 unsigned long length
;
511 typedef struct zset
{
516 /* Our shared "common" objects */
518 #define REDIS_SHARED_INTEGERS 10000
519 struct sharedObjectsStruct
{
520 robj
*crlf
, *ok
, *err
, *emptybulk
, *czero
, *cone
, *pong
, *space
,
521 *colon
, *nullbulk
, *nullmultibulk
, *queued
,
522 *emptymultibulk
, *wrongtypeerr
, *nokeyerr
, *syntaxerr
, *sameobjecterr
,
523 *outofrangeerr
, *plus
,
524 *select0
, *select1
, *select2
, *select3
, *select4
,
525 *select5
, *select6
, *select7
, *select8
, *select9
,
526 *messagebulk
, *pmessagebulk
, *subscribebulk
, *unsubscribebulk
, *mbulk3
,
527 *mbulk4
, *psubscribebulk
, *punsubscribebulk
,
528 *integers
[REDIS_SHARED_INTEGERS
];
531 /* Global vars that are actually used as constants. The following double
532 * values are used for double on-disk serialization, and are initialized
533 * at runtime to avoid strange compiler optimizations. */
535 static double R_Zero
, R_PosInf
, R_NegInf
, R_Nan
;
537 /* VM threaded I/O request message */
538 #define REDIS_IOJOB_LOAD 0 /* Load from disk to memory */
539 #define REDIS_IOJOB_PREPARE_SWAP 1 /* Compute needed pages */
540 #define REDIS_IOJOB_DO_SWAP 2 /* Swap from memory to disk */
541 typedef struct iojob
{
542 int type
; /* Request type, REDIS_IOJOB_* */
543 redisDb
*db
;/* Redis database */
544 robj
*key
; /* This I/O request is about swapping this key */
545 robj
*val
; /* the value to swap for REDIS_IOREQ_*_SWAP, otherwise this
546 * field is populated by the I/O thread for REDIS_IOREQ_LOAD. */
547 off_t page
; /* Swap page where to read/write the object */
548 off_t pages
; /* Swap pages needed to save object. PREPARE_SWAP return val */
549 int canceled
; /* True if this command was canceled by blocking side of VM */
550 pthread_t thread
; /* ID of the thread processing this entry */
553 /*================================ Prototypes =============================== */
555 static void freeStringObject(robj
*o
);
556 static void freeListObject(robj
*o
);
557 static void freeSetObject(robj
*o
);
558 static void decrRefCount(void *o
);
559 static robj
*createObject(int type
, void *ptr
);
560 static void freeClient(redisClient
*c
);
561 static int rdbLoad(char *filename
);
562 static void addReply(redisClient
*c
, robj
*obj
);
563 static void addReplySds(redisClient
*c
, sds s
);
564 static void incrRefCount(robj
*o
);
565 static int rdbSaveBackground(char *filename
);
566 static robj
*createStringObject(char *ptr
, size_t len
);
567 static robj
*dupStringObject(robj
*o
);
568 static void replicationFeedSlaves(list
*slaves
, int dictid
, robj
**argv
, int argc
);
569 static void replicationFeedMonitors(list
*monitors
, int dictid
, robj
**argv
, int argc
);
570 static void flushAppendOnlyFile(void);
571 static void feedAppendOnlyFile(struct redisCommand
*cmd
, int dictid
, robj
**argv
, int argc
);
572 static int syncWithMaster(void);
573 static robj
*tryObjectEncoding(robj
*o
);
574 static robj
*getDecodedObject(robj
*o
);
575 static int removeExpire(redisDb
*db
, robj
*key
);
576 static int expireIfNeeded(redisDb
*db
, robj
*key
);
577 static int deleteIfVolatile(redisDb
*db
, robj
*key
);
578 static int deleteIfSwapped(redisDb
*db
, robj
*key
);
579 static int deleteKey(redisDb
*db
, robj
*key
);
580 static time_t getExpire(redisDb
*db
, robj
*key
);
581 static int setExpire(redisDb
*db
, robj
*key
, time_t when
);
582 static void updateSlavesWaitingBgsave(int bgsaveerr
);
583 static void freeMemoryIfNeeded(void);
584 static int processCommand(redisClient
*c
);
585 static void setupSigSegvAction(void);
586 static void rdbRemoveTempFile(pid_t childpid
);
587 static void aofRemoveTempFile(pid_t childpid
);
588 static size_t stringObjectLen(robj
*o
);
589 static void processInputBuffer(redisClient
*c
);
590 static zskiplist
*zslCreate(void);
591 static void zslFree(zskiplist
*zsl
);
592 static void zslInsert(zskiplist
*zsl
, double score
, robj
*obj
);
593 static void sendReplyToClientWritev(aeEventLoop
*el
, int fd
, void *privdata
, int mask
);
594 static void initClientMultiState(redisClient
*c
);
595 static void freeClientMultiState(redisClient
*c
);
596 static void queueMultiCommand(redisClient
*c
, struct redisCommand
*cmd
);
597 static void unblockClientWaitingData(redisClient
*c
);
598 static int handleClientsWaitingListPush(redisClient
*c
, robj
*key
, robj
*ele
);
599 static void vmInit(void);
600 static void vmMarkPagesFree(off_t page
, off_t count
);
601 static robj
*vmLoadObject(robj
*key
);
602 static robj
*vmPreviewObject(robj
*key
);
603 static int vmSwapOneObjectBlocking(void);
604 static int vmSwapOneObjectThreaded(void);
605 static int vmCanSwapOut(void);
606 static int tryFreeOneObjectFromFreelist(void);
607 static void acceptHandler(aeEventLoop
*el
, int fd
, void *privdata
, int mask
);
608 static void vmThreadedIOCompletedJob(aeEventLoop
*el
, int fd
, void *privdata
, int mask
);
609 static void vmCancelThreadedIOJob(robj
*o
);
610 static void lockThreadedIO(void);
611 static void unlockThreadedIO(void);
612 static int vmSwapObjectThreaded(robj
*key
, robj
*val
, redisDb
*db
);
613 static void freeIOJob(iojob
*j
);
614 static void queueIOJob(iojob
*j
);
615 static int vmWriteObjectOnSwap(robj
*o
, off_t page
);
616 static robj
*vmReadObjectFromSwap(off_t page
, int type
);
617 static void waitEmptyIOJobsQueue(void);
618 static void vmReopenSwapFile(void);
619 static int vmFreePage(off_t page
);
620 static void zunionInterBlockClientOnSwappedKeys(redisClient
*c
, struct redisCommand
*cmd
, int argc
, robj
**argv
);
621 static void execBlockClientOnSwappedKeys(redisClient
*c
, struct redisCommand
*cmd
, int argc
, robj
**argv
);
622 static int blockClientOnSwappedKeys(redisClient
*c
, struct redisCommand
*cmd
);
623 static int dontWaitForSwappedKey(redisClient
*c
, robj
*key
);
624 static void handleClientsBlockedOnSwappedKey(redisDb
*db
, robj
*key
);
625 static void readQueryFromClient(aeEventLoop
*el
, int fd
, void *privdata
, int mask
);
626 static struct redisCommand
*lookupCommand(char *name
);
627 static void call(redisClient
*c
, struct redisCommand
*cmd
);
628 static void resetClient(redisClient
*c
);
629 static void convertToRealHash(robj
*o
);
630 static int pubsubUnsubscribeAllChannels(redisClient
*c
, int notify
);
631 static int pubsubUnsubscribeAllPatterns(redisClient
*c
, int notify
);
632 static void freePubsubPattern(void *p
);
633 static int listMatchPubsubPattern(void *a
, void *b
);
634 static int compareStringObjects(robj
*a
, robj
*b
);
635 static int equalStringObjects(robj
*a
, robj
*b
);
637 static int rewriteAppendOnlyFileBackground(void);
638 static int vmSwapObjectBlocking(robj
*key
, robj
*val
);
639 static int prepareForShutdown();
640 static void touchWatchedKey(redisDb
*db
, robj
*key
);
641 static void touchWatchedKeysOnFlush(int dbid
);
642 static void unwatchAllKeys(redisClient
*c
);
644 static void authCommand(redisClient
*c
);
645 static void pingCommand(redisClient
*c
);
646 static void echoCommand(redisClient
*c
);
647 static void setCommand(redisClient
*c
);
648 static void setnxCommand(redisClient
*c
);
649 static void setexCommand(redisClient
*c
);
650 static void getCommand(redisClient
*c
);
651 static void delCommand(redisClient
*c
);
652 static void existsCommand(redisClient
*c
);
653 static void incrCommand(redisClient
*c
);
654 static void decrCommand(redisClient
*c
);
655 static void incrbyCommand(redisClient
*c
);
656 static void decrbyCommand(redisClient
*c
);
657 static void selectCommand(redisClient
*c
);
658 static void randomkeyCommand(redisClient
*c
);
659 static void keysCommand(redisClient
*c
);
660 static void dbsizeCommand(redisClient
*c
);
661 static void lastsaveCommand(redisClient
*c
);
662 static void saveCommand(redisClient
*c
);
663 static void bgsaveCommand(redisClient
*c
);
664 static void bgrewriteaofCommand(redisClient
*c
);
665 static void shutdownCommand(redisClient
*c
);
666 static void moveCommand(redisClient
*c
);
667 static void renameCommand(redisClient
*c
);
668 static void renamenxCommand(redisClient
*c
);
669 static void lpushCommand(redisClient
*c
);
670 static void rpushCommand(redisClient
*c
);
671 static void lpopCommand(redisClient
*c
);
672 static void rpopCommand(redisClient
*c
);
673 static void llenCommand(redisClient
*c
);
674 static void lindexCommand(redisClient
*c
);
675 static void lrangeCommand(redisClient
*c
);
676 static void ltrimCommand(redisClient
*c
);
677 static void typeCommand(redisClient
*c
);
678 static void lsetCommand(redisClient
*c
);
679 static void saddCommand(redisClient
*c
);
680 static void sremCommand(redisClient
*c
);
681 static void smoveCommand(redisClient
*c
);
682 static void sismemberCommand(redisClient
*c
);
683 static void scardCommand(redisClient
*c
);
684 static void spopCommand(redisClient
*c
);
685 static void srandmemberCommand(redisClient
*c
);
686 static void sinterCommand(redisClient
*c
);
687 static void sinterstoreCommand(redisClient
*c
);
688 static void sunionCommand(redisClient
*c
);
689 static void sunionstoreCommand(redisClient
*c
);
690 static void sdiffCommand(redisClient
*c
);
691 static void sdiffstoreCommand(redisClient
*c
);
692 static void syncCommand(redisClient
*c
);
693 static void flushdbCommand(redisClient
*c
);
694 static void flushallCommand(redisClient
*c
);
695 static void sortCommand(redisClient
*c
);
696 static void lremCommand(redisClient
*c
);
697 static void rpoplpushcommand(redisClient
*c
);
698 static void infoCommand(redisClient
*c
);
699 static void mgetCommand(redisClient
*c
);
700 static void monitorCommand(redisClient
*c
);
701 static void expireCommand(redisClient
*c
);
702 static void expireatCommand(redisClient
*c
);
703 static void getsetCommand(redisClient
*c
);
704 static void ttlCommand(redisClient
*c
);
705 static void slaveofCommand(redisClient
*c
);
706 static void debugCommand(redisClient
*c
);
707 static void msetCommand(redisClient
*c
);
708 static void msetnxCommand(redisClient
*c
);
709 static void zaddCommand(redisClient
*c
);
710 static void zincrbyCommand(redisClient
*c
);
711 static void zrangeCommand(redisClient
*c
);
712 static void zrangebyscoreCommand(redisClient
*c
);
713 static void zcountCommand(redisClient
*c
);
714 static void zrevrangeCommand(redisClient
*c
);
715 static void zcardCommand(redisClient
*c
);
716 static void zremCommand(redisClient
*c
);
717 static void zscoreCommand(redisClient
*c
);
718 static void zremrangebyscoreCommand(redisClient
*c
);
719 static void multiCommand(redisClient
*c
);
720 static void execCommand(redisClient
*c
);
721 static void discardCommand(redisClient
*c
);
722 static void blpopCommand(redisClient
*c
);
723 static void brpopCommand(redisClient
*c
);
724 static void appendCommand(redisClient
*c
);
725 static void substrCommand(redisClient
*c
);
726 static void zrankCommand(redisClient
*c
);
727 static void zrevrankCommand(redisClient
*c
);
728 static void hsetCommand(redisClient
*c
);
729 static void hsetnxCommand(redisClient
*c
);
730 static void hgetCommand(redisClient
*c
);
731 static void hmsetCommand(redisClient
*c
);
732 static void hmgetCommand(redisClient
*c
);
733 static void hdelCommand(redisClient
*c
);
734 static void hlenCommand(redisClient
*c
);
735 static void zremrangebyrankCommand(redisClient
*c
);
736 static void zunionstoreCommand(redisClient
*c
);
737 static void zinterstoreCommand(redisClient
*c
);
738 static void hkeysCommand(redisClient
*c
);
739 static void hvalsCommand(redisClient
*c
);
740 static void hgetallCommand(redisClient
*c
);
741 static void hexistsCommand(redisClient
*c
);
742 static void configCommand(redisClient
*c
);
743 static void hincrbyCommand(redisClient
*c
);
744 static void subscribeCommand(redisClient
*c
);
745 static void unsubscribeCommand(redisClient
*c
);
746 static void psubscribeCommand(redisClient
*c
);
747 static void punsubscribeCommand(redisClient
*c
);
748 static void publishCommand(redisClient
*c
);
749 static void watchCommand(redisClient
*c
);
750 static void unwatchCommand(redisClient
*c
);
752 /*================================= Globals ================================= */
755 static struct redisServer server
; /* server global state */
756 static struct redisCommand
*commandTable
;
757 static struct redisCommand readonlyCommandTable
[] = {
758 {"get",getCommand
,2,REDIS_CMD_INLINE
,NULL
,1,1,1},
759 {"set",setCommand
,3,REDIS_CMD_BULK
|REDIS_CMD_DENYOOM
,NULL
,0,0,0},
760 {"setnx",setnxCommand
,3,REDIS_CMD_BULK
|REDIS_CMD_DENYOOM
,NULL
,0,0,0},
761 {"setex",setexCommand
,4,REDIS_CMD_BULK
|REDIS_CMD_DENYOOM
,NULL
,0,0,0},
762 {"append",appendCommand
,3,REDIS_CMD_BULK
|REDIS_CMD_DENYOOM
,NULL
,1,1,1},
763 {"substr",substrCommand
,4,REDIS_CMD_INLINE
,NULL
,1,1,1},
764 {"del",delCommand
,-2,REDIS_CMD_INLINE
,NULL
,0,0,0},
765 {"exists",existsCommand
,2,REDIS_CMD_INLINE
,NULL
,1,1,1},
766 {"incr",incrCommand
,2,REDIS_CMD_INLINE
|REDIS_CMD_DENYOOM
,NULL
,1,1,1},
767 {"decr",decrCommand
,2,REDIS_CMD_INLINE
|REDIS_CMD_DENYOOM
,NULL
,1,1,1},
768 {"mget",mgetCommand
,-2,REDIS_CMD_INLINE
,NULL
,1,-1,1},
769 {"rpush",rpushCommand
,3,REDIS_CMD_BULK
|REDIS_CMD_DENYOOM
,NULL
,1,1,1},
770 {"lpush",lpushCommand
,3,REDIS_CMD_BULK
|REDIS_CMD_DENYOOM
,NULL
,1,1,1},
771 {"rpop",rpopCommand
,2,REDIS_CMD_INLINE
,NULL
,1,1,1},
772 {"lpop",lpopCommand
,2,REDIS_CMD_INLINE
,NULL
,1,1,1},
773 {"brpop",brpopCommand
,-3,REDIS_CMD_INLINE
,NULL
,1,1,1},
774 {"blpop",blpopCommand
,-3,REDIS_CMD_INLINE
,NULL
,1,1,1},
775 {"llen",llenCommand
,2,REDIS_CMD_INLINE
,NULL
,1,1,1},
776 {"lindex",lindexCommand
,3,REDIS_CMD_INLINE
,NULL
,1,1,1},
777 {"lset",lsetCommand
,4,REDIS_CMD_BULK
|REDIS_CMD_DENYOOM
,NULL
,1,1,1},
778 {"lrange",lrangeCommand
,4,REDIS_CMD_INLINE
,NULL
,1,1,1},
779 {"ltrim",ltrimCommand
,4,REDIS_CMD_INLINE
,NULL
,1,1,1},
780 {"lrem",lremCommand
,4,REDIS_CMD_BULK
,NULL
,1,1,1},
781 {"rpoplpush",rpoplpushcommand
,3,REDIS_CMD_INLINE
|REDIS_CMD_DENYOOM
,NULL
,1,2,1},
782 {"sadd",saddCommand
,3,REDIS_CMD_BULK
|REDIS_CMD_DENYOOM
,NULL
,1,1,1},
783 {"srem",sremCommand
,3,REDIS_CMD_BULK
,NULL
,1,1,1},
784 {"smove",smoveCommand
,4,REDIS_CMD_BULK
,NULL
,1,2,1},
785 {"sismember",sismemberCommand
,3,REDIS_CMD_BULK
,NULL
,1,1,1},
786 {"scard",scardCommand
,2,REDIS_CMD_INLINE
,NULL
,1,1,1},
787 {"spop",spopCommand
,2,REDIS_CMD_INLINE
,NULL
,1,1,1},
788 {"srandmember",srandmemberCommand
,2,REDIS_CMD_INLINE
,NULL
,1,1,1},
789 {"sinter",sinterCommand
,-2,REDIS_CMD_INLINE
|REDIS_CMD_DENYOOM
,NULL
,1,-1,1},
790 {"sinterstore",sinterstoreCommand
,-3,REDIS_CMD_INLINE
|REDIS_CMD_DENYOOM
,NULL
,2,-1,1},
791 {"sunion",sunionCommand
,-2,REDIS_CMD_INLINE
|REDIS_CMD_DENYOOM
,NULL
,1,-1,1},
792 {"sunionstore",sunionstoreCommand
,-3,REDIS_CMD_INLINE
|REDIS_CMD_DENYOOM
,NULL
,2,-1,1},
793 {"sdiff",sdiffCommand
,-2,REDIS_CMD_INLINE
|REDIS_CMD_DENYOOM
,NULL
,1,-1,1},
794 {"sdiffstore",sdiffstoreCommand
,-3,REDIS_CMD_INLINE
|REDIS_CMD_DENYOOM
,NULL
,2,-1,1},
795 {"smembers",sinterCommand
,2,REDIS_CMD_INLINE
,NULL
,1,1,1},
796 {"zadd",zaddCommand
,4,REDIS_CMD_BULK
|REDIS_CMD_DENYOOM
,NULL
,1,1,1},
797 {"zincrby",zincrbyCommand
,4,REDIS_CMD_BULK
|REDIS_CMD_DENYOOM
,NULL
,1,1,1},
798 {"zrem",zremCommand
,3,REDIS_CMD_BULK
,NULL
,1,1,1},
799 {"zremrangebyscore",zremrangebyscoreCommand
,4,REDIS_CMD_INLINE
,NULL
,1,1,1},
800 {"zremrangebyrank",zremrangebyrankCommand
,4,REDIS_CMD_INLINE
,NULL
,1,1,1},
801 {"zunionstore",zunionstoreCommand
,-4,REDIS_CMD_INLINE
|REDIS_CMD_DENYOOM
,zunionInterBlockClientOnSwappedKeys
,0,0,0},
802 {"zinterstore",zinterstoreCommand
,-4,REDIS_CMD_INLINE
|REDIS_CMD_DENYOOM
,zunionInterBlockClientOnSwappedKeys
,0,0,0},
803 {"zrange",zrangeCommand
,-4,REDIS_CMD_INLINE
,NULL
,1,1,1},
804 {"zrangebyscore",zrangebyscoreCommand
,-4,REDIS_CMD_INLINE
,NULL
,1,1,1},
805 {"zcount",zcountCommand
,4,REDIS_CMD_INLINE
,NULL
,1,1,1},
806 {"zrevrange",zrevrangeCommand
,-4,REDIS_CMD_INLINE
,NULL
,1,1,1},
807 {"zcard",zcardCommand
,2,REDIS_CMD_INLINE
,NULL
,1,1,1},
808 {"zscore",zscoreCommand
,3,REDIS_CMD_BULK
|REDIS_CMD_DENYOOM
,NULL
,1,1,1},
809 {"zrank",zrankCommand
,3,REDIS_CMD_BULK
,NULL
,1,1,1},
810 {"zrevrank",zrevrankCommand
,3,REDIS_CMD_BULK
,NULL
,1,1,1},
811 {"hset",hsetCommand
,4,REDIS_CMD_BULK
|REDIS_CMD_DENYOOM
,NULL
,1,1,1},
812 {"hsetnx",hsetnxCommand
,4,REDIS_CMD_BULK
|REDIS_CMD_DENYOOM
,NULL
,1,1,1},
813 {"hget",hgetCommand
,3,REDIS_CMD_BULK
,NULL
,1,1,1},
814 {"hmset",hmsetCommand
,-4,REDIS_CMD_BULK
|REDIS_CMD_DENYOOM
,NULL
,1,1,1},
815 {"hmget",hmgetCommand
,-3,REDIS_CMD_BULK
,NULL
,1,1,1},
816 {"hincrby",hincrbyCommand
,4,REDIS_CMD_INLINE
|REDIS_CMD_DENYOOM
,NULL
,1,1,1},
817 {"hdel",hdelCommand
,3,REDIS_CMD_BULK
,NULL
,1,1,1},
818 {"hlen",hlenCommand
,2,REDIS_CMD_INLINE
,NULL
,1,1,1},
819 {"hkeys",hkeysCommand
,2,REDIS_CMD_INLINE
,NULL
,1,1,1},
820 {"hvals",hvalsCommand
,2,REDIS_CMD_INLINE
,NULL
,1,1,1},
821 {"hgetall",hgetallCommand
,2,REDIS_CMD_INLINE
,NULL
,1,1,1},
822 {"hexists",hexistsCommand
,3,REDIS_CMD_BULK
,NULL
,1,1,1},
823 {"incrby",incrbyCommand
,3,REDIS_CMD_INLINE
|REDIS_CMD_DENYOOM
,NULL
,1,1,1},
824 {"decrby",decrbyCommand
,3,REDIS_CMD_INLINE
|REDIS_CMD_DENYOOM
,NULL
,1,1,1},
825 {"getset",getsetCommand
,3,REDIS_CMD_BULK
|REDIS_CMD_DENYOOM
,NULL
,1,1,1},
826 {"mset",msetCommand
,-3,REDIS_CMD_BULK
|REDIS_CMD_DENYOOM
,NULL
,1,-1,2},
827 {"msetnx",msetnxCommand
,-3,REDIS_CMD_BULK
|REDIS_CMD_DENYOOM
,NULL
,1,-1,2},
828 {"randomkey",randomkeyCommand
,1,REDIS_CMD_INLINE
,NULL
,0,0,0},
829 {"select",selectCommand
,2,REDIS_CMD_INLINE
,NULL
,0,0,0},
830 {"move",moveCommand
,3,REDIS_CMD_INLINE
,NULL
,1,1,1},
831 {"rename",renameCommand
,3,REDIS_CMD_INLINE
,NULL
,1,1,1},
832 {"renamenx",renamenxCommand
,3,REDIS_CMD_INLINE
,NULL
,1,1,1},
833 {"expire",expireCommand
,3,REDIS_CMD_INLINE
,NULL
,0,0,0},
834 {"expireat",expireatCommand
,3,REDIS_CMD_INLINE
,NULL
,0,0,0},
835 {"keys",keysCommand
,2,REDIS_CMD_INLINE
,NULL
,0,0,0},
836 {"dbsize",dbsizeCommand
,1,REDIS_CMD_INLINE
,NULL
,0,0,0},
837 {"auth",authCommand
,2,REDIS_CMD_INLINE
,NULL
,0,0,0},
838 {"ping",pingCommand
,1,REDIS_CMD_INLINE
,NULL
,0,0,0},
839 {"echo",echoCommand
,2,REDIS_CMD_BULK
,NULL
,0,0,0},
840 {"save",saveCommand
,1,REDIS_CMD_INLINE
,NULL
,0,0,0},
841 {"bgsave",bgsaveCommand
,1,REDIS_CMD_INLINE
,NULL
,0,0,0},
842 {"bgrewriteaof",bgrewriteaofCommand
,1,REDIS_CMD_INLINE
,NULL
,0,0,0},
843 {"shutdown",shutdownCommand
,1,REDIS_CMD_INLINE
,NULL
,0,0,0},
844 {"lastsave",lastsaveCommand
,1,REDIS_CMD_INLINE
,NULL
,0,0,0},
845 {"type",typeCommand
,2,REDIS_CMD_INLINE
,NULL
,1,1,1},
846 {"multi",multiCommand
,1,REDIS_CMD_INLINE
,NULL
,0,0,0},
847 {"exec",execCommand
,1,REDIS_CMD_INLINE
|REDIS_CMD_DENYOOM
,execBlockClientOnSwappedKeys
,0,0,0},
848 {"discard",discardCommand
,1,REDIS_CMD_INLINE
,NULL
,0,0,0},
849 {"sync",syncCommand
,1,REDIS_CMD_INLINE
,NULL
,0,0,0},
850 {"flushdb",flushdbCommand
,1,REDIS_CMD_INLINE
,NULL
,0,0,0},
851 {"flushall",flushallCommand
,1,REDIS_CMD_INLINE
,NULL
,0,0,0},
852 {"sort",sortCommand
,-2,REDIS_CMD_INLINE
|REDIS_CMD_DENYOOM
,NULL
,1,1,1},
853 {"info",infoCommand
,1,REDIS_CMD_INLINE
,NULL
,0,0,0},
854 {"monitor",monitorCommand
,1,REDIS_CMD_INLINE
,NULL
,0,0,0},
855 {"ttl",ttlCommand
,2,REDIS_CMD_INLINE
,NULL
,1,1,1},
856 {"slaveof",slaveofCommand
,3,REDIS_CMD_INLINE
,NULL
,0,0,0},
857 {"debug",debugCommand
,-2,REDIS_CMD_INLINE
,NULL
,0,0,0},
858 {"config",configCommand
,-2,REDIS_CMD_BULK
,NULL
,0,0,0},
859 {"subscribe",subscribeCommand
,-2,REDIS_CMD_INLINE
,NULL
,0,0,0},
860 {"unsubscribe",unsubscribeCommand
,-1,REDIS_CMD_INLINE
,NULL
,0,0,0},
861 {"psubscribe",psubscribeCommand
,-2,REDIS_CMD_INLINE
,NULL
,0,0,0},
862 {"punsubscribe",punsubscribeCommand
,-1,REDIS_CMD_INLINE
,NULL
,0,0,0},
863 {"publish",publishCommand
,3,REDIS_CMD_BULK
|REDIS_CMD_FORCE_REPLICATION
,NULL
,0,0,0},
864 {"watch",watchCommand
,-2,REDIS_CMD_INLINE
,NULL
,0,0,0},
865 {"unwatch",unwatchCommand
,1,REDIS_CMD_INLINE
,NULL
,0,0,0}
868 /*============================ Utility functions ============================ */
870 /* Glob-style pattern matching. */
871 static int stringmatchlen(const char *pattern
, int patternLen
,
872 const char *string
, int stringLen
, int nocase
)
877 while (pattern
[1] == '*') {
882 return 1; /* match */
884 if (stringmatchlen(pattern
+1, patternLen
-1,
885 string
, stringLen
, nocase
))
886 return 1; /* match */
890 return 0; /* no match */
894 return 0; /* no match */
904 not = pattern
[0] == '^';
911 if (pattern
[0] == '\\') {
914 if (pattern
[0] == string
[0])
916 } else if (pattern
[0] == ']') {
918 } else if (patternLen
== 0) {
922 } else if (pattern
[1] == '-' && patternLen
>= 3) {
923 int start
= pattern
[0];
924 int end
= pattern
[2];
932 start
= tolower(start
);
938 if (c
>= start
&& c
<= end
)
942 if (pattern
[0] == string
[0])
945 if (tolower((int)pattern
[0]) == tolower((int)string
[0]))
955 return 0; /* no match */
961 if (patternLen
>= 2) {
968 if (pattern
[0] != string
[0])
969 return 0; /* no match */
971 if (tolower((int)pattern
[0]) != tolower((int)string
[0]))
972 return 0; /* no match */
980 if (stringLen
== 0) {
981 while(*pattern
== '*') {
988 if (patternLen
== 0 && stringLen
== 0)
/* Convenience front-end to stringmatchlen() for NUL-terminated inputs.
 * The length of both 'pattern' and 'string' is taken with strlen() and
 * 'nocase' is forwarded untouched. The return value is whatever
 * stringmatchlen() returns. */
static int stringmatch(const char *pattern, const char *string, int nocase) {
    int patternLen = strlen(pattern);
    int stringLen = strlen(string);

    return stringmatchlen(pattern, patternLen, string, stringLen, nocase);
}
997 /* Convert a string representing an amount of memory into the number of
998 * bytes, so for instance memtoll("1Gb") will return 1073741824 that is
1001 * On parsing error, if *err is not NULL, it's set to 1, otherwise it's
1003 static long long memtoll(const char *p
, int *err
) {
1006 long mul
; /* unit multiplier */
1008 unsigned int digits
;
1011 /* Search the first non digit character. */
1014 while(*u
&& isdigit(*u
)) u
++;
1015 if (*u
== '\0' || !strcasecmp(u
,"b")) {
1017 } else if (!strcasecmp(u
,"k")) {
1019 } else if (!strcasecmp(u
,"kb")) {
1021 } else if (!strcasecmp(u
,"m")) {
1023 } else if (!strcasecmp(u
,"mb")) {
1025 } else if (!strcasecmp(u
,"g")) {
1026 mul
= 1000L*1000*1000;
1027 } else if (!strcasecmp(u
,"gb")) {
1028 mul
= 1024L*1024*1024;
1034 if (digits
>= sizeof(buf
)) {
1038 memcpy(buf
,p
,digits
);
1040 val
= strtoll(buf
,NULL
,10);
1044 /* Convert a long long into a string. Returns the number of
1045 * characters needed to represent the number, that can be shorter if passed
1046 * buffer length is not enough to store the whole number. */
1047 static int ll2string(char *s
, size_t len
, long long value
) {
1049 unsigned long long v
;
1052 if (len
== 0) return 0;
1053 v
= (value
< 0) ? -value
: value
;
1054 p
= buf
+31; /* point to the last character */
1059 if (value
< 0) *p
-- = '-';
1062 if (l
+1 > len
) l
= len
-1; /* Make sure it fits, including the nul term */
1068 static void redisLog(int level
, const char *fmt
, ...) {
1072 fp
= (server
.logfile
== NULL
) ? stdout
: fopen(server
.logfile
,"a");
1076 if (level
>= server
.verbosity
) {
1082 strftime(buf
,64,"%d %b %H:%M:%S",localtime(&now
));
1083 fprintf(fp
,"[%d] %s %c ",(int)getpid(),buf
,c
[level
]);
1084 vfprintf(fp
, fmt
, ap
);
1090 if (server
.logfile
) fclose(fp
);
1093 /*====================== Hash table type implementation ==================== */
1095 /* This is a hash table type that uses the SDS dynamic strings library as
1096 * keys and redis objects as values (objects can hold SDS strings,
1099 static void dictVanillaFree(void *privdata
, void *val
)
1101 DICT_NOTUSED(privdata
);
1105 static void dictListDestructor(void *privdata
, void *val
)
1107 DICT_NOTUSED(privdata
);
1108 listRelease((list
*)val
);
1111 static int sdsDictKeyCompare(void *privdata
, const void *key1
,
1115 DICT_NOTUSED(privdata
);
1117 l1
= sdslen((sds
)key1
);
1118 l2
= sdslen((sds
)key2
);
1119 if (l1
!= l2
) return 0;
1120 return memcmp(key1
, key2
, l1
) == 0;
1123 static void dictRedisObjectDestructor(void *privdata
, void *val
)
1125 DICT_NOTUSED(privdata
);
1127 if (val
== NULL
) return; /* Values of swapped out keys as set to NULL */
1131 static int dictObjKeyCompare(void *privdata
, const void *key1
,
1134 const robj
*o1
= key1
, *o2
= key2
;
1135 return sdsDictKeyCompare(privdata
,o1
->ptr
,o2
->ptr
);
1138 static unsigned int dictObjHash(const void *key
) {
1139 const robj
*o
= key
;
1140 return dictGenHashFunction(o
->ptr
, sdslen((sds
)o
->ptr
));
1143 static int dictEncObjKeyCompare(void *privdata
, const void *key1
,
1146 robj
*o1
= (robj
*) key1
, *o2
= (robj
*) key2
;
1149 if (o1
->encoding
== REDIS_ENCODING_INT
&&
1150 o2
->encoding
== REDIS_ENCODING_INT
)
1151 return o1
->ptr
== o2
->ptr
;
1153 o1
= getDecodedObject(o1
);
1154 o2
= getDecodedObject(o2
);
1155 cmp
= sdsDictKeyCompare(privdata
,o1
->ptr
,o2
->ptr
);
1161 static unsigned int dictEncObjHash(const void *key
) {
1162 robj
*o
= (robj
*) key
;
1164 if (o
->encoding
== REDIS_ENCODING_RAW
) {
1165 return dictGenHashFunction(o
->ptr
, sdslen((sds
)o
->ptr
));
1167 if (o
->encoding
== REDIS_ENCODING_INT
) {
1171 len
= ll2string(buf
,32,(long)o
->ptr
);
1172 return dictGenHashFunction((unsigned char*)buf
, len
);
1176 o
= getDecodedObject(o
);
1177 hash
= dictGenHashFunction(o
->ptr
, sdslen((sds
)o
->ptr
));
1184 /* Sets type and expires */
1185 static dictType setDictType
= {
1186 dictEncObjHash
, /* hash function */
1189 dictEncObjKeyCompare
, /* key compare */
1190 dictRedisObjectDestructor
, /* key destructor */
1191 NULL
/* val destructor */
1194 /* Sorted sets hash (note: a skiplist is used in addition to the hash table) */
1195 static dictType zsetDictType
= {
1196 dictEncObjHash
, /* hash function */
1199 dictEncObjKeyCompare
, /* key compare */
1200 dictRedisObjectDestructor
, /* key destructor */
1201 dictVanillaFree
/* val destructor of malloc(sizeof(double)) */
1205 static dictType dbDictType
= {
1206 dictObjHash
, /* hash function */
1209 dictObjKeyCompare
, /* key compare */
1210 dictRedisObjectDestructor
, /* key destructor */
1211 dictRedisObjectDestructor
/* val destructor */
1215 static dictType keyptrDictType
= {
1216 dictObjHash
, /* hash function */
1219 dictObjKeyCompare
, /* key compare */
1220 dictRedisObjectDestructor
, /* key destructor */
1221 NULL
/* val destructor */
1224 /* Hash type hash table (note that small hashes are represented with zipmaps) */
1225 static dictType hashDictType
= {
1226 dictEncObjHash
, /* hash function */
1229 dictEncObjKeyCompare
, /* key compare */
1230 dictRedisObjectDestructor
, /* key destructor */
1231 dictRedisObjectDestructor
/* val destructor */
1234 /* Keylist hash table type has unencoded redis objects as keys and
1235 * lists as values. It's used for blocking operations (BLPOP) and to
1236 * map swapped keys to a list of clients waiting for these keys to be loaded. */
1237 static dictType keylistDictType
= {
1238 dictObjHash
, /* hash function */
1241 dictObjKeyCompare
, /* key compare */
1242 dictRedisObjectDestructor
, /* key destructor */
1243 dictListDestructor
/* val destructor */
1246 static void version();
1248 /* ========================= Random utility functions ======================= */
1250 /* Redis generally does not try to recover from out of memory conditions
1251 * when allocating objects or strings, it is not clear if it will be possible
1252 * to report this condition to the client since the networking layer itself
1253 * is based on heap allocation for send buffers, so we simply abort.
1254 * At least the code will be simpler to read... */
1255 static void oom(const char *msg
) {
1256 redisLog(REDIS_WARNING
, "%s: Out of memory\n",msg
);
1261 /* ====================== Redis server networking stuff ===================== */
1262 static void closeTimedoutClients(void) {
1265 time_t now
= time(NULL
);
1268 listRewind(server
.clients
,&li
);
1269 while ((ln
= listNext(&li
)) != NULL
) {
1270 c
= listNodeValue(ln
);
1271 if (server
.maxidletime
&&
1272 !(c
->flags
& REDIS_SLAVE
) && /* no timeout for slaves */
1273 !(c
->flags
& REDIS_MASTER
) && /* no timeout for masters */
1274 dictSize(c
->pubsub_channels
) == 0 && /* no timeout for pubsub */
1275 listLength(c
->pubsub_patterns
) == 0 &&
1276 (now
- c
->lastinteraction
> server
.maxidletime
))
1278 redisLog(REDIS_VERBOSE
,"Closing idle client");
1280 } else if (c
->flags
& REDIS_BLOCKED
) {
1281 if (c
->blockingto
!= 0 && c
->blockingto
< now
) {
1282 addReply(c
,shared
.nullmultibulk
);
1283 unblockClientWaitingData(c
);
1289 static int htNeedsResize(dict
*dict
) {
1290 long long size
, used
;
1292 size
= dictSlots(dict
);
1293 used
= dictSize(dict
);
1294 return (size
&& used
&& size
> DICT_HT_INITIAL_SIZE
&&
1295 (used
*100/size
< REDIS_HT_MINFILL
));
1298 /* If the percentage of used slots in the HT reaches REDIS_HT_MINFILL
1299 * we resize the hash table to save memory */
1300 static void tryResizeHashTables(void) {
1303 for (j
= 0; j
< server
.dbnum
; j
++) {
1304 if (htNeedsResize(server
.db
[j
].dict
))
1305 dictResize(server
.db
[j
].dict
);
1306 if (htNeedsResize(server
.db
[j
].expires
))
1307 dictResize(server
.db
[j
].expires
);
1311 /* Our hash table implementation performs rehashing incrementally while
1312 * we write/read from the hash table. Still if the server is idle, the hash
1313 * table will use two tables for a long time. So we try to use 1 millisecond
1314 * of CPU time at every serverCron() loop in order to rehash some key. */
1315 static void incrementallyRehash(void) {
1318 for (j
= 0; j
< server
.dbnum
; j
++) {
1319 if (dictIsRehashing(server
.db
[j
].dict
)) {
1320 dictRehashMilliseconds(server
.db
[j
].dict
,1);
1321 break; /* already used our millisecond for this loop... */
1326 /* A background saving child (BGSAVE) terminated its work. Handle this. */
/* 'statloc' is the wait status of the saving child; it is decoded below
 * with WEXITSTATUS() and WIFSIGNALED(). Depending on the outcome a
 * success, error, or "terminated by signal" message is logged. On success
 * server.lastsave is refreshed; in the signal case rdbRemoveTempFile() is
 * called with the child's pid. In every case server.bgsavechildpid is
 * reset to -1 and updateSlavesWaitingBgsave() is called with the outcome. */
1327 void backgroundSaveDoneHandler(int statloc
) {
1328 int exitcode
= WEXITSTATUS(statloc
);
1329 int bysignal
= WIFSIGNALED(statloc
);
/* Normal exit with status 0: the save completed. */
1331 if (!bysignal
&& exitcode
== 0) {
1332 redisLog(REDIS_NOTICE
,
1333 "Background saving terminated with success");
1335 server
.lastsave
= time(NULL
);
/* Normal exit with a non-zero status: the child reported a failure. */
1336 } else if (!bysignal
&& exitcode
!= 0) {
1337 redisLog(REDIS_WARNING
, "Background saving error");
/* Remaining case: the child was terminated by a signal. */
1339 redisLog(REDIS_WARNING
,
1340 "Background saving terminated by signal %d", WTERMSIG(statloc
));
1341 rdbRemoveTempFile(server
.bgsavechildpid
);
/* No saving child is running any longer. */
1343 server
.bgsavechildpid
= -1;
1344 /* Possibly there are slaves waiting for a BGSAVE in order to be served
1345 * (the first stage of SYNC is a bulk transfer of dump.rdb) */
/* NOTE(review): only 'exitcode' is tested here, not 'bysignal'. POSIX
 * defines WEXITSTATUS() only for children that exited normally, so when
 * the child was killed by a signal this may still pass REDIS_OK to the
 * waiting slaves -- confirm, and consider testing
 * (!bysignal && exitcode == 0) instead. */
1346 updateSlavesWaitingBgsave(exitcode
== 0 ? REDIS_OK
: REDIS_ERR
);
1349 /* A background append only file rewriting (BGREWRITEAOF) terminated its work.
1351 void backgroundRewriteDoneHandler(int statloc
) {
1352 int exitcode
= WEXITSTATUS(statloc
);
1353 int bysignal
= WIFSIGNALED(statloc
);
1355 if (!bysignal
&& exitcode
== 0) {
1359 redisLog(REDIS_NOTICE
,
1360 "Background append only file rewriting terminated with success");
1361 /* Now it's time to flush the differences accumulated by the parent */
1362 snprintf(tmpfile
,256,"temp-rewriteaof-bg-%d.aof", (int) server
.bgrewritechildpid
);
1363 fd
= open(tmpfile
,O_WRONLY
|O_APPEND
);
1365 redisLog(REDIS_WARNING
, "Not able to open the temp append only file produced by the child: %s", strerror(errno
));
1368 /* Flush our data... */
1369 if (write(fd
,server
.bgrewritebuf
,sdslen(server
.bgrewritebuf
)) !=
1370 (signed) sdslen(server
.bgrewritebuf
)) {
1371 redisLog(REDIS_WARNING
, "Error or short write trying to flush the parent diff of the append log file in the child temp file: %s", strerror(errno
));
1375 redisLog(REDIS_NOTICE
,"Parent diff flushed into the new append log file with success (%lu bytes)",sdslen(server
.bgrewritebuf
));
1376 /* Now our work is to rename the temp file into the stable file. And
1377 * switch the file descriptor used by the server for append only. */
1378 if (rename(tmpfile
,server
.appendfilename
) == -1) {
1379 redisLog(REDIS_WARNING
,"Can't rename the temp append only file into the stable one: %s", strerror(errno
));
1383 /* Mission completed... almost */
1384 redisLog(REDIS_NOTICE
,"Append only file successfully rewritten.");
1385 if (server
.appendfd
!= -1) {
1386 /* If append only is actually enabled... */
1387 close(server
.appendfd
);
1388 server
.appendfd
= fd
;
1389 if (server
.appendfsync
!= APPENDFSYNC_NO
) aof_fsync(fd
);
1390 server
.appendseldb
= -1; /* Make sure it will issue SELECT */
1391 redisLog(REDIS_NOTICE
,"The new append only file was selected for future appends.");
1393 /* If append only is disabled we just generate a dump in this
1394 * format. Why not? */
1397 } else if (!bysignal
&& exitcode
!= 0) {
1398 redisLog(REDIS_WARNING
, "Background append only file rewriting error");
1400 redisLog(REDIS_WARNING
,
1401 "Background append only file rewriting terminated by signal %d",
1405 sdsfree(server
.bgrewritebuf
);
1406 server
.bgrewritebuf
= sdsempty();
1407 aofRemoveTempFile(server
.bgrewritechildpid
);
1408 server
.bgrewritechildpid
= -1;
1411 /* This function is called once a background process of some kind terminates,
1412 * as we want to avoid resizing the hash tables when there is a child in order
1413 * to play well with copy-on-write (otherwise when a resize happens lots of
1414 * memory pages are copied). The goal of this function is to update the ability
1415 * for dict.c to resize the hash tables accordingly to the fact we have or not
1416 * running children. */
1417 static void updateDictResizePolicy(void) {
1418 if (server
.bgsavechildpid
== -1 && server
.bgrewritechildpid
== -1)
1421 dictDisableResize();
1424 static int serverCron(struct aeEventLoop
*eventLoop
, long long id
, void *clientData
) {
1425 int j
, loops
= server
.cronloops
++;
1426 REDIS_NOTUSED(eventLoop
);
1428 REDIS_NOTUSED(clientData
);
1430 /* We take a cached value of the unix time in the global state because
1431 * with virtual memory and aging there is the need to store the current time
1432 * in objects at every object access, and accuracy is not needed.
1433 * To access a global var is faster than calling time(NULL) */
1434 server
.unixtime
= time(NULL
);
1436 /* We received a SIGTERM, shutting down here in a safe way, as it is
1437 * not ok doing so inside the signal handler. */
1438 if (server
.shutdown_asap
) {
1439 if (prepareForShutdown() == REDIS_OK
) exit(0);
1440 redisLog(REDIS_WARNING
,"SIGTERM received but errors trying to shut down the server, check the logs for more information");
1443 /* Show some info about non-empty databases */
1444 for (j
= 0; j
< server
.dbnum
; j
++) {
1445 long long size
, used
, vkeys
;
1447 size
= dictSlots(server
.db
[j
].dict
);
1448 used
= dictSize(server
.db
[j
].dict
);
1449 vkeys
= dictSize(server
.db
[j
].expires
);
1450 if (!(loops
% 50) && (used
|| vkeys
)) {
1451 redisLog(REDIS_VERBOSE
,"DB %d: %lld keys (%lld volatile) in %lld slots HT.",j
,used
,vkeys
,size
);
1452 /* dictPrintStats(server.dict); */
1456 /* We don't want to resize the hash tables while a background saving
1457 * is in progress: the saving child is created using fork() that is
1458 * implemented with a copy-on-write semantic in most modern systems, so
1459 * if we resize the HT while there is the saving child at work actually
1460 * a lot of memory movements in the parent will cause a lot of pages
1462 if (server
.bgsavechildpid
== -1 && server
.bgrewritechildpid
== -1) {
1463 if (!(loops
% 10)) tryResizeHashTables();
1464 if (server
.activerehashing
) incrementallyRehash();
1467 /* Show information about connected clients */
1468 if (!(loops
% 50)) {
1469 redisLog(REDIS_VERBOSE
,"%d clients connected (%d slaves), %zu bytes in use",
1470 listLength(server
.clients
)-listLength(server
.slaves
),
1471 listLength(server
.slaves
),
1472 zmalloc_used_memory());
1475 /* Close connections of timedout clients */
1476 if ((server
.maxidletime
&& !(loops
% 100)) || server
.blpop_blocked_clients
)
1477 closeTimedoutClients();
1479 /* Check if a background saving or AOF rewrite in progress terminated */
1480 if (server
.bgsavechildpid
!= -1 || server
.bgrewritechildpid
!= -1) {
1484 if ((pid
= wait3(&statloc
,WNOHANG
,NULL
)) != 0) {
1485 if (pid
== server
.bgsavechildpid
) {
1486 backgroundSaveDoneHandler(statloc
);
1488 backgroundRewriteDoneHandler(statloc
);
1490 updateDictResizePolicy();
1493 /* If there is not a background saving in progress check if
1494 * we have to save now */
1495 time_t now
= time(NULL
);
1496 for (j
= 0; j
< server
.saveparamslen
; j
++) {
1497 struct saveparam
*sp
= server
.saveparams
+j
;
1499 if (server
.dirty
>= sp
->changes
&&
1500 now
-server
.lastsave
> sp
->seconds
) {
1501 redisLog(REDIS_NOTICE
,"%d changes in %d seconds. Saving...",
1502 sp
->changes
, sp
->seconds
);
1503 rdbSaveBackground(server
.dbfilename
);
1509 /* Try to expire a few timed out keys. The algorithm used is adaptive and
1510 * will use few CPU cycles if there are few expiring keys, otherwise
1511 * it will get more aggressive to avoid that too much memory is used by
1512 * keys that can be removed from the keyspace. */
1513 for (j
= 0; j
< server
.dbnum
; j
++) {
1515 redisDb
*db
= server
.db
+j
;
1517 /* Continue to expire if at the end of the cycle more than 25%
1518 * of the keys were expired. */
1520 long num
= dictSize(db
->expires
);
1521 time_t now
= time(NULL
);
1524 if (num
> REDIS_EXPIRELOOKUPS_PER_CRON
)
1525 num
= REDIS_EXPIRELOOKUPS_PER_CRON
;
1530 if ((de
= dictGetRandomKey(db
->expires
)) == NULL
) break;
1531 t
= (time_t) dictGetEntryVal(de
);
1533 deleteKey(db
,dictGetEntryKey(de
));
1535 server
.stat_expiredkeys
++;
1538 } while (expired
> REDIS_EXPIRELOOKUPS_PER_CRON
/4);
1541 /* Swap a few keys on disk if we are over the memory limit and VM
1542 * is enabled. Try to free objects from the free list first. */
1543 if (vmCanSwapOut()) {
1544 while (server
.vm_enabled
&& zmalloc_used_memory() >
1545 server
.vm_max_memory
)
1549 if (tryFreeOneObjectFromFreelist() == REDIS_OK
) continue;
1550 retval
= (server
.vm_max_threads
== 0) ?
1551 vmSwapOneObjectBlocking() :
1552 vmSwapOneObjectThreaded();
1553 if (retval
== REDIS_ERR
&& !(loops
% 300) &&
1554 zmalloc_used_memory() >
1555 (server
.vm_max_memory
+server
.vm_max_memory
/10))
1557 redisLog(REDIS_WARNING
,"WARNING: vm-max-memory limit exceeded by more than 10%% but unable to swap more objects out!");
1559 /* Note that when using threaded I/O we free just one object,
1560 * because anyway when the I/O thread in charge to swap this
1561 * object out will finish, the handler of completed jobs
1562 * will try to swap more objects if we are still out of memory. */
1563 if (retval
== REDIS_ERR
|| server
.vm_max_threads
> 0) break;
1567 /* Check if we should connect to a MASTER */
1568 if (server
.replstate
== REDIS_REPL_CONNECT
&& !(loops
% 10)) {
1569 redisLog(REDIS_NOTICE
,"Connecting to MASTER...");
1570 if (syncWithMaster() == REDIS_OK
) {
1571 redisLog(REDIS_NOTICE
,"MASTER <-> SLAVE sync succeeded");
1572 if (server
.appendonly
) rewriteAppendOnlyFileBackground();
1578 /* This function gets called every time Redis is entering the
1579 * main loop of the event driven library, that is, before to sleep
1580 * for ready file descriptors. */
1581 static void beforeSleep(struct aeEventLoop
*eventLoop
) {
1582 REDIS_NOTUSED(eventLoop
);
1584 /* Awake clients that got all the swapped keys they requested */
1585 if (server
.vm_enabled
&& listLength(server
.io_ready_clients
)) {
1589 listRewind(server
.io_ready_clients
,&li
);
1590 while((ln
= listNext(&li
))) {
1591 redisClient
*c
= ln
->value
;
1592 struct redisCommand
*cmd
;
1594 /* Resume the client. */
1595 listDelNode(server
.io_ready_clients
,ln
);
1596 c
->flags
&= (~REDIS_IO_WAIT
);
1597 server
.vm_blocked_clients
--;
1598 aeCreateFileEvent(server
.el
, c
->fd
, AE_READABLE
,
1599 readQueryFromClient
, c
);
1600 cmd
= lookupCommand(c
->argv
[0]->ptr
);
1601 assert(cmd
!= NULL
);
1604 /* There may be more data to process in the input buffer. */
1605 if (c
->querybuf
&& sdslen(c
->querybuf
) > 0)
1606 processInputBuffer(c
);
1609 /* Write the AOF buffer on disk */
1610 flushAppendOnlyFile();
1613 static void createSharedObjects(void) {
1616 shared
.crlf
= createObject(REDIS_STRING
,sdsnew("\r\n"));
1617 shared
.ok
= createObject(REDIS_STRING
,sdsnew("+OK\r\n"));
1618 shared
.err
= createObject(REDIS_STRING
,sdsnew("-ERR\r\n"));
1619 shared
.emptybulk
= createObject(REDIS_STRING
,sdsnew("$0\r\n\r\n"));
1620 shared
.czero
= createObject(REDIS_STRING
,sdsnew(":0\r\n"));
1621 shared
.cone
= createObject(REDIS_STRING
,sdsnew(":1\r\n"));
1622 shared
.nullbulk
= createObject(REDIS_STRING
,sdsnew("$-1\r\n"));
1623 shared
.nullmultibulk
= createObject(REDIS_STRING
,sdsnew("*-1\r\n"));
1624 shared
.emptymultibulk
= createObject(REDIS_STRING
,sdsnew("*0\r\n"));
1625 shared
.pong
= createObject(REDIS_STRING
,sdsnew("+PONG\r\n"));
1626 shared
.queued
= createObject(REDIS_STRING
,sdsnew("+QUEUED\r\n"));
1627 shared
.wrongtypeerr
= createObject(REDIS_STRING
,sdsnew(
1628 "-ERR Operation against a key holding the wrong kind of value\r\n"));
1629 shared
.nokeyerr
= createObject(REDIS_STRING
,sdsnew(
1630 "-ERR no such key\r\n"));
1631 shared
.syntaxerr
= createObject(REDIS_STRING
,sdsnew(
1632 "-ERR syntax error\r\n"));
1633 shared
.sameobjecterr
= createObject(REDIS_STRING
,sdsnew(
1634 "-ERR source and destination objects are the same\r\n"));
1635 shared
.outofrangeerr
= createObject(REDIS_STRING
,sdsnew(
1636 "-ERR index out of range\r\n"));
1637 shared
.space
= createObject(REDIS_STRING
,sdsnew(" "));
1638 shared
.colon
= createObject(REDIS_STRING
,sdsnew(":"));
1639 shared
.plus
= createObject(REDIS_STRING
,sdsnew("+"));
1640 shared
.select0
= createStringObject("select 0\r\n",10);
1641 shared
.select1
= createStringObject("select 1\r\n",10);
1642 shared
.select2
= createStringObject("select 2\r\n",10);
1643 shared
.select3
= createStringObject("select 3\r\n",10);
1644 shared
.select4
= createStringObject("select 4\r\n",10);
1645 shared
.select5
= createStringObject("select 5\r\n",10);
1646 shared
.select6
= createStringObject("select 6\r\n",10);
1647 shared
.select7
= createStringObject("select 7\r\n",10);
1648 shared
.select8
= createStringObject("select 8\r\n",10);
1649 shared
.select9
= createStringObject("select 9\r\n",10);
1650 shared
.messagebulk
= createStringObject("$7\r\nmessage\r\n",13);
1651 shared
.pmessagebulk
= createStringObject("$8\r\npmessage\r\n",14);
1652 shared
.subscribebulk
= createStringObject("$9\r\nsubscribe\r\n",15);
1653 shared
.unsubscribebulk
= createStringObject("$11\r\nunsubscribe\r\n",18);
1654 shared
.psubscribebulk
= createStringObject("$10\r\npsubscribe\r\n",17);
1655 shared
.punsubscribebulk
= createStringObject("$12\r\npunsubscribe\r\n",19);
1656 shared
.mbulk3
= createStringObject("*3\r\n",4);
1657 shared
.mbulk4
= createStringObject("*4\r\n",4);
1658 for (j
= 0; j
< REDIS_SHARED_INTEGERS
; j
++) {
1659 shared
.integers
[j
] = createObject(REDIS_STRING
,(void*)(long)j
);
1660 shared
.integers
[j
]->encoding
= REDIS_ENCODING_INT
;
1664 static void appendServerSaveParams(time_t seconds
, int changes
) {
1665 server
.saveparams
= zrealloc(server
.saveparams
,sizeof(struct saveparam
)*(server
.saveparamslen
+1));
1666 server
.saveparams
[server
.saveparamslen
].seconds
= seconds
;
1667 server
.saveparams
[server
.saveparamslen
].changes
= changes
;
1668 server
.saveparamslen
++;
1671 static void resetServerSaveParams() {
1672 zfree(server
.saveparams
);
1673 server
.saveparams
= NULL
;
1674 server
.saveparamslen
= 0;
1677 static void initServerConfig() {
1678 server
.dbnum
= REDIS_DEFAULT_DBNUM
;
1679 server
.port
= REDIS_SERVERPORT
;
1680 server
.verbosity
= REDIS_VERBOSE
;
1681 server
.maxidletime
= REDIS_MAXIDLETIME
;
1682 server
.saveparams
= NULL
;
1683 server
.logfile
= NULL
; /* NULL = log on standard output */
1684 server
.bindaddr
= NULL
;
1685 server
.glueoutputbuf
= 1;
1686 server
.daemonize
= 0;
1687 server
.appendonly
= 0;
1688 server
.appendfsync
= APPENDFSYNC_EVERYSEC
;
1689 server
.no_appendfsync_on_rewrite
= 0;
1690 server
.lastfsync
= time(NULL
);
1691 server
.appendfd
= -1;
1692 server
.appendseldb
= -1; /* Make sure the first time will not match */
1693 server
.pidfile
= zstrdup("/var/run/redis.pid");
1694 server
.dbfilename
= zstrdup("dump.rdb");
1695 server
.appendfilename
= zstrdup("appendonly.aof");
1696 server
.requirepass
= NULL
;
1697 server
.rdbcompression
= 1;
1698 server
.activerehashing
= 1;
1699 server
.maxclients
= 0;
1700 server
.blpop_blocked_clients
= 0;
1701 server
.maxmemory
= 0;
1702 server
.vm_enabled
= 0;
1703 server
.vm_swap_file
= zstrdup("/tmp/redis-%p.vm");
1704 server
.vm_page_size
= 256; /* 256 bytes per page */
1705 server
.vm_pages
= 1024*1024*100; /* 104 millions of pages */
1706 server
.vm_max_memory
= 1024LL*1024*1024*1; /* 1 GB of RAM */
1707 server
.vm_max_threads
= 4;
1708 server
.vm_blocked_clients
= 0;
1709 server
.hash_max_zipmap_entries
= REDIS_HASH_MAX_ZIPMAP_ENTRIES
;
1710 server
.hash_max_zipmap_value
= REDIS_HASH_MAX_ZIPMAP_VALUE
;
1711 server
.shutdown_asap
= 0;
1713 resetServerSaveParams();
1715 appendServerSaveParams(60*60,1); /* save after 1 hour and 1 change */
1716 appendServerSaveParams(300,100); /* save after 5 minutes and 100 changes */
1717 appendServerSaveParams(60,10000); /* save after 1 minute and 10000 changes */
1718 /* Replication related */
1720 server
.masterauth
= NULL
;
1721 server
.masterhost
= NULL
;
1722 server
.masterport
= 6379;
1723 server
.master
= NULL
;
1724 server
.replstate
= REDIS_REPL_NONE
;
1726 /* Double constants initialization */
1728 R_PosInf
= 1.0/R_Zero
;
1729 R_NegInf
= -1.0/R_Zero
;
1730 R_Nan
= R_Zero
/R_Zero
;
1733 static void initServer() {
1736 signal(SIGHUP
, SIG_IGN
);
1737 signal(SIGPIPE
, SIG_IGN
);
1738 setupSigSegvAction();
1740 server
.devnull
= fopen("/dev/null","w");
1741 if (server
.devnull
== NULL
) {
1742 redisLog(REDIS_WARNING
, "Can't open /dev/null: %s", server
.neterr
);
1745 server
.clients
= listCreate();
1746 server
.slaves
= listCreate();
1747 server
.monitors
= listCreate();
1748 server
.objfreelist
= listCreate();
1749 createSharedObjects();
1750 server
.el
= aeCreateEventLoop();
1751 server
.db
= zmalloc(sizeof(redisDb
)*server
.dbnum
);
1752 server
.fd
= anetTcpServer(server
.neterr
, server
.port
, server
.bindaddr
);
1753 if (server
.fd
== -1) {
1754 redisLog(REDIS_WARNING
, "Opening TCP port: %s", server
.neterr
);
1757 for (j
= 0; j
< server
.dbnum
; j
++) {
1758 server
.db
[j
].dict
= dictCreate(&dbDictType
,NULL
);
1759 server
.db
[j
].expires
= dictCreate(&keyptrDictType
,NULL
);
1760 server
.db
[j
].blocking_keys
= dictCreate(&keylistDictType
,NULL
);
1761 server
.db
[j
].watched_keys
= dictCreate(&keylistDictType
,NULL
);
1762 if (server
.vm_enabled
)
1763 server
.db
[j
].io_keys
= dictCreate(&keylistDictType
,NULL
);
1764 server
.db
[j
].id
= j
;
1766 server
.pubsub_channels
= dictCreate(&keylistDictType
,NULL
);
1767 server
.pubsub_patterns
= listCreate();
1768 listSetFreeMethod(server
.pubsub_patterns
,freePubsubPattern
);
1769 listSetMatchMethod(server
.pubsub_patterns
,listMatchPubsubPattern
);
1770 server
.cronloops
= 0;
1771 server
.bgsavechildpid
= -1;
1772 server
.bgrewritechildpid
= -1;
1773 server
.bgrewritebuf
= sdsempty();
1774 server
.aofbuf
= sdsempty();
1775 server
.lastsave
= time(NULL
);
1777 server
.stat_numcommands
= 0;
1778 server
.stat_numconnections
= 0;
1779 server
.stat_expiredkeys
= 0;
1780 server
.stat_starttime
= time(NULL
);
1781 server
.unixtime
= time(NULL
);
1782 aeCreateTimeEvent(server
.el
, 1, serverCron
, NULL
, NULL
);
1783 if (aeCreateFileEvent(server
.el
, server
.fd
, AE_READABLE
,
1784 acceptHandler
, NULL
) == AE_ERR
) oom("creating file event");
1786 if (server
.appendonly
) {
1787 server
.appendfd
= open(server
.appendfilename
,O_WRONLY
|O_APPEND
|O_CREAT
,0644);
1788 if (server
.appendfd
== -1) {
1789 redisLog(REDIS_WARNING
, "Can't open the append-only file: %s",
1795 if (server
.vm_enabled
) vmInit();
1798 /* Empty the whole database */
1799 static long long emptyDb() {
1801 long long removed
= 0;
1803 for (j
= 0; j
< server
.dbnum
; j
++) {
1804 removed
+= dictSize(server
.db
[j
].dict
);
1805 dictEmpty(server
.db
[j
].dict
);
1806 dictEmpty(server
.db
[j
].expires
);
/* Convert a case-insensitive "yes" / "no" string into 1 / 0.
 * Any other string yields -1, which callers use as the error value. */
static int yesnotoi(char *s) {
    if (!strcasecmp(s,"yes")) return 1;
    else if (!strcasecmp(s,"no")) return 0;
    else return -1;
}
1817 /* I agree, this is a very rudimental way to load a configuration...
1818 will improve later if the config gets more complex */
1819 static void loadServerConfig(char *filename
) {
1821 char buf
[REDIS_CONFIGLINE_MAX
+1], *err
= NULL
;
1825 if (filename
[0] == '-' && filename
[1] == '\0')
1828 if ((fp
= fopen(filename
,"r")) == NULL
) {
1829 redisLog(REDIS_WARNING
, "Fatal error, can't open config file '%s'", filename
);
1834 while(fgets(buf
,REDIS_CONFIGLINE_MAX
+1,fp
) != NULL
) {
1840 line
= sdstrim(line
," \t\r\n");
1842 /* Skip comments and blank lines*/
1843 if (line
[0] == '#' || line
[0] == '\0') {
1848 /* Split into arguments */
1849 argv
= sdssplitlen(line
,sdslen(line
)," ",1,&argc
);
1850 sdstolower(argv
[0]);
1852 /* Execute config directives */
1853 if (!strcasecmp(argv
[0],"timeout") && argc
== 2) {
1854 server
.maxidletime
= atoi(argv
[1]);
1855 if (server
.maxidletime
< 0) {
1856 err
= "Invalid timeout value"; goto loaderr
;
1858 } else if (!strcasecmp(argv
[0],"port") && argc
== 2) {
1859 server
.port
= atoi(argv
[1]);
1860 if (server
.port
< 1 || server
.port
> 65535) {
1861 err
= "Invalid port"; goto loaderr
;
1863 } else if (!strcasecmp(argv
[0],"bind") && argc
== 2) {
1864 server
.bindaddr
= zstrdup(argv
[1]);
1865 } else if (!strcasecmp(argv
[0],"save") && argc
== 3) {
1866 int seconds
= atoi(argv
[1]);
1867 int changes
= atoi(argv
[2]);
1868 if (seconds
< 1 || changes
< 0) {
1869 err
= "Invalid save parameters"; goto loaderr
;
1871 appendServerSaveParams(seconds
,changes
);
1872 } else if (!strcasecmp(argv
[0],"dir") && argc
== 2) {
1873 if (chdir(argv
[1]) == -1) {
1874 redisLog(REDIS_WARNING
,"Can't chdir to '%s': %s",
1875 argv
[1], strerror(errno
));
1878 } else if (!strcasecmp(argv
[0],"loglevel") && argc
== 2) {
1879 if (!strcasecmp(argv
[1],"debug")) server
.verbosity
= REDIS_DEBUG
;
1880 else if (!strcasecmp(argv
[1],"verbose")) server
.verbosity
= REDIS_VERBOSE
;
1881 else if (!strcasecmp(argv
[1],"notice")) server
.verbosity
= REDIS_NOTICE
;
1882 else if (!strcasecmp(argv
[1],"warning")) server
.verbosity
= REDIS_WARNING
;
1884 err
= "Invalid log level. Must be one of debug, notice, warning";
1887 } else if (!strcasecmp(argv
[0],"logfile") && argc
== 2) {
1890 server
.logfile
= zstrdup(argv
[1]);
1891 if (!strcasecmp(server
.logfile
,"stdout")) {
1892 zfree(server
.logfile
);
1893 server
.logfile
= NULL
;
1895 if (server
.logfile
) {
1896 /* Test if we are able to open the file. The server will not
1897 * be able to abort just for this problem later... */
1898 logfp
= fopen(server
.logfile
,"a");
1899 if (logfp
== NULL
) {
1900 err
= sdscatprintf(sdsempty(),
1901 "Can't open the log file: %s", strerror(errno
));
1906 } else if (!strcasecmp(argv
[0],"databases") && argc
== 2) {
1907 server
.dbnum
= atoi(argv
[1]);
1908 if (server
.dbnum
< 1) {
1909 err
= "Invalid number of databases"; goto loaderr
;
1911 } else if (!strcasecmp(argv
[0],"include") && argc
== 2) {
1912 loadServerConfig(argv
[1]);
1913 } else if (!strcasecmp(argv
[0],"maxclients") && argc
== 2) {
1914 server
.maxclients
= atoi(argv
[1]);
1915 } else if (!strcasecmp(argv
[0],"maxmemory") && argc
== 2) {
1916 server
.maxmemory
= memtoll(argv
[1],NULL
);
1917 } else if (!strcasecmp(argv
[0],"slaveof") && argc
== 3) {
1918 server
.masterhost
= sdsnew(argv
[1]);
1919 server
.masterport
= atoi(argv
[2]);
1920 server
.replstate
= REDIS_REPL_CONNECT
;
1921 } else if (!strcasecmp(argv
[0],"masterauth") && argc
== 2) {
1922 server
.masterauth
= zstrdup(argv
[1]);
1923 } else if (!strcasecmp(argv
[0],"glueoutputbuf") && argc
== 2) {
1924 if ((server
.glueoutputbuf
= yesnotoi(argv
[1])) == -1) {
1925 err
= "argument must be 'yes' or 'no'"; goto loaderr
;
1927 } else if (!strcasecmp(argv
[0],"rdbcompression") && argc
== 2) {
1928 if ((server
.rdbcompression
= yesnotoi(argv
[1])) == -1) {
1929 err
= "argument must be 'yes' or 'no'"; goto loaderr
;
1931 } else if (!strcasecmp(argv
[0],"activerehashing") && argc
== 2) {
1932 if ((server
.activerehashing
= yesnotoi(argv
[1])) == -1) {
1933 err
= "argument must be 'yes' or 'no'"; goto loaderr
;
1935 } else if (!strcasecmp(argv
[0],"daemonize") && argc
== 2) {
1936 if ((server
.daemonize
= yesnotoi(argv
[1])) == -1) {
1937 err
= "argument must be 'yes' or 'no'"; goto loaderr
;
1939 } else if (!strcasecmp(argv
[0],"appendonly") && argc
== 2) {
1940 if ((server
.appendonly
= yesnotoi(argv
[1])) == -1) {
1941 err
= "argument must be 'yes' or 'no'"; goto loaderr
;
1943 } else if (!strcasecmp(argv
[0],"appendfilename") && argc
== 2) {
1944 zfree(server
.appendfilename
);
1945 server
.appendfilename
= zstrdup(argv
[1]);
1946 } else if (!strcasecmp(argv
[0],"no-appendfsync-on-rewrite")
1948 if ((server
.no_appendfsync_on_rewrite
= yesnotoi(argv
[1])) == -1) {
1949 err
= "argument must be 'yes' or 'no'"; goto loaderr
;
1951 } else if (!strcasecmp(argv
[0],"appendfsync") && argc
== 2) {
1952 if (!strcasecmp(argv
[1],"no")) {
1953 server
.appendfsync
= APPENDFSYNC_NO
;
1954 } else if (!strcasecmp(argv
[1],"always")) {
1955 server
.appendfsync
= APPENDFSYNC_ALWAYS
;
1956 } else if (!strcasecmp(argv
[1],"everysec")) {
1957 server
.appendfsync
= APPENDFSYNC_EVERYSEC
;
1959 err
= "argument must be 'no', 'always' or 'everysec'";
1962 } else if (!strcasecmp(argv
[0],"requirepass") && argc
== 2) {
1963 server
.requirepass
= zstrdup(argv
[1]);
1964 } else if (!strcasecmp(argv
[0],"pidfile") && argc
== 2) {
1965 zfree(server
.pidfile
);
1966 server
.pidfile
= zstrdup(argv
[1]);
1967 } else if (!strcasecmp(argv
[0],"dbfilename") && argc
== 2) {
1968 zfree(server
.dbfilename
);
1969 server
.dbfilename
= zstrdup(argv
[1]);
1970 } else if (!strcasecmp(argv
[0],"vm-enabled") && argc
== 2) {
1971 if ((server
.vm_enabled
= yesnotoi(argv
[1])) == -1) {
1972 err
= "argument must be 'yes' or 'no'"; goto loaderr
;
1974 } else if (!strcasecmp(argv
[0],"vm-swap-file") && argc
== 2) {
1975 zfree(server
.vm_swap_file
);
1976 server
.vm_swap_file
= zstrdup(argv
[1]);
1977 } else if (!strcasecmp(argv
[0],"vm-max-memory") && argc
== 2) {
1978 server
.vm_max_memory
= memtoll(argv
[1],NULL
);
1979 } else if (!strcasecmp(argv
[0],"vm-page-size") && argc
== 2) {
1980 server
.vm_page_size
= memtoll(argv
[1], NULL
);
1981 } else if (!strcasecmp(argv
[0],"vm-pages") && argc
== 2) {
1982 server
.vm_pages
= memtoll(argv
[1], NULL
);
1983 } else if (!strcasecmp(argv
[0],"vm-max-threads") && argc
== 2) {
1984 server
.vm_max_threads
= strtoll(argv
[1], NULL
, 10);
1985 } else if (!strcasecmp(argv
[0],"hash-max-zipmap-entries") && argc
== 2){
1986 server
.hash_max_zipmap_entries
= memtoll(argv
[1], NULL
);
1987 } else if (!strcasecmp(argv
[0],"hash-max-zipmap-value") && argc
== 2){
1988 server
.hash_max_zipmap_value
= memtoll(argv
[1], NULL
);
1990 err
= "Bad directive or wrong number of arguments"; goto loaderr
;
1992 for (j
= 0; j
< argc
; j
++)
1997 if (fp
!= stdin
) fclose(fp
);
2001 fprintf(stderr
, "\n*** FATAL CONFIG FILE ERROR ***\n");
2002 fprintf(stderr
, "Reading the configuration file, at line %d\n", linenum
);
2003 fprintf(stderr
, ">>> '%s'\n", line
);
2004 fprintf(stderr
, "%s\n", err
);
2008 static void freeClientArgv(redisClient
*c
) {
2011 for (j
= 0; j
< c
->argc
; j
++)
2012 decrRefCount(c
->argv
[j
]);
2013 for (j
= 0; j
< c
->mbargc
; j
++)
2014 decrRefCount(c
->mbargv
[j
]);
2019 static void freeClient(redisClient
*c
) {
2022 /* Note that if the client we are freeing is blocked into a blocking
2023 * call, we have to set querybuf to NULL *before* to call
2024 * unblockClientWaitingData() to avoid processInputBuffer() will get
2025 * called. Also it is important to remove the file events after
2026 * this, because this call adds the READABLE event. */
2027 sdsfree(c
->querybuf
);
2029 if (c
->flags
& REDIS_BLOCKED
)
2030 unblockClientWaitingData(c
);
2032 /* UNWATCH all the keys */
2034 listRelease(c
->watched_keys
);
2035 /* Unsubscribe from all the pubsub channels */
2036 pubsubUnsubscribeAllChannels(c
,0);
2037 pubsubUnsubscribeAllPatterns(c
,0);
2038 dictRelease(c
->pubsub_channels
);
2039 listRelease(c
->pubsub_patterns
);
2040 /* Obvious cleanup */
2041 aeDeleteFileEvent(server
.el
,c
->fd
,AE_READABLE
);
2042 aeDeleteFileEvent(server
.el
,c
->fd
,AE_WRITABLE
);
2043 listRelease(c
->reply
);
2046 /* Remove from the list of clients */
2047 ln
= listSearchKey(server
.clients
,c
);
2048 redisAssert(ln
!= NULL
);
2049 listDelNode(server
.clients
,ln
);
2050 /* Remove from the list of clients that are now ready to be restarted
2051 * after waiting for swapped keys */
2052 if (c
->flags
& REDIS_IO_WAIT
&& listLength(c
->io_keys
) == 0) {
2053 ln
= listSearchKey(server
.io_ready_clients
,c
);
2055 listDelNode(server
.io_ready_clients
,ln
);
2056 server
.vm_blocked_clients
--;
2059 /* Remove from the list of clients waiting for swapped keys */
2060 while (server
.vm_enabled
&& listLength(c
->io_keys
)) {
2061 ln
= listFirst(c
->io_keys
);
2062 dontWaitForSwappedKey(c
,ln
->value
);
2064 listRelease(c
->io_keys
);
2065 /* Master/slave cleanup */
2066 if (c
->flags
& REDIS_SLAVE
) {
2067 if (c
->replstate
== REDIS_REPL_SEND_BULK
&& c
->repldbfd
!= -1)
2069 list
*l
= (c
->flags
& REDIS_MONITOR
) ? server
.monitors
: server
.slaves
;
2070 ln
= listSearchKey(l
,c
);
2071 redisAssert(ln
!= NULL
);
2074 if (c
->flags
& REDIS_MASTER
) {
2075 server
.master
= NULL
;
2076 server
.replstate
= REDIS_REPL_CONNECT
;
2078 /* Release memory */
2081 freeClientMultiState(c
);
2085 #define GLUEREPLY_UP_TO (1024)
2086 static void glueReplyBuffersIfNeeded(redisClient
*c
) {
2088 char buf
[GLUEREPLY_UP_TO
];
2093 listRewind(c
->reply
,&li
);
2094 while((ln
= listNext(&li
))) {
2098 objlen
= sdslen(o
->ptr
);
2099 if (copylen
+ objlen
<= GLUEREPLY_UP_TO
) {
2100 memcpy(buf
+copylen
,o
->ptr
,objlen
);
2102 listDelNode(c
->reply
,ln
);
2104 if (copylen
== 0) return;
2108 /* Now the output buffer is empty, add the new single element */
2109 o
= createObject(REDIS_STRING
,sdsnewlen(buf
,copylen
));
2110 listAddNodeHead(c
->reply
,o
);
2113 static void sendReplyToClient(aeEventLoop
*el
, int fd
, void *privdata
, int mask
) {
2114 redisClient
*c
= privdata
;
2115 int nwritten
= 0, totwritten
= 0, objlen
;
2118 REDIS_NOTUSED(mask
);
2120 /* Use writev() if we have enough buffers to send */
2121 if (!server
.glueoutputbuf
&&
2122 listLength(c
->reply
) > REDIS_WRITEV_THRESHOLD
&&
2123 !(c
->flags
& REDIS_MASTER
))
2125 sendReplyToClientWritev(el
, fd
, privdata
, mask
);
2129 while(listLength(c
->reply
)) {
2130 if (server
.glueoutputbuf
&& listLength(c
->reply
) > 1)
2131 glueReplyBuffersIfNeeded(c
);
2133 o
= listNodeValue(listFirst(c
->reply
));
2134 objlen
= sdslen(o
->ptr
);
2137 listDelNode(c
->reply
,listFirst(c
->reply
));
2141 if (c
->flags
& REDIS_MASTER
) {
2142 /* Don't reply to a master */
2143 nwritten
= objlen
- c
->sentlen
;
2145 nwritten
= write(fd
, ((char*)o
->ptr
)+c
->sentlen
, objlen
- c
->sentlen
);
2146 if (nwritten
<= 0) break;
2148 c
->sentlen
+= nwritten
;
2149 totwritten
+= nwritten
;
2150 /* If we fully sent the object on head go to the next one */
2151 if (c
->sentlen
== objlen
) {
2152 listDelNode(c
->reply
,listFirst(c
->reply
));
2155 /* Note that we avoid to send more thank REDIS_MAX_WRITE_PER_EVENT
2156 * bytes, in a single threaded server it's a good idea to serve
2157 * other clients as well, even if a very large request comes from
2158 * super fast link that is always able to accept data (in real world
2159 * scenario think about 'KEYS *' against the loopback interfae) */
2160 if (totwritten
> REDIS_MAX_WRITE_PER_EVENT
) break;
2162 if (nwritten
== -1) {
2163 if (errno
== EAGAIN
) {
2166 redisLog(REDIS_VERBOSE
,
2167 "Error writing to client: %s", strerror(errno
));
2172 if (totwritten
> 0) c
->lastinteraction
= time(NULL
);
2173 if (listLength(c
->reply
) == 0) {
2175 aeDeleteFileEvent(server
.el
,c
->fd
,AE_WRITABLE
);
2179 static void sendReplyToClientWritev(aeEventLoop
*el
, int fd
, void *privdata
, int mask
)
2181 redisClient
*c
= privdata
;
2182 int nwritten
= 0, totwritten
= 0, objlen
, willwrite
;
2184 struct iovec iov
[REDIS_WRITEV_IOVEC_COUNT
];
2185 int offset
, ion
= 0;
2187 REDIS_NOTUSED(mask
);
2190 while (listLength(c
->reply
)) {
2191 offset
= c
->sentlen
;
2195 /* fill-in the iov[] array */
2196 for(node
= listFirst(c
->reply
); node
; node
= listNextNode(node
)) {
2197 o
= listNodeValue(node
);
2198 objlen
= sdslen(o
->ptr
);
2200 if (totwritten
+ objlen
- offset
> REDIS_MAX_WRITE_PER_EVENT
)
2203 if(ion
== REDIS_WRITEV_IOVEC_COUNT
)
2204 break; /* no more iovecs */
2206 iov
[ion
].iov_base
= ((char*)o
->ptr
) + offset
;
2207 iov
[ion
].iov_len
= objlen
- offset
;
2208 willwrite
+= objlen
- offset
;
2209 offset
= 0; /* just for the first item */
2216 /* write all collected blocks at once */
2217 if((nwritten
= writev(fd
, iov
, ion
)) < 0) {
2218 if (errno
!= EAGAIN
) {
2219 redisLog(REDIS_VERBOSE
,
2220 "Error writing to client: %s", strerror(errno
));
2227 totwritten
+= nwritten
;
2228 offset
= c
->sentlen
;
2230 /* remove written robjs from c->reply */
2231 while (nwritten
&& listLength(c
->reply
)) {
2232 o
= listNodeValue(listFirst(c
->reply
));
2233 objlen
= sdslen(o
->ptr
);
2235 if(nwritten
>= objlen
- offset
) {
2236 listDelNode(c
->reply
, listFirst(c
->reply
));
2237 nwritten
-= objlen
- offset
;
2241 c
->sentlen
+= nwritten
;
2249 c
->lastinteraction
= time(NULL
);
2251 if (listLength(c
->reply
) == 0) {
2253 aeDeleteFileEvent(server
.el
,c
->fd
,AE_WRITABLE
);
2257 static int qsortRedisCommands(const void *r1
, const void *r2
) {
2259 ((struct redisCommand
*)r1
)->name
,
2260 ((struct redisCommand
*)r2
)->name
);
2263 static void sortCommandTable() {
2264 /* Copy and sort the read-only version of the command table */
2265 commandTable
= (struct redisCommand
*)malloc(sizeof(readonlyCommandTable
));
2266 memcpy(commandTable
,readonlyCommandTable
,sizeof(readonlyCommandTable
));
2268 sizeof(readonlyCommandTable
)/sizeof(struct redisCommand
),
2269 sizeof(struct redisCommand
),qsortRedisCommands
);
2272 static struct redisCommand
*lookupCommand(char *name
) {
2273 struct redisCommand tmp
= {name
,NULL
,0,0,NULL
,0,0,0};
2277 sizeof(readonlyCommandTable
)/sizeof(struct redisCommand
),
2278 sizeof(struct redisCommand
),
2279 qsortRedisCommands
);
2282 /* resetClient prepare the client to process the next command */
2283 static void resetClient(redisClient
*c
) {
2289 /* Call() is the core of Redis execution of a command */
2290 static void call(redisClient
*c
, struct redisCommand
*cmd
) {
2293 dirty
= server
.dirty
;
2295 dirty
= server
.dirty
-dirty
;
2297 if (server
.appendonly
&& dirty
)
2298 feedAppendOnlyFile(cmd
,c
->db
->id
,c
->argv
,c
->argc
);
2299 if ((dirty
|| cmd
->flags
& REDIS_CMD_FORCE_REPLICATION
) &&
2300 listLength(server
.slaves
))
2301 replicationFeedSlaves(server
.slaves
,c
->db
->id
,c
->argv
,c
->argc
);
2302 if (listLength(server
.monitors
))
2303 replicationFeedMonitors(server
.monitors
,c
->db
->id
,c
->argv
,c
->argc
);
2304 server
.stat_numcommands
++;
2307 /* If this function gets called we already read a whole
2308 * command, argments are in the client argv/argc fields.
2309 * processCommand() execute the command or prepare the
2310 * server for a bulk read from the client.
2312 * If 1 is returned the client is still alive and valid and
2313 * and other operations can be performed by the caller. Otherwise
2314 * if 0 is returned the client was destroied (i.e. after QUIT). */
2315 static int processCommand(redisClient
*c
) {
2316 struct redisCommand
*cmd
;
2318 /* Free some memory if needed (maxmemory setting) */
2319 if (server
.maxmemory
) freeMemoryIfNeeded();
2321 /* Handle the multi bulk command type. This is an alternative protocol
2322 * supported by Redis in order to receive commands that are composed of
2323 * multiple binary-safe "bulk" arguments. The latency of processing is
2324 * a bit higher but this allows things like multi-sets, so if this
2325 * protocol is used only for MSET and similar commands this is a big win. */
2326 if (c
->multibulk
== 0 && c
->argc
== 1 && ((char*)(c
->argv
[0]->ptr
))[0] == '*') {
2327 c
->multibulk
= atoi(((char*)c
->argv
[0]->ptr
)+1);
2328 if (c
->multibulk
<= 0) {
2332 decrRefCount(c
->argv
[c
->argc
-1]);
2336 } else if (c
->multibulk
) {
2337 if (c
->bulklen
== -1) {
2338 if (((char*)c
->argv
[0]->ptr
)[0] != '$') {
2339 addReplySds(c
,sdsnew("-ERR multi bulk protocol error\r\n"));
2343 int bulklen
= atoi(((char*)c
->argv
[0]->ptr
)+1);
2344 decrRefCount(c
->argv
[0]);
2345 if (bulklen
< 0 || bulklen
> 1024*1024*1024) {
2347 addReplySds(c
,sdsnew("-ERR invalid bulk write count\r\n"));
2352 c
->bulklen
= bulklen
+2; /* add two bytes for CR+LF */
2356 c
->mbargv
= zrealloc(c
->mbargv
,(sizeof(robj
*))*(c
->mbargc
+1));
2357 c
->mbargv
[c
->mbargc
] = c
->argv
[0];
2361 if (c
->multibulk
== 0) {
2365 /* Here we need to swap the multi-bulk argc/argv with the
2366 * normal argc/argv of the client structure. */
2368 c
->argv
= c
->mbargv
;
2369 c
->mbargv
= auxargv
;
2372 c
->argc
= c
->mbargc
;
2373 c
->mbargc
= auxargc
;
2375 /* We need to set bulklen to something different than -1
2376 * in order for the code below to process the command without
2377 * to try to read the last argument of a bulk command as
2378 * a special argument. */
2380 /* continue below and process the command */
2387 /* -- end of multi bulk commands processing -- */
2389 /* The QUIT command is handled as a special case. Normal command
2390 * procs are unable to close the client connection safely */
2391 if (!strcasecmp(c
->argv
[0]->ptr
,"quit")) {
2396 /* Now lookup the command and check ASAP about trivial error conditions
2397 * such wrong arity, bad command name and so forth. */
2398 cmd
= lookupCommand(c
->argv
[0]->ptr
);
2401 sdscatprintf(sdsempty(), "-ERR unknown command '%s'\r\n",
2402 (char*)c
->argv
[0]->ptr
));
2405 } else if ((cmd
->arity
> 0 && cmd
->arity
!= c
->argc
) ||
2406 (c
->argc
< -cmd
->arity
)) {
2408 sdscatprintf(sdsempty(),
2409 "-ERR wrong number of arguments for '%s' command\r\n",
2413 } else if (cmd
->flags
& REDIS_CMD_BULK
&& c
->bulklen
== -1) {
2414 /* This is a bulk command, we have to read the last argument yet. */
2415 int bulklen
= atoi(c
->argv
[c
->argc
-1]->ptr
);
2417 decrRefCount(c
->argv
[c
->argc
-1]);
2418 if (bulklen
< 0 || bulklen
> 1024*1024*1024) {
2420 addReplySds(c
,sdsnew("-ERR invalid bulk write count\r\n"));
2425 c
->bulklen
= bulklen
+2; /* add two bytes for CR+LF */
2426 /* It is possible that the bulk read is already in the
2427 * buffer. Check this condition and handle it accordingly.
2428 * This is just a fast path, alternative to call processInputBuffer().
2429 * It's a good idea since the code is small and this condition
2430 * happens most of the times. */
2431 if ((signed)sdslen(c
->querybuf
) >= c
->bulklen
) {
2432 c
->argv
[c
->argc
] = createStringObject(c
->querybuf
,c
->bulklen
-2);
2434 c
->querybuf
= sdsrange(c
->querybuf
,c
->bulklen
,-1);
2436 /* Otherwise return... there is to read the last argument
2437 * from the socket. */
2441 /* Let's try to encode the bulk object to save space. */
2442 if (cmd
->flags
& REDIS_CMD_BULK
)
2443 c
->argv
[c
->argc
-1] = tryObjectEncoding(c
->argv
[c
->argc
-1]);
2445 /* Check if the user is authenticated */
2446 if (server
.requirepass
&& !c
->authenticated
&& cmd
->proc
!= authCommand
) {
2447 addReplySds(c
,sdsnew("-ERR operation not permitted\r\n"));
2452 /* Handle the maxmemory directive */
2453 if (server
.maxmemory
&& (cmd
->flags
& REDIS_CMD_DENYOOM
) &&
2454 zmalloc_used_memory() > server
.maxmemory
)
2456 addReplySds(c
,sdsnew("-ERR command not allowed when used memory > 'maxmemory'\r\n"));
2461 /* Only allow SUBSCRIBE and UNSUBSCRIBE in the context of Pub/Sub */
2462 if ((dictSize(c
->pubsub_channels
) > 0 || listLength(c
->pubsub_patterns
) > 0)
2464 cmd
->proc
!= subscribeCommand
&& cmd
->proc
!= unsubscribeCommand
&&
2465 cmd
->proc
!= psubscribeCommand
&& cmd
->proc
!= punsubscribeCommand
) {
2466 addReplySds(c
,sdsnew("-ERR only (P)SUBSCRIBE / (P)UNSUBSCRIBE / QUIT allowed in this context\r\n"));
2471 /* Exec the command */
2472 if (c
->flags
& REDIS_MULTI
&&
2473 cmd
->proc
!= execCommand
&& cmd
->proc
!= discardCommand
&&
2474 cmd
->proc
!= multiCommand
&& cmd
->proc
!= watchCommand
)
2476 queueMultiCommand(c
,cmd
);
2477 addReply(c
,shared
.queued
);
2479 if (server
.vm_enabled
&& server
.vm_max_threads
> 0 &&
2480 blockClientOnSwappedKeys(c
,cmd
)) return 1;
2484 /* Prepare the client for the next command */
2489 static void replicationFeedSlaves(list
*slaves
, int dictid
, robj
**argv
, int argc
) {
2494 /* We need 1+(ARGS*3) objects since commands are using the new protocol
2495 * and we one 1 object for the first "*<count>\r\n" multibulk count, then
2496 * for every additional object we have "$<count>\r\n" + object + "\r\n". */
2497 robj
*static_outv
[REDIS_STATIC_ARGS
*3+1];
2500 if (argc
<= REDIS_STATIC_ARGS
) {
2503 outv
= zmalloc(sizeof(robj
*)*(argc
*3+1));
2506 lenobj
= createObject(REDIS_STRING
,
2507 sdscatprintf(sdsempty(), "*%d\r\n", argc
));
2508 lenobj
->refcount
= 0;
2509 outv
[outc
++] = lenobj
;
2510 for (j
= 0; j
< argc
; j
++) {
2511 lenobj
= createObject(REDIS_STRING
,
2512 sdscatprintf(sdsempty(),"$%lu\r\n",
2513 (unsigned long) stringObjectLen(argv
[j
])));
2514 lenobj
->refcount
= 0;
2515 outv
[outc
++] = lenobj
;
2516 outv
[outc
++] = argv
[j
];
2517 outv
[outc
++] = shared
.crlf
;
2520 /* Increment all the refcounts at start and decrement at end in order to
2521 * be sure to free objects if there is no slave in a replication state
2522 * able to be feed with commands */
2523 for (j
= 0; j
< outc
; j
++) incrRefCount(outv
[j
]);
2524 listRewind(slaves
,&li
);
2525 while((ln
= listNext(&li
))) {
2526 redisClient
*slave
= ln
->value
;
2528 /* Don't feed slaves that are still waiting for BGSAVE to start */
2529 if (slave
->replstate
== REDIS_REPL_WAIT_BGSAVE_START
) continue;
2531 /* Feed all the other slaves, MONITORs and so on */
2532 if (slave
->slaveseldb
!= dictid
) {
2536 case 0: selectcmd
= shared
.select0
; break;
2537 case 1: selectcmd
= shared
.select1
; break;
2538 case 2: selectcmd
= shared
.select2
; break;
2539 case 3: selectcmd
= shared
.select3
; break;
2540 case 4: selectcmd
= shared
.select4
; break;
2541 case 5: selectcmd
= shared
.select5
; break;
2542 case 6: selectcmd
= shared
.select6
; break;
2543 case 7: selectcmd
= shared
.select7
; break;
2544 case 8: selectcmd
= shared
.select8
; break;
2545 case 9: selectcmd
= shared
.select9
; break;
2547 selectcmd
= createObject(REDIS_STRING
,
2548 sdscatprintf(sdsempty(),"select %d\r\n",dictid
));
2549 selectcmd
->refcount
= 0;
2552 addReply(slave
,selectcmd
);
2553 slave
->slaveseldb
= dictid
;
2555 for (j
= 0; j
< outc
; j
++) addReply(slave
,outv
[j
]);
2557 for (j
= 0; j
< outc
; j
++) decrRefCount(outv
[j
]);
2558 if (outv
!= static_outv
) zfree(outv
);
2561 static sds
sdscatrepr(sds s
, char *p
, size_t len
) {
2562 s
= sdscatlen(s
,"\"",1);
2567 s
= sdscatprintf(s
,"\\%c",*p
);
2569 case '\n': s
= sdscatlen(s
,"\\n",1); break;
2570 case '\r': s
= sdscatlen(s
,"\\r",1); break;
2571 case '\t': s
= sdscatlen(s
,"\\t",1); break;
2572 case '\a': s
= sdscatlen(s
,"\\a",1); break;
2573 case '\b': s
= sdscatlen(s
,"\\b",1); break;
2576 s
= sdscatprintf(s
,"%c",*p
);
2578 s
= sdscatprintf(s
,"\\x%02x",(unsigned char)*p
);
2583 return sdscatlen(s
,"\"",1);
2586 static void replicationFeedMonitors(list
*monitors
, int dictid
, robj
**argv
, int argc
) {
2590 sds cmdrepr
= sdsnew("+");
2594 gettimeofday(&tv
,NULL
);
2595 cmdrepr
= sdscatprintf(cmdrepr
,"%ld.%ld ",(long)tv
.tv_sec
,(long)tv
.tv_usec
);
2596 if (dictid
!= 0) cmdrepr
= sdscatprintf(cmdrepr
,"(db %d) ", dictid
);
2598 for (j
= 0; j
< argc
; j
++) {
2599 if (argv
[j
]->encoding
== REDIS_ENCODING_INT
) {
2600 cmdrepr
= sdscatprintf(cmdrepr
, "%ld", (long)argv
[j
]->ptr
);
2602 cmdrepr
= sdscatrepr(cmdrepr
,(char*)argv
[j
]->ptr
,
2603 sdslen(argv
[j
]->ptr
));
2606 cmdrepr
= sdscatlen(cmdrepr
," ",1);
2608 cmdrepr
= sdscatlen(cmdrepr
,"\r\n",2);
2609 cmdobj
= createObject(REDIS_STRING
,cmdrepr
);
2611 listRewind(monitors
,&li
);
2612 while((ln
= listNext(&li
))) {
2613 redisClient
*monitor
= ln
->value
;
2614 addReply(monitor
,cmdobj
);
2616 decrRefCount(cmdobj
);
2619 static void processInputBuffer(redisClient
*c
) {
2621 /* Before to process the input buffer, make sure the client is not
2622 * waitig for a blocking operation such as BLPOP. Note that the first
2623 * iteration the client is never blocked, otherwise the processInputBuffer
2624 * would not be called at all, but after the execution of the first commands
2625 * in the input buffer the client may be blocked, and the "goto again"
2626 * will try to reiterate. The following line will make it return asap. */
2627 if (c
->flags
& REDIS_BLOCKED
|| c
->flags
& REDIS_IO_WAIT
) return;
2628 if (c
->bulklen
== -1) {
2629 /* Read the first line of the query */
2630 char *p
= strchr(c
->querybuf
,'\n');
2637 query
= c
->querybuf
;
2638 c
->querybuf
= sdsempty();
2639 querylen
= 1+(p
-(query
));
2640 if (sdslen(query
) > querylen
) {
2641 /* leave data after the first line of the query in the buffer */
2642 c
->querybuf
= sdscatlen(c
->querybuf
,query
+querylen
,sdslen(query
)-querylen
);
2644 *p
= '\0'; /* remove "\n" */
2645 if (*(p
-1) == '\r') *(p
-1) = '\0'; /* and "\r" if any */
2646 sdsupdatelen(query
);
2648 /* Now we can split the query in arguments */
2649 argv
= sdssplitlen(query
,sdslen(query
)," ",1,&argc
);
2652 if (c
->argv
) zfree(c
->argv
);
2653 c
->argv
= zmalloc(sizeof(robj
*)*argc
);
2655 for (j
= 0; j
< argc
; j
++) {
2656 if (sdslen(argv
[j
])) {
2657 c
->argv
[c
->argc
] = createObject(REDIS_STRING
,argv
[j
]);
2665 /* Execute the command. If the client is still valid
2666 * after processCommand() return and there is something
2667 * on the query buffer try to process the next command. */
2668 if (processCommand(c
) && sdslen(c
->querybuf
)) goto again
;
2670 /* Nothing to process, argc == 0. Just process the query
2671 * buffer if it's not empty or return to the caller */
2672 if (sdslen(c
->querybuf
)) goto again
;
2675 } else if (sdslen(c
->querybuf
) >= REDIS_REQUEST_MAX_SIZE
) {
2676 redisLog(REDIS_VERBOSE
, "Client protocol error");
2681 /* Bulk read handling. Note that if we are at this point
2682 the client already sent a command terminated with a newline,
2683 we are reading the bulk data that is actually the last
2684 argument of the command. */
2685 int qbl
= sdslen(c
->querybuf
);
2687 if (c
->bulklen
<= qbl
) {
2688 /* Copy everything but the final CRLF as final argument */
2689 c
->argv
[c
->argc
] = createStringObject(c
->querybuf
,c
->bulklen
-2);
2691 c
->querybuf
= sdsrange(c
->querybuf
,c
->bulklen
,-1);
2692 /* Process the command. If the client is still valid after
2693 * the processing and there is more data in the buffer
2694 * try to parse it. */
2695 if (processCommand(c
) && sdslen(c
->querybuf
)) goto again
;
2701 static void readQueryFromClient(aeEventLoop
*el
, int fd
, void *privdata
, int mask
) {
2702 redisClient
*c
= (redisClient
*) privdata
;
2703 char buf
[REDIS_IOBUF_LEN
];
2706 REDIS_NOTUSED(mask
);
2708 nread
= read(fd
, buf
, REDIS_IOBUF_LEN
);
2710 if (errno
== EAGAIN
) {
2713 redisLog(REDIS_VERBOSE
, "Reading from client: %s",strerror(errno
));
2717 } else if (nread
== 0) {
2718 redisLog(REDIS_VERBOSE
, "Client closed connection");
2723 c
->querybuf
= sdscatlen(c
->querybuf
, buf
, nread
);
2724 c
->lastinteraction
= time(NULL
);
2728 processInputBuffer(c
);
2731 static int selectDb(redisClient
*c
, int id
) {
2732 if (id
< 0 || id
>= server
.dbnum
)
2734 c
->db
= &server
.db
[id
];
2738 static void *dupClientReplyValue(void *o
) {
2739 incrRefCount((robj
*)o
);
/* Match callback installed on lists of string objects: forwards to
 * equalStringObjects() and returns its result. */
static int listMatchObjects(void *a, void *b) {
    return equalStringObjects(a,b);
}
2747 static redisClient
*createClient(int fd
) {
2748 redisClient
*c
= zmalloc(sizeof(*c
));
2750 anetNonBlock(NULL
,fd
);
2751 anetTcpNoDelay(NULL
,fd
);
2752 if (!c
) return NULL
;
2755 c
->querybuf
= sdsempty();
2764 c
->lastinteraction
= time(NULL
);
2765 c
->authenticated
= 0;
2766 c
->replstate
= REDIS_REPL_NONE
;
2767 c
->reply
= listCreate();
2768 listSetFreeMethod(c
->reply
,decrRefCount
);
2769 listSetDupMethod(c
->reply
,dupClientReplyValue
);
2770 c
->blocking_keys
= NULL
;
2771 c
->blocking_keys_num
= 0;
2772 c
->io_keys
= listCreate();
2773 c
->watched_keys
= listCreate();
2774 listSetFreeMethod(c
->io_keys
,decrRefCount
);
2775 c
->pubsub_channels
= dictCreate(&setDictType
,NULL
);
2776 c
->pubsub_patterns
= listCreate();
2777 listSetFreeMethod(c
->pubsub_patterns
,decrRefCount
);
2778 listSetMatchMethod(c
->pubsub_patterns
,listMatchObjects
);
2779 if (aeCreateFileEvent(server
.el
, c
->fd
, AE_READABLE
,
2780 readQueryFromClient
, c
) == AE_ERR
) {
2784 listAddNodeTail(server
.clients
,c
);
2785 initClientMultiState(c
);
2789 static void addReply(redisClient
*c
, robj
*obj
) {
2790 if (listLength(c
->reply
) == 0 &&
2791 (c
->replstate
== REDIS_REPL_NONE
||
2792 c
->replstate
== REDIS_REPL_ONLINE
) &&
2793 aeCreateFileEvent(server
.el
, c
->fd
, AE_WRITABLE
,
2794 sendReplyToClient
, c
) == AE_ERR
) return;
2796 if (server
.vm_enabled
&& obj
->storage
!= REDIS_VM_MEMORY
) {
2797 obj
= dupStringObject(obj
);
2798 obj
->refcount
= 0; /* getDecodedObject() will increment the refcount */
2800 listAddNodeTail(c
->reply
,getDecodedObject(obj
));
2803 static void addReplySds(redisClient
*c
, sds s
) {
2804 robj
*o
= createObject(REDIS_STRING
,s
);
2809 static void addReplyDouble(redisClient
*c
, double d
) {
2812 snprintf(buf
,sizeof(buf
),"%.17g",d
);
2813 addReplySds(c
,sdscatprintf(sdsempty(),"$%lu\r\n%s\r\n",
2814 (unsigned long) strlen(buf
),buf
));
2817 static void addReplyLongLong(redisClient
*c
, long long ll
) {
2822 addReply(c
,shared
.czero
);
2824 } else if (ll
== 1) {
2825 addReply(c
,shared
.cone
);
2829 len
= ll2string(buf
+1,sizeof(buf
)-1,ll
);
2832 addReplySds(c
,sdsnewlen(buf
,len
+3));
2835 static void addReplyUlong(redisClient
*c
, unsigned long ul
) {
2840 addReply(c
,shared
.czero
);
2842 } else if (ul
== 1) {
2843 addReply(c
,shared
.cone
);
2846 len
= snprintf(buf
,sizeof(buf
),":%lu\r\n",ul
);
2847 addReplySds(c
,sdsnewlen(buf
,len
));
2850 static void addReplyBulkLen(redisClient
*c
, robj
*obj
) {
2854 if (obj
->encoding
== REDIS_ENCODING_RAW
) {
2855 len
= sdslen(obj
->ptr
);
2857 long n
= (long)obj
->ptr
;
2859 /* Compute how many bytes this integer will take as a radix 10 string */
2865 while((n
= n
/10) != 0) {
2870 intlen
= ll2string(buf
+1,sizeof(buf
)-1,(long long)len
);
2871 buf
[intlen
+1] = '\r';
2872 buf
[intlen
+2] = '\n';
2873 addReplySds(c
,sdsnewlen(buf
,intlen
+3));
2876 static void addReplyBulk(redisClient
*c
, robj
*obj
) {
2877 addReplyBulkLen(c
,obj
);
2879 addReply(c
,shared
.crlf
);
2882 /* In the CONFIG command we need to add vanilla C string as bulk replies */
2883 static void addReplyBulkCString(redisClient
*c
, char *s
) {
2885 addReply(c
,shared
.nullbulk
);
2887 robj
*o
= createStringObject(s
,strlen(s
));
2893 static void acceptHandler(aeEventLoop
*el
, int fd
, void *privdata
, int mask
) {
2898 REDIS_NOTUSED(mask
);
2899 REDIS_NOTUSED(privdata
);
2901 cfd
= anetAccept(server
.neterr
, fd
, cip
, &cport
);
2902 if (cfd
== AE_ERR
) {
2903 redisLog(REDIS_VERBOSE
,"Accepting client connection: %s", server
.neterr
);
2906 redisLog(REDIS_VERBOSE
,"Accepted %s:%d", cip
, cport
);
2907 if ((c
= createClient(cfd
)) == NULL
) {
2908 redisLog(REDIS_WARNING
,"Error allocating resoures for the client");
2909 close(cfd
); /* May be already closed, just ignore errors */
2912 /* If maxclient directive is set and this is one client more... close the
2913 * connection. Note that we create the client instead of checking before
2914 * for this condition, since now the socket is already set in nonblocking
2915 * mode and we can send an error for free using the Kernel I/O */
2916 if (server
.maxclients
&& listLength(server
.clients
) > server
.maxclients
) {
2917 char *err
= "-ERR max number of clients reached\r\n";
2919 /* That's a best effort error message, don't check write errors */
2920 if (write(c
->fd
,err
,strlen(err
)) == -1) {
2921 /* Nothing to do, Just to avoid the warning... */
2926 server
.stat_numconnections
++;
2929 /* ======================= Redis objects implementation ===================== */
2931 static robj
*createObject(int type
, void *ptr
) {
2934 if (server
.vm_enabled
) pthread_mutex_lock(&server
.obj_freelist_mutex
);
2935 if (listLength(server
.objfreelist
)) {
2936 listNode
*head
= listFirst(server
.objfreelist
);
2937 o
= listNodeValue(head
);
2938 listDelNode(server
.objfreelist
,head
);
2939 if (server
.vm_enabled
) pthread_mutex_unlock(&server
.obj_freelist_mutex
);
2941 if (server
.vm_enabled
) {
2942 pthread_mutex_unlock(&server
.obj_freelist_mutex
);
2943 o
= zmalloc(sizeof(*o
));
2945 o
= zmalloc(sizeof(*o
)-sizeof(struct redisObjectVM
));
2949 o
->encoding
= REDIS_ENCODING_RAW
;
2952 if (server
.vm_enabled
) {
2953 /* Note that this code may run in the context of an I/O thread
2954 * and accessing server.unixtime in theory is an error
2955 * (no locks). But in practice this is safe, and even if we read
2956 * garbage Redis will not fail, as it's just a statistical info */
2957 o
->vm
.atime
= server
.unixtime
;
2958 o
->storage
= REDIS_VM_MEMORY
;
2963 static robj
*createStringObject(char *ptr
, size_t len
) {
2964 return createObject(REDIS_STRING
,sdsnewlen(ptr
,len
));
2967 static robj
*createStringObjectFromLongLong(long long value
) {
2969 if (value
>= 0 && value
< REDIS_SHARED_INTEGERS
) {
2970 incrRefCount(shared
.integers
[value
]);
2971 o
= shared
.integers
[value
];
2973 if (value
>= LONG_MIN
&& value
<= LONG_MAX
) {
2974 o
= createObject(REDIS_STRING
, NULL
);
2975 o
->encoding
= REDIS_ENCODING_INT
;
2976 o
->ptr
= (void*)((long)value
);
2978 o
= createObject(REDIS_STRING
,sdsfromlonglong(value
));
2984 static robj
*dupStringObject(robj
*o
) {
2985 assert(o
->encoding
== REDIS_ENCODING_RAW
);
2986 return createStringObject(o
->ptr
,sdslen(o
->ptr
));
2989 static robj
*createListObject(void) {
2990 list
*l
= listCreate();
2992 listSetFreeMethod(l
,decrRefCount
);
2993 return createObject(REDIS_LIST
,l
);
2996 static robj
*createSetObject(void) {
2997 dict
*d
= dictCreate(&setDictType
,NULL
);
2998 return createObject(REDIS_SET
,d
);
3001 static robj
*createHashObject(void) {
3002 /* All the Hashes start as zipmaps. Will be automatically converted
3003 * into hash tables if there are enough elements or big elements
3005 unsigned char *zm
= zipmapNew();
3006 robj
*o
= createObject(REDIS_HASH
,zm
);
3007 o
->encoding
= REDIS_ENCODING_ZIPMAP
;
3011 static robj
*createZsetObject(void) {
3012 zset
*zs
= zmalloc(sizeof(*zs
));
3014 zs
->dict
= dictCreate(&zsetDictType
,NULL
);
3015 zs
->zsl
= zslCreate();
3016 return createObject(REDIS_ZSET
,zs
);
3019 static void freeStringObject(robj
*o
) {
3020 if (o
->encoding
== REDIS_ENCODING_RAW
) {
3025 static void freeListObject(robj
*o
) {
3026 listRelease((list
*) o
->ptr
);
3029 static void freeSetObject(robj
*o
) {
3030 dictRelease((dict
*) o
->ptr
);
3033 static void freeZsetObject(robj
*o
) {
3036 dictRelease(zs
->dict
);
3041 static void freeHashObject(robj
*o
) {
3042 switch (o
->encoding
) {
3043 case REDIS_ENCODING_HT
:
3044 dictRelease((dict
*) o
->ptr
);
3046 case REDIS_ENCODING_ZIPMAP
:
3050 redisPanic("Unknown hash encoding type");
3055 static void incrRefCount(robj
*o
) {
3059 static void decrRefCount(void *obj
) {
3062 if (o
->refcount
<= 0) redisPanic("decrRefCount against refcount <= 0");
3063 /* Object is a key of a swapped out value, or in the process of being
3065 if (server
.vm_enabled
&&
3066 (o
->storage
== REDIS_VM_SWAPPED
|| o
->storage
== REDIS_VM_LOADING
))
3068 if (o
->storage
== REDIS_VM_LOADING
) vmCancelThreadedIOJob(obj
);
3069 redisAssert(o
->type
== REDIS_STRING
);
3070 freeStringObject(o
);
3071 vmMarkPagesFree(o
->vm
.page
,o
->vm
.usedpages
);
3072 pthread_mutex_lock(&server
.obj_freelist_mutex
);
3073 if (listLength(server
.objfreelist
) > REDIS_OBJFREELIST_MAX
||
3074 !listAddNodeHead(server
.objfreelist
,o
))
3076 pthread_mutex_unlock(&server
.obj_freelist_mutex
);
3077 server
.vm_stats_swapped_objects
--;
3080 /* Object is in memory, or in the process of being swapped out. */
3081 if (--(o
->refcount
) == 0) {
3082 if (server
.vm_enabled
&& o
->storage
== REDIS_VM_SWAPPING
)
3083 vmCancelThreadedIOJob(obj
);
3085 case REDIS_STRING
: freeStringObject(o
); break;
3086 case REDIS_LIST
: freeListObject(o
); break;
3087 case REDIS_SET
: freeSetObject(o
); break;
3088 case REDIS_ZSET
: freeZsetObject(o
); break;
3089 case REDIS_HASH
: freeHashObject(o
); break;
3090 default: redisPanic("Unknown object type"); break;
3092 if (server
.vm_enabled
) pthread_mutex_lock(&server
.obj_freelist_mutex
);
3093 if (listLength(server
.objfreelist
) > REDIS_OBJFREELIST_MAX
||
3094 !listAddNodeHead(server
.objfreelist
,o
))
3096 if (server
.vm_enabled
) pthread_mutex_unlock(&server
.obj_freelist_mutex
);
3100 static robj
*lookupKey(redisDb
*db
, robj
*key
) {
3101 dictEntry
*de
= dictFind(db
->dict
,key
);
3103 robj
*key
= dictGetEntryKey(de
);
3104 robj
*val
= dictGetEntryVal(de
);
3106 if (server
.vm_enabled
) {
3107 if (key
->storage
== REDIS_VM_MEMORY
||
3108 key
->storage
== REDIS_VM_SWAPPING
)
3110 /* If we were swapping the object out, stop it, this key
3112 if (key
->storage
== REDIS_VM_SWAPPING
)
3113 vmCancelThreadedIOJob(key
);
3114 /* Update the access time of the key for the aging algorithm. */
3115 key
->vm
.atime
= server
.unixtime
;
3117 int notify
= (key
->storage
== REDIS_VM_LOADING
);
3119 /* Our value was swapped on disk. Bring it at home. */
3120 redisAssert(val
== NULL
);
3121 val
= vmLoadObject(key
);
3122 dictGetEntryVal(de
) = val
;
3124 /* Clients blocked by the VM subsystem may be waiting for
3126 if (notify
) handleClientsBlockedOnSwappedKey(db
,key
);
3135 static robj
*lookupKeyRead(redisDb
*db
, robj
*key
) {
3136 expireIfNeeded(db
,key
);
3137 return lookupKey(db
,key
);
3140 static robj
*lookupKeyWrite(redisDb
*db
, robj
*key
) {
3141 deleteIfVolatile(db
,key
);
3142 touchWatchedKey(db
,key
);
3143 return lookupKey(db
,key
);
3146 static robj
*lookupKeyReadOrReply(redisClient
*c
, robj
*key
, robj
*reply
) {
3147 robj
*o
= lookupKeyRead(c
->db
, key
);
3148 if (!o
) addReply(c
,reply
);
3152 static robj
*lookupKeyWriteOrReply(redisClient
*c
, robj
*key
, robj
*reply
) {
3153 robj
*o
= lookupKeyWrite(c
->db
, key
);
3154 if (!o
) addReply(c
,reply
);
3158 static int checkType(redisClient
*c
, robj
*o
, int type
) {
3159 if (o
->type
!= type
) {
3160 addReply(c
,shared
.wrongtypeerr
);
3166 static int deleteKey(redisDb
*db
, robj
*key
) {
3169 /* We need to protect key from destruction: after the first dictDelete()
3170 * it may happen that 'key' is no longer valid if we don't increment
3171 * its count. This may happen when we get the object reference directly
3172 * from the hash table with dictRandomKey() or dict iterators */
3174 if (dictSize(db
->expires
)) dictDelete(db
->expires
,key
);
3175 retval
= dictDelete(db
->dict
,key
);
3178 return retval
== DICT_OK
;
3181 /* Check if the nul-terminated string 's' can be represented by a long
3182 * (that is, is a number that fits into long without any other space or
3183 * character before or after the digits).
3185 * If so, the function returns REDIS_OK and *longval is set to the value
3186 * of the number. Otherwise REDIS_ERR is returned */
3187 static int isStringRepresentableAsLong(sds s
, long *longval
) {
3188 char buf
[32], *endptr
;
3192 value
= strtol(s
, &endptr
, 10);
3193 if (endptr
[0] != '\0') return REDIS_ERR
;
3194 slen
= ll2string(buf
,32,value
);
3196 /* If the number converted back into a string is not identical
3197 * then it's not possible to encode the string as integer */
3198 if (sdslen(s
) != (unsigned)slen
|| memcmp(buf
,s
,slen
)) return REDIS_ERR
;
3199 if (longval
) *longval
= value
;
3203 /* Try to encode a string object in order to save space */
3204 static robj
*tryObjectEncoding(robj
*o
) {
3208 if (o
->encoding
!= REDIS_ENCODING_RAW
)
3209 return o
; /* Already encoded */
3211 /* It's not safe to encode shared objects: shared objects can be shared
3212 * everywhere in the "object space" of Redis. Encoded objects can only
3213 * appear as "values" (and not, for instance, as keys) */
3214 if (o
->refcount
> 1) return o
;
3216 /* Currently we try to encode only strings */
3217 redisAssert(o
->type
== REDIS_STRING
);
3219 /* Check if we can represent this string as a long integer */
3220 if (isStringRepresentableAsLong(s
,&value
) == REDIS_ERR
) return o
;
3222 /* Ok, this object can be encoded */
3223 if (value
>= 0 && value
< REDIS_SHARED_INTEGERS
) {
3225 incrRefCount(shared
.integers
[value
]);
3226 return shared
.integers
[value
];
3228 o
->encoding
= REDIS_ENCODING_INT
;
3230 o
->ptr
= (void*) value
;
3235 /* Get a decoded version of an encoded object (returned as a new object).
3236 * If the object is already raw-encoded just increment the ref count. */
3237 static robj
*getDecodedObject(robj
*o
) {
3240 if (o
->encoding
== REDIS_ENCODING_RAW
) {
3244 if (o
->type
== REDIS_STRING
&& o
->encoding
== REDIS_ENCODING_INT
) {
3247 ll2string(buf
,32,(long)o
->ptr
);
3248 dec
= createStringObject(buf
,strlen(buf
));
3251 redisPanic("Unknown encoding type");
3255 /* Compare two string objects via strcmp() or alike.
3256 * Note that the objects may be integer-encoded. In such a case we
3257 * use ll2string() to get a string representation of the numbers on the stack
3258 * and compare the strings, it's much faster than calling getDecodedObject().
3260 * Important note: if objects are not integer encoded, but binary-safe strings,
3261 * sdscmp() from sds.c will apply memcmp() so this function can be considered
3263 static int compareStringObjects(robj
*a
, robj
*b
) {
3264 redisAssert(a
->type
== REDIS_STRING
&& b
->type
== REDIS_STRING
);
3265 char bufa
[128], bufb
[128], *astr
, *bstr
;
3268 if (a
== b
) return 0;
3269 if (a
->encoding
!= REDIS_ENCODING_RAW
) {
3270 ll2string(bufa
,sizeof(bufa
),(long) a
->ptr
);
3276 if (b
->encoding
!= REDIS_ENCODING_RAW
) {
3277 ll2string(bufb
,sizeof(bufb
),(long) b
->ptr
);
3283 return bothsds
? sdscmp(astr
,bstr
) : strcmp(astr
,bstr
);
3286 /* Equal string objects return 1 if the two objects are the same from the
3287 * point of view of a string comparison, otherwise 0 is returned. Note that
3288 * this function is faster than checking for (compareStringObjects(a,b) == 0)
3289 * because it can perform some more optimization. */
3290 static int equalStringObjects(robj
*a
, robj
*b
) {
3291 if (a
->encoding
!= REDIS_ENCODING_RAW
&& b
->encoding
!= REDIS_ENCODING_RAW
){
3292 return a
->ptr
== b
->ptr
;
3294 return compareStringObjects(a
,b
) == 0;
3298 static size_t stringObjectLen(robj
*o
) {
3299 redisAssert(o
->type
== REDIS_STRING
);
3300 if (o
->encoding
== REDIS_ENCODING_RAW
) {
3301 return sdslen(o
->ptr
);
3305 return ll2string(buf
,32,(long)o
->ptr
);
3309 static int getDoubleFromObject(robj
*o
, double *target
) {
3316 redisAssert(o
->type
== REDIS_STRING
);
3317 if (o
->encoding
== REDIS_ENCODING_RAW
) {
3318 value
= strtod(o
->ptr
, &eptr
);
3319 if (eptr
[0] != '\0') return REDIS_ERR
;
3320 } else if (o
->encoding
== REDIS_ENCODING_INT
) {
3321 value
= (long)o
->ptr
;
3323 redisPanic("Unknown string encoding");
3331 static int getDoubleFromObjectOrReply(redisClient
*c
, robj
*o
, double *target
, const char *msg
) {
3333 if (getDoubleFromObject(o
, &value
) != REDIS_OK
) {
3335 addReplySds(c
, sdscatprintf(sdsempty(), "-ERR %s\r\n", msg
));
3337 addReplySds(c
, sdsnew("-ERR value is not a double\r\n"));
3346 static int getLongLongFromObject(robj
*o
, long long *target
) {
3353 redisAssert(o
->type
== REDIS_STRING
);
3354 if (o
->encoding
== REDIS_ENCODING_RAW
) {
3355 value
= strtoll(o
->ptr
, &eptr
, 10);
3356 if (eptr
[0] != '\0') return REDIS_ERR
;
3357 } else if (o
->encoding
== REDIS_ENCODING_INT
) {
3358 value
= (long)o
->ptr
;
3360 redisPanic("Unknown string encoding");
3368 static int getLongLongFromObjectOrReply(redisClient
*c
, robj
*o
, long long *target
, const char *msg
) {
3370 if (getLongLongFromObject(o
, &value
) != REDIS_OK
) {
3372 addReplySds(c
, sdscatprintf(sdsempty(), "-ERR %s\r\n", msg
));
3374 addReplySds(c
, sdsnew("-ERR value is not an integer\r\n"));
3383 static int getLongFromObjectOrReply(redisClient
*c
, robj
*o
, long *target
, const char *msg
) {
3386 if (getLongLongFromObjectOrReply(c
, o
, &value
, msg
) != REDIS_OK
) return REDIS_ERR
;
3387 if (value
< LONG_MIN
|| value
> LONG_MAX
) {
3389 addReplySds(c
, sdscatprintf(sdsempty(), "-ERR %s\r\n", msg
));
3391 addReplySds(c
, sdsnew("-ERR value is out of range\r\n"));
3400 /*============================ RDB saving/loading =========================== */
3402 static int rdbSaveType(FILE *fp
, unsigned char type
) {
3403 if (fwrite(&type
,1,1,fp
) == 0) return -1;
3407 static int rdbSaveTime(FILE *fp
, time_t t
) {
3408 int32_t t32
= (int32_t) t
;
3409 if (fwrite(&t32
,4,1,fp
) == 0) return -1;
3413 /* check rdbLoadLen() comments for more info */
3414 static int rdbSaveLen(FILE *fp
, uint32_t len
) {
3415 unsigned char buf
[2];
3418 /* Save a 6 bit len */
3419 buf
[0] = (len
&0xFF)|(REDIS_RDB_6BITLEN
<<6);
3420 if (fwrite(buf
,1,1,fp
) == 0) return -1;
3421 } else if (len
< (1<<14)) {
3422 /* Save a 14 bit len */
3423 buf
[0] = ((len
>>8)&0xFF)|(REDIS_RDB_14BITLEN
<<6);
3425 if (fwrite(buf
,2,1,fp
) == 0) return -1;
3427 /* Save a 32 bit len */
3428 buf
[0] = (REDIS_RDB_32BITLEN
<<6);
3429 if (fwrite(buf
,1,1,fp
) == 0) return -1;
3431 if (fwrite(&len
,4,1,fp
) == 0) return -1;
3436 /* Encode 'value' as an integer if possible (if integer will fit the
3437 * supported range). If the function successfully encoded the integer
3438 * then the (up to 5 bytes) encoded representation is written in the
3439 * string pointed by 'enc' and the length is returned. Otherwise
3441 static int rdbEncodeInteger(long long value
, unsigned char *enc
) {
3442 /* Finally check if it fits in our ranges */
3443 if (value
>= -(1<<7) && value
<= (1<<7)-1) {
3444 enc
[0] = (REDIS_RDB_ENCVAL
<<6)|REDIS_RDB_ENC_INT8
;
3445 enc
[1] = value
&0xFF;
3447 } else if (value
>= -(1<<15) && value
<= (1<<15)-1) {
3448 enc
[0] = (REDIS_RDB_ENCVAL
<<6)|REDIS_RDB_ENC_INT16
;
3449 enc
[1] = value
&0xFF;
3450 enc
[2] = (value
>>8)&0xFF;
3452 } else if (value
>= -((long long)1<<31) && value
<= ((long long)1<<31)-1) {
3453 enc
[0] = (REDIS_RDB_ENCVAL
<<6)|REDIS_RDB_ENC_INT32
;
3454 enc
[1] = value
&0xFF;
3455 enc
[2] = (value
>>8)&0xFF;
3456 enc
[3] = (value
>>16)&0xFF;
3457 enc
[4] = (value
>>24)&0xFF;
3464 /* String objects in the form "2391" "-100" without any space and with a
3465 * range of values that can fit in an 8, 16 or 32 bit signed value can be
3466 * encoded as integers to save space */
3467 static int rdbTryIntegerEncoding(char *s
, size_t len
, unsigned char *enc
) {
3469 char *endptr
, buf
[32];
3471 /* Check if it's possible to encode this value as a number */
3472 value
= strtoll(s
, &endptr
, 10);
3473 if (endptr
[0] != '\0') return 0;
3474 ll2string(buf
,32,value
);
3476 /* If the number converted back into a string is not identical
3477 * then it's not possible to encode the string as integer */
3478 if (strlen(buf
) != len
|| memcmp(buf
,s
,len
)) return 0;
3480 return rdbEncodeInteger(value
,enc
);
3483 static int rdbSaveLzfStringObject(FILE *fp
, unsigned char *s
, size_t len
) {
3484 size_t comprlen
, outlen
;
3488 /* We require at least four bytes compression for this to be worth it */
3489 if (len
<= 4) return 0;
3491 if ((out
= zmalloc(outlen
+1)) == NULL
) return 0;
3492 comprlen
= lzf_compress(s
, len
, out
, outlen
);
3493 if (comprlen
== 0) {
3497 /* Data compressed! Let's save it on disk */
3498 byte
= (REDIS_RDB_ENCVAL
<<6)|REDIS_RDB_ENC_LZF
;
3499 if (fwrite(&byte
,1,1,fp
) == 0) goto writeerr
;
3500 if (rdbSaveLen(fp
,comprlen
) == -1) goto writeerr
;
3501 if (rdbSaveLen(fp
,len
) == -1) goto writeerr
;
3502 if (fwrite(out
,comprlen
,1,fp
) == 0) goto writeerr
;
3511 /* Save a string object as [len][data] on disk. If the object is a string
3512 * representation of an integer value we try to save it in a special form */
3513 static int rdbSaveRawString(FILE *fp
, unsigned char *s
, size_t len
) {
3516 /* Try integer encoding */
3518 unsigned char buf
[5];
3519 if ((enclen
= rdbTryIntegerEncoding((char*)s
,len
,buf
)) > 0) {
3520 if (fwrite(buf
,enclen
,1,fp
) == 0) return -1;
3525 /* Try LZF compression - under 20 bytes it's unable to compress even
3526 * aaaaaaaaaaaaaaaaaa so skip it */
3527 if (server
.rdbcompression
&& len
> 20) {
3530 retval
= rdbSaveLzfStringObject(fp
,s
,len
);
3531 if (retval
== -1) return -1;
3532 if (retval
> 0) return 0;
3533 /* retval == 0 means data can't be compressed, save the old way */
3536 /* Store verbatim */
3537 if (rdbSaveLen(fp
,len
) == -1) return -1;
3538 if (len
&& fwrite(s
,len
,1,fp
) == 0) return -1;
3542 /* Like rdbSaveRawString() but handles encoded objects */
3543 static int rdbSaveStringObject(FILE *fp
, robj
*obj
) {
3546 /* Avoid to decode the object, then encode it again, if the
3547 * object is already integer encoded. */
3548 if (obj
->encoding
== REDIS_ENCODING_INT
) {
3549 long val
= (long) obj
->ptr
;
3550 unsigned char buf
[5];
3553 if ((enclen
= rdbEncodeInteger(val
,buf
)) > 0) {
3554 if (fwrite(buf
,enclen
,1,fp
) == 0) return -1;
3557 /* otherwise... fall through and continue with the usual
3561 /* Avoid incr/decr ref count business when possible.
3562 * This plays well with copy-on-write given that we are probably
3563 * in a child process (BGSAVE). Also this makes sure key objects
3564 * of swapped objects are not incRefCount-ed (an assert does not allow
3565 * this in order to avoid bugs) */
3566 if (obj
->encoding
!= REDIS_ENCODING_RAW
) {
3567 obj
= getDecodedObject(obj
);
3568 retval
= rdbSaveRawString(fp
,obj
->ptr
,sdslen(obj
->ptr
));
3571 retval
= rdbSaveRawString(fp
,obj
->ptr
,sdslen(obj
->ptr
));
3576 /* Save a double value. Doubles are saved as strings prefixed by an unsigned
3577 * 8 bit integer specifying the length of the representation.
3578 * This 8 bit integer has special values in order to specify the following
3584 static int rdbSaveDoubleValue(FILE *fp
, double val
) {
3585 unsigned char buf
[128];
3591 } else if (!isfinite(val
)) {
3593 buf
[0] = (val
< 0) ? 255 : 254;
3595 #if (DBL_MANT_DIG >= 52) && (LLONG_MAX == 0x7fffffffffffffffLL)
3596 /* Check if the float is in a safe range to be cast into a
3597 * long long. We are assuming that long long is 64 bit here.
3598 * Also we are assuming that there are no implementations around where
3599 * double has precision < 52 bit.
3601 * Under these assumptions we test if a double is inside an interval
3602 * where casting to long long is safe. Then using two castings we
3603 * make sure the decimal part is zero. If all this is true we use
3604 * integer printing function that is much faster. */
3605 double min
= -4503599627370495; /* -((2^52)-1) */
3606 double max
= 4503599627370496; /* 2^52 */
3607 if (val
> min
&& val
< max
&& val
== ((double)((long long)val
)))
3608 ll2string((char*)buf
+1,sizeof(buf
),(long long)val
);
3611 snprintf((char*)buf
+1,sizeof(buf
)-1,"%.17g",val
);
3612 buf
[0] = strlen((char*)buf
+1);
3615 if (fwrite(buf
,len
,1,fp
) == 0) return -1;
3619 /* Save a Redis object. */
3620 static int rdbSaveObject(FILE *fp
, robj
*o
) {
3621 if (o
->type
== REDIS_STRING
) {
3622 /* Save a string value */
3623 if (rdbSaveStringObject(fp
,o
) == -1) return -1;
3624 } else if (o
->type
== REDIS_LIST
) {
3625 /* Save a list value */
3626 list
*list
= o
->ptr
;
3630 if (rdbSaveLen(fp
,listLength(list
)) == -1) return -1;
3631 listRewind(list
,&li
);
3632 while((ln
= listNext(&li
))) {
3633 robj
*eleobj
= listNodeValue(ln
);
3635 if (rdbSaveStringObject(fp
,eleobj
) == -1) return -1;
3637 } else if (o
->type
== REDIS_SET
) {
3638 /* Save a set value */
3640 dictIterator
*di
= dictGetIterator(set
);
3643 if (rdbSaveLen(fp
,dictSize(set
)) == -1) return -1;
3644 while((de
= dictNext(di
)) != NULL
) {
3645 robj
*eleobj
= dictGetEntryKey(de
);
3647 if (rdbSaveStringObject(fp
,eleobj
) == -1) return -1;
3649 dictReleaseIterator(di
);
3650 } else if (o
->type
== REDIS_ZSET
) {
3651 /* Save a sorted set value */
3653 dictIterator
*di
= dictGetIterator(zs
->dict
);
3656 if (rdbSaveLen(fp
,dictSize(zs
->dict
)) == -1) return -1;
3657 while((de
= dictNext(di
)) != NULL
) {
3658 robj
*eleobj
= dictGetEntryKey(de
);
3659 double *score
= dictGetEntryVal(de
);
3661 if (rdbSaveStringObject(fp
,eleobj
) == -1) return -1;
3662 if (rdbSaveDoubleValue(fp
,*score
) == -1) return -1;
3664 dictReleaseIterator(di
);
3665 } else if (o
->type
== REDIS_HASH
) {
3666 /* Save a hash value */
3667 if (o
->encoding
== REDIS_ENCODING_ZIPMAP
) {
3668 unsigned char *p
= zipmapRewind(o
->ptr
);
3669 unsigned int count
= zipmapLen(o
->ptr
);
3670 unsigned char *key
, *val
;
3671 unsigned int klen
, vlen
;
3673 if (rdbSaveLen(fp
,count
) == -1) return -1;
3674 while((p
= zipmapNext(p
,&key
,&klen
,&val
,&vlen
)) != NULL
) {
3675 if (rdbSaveRawString(fp
,key
,klen
) == -1) return -1;
3676 if (rdbSaveRawString(fp
,val
,vlen
) == -1) return -1;
3679 dictIterator
*di
= dictGetIterator(o
->ptr
);
3682 if (rdbSaveLen(fp
,dictSize((dict
*)o
->ptr
)) == -1) return -1;
3683 while((de
= dictNext(di
)) != NULL
) {
3684 robj
*key
= dictGetEntryKey(de
);
3685 robj
*val
= dictGetEntryVal(de
);
3687 if (rdbSaveStringObject(fp
,key
) == -1) return -1;
3688 if (rdbSaveStringObject(fp
,val
) == -1) return -1;
3690 dictReleaseIterator(di
);
3693 redisPanic("Unknown object type");
3698 /* Return the length the object will have on disk if saved with
3699 * the rdbSaveObject() function. Currently we use a trick to get
3700 * this length with very little changes to the code. In the future
3701 * we could switch to a faster solution. */
3702 static off_t
rdbSavedObjectLen(robj
*o
, FILE *fp
) {
3703 if (fp
== NULL
) fp
= server
.devnull
;
3705 assert(rdbSaveObject(fp
,o
) != 1);
3709 /* Return the number of pages required to save this object in the swap file */
3710 static off_t
rdbSavedObjectPages(robj
*o
, FILE *fp
) {
3711 off_t bytes
= rdbSavedObjectLen(o
,fp
);
3713 return (bytes
+(server
.vm_page_size
-1))/server
.vm_page_size
;
3716 /* Save the DB on disk. Return REDIS_ERR on error, REDIS_OK on success */
3717 static int rdbSave(char *filename
) {
3718 dictIterator
*di
= NULL
;
3723 time_t now
= time(NULL
);
3725 /* Wait for I/O threads to terminate, just in case this is a
3726 * foreground-saving, to avoid seeking the swap file descriptor at the
3728 if (server
.vm_enabled
)
3729 waitEmptyIOJobsQueue();
3731 snprintf(tmpfile
,256,"temp-%d.rdb", (int) getpid());
3732 fp
= fopen(tmpfile
,"w");
3734 redisLog(REDIS_WARNING
, "Failed saving the DB: %s", strerror(errno
));
3737 if (fwrite("REDIS0001",9,1,fp
) == 0) goto werr
;
3738 for (j
= 0; j
< server
.dbnum
; j
++) {
3739 redisDb
*db
= server
.db
+j
;
3741 if (dictSize(d
) == 0) continue;
3742 di
= dictGetIterator(d
);
3748 /* Write the SELECT DB opcode */
3749 if (rdbSaveType(fp
,REDIS_SELECTDB
) == -1) goto werr
;
3750 if (rdbSaveLen(fp
,j
) == -1) goto werr
;
3752 /* Iterate this DB writing every entry */
3753 while((de
= dictNext(di
)) != NULL
) {
3754 robj
*key
= dictGetEntryKey(de
);
3755 robj
*o
= dictGetEntryVal(de
);
3756 time_t expiretime
= getExpire(db
,key
);
3758 /* Save the expire time */
3759 if (expiretime
!= -1) {
3760 /* If this key is already expired skip it */
3761 if (expiretime
< now
) continue;
3762 if (rdbSaveType(fp
,REDIS_EXPIRETIME
) == -1) goto werr
;
3763 if (rdbSaveTime(fp
,expiretime
) == -1) goto werr
;
3765 /* Save the key and associated value. This requires special
3766 * handling if the value is swapped out. */
3767 if (!server
.vm_enabled
|| key
->storage
== REDIS_VM_MEMORY
||
3768 key
->storage
== REDIS_VM_SWAPPING
) {
3769 /* Save type, key, value */
3770 if (rdbSaveType(fp
,o
->type
) == -1) goto werr
;
3771 if (rdbSaveStringObject(fp
,key
) == -1) goto werr
;
3772 if (rdbSaveObject(fp
,o
) == -1) goto werr
;
3774 /* REDIS_VM_SWAPPED or REDIS_VM_LOADING */
3776 /* Get a preview of the object in memory */
3777 po
= vmPreviewObject(key
);
3778 /* Save type, key, value */
3779 if (rdbSaveType(fp
,key
->vtype
) == -1) goto werr
;
3780 if (rdbSaveStringObject(fp
,key
) == -1) goto werr
;
3781 if (rdbSaveObject(fp
,po
) == -1) goto werr
;
3782 /* Remove the loaded object from memory */
3786 dictReleaseIterator(di
);
3789 if (rdbSaveType(fp
,REDIS_EOF
) == -1) goto werr
;
3791 /* Make sure data will not remain on the OS's output buffers */
3796 /* Use RENAME to make sure the DB file is changed atomically only
3797 * if the generated DB file is ok. */
3798 if (rename(tmpfile
,filename
) == -1) {
3799 redisLog(REDIS_WARNING
,"Error moving temp DB file on the final destination: %s", strerror(errno
));
3803 redisLog(REDIS_NOTICE
,"DB saved on disk");
3805 server
.lastsave
= time(NULL
);
3811 redisLog(REDIS_WARNING
,"Write error saving DB on disk: %s", strerror(errno
));
3812 if (di
) dictReleaseIterator(di
);
3816 static int rdbSaveBackground(char *filename
) {
3819 if (server
.bgsavechildpid
!= -1) return REDIS_ERR
;
3820 if (server
.vm_enabled
) waitEmptyIOJobsQueue();
3821 if ((childpid
= fork()) == 0) {
3823 if (server
.vm_enabled
) vmReopenSwapFile();
3825 if (rdbSave(filename
) == REDIS_OK
) {
3832 if (childpid
== -1) {
3833 redisLog(REDIS_WARNING
,"Can't save in background: fork: %s",
3837 redisLog(REDIS_NOTICE
,"Background saving started by pid %d",childpid
);
3838 server
.bgsavechildpid
= childpid
;
3839 updateDictResizePolicy();
3842 return REDIS_OK
; /* unreached */
3845 static void rdbRemoveTempFile(pid_t childpid
) {
3848 snprintf(tmpfile
,256,"temp-%d.rdb", (int) childpid
);
3852 static int rdbLoadType(FILE *fp
) {
3854 if (fread(&type
,1,1,fp
) == 0) return -1;
3858 static time_t rdbLoadTime(FILE *fp
) {
3860 if (fread(&t32
,4,1,fp
) == 0) return -1;
3861 return (time_t) t32
;
3864 /* Load an encoded length from the DB, see the REDIS_RDB_* defines on the top
3865 * of this file for a description of how these are stored on disk.
3867 * isencoded is set to 1 if the length read is not actually a length but
3868 * an "encoding type", check the above comments for more info */
3869 static uint32_t rdbLoadLen(FILE *fp
, int *isencoded
) {
3870 unsigned char buf
[2];
3874 if (isencoded
) *isencoded
= 0;
3875 if (fread(buf
,1,1,fp
) == 0) return REDIS_RDB_LENERR
;
3876 type
= (buf
[0]&0xC0)>>6;
3877 if (type
== REDIS_RDB_6BITLEN
) {
3878 /* Read a 6 bit len */
3880 } else if (type
== REDIS_RDB_ENCVAL
) {
3881 /* Read a 6 bit len encoding type */
3882 if (isencoded
) *isencoded
= 1;
3884 } else if (type
== REDIS_RDB_14BITLEN
) {
3885 /* Read a 14 bit len */
3886 if (fread(buf
+1,1,1,fp
) == 0) return REDIS_RDB_LENERR
;
3887 return ((buf
[0]&0x3F)<<8)|buf
[1];
3889 /* Read a 32 bit len */
3890 if (fread(&len
,4,1,fp
) == 0) return REDIS_RDB_LENERR
;
3895 /* Load an integer-encoded object from file 'fp', with the specified
3896 * encoding type 'enctype'. If encode is true the function may return
3897 * an integer-encoded object as reply, otherwise the returned object
3898 * will always be encoded as a raw string. */
3899 static robj
*rdbLoadIntegerObject(FILE *fp
, int enctype
, int encode
) {
3900 unsigned char enc
[4];
3903 if (enctype
== REDIS_RDB_ENC_INT8
) {
3904 if (fread(enc
,1,1,fp
) == 0) return NULL
;
3905 val
= (signed char)enc
[0];
3906 } else if (enctype
== REDIS_RDB_ENC_INT16
) {
3908 if (fread(enc
,2,1,fp
) == 0) return NULL
;
3909 v
= enc
[0]|(enc
[1]<<8);
3911 } else if (enctype
== REDIS_RDB_ENC_INT32
) {
3913 if (fread(enc
,4,1,fp
) == 0) return NULL
;
3914 v
= enc
[0]|(enc
[1]<<8)|(enc
[2]<<16)|(enc
[3]<<24);
3917 val
= 0; /* anti-warning */
3918 redisPanic("Unknown RDB integer encoding type");
3921 return createStringObjectFromLongLong(val
);
3923 return createObject(REDIS_STRING
,sdsfromlonglong(val
));
3926 static robj
*rdbLoadLzfStringObject(FILE*fp
) {
3927 unsigned int len
, clen
;
3928 unsigned char *c
= NULL
;
3931 if ((clen
= rdbLoadLen(fp
,NULL
)) == REDIS_RDB_LENERR
) return NULL
;
3932 if ((len
= rdbLoadLen(fp
,NULL
)) == REDIS_RDB_LENERR
) return NULL
;
3933 if ((c
= zmalloc(clen
)) == NULL
) goto err
;
3934 if ((val
= sdsnewlen(NULL
,len
)) == NULL
) goto err
;
3935 if (fread(c
,clen
,1,fp
) == 0) goto err
;
3936 if (lzf_decompress(c
,clen
,val
,len
) == 0) goto err
;
3938 return createObject(REDIS_STRING
,val
);
3945 static robj
*rdbGenericLoadStringObject(FILE*fp
, int encode
) {
3950 len
= rdbLoadLen(fp
,&isencoded
);
3953 case REDIS_RDB_ENC_INT8
:
3954 case REDIS_RDB_ENC_INT16
:
3955 case REDIS_RDB_ENC_INT32
:
3956 return rdbLoadIntegerObject(fp
,len
,encode
);
3957 case REDIS_RDB_ENC_LZF
:
3958 return rdbLoadLzfStringObject(fp
);
3960 redisPanic("Unknown RDB encoding type");
3964 if (len
== REDIS_RDB_LENERR
) return NULL
;
3965 val
= sdsnewlen(NULL
,len
);
3966 if (len
&& fread(val
,len
,1,fp
) == 0) {
3970 return createObject(REDIS_STRING
,val
);
3973 static robj
*rdbLoadStringObject(FILE *fp
) {
3974 return rdbGenericLoadStringObject(fp
,0);
3977 static robj
*rdbLoadEncodedStringObject(FILE *fp
) {
3978 return rdbGenericLoadStringObject(fp
,1);
3981 /* For information about double serialization check rdbSaveDoubleValue() */
3982 static int rdbLoadDoubleValue(FILE *fp
, double *val
) {
3986 if (fread(&len
,1,1,fp
) == 0) return -1;
3988 case 255: *val
= R_NegInf
; return 0;
3989 case 254: *val
= R_PosInf
; return 0;
3990 case 253: *val
= R_Nan
; return 0;
3992 if (fread(buf
,len
,1,fp
) == 0) return -1;
3994 sscanf(buf
, "%lg", val
);
3999 /* Load a Redis object of the specified type from the specified file.
4000 * On success a newly allocated object is returned, otherwise NULL. */
4001 static robj
*rdbLoadObject(int type
, FILE *fp
) {
4004 redisLog(REDIS_DEBUG
,"LOADING OBJECT %d (at %d)\n",type
,ftell(fp
));
4005 if (type
== REDIS_STRING
) {
4006 /* Read string value */
4007 if ((o
= rdbLoadEncodedStringObject(fp
)) == NULL
) return NULL
;
4008 o
= tryObjectEncoding(o
);
4009 } else if (type
== REDIS_LIST
|| type
== REDIS_SET
) {
4010 /* Read list/set value */
4013 if ((listlen
= rdbLoadLen(fp
,NULL
)) == REDIS_RDB_LENERR
) return NULL
;
4014 o
= (type
== REDIS_LIST
) ? createListObject() : createSetObject();
4015 /* It's faster to expand the dict to the right size asap in order
4016 * to avoid rehashing */
4017 if (type
== REDIS_SET
&& listlen
> DICT_HT_INITIAL_SIZE
)
4018 dictExpand(o
->ptr
,listlen
);
4019 /* Load every single element of the list/set */
4023 if ((ele
= rdbLoadEncodedStringObject(fp
)) == NULL
) return NULL
;
4024 ele
= tryObjectEncoding(ele
);
4025 if (type
== REDIS_LIST
) {
4026 listAddNodeTail((list
*)o
->ptr
,ele
);
4028 dictAdd((dict
*)o
->ptr
,ele
,NULL
);
4031 } else if (type
== REDIS_ZSET
) {
4032 /* Read list/set value */
4036 if ((zsetlen
= rdbLoadLen(fp
,NULL
)) == REDIS_RDB_LENERR
) return NULL
;
4037 o
= createZsetObject();
4039 /* Load every single element of the list/set */
4042 double *score
= zmalloc(sizeof(double));
4044 if ((ele
= rdbLoadEncodedStringObject(fp
)) == NULL
) return NULL
;
4045 ele
= tryObjectEncoding(ele
);
4046 if (rdbLoadDoubleValue(fp
,score
) == -1) return NULL
;
4047 dictAdd(zs
->dict
,ele
,score
);
4048 zslInsert(zs
->zsl
,*score
,ele
);
4049 incrRefCount(ele
); /* added to skiplist */
4051 } else if (type
== REDIS_HASH
) {
4054 if ((hashlen
= rdbLoadLen(fp
,NULL
)) == REDIS_RDB_LENERR
) return NULL
;
4055 o
= createHashObject();
4056 /* Too many entries? Use an hash table. */
4057 if (hashlen
> server
.hash_max_zipmap_entries
)
4058 convertToRealHash(o
);
4059 /* Load every key/value, then set it into the zipmap or hash
4060 * table, as needed. */
4064 if ((key
= rdbLoadStringObject(fp
)) == NULL
) return NULL
;
4065 if ((val
= rdbLoadStringObject(fp
)) == NULL
) return NULL
;
4066 /* If we are using a zipmap and there are too big values
4067 * the object is converted to real hash table encoding. */
4068 if (o
->encoding
!= REDIS_ENCODING_HT
&&
4069 (sdslen(key
->ptr
) > server
.hash_max_zipmap_value
||
4070 sdslen(val
->ptr
) > server
.hash_max_zipmap_value
))
4072 convertToRealHash(o
);
4075 if (o
->encoding
== REDIS_ENCODING_ZIPMAP
) {
4076 unsigned char *zm
= o
->ptr
;
4078 zm
= zipmapSet(zm
,key
->ptr
,sdslen(key
->ptr
),
4079 val
->ptr
,sdslen(val
->ptr
),NULL
);
4084 key
= tryObjectEncoding(key
);
4085 val
= tryObjectEncoding(val
);
4086 dictAdd((dict
*)o
->ptr
,key
,val
);
4090 redisPanic("Unknown object type");
4095 static int rdbLoad(char *filename
) {
4098 int type
, retval
, rdbver
;
4099 int swap_all_values
= 0;
4100 dict
*d
= server
.db
[0].dict
;
4101 redisDb
*db
= server
.db
+0;
4103 time_t expiretime
, now
= time(NULL
);
4104 long long loadedkeys
= 0;
4106 fp
= fopen(filename
,"r");
4107 if (!fp
) return REDIS_ERR
;
4108 if (fread(buf
,9,1,fp
) == 0) goto eoferr
;
4110 if (memcmp(buf
,"REDIS",5) != 0) {
4112 redisLog(REDIS_WARNING
,"Wrong signature trying to load DB from file");
4115 rdbver
= atoi(buf
+5);
4118 redisLog(REDIS_WARNING
,"Can't handle RDB format version %d",rdbver
);
4127 if ((type
= rdbLoadType(fp
)) == -1) goto eoferr
;
4128 if (type
== REDIS_EXPIRETIME
) {
4129 if ((expiretime
= rdbLoadTime(fp
)) == -1) goto eoferr
;
4130 /* We read the time so we need to read the object type again */
4131 if ((type
= rdbLoadType(fp
)) == -1) goto eoferr
;
4133 if (type
== REDIS_EOF
) break;
4134 /* Handle SELECT DB opcode as a special case */
4135 if (type
== REDIS_SELECTDB
) {
4136 if ((dbid
= rdbLoadLen(fp
,NULL
)) == REDIS_RDB_LENERR
)
4138 if (dbid
>= (unsigned)server
.dbnum
) {
4139 redisLog(REDIS_WARNING
,"FATAL: Data file was created with a Redis server configured to handle more than %d databases. Exiting\n", server
.dbnum
);
4142 db
= server
.db
+dbid
;
4147 if ((key
= rdbLoadStringObject(fp
)) == NULL
) goto eoferr
;
4149 if ((val
= rdbLoadObject(type
,fp
)) == NULL
) goto eoferr
;
4150 /* Check if the key already expired */
4151 if (expiretime
!= -1 && expiretime
< now
) {
4156 /* Add the new object in the hash table */
4157 retval
= dictAdd(d
,key
,val
);
4158 if (retval
== DICT_ERR
) {
4159 redisLog(REDIS_WARNING
,"Loading DB, duplicated key (%s) found! Unrecoverable error, exiting now.", key
->ptr
);
4163 /* Set the expire time if needed */
4164 if (expiretime
!= -1) setExpire(db
,key
,expiretime
);
4166 /* Handle swapping while loading big datasets when VM is on */
4168 /* If we detecter we are hopeless about fitting something in memory
4169 * we just swap every new key on disk. Directly...
4170 * Note that's important to check for this condition before resorting
4171 * to random sampling, otherwise we may try to swap already
4173 if (swap_all_values
) {
4174 dictEntry
*de
= dictFind(d
,key
);
4176 /* de may be NULL since the key already expired */
4178 key
= dictGetEntryKey(de
);
4179 val
= dictGetEntryVal(de
);
4181 if (vmSwapObjectBlocking(key
,val
) == REDIS_OK
) {
4182 dictGetEntryVal(de
) = NULL
;
4189 if ((zmalloc_used_memory() - server
.vm_max_memory
) > 1024*1024*32)
4192 /* If we have still some hope of having some value fitting memory
4193 * then we try random sampling. */
4194 if (!swap_all_values
&& server
.vm_enabled
&& force_swapout
) {
4195 while (zmalloc_used_memory() > server
.vm_max_memory
) {
4196 if (vmSwapOneObjectBlocking() == REDIS_ERR
) break;
4198 if (zmalloc_used_memory() > server
.vm_max_memory
)
4199 swap_all_values
= 1; /* We are already using too much mem */
4205 eoferr
: /* unexpected end of file is handled here with a fatal exit */
4206 redisLog(REDIS_WARNING
,"Short read or OOM loading DB. Unrecoverable error, aborting now.");
4208 return REDIS_ERR
; /* Just to avoid warning */
4211 /*================================== Shutdown =============================== */
4212 static int prepareForShutdown() {
4213 redisLog(REDIS_WARNING
,"User requested shutdown, saving DB...");
4214 /* Kill the saving child if there is a background saving in progress.
4215 We want to avoid race conditions, for instance our saving child may
4216 overwrite the synchronous saving did by SHUTDOWN. */
4217 if (server
.bgsavechildpid
!= -1) {
4218 redisLog(REDIS_WARNING
,"There is a live saving child. Killing it!");
4219 kill(server
.bgsavechildpid
,SIGKILL
);
4220 rdbRemoveTempFile(server
.bgsavechildpid
);
4222 if (server
.appendonly
) {
4223 /* Append only file: fsync() the AOF and exit */
4224 aof_fsync(server
.appendfd
);
4225 if (server
.vm_enabled
) unlink(server
.vm_swap_file
);
4227 /* Snapshotting. Perform a SYNC SAVE and exit */
4228 if (rdbSave(server
.dbfilename
) == REDIS_OK
) {
4229 if (server
.daemonize
)
4230 unlink(server
.pidfile
);
4231 redisLog(REDIS_WARNING
,"%zu bytes used at exit",zmalloc_used_memory());
4233 /* Ooops.. error saving! The best we can do is to continue
4234 * operating. Note that if there was a background saving process,
4235 * in the next cron() Redis will be notified that the background
4236 * saving aborted, handling special stuff like slaves pending for
4237 * synchronization... */
4238 redisLog(REDIS_WARNING
,"Error trying to save the DB, can't exit");
4242 redisLog(REDIS_WARNING
,"Server exit now, bye bye...");
4246 /*================================== Commands =============================== */
4248 static void authCommand(redisClient
*c
) {
4249 if (!server
.requirepass
|| !strcmp(c
->argv
[1]->ptr
, server
.requirepass
)) {
4250 c
->authenticated
= 1;
4251 addReply(c
,shared
.ok
);
4253 c
->authenticated
= 0;
4254 addReplySds(c
,sdscatprintf(sdsempty(),"-ERR invalid password\r\n"));
4258 static void pingCommand(redisClient
*c
) {
4259 addReply(c
,shared
.pong
);
4262 static void echoCommand(redisClient
*c
) {
4263 addReplyBulk(c
,c
->argv
[1]);
4266 /*=================================== Strings =============================== */
4268 static void setGenericCommand(redisClient
*c
, int nx
, robj
*key
, robj
*val
, robj
*expire
) {
4270 long seconds
= 0; /* initialized to avoid an harmness warning */
4273 if (getLongFromObjectOrReply(c
, expire
, &seconds
, NULL
) != REDIS_OK
)
4276 addReplySds(c
,sdsnew("-ERR invalid expire time in SETEX\r\n"));
4281 touchWatchedKey(c
->db
,key
);
4282 if (nx
) deleteIfVolatile(c
->db
,key
);
4283 retval
= dictAdd(c
->db
->dict
,key
,val
);
4284 if (retval
== DICT_ERR
) {
4286 /* If the key is about a swapped value, we want a new key object
4287 * to overwrite the old. So we delete the old key in the database.
4288 * This will also make sure that swap pages about the old object
4289 * will be marked as free. */
4290 if (server
.vm_enabled
&& deleteIfSwapped(c
->db
,key
))
4292 dictReplace(c
->db
->dict
,key
,val
);
4295 addReply(c
,shared
.czero
);
4303 removeExpire(c
->db
,key
);
4304 if (expire
) setExpire(c
->db
,key
,time(NULL
)+seconds
);
4305 addReply(c
, nx
? shared
.cone
: shared
.ok
);
4308 static void setCommand(redisClient
*c
) {
4309 setGenericCommand(c
,0,c
->argv
[1],c
->argv
[2],NULL
);
4312 static void setnxCommand(redisClient
*c
) {
4313 setGenericCommand(c
,1,c
->argv
[1],c
->argv
[2],NULL
);
4316 static void setexCommand(redisClient
*c
) {
4317 setGenericCommand(c
,0,c
->argv
[1],c
->argv
[3],c
->argv
[2]);
4320 static int getGenericCommand(redisClient
*c
) {
4323 if ((o
= lookupKeyReadOrReply(c
,c
->argv
[1],shared
.nullbulk
)) == NULL
)
4326 if (o
->type
!= REDIS_STRING
) {
4327 addReply(c
,shared
.wrongtypeerr
);
4335 static void getCommand(redisClient
*c
) {
4336 getGenericCommand(c
);
4339 static void getsetCommand(redisClient
*c
) {
4340 if (getGenericCommand(c
) == REDIS_ERR
) return;
4341 if (dictAdd(c
->db
->dict
,c
->argv
[1],c
->argv
[2]) == DICT_ERR
) {
4342 dictReplace(c
->db
->dict
,c
->argv
[1],c
->argv
[2]);
4344 incrRefCount(c
->argv
[1]);
4346 incrRefCount(c
->argv
[2]);
4348 removeExpire(c
->db
,c
->argv
[1]);
4351 static void mgetCommand(redisClient
*c
) {
4354 addReplySds(c
,sdscatprintf(sdsempty(),"*%d\r\n",c
->argc
-1));
4355 for (j
= 1; j
< c
->argc
; j
++) {
4356 robj
*o
= lookupKeyRead(c
->db
,c
->argv
[j
]);
4358 addReply(c
,shared
.nullbulk
);
4360 if (o
->type
!= REDIS_STRING
) {
4361 addReply(c
,shared
.nullbulk
);
4369 static void msetGenericCommand(redisClient
*c
, int nx
) {
4370 int j
, busykeys
= 0;
4372 if ((c
->argc
% 2) == 0) {
4373 addReplySds(c
,sdsnew("-ERR wrong number of arguments for MSET\r\n"));
4376 /* Handle the NX flag. The MSETNX semantic is to return zero and don't
4377 * set nothing at all if at least one already key exists. */
4379 for (j
= 1; j
< c
->argc
; j
+= 2) {
4380 if (lookupKeyWrite(c
->db
,c
->argv
[j
]) != NULL
) {
4386 addReply(c
, shared
.czero
);
4390 for (j
= 1; j
< c
->argc
; j
+= 2) {
4393 c
->argv
[j
+1] = tryObjectEncoding(c
->argv
[j
+1]);
4394 retval
= dictAdd(c
->db
->dict
,c
->argv
[j
],c
->argv
[j
+1]);
4395 if (retval
== DICT_ERR
) {
4396 dictReplace(c
->db
->dict
,c
->argv
[j
],c
->argv
[j
+1]);
4397 incrRefCount(c
->argv
[j
+1]);
4399 incrRefCount(c
->argv
[j
]);
4400 incrRefCount(c
->argv
[j
+1]);
4402 removeExpire(c
->db
,c
->argv
[j
]);
4404 server
.dirty
+= (c
->argc
-1)/2;
4405 addReply(c
, nx
? shared
.cone
: shared
.ok
);
4408 static void msetCommand(redisClient
*c
) {
4409 msetGenericCommand(c
,0);
4412 static void msetnxCommand(redisClient
*c
) {
4413 msetGenericCommand(c
,1);
4416 static void incrDecrCommand(redisClient
*c
, long long incr
) {
4421 o
= lookupKeyWrite(c
->db
,c
->argv
[1]);
4422 if (o
!= NULL
&& checkType(c
,o
,REDIS_STRING
)) return;
4423 if (getLongLongFromObjectOrReply(c
,o
,&value
,NULL
) != REDIS_OK
) return;
4426 o
= createStringObjectFromLongLong(value
);
4427 retval
= dictAdd(c
->db
->dict
,c
->argv
[1],o
);
4428 if (retval
== DICT_ERR
) {
4429 dictReplace(c
->db
->dict
,c
->argv
[1],o
);
4430 removeExpire(c
->db
,c
->argv
[1]);
4432 incrRefCount(c
->argv
[1]);
4435 addReply(c
,shared
.colon
);
4437 addReply(c
,shared
.crlf
);
4440 static void incrCommand(redisClient
*c
) {
4441 incrDecrCommand(c
,1);
4444 static void decrCommand(redisClient
*c
) {
4445 incrDecrCommand(c
,-1);
4448 static void incrbyCommand(redisClient
*c
) {
4451 if (getLongLongFromObjectOrReply(c
, c
->argv
[2], &incr
, NULL
) != REDIS_OK
) return;
4452 incrDecrCommand(c
,incr
);
4455 static void decrbyCommand(redisClient
*c
) {
4458 if (getLongLongFromObjectOrReply(c
, c
->argv
[2], &incr
, NULL
) != REDIS_OK
) return;
4459 incrDecrCommand(c
,-incr
);
4462 static void appendCommand(redisClient
*c
) {
4467 o
= lookupKeyWrite(c
->db
,c
->argv
[1]);
4469 /* Create the key */
4470 retval
= dictAdd(c
->db
->dict
,c
->argv
[1],c
->argv
[2]);
4471 incrRefCount(c
->argv
[1]);
4472 incrRefCount(c
->argv
[2]);
4473 totlen
= stringObjectLen(c
->argv
[2]);
4477 de
= dictFind(c
->db
->dict
,c
->argv
[1]);
4480 o
= dictGetEntryVal(de
);
4481 if (o
->type
!= REDIS_STRING
) {
4482 addReply(c
,shared
.wrongtypeerr
);
4485 /* If the object is specially encoded or shared we have to make
4487 if (o
->refcount
!= 1 || o
->encoding
!= REDIS_ENCODING_RAW
) {
4488 robj
*decoded
= getDecodedObject(o
);
4490 o
= createStringObject(decoded
->ptr
, sdslen(decoded
->ptr
));
4491 decrRefCount(decoded
);
4492 dictReplace(c
->db
->dict
,c
->argv
[1],o
);
4495 if (c
->argv
[2]->encoding
== REDIS_ENCODING_RAW
) {
4496 o
->ptr
= sdscatlen(o
->ptr
,
4497 c
->argv
[2]->ptr
, sdslen(c
->argv
[2]->ptr
));
4499 o
->ptr
= sdscatprintf(o
->ptr
, "%ld",
4500 (unsigned long) c
->argv
[2]->ptr
);
4502 totlen
= sdslen(o
->ptr
);
4505 addReplySds(c
,sdscatprintf(sdsempty(),":%lu\r\n",(unsigned long)totlen
));
4508 static void substrCommand(redisClient
*c
) {
4510 long start
= atoi(c
->argv
[2]->ptr
);
4511 long end
= atoi(c
->argv
[3]->ptr
);
4512 size_t rangelen
, strlen
;
4515 if ((o
= lookupKeyReadOrReply(c
,c
->argv
[1],shared
.nullbulk
)) == NULL
||
4516 checkType(c
,o
,REDIS_STRING
)) return;
4518 o
= getDecodedObject(o
);
4519 strlen
= sdslen(o
->ptr
);
4521 /* convert negative indexes */
4522 if (start
< 0) start
= strlen
+start
;
4523 if (end
< 0) end
= strlen
+end
;
4524 if (start
< 0) start
= 0;
4525 if (end
< 0) end
= 0;
4527 /* indexes sanity checks */
4528 if (start
> end
|| (size_t)start
>= strlen
) {
4529 /* Out of range start or start > end result in null reply */
4530 addReply(c
,shared
.nullbulk
);
4534 if ((size_t)end
>= strlen
) end
= strlen
-1;
4535 rangelen
= (end
-start
)+1;
4537 /* Return the result */
4538 addReplySds(c
,sdscatprintf(sdsempty(),"$%zu\r\n",rangelen
));
4539 range
= sdsnewlen((char*)o
->ptr
+start
,rangelen
);
4540 addReplySds(c
,range
);
4541 addReply(c
,shared
.crlf
);
4545 /* ========================= Type agnostic commands ========================= */
4547 static void delCommand(redisClient
*c
) {
4550 for (j
= 1; j
< c
->argc
; j
++) {
4551 if (deleteKey(c
->db
,c
->argv
[j
])) {
4552 touchWatchedKey(c
->db
,c
->argv
[j
]);
4557 addReplyLongLong(c
,deleted
);
4560 static void existsCommand(redisClient
*c
) {
4561 expireIfNeeded(c
->db
,c
->argv
[1]);
4562 if (dictFind(c
->db
->dict
,c
->argv
[1])) {
4563 addReply(c
, shared
.cone
);
4565 addReply(c
, shared
.czero
);
4569 static void selectCommand(redisClient
*c
) {
4570 int id
= atoi(c
->argv
[1]->ptr
);
4572 if (selectDb(c
,id
) == REDIS_ERR
) {
4573 addReplySds(c
,sdsnew("-ERR invalid DB index\r\n"));
4575 addReply(c
,shared
.ok
);
4579 static void randomkeyCommand(redisClient
*c
) {
4584 de
= dictGetRandomKey(c
->db
->dict
);
4585 if (!de
|| expireIfNeeded(c
->db
,dictGetEntryKey(de
)) == 0) break;
4589 addReply(c
,shared
.nullbulk
);
4593 key
= dictGetEntryKey(de
);
4594 if (server
.vm_enabled
) {
4595 key
= dupStringObject(key
);
4596 addReplyBulk(c
,key
);
4599 addReplyBulk(c
,key
);
4603 static void keysCommand(redisClient
*c
) {
4606 sds pattern
= c
->argv
[1]->ptr
;
4607 int plen
= sdslen(pattern
);
4608 unsigned long numkeys
= 0;
4609 robj
*lenobj
= createObject(REDIS_STRING
,NULL
);
4611 di
= dictGetIterator(c
->db
->dict
);
4613 decrRefCount(lenobj
);
4614 while((de
= dictNext(di
)) != NULL
) {
4615 robj
*keyobj
= dictGetEntryKey(de
);
4617 sds key
= keyobj
->ptr
;
4618 if ((pattern
[0] == '*' && pattern
[1] == '\0') ||
4619 stringmatchlen(pattern
,plen
,key
,sdslen(key
),0)) {
4620 if (expireIfNeeded(c
->db
,keyobj
) == 0) {
4621 addReplyBulk(c
,keyobj
);
4626 dictReleaseIterator(di
);
4627 lenobj
->ptr
= sdscatprintf(sdsempty(),"*%lu\r\n",numkeys
);
4630 static void dbsizeCommand(redisClient
*c
) {
4632 sdscatprintf(sdsempty(),":%lu\r\n",dictSize(c
->db
->dict
)));
4635 static void lastsaveCommand(redisClient
*c
) {
4637 sdscatprintf(sdsempty(),":%lu\r\n",server
.lastsave
));
4640 static void typeCommand(redisClient
*c
) {
4644 o
= lookupKeyRead(c
->db
,c
->argv
[1]);
4649 case REDIS_STRING
: type
= "+string"; break;
4650 case REDIS_LIST
: type
= "+list"; break;
4651 case REDIS_SET
: type
= "+set"; break;
4652 case REDIS_ZSET
: type
= "+zset"; break;
4653 case REDIS_HASH
: type
= "+hash"; break;
4654 default: type
= "+unknown"; break;
4657 addReplySds(c
,sdsnew(type
));
4658 addReply(c
,shared
.crlf
);
4661 static void saveCommand(redisClient
*c
) {
4662 if (server
.bgsavechildpid
!= -1) {
4663 addReplySds(c
,sdsnew("-ERR background save in progress\r\n"));
4666 if (rdbSave(server
.dbfilename
) == REDIS_OK
) {
4667 addReply(c
,shared
.ok
);
4669 addReply(c
,shared
.err
);
4673 static void bgsaveCommand(redisClient
*c
) {
4674 if (server
.bgsavechildpid
!= -1) {
4675 addReplySds(c
,sdsnew("-ERR background save already in progress\r\n"));
4678 if (rdbSaveBackground(server
.dbfilename
) == REDIS_OK
) {
4679 char *status
= "+Background saving started\r\n";
4680 addReplySds(c
,sdsnew(status
));
4682 addReply(c
,shared
.err
);
4686 static void shutdownCommand(redisClient
*c
) {
4687 if (prepareForShutdown() == REDIS_OK
)
4689 addReplySds(c
, sdsnew("-ERR Errors trying to SHUTDOWN. Check logs.\r\n"));
4692 static void renameGenericCommand(redisClient
*c
, int nx
) {
4695 /* To use the same key as src and dst is probably an error */
4696 if (sdscmp(c
->argv
[1]->ptr
,c
->argv
[2]->ptr
) == 0) {
4697 addReply(c
,shared
.sameobjecterr
);
4701 if ((o
= lookupKeyWriteOrReply(c
,c
->argv
[1],shared
.nokeyerr
)) == NULL
)
4705 deleteIfVolatile(c
->db
,c
->argv
[2]);
4706 if (dictAdd(c
->db
->dict
,c
->argv
[2],o
) == DICT_ERR
) {
4709 addReply(c
,shared
.czero
);
4712 dictReplace(c
->db
->dict
,c
->argv
[2],o
);
4714 incrRefCount(c
->argv
[2]);
4716 deleteKey(c
->db
,c
->argv
[1]);
4717 touchWatchedKey(c
->db
,c
->argv
[2]);
4719 addReply(c
,nx
? shared
.cone
: shared
.ok
);
4722 static void renameCommand(redisClient
*c
) {
4723 renameGenericCommand(c
,0);
4726 static void renamenxCommand(redisClient
*c
) {
4727 renameGenericCommand(c
,1);
4730 static void moveCommand(redisClient
*c
) {
4735 /* Obtain source and target DB pointers */
4738 if (selectDb(c
,atoi(c
->argv
[2]->ptr
)) == REDIS_ERR
) {
4739 addReply(c
,shared
.outofrangeerr
);
4743 selectDb(c
,srcid
); /* Back to the source DB */
4745 /* If the user is moving using as target the same
4746 * DB as the source DB it is probably an error. */
4748 addReply(c
,shared
.sameobjecterr
);
4752 /* Check if the element exists and get a reference */
4753 o
= lookupKeyWrite(c
->db
,c
->argv
[1]);
4755 addReply(c
,shared
.czero
);
4759 /* Try to add the element to the target DB */
4760 deleteIfVolatile(dst
,c
->argv
[1]);
4761 if (dictAdd(dst
->dict
,c
->argv
[1],o
) == DICT_ERR
) {
4762 addReply(c
,shared
.czero
);
4765 incrRefCount(c
->argv
[1]);
4768 /* OK! key moved, free the entry in the source DB */
4769 deleteKey(src
,c
->argv
[1]);
4771 addReply(c
,shared
.cone
);
4774 /* =================================== Lists ================================ */
4775 static void pushGenericCommand(redisClient
*c
, int where
) {
4779 lobj
= lookupKeyWrite(c
->db
,c
->argv
[1]);
4781 if (handleClientsWaitingListPush(c
,c
->argv
[1],c
->argv
[2])) {
4782 addReply(c
,shared
.cone
);
4785 lobj
= createListObject();
4787 if (where
== REDIS_HEAD
) {
4788 listAddNodeHead(list
,c
->argv
[2]);
4790 listAddNodeTail(list
,c
->argv
[2]);
4792 dictAdd(c
->db
->dict
,c
->argv
[1],lobj
);
4793 incrRefCount(c
->argv
[1]);
4794 incrRefCount(c
->argv
[2]);
4796 if (lobj
->type
!= REDIS_LIST
) {
4797 addReply(c
,shared
.wrongtypeerr
);
4800 if (handleClientsWaitingListPush(c
,c
->argv
[1],c
->argv
[2])) {
4801 addReply(c
,shared
.cone
);
4805 if (where
== REDIS_HEAD
) {
4806 listAddNodeHead(list
,c
->argv
[2]);
4808 listAddNodeTail(list
,c
->argv
[2]);
4810 incrRefCount(c
->argv
[2]);
4813 addReplyLongLong(c
,listLength(list
));
4816 static void lpushCommand(redisClient
*c
) {
4817 pushGenericCommand(c
,REDIS_HEAD
);
4820 static void rpushCommand(redisClient
*c
) {
4821 pushGenericCommand(c
,REDIS_TAIL
);
4824 static void llenCommand(redisClient
*c
) {
4828 if ((o
= lookupKeyReadOrReply(c
,c
->argv
[1],shared
.czero
)) == NULL
||
4829 checkType(c
,o
,REDIS_LIST
)) return;
4832 addReplyUlong(c
,listLength(l
));
4835 static void lindexCommand(redisClient
*c
) {
4837 int index
= atoi(c
->argv
[2]->ptr
);
4841 if ((o
= lookupKeyReadOrReply(c
,c
->argv
[1],shared
.nullbulk
)) == NULL
||
4842 checkType(c
,o
,REDIS_LIST
)) return;
4845 ln
= listIndex(list
, index
);
4847 addReply(c
,shared
.nullbulk
);
4849 robj
*ele
= listNodeValue(ln
);
4850 addReplyBulk(c
,ele
);
4854 static void lsetCommand(redisClient
*c
) {
4856 int index
= atoi(c
->argv
[2]->ptr
);
4860 if ((o
= lookupKeyWriteOrReply(c
,c
->argv
[1],shared
.nokeyerr
)) == NULL
||
4861 checkType(c
,o
,REDIS_LIST
)) return;
4864 ln
= listIndex(list
, index
);
4866 addReply(c
,shared
.outofrangeerr
);
4868 robj
*ele
= listNodeValue(ln
);
4871 listNodeValue(ln
) = c
->argv
[3];
4872 incrRefCount(c
->argv
[3]);
4873 addReply(c
,shared
.ok
);
4878 static void popGenericCommand(redisClient
*c
, int where
) {
4883 if ((o
= lookupKeyWriteOrReply(c
,c
->argv
[1],shared
.nullbulk
)) == NULL
||
4884 checkType(c
,o
,REDIS_LIST
)) return;
4887 if (where
== REDIS_HEAD
)
4888 ln
= listFirst(list
);
4890 ln
= listLast(list
);
4893 addReply(c
,shared
.nullbulk
);
4895 robj
*ele
= listNodeValue(ln
);
4896 addReplyBulk(c
,ele
);
4897 listDelNode(list
,ln
);
4898 if (listLength(list
) == 0) deleteKey(c
->db
,c
->argv
[1]);
4903 static void lpopCommand(redisClient
*c
) {
4904 popGenericCommand(c
,REDIS_HEAD
);
4907 static void rpopCommand(redisClient
*c
) {
4908 popGenericCommand(c
,REDIS_TAIL
);
4911 static void lrangeCommand(redisClient
*c
) {
4913 int start
= atoi(c
->argv
[2]->ptr
);
4914 int end
= atoi(c
->argv
[3]->ptr
);
4921 if ((o
= lookupKeyReadOrReply(c
,c
->argv
[1],shared
.emptymultibulk
)) == NULL
4922 || checkType(c
,o
,REDIS_LIST
)) return;
4924 llen
= listLength(list
);
4926 /* convert negative indexes */
4927 if (start
< 0) start
= llen
+start
;
4928 if (end
< 0) end
= llen
+end
;
4929 if (start
< 0) start
= 0;
4930 if (end
< 0) end
= 0;
4932 /* indexes sanity checks */
4933 if (start
> end
|| start
>= llen
) {
4934 /* Out of range start or start > end result in empty list */
4935 addReply(c
,shared
.emptymultibulk
);
4938 if (end
>= llen
) end
= llen
-1;
4939 rangelen
= (end
-start
)+1;
4941 /* Return the result in form of a multi-bulk reply */
4942 ln
= listIndex(list
, start
);
4943 addReplySds(c
,sdscatprintf(sdsempty(),"*%d\r\n",rangelen
));
4944 for (j
= 0; j
< rangelen
; j
++) {
4945 ele
= listNodeValue(ln
);
4946 addReplyBulk(c
,ele
);
4951 static void ltrimCommand(redisClient
*c
) {
4953 int start
= atoi(c
->argv
[2]->ptr
);
4954 int end
= atoi(c
->argv
[3]->ptr
);
4956 int j
, ltrim
, rtrim
;
4960 if ((o
= lookupKeyWriteOrReply(c
,c
->argv
[1],shared
.ok
)) == NULL
||
4961 checkType(c
,o
,REDIS_LIST
)) return;
4963 llen
= listLength(list
);
4965 /* convert negative indexes */
4966 if (start
< 0) start
= llen
+start
;
4967 if (end
< 0) end
= llen
+end
;
4968 if (start
< 0) start
= 0;
4969 if (end
< 0) end
= 0;
4971 /* indexes sanity checks */
4972 if (start
> end
|| start
>= llen
) {
4973 /* Out of range start or start > end result in empty list */
4977 if (end
>= llen
) end
= llen
-1;
4982 /* Remove list elements to perform the trim */
4983 for (j
= 0; j
< ltrim
; j
++) {
4984 ln
= listFirst(list
);
4985 listDelNode(list
,ln
);
4987 for (j
= 0; j
< rtrim
; j
++) {
4988 ln
= listLast(list
);
4989 listDelNode(list
,ln
);
4991 if (listLength(list
) == 0) deleteKey(c
->db
,c
->argv
[1]);
4993 addReply(c
,shared
.ok
);
4996 static void lremCommand(redisClient
*c
) {
4999 listNode
*ln
, *next
;
5000 int toremove
= atoi(c
->argv
[2]->ptr
);
5004 if ((o
= lookupKeyWriteOrReply(c
,c
->argv
[1],shared
.czero
)) == NULL
||
5005 checkType(c
,o
,REDIS_LIST
)) return;
5009 toremove
= -toremove
;
5012 ln
= fromtail
? list
->tail
: list
->head
;
5014 robj
*ele
= listNodeValue(ln
);
5016 next
= fromtail
? ln
->prev
: ln
->next
;
5017 if (equalStringObjects(ele
,c
->argv
[3])) {
5018 listDelNode(list
,ln
);
5021 if (toremove
&& removed
== toremove
) break;
5025 if (listLength(list
) == 0) deleteKey(c
->db
,c
->argv
[1]);
5026 addReplySds(c
,sdscatprintf(sdsempty(),":%d\r\n",removed
));
5029 /* This is the semantic of this command:
5030 * RPOPLPUSH srclist dstlist:
5031 * IF LLEN(srclist) > 0
5032 * element = RPOP srclist
5033 * LPUSH dstlist element
5040 * The idea is to be able to get an element from a list in a reliable way
5041 * since the element is not just returned but pushed against another list
5042 * as well. This command was originally proposed by Ezra Zygmuntowicz.
5044 static void rpoplpushcommand(redisClient
*c
) {
5049 if ((sobj
= lookupKeyWriteOrReply(c
,c
->argv
[1],shared
.nullbulk
)) == NULL
||
5050 checkType(c
,sobj
,REDIS_LIST
)) return;
5051 srclist
= sobj
->ptr
;
5052 ln
= listLast(srclist
);
5055 addReply(c
,shared
.nullbulk
);
5057 robj
*dobj
= lookupKeyWrite(c
->db
,c
->argv
[2]);
5058 robj
*ele
= listNodeValue(ln
);
5061 if (dobj
&& dobj
->type
!= REDIS_LIST
) {
5062 addReply(c
,shared
.wrongtypeerr
);
5066 /* Add the element to the target list (unless it's directly
5067 * passed to some BLPOP-ing client */
5068 if (!handleClientsWaitingListPush(c
,c
->argv
[2],ele
)) {
5070 /* Create the list if the key does not exist */
5071 dobj
= createListObject();
5072 dictAdd(c
->db
->dict
,c
->argv
[2],dobj
);
5073 incrRefCount(c
->argv
[2]);
5075 dstlist
= dobj
->ptr
;
5076 listAddNodeHead(dstlist
,ele
);
5080 /* Send the element to the client as reply as well */
5081 addReplyBulk(c
,ele
);
5083 /* Finally remove the element from the source list */
5084 listDelNode(srclist
,ln
);
5085 if (listLength(srclist
) == 0) deleteKey(c
->db
,c
->argv
[1]);
5090 /* ==================================== Sets ================================ */
5092 static void saddCommand(redisClient
*c
) {
5095 set
= lookupKeyWrite(c
->db
,c
->argv
[1]);
5097 set
= createSetObject();
5098 dictAdd(c
->db
->dict
,c
->argv
[1],set
);
5099 incrRefCount(c
->argv
[1]);
5101 if (set
->type
!= REDIS_SET
) {
5102 addReply(c
,shared
.wrongtypeerr
);
5106 if (dictAdd(set
->ptr
,c
->argv
[2],NULL
) == DICT_OK
) {
5107 incrRefCount(c
->argv
[2]);
5109 addReply(c
,shared
.cone
);
5111 addReply(c
,shared
.czero
);
5115 static void sremCommand(redisClient
*c
) {
5118 if ((set
= lookupKeyWriteOrReply(c
,c
->argv
[1],shared
.czero
)) == NULL
||
5119 checkType(c
,set
,REDIS_SET
)) return;
5121 if (dictDelete(set
->ptr
,c
->argv
[2]) == DICT_OK
) {
5123 if (htNeedsResize(set
->ptr
)) dictResize(set
->ptr
);
5124 if (dictSize((dict
*)set
->ptr
) == 0) deleteKey(c
->db
,c
->argv
[1]);
5125 addReply(c
,shared
.cone
);
5127 addReply(c
,shared
.czero
);
5131 static void smoveCommand(redisClient
*c
) {
5132 robj
*srcset
, *dstset
;
5134 srcset
= lookupKeyWrite(c
->db
,c
->argv
[1]);
5135 dstset
= lookupKeyWrite(c
->db
,c
->argv
[2]);
5137 /* If the source key does not exist return 0, if it's of the wrong type
5139 if (srcset
== NULL
|| srcset
->type
!= REDIS_SET
) {
5140 addReply(c
, srcset
? shared
.wrongtypeerr
: shared
.czero
);
5143 /* Error if the destination key is not a set as well */
5144 if (dstset
&& dstset
->type
!= REDIS_SET
) {
5145 addReply(c
,shared
.wrongtypeerr
);
5148 /* Remove the element from the source set */
5149 if (dictDelete(srcset
->ptr
,c
->argv
[3]) == DICT_ERR
) {
5150 /* Key not found in the src set! return zero */
5151 addReply(c
,shared
.czero
);
5154 if (dictSize((dict
*)srcset
->ptr
) == 0 && srcset
!= dstset
)
5155 deleteKey(c
->db
,c
->argv
[1]);
5157 /* Add the element to the destination set */
5159 dstset
= createSetObject();
5160 dictAdd(c
->db
->dict
,c
->argv
[2],dstset
);
5161 incrRefCount(c
->argv
[2]);
5163 if (dictAdd(dstset
->ptr
,c
->argv
[3],NULL
) == DICT_OK
)
5164 incrRefCount(c
->argv
[3]);
5165 addReply(c
,shared
.cone
);
5168 static void sismemberCommand(redisClient
*c
) {
5171 if ((set
= lookupKeyReadOrReply(c
,c
->argv
[1],shared
.czero
)) == NULL
||
5172 checkType(c
,set
,REDIS_SET
)) return;
5174 if (dictFind(set
->ptr
,c
->argv
[2]))
5175 addReply(c
,shared
.cone
);
5177 addReply(c
,shared
.czero
);
5180 static void scardCommand(redisClient
*c
) {
5184 if ((o
= lookupKeyReadOrReply(c
,c
->argv
[1],shared
.czero
)) == NULL
||
5185 checkType(c
,o
,REDIS_SET
)) return;
5188 addReplyUlong(c
,dictSize(s
));
5191 static void spopCommand(redisClient
*c
) {
5195 if ((set
= lookupKeyWriteOrReply(c
,c
->argv
[1],shared
.nullbulk
)) == NULL
||
5196 checkType(c
,set
,REDIS_SET
)) return;
5198 de
= dictGetRandomKey(set
->ptr
);
5200 addReply(c
,shared
.nullbulk
);
5202 robj
*ele
= dictGetEntryKey(de
);
5204 addReplyBulk(c
,ele
);
5205 dictDelete(set
->ptr
,ele
);
5206 if (htNeedsResize(set
->ptr
)) dictResize(set
->ptr
);
5207 if (dictSize((dict
*)set
->ptr
) == 0) deleteKey(c
->db
,c
->argv
[1]);
5212 static void srandmemberCommand(redisClient
*c
) {
5216 if ((set
= lookupKeyReadOrReply(c
,c
->argv
[1],shared
.nullbulk
)) == NULL
||
5217 checkType(c
,set
,REDIS_SET
)) return;
5219 de
= dictGetRandomKey(set
->ptr
);
5221 addReply(c
,shared
.nullbulk
);
5223 robj
*ele
= dictGetEntryKey(de
);
5225 addReplyBulk(c
,ele
);
5229 static int qsortCompareSetsByCardinality(const void *s1
, const void *s2
) {
5230 dict
**d1
= (void*) s1
, **d2
= (void*) s2
;
5232 return dictSize(*d1
)-dictSize(*d2
);
5235 static void sinterGenericCommand(redisClient
*c
, robj
**setskeys
, unsigned long setsnum
, robj
*dstkey
) {
5236 dict
**dv
= zmalloc(sizeof(dict
*)*setsnum
);
5239 robj
*lenobj
= NULL
, *dstset
= NULL
;
5240 unsigned long j
, cardinality
= 0;
5242 for (j
= 0; j
< setsnum
; j
++) {
5246 lookupKeyWrite(c
->db
,setskeys
[j
]) :
5247 lookupKeyRead(c
->db
,setskeys
[j
]);
5251 if (deleteKey(c
->db
,dstkey
))
5253 addReply(c
,shared
.czero
);
5255 addReply(c
,shared
.emptymultibulk
);
5259 if (setobj
->type
!= REDIS_SET
) {
5261 addReply(c
,shared
.wrongtypeerr
);
5264 dv
[j
] = setobj
->ptr
;
5266 /* Sort sets from the smallest to largest, this will improve our
5267 * algorithm's performace */
5268 qsort(dv
,setsnum
,sizeof(dict
*),qsortCompareSetsByCardinality
);
5270 /* The first thing we should output is the total number of elements...
5271 * since this is a multi-bulk write, but at this stage we don't know
5272 * the intersection set size, so we use a trick, append an empty object
5273 * to the output list and save the pointer to later modify it with the
5276 lenobj
= createObject(REDIS_STRING
,NULL
);
5278 decrRefCount(lenobj
);
5280 /* If we have a target key where to store the resulting set
5281 * create this key with an empty set inside */
5282 dstset
= createSetObject();
5285 /* Iterate all the elements of the first (smallest) set, and test
5286 * the element against all the other sets, if at least one set does
5287 * not include the element it is discarded */
5288 di
= dictGetIterator(dv
[0]);
5290 while((de
= dictNext(di
)) != NULL
) {
5293 for (j
= 1; j
< setsnum
; j
++)
5294 if (dictFind(dv
[j
],dictGetEntryKey(de
)) == NULL
) break;
5296 continue; /* at least one set does not contain the member */
5297 ele
= dictGetEntryKey(de
);
5299 addReplyBulk(c
,ele
);
5302 dictAdd(dstset
->ptr
,ele
,NULL
);
5306 dictReleaseIterator(di
);
5309 /* Store the resulting set into the target, if the intersection
5310 * is not an empty set. */
5311 deleteKey(c
->db
,dstkey
);
5312 if (dictSize((dict
*)dstset
->ptr
) > 0) {
5313 dictAdd(c
->db
->dict
,dstkey
,dstset
);
5314 incrRefCount(dstkey
);
5315 addReplyLongLong(c
,dictSize((dict
*)dstset
->ptr
));
5317 decrRefCount(dstset
);
5318 addReply(c
,shared
.czero
);
5322 lenobj
->ptr
= sdscatprintf(sdsempty(),"*%lu\r\n",cardinality
);
5327 static void sinterCommand(redisClient
*c
) {
5328 sinterGenericCommand(c
,c
->argv
+1,c
->argc
-1,NULL
);
5331 static void sinterstoreCommand(redisClient
*c
) {
5332 sinterGenericCommand(c
,c
->argv
+2,c
->argc
-2,c
->argv
[1]);
5335 #define REDIS_OP_UNION 0
5336 #define REDIS_OP_DIFF 1
5337 #define REDIS_OP_INTER 2
5339 static void sunionDiffGenericCommand(redisClient
*c
, robj
**setskeys
, int setsnum
, robj
*dstkey
, int op
) {
5340 dict
**dv
= zmalloc(sizeof(dict
*)*setsnum
);
5343 robj
*dstset
= NULL
;
5344 int j
, cardinality
= 0;
5346 for (j
= 0; j
< setsnum
; j
++) {
5350 lookupKeyWrite(c
->db
,setskeys
[j
]) :
5351 lookupKeyRead(c
->db
,setskeys
[j
]);
5356 if (setobj
->type
!= REDIS_SET
) {
5358 addReply(c
,shared
.wrongtypeerr
);
5361 dv
[j
] = setobj
->ptr
;
5364 /* We need a temp set object to store our union. If the dstkey
5365 * is not NULL (that is, we are inside an SUNIONSTORE operation) then
5366 * this set object will be the resulting object to set into the target key*/
5367 dstset
= createSetObject();
5369 /* Iterate all the elements of all the sets, add every element a single
5370 * time to the result set */
5371 for (j
= 0; j
< setsnum
; j
++) {
5372 if (op
== REDIS_OP_DIFF
&& j
== 0 && !dv
[j
]) break; /* result set is empty */
5373 if (!dv
[j
]) continue; /* non existing keys are like empty sets */
5375 di
= dictGetIterator(dv
[j
]);
5377 while((de
= dictNext(di
)) != NULL
) {
5380 /* dictAdd will not add the same element multiple times */
5381 ele
= dictGetEntryKey(de
);
5382 if (op
== REDIS_OP_UNION
|| j
== 0) {
5383 if (dictAdd(dstset
->ptr
,ele
,NULL
) == DICT_OK
) {
5387 } else if (op
== REDIS_OP_DIFF
) {
5388 if (dictDelete(dstset
->ptr
,ele
) == DICT_OK
) {
5393 dictReleaseIterator(di
);
5395 /* result set is empty? Exit asap. */
5396 if (op
== REDIS_OP_DIFF
&& cardinality
== 0) break;
5399 /* Output the content of the resulting set, if not in STORE mode */
5401 addReplySds(c
,sdscatprintf(sdsempty(),"*%d\r\n",cardinality
));
5402 di
= dictGetIterator(dstset
->ptr
);
5403 while((de
= dictNext(di
)) != NULL
) {
5406 ele
= dictGetEntryKey(de
);
5407 addReplyBulk(c
,ele
);
5409 dictReleaseIterator(di
);
5410 decrRefCount(dstset
);
5412 /* If we have a target key where to store the resulting set
5413 * create this key with the result set inside */
5414 deleteKey(c
->db
,dstkey
);
5415 if (dictSize((dict
*)dstset
->ptr
) > 0) {
5416 dictAdd(c
->db
->dict
,dstkey
,dstset
);
5417 incrRefCount(dstkey
);
5418 addReplyLongLong(c
,dictSize((dict
*)dstset
->ptr
));
5420 decrRefCount(dstset
);
5421 addReply(c
,shared
.czero
);
5428 static void sunionCommand(redisClient
*c
) {
5429 sunionDiffGenericCommand(c
,c
->argv
+1,c
->argc
-1,NULL
,REDIS_OP_UNION
);
5432 static void sunionstoreCommand(redisClient
*c
) {
5433 sunionDiffGenericCommand(c
,c
->argv
+2,c
->argc
-2,c
->argv
[1],REDIS_OP_UNION
);
5436 static void sdiffCommand(redisClient
*c
) {
5437 sunionDiffGenericCommand(c
,c
->argv
+1,c
->argc
-1,NULL
,REDIS_OP_DIFF
);
5440 static void sdiffstoreCommand(redisClient
*c
) {
5441 sunionDiffGenericCommand(c
,c
->argv
+2,c
->argc
-2,c
->argv
[1],REDIS_OP_DIFF
);
5444 /* ==================================== ZSets =============================== */
5446 /* ZSETs are ordered sets using two data structures to hold the same elements
5447 * in order to get O(log(N)) INSERT and REMOVE operations into a sorted
5450 * The elements are added to an hash table mapping Redis objects to scores.
5451 * At the same time the elements are added to a skip list mapping scores
5452 * to Redis objects (so objects are sorted by scores in this "view"). */
5454 /* This skiplist implementation is almost a C translation of the original
5455 * algorithm described by William Pugh in "Skip Lists: A Probabilistic
5456 * Alternative to Balanced Trees", modified in three ways:
5457 * a) this implementation allows for repeated values.
5458 * b) the comparison is not just by key (our 'score') but by satellite data.
5459 * c) there is a back pointer, so it's a doubly linked list with the back
5460 * pointers being only at "level 1". This allows to traverse the list
5461 * from tail to head, useful for ZREVRANGE. */
5463 static zskiplistNode
*zslCreateNode(int level
, double score
, robj
*obj
) {
5464 zskiplistNode
*zn
= zmalloc(sizeof(*zn
));
5466 zn
->forward
= zmalloc(sizeof(zskiplistNode
*) * level
);
5468 zn
->span
= zmalloc(sizeof(unsigned int) * (level
- 1));
5476 static zskiplist
*zslCreate(void) {
5480 zsl
= zmalloc(sizeof(*zsl
));
5483 zsl
->header
= zslCreateNode(ZSKIPLIST_MAXLEVEL
,0,NULL
);
5484 for (j
= 0; j
< ZSKIPLIST_MAXLEVEL
; j
++) {
5485 zsl
->header
->forward
[j
] = NULL
;
5487 /* span has space for ZSKIPLIST_MAXLEVEL-1 elements */
5488 if (j
< ZSKIPLIST_MAXLEVEL
-1)
5489 zsl
->header
->span
[j
] = 0;
5491 zsl
->header
->backward
= NULL
;
5496 static void zslFreeNode(zskiplistNode
*node
) {
5497 decrRefCount(node
->obj
);
5498 zfree(node
->forward
);
5503 static void zslFree(zskiplist
*zsl
) {
5504 zskiplistNode
*node
= zsl
->header
->forward
[0], *next
;
5506 zfree(zsl
->header
->forward
);
5507 zfree(zsl
->header
->span
);
5510 next
= node
->forward
[0];
5517 static int zslRandomLevel(void) {
5519 while ((random()&0xFFFF) < (ZSKIPLIST_P
* 0xFFFF))
5521 return (level
<ZSKIPLIST_MAXLEVEL
) ? level
: ZSKIPLIST_MAXLEVEL
;
5524 static void zslInsert(zskiplist
*zsl
, double score
, robj
*obj
) {
5525 zskiplistNode
*update
[ZSKIPLIST_MAXLEVEL
], *x
;
5526 unsigned int rank
[ZSKIPLIST_MAXLEVEL
];
5530 for (i
= zsl
->level
-1; i
>= 0; i
--) {
5531 /* store rank that is crossed to reach the insert position */
5532 rank
[i
] = i
== (zsl
->level
-1) ? 0 : rank
[i
+1];
5534 while (x
->forward
[i
] &&
5535 (x
->forward
[i
]->score
< score
||
5536 (x
->forward
[i
]->score
== score
&&
5537 compareStringObjects(x
->forward
[i
]->obj
,obj
) < 0))) {
5538 rank
[i
] += i
> 0 ? x
->span
[i
-1] : 1;
5543 /* we assume the key is not already inside, since we allow duplicated
5544 * scores, and the re-insertion of score and redis object should never
5545 * happpen since the caller of zslInsert() should test in the hash table
5546 * if the element is already inside or not. */
5547 level
= zslRandomLevel();
5548 if (level
> zsl
->level
) {
5549 for (i
= zsl
->level
; i
< level
; i
++) {
5551 update
[i
] = zsl
->header
;
5552 update
[i
]->span
[i
-1] = zsl
->length
;
5556 x
= zslCreateNode(level
,score
,obj
);
5557 for (i
= 0; i
< level
; i
++) {
5558 x
->forward
[i
] = update
[i
]->forward
[i
];
5559 update
[i
]->forward
[i
] = x
;
5561 /* update span covered by update[i] as x is inserted here */
5563 x
->span
[i
-1] = update
[i
]->span
[i
-1] - (rank
[0] - rank
[i
]);
5564 update
[i
]->span
[i
-1] = (rank
[0] - rank
[i
]) + 1;
5568 /* increment span for untouched levels */
5569 for (i
= level
; i
< zsl
->level
; i
++) {
5570 update
[i
]->span
[i
-1]++;
5573 x
->backward
= (update
[0] == zsl
->header
) ? NULL
: update
[0];
5575 x
->forward
[0]->backward
= x
;
5581 /* Internal function used by zslDelete, zslDeleteRangeByScore and zslDeleteRangeByRank */
5582 void zslDeleteNode(zskiplist
*zsl
, zskiplistNode
*x
, zskiplistNode
**update
) {
5584 for (i
= 0; i
< zsl
->level
; i
++) {
5585 if (update
[i
]->forward
[i
] == x
) {
5587 update
[i
]->span
[i
-1] += x
->span
[i
-1] - 1;
5589 update
[i
]->forward
[i
] = x
->forward
[i
];
5591 /* invariant: i > 0, because update[0]->forward[0]
5592 * is always equal to x */
5593 update
[i
]->span
[i
-1] -= 1;
5596 if (x
->forward
[0]) {
5597 x
->forward
[0]->backward
= x
->backward
;
5599 zsl
->tail
= x
->backward
;
5601 while(zsl
->level
> 1 && zsl
->header
->forward
[zsl
->level
-1] == NULL
)
5606 /* Delete an element with matching score/object from the skiplist. */
5607 static int zslDelete(zskiplist
*zsl
, double score
, robj
*obj
) {
5608 zskiplistNode
*update
[ZSKIPLIST_MAXLEVEL
], *x
;
5612 for (i
= zsl
->level
-1; i
>= 0; i
--) {
5613 while (x
->forward
[i
] &&
5614 (x
->forward
[i
]->score
< score
||
5615 (x
->forward
[i
]->score
== score
&&
5616 compareStringObjects(x
->forward
[i
]->obj
,obj
) < 0)))
5620 /* We may have multiple elements with the same score, what we need
5621 * is to find the element with both the right score and object. */
5623 if (x
&& score
== x
->score
&& equalStringObjects(x
->obj
,obj
)) {
5624 zslDeleteNode(zsl
, x
, update
);
5628 return 0; /* not found */
5630 return 0; /* not found */
5633 /* Delete all the elements with score between min and max from the skiplist.
5634 * Min and max are inclusive, so a score >= min && score <= max is deleted.
5635 * Note that this function takes the reference to the hash table view of the
5636 * sorted set, in order to remove the elements from the hash table too. */
5637 static unsigned long zslDeleteRangeByScore(zskiplist
*zsl
, double min
, double max
, dict
*dict
) {
5638 zskiplistNode
*update
[ZSKIPLIST_MAXLEVEL
], *x
;
5639 unsigned long removed
= 0;
5643 for (i
= zsl
->level
-1; i
>= 0; i
--) {
5644 while (x
->forward
[i
] && x
->forward
[i
]->score
< min
)
5648 /* We may have multiple elements with the same score, what we need
5649 * is to find the element with both the right score and object. */
5651 while (x
&& x
->score
<= max
) {
5652 zskiplistNode
*next
= x
->forward
[0];
5653 zslDeleteNode(zsl
, x
, update
);
5654 dictDelete(dict
,x
->obj
);
5659 return removed
; /* not found */
5662 /* Delete all the elements with rank between start and end from the skiplist.
5663 * Start and end are inclusive. Note that start and end need to be 1-based */
5664 static unsigned long zslDeleteRangeByRank(zskiplist
*zsl
, unsigned int start
, unsigned int end
, dict
*dict
) {
5665 zskiplistNode
*update
[ZSKIPLIST_MAXLEVEL
], *x
;
5666 unsigned long traversed
= 0, removed
= 0;
5670 for (i
= zsl
->level
-1; i
>= 0; i
--) {
5671 while (x
->forward
[i
] && (traversed
+ (i
> 0 ? x
->span
[i
-1] : 1)) < start
) {
5672 traversed
+= i
> 0 ? x
->span
[i
-1] : 1;
5680 while (x
&& traversed
<= end
) {
5681 zskiplistNode
*next
= x
->forward
[0];
5682 zslDeleteNode(zsl
, x
, update
);
5683 dictDelete(dict
,x
->obj
);
5692 /* Find the first node having a score equal or greater than the specified one.
5693 * Returns NULL if there is no match. */
5694 static zskiplistNode
*zslFirstWithScore(zskiplist
*zsl
, double score
) {
5699 for (i
= zsl
->level
-1; i
>= 0; i
--) {
5700 while (x
->forward
[i
] && x
->forward
[i
]->score
< score
)
5703 /* We may have multiple elements with the same score, what we need
5704 * is to find the element with both the right score and object. */
5705 return x
->forward
[0];
5708 /* Find the rank for an element by both score and key.
5709 * Returns 0 when the element cannot be found, rank otherwise.
5710 * Note that the rank is 1-based due to the span of zsl->header to the
5712 static unsigned long zslGetRank(zskiplist
*zsl
, double score
, robj
*o
) {
5714 unsigned long rank
= 0;
5718 for (i
= zsl
->level
-1; i
>= 0; i
--) {
5719 while (x
->forward
[i
] &&
5720 (x
->forward
[i
]->score
< score
||
5721 (x
->forward
[i
]->score
== score
&&
5722 compareStringObjects(x
->forward
[i
]->obj
,o
) <= 0))) {
5723 rank
+= i
> 0 ? x
->span
[i
-1] : 1;
5727 /* x might be equal to zsl->header, so test if obj is non-NULL */
5728 if (x
->obj
&& equalStringObjects(x
->obj
,o
)) {
5735 /* Finds an element by its rank. The rank argument needs to be 1-based. */
5736 zskiplistNode
* zslGetElementByRank(zskiplist
*zsl
, unsigned long rank
) {
5738 unsigned long traversed
= 0;
5742 for (i
= zsl
->level
-1; i
>= 0; i
--) {
5743 while (x
->forward
[i
] && (traversed
+ (i
>0 ? x
->span
[i
-1] : 1)) <= rank
)
5745 traversed
+= i
> 0 ? x
->span
[i
-1] : 1;
5748 if (traversed
== rank
) {
5755 /* The actual Z-commands implementations */
5757 /* This generic command implements both ZADD and ZINCRBY.
5758 * scoreval is the score if the operation is a ZADD (doincrement == 0) or
5759 * the increment if the operation is a ZINCRBY (doincrement == 1). */
5760 static void zaddGenericCommand(redisClient
*c
, robj
*key
, robj
*ele
, double scoreval
, int doincrement
) {
5765 if (isnan(scoreval
)) {
5766 addReplySds(c
,sdsnew("-ERR provide score is Not A Number (nan)\r\n"));
5770 zsetobj
= lookupKeyWrite(c
->db
,key
);
5771 if (zsetobj
== NULL
) {
5772 zsetobj
= createZsetObject();
5773 dictAdd(c
->db
->dict
,key
,zsetobj
);
5776 if (zsetobj
->type
!= REDIS_ZSET
) {
5777 addReply(c
,shared
.wrongtypeerr
);
5783 /* Ok now since we implement both ZADD and ZINCRBY here the code
5784 * needs to handle the two different conditions. It's all about setting
5785 * '*score', that is, the new score to set, to the right value. */
5786 score
= zmalloc(sizeof(double));
5790 /* Read the old score. If the element was not present starts from 0 */
5791 de
= dictFind(zs
->dict
,ele
);
5793 double *oldscore
= dictGetEntryVal(de
);
5794 *score
= *oldscore
+ scoreval
;
5798 if (isnan(*score
)) {
5800 sdsnew("-ERR resulting score is Not A Number (nan)\r\n"));
5802 /* Note that we don't need to check if the zset may be empty and
5803 * should be removed here, as we can only obtain Nan as score if
5804 * there was already an element in the sorted set. */
5811 /* What follows is a simple remove and re-insert operation that is common
5812 * to both ZADD and ZINCRBY... */
5813 if (dictAdd(zs
->dict
,ele
,score
) == DICT_OK
) {
5814 /* case 1: New element */
5815 incrRefCount(ele
); /* added to hash */
5816 zslInsert(zs
->zsl
,*score
,ele
);
5817 incrRefCount(ele
); /* added to skiplist */
5820 addReplyDouble(c
,*score
);
5822 addReply(c
,shared
.cone
);
5827 /* case 2: Score update operation */
5828 de
= dictFind(zs
->dict
,ele
);
5829 redisAssert(de
!= NULL
);
5830 oldscore
= dictGetEntryVal(de
);
5831 if (*score
!= *oldscore
) {
5834 /* Remove and insert the element in the skip list with new score */
5835 deleted
= zslDelete(zs
->zsl
,*oldscore
,ele
);
5836 redisAssert(deleted
!= 0);
5837 zslInsert(zs
->zsl
,*score
,ele
);
5839 /* Update the score in the hash table */
5840 dictReplace(zs
->dict
,ele
,score
);
5846 addReplyDouble(c
,*score
);
5848 addReply(c
,shared
.czero
);
5852 static void zaddCommand(redisClient
*c
) {
5855 if (getDoubleFromObjectOrReply(c
, c
->argv
[2], &scoreval
, NULL
) != REDIS_OK
) return;
5856 zaddGenericCommand(c
,c
->argv
[1],c
->argv
[3],scoreval
,0);
5859 static void zincrbyCommand(redisClient
*c
) {
5862 if (getDoubleFromObjectOrReply(c
, c
->argv
[2], &scoreval
, NULL
) != REDIS_OK
) return;
5863 zaddGenericCommand(c
,c
->argv
[1],c
->argv
[3],scoreval
,1);
5866 static void zremCommand(redisClient
*c
) {
5873 if ((zsetobj
= lookupKeyWriteOrReply(c
,c
->argv
[1],shared
.czero
)) == NULL
||
5874 checkType(c
,zsetobj
,REDIS_ZSET
)) return;
5877 de
= dictFind(zs
->dict
,c
->argv
[2]);
5879 addReply(c
,shared
.czero
);
5882 /* Delete from the skiplist */
5883 oldscore
= dictGetEntryVal(de
);
5884 deleted
= zslDelete(zs
->zsl
,*oldscore
,c
->argv
[2]);
5885 redisAssert(deleted
!= 0);
5887 /* Delete from the hash table */
5888 dictDelete(zs
->dict
,c
->argv
[2]);
5889 if (htNeedsResize(zs
->dict
)) dictResize(zs
->dict
);
5890 if (dictSize(zs
->dict
) == 0) deleteKey(c
->db
,c
->argv
[1]);
5892 addReply(c
,shared
.cone
);
5895 static void zremrangebyscoreCommand(redisClient
*c
) {
5902 if ((getDoubleFromObjectOrReply(c
, c
->argv
[2], &min
, NULL
) != REDIS_OK
) ||
5903 (getDoubleFromObjectOrReply(c
, c
->argv
[3], &max
, NULL
) != REDIS_OK
)) return;
5905 if ((zsetobj
= lookupKeyWriteOrReply(c
,c
->argv
[1],shared
.czero
)) == NULL
||
5906 checkType(c
,zsetobj
,REDIS_ZSET
)) return;
5909 deleted
= zslDeleteRangeByScore(zs
->zsl
,min
,max
,zs
->dict
);
5910 if (htNeedsResize(zs
->dict
)) dictResize(zs
->dict
);
5911 if (dictSize(zs
->dict
) == 0) deleteKey(c
->db
,c
->argv
[1]);
5912 server
.dirty
+= deleted
;
5913 addReplyLongLong(c
,deleted
);
5916 static void zremrangebyrankCommand(redisClient
*c
) {
5924 if ((getLongFromObjectOrReply(c
, c
->argv
[2], &start
, NULL
) != REDIS_OK
) ||
5925 (getLongFromObjectOrReply(c
, c
->argv
[3], &end
, NULL
) != REDIS_OK
)) return;
5927 if ((zsetobj
= lookupKeyWriteOrReply(c
,c
->argv
[1],shared
.czero
)) == NULL
||
5928 checkType(c
,zsetobj
,REDIS_ZSET
)) return;
5930 llen
= zs
->zsl
->length
;
5932 /* convert negative indexes */
5933 if (start
< 0) start
= llen
+start
;
5934 if (end
< 0) end
= llen
+end
;
5935 if (start
< 0) start
= 0;
5936 if (end
< 0) end
= 0;
5938 /* indexes sanity checks */
5939 if (start
> end
|| start
>= llen
) {
5940 addReply(c
,shared
.czero
);
5943 if (end
>= llen
) end
= llen
-1;
5945 /* increment start and end because zsl*Rank functions
5946 * use 1-based rank */
5947 deleted
= zslDeleteRangeByRank(zs
->zsl
,start
+1,end
+1,zs
->dict
);
5948 if (htNeedsResize(zs
->dict
)) dictResize(zs
->dict
);
5949 if (dictSize(zs
->dict
) == 0) deleteKey(c
->db
,c
->argv
[1]);
5950 server
.dirty
+= deleted
;
5951 addReplyLongLong(c
, deleted
);
5959 static int qsortCompareZsetopsrcByCardinality(const void *s1
, const void *s2
) {
5960 zsetopsrc
*d1
= (void*) s1
, *d2
= (void*) s2
;
5961 unsigned long size1
, size2
;
5962 size1
= d1
->dict
? dictSize(d1
->dict
) : 0;
5963 size2
= d2
->dict
? dictSize(d2
->dict
) : 0;
5964 return size1
- size2
;
5967 #define REDIS_AGGR_SUM 1
5968 #define REDIS_AGGR_MIN 2
5969 #define REDIS_AGGR_MAX 3
5970 #define zunionInterDictValue(_e) (dictGetEntryVal(_e) == NULL ? 1.0 : *(double*)dictGetEntryVal(_e))
5972 inline static void zunionInterAggregate(double *target
, double val
, int aggregate
) {
5973 if (aggregate
== REDIS_AGGR_SUM
) {
5974 *target
= *target
+ val
;
5975 } else if (aggregate
== REDIS_AGGR_MIN
) {
5976 *target
= val
< *target
? val
: *target
;
5977 } else if (aggregate
== REDIS_AGGR_MAX
) {
5978 *target
= val
> *target
? val
: *target
;
5981 redisPanic("Unknown ZUNION/INTER aggregate type");
5985 static void zunionInterGenericCommand(redisClient
*c
, robj
*dstkey
, int op
) {
5987 int aggregate
= REDIS_AGGR_SUM
;
5994 /* expect setnum input keys to be given */
5995 setnum
= atoi(c
->argv
[2]->ptr
);
5997 addReplySds(c
,sdsnew("-ERR at least 1 input key is needed for ZUNIONSTORE/ZINTERSTORE\r\n"));
6001 /* test if the expected number of keys would overflow */
6002 if (3+setnum
> c
->argc
) {
6003 addReply(c
,shared
.syntaxerr
);
6007 /* read keys to be used for input */
6008 src
= zmalloc(sizeof(zsetopsrc
) * setnum
);
6009 for (i
= 0, j
= 3; i
< setnum
; i
++, j
++) {
6010 robj
*obj
= lookupKeyWrite(c
->db
,c
->argv
[j
]);
6014 if (obj
->type
== REDIS_ZSET
) {
6015 src
[i
].dict
= ((zset
*)obj
->ptr
)->dict
;
6016 } else if (obj
->type
== REDIS_SET
) {
6017 src
[i
].dict
= (obj
->ptr
);
6020 addReply(c
,shared
.wrongtypeerr
);
6025 /* default all weights to 1 */
6026 src
[i
].weight
= 1.0;
6029 /* parse optional extra arguments */
6031 int remaining
= c
->argc
- j
;
6034 if (remaining
>= (setnum
+ 1) && !strcasecmp(c
->argv
[j
]->ptr
,"weights")) {
6036 for (i
= 0; i
< setnum
; i
++, j
++, remaining
--) {
6037 if (getDoubleFromObjectOrReply(c
, c
->argv
[j
], &src
[i
].weight
, NULL
) != REDIS_OK
)
6040 } else if (remaining
>= 2 && !strcasecmp(c
->argv
[j
]->ptr
,"aggregate")) {
6042 if (!strcasecmp(c
->argv
[j
]->ptr
,"sum")) {
6043 aggregate
= REDIS_AGGR_SUM
;
6044 } else if (!strcasecmp(c
->argv
[j
]->ptr
,"min")) {
6045 aggregate
= REDIS_AGGR_MIN
;
6046 } else if (!strcasecmp(c
->argv
[j
]->ptr
,"max")) {
6047 aggregate
= REDIS_AGGR_MAX
;
6050 addReply(c
,shared
.syntaxerr
);
6056 addReply(c
,shared
.syntaxerr
);
6062 /* sort sets from the smallest to largest, this will improve our
6063 * algorithm's performance */
6064 qsort(src
,setnum
,sizeof(zsetopsrc
),qsortCompareZsetopsrcByCardinality
);
6066 dstobj
= createZsetObject();
6067 dstzset
= dstobj
->ptr
;
6069 if (op
== REDIS_OP_INTER
) {
6070 /* skip going over all entries if the smallest zset is NULL or empty */
6071 if (src
[0].dict
&& dictSize(src
[0].dict
) > 0) {
6072 /* precondition: as src[0].dict is non-empty and the zsets are ordered
6073 * from small to large, all src[i > 0].dict are non-empty too */
6074 di
= dictGetIterator(src
[0].dict
);
6075 while((de
= dictNext(di
)) != NULL
) {
6076 double *score
= zmalloc(sizeof(double)), value
;
6077 *score
= src
[0].weight
* zunionInterDictValue(de
);
6079 for (j
= 1; j
< setnum
; j
++) {
6080 dictEntry
*other
= dictFind(src
[j
].dict
,dictGetEntryKey(de
));
6082 value
= src
[j
].weight
* zunionInterDictValue(other
);
6083 zunionInterAggregate(score
, value
, aggregate
);
6089 /* skip entry when not present in every source dict */
6093 robj
*o
= dictGetEntryKey(de
);
6094 dictAdd(dstzset
->dict
,o
,score
);
6095 incrRefCount(o
); /* added to dictionary */
6096 zslInsert(dstzset
->zsl
,*score
,o
);
6097 incrRefCount(o
); /* added to skiplist */
6100 dictReleaseIterator(di
);
6102 } else if (op
== REDIS_OP_UNION
) {
6103 for (i
= 0; i
< setnum
; i
++) {
6104 if (!src
[i
].dict
) continue;
6106 di
= dictGetIterator(src
[i
].dict
);
6107 while((de
= dictNext(di
)) != NULL
) {
6108 /* skip key when already processed */
6109 if (dictFind(dstzset
->dict
,dictGetEntryKey(de
)) != NULL
) continue;
6111 double *score
= zmalloc(sizeof(double)), value
;
6112 *score
= src
[i
].weight
* zunionInterDictValue(de
);
6114 /* because the zsets are sorted by size, its only possible
6115 * for sets at larger indices to hold this entry */
6116 for (j
= (i
+1); j
< setnum
; j
++) {
6117 dictEntry
*other
= dictFind(src
[j
].dict
,dictGetEntryKey(de
));
6119 value
= src
[j
].weight
* zunionInterDictValue(other
);
6120 zunionInterAggregate(score
, value
, aggregate
);
6124 robj
*o
= dictGetEntryKey(de
);
6125 dictAdd(dstzset
->dict
,o
,score
);
6126 incrRefCount(o
); /* added to dictionary */
6127 zslInsert(dstzset
->zsl
,*score
,o
);
6128 incrRefCount(o
); /* added to skiplist */
6130 dictReleaseIterator(di
);
6133 /* unknown operator */
6134 redisAssert(op
== REDIS_OP_INTER
|| op
== REDIS_OP_UNION
);
6137 deleteKey(c
->db
,dstkey
);
6138 if (dstzset
->zsl
->length
) {
6139 dictAdd(c
->db
->dict
,dstkey
,dstobj
);
6140 incrRefCount(dstkey
);
6141 addReplyLongLong(c
, dstzset
->zsl
->length
);
6144 decrRefCount(dstobj
);
6145 addReply(c
, shared
.czero
);
6150 static void zunionstoreCommand(redisClient
*c
) {
6151 zunionInterGenericCommand(c
,c
->argv
[1], REDIS_OP_UNION
);
6154 static void zinterstoreCommand(redisClient
*c
) {
6155 zunionInterGenericCommand(c
,c
->argv
[1], REDIS_OP_INTER
);
6158 static void zrangeGenericCommand(redisClient
*c
, int reverse
) {
6170 if ((getLongFromObjectOrReply(c
, c
->argv
[2], &start
, NULL
) != REDIS_OK
) ||
6171 (getLongFromObjectOrReply(c
, c
->argv
[3], &end
, NULL
) != REDIS_OK
)) return;
6173 if (c
->argc
== 5 && !strcasecmp(c
->argv
[4]->ptr
,"withscores")) {
6175 } else if (c
->argc
>= 5) {
6176 addReply(c
,shared
.syntaxerr
);
6180 if ((o
= lookupKeyReadOrReply(c
,c
->argv
[1],shared
.emptymultibulk
)) == NULL
6181 || checkType(c
,o
,REDIS_ZSET
)) return;
6186 /* convert negative indexes */
6187 if (start
< 0) start
= llen
+start
;
6188 if (end
< 0) end
= llen
+end
;
6189 if (start
< 0) start
= 0;
6190 if (end
< 0) end
= 0;
6192 /* indexes sanity checks */
6193 if (start
> end
|| start
>= llen
) {
6194 /* Out of range start or start > end result in empty list */
6195 addReply(c
,shared
.emptymultibulk
);
6198 if (end
>= llen
) end
= llen
-1;
6199 rangelen
= (end
-start
)+1;
6201 /* check if starting point is trivial, before searching
6202 * the element in log(N) time */
6204 ln
= start
== 0 ? zsl
->tail
: zslGetElementByRank(zsl
, llen
-start
);
6207 zsl
->header
->forward
[0] : zslGetElementByRank(zsl
, start
+1);
6210 /* Return the result in form of a multi-bulk reply */
6211 addReplySds(c
,sdscatprintf(sdsempty(),"*%d\r\n",
6212 withscores
? (rangelen
*2) : rangelen
));
6213 for (j
= 0; j
< rangelen
; j
++) {
6215 addReplyBulk(c
,ele
);
6217 addReplyDouble(c
,ln
->score
);
6218 ln
= reverse
? ln
->backward
: ln
->forward
[0];
6222 static void zrangeCommand(redisClient
*c
) {
6223 zrangeGenericCommand(c
,0);
6226 static void zrevrangeCommand(redisClient
*c
) {
6227 zrangeGenericCommand(c
,1);
6230 /* This command implements both ZRANGEBYSCORE and ZCOUNT.
6231 * If justcount is non-zero, just the count is returned. */
6232 static void genericZrangebyscoreCommand(redisClient
*c
, int justcount
) {
6235 int minex
= 0, maxex
= 0; /* are min or max exclusive? */
6236 int offset
= 0, limit
= -1;
6240 /* Parse the min-max interval. If one of the values is prefixed
6241 * by the "(" character, it's considered "open". For instance
6242 * ZRANGEBYSCORE zset (1.5 (2.5 will match min < x < max
6243 * ZRANGEBYSCORE zset 1.5 2.5 will instead match min <= x <= max */
6244 if (((char*)c
->argv
[2]->ptr
)[0] == '(') {
6245 min
= strtod((char*)c
->argv
[2]->ptr
+1,NULL
);
6248 min
= strtod(c
->argv
[2]->ptr
,NULL
);
6250 if (((char*)c
->argv
[3]->ptr
)[0] == '(') {
6251 max
= strtod((char*)c
->argv
[3]->ptr
+1,NULL
);
6254 max
= strtod(c
->argv
[3]->ptr
,NULL
);
6257 /* Parse "WITHSCORES": note that if the command was called with
6258 * the name ZCOUNT then we are sure that c->argc == 4, so we'll never
6259 * enter the following paths to parse WITHSCORES and LIMIT. */
6260 if (c
->argc
== 5 || c
->argc
== 8) {
6261 if (strcasecmp(c
->argv
[c
->argc
-1]->ptr
,"withscores") == 0)
6266 if (c
->argc
!= (4 + withscores
) && c
->argc
!= (7 + withscores
))
6270 sdsnew("-ERR wrong number of arguments for ZRANGEBYSCORE\r\n"));
6275 if (c
->argc
== (7 + withscores
) && strcasecmp(c
->argv
[4]->ptr
,"limit")) {
6276 addReply(c
,shared
.syntaxerr
);
6278 } else if (c
->argc
== (7 + withscores
)) {
6279 offset
= atoi(c
->argv
[5]->ptr
);
6280 limit
= atoi(c
->argv
[6]->ptr
);
6281 if (offset
< 0) offset
= 0;
6284 /* Ok, lookup the key and get the range */
6285 o
= lookupKeyRead(c
->db
,c
->argv
[1]);
6287 addReply(c
,justcount
? shared
.czero
: shared
.emptymultibulk
);
6289 if (o
->type
!= REDIS_ZSET
) {
6290 addReply(c
,shared
.wrongtypeerr
);
6292 zset
*zsetobj
= o
->ptr
;
6293 zskiplist
*zsl
= zsetobj
->zsl
;
6295 robj
*ele
, *lenobj
= NULL
;
6296 unsigned long rangelen
= 0;
6298 /* Get the first node with the score >= min, or with
6299 * score > min if 'minex' is true. */
6300 ln
= zslFirstWithScore(zsl
,min
);
6301 while (minex
&& ln
&& ln
->score
== min
) ln
= ln
->forward
[0];
6304 /* No element matching the speciifed interval */
6305 addReply(c
,justcount
? shared
.czero
: shared
.emptymultibulk
);
6309 /* We don't know in advance how many matching elements there
6310 * are in the list, so we push this object that will represent
6311 * the multi-bulk length in the output buffer, and will "fix"
6314 lenobj
= createObject(REDIS_STRING
,NULL
);
6316 decrRefCount(lenobj
);
6319 while(ln
&& (maxex
? (ln
->score
< max
) : (ln
->score
<= max
))) {
6322 ln
= ln
->forward
[0];
6325 if (limit
== 0) break;
6328 addReplyBulk(c
,ele
);
6330 addReplyDouble(c
,ln
->score
);
6332 ln
= ln
->forward
[0];
6334 if (limit
> 0) limit
--;
6337 addReplyLongLong(c
,(long)rangelen
);
6339 lenobj
->ptr
= sdscatprintf(sdsempty(),"*%lu\r\n",
6340 withscores
? (rangelen
*2) : rangelen
);
6346 static void zrangebyscoreCommand(redisClient
*c
) {
6347 genericZrangebyscoreCommand(c
,0);
6350 static void zcountCommand(redisClient
*c
) {
6351 genericZrangebyscoreCommand(c
,1);
6354 static void zcardCommand(redisClient
*c
) {
6358 if ((o
= lookupKeyReadOrReply(c
,c
->argv
[1],shared
.czero
)) == NULL
||
6359 checkType(c
,o
,REDIS_ZSET
)) return;
6362 addReplyUlong(c
,zs
->zsl
->length
);
6365 static void zscoreCommand(redisClient
*c
) {
6370 if ((o
= lookupKeyReadOrReply(c
,c
->argv
[1],shared
.nullbulk
)) == NULL
||
6371 checkType(c
,o
,REDIS_ZSET
)) return;
6374 de
= dictFind(zs
->dict
,c
->argv
[2]);
6376 addReply(c
,shared
.nullbulk
);
6378 double *score
= dictGetEntryVal(de
);
6380 addReplyDouble(c
,*score
);
6384 static void zrankGenericCommand(redisClient
*c
, int reverse
) {
6392 if ((o
= lookupKeyReadOrReply(c
,c
->argv
[1],shared
.nullbulk
)) == NULL
||
6393 checkType(c
,o
,REDIS_ZSET
)) return;
6397 de
= dictFind(zs
->dict
,c
->argv
[2]);
6399 addReply(c
,shared
.nullbulk
);
6403 score
= dictGetEntryVal(de
);
6404 rank
= zslGetRank(zsl
, *score
, c
->argv
[2]);
6407 addReplyLongLong(c
, zsl
->length
- rank
);
6409 addReplyLongLong(c
, rank
-1);
6412 addReply(c
,shared
.nullbulk
);
6416 static void zrankCommand(redisClient
*c
) {
6417 zrankGenericCommand(c
, 0);
6420 static void zrevrankCommand(redisClient
*c
) {
6421 zrankGenericCommand(c
, 1);
6424 /* ========================= Hashes utility functions ======================= */
6425 #define REDIS_HASH_KEY 1
6426 #define REDIS_HASH_VALUE 2
6428 /* Check the length of a number of objects to see if we need to convert a
6429 * zipmap to a real hash. Note that we only check string encoded objects
6430 * as their string length can be queried in constant time. */
6431 static void hashTryConversion(robj
*subject
, robj
**argv
, int start
, int end
) {
6433 if (subject
->encoding
!= REDIS_ENCODING_ZIPMAP
) return;
6435 for (i
= start
; i
<= end
; i
++) {
6436 if (argv
[i
]->encoding
== REDIS_ENCODING_RAW
&&
6437 sdslen(argv
[i
]->ptr
) > server
.hash_max_zipmap_value
)
6439 convertToRealHash(subject
);
6445 /* Encode given objects in-place when the hash uses a dict. */
6446 static void hashTryObjectEncoding(robj
*subject
, robj
**o1
, robj
**o2
) {
6447 if (subject
->encoding
== REDIS_ENCODING_HT
) {
6448 if (o1
) *o1
= tryObjectEncoding(*o1
);
6449 if (o2
) *o2
= tryObjectEncoding(*o2
);
6453 /* Get the value from a hash identified by key. Returns either a string
6454 * object or NULL if the value cannot be found. The refcount of the object
6455 * is always increased by 1 when the value was found. */
6456 static robj
*hashGet(robj
*o
, robj
*key
) {
6458 if (o
->encoding
== REDIS_ENCODING_ZIPMAP
) {
6461 key
= getDecodedObject(key
);
6462 if (zipmapGet(o
->ptr
,key
->ptr
,sdslen(key
->ptr
),&v
,&vlen
)) {
6463 value
= createStringObject((char*)v
,vlen
);
6467 dictEntry
*de
= dictFind(o
->ptr
,key
);
6469 value
= dictGetEntryVal(de
);
6470 incrRefCount(value
);
6476 /* Test if the key exists in the given hash. Returns 1 if the key
6477 * exists and 0 when it doesn't. */
6478 static int hashExists(robj
*o
, robj
*key
) {
6479 if (o
->encoding
== REDIS_ENCODING_ZIPMAP
) {
6480 key
= getDecodedObject(key
);
6481 if (zipmapExists(o
->ptr
,key
->ptr
,sdslen(key
->ptr
))) {
6487 if (dictFind(o
->ptr
,key
) != NULL
) {
6494 /* Add an element, discard the old if the key already exists.
6495 * Return 0 on insert and 1 on update. */
6496 static int hashSet(robj
*o
, robj
*key
, robj
*value
) {
6498 if (o
->encoding
== REDIS_ENCODING_ZIPMAP
) {
6499 key
= getDecodedObject(key
);
6500 value
= getDecodedObject(value
);
6501 o
->ptr
= zipmapSet(o
->ptr
,
6502 key
->ptr
,sdslen(key
->ptr
),
6503 value
->ptr
,sdslen(value
->ptr
), &update
);
6505 decrRefCount(value
);
6507 /* Check if the zipmap needs to be upgraded to a real hash table */
6508 if (zipmapLen(o
->ptr
) > server
.hash_max_zipmap_entries
)
6509 convertToRealHash(o
);
6511 if (dictReplace(o
->ptr
,key
,value
)) {
6518 incrRefCount(value
);
6523 /* Delete an element from a hash.
6524 * Return 1 on deleted and 0 on not found. */
6525 static int hashDelete(robj
*o
, robj
*key
) {
6527 if (o
->encoding
== REDIS_ENCODING_ZIPMAP
) {
6528 key
= getDecodedObject(key
);
6529 o
->ptr
= zipmapDel(o
->ptr
,key
->ptr
,sdslen(key
->ptr
), &deleted
);
6532 deleted
= dictDelete((dict
*)o
->ptr
,key
) == DICT_OK
;
6533 /* Always check if the dictionary needs a resize after a delete. */
6534 if (deleted
&& htNeedsResize(o
->ptr
)) dictResize(o
->ptr
);
6539 /* Return the number of elements in a hash. */
6540 static unsigned long hashLength(robj
*o
) {
6541 return (o
->encoding
== REDIS_ENCODING_ZIPMAP
) ?
6542 zipmapLen((unsigned char*)o
->ptr
) : dictSize((dict
*)o
->ptr
);
6545 /* Structure to hold hash iteration abstration. Note that iteration over
6546 * hashes involves both fields and values. Because it is possible that
6547 * not both are required, store pointers in the iterator to avoid
6548 * unnecessary memory allocation for fields/values. */
6552 unsigned char *zk
, *zv
;
6553 unsigned int zklen
, zvlen
;
6559 static hashIterator
*hashInitIterator(robj
*subject
) {
6560 hashIterator
*hi
= zmalloc(sizeof(hashIterator
));
6561 hi
->encoding
= subject
->encoding
;
6562 if (hi
->encoding
== REDIS_ENCODING_ZIPMAP
) {
6563 hi
->zi
= zipmapRewind(subject
->ptr
);
6564 } else if (hi
->encoding
== REDIS_ENCODING_HT
) {
6565 hi
->di
= dictGetIterator(subject
->ptr
);
6572 static void hashReleaseIterator(hashIterator
*hi
) {
6573 if (hi
->encoding
== REDIS_ENCODING_HT
) {
6574 dictReleaseIterator(hi
->di
);
6579 /* Move to the next entry in the hash. Return REDIS_OK when the next entry
6580 * could be found and REDIS_ERR when the iterator reaches the end. */
6581 static int hashNext(hashIterator
*hi
) {
6582 if (hi
->encoding
== REDIS_ENCODING_ZIPMAP
) {
6583 if ((hi
->zi
= zipmapNext(hi
->zi
, &hi
->zk
, &hi
->zklen
,
6584 &hi
->zv
, &hi
->zvlen
)) == NULL
) return REDIS_ERR
;
6586 if ((hi
->de
= dictNext(hi
->di
)) == NULL
) return REDIS_ERR
;
6591 /* Get key or value object at current iteration position.
6592 * This increases the refcount of the field object by 1. */
6593 static robj
*hashCurrent(hashIterator
*hi
, int what
) {
6595 if (hi
->encoding
== REDIS_ENCODING_ZIPMAP
) {
6596 if (what
& REDIS_HASH_KEY
) {
6597 o
= createStringObject((char*)hi
->zk
,hi
->zklen
);
6599 o
= createStringObject((char*)hi
->zv
,hi
->zvlen
);
6602 if (what
& REDIS_HASH_KEY
) {
6603 o
= dictGetEntryKey(hi
->de
);
6605 o
= dictGetEntryVal(hi
->de
);
6612 static robj
*hashLookupWriteOrCreate(redisClient
*c
, robj
*key
) {
6613 robj
*o
= lookupKeyWrite(c
->db
,key
);
6615 o
= createHashObject();
6616 dictAdd(c
->db
->dict
,key
,o
);
6619 if (o
->type
!= REDIS_HASH
) {
6620 addReply(c
,shared
.wrongtypeerr
);
6627 /* ============================= Hash commands ============================== */
6628 static void hsetCommand(redisClient
*c
) {
6632 if ((o
= hashLookupWriteOrCreate(c
,c
->argv
[1])) == NULL
) return;
6633 hashTryConversion(o
,c
->argv
,2,3);
6634 hashTryObjectEncoding(o
,&c
->argv
[2], &c
->argv
[3]);
6635 update
= hashSet(o
,c
->argv
[2],c
->argv
[3]);
6636 addReply(c
, update
? shared
.czero
: shared
.cone
);
6640 static void hsetnxCommand(redisClient
*c
) {
6642 if ((o
= hashLookupWriteOrCreate(c
,c
->argv
[1])) == NULL
) return;
6643 hashTryConversion(o
,c
->argv
,2,3);
6645 if (hashExists(o
, c
->argv
[2])) {
6646 addReply(c
, shared
.czero
);
6648 hashTryObjectEncoding(o
,&c
->argv
[2], &c
->argv
[3]);
6649 hashSet(o
,c
->argv
[2],c
->argv
[3]);
6650 addReply(c
, shared
.cone
);
6655 static void hmsetCommand(redisClient
*c
) {
6659 if ((c
->argc
% 2) == 1) {
6660 addReplySds(c
,sdsnew("-ERR wrong number of arguments for HMSET\r\n"));
6664 if ((o
= hashLookupWriteOrCreate(c
,c
->argv
[1])) == NULL
) return;
6665 hashTryConversion(o
,c
->argv
,2,c
->argc
-1);
6666 for (i
= 2; i
< c
->argc
; i
+= 2) {
6667 hashTryObjectEncoding(o
,&c
->argv
[i
], &c
->argv
[i
+1]);
6668 hashSet(o
,c
->argv
[i
],c
->argv
[i
+1]);
6670 addReply(c
, shared
.ok
);
6674 static void hincrbyCommand(redisClient
*c
) {
6675 long long value
, incr
;
6676 robj
*o
, *current
, *new;
6678 if (getLongLongFromObjectOrReply(c
,c
->argv
[3],&incr
,NULL
) != REDIS_OK
) return;
6679 if ((o
= hashLookupWriteOrCreate(c
,c
->argv
[1])) == NULL
) return;
6680 if ((current
= hashGet(o
,c
->argv
[2])) != NULL
) {
6681 if (getLongLongFromObjectOrReply(c
,current
,&value
,
6682 "hash value is not an integer") != REDIS_OK
) {
6683 decrRefCount(current
);
6686 decrRefCount(current
);
6692 new = createStringObjectFromLongLong(value
);
6693 hashTryObjectEncoding(o
,&c
->argv
[2],NULL
);
6694 hashSet(o
,c
->argv
[2],new);
6696 addReplyLongLong(c
,value
);
6700 static void hgetCommand(redisClient
*c
) {
6702 if ((o
= lookupKeyReadOrReply(c
,c
->argv
[1],shared
.nullbulk
)) == NULL
||
6703 checkType(c
,o
,REDIS_HASH
)) return;
6705 if ((value
= hashGet(o
,c
->argv
[2])) != NULL
) {
6706 addReplyBulk(c
,value
);
6707 decrRefCount(value
);
6709 addReply(c
,shared
.nullbulk
);
6713 static void hmgetCommand(redisClient
*c
) {
6716 o
= lookupKeyRead(c
->db
,c
->argv
[1]);
6717 if (o
!= NULL
&& o
->type
!= REDIS_HASH
) {
6718 addReply(c
,shared
.wrongtypeerr
);
6721 /* Note the check for o != NULL happens inside the loop. This is
6722 * done because objects that cannot be found are considered to be
6723 * an empty hash. The reply should then be a series of NULLs. */
6724 addReplySds(c
,sdscatprintf(sdsempty(),"*%d\r\n",c
->argc
-2));
6725 for (i
= 2; i
< c
->argc
; i
++) {
6726 if (o
!= NULL
&& (value
= hashGet(o
,c
->argv
[i
])) != NULL
) {
6727 addReplyBulk(c
,value
);
6728 decrRefCount(value
);
6730 addReply(c
,shared
.nullbulk
);
6735 static void hdelCommand(redisClient
*c
) {
6737 if ((o
= lookupKeyWriteOrReply(c
,c
->argv
[1],shared
.czero
)) == NULL
||
6738 checkType(c
,o
,REDIS_HASH
)) return;
6740 if (hashDelete(o
,c
->argv
[2])) {
6741 if (hashLength(o
) == 0) deleteKey(c
->db
,c
->argv
[1]);
6742 addReply(c
,shared
.cone
);
6745 addReply(c
,shared
.czero
);
6749 static void hlenCommand(redisClient
*c
) {
6751 if ((o
= lookupKeyReadOrReply(c
,c
->argv
[1],shared
.czero
)) == NULL
||
6752 checkType(c
,o
,REDIS_HASH
)) return;
6754 addReplyUlong(c
,hashLength(o
));
6757 static void genericHgetallCommand(redisClient
*c
, int flags
) {
6758 robj
*o
, *lenobj
, *obj
;
6759 unsigned long count
= 0;
6762 if ((o
= lookupKeyReadOrReply(c
,c
->argv
[1],shared
.emptymultibulk
)) == NULL
6763 || checkType(c
,o
,REDIS_HASH
)) return;
6765 lenobj
= createObject(REDIS_STRING
,NULL
);
6767 decrRefCount(lenobj
);
6769 hi
= hashInitIterator(o
);
6770 while (hashNext(hi
) != REDIS_ERR
) {
6771 if (flags
& REDIS_HASH_KEY
) {
6772 obj
= hashCurrent(hi
,REDIS_HASH_KEY
);
6773 addReplyBulk(c
,obj
);
6777 if (flags
& REDIS_HASH_VALUE
) {
6778 obj
= hashCurrent(hi
,REDIS_HASH_VALUE
);
6779 addReplyBulk(c
,obj
);
6784 hashReleaseIterator(hi
);
6786 lenobj
->ptr
= sdscatprintf(sdsempty(),"*%lu\r\n",count
);
6789 static void hkeysCommand(redisClient
*c
) {
6790 genericHgetallCommand(c
,REDIS_HASH_KEY
);
6793 static void hvalsCommand(redisClient
*c
) {
6794 genericHgetallCommand(c
,REDIS_HASH_VALUE
);
6797 static void hgetallCommand(redisClient
*c
) {
6798 genericHgetallCommand(c
,REDIS_HASH_KEY
|REDIS_HASH_VALUE
);
6801 static void hexistsCommand(redisClient
*c
) {
6803 if ((o
= lookupKeyReadOrReply(c
,c
->argv
[1],shared
.czero
)) == NULL
||
6804 checkType(c
,o
,REDIS_HASH
)) return;
6806 addReply(c
, hashExists(o
,c
->argv
[2]) ? shared
.cone
: shared
.czero
);
6809 static void convertToRealHash(robj
*o
) {
6810 unsigned char *key
, *val
, *p
, *zm
= o
->ptr
;
6811 unsigned int klen
, vlen
;
6812 dict
*dict
= dictCreate(&hashDictType
,NULL
);
6814 assert(o
->type
== REDIS_HASH
&& o
->encoding
!= REDIS_ENCODING_HT
);
6815 p
= zipmapRewind(zm
);
6816 while((p
= zipmapNext(p
,&key
,&klen
,&val
,&vlen
)) != NULL
) {
6817 robj
*keyobj
, *valobj
;
6819 keyobj
= createStringObject((char*)key
,klen
);
6820 valobj
= createStringObject((char*)val
,vlen
);
6821 keyobj
= tryObjectEncoding(keyobj
);
6822 valobj
= tryObjectEncoding(valobj
);
6823 dictAdd(dict
,keyobj
,valobj
);
6825 o
->encoding
= REDIS_ENCODING_HT
;
6830 /* ========================= Non type-specific commands ==================== */
6832 static void flushdbCommand(redisClient
*c
) {
6833 server
.dirty
+= dictSize(c
->db
->dict
);
6834 touchWatchedKeysOnFlush(c
->db
->id
);
6835 dictEmpty(c
->db
->dict
);
6836 dictEmpty(c
->db
->expires
);
6837 addReply(c
,shared
.ok
);
6840 static void flushallCommand(redisClient
*c
) {
6841 touchWatchedKeysOnFlush(-1);
6842 server
.dirty
+= emptyDb();
6843 addReply(c
,shared
.ok
);
6844 if (server
.bgsavechildpid
!= -1) {
6845 kill(server
.bgsavechildpid
,SIGKILL
);
6846 rdbRemoveTempFile(server
.bgsavechildpid
);
6848 rdbSave(server
.dbfilename
);
6852 static redisSortOperation
*createSortOperation(int type
, robj
*pattern
) {
6853 redisSortOperation
*so
= zmalloc(sizeof(*so
));
6855 so
->pattern
= pattern
;
6859 /* Return the value associated to the key with a name obtained
6860 * substituting the first occurence of '*' in 'pattern' with 'subst'.
6861 * The returned object will always have its refcount increased by 1
6862 * when it is non-NULL. */
6863 static robj
*lookupKeyByPattern(redisDb
*db
, robj
*pattern
, robj
*subst
) {
6866 robj keyobj
, fieldobj
, *o
;
6867 int prefixlen
, sublen
, postfixlen
, fieldlen
;
6868 /* Expoit the internal sds representation to create a sds string allocated on the stack in order to make this function faster */
6872 char buf
[REDIS_SORTKEY_MAX
+1];
6873 } keyname
, fieldname
;
6875 /* If the pattern is "#" return the substitution object itself in order
6876 * to implement the "SORT ... GET #" feature. */
6877 spat
= pattern
->ptr
;
6878 if (spat
[0] == '#' && spat
[1] == '\0') {
6879 incrRefCount(subst
);
6883 /* The substitution object may be specially encoded. If so we create
6884 * a decoded object on the fly. Otherwise getDecodedObject will just
6885 * increment the ref count, that we'll decrement later. */
6886 subst
= getDecodedObject(subst
);
6889 if (sdslen(spat
)+sdslen(ssub
)-1 > REDIS_SORTKEY_MAX
) return NULL
;
6890 p
= strchr(spat
,'*');
6892 decrRefCount(subst
);
6896 /* Find out if we're dealing with a hash dereference. */
6897 if ((f
= strstr(p
+1, "->")) != NULL
) {
6898 fieldlen
= sdslen(spat
)-(f
-spat
);
6899 /* this also copies \0 character */
6900 memcpy(fieldname
.buf
,f
+2,fieldlen
-1);
6901 fieldname
.len
= fieldlen
-2;
6907 sublen
= sdslen(ssub
);
6908 postfixlen
= sdslen(spat
)-(prefixlen
+1)-fieldlen
;
6909 memcpy(keyname
.buf
,spat
,prefixlen
);
6910 memcpy(keyname
.buf
+prefixlen
,ssub
,sublen
);
6911 memcpy(keyname
.buf
+prefixlen
+sublen
,p
+1,postfixlen
);
6912 keyname
.buf
[prefixlen
+sublen
+postfixlen
] = '\0';
6913 keyname
.len
= prefixlen
+sublen
+postfixlen
;
6914 decrRefCount(subst
);
6916 /* Lookup substituted key */
6917 initStaticStringObject(keyobj
,((char*)&keyname
)+(sizeof(long)*2));
6918 o
= lookupKeyRead(db
,&keyobj
);
6919 if (o
== NULL
) return NULL
;
6922 if (o
->type
!= REDIS_HASH
|| fieldname
.len
< 1) return NULL
;
6924 /* Retrieve value from hash by the field name. This operation
6925 * already increases the refcount of the returned object. */
6926 initStaticStringObject(fieldobj
,((char*)&fieldname
)+(sizeof(long)*2));
6927 o
= hashGet(o
, &fieldobj
);
6929 if (o
->type
!= REDIS_STRING
) return NULL
;
6931 /* Every object that this function returns needs to have its refcount
6932 * increased. sortCommand decreases it again. */
6939 /* sortCompare() is used by qsort in sortCommand(). Given that qsort_r with
6940 * the additional parameter is not standard but a BSD-specific we have to
6941 * pass sorting parameters via the global 'server' structure */
6942 static int sortCompare(const void *s1
, const void *s2
) {
6943 const redisSortObject
*so1
= s1
, *so2
= s2
;
6946 if (!server
.sort_alpha
) {
6947 /* Numeric sorting. Here it's trivial as we precomputed scores */
6948 if (so1
->u
.score
> so2
->u
.score
) {
6950 } else if (so1
->u
.score
< so2
->u
.score
) {
6956 /* Alphanumeric sorting */
6957 if (server
.sort_bypattern
) {
6958 if (!so1
->u
.cmpobj
|| !so2
->u
.cmpobj
) {
6959 /* At least one compare object is NULL */
6960 if (so1
->u
.cmpobj
== so2
->u
.cmpobj
)
6962 else if (so1
->u
.cmpobj
== NULL
)
6967 /* We have both the objects, use strcoll */
6968 cmp
= strcoll(so1
->u
.cmpobj
->ptr
,so2
->u
.cmpobj
->ptr
);
6971 /* Compare elements directly. */
6972 cmp
= compareStringObjects(so1
->obj
,so2
->obj
);
6975 return server
.sort_desc
? -cmp
: cmp
;
6978 /* The SORT command is the most complex command in Redis. Warning: this code
6979 * is optimized for speed and a bit less for readability */
6980 static void sortCommand(redisClient
*c
) {
6983 int desc
= 0, alpha
= 0;
6984 int limit_start
= 0, limit_count
= -1, start
, end
;
6985 int j
, dontsort
= 0, vectorlen
;
6986 int getop
= 0; /* GET operation counter */
6987 robj
*sortval
, *sortby
= NULL
, *storekey
= NULL
;
6988 redisSortObject
*vector
; /* Resulting vector to sort */
6990 /* Lookup the key to sort. It must be of the right types */
6991 sortval
= lookupKeyRead(c
->db
,c
->argv
[1]);
6992 if (sortval
== NULL
) {
6993 addReply(c
,shared
.emptymultibulk
);
6996 if (sortval
->type
!= REDIS_SET
&& sortval
->type
!= REDIS_LIST
&&
6997 sortval
->type
!= REDIS_ZSET
)
6999 addReply(c
,shared
.wrongtypeerr
);
7003 /* Create a list of operations to perform for every sorted element.
7004 * Operations can be GET/DEL/INCR/DECR */
7005 operations
= listCreate();
7006 listSetFreeMethod(operations
,zfree
);
7009 /* Now we need to protect sortval incrementing its count, in the future
7010 * SORT may have options able to overwrite/delete keys during the sorting
7011 * and the sorted key itself may get destroied */
7012 incrRefCount(sortval
);
7014 /* The SORT command has an SQL-alike syntax, parse it */
7015 while(j
< c
->argc
) {
7016 int leftargs
= c
->argc
-j
-1;
7017 if (!strcasecmp(c
->argv
[j
]->ptr
,"asc")) {
7019 } else if (!strcasecmp(c
->argv
[j
]->ptr
,"desc")) {
7021 } else if (!strcasecmp(c
->argv
[j
]->ptr
,"alpha")) {
7023 } else if (!strcasecmp(c
->argv
[j
]->ptr
,"limit") && leftargs
>= 2) {
7024 limit_start
= atoi(c
->argv
[j
+1]->ptr
);
7025 limit_count
= atoi(c
->argv
[j
+2]->ptr
);
7027 } else if (!strcasecmp(c
->argv
[j
]->ptr
,"store") && leftargs
>= 1) {
7028 storekey
= c
->argv
[j
+1];
7030 } else if (!strcasecmp(c
->argv
[j
]->ptr
,"by") && leftargs
>= 1) {
7031 sortby
= c
->argv
[j
+1];
7032 /* If the BY pattern does not contain '*', i.e. it is constant,
7033 * we don't need to sort nor to lookup the weight keys. */
7034 if (strchr(c
->argv
[j
+1]->ptr
,'*') == NULL
) dontsort
= 1;
7036 } else if (!strcasecmp(c
->argv
[j
]->ptr
,"get") && leftargs
>= 1) {
7037 listAddNodeTail(operations
,createSortOperation(
7038 REDIS_SORT_GET
,c
->argv
[j
+1]));
7042 decrRefCount(sortval
);
7043 listRelease(operations
);
7044 addReply(c
,shared
.syntaxerr
);
7050 /* Load the sorting vector with all the objects to sort */
7051 switch(sortval
->type
) {
7052 case REDIS_LIST
: vectorlen
= listLength((list
*)sortval
->ptr
); break;
7053 case REDIS_SET
: vectorlen
= dictSize((dict
*)sortval
->ptr
); break;
7054 case REDIS_ZSET
: vectorlen
= dictSize(((zset
*)sortval
->ptr
)->dict
); break;
7055 default: vectorlen
= 0; redisPanic("Bad SORT type"); /* Avoid GCC warning */
7057 vector
= zmalloc(sizeof(redisSortObject
)*vectorlen
);
7060 if (sortval
->type
== REDIS_LIST
) {
7061 list
*list
= sortval
->ptr
;
7065 listRewind(list
,&li
);
7066 while((ln
= listNext(&li
))) {
7067 robj
*ele
= ln
->value
;
7068 vector
[j
].obj
= ele
;
7069 vector
[j
].u
.score
= 0;
7070 vector
[j
].u
.cmpobj
= NULL
;
7078 if (sortval
->type
== REDIS_SET
) {
7081 zset
*zs
= sortval
->ptr
;
7085 di
= dictGetIterator(set
);
7086 while((setele
= dictNext(di
)) != NULL
) {
7087 vector
[j
].obj
= dictGetEntryKey(setele
);
7088 vector
[j
].u
.score
= 0;
7089 vector
[j
].u
.cmpobj
= NULL
;
7092 dictReleaseIterator(di
);
7094 redisAssert(j
== vectorlen
);
7096 /* Now it's time to load the right scores in the sorting vector */
7097 if (dontsort
== 0) {
7098 for (j
= 0; j
< vectorlen
; j
++) {
7101 /* lookup value to sort by */
7102 byval
= lookupKeyByPattern(c
->db
,sortby
,vector
[j
].obj
);
7103 if (!byval
) continue;
7105 /* use object itself to sort by */
7106 byval
= vector
[j
].obj
;
7110 if (sortby
) vector
[j
].u
.cmpobj
= getDecodedObject(byval
);
7112 if (byval
->encoding
== REDIS_ENCODING_RAW
) {
7113 vector
[j
].u
.score
= strtod(byval
->ptr
,NULL
);
7114 } else if (byval
->encoding
== REDIS_ENCODING_INT
) {
7115 /* Don't need to decode the object if it's
7116 * integer-encoded (the only encoding supported) so
7117 * far. We can just cast it */
7118 vector
[j
].u
.score
= (long)byval
->ptr
;
7120 redisAssert(1 != 1);
7124 /* when the object was retrieved using lookupKeyByPattern,
7125 * its refcount needs to be decreased. */
7127 decrRefCount(byval
);
7132 /* We are ready to sort the vector... perform a bit of sanity check
7133 * on the LIMIT option too. We'll use a partial version of quicksort. */
7134 start
= (limit_start
< 0) ? 0 : limit_start
;
7135 end
= (limit_count
< 0) ? vectorlen
-1 : start
+limit_count
-1;
7136 if (start
>= vectorlen
) {
7137 start
= vectorlen
-1;
7140 if (end
>= vectorlen
) end
= vectorlen
-1;
7142 if (dontsort
== 0) {
7143 server
.sort_desc
= desc
;
7144 server
.sort_alpha
= alpha
;
7145 server
.sort_bypattern
= sortby
? 1 : 0;
7146 if (sortby
&& (start
!= 0 || end
!= vectorlen
-1))
7147 pqsort(vector
,vectorlen
,sizeof(redisSortObject
),sortCompare
, start
,end
);
7149 qsort(vector
,vectorlen
,sizeof(redisSortObject
),sortCompare
);
7152 /* Send command output to the output buffer, performing the specified
7153 * GET/DEL/INCR/DECR operations if any. */
7154 outputlen
= getop
? getop
*(end
-start
+1) : end
-start
+1;
7155 if (storekey
== NULL
) {
7156 /* STORE option not specified, sent the sorting result to client */
7157 addReplySds(c
,sdscatprintf(sdsempty(),"*%d\r\n",outputlen
));
7158 for (j
= start
; j
<= end
; j
++) {
7162 if (!getop
) addReplyBulk(c
,vector
[j
].obj
);
7163 listRewind(operations
,&li
);
7164 while((ln
= listNext(&li
))) {
7165 redisSortOperation
*sop
= ln
->value
;
7166 robj
*val
= lookupKeyByPattern(c
->db
,sop
->pattern
,
7169 if (sop
->type
== REDIS_SORT_GET
) {
7171 addReply(c
,shared
.nullbulk
);
7173 addReplyBulk(c
,val
);
7177 redisAssert(sop
->type
== REDIS_SORT_GET
); /* always fails */
7182 robj
*listObject
= createListObject();
7183 list
*listPtr
= (list
*) listObject
->ptr
;
7185 /* STORE option specified, set the sorting result as a List object */
7186 for (j
= start
; j
<= end
; j
++) {
7191 listAddNodeTail(listPtr
,vector
[j
].obj
);
7192 incrRefCount(vector
[j
].obj
);
7194 listRewind(operations
,&li
);
7195 while((ln
= listNext(&li
))) {
7196 redisSortOperation
*sop
= ln
->value
;
7197 robj
*val
= lookupKeyByPattern(c
->db
,sop
->pattern
,
7200 if (sop
->type
== REDIS_SORT_GET
) {
7202 listAddNodeTail(listPtr
,createStringObject("",0));
7204 /* We should do a incrRefCount on val because it is
7205 * added to the list, but also a decrRefCount because
7206 * it is returned by lookupKeyByPattern. This results
7207 * in doing nothing at all. */
7208 listAddNodeTail(listPtr
,val
);
7211 redisAssert(sop
->type
== REDIS_SORT_GET
); /* always fails */
7215 if (dictReplace(c
->db
->dict
,storekey
,listObject
)) {
7216 incrRefCount(storekey
);
7218 /* Note: we add 1 because the DB is dirty anyway since even if the
7219 * SORT result is empty a new key is set and maybe the old content
7221 server
.dirty
+= 1+outputlen
;
7222 addReplySds(c
,sdscatprintf(sdsempty(),":%d\r\n",outputlen
));
7226 decrRefCount(sortval
);
7227 listRelease(operations
);
7228 for (j
= 0; j
< vectorlen
; j
++) {
7229 if (alpha
&& vector
[j
].u
.cmpobj
)
7230 decrRefCount(vector
[j
].u
.cmpobj
);
/* Convert an amount of bytes into a human readable string in the form
 * of 100B, 2G, 100M, 4K, and so forth.
 *
 * The result is written to 's', which must have room for at least 22 bytes
 * (up to 20 digits, the unit letter and the terminator). Values below 1024
 * are printed as plain bytes, larger ones with two decimals and a K, M, G,
 * T or P unit. Anything too large for those units falls back to plain
 * bytes, so 's' is always written. */
static void bytesToHuman(char *s, unsigned long long n) {
    double d;

    if (n < 1024) {
        /* Bytes */
        sprintf(s,"%lluB",n);
    } else if (n < (1024*1024)) {
        d = (double)n/(1024);
        sprintf(s,"%.2fK",d);
    } else if (n < (1024LL*1024*1024)) {
        d = (double)n/(1024*1024);
        sprintf(s,"%.2fM",d);
    } else if (n < (1024LL*1024*1024*1024)) {
        d = (double)n/(1024LL*1024*1024);
        sprintf(s,"%.2fG",d);
    } else if (n < (1024LL*1024*1024*1024*1024)) {
        d = (double)n/(1024LL*1024*1024*1024);
        sprintf(s,"%.2fT",d);
    } else if (n < (1024LL*1024*1024*1024*1024*1024)) {
        d = (double)n/(1024LL*1024*1024*1024*1024);
        sprintf(s,"%.2fP",d);
    } else {
        /* Beyond petabytes: still produce a valid string. */
        sprintf(s,"%lluB",n);
    }
}
7256 /* Create the string returned by the INFO command. This is decoupled
7257 * by the INFO command itself as we need to report the same information
7258 * on memory corruption problems. */
7259 static sds
genRedisInfoString(void) {
7261 time_t uptime
= time(NULL
)-server
.stat_starttime
;
7265 bytesToHuman(hmem
,zmalloc_used_memory());
7266 info
= sdscatprintf(sdsempty(),
7267 "redis_version:%s\r\n"
7268 "redis_git_sha1:%s\r\n"
7269 "redis_git_dirty:%d\r\n"
7271 "multiplexing_api:%s\r\n"
7272 "process_id:%ld\r\n"
7273 "uptime_in_seconds:%ld\r\n"
7274 "uptime_in_days:%ld\r\n"
7275 "connected_clients:%d\r\n"
7276 "connected_slaves:%d\r\n"
7277 "blocked_clients:%d\r\n"
7278 "used_memory:%zu\r\n"
7279 "used_memory_human:%s\r\n"
7280 "changes_since_last_save:%lld\r\n"
7281 "bgsave_in_progress:%d\r\n"
7282 "last_save_time:%ld\r\n"
7283 "bgrewriteaof_in_progress:%d\r\n"
7284 "total_connections_received:%lld\r\n"
7285 "total_commands_processed:%lld\r\n"
7286 "expired_keys:%lld\r\n"
7287 "hash_max_zipmap_entries:%zu\r\n"
7288 "hash_max_zipmap_value:%zu\r\n"
7289 "pubsub_channels:%ld\r\n"
7290 "pubsub_patterns:%u\r\n"
7295 strtol(REDIS_GIT_DIRTY
,NULL
,10) > 0,
7296 (sizeof(long) == 8) ? "64" : "32",
7301 listLength(server
.clients
)-listLength(server
.slaves
),
7302 listLength(server
.slaves
),
7303 server
.blpop_blocked_clients
,
7304 zmalloc_used_memory(),
7307 server
.bgsavechildpid
!= -1,
7309 server
.bgrewritechildpid
!= -1,
7310 server
.stat_numconnections
,
7311 server
.stat_numcommands
,
7312 server
.stat_expiredkeys
,
7313 server
.hash_max_zipmap_entries
,
7314 server
.hash_max_zipmap_value
,
7315 dictSize(server
.pubsub_channels
),
7316 listLength(server
.pubsub_patterns
),
7317 server
.vm_enabled
!= 0,
7318 server
.masterhost
== NULL
? "master" : "slave"
7320 if (server
.masterhost
) {
7321 info
= sdscatprintf(info
,
7322 "master_host:%s\r\n"
7323 "master_port:%d\r\n"
7324 "master_link_status:%s\r\n"
7325 "master_last_io_seconds_ago:%d\r\n"
7328 (server
.replstate
== REDIS_REPL_CONNECTED
) ?
7330 server
.master
? ((int)(time(NULL
)-server
.master
->lastinteraction
)) : -1
7333 if (server
.vm_enabled
) {
7335 info
= sdscatprintf(info
,
7336 "vm_conf_max_memory:%llu\r\n"
7337 "vm_conf_page_size:%llu\r\n"
7338 "vm_conf_pages:%llu\r\n"
7339 "vm_stats_used_pages:%llu\r\n"
7340 "vm_stats_swapped_objects:%llu\r\n"
7341 "vm_stats_swappin_count:%llu\r\n"
7342 "vm_stats_swappout_count:%llu\r\n"
7343 "vm_stats_io_newjobs_len:%lu\r\n"
7344 "vm_stats_io_processing_len:%lu\r\n"
7345 "vm_stats_io_processed_len:%lu\r\n"
7346 "vm_stats_io_active_threads:%lu\r\n"
7347 "vm_stats_blocked_clients:%lu\r\n"
7348 ,(unsigned long long) server
.vm_max_memory
,
7349 (unsigned long long) server
.vm_page_size
,
7350 (unsigned long long) server
.vm_pages
,
7351 (unsigned long long) server
.vm_stats_used_pages
,
7352 (unsigned long long) server
.vm_stats_swapped_objects
,
7353 (unsigned long long) server
.vm_stats_swapins
,
7354 (unsigned long long) server
.vm_stats_swapouts
,
7355 (unsigned long) listLength(server
.io_newjobs
),
7356 (unsigned long) listLength(server
.io_processing
),
7357 (unsigned long) listLength(server
.io_processed
),
7358 (unsigned long) server
.io_active_threads
,
7359 (unsigned long) server
.vm_blocked_clients
7363 for (j
= 0; j
< server
.dbnum
; j
++) {
7364 long long keys
, vkeys
;
7366 keys
= dictSize(server
.db
[j
].dict
);
7367 vkeys
= dictSize(server
.db
[j
].expires
);
7368 if (keys
|| vkeys
) {
7369 info
= sdscatprintf(info
, "db%d:keys=%lld,expires=%lld\r\n",
7376 static void infoCommand(redisClient
*c
) {
7377 sds info
= genRedisInfoString();
7378 addReplySds(c
,sdscatprintf(sdsempty(),"$%lu\r\n",
7379 (unsigned long)sdslen(info
)));
7380 addReplySds(c
,info
);
7381 addReply(c
,shared
.crlf
);
7384 static void monitorCommand(redisClient
*c
) {
7385 /* ignore MONITOR if aleady slave or in monitor mode */
7386 if (c
->flags
& REDIS_SLAVE
) return;
7388 c
->flags
|= (REDIS_SLAVE
|REDIS_MONITOR
);
7390 listAddNodeTail(server
.monitors
,c
);
7391 addReply(c
,shared
.ok
);
7394 /* ================================= Expire ================================= */
7395 static int removeExpire(redisDb
*db
, robj
*key
) {
7396 if (dictDelete(db
->expires
,key
) == DICT_OK
) {
7403 static int setExpire(redisDb
*db
, robj
*key
, time_t when
) {
7404 if (dictAdd(db
->expires
,key
,(void*)when
) == DICT_ERR
) {
7412 /* Return the expire time of the specified key, or -1 if no expire
7413 * is associated with this key (i.e. the key is non volatile) */
7414 static time_t getExpire(redisDb
*db
, robj
*key
) {
7417 /* No expire? return ASAP */
7418 if (dictSize(db
->expires
) == 0 ||
7419 (de
= dictFind(db
->expires
,key
)) == NULL
) return -1;
7421 return (time_t) dictGetEntryVal(de
);
7424 static int expireIfNeeded(redisDb
*db
, robj
*key
) {
7428 /* No expire? return ASAP */
7429 if (dictSize(db
->expires
) == 0 ||
7430 (de
= dictFind(db
->expires
,key
)) == NULL
) return 0;
7432 /* Lookup the expire */
7433 when
= (time_t) dictGetEntryVal(de
);
7434 if (time(NULL
) <= when
) return 0;
7436 /* Delete the key */
7437 dictDelete(db
->expires
,key
);
7438 server
.stat_expiredkeys
++;
7439 return dictDelete(db
->dict
,key
) == DICT_OK
;
7442 static int deleteIfVolatile(redisDb
*db
, robj
*key
) {
7445 /* No expire? return ASAP */
7446 if (dictSize(db
->expires
) == 0 ||
7447 (de
= dictFind(db
->expires
,key
)) == NULL
) return 0;
7449 /* Delete the key */
7451 server
.stat_expiredkeys
++;
7452 dictDelete(db
->expires
,key
);
7453 return dictDelete(db
->dict
,key
) == DICT_OK
;
7456 static void expireGenericCommand(redisClient
*c
, robj
*key
, robj
*param
, long offset
) {
7460 if (getLongFromObjectOrReply(c
, param
, &seconds
, NULL
) != REDIS_OK
) return;
7464 de
= dictFind(c
->db
->dict
,key
);
7466 addReply(c
,shared
.czero
);
7470 if (deleteKey(c
->db
,key
)) server
.dirty
++;
7471 addReply(c
, shared
.cone
);
7474 time_t when
= time(NULL
)+seconds
;
7475 if (setExpire(c
->db
,key
,when
)) {
7476 addReply(c
,shared
.cone
);
7479 addReply(c
,shared
.czero
);
7485 static void expireCommand(redisClient
*c
) {
7486 expireGenericCommand(c
,c
->argv
[1],c
->argv
[2],0);
7489 static void expireatCommand(redisClient
*c
) {
7490 expireGenericCommand(c
,c
->argv
[1],c
->argv
[2],time(NULL
));
7493 static void ttlCommand(redisClient
*c
) {
7497 expire
= getExpire(c
->db
,c
->argv
[1]);
7499 ttl
= (int) (expire
-time(NULL
));
7500 if (ttl
< 0) ttl
= -1;
7502 addReplySds(c
,sdscatprintf(sdsempty(),":%d\r\n",ttl
));
7505 /* ================================ MULTI/EXEC ============================== */
7507 /* Client state initialization for MULTI/EXEC */
7508 static void initClientMultiState(redisClient
*c
) {
7509 c
->mstate
.commands
= NULL
;
7510 c
->mstate
.count
= 0;
7513 /* Release all the resources associated with MULTI/EXEC state */
7514 static void freeClientMultiState(redisClient
*c
) {
7517 for (j
= 0; j
< c
->mstate
.count
; j
++) {
7519 multiCmd
*mc
= c
->mstate
.commands
+j
;
7521 for (i
= 0; i
< mc
->argc
; i
++)
7522 decrRefCount(mc
->argv
[i
]);
7525 zfree(c
->mstate
.commands
);
7528 /* Add a new command into the MULTI commands queue */
7529 static void queueMultiCommand(redisClient
*c
, struct redisCommand
*cmd
) {
7533 c
->mstate
.commands
= zrealloc(c
->mstate
.commands
,
7534 sizeof(multiCmd
)*(c
->mstate
.count
+1));
7535 mc
= c
->mstate
.commands
+c
->mstate
.count
;
7538 mc
->argv
= zmalloc(sizeof(robj
*)*c
->argc
);
7539 memcpy(mc
->argv
,c
->argv
,sizeof(robj
*)*c
->argc
);
7540 for (j
= 0; j
< c
->argc
; j
++)
7541 incrRefCount(mc
->argv
[j
]);
7545 static void multiCommand(redisClient
*c
) {
7546 if (c
->flags
& REDIS_MULTI
) {
7547 addReplySds(c
,sdsnew("-ERR MULTI calls can not be nested\r\n"));
7550 c
->flags
|= REDIS_MULTI
;
7551 addReply(c
,shared
.ok
);
7554 static void discardCommand(redisClient
*c
) {
7555 if (!(c
->flags
& REDIS_MULTI
)) {
7556 addReplySds(c
,sdsnew("-ERR DISCARD without MULTI\r\n"));
7560 freeClientMultiState(c
);
7561 initClientMultiState(c
);
7562 c
->flags
&= (~REDIS_MULTI
);
7563 addReply(c
,shared
.ok
);
7566 /* Send a MULTI command to all the slaves and AOF file. Check the execCommand
7567 * implememntation for more information. */
7568 static void execCommandReplicateMulti(redisClient
*c
) {
7569 struct redisCommand
*cmd
;
7570 robj
*multistring
= createStringObject("MULTI",5);
7572 cmd
= lookupCommand("multi");
7573 if (server
.appendonly
)
7574 feedAppendOnlyFile(cmd
,c
->db
->id
,&multistring
,1);
7575 if (listLength(server
.slaves
))
7576 replicationFeedSlaves(server
.slaves
,c
->db
->id
,&multistring
,1);
7577 decrRefCount(multistring
);
7580 static void execCommand(redisClient
*c
) {
7585 if (!(c
->flags
& REDIS_MULTI
)) {
7586 addReplySds(c
,sdsnew("-ERR EXEC without MULTI\r\n"));
7590 /* Check if we need to abort the EXEC if some WATCHed key was touched.
7591 * A failed EXEC will return a multi bulk nil object. */
7592 if (c
->flags
& REDIS_DIRTY_CAS
) {
7593 freeClientMultiState(c
);
7594 initClientMultiState(c
);
7595 c
->flags
&= ~(REDIS_MULTI
|REDIS_DIRTY_CAS
);
7597 addReply(c
,shared
.nullmultibulk
);
7601 /* Replicate a MULTI request now that we are sure the block is executed.
7602 * This way we'll deliver the MULTI/..../EXEC block as a whole and
7603 * both the AOF and the replication link will have the same consistency
7604 * and atomicity guarantees. */
7605 execCommandReplicateMulti(c
);
7607 /* Exec all the queued commands */
7608 unwatchAllKeys(c
); /* Unwatch ASAP otherwise we'll waste CPU cycles */
7609 orig_argv
= c
->argv
;
7610 orig_argc
= c
->argc
;
7611 addReplySds(c
,sdscatprintf(sdsempty(),"*%d\r\n",c
->mstate
.count
));
7612 for (j
= 0; j
< c
->mstate
.count
; j
++) {
7613 c
->argc
= c
->mstate
.commands
[j
].argc
;
7614 c
->argv
= c
->mstate
.commands
[j
].argv
;
7615 call(c
,c
->mstate
.commands
[j
].cmd
);
7617 c
->argv
= orig_argv
;
7618 c
->argc
= orig_argc
;
7619 freeClientMultiState(c
);
7620 initClientMultiState(c
);
7621 c
->flags
&= ~(REDIS_MULTI
|REDIS_DIRTY_CAS
);
7622 /* Make sure the EXEC command is always replicated / AOF, since we
7623 * always send the MULTI command (we can't know beforehand if the
7624 * next operations will contain at least a modification to the DB). */
7628 /* =========================== Blocking Operations ========================= */
7630 /* Currently Redis blocking operations support is limited to list POP ops,
7631 * so the current implementation is not fully generic, but it is also not
7632 * completely specific so it will not require a rewrite to support new
7633 * kind of blocking operations in the future.
7635 * Still it's important to note that list blocking operations can be already
7636 * used as a notification mechanism in order to implement other blocking
7637 * operations at application level, so there must be a very strong evidence
7638 * of usefulness and generality before new blocking operations are implemented.
7640 * This is how the current blocking POP works, we use BLPOP as example:
7641 * - If the user calls BLPOP and the key exists and contains a non empty list
7642 * then LPOP is called instead. So BLPOP is semantically the same as LPOP
7643 * if there is no need to block.
7644 * - If instead BLPOP is called and the key does not exist or the list is
7645 * empty we need to block. In order to do so we remove the notification for
7646 * new data to read in the client socket (so that we'll not serve new
7647 * requests if the blocking request is not served). Also we put the client
7648 * in a dictionary (db->blocking_keys) mapping keys to a list of clients
7649 * blocking for these keys.
7650 * - If a PUSH operation against a key with blocked clients waiting is
7651 * performed, we serve the first in the list: basically instead of pushing
7652 * the new element inside the list we return it to the (first / oldest)
7653 * blocking client, unblock the client, and remove it from the list.
7655 * The above comment and the source code should be enough in order to understand
7656 * the implementation and modify / fix it later.
7659 /* Set a client in blocking mode for the specified key, with the specified
7661 static void blockForKeys(redisClient
*c
, robj
**keys
, int numkeys
, time_t timeout
) {
7666 c
->blocking_keys
= zmalloc(sizeof(robj
*)*numkeys
);
7667 c
->blocking_keys_num
= numkeys
;
7668 c
->blockingto
= timeout
;
7669 for (j
= 0; j
< numkeys
; j
++) {
7670 /* Add the key in the client structure, to map clients -> keys */
7671 c
->blocking_keys
[j
] = keys
[j
];
7672 incrRefCount(keys
[j
]);
7674 /* And in the other "side", to map keys -> clients */
7675 de
= dictFind(c
->db
->blocking_keys
,keys
[j
]);
7679 /* For every key we take a list of clients blocked for it */
7681 retval
= dictAdd(c
->db
->blocking_keys
,keys
[j
],l
);
7682 incrRefCount(keys
[j
]);
7683 assert(retval
== DICT_OK
);
7685 l
= dictGetEntryVal(de
);
7687 listAddNodeTail(l
,c
);
7689 /* Mark the client as a blocked client */
7690 c
->flags
|= REDIS_BLOCKED
;
7691 server
.blpop_blocked_clients
++;
7694 /* Unblock a client that's waiting in a blocking operation such as BLPOP */
7695 static void unblockClientWaitingData(redisClient
*c
) {
7700 assert(c
->blocking_keys
!= NULL
);
7701 /* The client may wait for multiple keys, so unblock it for every key. */
7702 for (j
= 0; j
< c
->blocking_keys_num
; j
++) {
7703 /* Remove this client from the list of clients waiting for this key. */
7704 de
= dictFind(c
->db
->blocking_keys
,c
->blocking_keys
[j
]);
7706 l
= dictGetEntryVal(de
);
7707 listDelNode(l
,listSearchKey(l
,c
));
7708 /* If the list is empty we need to remove it to avoid wasting memory */
7709 if (listLength(l
) == 0)
7710 dictDelete(c
->db
->blocking_keys
,c
->blocking_keys
[j
]);
7711 decrRefCount(c
->blocking_keys
[j
]);
7713 /* Cleanup the client structure */
7714 zfree(c
->blocking_keys
);
7715 c
->blocking_keys
= NULL
;
7716 c
->flags
&= (~REDIS_BLOCKED
);
7717 server
.blpop_blocked_clients
--;
7718 /* We want to process data if there is some command waiting
7719 * in the input buffer. Note that this is safe even if
7720 * unblockClientWaitingData() gets called from freeClient() because
7721 * freeClient() will be smart enough to call this function
7722 * *after* c->querybuf was set to NULL. */
7723 if (c
->querybuf
&& sdslen(c
->querybuf
) > 0) processInputBuffer(c
);
7726 /* This should be called from any function PUSHing into lists.
7727 * 'c' is the "pushing client", 'key' is the key it is pushing data against,
7728 * 'ele' is the element pushed.
7730 * If the function returns 0 there was no client waiting for a list push
7733 * If the function returns 1 there was a client waiting for a list push
7734 * against this key, the element was passed to this client thus it's not
7735 * needed to actually add it to the list and the caller should return asap. */
7736 static int handleClientsWaitingListPush(redisClient
*c
, robj
*key
, robj
*ele
) {
7737 struct dictEntry
*de
;
7738 redisClient
*receiver
;
7742 de
= dictFind(c
->db
->blocking_keys
,key
);
7743 if (de
== NULL
) return 0;
7744 l
= dictGetEntryVal(de
);
7747 receiver
= ln
->value
;
7749 addReplySds(receiver
,sdsnew("*2\r\n"));
7750 addReplyBulk(receiver
,key
);
7751 addReplyBulk(receiver
,ele
);
7752 unblockClientWaitingData(receiver
);
7756 /* Blocking RPOP/LPOP */
7757 static void blockingPopGenericCommand(redisClient
*c
, int where
) {
7762 for (j
= 1; j
< c
->argc
-1; j
++) {
7763 o
= lookupKeyWrite(c
->db
,c
->argv
[j
]);
7765 if (o
->type
!= REDIS_LIST
) {
7766 addReply(c
,shared
.wrongtypeerr
);
7769 list
*list
= o
->ptr
;
7770 if (listLength(list
) != 0) {
7771 /* If the list contains elements fall back to the usual
7772 * non-blocking POP operation */
7773 robj
*argv
[2], **orig_argv
;
7776 /* We need to alter the command arguments before to call
7777 * popGenericCommand() as the command takes a single key. */
7778 orig_argv
= c
->argv
;
7779 orig_argc
= c
->argc
;
7780 argv
[1] = c
->argv
[j
];
7784 /* Also the return value is different, we need to output
7785 * the multi bulk reply header and the key name. The
7786 * "real" command will add the last element (the value)
7787 * for us. If this souds like an hack to you it's just
7788 * because it is... */
7789 addReplySds(c
,sdsnew("*2\r\n"));
7790 addReplyBulk(c
,argv
[1]);
7791 popGenericCommand(c
,where
);
7793 /* Fix the client structure with the original stuff */
7794 c
->argv
= orig_argv
;
7795 c
->argc
= orig_argc
;
7801 /* If the list is empty or the key does not exists we must block */
7802 timeout
= strtol(c
->argv
[c
->argc
-1]->ptr
,NULL
,10);
7803 if (timeout
> 0) timeout
+= time(NULL
);
7804 blockForKeys(c
,c
->argv
+1,c
->argc
-2,timeout
);
7807 static void blpopCommand(redisClient
*c
) {
7808 blockingPopGenericCommand(c
,REDIS_HEAD
);
7811 static void brpopCommand(redisClient
*c
) {
7812 blockingPopGenericCommand(c
,REDIS_TAIL
);
7815 /* =============================== Replication ============================= */
7817 static int syncWrite(int fd
, char *ptr
, ssize_t size
, int timeout
) {
7818 ssize_t nwritten
, ret
= size
;
7819 time_t start
= time(NULL
);
7823 if (aeWait(fd
,AE_WRITABLE
,1000) & AE_WRITABLE
) {
7824 nwritten
= write(fd
,ptr
,size
);
7825 if (nwritten
== -1) return -1;
7829 if ((time(NULL
)-start
) > timeout
) {
7837 static int syncRead(int fd
, char *ptr
, ssize_t size
, int timeout
) {
7838 ssize_t nread
, totread
= 0;
7839 time_t start
= time(NULL
);
7843 if (aeWait(fd
,AE_READABLE
,1000) & AE_READABLE
) {
7844 nread
= read(fd
,ptr
,size
);
7845 if (nread
== -1) return -1;
7850 if ((time(NULL
)-start
) > timeout
) {
/* Read a line from 'fd' into 'ptr', one byte at a time via syncRead().
 *
 * At most 'size'-1 bytes are stored and the buffer is always NUL terminated
 * once at least one byte has been processed. The trailing "\n" (and a
 * preceding "\r", if any) is not stored. Reading stops early when the
 * buffer is full; in that case the rest of the line is left unread.
 *
 * Returns the number of bytes stored before the line terminator, or -1 if
 * syncRead() fails. */
static int syncReadLine(int fd, char *ptr, ssize_t size, int timeout) {
    ssize_t nread = 0;

    size--; /* reserve room for the NUL terminator */
    while(size > 0) {
        char c;

        if (syncRead(fd,&c,1,timeout) == -1) return -1;
        if (c == '\n') {
            *ptr = '\0';
            if (nread && *(ptr-1) == '\r') *(ptr-1) = '\0';
            return nread;
        } else {
            *ptr++ = c;
            *ptr = '\0';
            nread++;
            size--; /* keep the loop bounded by the caller's buffer size */
        }
    }
    return nread;
}
7879 static void syncCommand(redisClient
*c
) {
7880 /* ignore SYNC if aleady slave or in monitor mode */
7881 if (c
->flags
& REDIS_SLAVE
) return;
7883 /* SYNC can't be issued when the server has pending data to send to
7884 * the client about already issued commands. We need a fresh reply
7885 * buffer registering the differences between the BGSAVE and the current
7886 * dataset, so that we can copy to other slaves if needed. */
7887 if (listLength(c
->reply
) != 0) {
7888 addReplySds(c
,sdsnew("-ERR SYNC is invalid with pending input\r\n"));
7892 redisLog(REDIS_NOTICE
,"Slave ask for synchronization");
7893 /* Here we need to check if there is a background saving operation
7894 * in progress, or if it is required to start one */
7895 if (server
.bgsavechildpid
!= -1) {
7896 /* Ok a background save is in progress. Let's check if it is a good
7897 * one for replication, i.e. if there is another slave that is
7898 * registering differences since the server forked to save */
7903 listRewind(server
.slaves
,&li
);
7904 while((ln
= listNext(&li
))) {
7906 if (slave
->replstate
== REDIS_REPL_WAIT_BGSAVE_END
) break;
7909 /* Perfect, the server is already registering differences for
7910 * another slave. Set the right state, and copy the buffer. */
7911 listRelease(c
->reply
);
7912 c
->reply
= listDup(slave
->reply
);
7913 c
->replstate
= REDIS_REPL_WAIT_BGSAVE_END
;
7914 redisLog(REDIS_NOTICE
,"Waiting for end of BGSAVE for SYNC");
7916 /* No way, we need to wait for the next BGSAVE in order to
7917 * register differences */
7918 c
->replstate
= REDIS_REPL_WAIT_BGSAVE_START
;
7919 redisLog(REDIS_NOTICE
,"Waiting for next BGSAVE for SYNC");
7922 /* Ok we don't have a BGSAVE in progress, let's start one */
7923 redisLog(REDIS_NOTICE
,"Starting BGSAVE for SYNC");
7924 if (rdbSaveBackground(server
.dbfilename
) != REDIS_OK
) {
7925 redisLog(REDIS_NOTICE
,"Replication failed, can't BGSAVE");
7926 addReplySds(c
,sdsnew("-ERR Unalbe to perform background save\r\n"));
7929 c
->replstate
= REDIS_REPL_WAIT_BGSAVE_END
;
7932 c
->flags
|= REDIS_SLAVE
;
7934 listAddNodeTail(server
.slaves
,c
);
7938 static void sendBulkToSlave(aeEventLoop
*el
, int fd
, void *privdata
, int mask
) {
7939 redisClient
*slave
= privdata
;
7941 REDIS_NOTUSED(mask
);
7942 char buf
[REDIS_IOBUF_LEN
];
7943 ssize_t nwritten
, buflen
;
7945 if (slave
->repldboff
== 0) {
7946 /* Write the bulk write count before to transfer the DB. In theory here
7947 * we don't know how much room there is in the output buffer of the
7948 * socket, but in pratice SO_SNDLOWAT (the minimum count for output
7949 * operations) will never be smaller than the few bytes we need. */
7952 bulkcount
= sdscatprintf(sdsempty(),"$%lld\r\n",(unsigned long long)
7954 if (write(fd
,bulkcount
,sdslen(bulkcount
)) != (signed)sdslen(bulkcount
))
7962 lseek(slave
->repldbfd
,slave
->repldboff
,SEEK_SET
);
7963 buflen
= read(slave
->repldbfd
,buf
,REDIS_IOBUF_LEN
);
7965 redisLog(REDIS_WARNING
,"Read error sending DB to slave: %s",
7966 (buflen
== 0) ? "premature EOF" : strerror(errno
));
7970 if ((nwritten
= write(fd
,buf
,buflen
)) == -1) {
7971 redisLog(REDIS_VERBOSE
,"Write error sending DB to slave: %s",
7976 slave
->repldboff
+= nwritten
;
7977 if (slave
->repldboff
== slave
->repldbsize
) {
7978 close(slave
->repldbfd
);
7979 slave
->repldbfd
= -1;
7980 aeDeleteFileEvent(server
.el
,slave
->fd
,AE_WRITABLE
);
7981 slave
->replstate
= REDIS_REPL_ONLINE
;
7982 if (aeCreateFileEvent(server
.el
, slave
->fd
, AE_WRITABLE
,
7983 sendReplyToClient
, slave
) == AE_ERR
) {
7987 addReplySds(slave
,sdsempty());
7988 redisLog(REDIS_NOTICE
,"Synchronization with slave succeeded");
7992 /* This function is called at the end of every backgrond saving.
7993 * The argument bgsaveerr is REDIS_OK if the background saving succeeded
7994 * otherwise REDIS_ERR is passed to the function.
7996 * The goal of this function is to handle slaves waiting for a successful
7997 * background saving in order to perform non-blocking synchronization. */
7998 static void updateSlavesWaitingBgsave(int bgsaveerr
) {
8000 int startbgsave
= 0;
8003 listRewind(server
.slaves
,&li
);
8004 while((ln
= listNext(&li
))) {
8005 redisClient
*slave
= ln
->value
;
8007 if (slave
->replstate
== REDIS_REPL_WAIT_BGSAVE_START
) {
8009 slave
->replstate
= REDIS_REPL_WAIT_BGSAVE_END
;
8010 } else if (slave
->replstate
== REDIS_REPL_WAIT_BGSAVE_END
) {
8011 struct redis_stat buf
;
8013 if (bgsaveerr
!= REDIS_OK
) {
8015 redisLog(REDIS_WARNING
,"SYNC failed. BGSAVE child returned an error");
8018 if ((slave
->repldbfd
= open(server
.dbfilename
,O_RDONLY
)) == -1 ||
8019 redis_fstat(slave
->repldbfd
,&buf
) == -1) {
8021 redisLog(REDIS_WARNING
,"SYNC failed. Can't open/stat DB after BGSAVE: %s", strerror(errno
));
8024 slave
->repldboff
= 0;
8025 slave
->repldbsize
= buf
.st_size
;
8026 slave
->replstate
= REDIS_REPL_SEND_BULK
;
8027 aeDeleteFileEvent(server
.el
,slave
->fd
,AE_WRITABLE
);
8028 if (aeCreateFileEvent(server
.el
, slave
->fd
, AE_WRITABLE
, sendBulkToSlave
, slave
) == AE_ERR
) {
8035 if (rdbSaveBackground(server
.dbfilename
) != REDIS_OK
) {
8038 listRewind(server
.slaves
,&li
);
8039 redisLog(REDIS_WARNING
,"SYNC failed. BGSAVE failed");
8040 while((ln
= listNext(&li
))) {
8041 redisClient
*slave
= ln
->value
;
8043 if (slave
->replstate
== REDIS_REPL_WAIT_BGSAVE_START
)
8050 static int syncWithMaster(void) {
8051 char buf
[1024], tmpfile
[256], authcmd
[1024];
8053 int fd
= anetTcpConnect(NULL
,server
.masterhost
,server
.masterport
);
8054 int dfd
, maxtries
= 5;
8057 redisLog(REDIS_WARNING
,"Unable to connect to MASTER: %s",
8062 /* AUTH with the master if required. */
8063 if(server
.masterauth
) {
8064 snprintf(authcmd
, 1024, "AUTH %s\r\n", server
.masterauth
);
8065 if (syncWrite(fd
, authcmd
, strlen(server
.masterauth
)+7, 5) == -1) {
8067 redisLog(REDIS_WARNING
,"Unable to AUTH to MASTER: %s",
8071 /* Read the AUTH result. */
8072 if (syncReadLine(fd
,buf
,1024,3600) == -1) {
8074 redisLog(REDIS_WARNING
,"I/O error reading auth result from MASTER: %s",
8078 if (buf
[0] != '+') {
8080 redisLog(REDIS_WARNING
,"Cannot AUTH to MASTER, is the masterauth password correct?");
8085 /* Issue the SYNC command */
8086 if (syncWrite(fd
,"SYNC \r\n",7,5) == -1) {
8088 redisLog(REDIS_WARNING
,"I/O error writing to MASTER: %s",
8092 /* Read the bulk write count */
8093 if (syncReadLine(fd
,buf
,1024,3600) == -1) {
8095 redisLog(REDIS_WARNING
,"I/O error reading bulk count from MASTER: %s",
8099 if (buf
[0] != '$') {
8101 redisLog(REDIS_WARNING
,"Bad protocol from MASTER, the first byte is not '$', are you sure the host and port are right?");
8104 dumpsize
= strtol(buf
+1,NULL
,10);
8105 redisLog(REDIS_NOTICE
,"Receiving %ld bytes data dump from MASTER",dumpsize
);
8106 /* Read the bulk write data on a temp file */
8108 snprintf(tmpfile
,256,
8109 "temp-%d.%ld.rdb",(int)time(NULL
),(long int)getpid());
8110 dfd
= open(tmpfile
,O_CREAT
|O_WRONLY
|O_EXCL
,0644);
8111 if (dfd
!= -1) break;
8116 redisLog(REDIS_WARNING
,"Opening the temp file needed for MASTER <-> SLAVE synchronization: %s",strerror(errno
));
8120 int nread
, nwritten
;
8122 nread
= read(fd
,buf
,(dumpsize
< 1024)?dumpsize
:1024);
8124 redisLog(REDIS_WARNING
,"I/O error trying to sync with MASTER: %s",
8130 nwritten
= write(dfd
,buf
,nread
);
8131 if (nwritten
== -1) {
8132 redisLog(REDIS_WARNING
,"Write error writing to the DB dump file needed for MASTER <-> SLAVE synchrnonization: %s", strerror(errno
));
8140 if (rename(tmpfile
,server
.dbfilename
) == -1) {
8141 redisLog(REDIS_WARNING
,"Failed trying to rename the temp DB into dump.rdb in MASTER <-> SLAVE synchronization: %s", strerror(errno
));
8147 if (rdbLoad(server
.dbfilename
) != REDIS_OK
) {
8148 redisLog(REDIS_WARNING
,"Failed trying to load the MASTER synchronization DB from disk");
8152 server
.master
= createClient(fd
);
8153 server
.master
->flags
|= REDIS_MASTER
;
8154 server
.master
->authenticated
= 1;
8155 server
.replstate
= REDIS_REPL_CONNECTED
;
8159 static void slaveofCommand(redisClient
*c
) {
8160 if (!strcasecmp(c
->argv
[1]->ptr
,"no") &&
8161 !strcasecmp(c
->argv
[2]->ptr
,"one")) {
8162 if (server
.masterhost
) {
8163 sdsfree(server
.masterhost
);
8164 server
.masterhost
= NULL
;
8165 if (server
.master
) freeClient(server
.master
);
8166 server
.replstate
= REDIS_REPL_NONE
;
8167 redisLog(REDIS_NOTICE
,"MASTER MODE enabled (user request)");
8170 sdsfree(server
.masterhost
);
8171 server
.masterhost
= sdsdup(c
->argv
[1]->ptr
);
8172 server
.masterport
= atoi(c
->argv
[2]->ptr
);
8173 if (server
.master
) freeClient(server
.master
);
8174 server
.replstate
= REDIS_REPL_CONNECT
;
8175 redisLog(REDIS_NOTICE
,"SLAVE OF %s:%d enabled (user request)",
8176 server
.masterhost
, server
.masterport
);
8178 addReply(c
,shared
.ok
);
8181 /* ============================ Maxmemory directive ======================== */
8183 /* Try to free one object form the pre-allocated objects free list.
8184 * This is useful under low mem conditions as by default we take 1 million
8185 * free objects allocated. On success REDIS_OK is returned, otherwise
8187 static int tryFreeOneObjectFromFreelist(void) {
8190 if (server
.vm_enabled
) pthread_mutex_lock(&server
.obj_freelist_mutex
);
8191 if (listLength(server
.objfreelist
)) {
8192 listNode
*head
= listFirst(server
.objfreelist
);
8193 o
= listNodeValue(head
);
8194 listDelNode(server
.objfreelist
,head
);
8195 if (server
.vm_enabled
) pthread_mutex_unlock(&server
.obj_freelist_mutex
);
8199 if (server
.vm_enabled
) pthread_mutex_unlock(&server
.obj_freelist_mutex
);
8204 /* This function gets called when 'maxmemory' is set on the config file to limit
8205 * the max memory used by the server, and we are out of memory.
8206 * This function will try to, in order:
8208 * - Free objects from the free list
8209 * - Try to remove keys with an EXPIRE set
8211 * It is not possible to free enough memory to reach used-memory < maxmemory
8212 * the server will start refusing commands that will enlarge even more the
8215 static void freeMemoryIfNeeded(void) {
8216 while (server
.maxmemory
&& zmalloc_used_memory() > server
.maxmemory
) {
8217 int j
, k
, freed
= 0;
8219 if (tryFreeOneObjectFromFreelist() == REDIS_OK
) continue;
8220 for (j
= 0; j
< server
.dbnum
; j
++) {
8222 robj
*minkey
= NULL
;
8223 struct dictEntry
*de
;
8225 if (dictSize(server
.db
[j
].expires
)) {
8227 /* From a sample of three keys drop the one nearest to
8228 * the natural expire */
8229 for (k
= 0; k
< 3; k
++) {
8232 de
= dictGetRandomKey(server
.db
[j
].expires
);
8233 t
= (time_t) dictGetEntryVal(de
);
8234 if (minttl
== -1 || t
< minttl
) {
8235 minkey
= dictGetEntryKey(de
);
8239 deleteKey(server
.db
+j
,minkey
);
8242 if (!freed
) return; /* nothing to free... */
8246 /* ============================== Append Only file ========================== */
8248 /* Write the append only file buffer on disk.
8250 * Since we are required to write the AOF before replying to the client,
8251 * and the only way the client socket can get a write is entering when the
8252 * the event loop, we accumulate all the AOF writes in a memory
8253 * buffer and write it on disk using this function just before entering
8254 * the event loop again. */
8255 static void flushAppendOnlyFile(void) {
8259 if (sdslen(server
.aofbuf
) == 0) return;
8261 /* We want to perform a single write. This should be guaranteed atomic
8262 * at least if the filesystem we are writing is a real physical one.
8263 * While this will save us against the server being killed I don't think
8264 * there is much to do about the whole server stopping for power problems
8266 nwritten
= write(server
.appendfd
,server
.aofbuf
,sdslen(server
.aofbuf
));
8267 if (nwritten
!= (signed)sdslen(server
.aofbuf
)) {
8268 /* Ooops, we are in troubles. The best thing to do for now is
8269 * aborting instead of giving the illusion that everything is
8270 * working as expected. */
8271 if (nwritten
== -1) {
8272 redisLog(REDIS_WARNING
,"Exiting on error writing to the append-only file: %s",strerror(errno
));
8274 redisLog(REDIS_WARNING
,"Exiting on short write while writing to the append-only file: %s",strerror(errno
));
8278 sdsfree(server
.aofbuf
);
8279 server
.aofbuf
= sdsempty();
8281 /* Don't Fsync if no-appendfsync-on-rewrite is set to yes and we have
8282 * childs performing heavy I/O on disk. */
8283 if (server
.no_appendfsync_on_rewrite
&&
8284 (server
.bgrewritechildpid
!= -1 || server
.bgsavechildpid
!= -1))
8286 /* Fsync if needed */
8288 if (server
.appendfsync
== APPENDFSYNC_ALWAYS
||
8289 (server
.appendfsync
== APPENDFSYNC_EVERYSEC
&&
8290 now
-server
.lastfsync
> 1))
8292 /* aof_fsync is defined as fdatasync() for Linux in order to avoid
8293 * flushing metadata. */
8294 aof_fsync(server
.appendfd
); /* Let's try to get this data on the disk */
8295 server
.lastfsync
= now
;
8299 static sds
catAppendOnlyGenericCommand(sds buf
, int argc
, robj
**argv
) {
8301 buf
= sdscatprintf(buf
,"*%d\r\n",argc
);
8302 for (j
= 0; j
< argc
; j
++) {
8303 robj
*o
= getDecodedObject(argv
[j
]);
8304 buf
= sdscatprintf(buf
,"$%lu\r\n",(unsigned long)sdslen(o
->ptr
));
8305 buf
= sdscatlen(buf
,o
->ptr
,sdslen(o
->ptr
));
8306 buf
= sdscatlen(buf
,"\r\n",2);
8312 static sds
catAppendOnlyExpireAtCommand(sds buf
, robj
*key
, robj
*seconds
) {
8317 /* Make sure we can use strtol */
8318 seconds
= getDecodedObject(seconds
);
8319 when
= time(NULL
)+strtol(seconds
->ptr
,NULL
,10);
8320 decrRefCount(seconds
);
8322 argv
[0] = createStringObject("EXPIREAT",8);
8324 argv
[2] = createObject(REDIS_STRING
,
8325 sdscatprintf(sdsempty(),"%ld",when
));
8326 buf
= catAppendOnlyGenericCommand(buf
, argc
, argv
);
8327 decrRefCount(argv
[0]);
8328 decrRefCount(argv
[2]);
8332 static void feedAppendOnlyFile(struct redisCommand
*cmd
, int dictid
, robj
**argv
, int argc
) {
8333 sds buf
= sdsempty();
8336 /* The DB this command was targetting is not the same as the last command
8337 * we appendend. To issue a SELECT command is needed. */
8338 if (dictid
!= server
.appendseldb
) {
8341 snprintf(seldb
,sizeof(seldb
),"%d",dictid
);
8342 buf
= sdscatprintf(buf
,"*2\r\n$6\r\nSELECT\r\n$%lu\r\n%s\r\n",
8343 (unsigned long)strlen(seldb
),seldb
);
8344 server
.appendseldb
= dictid
;
8347 if (cmd
->proc
== expireCommand
) {
8348 /* Translate EXPIRE into EXPIREAT */
8349 buf
= catAppendOnlyExpireAtCommand(buf
,argv
[1],argv
[2]);
8350 } else if (cmd
->proc
== setexCommand
) {
8351 /* Translate SETEX to SET and EXPIREAT */
8352 tmpargv
[0] = createStringObject("SET",3);
8353 tmpargv
[1] = argv
[1];
8354 tmpargv
[2] = argv
[3];
8355 buf
= catAppendOnlyGenericCommand(buf
,3,tmpargv
);
8356 decrRefCount(tmpargv
[0]);
8357 buf
= catAppendOnlyExpireAtCommand(buf
,argv
[1],argv
[2]);
8359 buf
= catAppendOnlyGenericCommand(buf
,argc
,argv
);
8362 /* Append to the AOF buffer. This will be flushed on disk just before
8363 * of re-entering the event loop, so before the client will get a
8364 * positive reply about the operation performed. */
8365 server
.aofbuf
= sdscatlen(server
.aofbuf
,buf
,sdslen(buf
));
8367 /* If a background append only file rewriting is in progress we want to
8368 * accumulate the differences between the child DB and the current one
8369 * in a buffer, so that when the child process will do its work we
8370 * can append the differences to the new append only file. */
8371 if (server
.bgrewritechildpid
!= -1)
8372 server
.bgrewritebuf
= sdscatlen(server
.bgrewritebuf
,buf
,sdslen(buf
));
8377 /* In Redis commands are always executed in the context of a client, so in
8378 * order to load the append only file we need to create a fake client. */
8379 static struct redisClient
*createFakeClient(void) {
8380 struct redisClient
*c
= zmalloc(sizeof(*c
));
8384 c
->querybuf
= sdsempty();
8388 /* We set the fake client as a slave waiting for the synchronization
8389 * so that Redis will not try to send replies to this client. */
8390 c
->replstate
= REDIS_REPL_WAIT_BGSAVE_START
;
8391 c
->reply
= listCreate();
8392 listSetFreeMethod(c
->reply
,decrRefCount
);
8393 listSetDupMethod(c
->reply
,dupClientReplyValue
);
8394 initClientMultiState(c
);
8398 static void freeFakeClient(struct redisClient
*c
) {
8399 sdsfree(c
->querybuf
);
8400 listRelease(c
->reply
);
8401 freeClientMultiState(c
);
8405 /* Replay the append log file. On error REDIS_OK is returned. On non fatal
8406 * error (the append only file is zero-length) REDIS_ERR is returned. On
8407 * fatal error an error message is logged and the program exists. */
8408 int loadAppendOnlyFile(char *filename
) {
8409 struct redisClient
*fakeClient
;
8410 FILE *fp
= fopen(filename
,"r");
8411 struct redis_stat sb
;
8412 unsigned long long loadedkeys
= 0;
8413 int appendonly
= server
.appendonly
;
8415 if (redis_fstat(fileno(fp
),&sb
) != -1 && sb
.st_size
== 0)
8419 redisLog(REDIS_WARNING
,"Fatal error: can't open the append log file for reading: %s",strerror(errno
));
8423 /* Temporarily disable AOF, to prevent EXEC from feeding a MULTI
8424 * to the same file we're about to read. */
8425 server
.appendonly
= 0;
8427 fakeClient
= createFakeClient();
8434 struct redisCommand
*cmd
;
8436 if (fgets(buf
,sizeof(buf
),fp
) == NULL
) {
8442 if (buf
[0] != '*') goto fmterr
;
8444 argv
= zmalloc(sizeof(robj
*)*argc
);
8445 for (j
= 0; j
< argc
; j
++) {
8446 if (fgets(buf
,sizeof(buf
),fp
) == NULL
) goto readerr
;
8447 if (buf
[0] != '$') goto fmterr
;
8448 len
= strtol(buf
+1,NULL
,10);
8449 argsds
= sdsnewlen(NULL
,len
);
8450 if (len
&& fread(argsds
,len
,1,fp
) == 0) goto fmterr
;
8451 argv
[j
] = createObject(REDIS_STRING
,argsds
);
8452 if (fread(buf
,2,1,fp
) == 0) goto fmterr
; /* discard CRLF */
8455 /* Command lookup */
8456 cmd
= lookupCommand(argv
[0]->ptr
);
8458 redisLog(REDIS_WARNING
,"Unknown command '%s' reading the append only file", argv
[0]->ptr
);
8461 /* Try object encoding */
8462 if (cmd
->flags
& REDIS_CMD_BULK
)
8463 argv
[argc
-1] = tryObjectEncoding(argv
[argc
-1]);
8464 /* Run the command in the context of a fake client */
8465 fakeClient
->argc
= argc
;
8466 fakeClient
->argv
= argv
;
8467 cmd
->proc(fakeClient
);
8468 /* Discard the reply objects list from the fake client */
8469 while(listLength(fakeClient
->reply
))
8470 listDelNode(fakeClient
->reply
,listFirst(fakeClient
->reply
));
8471 /* Clean up, ready for the next command */
8472 for (j
= 0; j
< argc
; j
++) decrRefCount(argv
[j
]);
8474 /* Handle swapping while loading big datasets when VM is on */
8476 if (server
.vm_enabled
&& (loadedkeys
% 5000) == 0) {
8477 while (zmalloc_used_memory() > server
.vm_max_memory
) {
8478 if (vmSwapOneObjectBlocking() == REDIS_ERR
) break;
8483 /* This point can only be reached when EOF is reached without errors.
8484 * If the client is in the middle of a MULTI/EXEC, log error and quit. */
8485 if (fakeClient
->flags
& REDIS_MULTI
) goto readerr
;
8488 freeFakeClient(fakeClient
);
8489 server
.appendonly
= appendonly
;
8494 redisLog(REDIS_WARNING
,"Unexpected end of file reading the append only file");
8496 redisLog(REDIS_WARNING
,"Unrecoverable error reading the append only file: %s", strerror(errno
));
8500 redisLog(REDIS_WARNING
,"Bad file format reading the append only file");
8504 /* Write an object into a file in the bulk format $<count>\r\n<payload>\r\n */
8505 static int fwriteBulkObject(FILE *fp
, robj
*obj
) {
8509 /* Avoid the incr/decr ref count business if possible to help
8510 * copy-on-write (we are often in a child process when this function
8512 * Also makes sure that key objects don't get incrRefCount-ed when VM
8514 if (obj
->encoding
!= REDIS_ENCODING_RAW
) {
8515 obj
= getDecodedObject(obj
);
8518 snprintf(buf
,sizeof(buf
),"$%ld\r\n",(long)sdslen(obj
->ptr
));
8519 if (fwrite(buf
,strlen(buf
),1,fp
) == 0) goto err
;
8520 if (sdslen(obj
->ptr
) && fwrite(obj
->ptr
,sdslen(obj
->ptr
),1,fp
) == 0)
8522 if (fwrite("\r\n",2,1,fp
) == 0) goto err
;
8523 if (decrrc
) decrRefCount(obj
);
8526 if (decrrc
) decrRefCount(obj
);
/* Write binary-safe string into a file in the bulk format
 * $<count>\r\n<payload>\r\n
 *
 * 's' points to 'len' bytes (embedded NUL bytes are allowed). An empty
 * payload is emitted as "$0\r\n\r\n". Returns 1 on success, 0 on write
 * error. */
static int fwriteBulkString(FILE *fp, char *s, unsigned long len) {
    char buf[128];

    /* 'len' is unsigned long, so the matching conversion is %lu. */
    snprintf(buf,sizeof(buf),"$%lu\r\n",len);
    if (fwrite(buf,strlen(buf),1,fp) == 0) return 0;
    /* fwrite() of zero bytes reports 0 items: skip it for empty payloads */
    if (len && fwrite(s,len,1,fp) == 0) return 0;
    if (fwrite("\r\n",2,1,fp) == 0) return 0;
    return 1;
}
/* Write a double value in bulk format $<count>\r\n<payload>\r\n
 * The value is formatted with "%.17g". Returns 1 on success, 0 on write
 * error. */
static int fwriteBulkDouble(FILE *fp, double d) {
    char header[128], payload[128];
    size_t paylen;

    /* The payload buffer already carries the trailing CRLF, which is not
     * part of the announced count. */
    snprintf(payload,sizeof(payload),"%.17g\r\n",d);
    paylen = strlen(payload);
    snprintf(header,sizeof(header),"$%lu\r\n",(unsigned long)paylen-2);

    if (fwrite(header,strlen(header),1,fp) == 0) return 0;
    if (fwrite(payload,paylen,1,fp) == 0) return 0;
    return 1;
}
/* Write a long value in bulk format $<count>\r\n<payload>\r\n
 * The value is written in decimal. Returns 1 on success, 0 on write
 * error. */
static int fwriteBulkLong(FILE *fp, long l) {
    char header[128], payload[128];
    size_t paylen;

    /* The payload buffer already carries the trailing CRLF, which is not
     * part of the announced count. */
    snprintf(payload,sizeof(payload),"%ld\r\n",l);
    paylen = strlen(payload);
    snprintf(header,sizeof(header),"$%lu\r\n",(unsigned long)paylen-2);

    if (fwrite(header,strlen(header),1,fp) == 0) return 0;
    if (fwrite(payload,paylen,1,fp) == 0) return 0;
    return 1;
}
8564 /* Write a sequence of commands able to fully rebuild the dataset into
8565 * "filename". Used both by REWRITEAOF and BGREWRITEAOF. */
8566 static int rewriteAppendOnlyFile(char *filename
) {
8567 dictIterator
*di
= NULL
;
8572 time_t now
= time(NULL
);
8574 /* Note that we have to use a different temp name here compared to the
8575 * one used by rewriteAppendOnlyFileBackground() function. */
8576 snprintf(tmpfile
,256,"temp-rewriteaof-%d.aof", (int) getpid());
8577 fp
= fopen(tmpfile
,"w");
8579 redisLog(REDIS_WARNING
, "Failed rewriting the append only file: %s", strerror(errno
));
8582 for (j
= 0; j
< server
.dbnum
; j
++) {
8583 char selectcmd
[] = "*2\r\n$6\r\nSELECT\r\n";
8584 redisDb
*db
= server
.db
+j
;
8586 if (dictSize(d
) == 0) continue;
8587 di
= dictGetIterator(d
);
8593 /* SELECT the new DB */
8594 if (fwrite(selectcmd
,sizeof(selectcmd
)-1,1,fp
) == 0) goto werr
;
8595 if (fwriteBulkLong(fp
,j
) == 0) goto werr
;
8597 /* Iterate this DB writing every entry */
8598 while((de
= dictNext(di
)) != NULL
) {
8603 key
= dictGetEntryKey(de
);
8604 /* If the value for this key is swapped, load a preview in memory.
8605 * We use a "swapped" flag to remember if we need to free the
8606 * value object instead to just increment the ref count anyway
8607 * in order to avoid copy-on-write of pages if we are forked() */
8608 if (!server
.vm_enabled
|| key
->storage
== REDIS_VM_MEMORY
||
8609 key
->storage
== REDIS_VM_SWAPPING
) {
8610 o
= dictGetEntryVal(de
);
8613 o
= vmPreviewObject(key
);
8616 expiretime
= getExpire(db
,key
);
8618 /* Save the key and associated value */
8619 if (o
->type
== REDIS_STRING
) {
8620 /* Emit a SET command */
8621 char cmd
[]="*3\r\n$3\r\nSET\r\n";
8622 if (fwrite(cmd
,sizeof(cmd
)-1,1,fp
) == 0) goto werr
;
8624 if (fwriteBulkObject(fp
,key
) == 0) goto werr
;
8625 if (fwriteBulkObject(fp
,o
) == 0) goto werr
;
8626 } else if (o
->type
== REDIS_LIST
) {
8627 /* Emit the RPUSHes needed to rebuild the list */
8628 list
*list
= o
->ptr
;
8632 listRewind(list
,&li
);
8633 while((ln
= listNext(&li
))) {
8634 char cmd
[]="*3\r\n$5\r\nRPUSH\r\n";
8635 robj
*eleobj
= listNodeValue(ln
);
8637 if (fwrite(cmd
,sizeof(cmd
)-1,1,fp
) == 0) goto werr
;
8638 if (fwriteBulkObject(fp
,key
) == 0) goto werr
;
8639 if (fwriteBulkObject(fp
,eleobj
) == 0) goto werr
;
8641 } else if (o
->type
== REDIS_SET
) {
8642 /* Emit the SADDs needed to rebuild the set */
8644 dictIterator
*di
= dictGetIterator(set
);
8647 while((de
= dictNext(di
)) != NULL
) {
8648 char cmd
[]="*3\r\n$4\r\nSADD\r\n";
8649 robj
*eleobj
= dictGetEntryKey(de
);
8651 if (fwrite(cmd
,sizeof(cmd
)-1,1,fp
) == 0) goto werr
;
8652 if (fwriteBulkObject(fp
,key
) == 0) goto werr
;
8653 if (fwriteBulkObject(fp
,eleobj
) == 0) goto werr
;
8655 dictReleaseIterator(di
);
8656 } else if (o
->type
== REDIS_ZSET
) {
8657 /* Emit the ZADDs needed to rebuild the sorted set */
8659 dictIterator
*di
= dictGetIterator(zs
->dict
);
8662 while((de
= dictNext(di
)) != NULL
) {
8663 char cmd
[]="*4\r\n$4\r\nZADD\r\n";
8664 robj
*eleobj
= dictGetEntryKey(de
);
8665 double *score
= dictGetEntryVal(de
);
8667 if (fwrite(cmd
,sizeof(cmd
)-1,1,fp
) == 0) goto werr
;
8668 if (fwriteBulkObject(fp
,key
) == 0) goto werr
;
8669 if (fwriteBulkDouble(fp
,*score
) == 0) goto werr
;
8670 if (fwriteBulkObject(fp
,eleobj
) == 0) goto werr
;
8672 dictReleaseIterator(di
);
8673 } else if (o
->type
== REDIS_HASH
) {
8674 char cmd
[]="*4\r\n$4\r\nHSET\r\n";
8676 /* Emit the HSETs needed to rebuild the hash */
8677 if (o
->encoding
== REDIS_ENCODING_ZIPMAP
) {
8678 unsigned char *p
= zipmapRewind(o
->ptr
);
8679 unsigned char *field
, *val
;
8680 unsigned int flen
, vlen
;
8682 while((p
= zipmapNext(p
,&field
,&flen
,&val
,&vlen
)) != NULL
) {
8683 if (fwrite(cmd
,sizeof(cmd
)-1,1,fp
) == 0) goto werr
;
8684 if (fwriteBulkObject(fp
,key
) == 0) goto werr
;
8685 if (fwriteBulkString(fp
,(char*)field
,flen
) == -1)
8687 if (fwriteBulkString(fp
,(char*)val
,vlen
) == -1)
8691 dictIterator
*di
= dictGetIterator(o
->ptr
);
8694 while((de
= dictNext(di
)) != NULL
) {
8695 robj
*field
= dictGetEntryKey(de
);
8696 robj
*val
= dictGetEntryVal(de
);
8698 if (fwrite(cmd
,sizeof(cmd
)-1,1,fp
) == 0) goto werr
;
8699 if (fwriteBulkObject(fp
,key
) == 0) goto werr
;
8700 if (fwriteBulkObject(fp
,field
) == -1) return -1;
8701 if (fwriteBulkObject(fp
,val
) == -1) return -1;
8703 dictReleaseIterator(di
);
8706 redisPanic("Unknown object type");
8708 /* Save the expire time */
8709 if (expiretime
!= -1) {
8710 char cmd
[]="*3\r\n$8\r\nEXPIREAT\r\n";
8711 /* If this key is already expired skip it */
8712 if (expiretime
< now
) continue;
8713 if (fwrite(cmd
,sizeof(cmd
)-1,1,fp
) == 0) goto werr
;
8714 if (fwriteBulkObject(fp
,key
) == 0) goto werr
;
8715 if (fwriteBulkLong(fp
,expiretime
) == 0) goto werr
;
8717 if (swapped
) decrRefCount(o
);
8719 dictReleaseIterator(di
);
8722 /* Make sure data will not remain on the OS's output buffers */
8724 aof_fsync(fileno(fp
));
8727 /* Use RENAME to make sure the DB file is changed atomically only
8728 * if the generate DB file is ok. */
8729 if (rename(tmpfile
,filename
) == -1) {
8730 redisLog(REDIS_WARNING
,"Error moving temp append only file on the final destination: %s", strerror(errno
));
8734 redisLog(REDIS_NOTICE
,"SYNC append only file rewrite performed");
8740 redisLog(REDIS_WARNING
,"Write error writing append only file on disk: %s", strerror(errno
));
8741 if (di
) dictReleaseIterator(di
);
8745 /* This is how rewriting of the append only file in background works:
8747 * 1) The user calls BGREWRITEAOF
8748 * 2) Redis calls this function, that forks():
8749 * 2a) the child rewrite the append only file in a temp file.
8750 * 2b) the parent accumulates differences in server.bgrewritebuf.
8751 * 3) When the child finishes '2a' it exits.
8752 * 4) The parent will trap the exit code, if it's OK, will append the
8753 * data accumulated into server.bgrewritebuf into the temp file, and
8754 * finally will rename(2) the temp file in the actual file name.
8755 * Then the new file is reopened as the new append only file. Profit!
8757 static int rewriteAppendOnlyFileBackground(void) {
8760 if (server
.bgrewritechildpid
!= -1) return REDIS_ERR
;
8761 if (server
.vm_enabled
) waitEmptyIOJobsQueue();
8762 if ((childpid
= fork()) == 0) {
8766 if (server
.vm_enabled
) vmReopenSwapFile();
8768 snprintf(tmpfile
,256,"temp-rewriteaof-bg-%d.aof", (int) getpid());
8769 if (rewriteAppendOnlyFile(tmpfile
) == REDIS_OK
) {
8776 if (childpid
== -1) {
8777 redisLog(REDIS_WARNING
,
8778 "Can't rewrite append only file in background: fork: %s",
8782 redisLog(REDIS_NOTICE
,
8783 "Background append only file rewriting started by pid %d",childpid
);
8784 server
.bgrewritechildpid
= childpid
;
8785 updateDictResizePolicy();
8786 /* We set appendseldb to -1 in order to force the next call to the
8787 * feedAppendOnlyFile() to issue a SELECT command, so the differences
8788 * accumulated by the parent into server.bgrewritebuf will start
8789 * with a SELECT statement and it will be safe to merge. */
8790 server
.appendseldb
= -1;
8793 return REDIS_OK
; /* unreached */
8796 static void bgrewriteaofCommand(redisClient
*c
) {
8797 if (server
.bgrewritechildpid
!= -1) {
8798 addReplySds(c
,sdsnew("-ERR background append only file rewriting already in progress\r\n"));
8801 if (rewriteAppendOnlyFileBackground() == REDIS_OK
) {
8802 char *status
= "+Background append only file rewriting started\r\n";
8803 addReplySds(c
,sdsnew(status
));
8805 addReply(c
,shared
.err
);
/* Remove the temporary file written by the background AOF rewrite child
 * whose pid is 'childpid'. The file name follows the same
 * "temp-rewriteaof-bg-<pid>.aof" pattern used when the child creates it. */
static void aofRemoveTempFile(pid_t childpid) {
    char path[256];

    snprintf(path,sizeof(path),"temp-rewriteaof-bg-%d.aof",(int)childpid);
    unlink(path);
}
8816 /* Virtual Memory is composed mainly of two subsystems:
8817 * - Blocking Virtual Memory
8818 * - Threaded Virtual Memory I/O
8819 * The two parts are not fully decoupled, but functions are split among two
8820 * different sections of the source code (delimited by comments) in order to
8821 * make more clear what functionality is about the blocking VM and what about
8822 * the threaded (not blocking) VM.
8826 * Redis VM is a blocking VM (one that blocks reading swapped values from
8827 * disk into memory when a value swapped out is needed in memory) that is made
8828 * unblocking by trying to examine the command argument vector in order to
8829 * load in background values that will likely be needed in order to exec
8830 * the command. The command is executed only once all the relevant keys
8831 * are loaded into memory.
8833 * This is basically almost as simple as a blocking VM, but almost as parallel
8834 * as a fully non-blocking VM.
8837 /* Called when the user switches from "appendonly yes" to "appendonly no"
8838 * at runtime using the CONFIG command. */
8839 static void stopAppendOnly(void) {
8840 flushAppendOnlyFile();
8841 aof_fsync(server
.appendfd
);
8842 close(server
.appendfd
);
8844 server
.appendfd
= -1;
8845 server
.appendseldb
= -1;
8846 server
.appendonly
= 0;
8847 /* rewrite operation in progress? kill it, wait child exit */
8848 if (server
.bgsavechildpid
!= -1) {
8851 if (kill(server
.bgsavechildpid
,SIGKILL
) != -1)
8852 wait3(&statloc
,0,NULL
);
8853 /* reset the buffer accumulating changes while the child saves */
8854 sdsfree(server
.bgrewritebuf
);
8855 server
.bgrewritebuf
= sdsempty();
8856 server
.bgsavechildpid
= -1;
8860 /* Called when the user switches from "appendonly no" to "appendonly yes"
8861 * at runtime using the CONFIG command. */
8862 static int startAppendOnly(void) {
8863 server
.appendonly
= 1;
8864 server
.lastfsync
= time(NULL
);
8865 server
.appendfd
= open(server
.appendfilename
,O_WRONLY
|O_APPEND
|O_CREAT
,0644);
8866 if (server
.appendfd
== -1) {
8867 redisLog(REDIS_WARNING
,"Used tried to switch on AOF via CONFIG, but I can't open the AOF file: %s",strerror(errno
));
8870 if (rewriteAppendOnlyFileBackground() == REDIS_ERR
) {
8871 server
.appendonly
= 0;
8872 close(server
.appendfd
);
8873 redisLog(REDIS_WARNING
,"Used tried to switch on AOF via CONFIG, I can't trigger a background AOF rewrite operation. Check the above logs for more info about the error.",strerror(errno
));
8879 /* =================== Virtual Memory - Blocking Side ====================== */
8881 static void vmInit(void) {
8887 if (server
.vm_max_threads
!= 0)
8888 zmalloc_enable_thread_safeness(); /* we need thread safe zmalloc() */
8890 redisLog(REDIS_NOTICE
,"Using '%s' as swap file",server
.vm_swap_file
);
8891 /* Try to open the old swap file, otherwise create it */
8892 if ((server
.vm_fp
= fopen(server
.vm_swap_file
,"r+b")) == NULL
) {
8893 server
.vm_fp
= fopen(server
.vm_swap_file
,"w+b");
8895 if (server
.vm_fp
== NULL
) {
8896 redisLog(REDIS_WARNING
,
8897 "Can't open the swap file: %s. Exiting.",
8901 server
.vm_fd
= fileno(server
.vm_fp
);
8902 /* Lock the swap file for writing, this is useful in order to avoid
8903 * another instance to use the same swap file for a config error. */
8904 fl
.l_type
= F_WRLCK
;
8905 fl
.l_whence
= SEEK_SET
;
8906 fl
.l_start
= fl
.l_len
= 0;
8907 if (fcntl(server
.vm_fd
,F_SETLK
,&fl
) == -1) {
8908 redisLog(REDIS_WARNING
,
8909 "Can't lock the swap file at '%s': %s. Make sure it is not used by another Redis instance.", server
.vm_swap_file
, strerror(errno
));
8913 server
.vm_next_page
= 0;
8914 server
.vm_near_pages
= 0;
8915 server
.vm_stats_used_pages
= 0;
8916 server
.vm_stats_swapped_objects
= 0;
8917 server
.vm_stats_swapouts
= 0;
8918 server
.vm_stats_swapins
= 0;
8919 totsize
= server
.vm_pages
*server
.vm_page_size
;
8920 redisLog(REDIS_NOTICE
,"Allocating %lld bytes of swap file",totsize
);
8921 if (ftruncate(server
.vm_fd
,totsize
) == -1) {
8922 redisLog(REDIS_WARNING
,"Can't ftruncate swap file: %s. Exiting.",
8926 redisLog(REDIS_NOTICE
,"Swap file allocated with success");
8928 server
.vm_bitmap
= zmalloc((server
.vm_pages
+7)/8);
8929 redisLog(REDIS_VERBOSE
,"Allocated %lld bytes page table for %lld pages",
8930 (long long) (server
.vm_pages
+7)/8, server
.vm_pages
);
8931 memset(server
.vm_bitmap
,0,(server
.vm_pages
+7)/8);
8933 /* Initialize threaded I/O (used by Virtual Memory) */
8934 server
.io_newjobs
= listCreate();
8935 server
.io_processing
= listCreate();
8936 server
.io_processed
= listCreate();
8937 server
.io_ready_clients
= listCreate();
8938 pthread_mutex_init(&server
.io_mutex
,NULL
);
8939 pthread_mutex_init(&server
.obj_freelist_mutex
,NULL
);
8940 pthread_mutex_init(&server
.io_swapfile_mutex
,NULL
);
8941 server
.io_active_threads
= 0;
8942 if (pipe(pipefds
) == -1) {
8943 redisLog(REDIS_WARNING
,"Unable to intialized VM: pipe(2): %s. Exiting."
8947 server
.io_ready_pipe_read
= pipefds
[0];
8948 server
.io_ready_pipe_write
= pipefds
[1];
8949 redisAssert(anetNonBlock(NULL
,server
.io_ready_pipe_read
) != ANET_ERR
);
8950 /* LZF requires a lot of stack */
8951 pthread_attr_init(&server
.io_threads_attr
);
8952 pthread_attr_getstacksize(&server
.io_threads_attr
, &stacksize
);
8953 while (stacksize
< REDIS_THREAD_STACK_SIZE
) stacksize
*= 2;
8954 pthread_attr_setstacksize(&server
.io_threads_attr
, stacksize
);
8955 /* Listen for events in the threaded I/O pipe */
8956 if (aeCreateFileEvent(server
.el
, server
.io_ready_pipe_read
, AE_READABLE
,
8957 vmThreadedIOCompletedJob
, NULL
) == AE_ERR
)
8958 oom("creating file event");
8961 /* Mark the page as used */
8962 static void vmMarkPageUsed(off_t page
) {
8963 off_t byte
= page
/8;
8965 redisAssert(vmFreePage(page
) == 1);
8966 server
.vm_bitmap
[byte
] |= 1<<bit
;
8969 /* Mark N contiguous pages as used, with 'page' being the first. */
8970 static void vmMarkPagesUsed(off_t page
, off_t count
) {
8973 for (j
= 0; j
< count
; j
++)
8974 vmMarkPageUsed(page
+j
);
8975 server
.vm_stats_used_pages
+= count
;
8976 redisLog(REDIS_DEBUG
,"Mark USED pages: %lld pages at %lld\n",
8977 (long long)count
, (long long)page
);
8980 /* Mark the page as free */
8981 static void vmMarkPageFree(off_t page
) {
8982 off_t byte
= page
/8;
8984 redisAssert(vmFreePage(page
) == 0);
8985 server
.vm_bitmap
[byte
] &= ~(1<<bit
);
8988 /* Mark N contiguous pages as free, with 'page' being the first. */
8989 static void vmMarkPagesFree(off_t page
, off_t count
) {
8992 for (j
= 0; j
< count
; j
++)
8993 vmMarkPageFree(page
+j
);
8994 server
.vm_stats_used_pages
-= count
;
8995 redisLog(REDIS_DEBUG
,"Mark FREE pages: %lld pages at %lld\n",
8996 (long long)count
, (long long)page
);
8999 /* Test if the page is free */
9000 static int vmFreePage(off_t page
) {
9001 off_t byte
= page
/8;
9003 return (server
.vm_bitmap
[byte
] & (1<<bit
)) == 0;
9006 /* Find N contiguous free pages storing the first page of the cluster in *first.
9007 * Returns REDIS_OK if it was able to find N contiguous pages, otherwise
9008 * REDIS_ERR is returned.
9010 * This function uses a simple algorithm: we try to allocate
9011 * REDIS_VM_MAX_NEAR_PAGES sequentially, when we reach this limit we start
9012 * again from the start of the swap file searching for free spaces.
9014 * If it looks pretty clear that there are no free pages near our offset
9015 * we try to find less populated places doing a forward jump of
9016 * REDIS_VM_MAX_RANDOM_JUMP, then we start scanning again a few pages
9017 * without hurry, and then we jump again and so forth...
9019 * This function can be improved using a free list to avoid to guess
9020 * too much, since we could collect data about freed pages.
9022 * note: I implemented this function just after watching an episode of
9023 * Battlestar Galactica, where the hybrid was continuing to say "JUMP!"
9025 static int vmFindContiguousPages(off_t
*first
, off_t n
) {
9026 off_t base
, offset
= 0, since_jump
= 0, numfree
= 0;
9028 if (server
.vm_near_pages
== REDIS_VM_MAX_NEAR_PAGES
) {
9029 server
.vm_near_pages
= 0;
9030 server
.vm_next_page
= 0;
9032 server
.vm_near_pages
++; /* Yet another try for pages near to the old ones */
9033 base
= server
.vm_next_page
;
9035 while(offset
< server
.vm_pages
) {
9036 off_t
this = base
+offset
;
9038 /* If we overflow, restart from page zero */
9039 if (this >= server
.vm_pages
) {
9040 this -= server
.vm_pages
;
9042 /* Just overflowed, what we found on tail is no longer
9043 * interesting, as it's no longer contiguous. */
9047 if (vmFreePage(this)) {
9048 /* This is a free page */
9050 /* Already got N free pages? Return to the caller, with success */
9052 *first
= this-(n
-1);
9053 server
.vm_next_page
= this+1;
9054 redisLog(REDIS_DEBUG
, "FOUND CONTIGUOUS PAGES: %lld pages at %lld\n", (long long) n
, (long long) *first
);
9058 /* The current one is not a free page */
9062 /* Fast-forward if the current page is not free and we already
9063 * searched enough near this place. */
9065 if (!numfree
&& since_jump
>= REDIS_VM_MAX_RANDOM_JUMP
/4) {
9066 offset
+= random() % REDIS_VM_MAX_RANDOM_JUMP
;
9068 /* Note that even if we rewind after the jump, we are don't need
9069 * to make sure numfree is set to zero as we only jump *if* it
9070 * is set to zero. */
9072 /* Otherwise just check the next page */
9079 /* Write the specified object at the specified page of the swap file */
9080 static int vmWriteObjectOnSwap(robj
*o
, off_t page
) {
9081 if (server
.vm_enabled
) pthread_mutex_lock(&server
.io_swapfile_mutex
);
9082 if (fseeko(server
.vm_fp
,page
*server
.vm_page_size
,SEEK_SET
) == -1) {
9083 if (server
.vm_enabled
) pthread_mutex_unlock(&server
.io_swapfile_mutex
);
9084 redisLog(REDIS_WARNING
,
9085 "Critical VM problem in vmWriteObjectOnSwap(): can't seek: %s",
9089 rdbSaveObject(server
.vm_fp
,o
);
9090 fflush(server
.vm_fp
);
9091 if (server
.vm_enabled
) pthread_mutex_unlock(&server
.io_swapfile_mutex
);
9095 /* Swap the 'val' object relative to 'key' into disk. Store all the information
9096 * needed to later retrieve the object into the key object.
9097 * If we can't find enough contiguous empty pages to swap the object on disk
9098 * REDIS_ERR is returned. */
9099 static int vmSwapObjectBlocking(robj
*key
, robj
*val
) {
9100 off_t pages
= rdbSavedObjectPages(val
,NULL
);
9103 assert(key
->storage
== REDIS_VM_MEMORY
);
9104 assert(key
->refcount
== 1);
9105 if (vmFindContiguousPages(&page
,pages
) == REDIS_ERR
) return REDIS_ERR
;
9106 if (vmWriteObjectOnSwap(val
,page
) == REDIS_ERR
) return REDIS_ERR
;
9107 key
->vm
.page
= page
;
9108 key
->vm
.usedpages
= pages
;
9109 key
->storage
= REDIS_VM_SWAPPED
;
9110 key
->vtype
= val
->type
;
9111 decrRefCount(val
); /* Deallocate the object from memory. */
9112 vmMarkPagesUsed(page
,pages
);
9113 redisLog(REDIS_DEBUG
,"VM: object %s swapped out at %lld (%lld pages)",
9114 (unsigned char*) key
->ptr
,
9115 (unsigned long long) page
, (unsigned long long) pages
);
9116 server
.vm_stats_swapped_objects
++;
9117 server
.vm_stats_swapouts
++;
9121 static robj
*vmReadObjectFromSwap(off_t page
, int type
) {
9124 if (server
.vm_enabled
) pthread_mutex_lock(&server
.io_swapfile_mutex
);
9125 if (fseeko(server
.vm_fp
,page
*server
.vm_page_size
,SEEK_SET
) == -1) {
9126 redisLog(REDIS_WARNING
,
9127 "Unrecoverable VM problem in vmReadObjectFromSwap(): can't seek: %s",
9131 o
= rdbLoadObject(type
,server
.vm_fp
);
9133 redisLog(REDIS_WARNING
, "Unrecoverable VM problem in vmReadObjectFromSwap(): can't load object from swap file: %s", strerror(errno
));
9136 if (server
.vm_enabled
) pthread_mutex_unlock(&server
.io_swapfile_mutex
);
9140 /* Load the value object relative to the 'key' object from swap to memory.
9141 * The newly allocated object is returned.
9143 * If preview is true the unserialized object is returned to the caller but
9144 * no changes are made to the key object, nor the pages are marked as freed */
9145 static robj
*vmGenericLoadObject(robj
*key
, int preview
) {
9148 redisAssert(key
->storage
== REDIS_VM_SWAPPED
|| key
->storage
== REDIS_VM_LOADING
);
9149 val
= vmReadObjectFromSwap(key
->vm
.page
,key
->vtype
);
9151 key
->storage
= REDIS_VM_MEMORY
;
9152 key
->vm
.atime
= server
.unixtime
;
9153 vmMarkPagesFree(key
->vm
.page
,key
->vm
.usedpages
);
9154 redisLog(REDIS_DEBUG
, "VM: object %s loaded from disk",
9155 (unsigned char*) key
->ptr
);
9156 server
.vm_stats_swapped_objects
--;
9158 redisLog(REDIS_DEBUG
, "VM: object %s previewed from disk",
9159 (unsigned char*) key
->ptr
);
9161 server
.vm_stats_swapins
++;
9165 /* Plain object loading, from swap to memory */
9166 static robj
*vmLoadObject(robj
*key
) {
9167 /* If we are loading the object in background, stop it, we
9168 * need to load this object synchronously ASAP. */
9169 if (key
->storage
== REDIS_VM_LOADING
)
9170 vmCancelThreadedIOJob(key
);
9171 return vmGenericLoadObject(key
,0);
9174 /* Just load the value on disk, without to modify the key.
9175 * This is useful when we want to perform some operation on the value
9176 * without to really bring it from swap to memory, like while saving the
9177 * dataset or rewriting the append only log. */
9178 static robj
*vmPreviewObject(robj
*key
) {
9179 return vmGenericLoadObject(key
,1);
9182 /* How a good candidate is this object for swapping?
9183 * The better candidate it is, the greater the returned value.
9185 * Currently we try to perform a fast estimation of the object size in
9186 * memory, and combine it with aging informations.
9188 * Basically swappability = idle-time * log(estimated size)
9190 * Bigger objects are preferred over smaller objects, but not
9191 * proportionally, this is why we use the logarithm. This algorithm is
9192 * just a first try and will probably be tuned later. */
9193 static double computeObjectSwappability(robj
*o
) {
9194 time_t age
= server
.unixtime
- o
->vm
.atime
;
9198 struct dictEntry
*de
;
9201 if (age
<= 0) return 0;
9204 if (o
->encoding
!= REDIS_ENCODING_RAW
) {
9207 asize
= sdslen(o
->ptr
)+sizeof(*o
)+sizeof(long)*2;
9212 listNode
*ln
= listFirst(l
);
9214 asize
= sizeof(list
);
9216 robj
*ele
= ln
->value
;
9219 elesize
= (ele
->encoding
== REDIS_ENCODING_RAW
) ?
9220 (sizeof(*o
)+sdslen(ele
->ptr
)) :
9222 asize
+= (sizeof(listNode
)+elesize
)*listLength(l
);
9227 z
= (o
->type
== REDIS_ZSET
);
9228 d
= z
? ((zset
*)o
->ptr
)->dict
: o
->ptr
;
9230 asize
= sizeof(dict
)+(sizeof(struct dictEntry
*)*dictSlots(d
));
9231 if (z
) asize
+= sizeof(zset
)-sizeof(dict
);
9236 de
= dictGetRandomKey(d
);
9237 ele
= dictGetEntryKey(de
);
9238 elesize
= (ele
->encoding
== REDIS_ENCODING_RAW
) ?
9239 (sizeof(*o
)+sdslen(ele
->ptr
)) :
9241 asize
+= (sizeof(struct dictEntry
)+elesize
)*dictSize(d
);
9242 if (z
) asize
+= sizeof(zskiplistNode
)*dictSize(d
);
9246 if (o
->encoding
== REDIS_ENCODING_ZIPMAP
) {
9247 unsigned char *p
= zipmapRewind((unsigned char*)o
->ptr
);
9248 unsigned int len
= zipmapLen((unsigned char*)o
->ptr
);
9249 unsigned int klen
, vlen
;
9250 unsigned char *key
, *val
;
9252 if ((p
= zipmapNext(p
,&key
,&klen
,&val
,&vlen
)) == NULL
) {
9256 asize
= len
*(klen
+vlen
+3);
9257 } else if (o
->encoding
== REDIS_ENCODING_HT
) {
9259 asize
= sizeof(dict
)+(sizeof(struct dictEntry
*)*dictSlots(d
));
9264 de
= dictGetRandomKey(d
);
9265 ele
= dictGetEntryKey(de
);
9266 elesize
= (ele
->encoding
== REDIS_ENCODING_RAW
) ?
9267 (sizeof(*o
)+sdslen(ele
->ptr
)) :
9269 ele
= dictGetEntryVal(de
);
9270 elesize
= (ele
->encoding
== REDIS_ENCODING_RAW
) ?
9271 (sizeof(*o
)+sdslen(ele
->ptr
)) :
9273 asize
+= (sizeof(struct dictEntry
)+elesize
)*dictSize(d
);
9278 return (double)age
*log(1+asize
);
9281 /* Try to swap an object that's a good candidate for swapping.
9282 * Returns REDIS_OK if the object was swapped, REDIS_ERR if it's not possible
9283 * to swap any object at all.
9285 * If 'usethreaded' is true, Redis will try to swap the object in background
9286 * using I/O threads. */
9287 static int vmSwapOneObject(int usethreads
) {
9289 struct dictEntry
*best
= NULL
;
9290 double best_swappability
= 0;
9291 redisDb
*best_db
= NULL
;
9294 for (j
= 0; j
< server
.dbnum
; j
++) {
9295 redisDb
*db
= server
.db
+j
;
9296 /* Why maxtries is set to 100?
9297 * Because this way (usually) we'll find 1 object even if just 1% - 2%
9298 * are swappable objects */
9301 if (dictSize(db
->dict
) == 0) continue;
9302 for (i
= 0; i
< 5; i
++) {
9304 double swappability
;
9306 if (maxtries
) maxtries
--;
9307 de
= dictGetRandomKey(db
->dict
);
9308 key
= dictGetEntryKey(de
);
9309 val
= dictGetEntryVal(de
);
9310 /* Only swap objects that are currently in memory.
9312 * Also don't swap shared objects if threaded VM is on, as we
9313 * try to ensure that the main thread does not touch the
9314 * object while the I/O thread is using it, but we can't
9315 * control other keys without adding additional mutex. */
9316 if (key
->storage
!= REDIS_VM_MEMORY
||
9317 (server
.vm_max_threads
!= 0 && val
->refcount
!= 1)) {
9318 if (maxtries
) i
--; /* don't count this try */
9321 swappability
= computeObjectSwappability(val
);
9322 if (!best
|| swappability
> best_swappability
) {
9324 best_swappability
= swappability
;
9329 if (best
== NULL
) return REDIS_ERR
;
9330 key
= dictGetEntryKey(best
);
9331 val
= dictGetEntryVal(best
);
9333 redisLog(REDIS_DEBUG
,"Key with best swappability: %s, %f",
9334 key
->ptr
, best_swappability
);
9336 /* Unshare the key if needed */
9337 if (key
->refcount
> 1) {
9338 robj
*newkey
= dupStringObject(key
);
9340 key
= dictGetEntryKey(best
) = newkey
;
9344 vmSwapObjectThreaded(key
,val
,best_db
);
9347 if (vmSwapObjectBlocking(key
,val
) == REDIS_OK
) {
9348 dictGetEntryVal(best
) = NULL
;
/* Try to swap out one object using the blocking (synchronous) code path.
 * Simply returns the result of vmSwapOneObject(0). */
static int vmSwapOneObjectBlocking(void) {
    return vmSwapOneObject(0);
}
/* Try to swap out one object in background using the I/O threads.
 * Simply returns the result of vmSwapOneObject(1). */
static int vmSwapOneObjectThreaded(void) {
    return vmSwapOneObject(1);
}
9364 /* Return true if it's safe to swap out objects in a given moment.
9365 * Basically we don't want to swap objects out while there is a BGSAVE
9366 * or a BGAEOREWRITE running in backgroud. */
9367 static int vmCanSwapOut(void) {
9368 return (server
.bgsavechildpid
== -1 && server
.bgrewritechildpid
== -1);
9371 /* Delete a key if swapped. Returns 1 if the key was found, was swapped
9372 * and was deleted. Otherwise 0 is returned. */
9373 static int deleteIfSwapped(redisDb
*db
, robj
*key
) {
9377 if ((de
= dictFind(db
->dict
,key
)) == NULL
) return 0;
9378 foundkey
= dictGetEntryKey(de
);
9379 if (foundkey
->storage
== REDIS_VM_MEMORY
) return 0;
9384 /* =================== Virtual Memory - Threaded I/O ======================= */
9386 static void freeIOJob(iojob
*j
) {
9387 if ((j
->type
== REDIS_IOJOB_PREPARE_SWAP
||
9388 j
->type
== REDIS_IOJOB_DO_SWAP
||
9389 j
->type
== REDIS_IOJOB_LOAD
) && j
->val
!= NULL
)
9390 decrRefCount(j
->val
);
9391 /* We don't decrRefCount the j->key field as we did't incremented
9392 * the count creating IO Jobs. This is because the key field here is
9393 * just used as an indentifier and if a key is removed the Job should
9394 * never be touched again. */
9398 /* Every time a thread finished a Job, it writes a byte into the write side
9399 * of an unix pipe in order to "awake" the main thread, and this function
9401 static void vmThreadedIOCompletedJob(aeEventLoop
*el
, int fd
, void *privdata
,
9405 int retval
, processed
= 0, toprocess
= -1, trytoswap
= 1;
9407 REDIS_NOTUSED(mask
);
9408 REDIS_NOTUSED(privdata
);
9410 /* For every byte we read in the read side of the pipe, there is one
9411 * I/O job completed to process. */
9412 while((retval
= read(fd
,buf
,1)) == 1) {
9416 struct dictEntry
*de
;
9418 redisLog(REDIS_DEBUG
,"Processing I/O completed job");
9420 /* Get the processed element (the oldest one) */
9422 assert(listLength(server
.io_processed
) != 0);
9423 if (toprocess
== -1) {
9424 toprocess
= (listLength(server
.io_processed
)*REDIS_MAX_COMPLETED_JOBS_PROCESSED
)/100;
9425 if (toprocess
<= 0) toprocess
= 1;
9427 ln
= listFirst(server
.io_processed
);
9429 listDelNode(server
.io_processed
,ln
);
9431 /* If this job is marked as canceled, just ignore it */
9436 /* Post process it in the main thread, as there are things we
9437 * can do just here to avoid race conditions and/or invasive locks */
9438 redisLog(REDIS_DEBUG
,"Job %p type: %d, key at %p (%s) refcount: %d\n", (void*) j
, j
->type
, (void*)j
->key
, (char*)j
->key
->ptr
, j
->key
->refcount
);
9439 de
= dictFind(j
->db
->dict
,j
->key
);
9441 key
= dictGetEntryKey(de
);
9442 if (j
->type
== REDIS_IOJOB_LOAD
) {
9445 /* Key loaded, bring it at home */
9446 key
->storage
= REDIS_VM_MEMORY
;
9447 key
->vm
.atime
= server
.unixtime
;
9448 vmMarkPagesFree(key
->vm
.page
,key
->vm
.usedpages
);
9449 redisLog(REDIS_DEBUG
, "VM: object %s loaded from disk (threaded)",
9450 (unsigned char*) key
->ptr
);
9451 server
.vm_stats_swapped_objects
--;
9452 server
.vm_stats_swapins
++;
9453 dictGetEntryVal(de
) = j
->val
;
9454 incrRefCount(j
->val
);
9457 /* Handle clients waiting for this key to be loaded. */
9458 handleClientsBlockedOnSwappedKey(db
,key
);
9459 } else if (j
->type
== REDIS_IOJOB_PREPARE_SWAP
) {
9460 /* Now we know the amount of pages required to swap this object.
9461 * Let's find some space for it, and queue this task again
9462 * rebranded as REDIS_IOJOB_DO_SWAP. */
9463 if (!vmCanSwapOut() ||
9464 vmFindContiguousPages(&j
->page
,j
->pages
) == REDIS_ERR
)
9466 /* Ooops... no space or we can't swap as there is
9467 * a fork()ed Redis trying to save stuff on disk. */
9469 key
->storage
= REDIS_VM_MEMORY
; /* undo operation */
9471 /* Note that we need to mark this pages as used now,
9472 * if the job will be canceled, we'll mark them as freed
9474 vmMarkPagesUsed(j
->page
,j
->pages
);
9475 j
->type
= REDIS_IOJOB_DO_SWAP
;
9480 } else if (j
->type
== REDIS_IOJOB_DO_SWAP
) {
9483 /* Key swapped. We can finally free some memory. */
9484 if (key
->storage
!= REDIS_VM_SWAPPING
) {
9485 printf("key->storage: %d\n",key
->storage
);
9486 printf("key->name: %s\n",(char*)key
->ptr
);
9487 printf("key->refcount: %d\n",key
->refcount
);
9488 printf("val: %p\n",(void*)j
->val
);
9489 printf("val->type: %d\n",j
->val
->type
);
9490 printf("val->ptr: %s\n",(char*)j
->val
->ptr
);
9492 redisAssert(key
->storage
== REDIS_VM_SWAPPING
);
9493 val
= dictGetEntryVal(de
);
9494 key
->vm
.page
= j
->page
;
9495 key
->vm
.usedpages
= j
->pages
;
9496 key
->storage
= REDIS_VM_SWAPPED
;
9497 key
->vtype
= j
->val
->type
;
9498 decrRefCount(val
); /* Deallocate the object from memory. */
9499 dictGetEntryVal(de
) = NULL
;
9500 redisLog(REDIS_DEBUG
,
9501 "VM: object %s swapped out at %lld (%lld pages) (threaded)",
9502 (unsigned char*) key
->ptr
,
9503 (unsigned long long) j
->page
, (unsigned long long) j
->pages
);
9504 server
.vm_stats_swapped_objects
++;
9505 server
.vm_stats_swapouts
++;
9507 /* Put a few more swap requests in queue if we are still
9509 if (trytoswap
&& vmCanSwapOut() &&
9510 zmalloc_used_memory() > server
.vm_max_memory
)
9515 more
= listLength(server
.io_newjobs
) <
9516 (unsigned) server
.vm_max_threads
;
9518 /* Don't waste CPU time if swappable objects are rare. */
9519 if (vmSwapOneObjectThreaded() == REDIS_ERR
) {
9527 if (processed
== toprocess
) return;
9529 if (retval
< 0 && errno
!= EAGAIN
) {
9530 redisLog(REDIS_WARNING
,
9531 "WARNING: read(2) error in vmThreadedIOCompletedJob() %s",
9536 static void lockThreadedIO(void) {
9537 pthread_mutex_lock(&server
.io_mutex
);
9540 static void unlockThreadedIO(void) {
9541 pthread_mutex_unlock(&server
.io_mutex
);
9544 /* Remove the specified object from the threaded I/O queue if still not
9545 * processed, otherwise make sure to flag it as canceled. */
9546 static void vmCancelThreadedIOJob(robj
*o
) {
9548 server
.io_newjobs
, /* 0 */
9549 server
.io_processing
, /* 1 */
9550 server
.io_processed
/* 2 */
9554 assert(o
->storage
== REDIS_VM_LOADING
|| o
->storage
== REDIS_VM_SWAPPING
);
9557 /* Search for a matching key in one of the queues */
9558 for (i
= 0; i
< 3; i
++) {
9562 listRewind(lists
[i
],&li
);
9563 while ((ln
= listNext(&li
)) != NULL
) {
9564 iojob
*job
= ln
->value
;
9566 if (job
->canceled
) continue; /* Skip this, already canceled. */
9567 if (job
->key
== o
) {
9568 redisLog(REDIS_DEBUG
,"*** CANCELED %p (%s) (type %d) (LIST ID %d)\n",
9569 (void*)job
, (char*)o
->ptr
, job
->type
, i
);
9570 /* Mark the pages as free since the swap didn't happened
9571 * or happened but is now discarded. */
9572 if (i
!= 1 && job
->type
== REDIS_IOJOB_DO_SWAP
)
9573 vmMarkPagesFree(job
->page
,job
->pages
);
9574 /* Cancel the job. It depends on the list the job is
9577 case 0: /* io_newjobs */
9578 /* If the job was yet not processed the best thing to do
9579 * is to remove it from the queue at all */
9581 listDelNode(lists
[i
],ln
);
9583 case 1: /* io_processing */
9584 /* Oh Shi- the thread is messing with the Job:
9586 * Probably it's accessing the object if this is a
9587 * PREPARE_SWAP or DO_SWAP job.
9588 * If it's a LOAD job it may be reading from disk and
9589 * if we don't wait for the job to terminate before to
9590 * cancel it, maybe in a few microseconds data can be
9591 * corrupted in this pages. So the short story is:
9593 * Better to wait for the job to move into the
9594 * next queue (processed)... */
9596 /* We try again and again until the job is completed. */
9598 /* But let's wait some time for the I/O thread
9599 * to finish with this job. After all this condition
9600 * should be very rare. */
9603 case 2: /* io_processed */
9604 /* The job was already processed, that's easy...
9605 * just mark it as canceled so that we'll ignore it
9606 * when processing completed jobs. */
9610 /* Finally we have to adjust the storage type of the object
9611 * in order to "UNDO" the operaiton. */
9612 if (o
->storage
== REDIS_VM_LOADING
)
9613 o
->storage
= REDIS_VM_SWAPPED
;
9614 else if (o
->storage
== REDIS_VM_SWAPPING
)
9615 o
->storage
= REDIS_VM_MEMORY
;
9622 assert(1 != 1); /* We should never reach this */
9625 static void *IOThreadEntryPoint(void *arg
) {
9630 pthread_detach(pthread_self());
9632 /* Get a new job to process */
9634 if (listLength(server
.io_newjobs
) == 0) {
9635 /* No new jobs in queue, exit. */
9636 redisLog(REDIS_DEBUG
,"Thread %ld exiting, nothing to do",
9637 (long) pthread_self());
9638 server
.io_active_threads
--;
9642 ln
= listFirst(server
.io_newjobs
);
9644 listDelNode(server
.io_newjobs
,ln
);
9645 /* Add the job in the processing queue */
9646 j
->thread
= pthread_self();
9647 listAddNodeTail(server
.io_processing
,j
);
9648 ln
= listLast(server
.io_processing
); /* We use ln later to remove it */
9650 redisLog(REDIS_DEBUG
,"Thread %ld got a new job (type %d): %p about key '%s'",
9651 (long) pthread_self(), j
->type
, (void*)j
, (char*)j
->key
->ptr
);
9653 /* Process the Job */
9654 if (j
->type
== REDIS_IOJOB_LOAD
) {
9655 j
->val
= vmReadObjectFromSwap(j
->page
,j
->key
->vtype
);
9656 } else if (j
->type
== REDIS_IOJOB_PREPARE_SWAP
) {
9657 FILE *fp
= fopen("/dev/null","w+");
9658 j
->pages
= rdbSavedObjectPages(j
->val
,fp
);
9660 } else if (j
->type
== REDIS_IOJOB_DO_SWAP
) {
9661 if (vmWriteObjectOnSwap(j
->val
,j
->page
) == REDIS_ERR
)
9665 /* Done: insert the job into the processed queue */
9666 redisLog(REDIS_DEBUG
,"Thread %ld completed the job: %p (key %s)",
9667 (long) pthread_self(), (void*)j
, (char*)j
->key
->ptr
);
9669 listDelNode(server
.io_processing
,ln
);
9670 listAddNodeTail(server
.io_processed
,j
);
9673 /* Signal the main thread there is new stuff to process */
9674 assert(write(server
.io_ready_pipe_write
,"x",1) == 1);
9676 return NULL
; /* never reached */
9679 static void spawnIOThread(void) {
9681 sigset_t mask
, omask
;
9685 sigaddset(&mask
,SIGCHLD
);
9686 sigaddset(&mask
,SIGHUP
);
9687 sigaddset(&mask
,SIGPIPE
);
9688 pthread_sigmask(SIG_SETMASK
, &mask
, &omask
);
9689 while ((err
= pthread_create(&thread
,&server
.io_threads_attr
,IOThreadEntryPoint
,NULL
)) != 0) {
9690 redisLog(REDIS_WARNING
,"Unable to spawn an I/O thread: %s",
9694 pthread_sigmask(SIG_SETMASK
, &omask
, NULL
);
9695 server
.io_active_threads
++;
9698 /* We need to wait for the last thread to exit before we are able to
9699 * fork() in order to BGSAVE or BGREWRITEAOF. */
9700 static void waitEmptyIOJobsQueue(void) {
9702 int io_processed_len
;
9705 if (listLength(server
.io_newjobs
) == 0 &&
9706 listLength(server
.io_processing
) == 0 &&
9707 server
.io_active_threads
== 0)
9712 /* While waiting for empty jobs queue condition we post-process some
9713 * finshed job, as I/O threads may be hanging trying to write against
9714 * the io_ready_pipe_write FD but there are so much pending jobs that
9716 io_processed_len
= listLength(server
.io_processed
);
9718 if (io_processed_len
) {
9719 vmThreadedIOCompletedJob(NULL
,server
.io_ready_pipe_read
,NULL
,0);
9720 usleep(1000); /* 1 millisecond */
9722 usleep(10000); /* 10 milliseconds */
9727 static void vmReopenSwapFile(void) {
9728 /* Note: we don't close the old one as we are in the child process
9729 * and don't want to mess at all with the original file object. */
9730 server
.vm_fp
= fopen(server
.vm_swap_file
,"r+b");
9731 if (server
.vm_fp
== NULL
) {
9732 redisLog(REDIS_WARNING
,"Can't re-open the VM swap file: %s. Exiting.",
9733 server
.vm_swap_file
);
9736 server
.vm_fd
= fileno(server
.vm_fp
);
9739 /* This function must be called while with threaded IO locked */
9740 static void queueIOJob(iojob
*j
) {
9741 redisLog(REDIS_DEBUG
,"Queued IO Job %p type %d about key '%s'\n",
9742 (void*)j
, j
->type
, (char*)j
->key
->ptr
);
9743 listAddNodeTail(server
.io_newjobs
,j
);
9744 if (server
.io_active_threads
< server
.vm_max_threads
)
9748 static int vmSwapObjectThreaded(robj
*key
, robj
*val
, redisDb
*db
) {
9751 assert(key
->storage
== REDIS_VM_MEMORY
);
9752 assert(key
->refcount
== 1);
9754 j
= zmalloc(sizeof(*j
));
9755 j
->type
= REDIS_IOJOB_PREPARE_SWAP
;
9761 j
->thread
= (pthread_t
) -1;
9762 key
->storage
= REDIS_VM_SWAPPING
;
9770 /* ============ Virtual Memory - Blocking clients on missing keys =========== */
9772 /* This function makes the clinet 'c' waiting for the key 'key' to be loaded.
9773 * If there is not already a job loading the key, it is craeted.
9774 * The key is added to the io_keys list in the client structure, and also
9775 * in the hash table mapping swapped keys to waiting clients, that is,
9776 * server.io_waited_keys. */
9777 static int waitForSwappedKey(redisClient
*c
, robj
*key
) {
9778 struct dictEntry
*de
;
9782 /* If the key does not exist or is already in RAM we don't need to
9783 * block the client at all. */
9784 de
= dictFind(c
->db
->dict
,key
);
9785 if (de
== NULL
) return 0;
9786 o
= dictGetEntryKey(de
);
9787 if (o
->storage
== REDIS_VM_MEMORY
) {
9789 } else if (o
->storage
== REDIS_VM_SWAPPING
) {
9790 /* We were swapping the key, undo it! */
9791 vmCancelThreadedIOJob(o
);
9795 /* OK: the key is either swapped, or being loaded just now. */
9797 /* Add the key to the list of keys this client is waiting for.
9798 * This maps clients to keys they are waiting for. */
9799 listAddNodeTail(c
->io_keys
,key
);
9802 /* Add the client to the swapped keys => clients waiting map. */
9803 de
= dictFind(c
->db
->io_keys
,key
);
9807 /* For every key we take a list of clients blocked for it */
9809 retval
= dictAdd(c
->db
->io_keys
,key
,l
);
9811 assert(retval
== DICT_OK
);
9813 l
= dictGetEntryVal(de
);
9815 listAddNodeTail(l
,c
);
9817 /* Are we already loading the key from disk? If not create a job */
9818 if (o
->storage
== REDIS_VM_SWAPPED
) {
9821 o
->storage
= REDIS_VM_LOADING
;
9822 j
= zmalloc(sizeof(*j
));
9823 j
->type
= REDIS_IOJOB_LOAD
;
9826 j
->key
->vtype
= o
->vtype
;
9827 j
->page
= o
->vm
.page
;
9830 j
->thread
= (pthread_t
) -1;
9838 /* Preload keys for any command with first, last and step values for
9839 * the command keys prototype, as defined in the command table. */
9840 static void waitForMultipleSwappedKeys(redisClient
*c
, struct redisCommand
*cmd
, int argc
, robj
**argv
) {
9842 if (cmd
->vm_firstkey
== 0) return;
9843 last
= cmd
->vm_lastkey
;
9844 if (last
< 0) last
= argc
+last
;
9845 for (j
= cmd
->vm_firstkey
; j
<= last
; j
+= cmd
->vm_keystep
) {
9846 redisAssert(j
< argc
);
9847 waitForSwappedKey(c
,argv
[j
]);
9851 /* Preload keys needed for the ZUNIONSTORE and ZINTERSTORE commands.
9852 * Note that the number of keys to preload is user-defined, so we need to
9853 * apply a sanity check against argc. */
9854 static void zunionInterBlockClientOnSwappedKeys(redisClient
*c
, struct redisCommand
*cmd
, int argc
, robj
**argv
) {
9858 num
= atoi(argv
[2]->ptr
);
9859 if (num
> (argc
-3)) return;
9860 for (i
= 0; i
< num
; i
++) {
9861 waitForSwappedKey(c
,argv
[3+i
]);
9865 /* Preload keys needed to execute the entire MULTI/EXEC block.
9867 * This function is called by blockClientOnSwappedKeys when EXEC is issued,
9868 * and will block the client when any command requires a swapped out value. */
9869 static void execBlockClientOnSwappedKeys(redisClient
*c
, struct redisCommand
*cmd
, int argc
, robj
**argv
) {
9871 struct redisCommand
*mcmd
;
9874 REDIS_NOTUSED(argc
);
9875 REDIS_NOTUSED(argv
);
9877 if (!(c
->flags
& REDIS_MULTI
)) return;
9878 for (i
= 0; i
< c
->mstate
.count
; i
++) {
9879 mcmd
= c
->mstate
.commands
[i
].cmd
;
9880 margc
= c
->mstate
.commands
[i
].argc
;
9881 margv
= c
->mstate
.commands
[i
].argv
;
9883 if (mcmd
->vm_preload_proc
!= NULL
) {
9884 mcmd
->vm_preload_proc(c
,mcmd
,margc
,margv
);
9886 waitForMultipleSwappedKeys(c
,mcmd
,margc
,margv
);
9891 /* Is this client attempting to run a command against swapped keys?
9892 * If so, block it ASAP, load the keys in background, then resume it.
9894 * The important idea about this function is that it can fail! If keys will
9895 * still be swapped when the client is resumed, this key lookups will
9896 * just block loading keys from disk. In practical terms this should only
9897 * happen with SORT BY command or if there is a bug in this function.
9899 * Return 1 if the client is marked as blocked, 0 if the client can
9900 * continue as the keys it is going to access appear to be in memory. */
9901 static int blockClientOnSwappedKeys(redisClient
*c
, struct redisCommand
*cmd
) {
9902 if (cmd
->vm_preload_proc
!= NULL
) {
9903 cmd
->vm_preload_proc(c
,cmd
,c
->argc
,c
->argv
);
9905 waitForMultipleSwappedKeys(c
,cmd
,c
->argc
,c
->argv
);
9908 /* If the client was blocked for at least one key, mark it as blocked. */
9909 if (listLength(c
->io_keys
)) {
9910 c
->flags
|= REDIS_IO_WAIT
;
9911 aeDeleteFileEvent(server
.el
,c
->fd
,AE_READABLE
);
9912 server
.vm_blocked_clients
++;
9919 /* Remove the 'key' from the list of blocked keys for a given client.
9921 * The function returns 1 when there are no longer blocking keys after
9922 * the current one was removed (and the client can be unblocked). */
9923 static int dontWaitForSwappedKey(redisClient
*c
, robj
*key
) {
9927 struct dictEntry
*de
;
9929 /* Remove the key from the list of keys this client is waiting for. */
9930 listRewind(c
->io_keys
,&li
);
9931 while ((ln
= listNext(&li
)) != NULL
) {
9932 if (equalStringObjects(ln
->value
,key
)) {
9933 listDelNode(c
->io_keys
,ln
);
9939 /* Remove the client form the key => waiting clients map. */
9940 de
= dictFind(c
->db
->io_keys
,key
);
9942 l
= dictGetEntryVal(de
);
9943 ln
= listSearchKey(l
,c
);
9946 if (listLength(l
) == 0)
9947 dictDelete(c
->db
->io_keys
,key
);
9949 return listLength(c
->io_keys
) == 0;
9952 static void handleClientsBlockedOnSwappedKey(redisDb
*db
, robj
*key
) {
9953 struct dictEntry
*de
;
9958 de
= dictFind(db
->io_keys
,key
);
9961 l
= dictGetEntryVal(de
);
9962 len
= listLength(l
);
9963 /* Note: we can't use something like while(listLength(l)) as the list
9964 * can be freed by the calling function when we remove the last element. */
9967 redisClient
*c
= ln
->value
;
9969 if (dontWaitForSwappedKey(c
,key
)) {
9970 /* Put the client in the list of clients ready to go as we
9971 * loaded all the keys about it. */
9972 listAddNodeTail(server
.io_ready_clients
,c
);
9977 /* =========================== Remote Configuration ========================= */
9979 static void configSetCommand(redisClient
*c
) {
9980 robj
*o
= getDecodedObject(c
->argv
[3]);
9983 if (!strcasecmp(c
->argv
[2]->ptr
,"dbfilename")) {
9984 zfree(server
.dbfilename
);
9985 server
.dbfilename
= zstrdup(o
->ptr
);
9986 } else if (!strcasecmp(c
->argv
[2]->ptr
,"requirepass")) {
9987 zfree(server
.requirepass
);
9988 server
.requirepass
= zstrdup(o
->ptr
);
9989 } else if (!strcasecmp(c
->argv
[2]->ptr
,"masterauth")) {
9990 zfree(server
.masterauth
);
9991 server
.masterauth
= zstrdup(o
->ptr
);
9992 } else if (!strcasecmp(c
->argv
[2]->ptr
,"maxmemory")) {
9993 if (getLongLongFromObject(o
,&ll
) == REDIS_ERR
||
9994 ll
< 0) goto badfmt
;
9995 server
.maxmemory
= ll
;
9996 } else if (!strcasecmp(c
->argv
[2]->ptr
,"timeout")) {
9997 if (getLongLongFromObject(o
,&ll
) == REDIS_ERR
||
9998 ll
< 0 || ll
> LONG_MAX
) goto badfmt
;
9999 server
.maxidletime
= ll
;
10000 } else if (!strcasecmp(c
->argv
[2]->ptr
,"appendfsync")) {
10001 if (!strcasecmp(o
->ptr
,"no")) {
10002 server
.appendfsync
= APPENDFSYNC_NO
;
10003 } else if (!strcasecmp(o
->ptr
,"everysec")) {
10004 server
.appendfsync
= APPENDFSYNC_EVERYSEC
;
10005 } else if (!strcasecmp(o
->ptr
,"always")) {
10006 server
.appendfsync
= APPENDFSYNC_ALWAYS
;
10010 } else if (!strcasecmp(c
->argv
[2]->ptr
,"no-appendfsync-on-rewrite")) {
10011 int yn
= yesnotoi(o
->ptr
);
10013 if (yn
== -1) goto badfmt
;
10014 server
.no_appendfsync_on_rewrite
= yn
;
10015 } else if (!strcasecmp(c
->argv
[2]->ptr
,"appendonly")) {
10016 int old
= server
.appendonly
;
10017 int new = yesnotoi(o
->ptr
);
10019 if (new == -1) goto badfmt
;
10024 if (startAppendOnly() == REDIS_ERR
) {
10025 addReplySds(c
,sdscatprintf(sdsempty(),
10026 "-ERR Unable to turn on AOF. Check server logs.\r\n"));
10032 } else if (!strcasecmp(c
->argv
[2]->ptr
,"save")) {
10034 sds
*v
= sdssplitlen(o
->ptr
,sdslen(o
->ptr
)," ",1,&vlen
);
10036 /* Perform sanity check before setting the new config:
10037 * - Even number of args
10038 * - Seconds >= 1, changes >= 0 */
10040 sdsfreesplitres(v
,vlen
);
10043 for (j
= 0; j
< vlen
; j
++) {
10047 val
= strtoll(v
[j
], &eptr
, 10);
10048 if (eptr
[0] != '\0' ||
10049 ((j
& 1) == 0 && val
< 1) ||
10050 ((j
& 1) == 1 && val
< 0)) {
10051 sdsfreesplitres(v
,vlen
);
10055 /* Finally set the new config */
10056 resetServerSaveParams();
10057 for (j
= 0; j
< vlen
; j
+= 2) {
10061 seconds
= strtoll(v
[j
],NULL
,10);
10062 changes
= strtoll(v
[j
+1],NULL
,10);
10063 appendServerSaveParams(seconds
, changes
);
10065 sdsfreesplitres(v
,vlen
);
10067 addReplySds(c
,sdscatprintf(sdsempty(),
10068 "-ERR not supported CONFIG parameter %s\r\n",
10069 (char*)c
->argv
[2]->ptr
));
10074 addReply(c
,shared
.ok
);
10077 badfmt
: /* Bad format errors */
10078 addReplySds(c
,sdscatprintf(sdsempty(),
10079 "-ERR invalid argument '%s' for CONFIG SET '%s'\r\n",
10081 (char*)c
->argv
[2]->ptr
));
10085 static void configGetCommand(redisClient
*c
) {
10086 robj
*o
= getDecodedObject(c
->argv
[2]);
10087 robj
*lenobj
= createObject(REDIS_STRING
,NULL
);
10088 char *pattern
= o
->ptr
;
10091 addReply(c
,lenobj
);
10092 decrRefCount(lenobj
);
10094 if (stringmatch(pattern
,"dbfilename",0)) {
10095 addReplyBulkCString(c
,"dbfilename");
10096 addReplyBulkCString(c
,server
.dbfilename
);
10099 if (stringmatch(pattern
,"requirepass",0)) {
10100 addReplyBulkCString(c
,"requirepass");
10101 addReplyBulkCString(c
,server
.requirepass
);
10104 if (stringmatch(pattern
,"masterauth",0)) {
10105 addReplyBulkCString(c
,"masterauth");
10106 addReplyBulkCString(c
,server
.masterauth
);
10109 if (stringmatch(pattern
,"maxmemory",0)) {
10112 ll2string(buf
,128,server
.maxmemory
);
10113 addReplyBulkCString(c
,"maxmemory");
10114 addReplyBulkCString(c
,buf
);
10117 if (stringmatch(pattern
,"timeout",0)) {
10120 ll2string(buf
,128,server
.maxidletime
);
10121 addReplyBulkCString(c
,"timeout");
10122 addReplyBulkCString(c
,buf
);
10125 if (stringmatch(pattern
,"appendonly",0)) {
10126 addReplyBulkCString(c
,"appendonly");
10127 addReplyBulkCString(c
,server
.appendonly
? "yes" : "no");
10130 if (stringmatch(pattern
,"no-appendfsync-on-rewrite",0)) {
10131 addReplyBulkCString(c
,"no-appendfsync-on-rewrite");
10132 addReplyBulkCString(c
,server
.no_appendfsync_on_rewrite
? "yes" : "no");
10135 if (stringmatch(pattern
,"appendfsync",0)) {
10138 switch(server
.appendfsync
) {
10139 case APPENDFSYNC_NO
: policy
= "no"; break;
10140 case APPENDFSYNC_EVERYSEC
: policy
= "everysec"; break;
10141 case APPENDFSYNC_ALWAYS
: policy
= "always"; break;
10142 default: policy
= "unknown"; break; /* too harmless to panic */
10144 addReplyBulkCString(c
,"appendfsync");
10145 addReplyBulkCString(c
,policy
);
10148 if (stringmatch(pattern
,"save",0)) {
10149 sds buf
= sdsempty();
10152 for (j
= 0; j
< server
.saveparamslen
; j
++) {
10153 buf
= sdscatprintf(buf
,"%ld %d",
10154 server
.saveparams
[j
].seconds
,
10155 server
.saveparams
[j
].changes
);
10156 if (j
!= server
.saveparamslen
-1)
10157 buf
= sdscatlen(buf
," ",1);
10159 addReplyBulkCString(c
,"save");
10160 addReplyBulkCString(c
,buf
);
10165 lenobj
->ptr
= sdscatprintf(sdsempty(),"*%d\r\n",matches
*2);
10168 static void configCommand(redisClient
*c
) {
10169 if (!strcasecmp(c
->argv
[1]->ptr
,"set")) {
10170 if (c
->argc
!= 4) goto badarity
;
10171 configSetCommand(c
);
10172 } else if (!strcasecmp(c
->argv
[1]->ptr
,"get")) {
10173 if (c
->argc
!= 3) goto badarity
;
10174 configGetCommand(c
);
10175 } else if (!strcasecmp(c
->argv
[1]->ptr
,"resetstat")) {
10176 if (c
->argc
!= 2) goto badarity
;
10177 server
.stat_numcommands
= 0;
10178 server
.stat_numconnections
= 0;
10179 server
.stat_expiredkeys
= 0;
10180 server
.stat_starttime
= time(NULL
);
10181 addReply(c
,shared
.ok
);
10183 addReplySds(c
,sdscatprintf(sdsempty(),
10184 "-ERR CONFIG subcommand must be one of GET, SET, RESETSTAT\r\n"));
10189 addReplySds(c
,sdscatprintf(sdsempty(),
10190 "-ERR Wrong number of arguments for CONFIG %s\r\n",
10191 (char*) c
->argv
[1]->ptr
));
10194 /* =========================== Pubsub implementation ======================== */
10196 static void freePubsubPattern(void *p
) {
10197 pubsubPattern
*pat
= p
;
10199 decrRefCount(pat
->pattern
);
10203 static int listMatchPubsubPattern(void *a
, void *b
) {
10204 pubsubPattern
*pa
= a
, *pb
= b
;
10206 return (pa
->client
== pb
->client
) &&
10207 (equalStringObjects(pa
->pattern
,pb
->pattern
));
10210 /* Subscribe a client to a channel. Returns 1 if the operation succeeded, or
10211 * 0 if the client was already subscribed to that channel. */
10212 static int pubsubSubscribeChannel(redisClient
*c
, robj
*channel
) {
10213 struct dictEntry
*de
;
10214 list
*clients
= NULL
;
10217 /* Add the channel to the client -> channels hash table */
10218 if (dictAdd(c
->pubsub_channels
,channel
,NULL
) == DICT_OK
) {
10220 incrRefCount(channel
);
10221 /* Add the client to the channel -> list of clients hash table */
10222 de
= dictFind(server
.pubsub_channels
,channel
);
10224 clients
= listCreate();
10225 dictAdd(server
.pubsub_channels
,channel
,clients
);
10226 incrRefCount(channel
);
10228 clients
= dictGetEntryVal(de
);
10230 listAddNodeTail(clients
,c
);
10232 /* Notify the client */
10233 addReply(c
,shared
.mbulk3
);
10234 addReply(c
,shared
.subscribebulk
);
10235 addReplyBulk(c
,channel
);
10236 addReplyLongLong(c
,dictSize(c
->pubsub_channels
)+listLength(c
->pubsub_patterns
));
10240 /* Unsubscribe a client from a channel. Returns 1 if the operation succeeded, or
10241 * 0 if the client was not subscribed to the specified channel. */
10242 static int pubsubUnsubscribeChannel(redisClient
*c
, robj
*channel
, int notify
) {
10243 struct dictEntry
*de
;
10248 /* Remove the channel from the client -> channels hash table */
10249 incrRefCount(channel
); /* channel may be just a pointer to the same object
10250 we have in the hash tables. Protect it... */
10251 if (dictDelete(c
->pubsub_channels
,channel
) == DICT_OK
) {
10253 /* Remove the client from the channel -> clients list hash table */
10254 de
= dictFind(server
.pubsub_channels
,channel
);
10255 assert(de
!= NULL
);
10256 clients
= dictGetEntryVal(de
);
10257 ln
= listSearchKey(clients
,c
);
10258 assert(ln
!= NULL
);
10259 listDelNode(clients
,ln
);
10260 if (listLength(clients
) == 0) {
10261 /* Free the list and associated hash entry at all if this was
10262 * the latest client, so that it will be possible to abuse
10263 * Redis PUBSUB creating millions of channels. */
10264 dictDelete(server
.pubsub_channels
,channel
);
10267 /* Notify the client */
10269 addReply(c
,shared
.mbulk3
);
10270 addReply(c
,shared
.unsubscribebulk
);
10271 addReplyBulk(c
,channel
);
10272 addReplyLongLong(c
,dictSize(c
->pubsub_channels
)+
10273 listLength(c
->pubsub_patterns
));
10276 decrRefCount(channel
); /* it is finally safe to release it */
10280 /* Subscribe a client to a pattern. Returns 1 if the operation succeeded, or 0 if the clinet was already subscribed to that pattern. */
10281 static int pubsubSubscribePattern(redisClient
*c
, robj
*pattern
) {
10284 if (listSearchKey(c
->pubsub_patterns
,pattern
) == NULL
) {
10286 pubsubPattern
*pat
;
10287 listAddNodeTail(c
->pubsub_patterns
,pattern
);
10288 incrRefCount(pattern
);
10289 pat
= zmalloc(sizeof(*pat
));
10290 pat
->pattern
= getDecodedObject(pattern
);
10292 listAddNodeTail(server
.pubsub_patterns
,pat
);
10294 /* Notify the client */
10295 addReply(c
,shared
.mbulk3
);
10296 addReply(c
,shared
.psubscribebulk
);
10297 addReplyBulk(c
,pattern
);
10298 addReplyLongLong(c
,dictSize(c
->pubsub_channels
)+listLength(c
->pubsub_patterns
));
10302 /* Unsubscribe a client from a channel. Returns 1 if the operation succeeded, or
10303 * 0 if the client was not subscribed to the specified channel. */
10304 static int pubsubUnsubscribePattern(redisClient
*c
, robj
*pattern
, int notify
) {
10309 incrRefCount(pattern
); /* Protect the object. May be the same we remove */
10310 if ((ln
= listSearchKey(c
->pubsub_patterns
,pattern
)) != NULL
) {
10312 listDelNode(c
->pubsub_patterns
,ln
);
10314 pat
.pattern
= pattern
;
10315 ln
= listSearchKey(server
.pubsub_patterns
,&pat
);
10316 listDelNode(server
.pubsub_patterns
,ln
);
10318 /* Notify the client */
10320 addReply(c
,shared
.mbulk3
);
10321 addReply(c
,shared
.punsubscribebulk
);
10322 addReplyBulk(c
,pattern
);
10323 addReplyLongLong(c
,dictSize(c
->pubsub_channels
)+
10324 listLength(c
->pubsub_patterns
));
10326 decrRefCount(pattern
);
10330 /* Unsubscribe from all the channels. Return the number of channels the
10331 * client was subscribed from. */
10332 static int pubsubUnsubscribeAllChannels(redisClient
*c
, int notify
) {
10333 dictIterator
*di
= dictGetIterator(c
->pubsub_channels
);
10337 while((de
= dictNext(di
)) != NULL
) {
10338 robj
*channel
= dictGetEntryKey(de
);
10340 count
+= pubsubUnsubscribeChannel(c
,channel
,notify
);
10342 dictReleaseIterator(di
);
10346 /* Unsubscribe from all the patterns. Return the number of patterns the
10347 * client was subscribed from. */
10348 static int pubsubUnsubscribeAllPatterns(redisClient
*c
, int notify
) {
10353 listRewind(c
->pubsub_patterns
,&li
);
10354 while ((ln
= listNext(&li
)) != NULL
) {
10355 robj
*pattern
= ln
->value
;
10357 count
+= pubsubUnsubscribePattern(c
,pattern
,notify
);
10362 /* Publish a message */
10363 static int pubsubPublishMessage(robj
*channel
, robj
*message
) {
10365 struct dictEntry
*de
;
10369 /* Send to clients listening for that channel */
10370 de
= dictFind(server
.pubsub_channels
,channel
);
10372 list
*list
= dictGetEntryVal(de
);
10376 listRewind(list
,&li
);
10377 while ((ln
= listNext(&li
)) != NULL
) {
10378 redisClient
*c
= ln
->value
;
10380 addReply(c
,shared
.mbulk3
);
10381 addReply(c
,shared
.messagebulk
);
10382 addReplyBulk(c
,channel
);
10383 addReplyBulk(c
,message
);
10387 /* Send to clients listening to matching channels */
10388 if (listLength(server
.pubsub_patterns
)) {
10389 listRewind(server
.pubsub_patterns
,&li
);
10390 channel
= getDecodedObject(channel
);
10391 while ((ln
= listNext(&li
)) != NULL
) {
10392 pubsubPattern
*pat
= ln
->value
;
10394 if (stringmatchlen((char*)pat
->pattern
->ptr
,
10395 sdslen(pat
->pattern
->ptr
),
10396 (char*)channel
->ptr
,
10397 sdslen(channel
->ptr
),0)) {
10398 addReply(pat
->client
,shared
.mbulk4
);
10399 addReply(pat
->client
,shared
.pmessagebulk
);
10400 addReplyBulk(pat
->client
,pat
->pattern
);
10401 addReplyBulk(pat
->client
,channel
);
10402 addReplyBulk(pat
->client
,message
);
10406 decrRefCount(channel
);
10411 static void subscribeCommand(redisClient
*c
) {
10414 for (j
= 1; j
< c
->argc
; j
++)
10415 pubsubSubscribeChannel(c
,c
->argv
[j
]);
10418 static void unsubscribeCommand(redisClient
*c
) {
10419 if (c
->argc
== 1) {
10420 pubsubUnsubscribeAllChannels(c
,1);
10425 for (j
= 1; j
< c
->argc
; j
++)
10426 pubsubUnsubscribeChannel(c
,c
->argv
[j
],1);
10430 static void psubscribeCommand(redisClient
*c
) {
10433 for (j
= 1; j
< c
->argc
; j
++)
10434 pubsubSubscribePattern(c
,c
->argv
[j
]);
10437 static void punsubscribeCommand(redisClient
*c
) {
10438 if (c
->argc
== 1) {
10439 pubsubUnsubscribeAllPatterns(c
,1);
10444 for (j
= 1; j
< c
->argc
; j
++)
10445 pubsubUnsubscribePattern(c
,c
->argv
[j
],1);
10449 static void publishCommand(redisClient
*c
) {
10450 int receivers
= pubsubPublishMessage(c
->argv
[1],c
->argv
[2]);
10451 addReplyLongLong(c
,receivers
);
10454 /* ===================== WATCH (CAS alike for MULTI/EXEC) ===================
10456 * The implementation uses a per-DB hash table mapping keys to list of clients
10457 * WATCHing those keys, so that given a key that is going to be modified
10458 * we can mark all the associated clients as dirty.
10460 * Also every client contains a list of WATCHed keys so that's possible to
10461 * un-watch such keys when the client is freed or when UNWATCH is called. */
10463 /* In the client->watched_keys list we need to use watchedKey structures
10464 * as in order to identify a key in Redis we need both the key name and the
10466 typedef struct watchedKey
{
10471 /* Watch for the specified key */
10472 static void watchForKey(redisClient
*c
, robj
*key
) {
10473 list
*clients
= NULL
;
10478 /* Check if we are already watching for this key */
10479 listRewind(c
->watched_keys
,&li
);
10480 while((ln
= listNext(&li
))) {
10481 wk
= listNodeValue(ln
);
10482 if (wk
->db
== c
->db
&& equalStringObjects(key
,wk
->key
))
10483 return; /* Key already watched */
10485 /* This key is not already watched in this DB. Let's add it */
10486 clients
= dictFetchValue(c
->db
->watched_keys
,key
);
10488 clients
= listCreate();
10489 dictAdd(c
->db
->watched_keys
,key
,clients
);
10492 listAddNodeTail(clients
,c
);
10493 /* Add the new key to the lits of keys watched by this client */
10494 wk
= zmalloc(sizeof(*wk
));
10498 listAddNodeTail(c
->watched_keys
,wk
);
10501 /* Unwatch all the keys watched by this client. To clean the EXEC dirty
10502 * flag is up to the caller. */
10503 static void unwatchAllKeys(redisClient
*c
) {
10507 if (listLength(c
->watched_keys
) == 0) return;
10508 listRewind(c
->watched_keys
,&li
);
10509 while((ln
= listNext(&li
))) {
10513 /* Lookup the watched key -> clients list and remove the client
10515 wk
= listNodeValue(ln
);
10516 clients
= dictFetchValue(wk
->db
->watched_keys
, wk
->key
);
10517 assert(clients
!= NULL
);
10518 listDelNode(clients
,listSearchKey(clients
,c
));
10519 /* Kill the entry at all if this was the only client */
10520 if (listLength(clients
) == 0)
10521 dictDelete(wk
->db
->watched_keys
, wk
->key
);
10522 /* Remove this watched key from the client->watched list */
10523 listDelNode(c
->watched_keys
,ln
);
10524 decrRefCount(wk
->key
);
10529 /* "Touch" a key, so that if this key is being WATCHed by some client the
10530 * next EXEC will fail. */
10531 static void touchWatchedKey(redisDb
*db
, robj
*key
) {
10536 if (dictSize(db
->watched_keys
) == 0) return;
10537 clients
= dictFetchValue(db
->watched_keys
, key
);
10538 if (!clients
) return;
10540 /* Mark all the clients watching this key as REDIS_DIRTY_CAS */
10541 /* Check if we are already watching for this key */
10542 listRewind(clients
,&li
);
10543 while((ln
= listNext(&li
))) {
10544 redisClient
*c
= listNodeValue(ln
);
10546 c
->flags
|= REDIS_DIRTY_CAS
;
10550 /* On FLUSHDB or FLUSHALL all the watched keys that are present before the
10551 * flush but will be deleted as effect of the flushing operation should
10552 * be touched. "dbid" is the DB that's getting the flush. -1 if it is
10553 * a FLUSHALL operation (all the DBs flushed). */
10554 static void touchWatchedKeysOnFlush(int dbid
) {
10558 /* For every client, check all the waited keys */
10559 listRewind(server
.clients
,&li1
);
10560 while((ln
= listNext(&li1
))) {
10561 redisClient
*c
= listNodeValue(ln
);
10562 listRewind(c
->watched_keys
,&li2
);
10563 while((ln
= listNext(&li2
))) {
10564 watchedKey
*wk
= listNodeValue(ln
);
10566 /* For every watched key matching the specified DB, if the
10567 * key exists, mark the client as dirty, as the key will be
10569 if (dbid
== -1 || wk
->db
->id
== dbid
) {
10570 if (dictFind(wk
->db
->dict
, wk
->key
) != NULL
)
10571 c
->flags
|= REDIS_DIRTY_CAS
;
10577 static void watchCommand(redisClient
*c
) {
10580 if (c
->flags
& REDIS_MULTI
) {
10581 addReplySds(c
,sdsnew("-ERR WATCH inside MULTI is not allowed\r\n"));
10584 for (j
= 1; j
< c
->argc
; j
++)
10585 watchForKey(c
,c
->argv
[j
]);
10586 addReply(c
,shared
.ok
);
10589 static void unwatchCommand(redisClient
*c
) {
10591 c
->flags
&= (~REDIS_DIRTY_CAS
);
10592 addReply(c
,shared
.ok
);
10595 /* ================================= Debugging ============================== */
10597 /* Compute the sha1 of string at 's' with 'len' bytes long.
10598 * The SHA1 is then xored againt the string pointed by digest.
10599 * Since xor is commutative, this operation is used in order to
10600 * "add" digests relative to unordered elements.
10602 * So digest(a,b,c,d) will be the same of digest(b,a,c,d) */
10603 static void xorDigest(unsigned char *digest
, void *ptr
, size_t len
) {
10605 unsigned char hash
[20], *s
= ptr
;
10609 SHA1Update(&ctx
,s
,len
);
10610 SHA1Final(hash
,&ctx
);
10612 for (j
= 0; j
< 20; j
++)
10613 digest
[j
] ^= hash
[j
];
10616 static void xorObjectDigest(unsigned char *digest
, robj
*o
) {
10617 o
= getDecodedObject(o
);
10618 xorDigest(digest
,o
->ptr
,sdslen(o
->ptr
));
10622 /* This function instead of just computing the SHA1 and xoring it
10623 * against diget, also perform the digest of "digest" itself and
10624 * replace the old value with the new one.
10626 * So the final digest will be:
10628 * digest = SHA1(digest xor SHA1(data))
10630 * This function is used every time we want to preserve the order so
10631 * that digest(a,b,c,d) will be different than digest(b,c,d,a)
10633 * Also note that mixdigest("foo") followed by mixdigest("bar")
10634 * will lead to a different digest compared to "fo", "obar".
10636 static void mixDigest(unsigned char *digest
, void *ptr
, size_t len
) {
10640 xorDigest(digest
,s
,len
);
10642 SHA1Update(&ctx
,digest
,20);
10643 SHA1Final(digest
,&ctx
);
10646 static void mixObjectDigest(unsigned char *digest
, robj
*o
) {
10647 o
= getDecodedObject(o
);
10648 mixDigest(digest
,o
->ptr
,sdslen(o
->ptr
));
10652 /* Compute the dataset digest. Since keys, sets elements, hashes elements
10653 * are not ordered, we use a trick: every aggregate digest is the xor
10654 * of the digests of their elements. This way the order will not change
10655 * the result. For list instead we use a feedback entering the output digest
10656 * as input in order to ensure that a different ordered list will result in
10657 * a different digest. */
10658 static void computeDatasetDigest(unsigned char *final
) {
10659 unsigned char digest
[20];
10661 dictIterator
*di
= NULL
;
10666 memset(final
,0,20); /* Start with a clean result */
10668 for (j
= 0; j
< server
.dbnum
; j
++) {
10669 redisDb
*db
= server
.db
+j
;
10671 if (dictSize(db
->dict
) == 0) continue;
10672 di
= dictGetIterator(db
->dict
);
10674 /* hash the DB id, so the same dataset moved in a different
10675 * DB will lead to a different digest */
10677 mixDigest(final
,&aux
,sizeof(aux
));
10679 /* Iterate this DB writing every entry */
10680 while((de
= dictNext(di
)) != NULL
) {
10681 robj
*key
, *o
, *kcopy
;
10684 memset(digest
,0,20); /* This key-val digest */
10685 key
= dictGetEntryKey(de
);
10687 if (!server
.vm_enabled
) {
10688 mixObjectDigest(digest
,key
);
10689 o
= dictGetEntryVal(de
);
10691 /* Don't work with the key directly as when VM is active
10692 * this is unsafe: TODO: fix decrRefCount to check if the
10693 * count really reached 0 to avoid this mess */
10694 kcopy
= dupStringObject(key
);
10695 mixObjectDigest(digest
,kcopy
);
10696 o
= lookupKeyRead(db
,kcopy
);
10697 decrRefCount(kcopy
);
10699 aux
= htonl(o
->type
);
10700 mixDigest(digest
,&aux
,sizeof(aux
));
10701 expiretime
= getExpire(db
,key
);
10703 /* Save the key and associated value */
10704 if (o
->type
== REDIS_STRING
) {
10705 mixObjectDigest(digest
,o
);
10706 } else if (o
->type
== REDIS_LIST
) {
10707 list
*list
= o
->ptr
;
10711 listRewind(list
,&li
);
10712 while((ln
= listNext(&li
))) {
10713 robj
*eleobj
= listNodeValue(ln
);
10715 mixObjectDigest(digest
,eleobj
);
10717 } else if (o
->type
== REDIS_SET
) {
10718 dict
*set
= o
->ptr
;
10719 dictIterator
*di
= dictGetIterator(set
);
10722 while((de
= dictNext(di
)) != NULL
) {
10723 robj
*eleobj
= dictGetEntryKey(de
);
10725 xorObjectDigest(digest
,eleobj
);
10727 dictReleaseIterator(di
);
10728 } else if (o
->type
== REDIS_ZSET
) {
10730 dictIterator
*di
= dictGetIterator(zs
->dict
);
10733 while((de
= dictNext(di
)) != NULL
) {
10734 robj
*eleobj
= dictGetEntryKey(de
);
10735 double *score
= dictGetEntryVal(de
);
10736 unsigned char eledigest
[20];
10738 snprintf(buf
,sizeof(buf
),"%.17g",*score
);
10739 memset(eledigest
,0,20);
10740 mixObjectDigest(eledigest
,eleobj
);
10741 mixDigest(eledigest
,buf
,strlen(buf
));
10742 xorDigest(digest
,eledigest
,20);
10744 dictReleaseIterator(di
);
10745 } else if (o
->type
== REDIS_HASH
) {
10749 hi
= hashInitIterator(o
);
10750 while (hashNext(hi
) != REDIS_ERR
) {
10751 unsigned char eledigest
[20];
10753 memset(eledigest
,0,20);
10754 obj
= hashCurrent(hi
,REDIS_HASH_KEY
);
10755 mixObjectDigest(eledigest
,obj
);
10757 obj
= hashCurrent(hi
,REDIS_HASH_VALUE
);
10758 mixObjectDigest(eledigest
,obj
);
10760 xorDigest(digest
,eledigest
,20);
10762 hashReleaseIterator(hi
);
10764 redisPanic("Unknown object type");
10766 /* If the key has an expire, add it to the mix */
10767 if (expiretime
!= -1) xorDigest(digest
,"!!expire!!",10);
10768 /* We can finally xor the key-val digest to the final digest */
10769 xorDigest(final
,digest
,20);
10771 dictReleaseIterator(di
);
10775 static void debugCommand(redisClient
*c
) {
10776 if (!strcasecmp(c
->argv
[1]->ptr
,"segfault")) {
10777 *((char*)-1) = 'x';
10778 } else if (!strcasecmp(c
->argv
[1]->ptr
,"reload")) {
10779 if (rdbSave(server
.dbfilename
) != REDIS_OK
) {
10780 addReply(c
,shared
.err
);
10784 if (rdbLoad(server
.dbfilename
) != REDIS_OK
) {
10785 addReply(c
,shared
.err
);
10788 redisLog(REDIS_WARNING
,"DB reloaded by DEBUG RELOAD");
10789 addReply(c
,shared
.ok
);
10790 } else if (!strcasecmp(c
->argv
[1]->ptr
,"loadaof")) {
10792 if (loadAppendOnlyFile(server
.appendfilename
) != REDIS_OK
) {
10793 addReply(c
,shared
.err
);
10796 redisLog(REDIS_WARNING
,"Append Only File loaded by DEBUG LOADAOF");
10797 addReply(c
,shared
.ok
);
10798 } else if (!strcasecmp(c
->argv
[1]->ptr
,"object") && c
->argc
== 3) {
10799 dictEntry
*de
= dictFind(c
->db
->dict
,c
->argv
[2]);
10803 addReply(c
,shared
.nokeyerr
);
10806 key
= dictGetEntryKey(de
);
10807 val
= dictGetEntryVal(de
);
10808 if (!server
.vm_enabled
|| (key
->storage
== REDIS_VM_MEMORY
||
10809 key
->storage
== REDIS_VM_SWAPPING
)) {
10813 if (val
->encoding
< (sizeof(strencoding
)/sizeof(char*))) {
10814 strenc
= strencoding
[val
->encoding
];
10816 snprintf(buf
,64,"unknown encoding %d\n", val
->encoding
);
10819 addReplySds(c
,sdscatprintf(sdsempty(),
10820 "+Key at:%p refcount:%d, value at:%p refcount:%d "
10821 "encoding:%s serializedlength:%lld\r\n",
10822 (void*)key
, key
->refcount
, (void*)val
, val
->refcount
,
10823 strenc
, (long long) rdbSavedObjectLen(val
,NULL
)));
10825 addReplySds(c
,sdscatprintf(sdsempty(),
10826 "+Key at:%p refcount:%d, value swapped at: page %llu "
10827 "using %llu pages\r\n",
10828 (void*)key
, key
->refcount
, (unsigned long long) key
->vm
.page
,
10829 (unsigned long long) key
->vm
.usedpages
));
10831 } else if (!strcasecmp(c
->argv
[1]->ptr
,"swapin") && c
->argc
== 3) {
10832 lookupKeyRead(c
->db
,c
->argv
[2]);
10833 addReply(c
,shared
.ok
);
10834 } else if (!strcasecmp(c
->argv
[1]->ptr
,"swapout") && c
->argc
== 3) {
10835 dictEntry
*de
= dictFind(c
->db
->dict
,c
->argv
[2]);
10838 if (!server
.vm_enabled
) {
10839 addReplySds(c
,sdsnew("-ERR Virtual Memory is disabled\r\n"));
10843 addReply(c
,shared
.nokeyerr
);
10846 key
= dictGetEntryKey(de
);
10847 val
= dictGetEntryVal(de
);
10848 /* If the key is shared we want to create a copy */
10849 if (key
->refcount
> 1) {
10850 robj
*newkey
= dupStringObject(key
);
10852 key
= dictGetEntryKey(de
) = newkey
;
10855 if (key
->storage
!= REDIS_VM_MEMORY
) {
10856 addReplySds(c
,sdsnew("-ERR This key is not in memory\r\n"));
10857 } else if (vmSwapObjectBlocking(key
,val
) == REDIS_OK
) {
10858 dictGetEntryVal(de
) = NULL
;
10859 addReply(c
,shared
.ok
);
10861 addReply(c
,shared
.err
);
10863 } else if (!strcasecmp(c
->argv
[1]->ptr
,"populate") && c
->argc
== 3) {
10868 if (getLongFromObjectOrReply(c
, c
->argv
[2], &keys
, NULL
) != REDIS_OK
)
10870 for (j
= 0; j
< keys
; j
++) {
10871 snprintf(buf
,sizeof(buf
),"key:%lu",j
);
10872 key
= createStringObject(buf
,strlen(buf
));
10873 if (lookupKeyRead(c
->db
,key
) != NULL
) {
10877 snprintf(buf
,sizeof(buf
),"value:%lu",j
);
10878 val
= createStringObject(buf
,strlen(buf
));
10879 dictAdd(c
->db
->dict
,key
,val
);
10881 addReply(c
,shared
.ok
);
10882 } else if (!strcasecmp(c
->argv
[1]->ptr
,"digest") && c
->argc
== 2) {
10883 unsigned char digest
[20];
10884 sds d
= sdsnew("+");
10887 computeDatasetDigest(digest
);
10888 for (j
= 0; j
< 20; j
++)
10889 d
= sdscatprintf(d
, "%02x",digest
[j
]);
10891 d
= sdscatlen(d
,"\r\n",2);
10894 addReplySds(c
,sdsnew(
10895 "-ERR Syntax error, try DEBUG [SEGFAULT|OBJECT <key>|SWAPIN <key>|SWAPOUT <key>|RELOAD]\r\n"));
10899 static void _redisAssert(char *estr
, char *file
, int line
) {
10900 redisLog(REDIS_WARNING
,"=== ASSERTION FAILED ===");
10901 redisLog(REDIS_WARNING
,"==> %s:%d '%s' is not true",file
,line
,estr
);
10902 #ifdef HAVE_BACKTRACE
10903 redisLog(REDIS_WARNING
,"(forcing SIGSEGV in order to print the stack trace)");
10904 *((char*)-1) = 'x';
10908 static void _redisPanic(char *msg
, char *file
, int line
) {
10909 redisLog(REDIS_WARNING
,"!!! Software Failure. Press left mouse button to continue");
10910 redisLog(REDIS_WARNING
,"Guru Meditation: %s #%s:%d",msg
,file
,line
);
10911 #ifdef HAVE_BACKTRACE
10912 redisLog(REDIS_WARNING
,"(forcing SIGSEGV in order to print the stack trace)");
10913 *((char*)-1) = 'x';
10917 /* =================================== Main! ================================ */
/* Read the current value of /proc/sys/vm/overcommit_memory.
 *
 * Returns the non-negative integer found in the file, or -1 when the file
 * can't be opened, can't be read, or does not start with a number that fits
 * in an int. Reporting -1 for unparsable content matters because the caller
 * treats 0 as "overcommit disabled" and emits a warning in that case. */
int linuxOvercommitMemoryValue(void) {
    FILE *fp = fopen("/proc/sys/vm/overcommit_memory","r");
    char buf[64];
    char *end;
    long value;

    if (!fp) return -1;
    if (fgets(buf,sizeof(buf),fp) == NULL) {
        fclose(fp);
        return -1;
    }
    fclose(fp);

    value = strtol(buf,&end,10);
    /* No digits consumed, negative, or not representable as int. */
    if (end == buf || value < 0 || (long)(int)value != value) return -1;
    return (int)value;
}
10934 void linuxOvercommitMemoryWarning(void) {
10935 if (linuxOvercommitMemoryValue() == 0) {
10936 redisLog(REDIS_WARNING
,"WARNING overcommit_memory is set to 0! Background save may fail under low memory condition. To fix this issue add 'vm.overcommit_memory = 1' to /etc/sysctl.conf and then reboot or run the command 'sysctl vm.overcommit_memory=1' for this to take effect.");
10939 #endif /* __linux__ */
10941 static void daemonize(void) {
10945 if (fork() != 0) exit(0); /* parent exits */
10946 setsid(); /* create a new session */
10948 /* Every output goes to /dev/null. If Redis is daemonized but
10949 * the 'logfile' is set to 'stdout' in the configuration file
10950 * it will not log at all. */
10951 if ((fd
= open("/dev/null", O_RDWR
, 0)) != -1) {
10952 dup2(fd
, STDIN_FILENO
);
10953 dup2(fd
, STDOUT_FILENO
);
10954 dup2(fd
, STDERR_FILENO
);
10955 if (fd
> STDERR_FILENO
) close(fd
);
10957 /* Try to write the pid file */
10958 fp
= fopen(server
.pidfile
,"w");
10960 fprintf(fp
,"%d\n",getpid());
10965 static void version() {
10966 printf("Redis server version %s (%s:%d)\n", REDIS_VERSION
,
10967 REDIS_GIT_SHA1
, atoi(REDIS_GIT_DIRTY
) > 0);
/* Print the command line synopsis on standard error, then terminate the
 * process with a non-zero status. */
static void usage() {
    fputs("Usage: ./redis-server [/path/to/redis.conf]\n", stderr);
    fputs(" ./redis-server - (read config from stdin)\n", stderr);
    exit(1);
}
10977 int main(int argc
, char **argv
) {
10980 initServerConfig();
10981 sortCommandTable();
10983 if (strcmp(argv
[1], "-v") == 0 ||
10984 strcmp(argv
[1], "--version") == 0) version();
10985 if (strcmp(argv
[1], "--help") == 0) usage();
10986 resetServerSaveParams();
10987 loadServerConfig(argv
[1]);
10988 } else if ((argc
> 2)) {
10991 redisLog(REDIS_WARNING
,"Warning: no config file specified, using the default config. In order to specify a config file use 'redis-server /path/to/redis.conf'");
10993 if (server
.daemonize
) daemonize();
10995 redisLog(REDIS_NOTICE
,"Server started, Redis version " REDIS_VERSION
);
10997 linuxOvercommitMemoryWarning();
10999 start
= time(NULL
);
11000 if (server
.appendonly
) {
11001 if (loadAppendOnlyFile(server
.appendfilename
) == REDIS_OK
)
11002 redisLog(REDIS_NOTICE
,"DB loaded from append only file: %ld seconds",time(NULL
)-start
);
11004 if (rdbLoad(server
.dbfilename
) == REDIS_OK
)
11005 redisLog(REDIS_NOTICE
,"DB loaded from disk: %ld seconds",time(NULL
)-start
);
11007 redisLog(REDIS_NOTICE
,"The server is now ready to accept connections on port %d", server
.port
);
11008 aeSetBeforeSleepProc(server
.el
,beforeSleep
);
11010 aeDeleteEventLoop(server
.el
);
11014 /* ============================= Backtrace support ========================= */
11016 #ifdef HAVE_BACKTRACE
11017 static char *findFuncName(void *pointer
, unsigned long *offset
);
/* Return, as a void pointer, the instruction pointer register saved inside
 * the machine context of 'uc' (uc->uc_mcontext). The field that holds it is
 * platform specific, so one return statement is selected at compile time by
 * the preprocessor conditionals below.
 *
 * NOTE(review): some preprocessor lines of this ladder (the inner conditions
 * choosing between __rip and __eip, and the closing branches) are not
 * visible in this listing; confirm them against the complete file before
 * touching this function. */
11019 static void *getMcontextEip(ucontext_t
*uc
) {
11020 #if defined(__FreeBSD__)
11021 return (void*) uc
->uc_mcontext
.mc_eip
;
11022 #elif defined(__dietlibc__)
11023 return (void*) uc
->uc_mcontext
.eip
;
/* Apple platforms: the register lives in the __ss member, as __rip or
 * __eip. */
11024 #elif defined(__APPLE__) && !defined(MAC_OS_X_VERSION_10_6)
11026 return (void*) uc
->uc_mcontext
->__ss
.__rip
;
11028 return (void*) uc
->uc_mcontext
->__ss
.__eip
;
11030 #elif defined(__APPLE__) && defined(MAC_OS_X_VERSION_10_6)
11031 #if defined(_STRUCT_X86_THREAD_STATE64) && !defined(__i386__)
11032 return (void*) uc
->uc_mcontext
->__ss
.__rip
;
11034 return (void*) uc
->uc_mcontext
->__ss
.__eip
;
/* NOTE(review): the same REG_EIP index is used for both 32 and 64 bit x86
 * builds here; verify that REG_EIP is available on x86_64 targets. */
11036 #elif defined(__i386__) || defined(__X86_64__) || defined(__x86_64__)
11037 return (void*) uc
->uc_mcontext
.gregs
[REG_EIP
]; /* Linux 32/64 bit */
11038 #elif defined(__ia64__) /* Linux IA64 */
11039 return (void*) uc
->uc_mcontext
.sc_ip
;
11045 static void segvHandler(int sig
, siginfo_t
*info
, void *secret
) {
11047 char **messages
= NULL
;
11048 int i
, trace_size
= 0;
11049 unsigned long offset
=0;
11050 ucontext_t
*uc
= (ucontext_t
*) secret
;
11052 REDIS_NOTUSED(info
);
11054 redisLog(REDIS_WARNING
,
11055 "======= Ooops! Redis %s got signal: -%d- =======", REDIS_VERSION
, sig
);
11056 infostring
= genRedisInfoString();
11057 redisLog(REDIS_WARNING
, "%s",infostring
);
11058 /* It's not safe to sdsfree() the returned string under memory
11059 * corruption conditions. Let it leak as we are going to abort */
11061 trace_size
= backtrace(trace
, 100);
11062 /* overwrite sigaction with caller's address */
11063 if (getMcontextEip(uc
) != NULL
) {
11064 trace
[1] = getMcontextEip(uc
);
11066 messages
= backtrace_symbols(trace
, trace_size
);
11068 for (i
=1; i
<trace_size
; ++i
) {
11069 char *fn
= findFuncName(trace
[i
], &offset
), *p
;
11071 p
= strchr(messages
[i
],'+');
11072 if (!fn
|| (p
&& ((unsigned long)strtol(p
+1,NULL
,10)) < offset
)) {
11073 redisLog(REDIS_WARNING
,"%s", messages
[i
]);
11075 redisLog(REDIS_WARNING
,"%d redis-server %p %s + %d", i
, trace
[i
], fn
, (unsigned int)offset
);
11078 /* free(messages); Don't call free() with possibly corrupted memory. */
11082 static void sigtermHandler(int sig
) {
11083 REDIS_NOTUSED(sig
);
11085 redisLog(REDIS_WARNING
,"SIGTERM received, scheduling shutting down...");
11086 server
.shutdown_asap
= 1;
11089 static void setupSigSegvAction(void) {
11090 struct sigaction act
;
11092 sigemptyset (&act
.sa_mask
);
11093 /* When the SA_SIGINFO flag is set in sa_flags then sa_sigaction
11094 * is used. Otherwise, sa_handler is used */
11095 act
.sa_flags
= SA_NODEFER
| SA_ONSTACK
| SA_RESETHAND
| SA_SIGINFO
;
11096 act
.sa_sigaction
= segvHandler
;
11097 sigaction (SIGSEGV
, &act
, NULL
);
11098 sigaction (SIGBUS
, &act
, NULL
);
11099 sigaction (SIGFPE
, &act
, NULL
);
11100 sigaction (SIGILL
, &act
, NULL
);
11101 sigaction (SIGBUS
, &act
, NULL
);
11103 act
.sa_flags
= SA_NODEFER
| SA_ONSTACK
| SA_RESETHAND
;
11104 act
.sa_handler
= sigtermHandler
;
11105 sigaction (SIGTERM
, &act
, NULL
);
11109 #include "staticsymbols.h"
11110 /* This function try to convert a pointer into a function name. It's used in
11111 * oreder to provide a backtrace under segmentation fault that's able to
11112 * display functions declared as static (otherwise the backtrace is useless). */
11113 static char *findFuncName(void *pointer
, unsigned long *offset
){
11115 unsigned long off
, minoff
= 0;
11117 /* Try to match against the Symbol with the smallest offset */
11118 for (i
=0; symsTable
[i
].pointer
; i
++) {
11119 unsigned long lp
= (unsigned long) pointer
;
11121 if (lp
!= (unsigned long)-1 && lp
>= symsTable
[i
].pointer
) {
11122 off
=lp
-symsTable
[i
].pointer
;
11123 if (ret
< 0 || off
< minoff
) {
11129 if (ret
== -1) return NULL
;
11131 return symsTable
[ret
].name
;
11133 #else /* HAVE_BACKTRACE */
/* Fallback definition of setupSigSegvAction(), compiled when HAVE_BACKTRACE
 * is not defined. */
11134 static void setupSigSegvAction(void) {
11136 #endif /* HAVE_BACKTRACE */