2 * Copyright (c) 2009-2010, Salvatore Sanfilippo <antirez at gmail dot com>
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions are met:
8 * * Redistributions of source code must retain the above copyright notice,
9 * this list of conditions and the following disclaimer.
10 * * Redistributions in binary form must reproduce the above copyright
11 * notice, this list of conditions and the following disclaimer in the
12 * documentation and/or other materials provided with the distribution.
13 * * Neither the name of Redis nor the names of its contributors may be used
14 * to endorse or promote products derived from this software without
15 * specific prior written permission.
17 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
18 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
21 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
22 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
23 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
24 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
25 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
26 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
27 * POSSIBILITY OF SUCH DAMAGE.
30 #define REDIS_VERSION "2.1.1"
45 #endif /* HAVE_BACKTRACE */
53 #include <arpa/inet.h>
57 #include <sys/resource.h>
65 #include "solarisfixes.h"
69 #include "ae.h" /* Event driven programming library */
70 #include "sds.h" /* Dynamic safe strings */
71 #include "anet.h" /* Networking the easy way */
72 #include "dict.h" /* Hash tables */
73 #include "adlist.h" /* Linked lists */
74 #include "zmalloc.h" /* total memory usage aware version of malloc/free */
75 #include "lzf.h" /* LZF compression library */
76 #include "pqsort.h" /* Partial qsort for SORT+LIMIT */
77 #include "zipmap.h" /* Compact dictionary-alike data structure */
78 #include "sha1.h" /* SHA1 is used for DEBUG DIGEST */
79 #include "release.h" /* Release and/or git repository information */
85 /* Static server configuration */
86 #define REDIS_SERVERPORT 6379 /* TCP port */
87 #define REDIS_MAXIDLETIME (60*5) /* default client timeout */
88 #define REDIS_IOBUF_LEN 1024
89 #define REDIS_LOADBUF_LEN 1024
90 #define REDIS_STATIC_ARGS 8
91 #define REDIS_DEFAULT_DBNUM 16
92 #define REDIS_CONFIGLINE_MAX 1024
93 #define REDIS_OBJFREELIST_MAX 1000000 /* Max number of objects to cache */
94 #define REDIS_MAX_SYNC_TIME 60 /* Slave can't take more to sync */
95 #define REDIS_EXPIRELOOKUPS_PER_CRON 10 /* lookup 10 expires per loop */
96 #define REDIS_MAX_WRITE_PER_EVENT (1024*64)
97 #define REDIS_REQUEST_MAX_SIZE (1024*1024*256) /* max bytes in inline command (256 MiB) */
99 /* If more than REDIS_WRITEV_THRESHOLD write packets are pending, use writev */
100 #define REDIS_WRITEV_THRESHOLD 3
101 /* Max number of iovecs used for each writev call */
102 #define REDIS_WRITEV_IOVEC_COUNT 256
104 /* Hash table parameters */
105 #define REDIS_HT_MINFILL 10 /* Minimal hash table fill 10% */
/* Command flags. The values are distinct bits that are OR-ed together in the
 * flags column of cmdTable (for example REDIS_CMD_BULK|REDIS_CMD_DENYOOM). */
108 #define REDIS_CMD_BULK 1 /* Bulk write command */
109 #define REDIS_CMD_INLINE 2 /* Inline command */
110 /* REDIS_CMD_DENYOOM deserves a longer comment: all the commands marked with
111 this flag will return an error when the 'maxmemory' option is set in the
112 config file and the server is using more than maxmemory bytes of memory.
113 In short, these commands are denied on low memory conditions. */
114 #define REDIS_CMD_DENYOOM 4
115 #define REDIS_CMD_FORCE_REPLICATION 8 /* Force replication even if dirty is 0 */
118 #define REDIS_STRING 0
124 /* Objects encoding. Some kind of objects like Strings and Hashes can be
125 * internally represented in multiple ways. The 'encoding' field of the object
126 * is set to one of these values for this object. */
127 #define REDIS_ENCODING_RAW 0 /* Raw representation */
128 #define REDIS_ENCODING_INT 1 /* Encoded as integer */
129 #define REDIS_ENCODING_ZIPMAP 2 /* Encoded as zipmap */
130 #define REDIS_ENCODING_HT 3 /* Encoded as a hash table */
/* Printable encoding names, listed in the same order as the
 * REDIS_ENCODING_* values above. */
132 static char* strencoding
[] = {
133 "raw", "int", "zipmap", "hashtable"
136 /* Object types only used for dumping to disk */
137 #define REDIS_EXPIRETIME 253
138 #define REDIS_SELECTDB 254
139 #define REDIS_EOF 255
141 /* Defines related to the dump file format. To store 32 bits lengths for short
142 * keys requires a lot of space, so we check the most significant 2 bits of
143 * the first byte to interpret the length:
145 * 00|000000 => if the two MSB are 00 the len is the 6 bits of this byte
146 * 01|000000 00000000 => 01, the len is 14 bits, 6 bits + 8 bits of next byte
147 * 10|000000 [32 bit integer] => if it's 10, a full 32 bit len will follow
148 * 11|000000 this means: specially encoded object will follow. The six bits
149 * number specify the kind of object that follows.
150 * See the REDIS_RDB_ENC_* defines.
152 * Lengths up to 63 are stored using a single byte, most DB keys, and many
153 * values, will fit inside. */
154 #define REDIS_RDB_6BITLEN 0
155 #define REDIS_RDB_14BITLEN 1
156 #define REDIS_RDB_32BITLEN 2
157 #define REDIS_RDB_ENCVAL 3
158 #define REDIS_RDB_LENERR UINT_MAX
160 /* When a length of a string object stored on disk has the first two bits
161 * set, the remaining six bits specify a special encoding for the object
162 * according to the following defines: */
163 #define REDIS_RDB_ENC_INT8 0 /* 8 bit signed integer */
164 #define REDIS_RDB_ENC_INT16 1 /* 16 bit signed integer */
165 #define REDIS_RDB_ENC_INT32 2 /* 32 bit signed integer */
166 #define REDIS_RDB_ENC_LZF 3 /* string compressed with LZF */
168 /* Virtual memory object->where field. */
169 #define REDIS_VM_MEMORY 0 /* The object is on memory */
170 #define REDIS_VM_SWAPPED 1 /* The object is on disk */
171 #define REDIS_VM_SWAPPING 2 /* Redis is swapping this object on disk */
172 #define REDIS_VM_LOADING 3 /* Redis is loading this object from disk */
174 /* Virtual memory static configuration stuff.
175 * Check vmFindContiguousPages() to know more about these magic numbers. */
176 #define REDIS_VM_MAX_NEAR_PAGES 65536
177 #define REDIS_VM_MAX_RANDOM_JUMP 4096
178 #define REDIS_VM_MAX_THREADS 32
179 #define REDIS_THREAD_STACK_SIZE (1024*1024*4)
180 /* The following is the *percentage* of completed I/O jobs to process when the
181 * handler is called. While Virtual Memory I/O operations are performed by
182 * threads, these operations must be processed by the main thread when completed
183 * in order to take effect. */
184 #define REDIS_MAX_COMPLETED_JOBS_PROCESSED 1
/* Client flags: distinct bits, OR-ed together in the 'flags' field of
 * redisClient (REDIS_SLAVE | REDIS_MONITOR | REDIS_MULTI ...). */
187 #define REDIS_SLAVE 1 /* This client is a slave server */
188 #define REDIS_MASTER 2 /* This client is a master server */
189 #define REDIS_MONITOR 4 /* This client is a slave monitor, see MONITOR */
190 #define REDIS_MULTI 8 /* This client is in a MULTI context */
191 #define REDIS_BLOCKED 16 /* The client is waiting in a blocking operation */
192 #define REDIS_IO_WAIT 32 /* The client is waiting for Virtual Memory I/O */
193 #define REDIS_DIRTY_CAS 64 /* Watched keys modified. EXEC will fail. */
195 /* Slave replication state - slave side */
196 #define REDIS_REPL_NONE 0 /* No active replication */
197 #define REDIS_REPL_CONNECT 1 /* Must connect to master */
198 #define REDIS_REPL_CONNECTED 2 /* Connected to master */
200 /* Slave replication state - from the point of view of master.
201 * Note that in SEND_BULK and ONLINE state the slave receives new updates
202 * in its output queue. In the WAIT_BGSAVE state instead the server is waiting
203 * to start the next background saving in order to send updates to it. */
204 #define REDIS_REPL_WAIT_BGSAVE_START 3 /* master waits for the next bgsave to start before feeding it */
205 #define REDIS_REPL_WAIT_BGSAVE_END 4 /* master waits for the bgsave to end to start bulk DB transmission */
206 #define REDIS_REPL_SEND_BULK 5 /* master is sending the bulk DB */
207 #define REDIS_REPL_ONLINE 6 /* bulk DB already transmitted, receive updates */
209 /* List related stuff */
213 /* Sort operations */
214 #define REDIS_SORT_GET 0
215 #define REDIS_SORT_ASC 1
216 #define REDIS_SORT_DESC 2
217 #define REDIS_SORTKEY_MAX 1024
/* NOTE(review): the four values below look like log severity levels in
 * increasing order of importance - confirm against the logging code. */
220 #define REDIS_DEBUG 0
221 #define REDIS_VERBOSE 1
222 #define REDIS_NOTICE 2
223 #define REDIS_WARNING 3
/* Anti-warning macro: evaluates V and discards the result, so the compiler
 * does not report V as unused. The argument is parenthesized so that the
 * (void) cast applies to the whole expression and not only to its first
 * operand (e.g. REDIS_NOTUSED(a + b)). */
#define REDIS_NOTUSED(V) ((void)(V))
/* Skiplist tuning parameters */
228 #define ZSKIPLIST_MAXLEVEL 32 /* Should be enough for 2^32 elements */
229 #define ZSKIPLIST_P 0.25 /* Skiplist P = 1/4 */
231 /* Append only defines */
232 #define APPENDFSYNC_NO 0
233 #define APPENDFSYNC_ALWAYS 1
234 #define APPENDFSYNC_EVERYSEC 2
236 /* Hashes related defaults (presumably the initial values of the server's
 * hash_max_zipmap_entries / hash_max_zipmap_value fields - confirm) */
237 #define REDIS_HASH_MAX_ZIPMAP_ENTRIES 64
238 #define REDIS_HASH_MAX_ZIPMAP_VALUE 512
/* We can print the stacktrace, so our assert is defined this way.
 *
 * redisAssert(_e): evaluates _e; when it is false, passes the stringified
 * expression plus __FILE__/__LINE__ to _redisAssert() and then calls
 * _exit(1). When _e is true the macro is a no-op of type void.
 *
 * redisPanic(_e): unconditionally passes the stringified argument plus
 * __FILE__/__LINE__ to _redisPanic() and then calls _exit(1).
 *
 * Both expansions are fully parenthesized so that the comma expression stays
 * a single operand when the macro is used inside a larger expression (for
 * instance as one arm of a ?: operator); without the outer parentheses the
 * _exit(1) part would escape the enclosing expression. */
#define redisAssert(_e) ((_e)?(void)0 : (_redisAssert(#_e,__FILE__,__LINE__),_exit(1)))
#define redisPanic(_e) (_redisPanic(#_e,__FILE__,__LINE__),_exit(1))
static void _redisAssert(char *estr, char *file, int line);
static void _redisPanic(char *msg, char *file, int line);
246 /*================================= Data types ============================== */
248 /* A redis object, that is a type able to hold a string / list / set */
250 /* The VM object structure */
251 struct redisObjectVM
{
252 off_t page
; /* the page at which the object is stored on disk */
253 off_t usedpages
; /* number of pages used on disk */
254 time_t atime
; /* Last access time */
257 /* The actual Redis Object */
258 typedef struct redisObject
{
261 unsigned char encoding
; /* one of the REDIS_ENCODING_* values */
262 unsigned char storage
; /* If this object is a key, where is the value?
263 * REDIS_VM_MEMORY, REDIS_VM_SWAPPED, ... */
264 unsigned char vtype
; /* If this object is a key, and value is swapped out,
265 * this is the type of the swapped out object. */
267 /* VM fields, these are only allocated if VM is active, otherwise the
268 * object allocation function will just allocate
269 * sizeof(redisObject) minus sizeof(redisObjectVM), so using
270 * Redis without VM active will not have any overhead. */
271 struct redisObjectVM vm
;
274 /* Macro used to initialize a Redis object allocated on the stack.
275 * Note that this macro is kept near the structure definition to make sure
276 * we'll update it when the structure is changed, to avoid bugs like
277 * bug #85 introduced exactly in this way. */
278 #define initStaticStringObject(_var,_ptr) do { \
280 _var.type = REDIS_STRING; \
281 _var.encoding = REDIS_ENCODING_RAW; \
283 if (server.vm_enabled) _var.storage = REDIS_VM_MEMORY; \
286 typedef struct redisDb
{
287 dict
*dict
; /* The keyspace for this DB */
288 dict
*expires
; /* Timeout of keys with a timeout set */
289 dict
*blocking_keys
; /* Keys with clients waiting for data (BLPOP) */
290 dict
*io_keys
; /* Keys with clients waiting for VM I/O */
291 dict
*watched_keys
; /* WATCHED keys for MULTI/EXEC CAS */
295 /* Client MULTI/EXEC state */
296 typedef struct multiCmd
{
299 struct redisCommand
*cmd
;
302 typedef struct multiState
{
303 multiCmd
*commands
; /* Array of MULTI commands */
304 int count
; /* Total number of MULTI commands */
307 /* With multiplexing we need to take per-client state.
308 * Clients are taken in a linked list. */
309 typedef struct redisClient
{
314 robj
**argv
, **mbargv
;
316 int bulklen
; /* bulk read len. -1 if not in bulk read mode */
317 int multibulk
; /* multi bulk command format active */
320 time_t lastinteraction
; /* time of the last interaction, used for timeout */
321 int flags
; /* REDIS_SLAVE | REDIS_MONITOR | REDIS_MULTI ... */
322 int slaveseldb
; /* slave selected db, if this client is a slave */
323 int authenticated
; /* when requirepass is non-NULL */
324 int replstate
; /* replication state if this is a slave */
325 int repldbfd
; /* replication DB file descriptor */
326 long repldboff
; /* replication DB file offset */
327 off_t repldbsize
; /* replication DB file size */
328 multiState mstate
; /* MULTI/EXEC state */
329 robj
**blocking_keys
; /* The keys we are waiting on to terminate a blocking
330 * operation such as BLPOP. Otherwise NULL. */
331 int blocking_keys_num
; /* Number of blocking keys */
332 time_t blockingto
; /* Blocking operation timeout. If UNIX current time
333 * is >= blockingto then the operation timed out. */
334 list
*io_keys
; /* Keys this client is waiting to be loaded from the
335 * swap file in order to continue. */
336 list
*watched_keys
; /* Keys WATCHED for MULTI/EXEC CAS */
337 dict
*pubsub_channels
; /* channels a client is interested in (SUBSCRIBE) */
338 list
*pubsub_patterns
; /* patterns a client is interested in (SUBSCRIBE) */
346 /* Global server state structure */
351 long long dirty
; /* changes to DB from the last save */
353 list
*slaves
, *monitors
;
354 char neterr
[ANET_ERR_LEN
];
356 int cronloops
; /* number of times the cron function has run */
357 list
*objfreelist
; /* A list of freed objects to avoid malloc() */
358 time_t lastsave
; /* Unix time of the last successful save */
359 /* Fields used only for stats */
360 time_t stat_starttime
; /* server start time */
361 long long stat_numcommands
; /* number of processed commands */
362 long long stat_numconnections
; /* number of connections received */
363 long long stat_expiredkeys
; /* number of expired keys */
372 int no_appendfsync_on_rewrite
;
378 pid_t bgsavechildpid
;
379 pid_t bgrewritechildpid
;
380 sds bgrewritebuf
; /* buffer taken by parent during append only rewrite */
381 sds aofbuf
; /* AOF buffer, written before entering the event loop */
382 struct saveparam
*saveparams
;
387 char *appendfilename
;
391 /* Replication related */
396 redisClient
*master
; /* client that is master for this slave */
398 unsigned int maxclients
;
399 unsigned long long maxmemory
;
400 unsigned int blpop_blocked_clients
;
401 unsigned int vm_blocked_clients
;
402 /* Sort parameters - qsort_r() is only available under BSD so we
403 * have to take this state global, in order to pass it to sortCompare() */
407 /* Virtual memory configuration */
412 unsigned long long vm_max_memory
;
414 size_t hash_max_zipmap_entries
;
415 size_t hash_max_zipmap_value
;
416 /* Virtual memory state */
419 off_t vm_next_page
; /* Next probably empty page */
420 off_t vm_near_pages
; /* Number of pages allocated sequentially */
421 unsigned char *vm_bitmap
; /* Bitmap of free/used pages */
422 time_t unixtime
; /* Unix time sampled every second. */
423 /* Virtual memory I/O threads stuff */
424 /* An I/O thread processes an element taken from the io_newjobs queue and
425 * puts the result of the operation in the io_processed list. While the
426 * job is being processed, it's put on the io_processing queue. */
427 list
*io_newjobs
; /* List of VM I/O jobs yet to be processed */
428 list
*io_processing
; /* List of VM I/O jobs being processed */
429 list
*io_processed
; /* List of VM I/O jobs already processed */
430 list
*io_ready_clients
; /* Clients ready to be unblocked. All keys loaded */
431 pthread_mutex_t io_mutex
; /* lock to access io_jobs/io_done/io_thread_job (presumably the
 * io_newjobs/io_processing/io_processed lists above - confirm) */
432 pthread_mutex_t obj_freelist_mutex
; /* safe redis objects creation/free */
433 pthread_mutex_t io_swapfile_mutex
; /* So we can lseek + write */
434 pthread_attr_t io_threads_attr
; /* attributes for threads creation */
435 int io_active_threads
; /* Number of running I/O threads */
436 int vm_max_threads
; /* Max number of I/O threads running at the same time */
437 /* Our main thread is blocked on the event loop, looking for sockets ready
438 * to be read or written, so when a threaded I/O operation is ready to be
439 * processed by the main thread, the I/O thread will use a unix pipe to
440 * awake the main thread. The following are the two pipe FDs. */
441 int io_ready_pipe_read
;
442 int io_ready_pipe_write
;
443 /* Virtual memory stats */
444 unsigned long long vm_stats_used_pages
;
445 unsigned long long vm_stats_swapped_objects
;
446 unsigned long long vm_stats_swapouts
;
447 unsigned long long vm_stats_swapins
;
449 dict
*pubsub_channels
; /* Map channels to list of subscribed clients */
450 list
*pubsub_patterns
; /* A list of pubsub_patterns */
455 typedef struct pubsubPattern
{
460 typedef void redisCommandProc(redisClient
*c
);
461 typedef void redisVmPreloadProc(redisClient
*c
, struct redisCommand
*cmd
, int argc
, robj
**argv
);
462 struct redisCommand
{
464 redisCommandProc
*proc
; /* function implementing the command */
467 /* Use a function to determine which keys need to be loaded
468 * in the background prior to executing this command. Takes precedence
469 * over vm_firstkey and others, ignored when NULL */
470 redisVmPreloadProc
*vm_preload_proc
;
471 /* What keys should be loaded in background when calling this command? */
472 int vm_firstkey
; /* The first argument that's a key (0 = no keys) */
473 int vm_lastkey
; /* The last argument that's a key */
474 int vm_keystep
; /* The step between first and last key */
477 struct redisFunctionSym
{
479 unsigned long pointer
;
482 typedef struct _redisSortObject
{
490 typedef struct _redisSortOperation
{
493 } redisSortOperation
;
495 /* ZSETs use a specialized version of Skiplists */
497 typedef struct zskiplistNode
{
498 struct zskiplistNode
**forward
;
499 struct zskiplistNode
*backward
;
505 typedef struct zskiplist
{
506 struct zskiplistNode
*header
, *tail
;
507 unsigned long length
;
511 typedef struct zset
{
516 /* Our shared "common" objects */
518 #define REDIS_SHARED_INTEGERS 10000
519 struct sharedObjectsStruct
{
520 robj
*crlf
, *ok
, *err
, *emptybulk
, *czero
, *cone
, *pong
, *space
,
521 *colon
, *nullbulk
, *nullmultibulk
, *queued
,
522 *emptymultibulk
, *wrongtypeerr
, *nokeyerr
, *syntaxerr
, *sameobjecterr
,
523 *outofrangeerr
, *plus
,
524 *select0
, *select1
, *select2
, *select3
, *select4
,
525 *select5
, *select6
, *select7
, *select8
, *select9
,
526 *messagebulk
, *pmessagebulk
, *subscribebulk
, *unsubscribebulk
, *mbulk3
,
527 *mbulk4
, *psubscribebulk
, *punsubscribebulk
,
528 *integers
[REDIS_SHARED_INTEGERS
];
/* Global variables that are actually used as constants. These doubles are
 * used for the on-disk serialization of double values, and are initialized
 * at runtime to avoid strange compiler optimizations. */
static double R_Zero;
static double R_PosInf;
static double R_NegInf;
static double R_Nan;
537 /* VM threaded I/O request message */
538 #define REDIS_IOJOB_LOAD 0 /* Load from disk to memory */
539 #define REDIS_IOJOB_PREPARE_SWAP 1 /* Compute needed pages */
540 #define REDIS_IOJOB_DO_SWAP 2 /* Swap from memory to disk */
541 typedef struct iojob
{
542 int type
; /* Request type, REDIS_IOJOB_* */
543 redisDb
*db
;/* Redis database */
544 robj
*key
; /* This I/O request is about swapping this key */
545 robj
*val
; /* the value to swap for REDIS_IOJOB_*_SWAP, otherwise this
546 * field is populated by the I/O thread for REDIS_IOJOB_LOAD. */
547 off_t page
; /* Swap page where to read/write the object */
548 off_t pages
; /* Swap pages needed to save object. PREPARE_SWAP return val */
549 int canceled
; /* True if this command was canceled by blocking side of VM */
550 pthread_t thread
; /* ID of the thread processing this entry */
553 /*================================ Prototypes =============================== */
555 static void freeStringObject(robj
*o
);
556 static void freeListObject(robj
*o
);
557 static void freeSetObject(robj
*o
);
558 static void decrRefCount(void *o
);
559 static robj
*createObject(int type
, void *ptr
);
560 static void freeClient(redisClient
*c
);
561 static int rdbLoad(char *filename
);
562 static void addReply(redisClient
*c
, robj
*obj
);
563 static void addReplySds(redisClient
*c
, sds s
);
564 static void incrRefCount(robj
*o
);
565 static int rdbSaveBackground(char *filename
);
566 static robj
*createStringObject(char *ptr
, size_t len
);
567 static robj
*dupStringObject(robj
*o
);
568 static void replicationFeedSlaves(list
*slaves
, int dictid
, robj
**argv
, int argc
);
569 static void replicationFeedMonitors(list
*monitors
, int dictid
, robj
**argv
, int argc
);
570 static void flushAppendOnlyFile(void);
571 static void feedAppendOnlyFile(struct redisCommand
*cmd
, int dictid
, robj
**argv
, int argc
);
572 static int syncWithMaster(void);
573 static robj
*tryObjectEncoding(robj
*o
);
574 static robj
*getDecodedObject(robj
*o
);
575 static int removeExpire(redisDb
*db
, robj
*key
);
576 static int expireIfNeeded(redisDb
*db
, robj
*key
);
577 static int deleteIfVolatile(redisDb
*db
, robj
*key
);
578 static int deleteIfSwapped(redisDb
*db
, robj
*key
);
579 static int deleteKey(redisDb
*db
, robj
*key
);
580 static time_t getExpire(redisDb
*db
, robj
*key
);
581 static int setExpire(redisDb
*db
, robj
*key
, time_t when
);
582 static void updateSlavesWaitingBgsave(int bgsaveerr
);
583 static void freeMemoryIfNeeded(void);
584 static int processCommand(redisClient
*c
);
585 static void setupSigSegvAction(void);
586 static void rdbRemoveTempFile(pid_t childpid
);
587 static void aofRemoveTempFile(pid_t childpid
);
588 static size_t stringObjectLen(robj
*o
);
589 static void processInputBuffer(redisClient
*c
);
590 static zskiplist
*zslCreate(void);
591 static void zslFree(zskiplist
*zsl
);
592 static void zslInsert(zskiplist
*zsl
, double score
, robj
*obj
);
593 static void sendReplyToClientWritev(aeEventLoop
*el
, int fd
, void *privdata
, int mask
);
594 static void initClientMultiState(redisClient
*c
);
595 static void freeClientMultiState(redisClient
*c
);
596 static void queueMultiCommand(redisClient
*c
, struct redisCommand
*cmd
);
597 static void unblockClientWaitingData(redisClient
*c
);
598 static int handleClientsWaitingListPush(redisClient
*c
, robj
*key
, robj
*ele
);
599 static void vmInit(void);
600 static void vmMarkPagesFree(off_t page
, off_t count
);
601 static robj
*vmLoadObject(robj
*key
);
602 static robj
*vmPreviewObject(robj
*key
);
603 static int vmSwapOneObjectBlocking(void);
604 static int vmSwapOneObjectThreaded(void);
605 static int vmCanSwapOut(void);
606 static int tryFreeOneObjectFromFreelist(void);
607 static void acceptHandler(aeEventLoop
*el
, int fd
, void *privdata
, int mask
);
608 static void vmThreadedIOCompletedJob(aeEventLoop
*el
, int fd
, void *privdata
, int mask
);
609 static void vmCancelThreadedIOJob(robj
*o
);
610 static void lockThreadedIO(void);
611 static void unlockThreadedIO(void);
612 static int vmSwapObjectThreaded(robj
*key
, robj
*val
, redisDb
*db
);
613 static void freeIOJob(iojob
*j
);
614 static void queueIOJob(iojob
*j
);
615 static int vmWriteObjectOnSwap(robj
*o
, off_t page
);
616 static robj
*vmReadObjectFromSwap(off_t page
, int type
);
617 static void waitEmptyIOJobsQueue(void);
618 static void vmReopenSwapFile(void);
619 static int vmFreePage(off_t page
);
620 static void zunionInterBlockClientOnSwappedKeys(redisClient
*c
, struct redisCommand
*cmd
, int argc
, robj
**argv
);
621 static void execBlockClientOnSwappedKeys(redisClient
*c
, struct redisCommand
*cmd
, int argc
, robj
**argv
);
622 static int blockClientOnSwappedKeys(redisClient
*c
, struct redisCommand
*cmd
);
623 static int dontWaitForSwappedKey(redisClient
*c
, robj
*key
);
624 static void handleClientsBlockedOnSwappedKey(redisDb
*db
, robj
*key
);
625 static void readQueryFromClient(aeEventLoop
*el
, int fd
, void *privdata
, int mask
);
626 static struct redisCommand
*lookupCommand(char *name
);
627 static void call(redisClient
*c
, struct redisCommand
*cmd
);
628 static void resetClient(redisClient
*c
);
629 static void convertToRealHash(robj
*o
);
630 static int pubsubUnsubscribeAllChannels(redisClient
*c
, int notify
);
631 static int pubsubUnsubscribeAllPatterns(redisClient
*c
, int notify
);
632 static void freePubsubPattern(void *p
);
633 static int listMatchPubsubPattern(void *a
, void *b
);
634 static int compareStringObjects(robj
*a
, robj
*b
);
635 static int equalStringObjects(robj
*a
, robj
*b
);
637 static int rewriteAppendOnlyFileBackground(void);
638 static int vmSwapObjectBlocking(robj
*key
, robj
*val
);
639 static int prepareForShutdown();
640 static void touchWatchedKey(redisDb
*db
, robj
*key
);
641 static void touchWatchedKeysOnFlush(int dbid
);
642 static void unwatchAllKeys(redisClient
*c
);
644 static void authCommand(redisClient
*c
);
645 static void pingCommand(redisClient
*c
);
646 static void echoCommand(redisClient
*c
);
647 static void setCommand(redisClient
*c
);
648 static void setnxCommand(redisClient
*c
);
649 static void setexCommand(redisClient
*c
);
650 static void getCommand(redisClient
*c
);
651 static void delCommand(redisClient
*c
);
652 static void existsCommand(redisClient
*c
);
653 static void incrCommand(redisClient
*c
);
654 static void decrCommand(redisClient
*c
);
655 static void incrbyCommand(redisClient
*c
);
656 static void decrbyCommand(redisClient
*c
);
657 static void selectCommand(redisClient
*c
);
658 static void randomkeyCommand(redisClient
*c
);
659 static void keysCommand(redisClient
*c
);
660 static void dbsizeCommand(redisClient
*c
);
661 static void lastsaveCommand(redisClient
*c
);
662 static void saveCommand(redisClient
*c
);
663 static void bgsaveCommand(redisClient
*c
);
664 static void bgrewriteaofCommand(redisClient
*c
);
665 static void shutdownCommand(redisClient
*c
);
666 static void moveCommand(redisClient
*c
);
667 static void renameCommand(redisClient
*c
);
668 static void renamenxCommand(redisClient
*c
);
669 static void lpushCommand(redisClient
*c
);
670 static void rpushCommand(redisClient
*c
);
671 static void lpopCommand(redisClient
*c
);
672 static void rpopCommand(redisClient
*c
);
673 static void llenCommand(redisClient
*c
);
674 static void lindexCommand(redisClient
*c
);
675 static void lrangeCommand(redisClient
*c
);
676 static void ltrimCommand(redisClient
*c
);
677 static void typeCommand(redisClient
*c
);
678 static void lsetCommand(redisClient
*c
);
679 static void saddCommand(redisClient
*c
);
680 static void sremCommand(redisClient
*c
);
681 static void smoveCommand(redisClient
*c
);
682 static void sismemberCommand(redisClient
*c
);
683 static void scardCommand(redisClient
*c
);
684 static void spopCommand(redisClient
*c
);
685 static void srandmemberCommand(redisClient
*c
);
686 static void sinterCommand(redisClient
*c
);
687 static void sinterstoreCommand(redisClient
*c
);
688 static void sunionCommand(redisClient
*c
);
689 static void sunionstoreCommand(redisClient
*c
);
690 static void sdiffCommand(redisClient
*c
);
691 static void sdiffstoreCommand(redisClient
*c
);
692 static void syncCommand(redisClient
*c
);
693 static void flushdbCommand(redisClient
*c
);
694 static void flushallCommand(redisClient
*c
);
695 static void sortCommand(redisClient
*c
);
696 static void lremCommand(redisClient
*c
);
697 static void rpoplpushcommand(redisClient
*c
);
698 static void infoCommand(redisClient
*c
);
699 static void mgetCommand(redisClient
*c
);
700 static void monitorCommand(redisClient
*c
);
701 static void expireCommand(redisClient
*c
);
702 static void expireatCommand(redisClient
*c
);
703 static void getsetCommand(redisClient
*c
);
704 static void ttlCommand(redisClient
*c
);
705 static void slaveofCommand(redisClient
*c
);
706 static void debugCommand(redisClient
*c
);
707 static void msetCommand(redisClient
*c
);
708 static void msetnxCommand(redisClient
*c
);
709 static void zaddCommand(redisClient
*c
);
710 static void zincrbyCommand(redisClient
*c
);
711 static void zrangeCommand(redisClient
*c
);
712 static void zrangebyscoreCommand(redisClient
*c
);
713 static void zcountCommand(redisClient
*c
);
714 static void zrevrangeCommand(redisClient
*c
);
715 static void zcardCommand(redisClient
*c
);
716 static void zremCommand(redisClient
*c
);
717 static void zscoreCommand(redisClient
*c
);
718 static void zremrangebyscoreCommand(redisClient
*c
);
719 static void multiCommand(redisClient
*c
);
720 static void execCommand(redisClient
*c
);
721 static void discardCommand(redisClient
*c
);
722 static void blpopCommand(redisClient
*c
);
723 static void brpopCommand(redisClient
*c
);
724 static void appendCommand(redisClient
*c
);
725 static void substrCommand(redisClient
*c
);
726 static void zrankCommand(redisClient
*c
);
727 static void zrevrankCommand(redisClient
*c
);
728 static void hsetCommand(redisClient
*c
);
729 static void hsetnxCommand(redisClient
*c
);
730 static void hgetCommand(redisClient
*c
);
731 static void hmsetCommand(redisClient
*c
);
732 static void hmgetCommand(redisClient
*c
);
733 static void hdelCommand(redisClient
*c
);
734 static void hlenCommand(redisClient
*c
);
735 static void zremrangebyrankCommand(redisClient
*c
);
736 static void zunionstoreCommand(redisClient
*c
);
737 static void zinterstoreCommand(redisClient
*c
);
738 static void hkeysCommand(redisClient
*c
);
739 static void hvalsCommand(redisClient
*c
);
740 static void hgetallCommand(redisClient
*c
);
741 static void hexistsCommand(redisClient
*c
);
742 static void configCommand(redisClient
*c
);
743 static void hincrbyCommand(redisClient
*c
);
744 static void subscribeCommand(redisClient
*c
);
745 static void unsubscribeCommand(redisClient
*c
);
746 static void psubscribeCommand(redisClient
*c
);
747 static void punsubscribeCommand(redisClient
*c
);
748 static void publishCommand(redisClient
*c
);
749 static void watchCommand(redisClient
*c
);
750 static void unwatchCommand(redisClient
*c
);
752 /*================================= Globals ================================= */
755 static struct redisServer server
; /* server global state */
756 static struct redisCommand cmdTable
[] = {
757 {"get",getCommand
,2,REDIS_CMD_INLINE
,NULL
,1,1,1},
758 {"set",setCommand
,3,REDIS_CMD_BULK
|REDIS_CMD_DENYOOM
,NULL
,0,0,0},
759 {"setnx",setnxCommand
,3,REDIS_CMD_BULK
|REDIS_CMD_DENYOOM
,NULL
,0,0,0},
760 {"setex",setexCommand
,4,REDIS_CMD_BULK
|REDIS_CMD_DENYOOM
,NULL
,0,0,0},
761 {"append",appendCommand
,3,REDIS_CMD_BULK
|REDIS_CMD_DENYOOM
,NULL
,1,1,1},
762 {"substr",substrCommand
,4,REDIS_CMD_INLINE
,NULL
,1,1,1},
763 {"del",delCommand
,-2,REDIS_CMD_INLINE
,NULL
,0,0,0},
764 {"exists",existsCommand
,2,REDIS_CMD_INLINE
,NULL
,1,1,1},
765 {"incr",incrCommand
,2,REDIS_CMD_INLINE
|REDIS_CMD_DENYOOM
,NULL
,1,1,1},
766 {"decr",decrCommand
,2,REDIS_CMD_INLINE
|REDIS_CMD_DENYOOM
,NULL
,1,1,1},
767 {"mget",mgetCommand
,-2,REDIS_CMD_INLINE
,NULL
,1,-1,1},
768 {"rpush",rpushCommand
,3,REDIS_CMD_BULK
|REDIS_CMD_DENYOOM
,NULL
,1,1,1},
769 {"lpush",lpushCommand
,3,REDIS_CMD_BULK
|REDIS_CMD_DENYOOM
,NULL
,1,1,1},
770 {"rpop",rpopCommand
,2,REDIS_CMD_INLINE
,NULL
,1,1,1},
771 {"lpop",lpopCommand
,2,REDIS_CMD_INLINE
,NULL
,1,1,1},
772 {"brpop",brpopCommand
,-3,REDIS_CMD_INLINE
,NULL
,1,1,1},
773 {"blpop",blpopCommand
,-3,REDIS_CMD_INLINE
,NULL
,1,1,1},
774 {"llen",llenCommand
,2,REDIS_CMD_INLINE
,NULL
,1,1,1},
775 {"lindex",lindexCommand
,3,REDIS_CMD_INLINE
,NULL
,1,1,1},
776 {"lset",lsetCommand
,4,REDIS_CMD_BULK
|REDIS_CMD_DENYOOM
,NULL
,1,1,1},
777 {"lrange",lrangeCommand
,4,REDIS_CMD_INLINE
,NULL
,1,1,1},
778 {"ltrim",ltrimCommand
,4,REDIS_CMD_INLINE
,NULL
,1,1,1},
779 {"lrem",lremCommand
,4,REDIS_CMD_BULK
,NULL
,1,1,1},
780 {"rpoplpush",rpoplpushcommand
,3,REDIS_CMD_INLINE
|REDIS_CMD_DENYOOM
,NULL
,1,2,1},
781 {"sadd",saddCommand
,3,REDIS_CMD_BULK
|REDIS_CMD_DENYOOM
,NULL
,1,1,1},
782 {"srem",sremCommand
,3,REDIS_CMD_BULK
,NULL
,1,1,1},
783 {"smove",smoveCommand
,4,REDIS_CMD_BULK
,NULL
,1,2,1},
784 {"sismember",sismemberCommand
,3,REDIS_CMD_BULK
,NULL
,1,1,1},
785 {"scard",scardCommand
,2,REDIS_CMD_INLINE
,NULL
,1,1,1},
786 {"spop",spopCommand
,2,REDIS_CMD_INLINE
,NULL
,1,1,1},
787 {"srandmember",srandmemberCommand
,2,REDIS_CMD_INLINE
,NULL
,1,1,1},
788 {"sinter",sinterCommand
,-2,REDIS_CMD_INLINE
|REDIS_CMD_DENYOOM
,NULL
,1,-1,1},
789 {"sinterstore",sinterstoreCommand
,-3,REDIS_CMD_INLINE
|REDIS_CMD_DENYOOM
,NULL
,2,-1,1},
790 {"sunion",sunionCommand
,-2,REDIS_CMD_INLINE
|REDIS_CMD_DENYOOM
,NULL
,1,-1,1},
791 {"sunionstore",sunionstoreCommand
,-3,REDIS_CMD_INLINE
|REDIS_CMD_DENYOOM
,NULL
,2,-1,1},
792 {"sdiff",sdiffCommand
,-2,REDIS_CMD_INLINE
|REDIS_CMD_DENYOOM
,NULL
,1,-1,1},
793 {"sdiffstore",sdiffstoreCommand
,-3,REDIS_CMD_INLINE
|REDIS_CMD_DENYOOM
,NULL
,2,-1,1},
794 {"smembers",sinterCommand
,2,REDIS_CMD_INLINE
,NULL
,1,1,1},
795 {"zadd",zaddCommand
,4,REDIS_CMD_BULK
|REDIS_CMD_DENYOOM
,NULL
,1,1,1},
796 {"zincrby",zincrbyCommand
,4,REDIS_CMD_BULK
|REDIS_CMD_DENYOOM
,NULL
,1,1,1},
797 {"zrem",zremCommand
,3,REDIS_CMD_BULK
,NULL
,1,1,1},
798 {"zremrangebyscore",zremrangebyscoreCommand
,4,REDIS_CMD_INLINE
,NULL
,1,1,1},
799 {"zremrangebyrank",zremrangebyrankCommand
,4,REDIS_CMD_INLINE
,NULL
,1,1,1},
800 {"zunionstore",zunionstoreCommand
,-4,REDIS_CMD_INLINE
|REDIS_CMD_DENYOOM
,zunionInterBlockClientOnSwappedKeys
,0,0,0},
801 {"zinterstore",zinterstoreCommand
,-4,REDIS_CMD_INLINE
|REDIS_CMD_DENYOOM
,zunionInterBlockClientOnSwappedKeys
,0,0,0},
802 {"zrange",zrangeCommand
,-4,REDIS_CMD_INLINE
,NULL
,1,1,1},
803 {"zrangebyscore",zrangebyscoreCommand
,-4,REDIS_CMD_INLINE
,NULL
,1,1,1},
804 {"zcount",zcountCommand
,4,REDIS_CMD_INLINE
,NULL
,1,1,1},
805 {"zrevrange",zrevrangeCommand
,-4,REDIS_CMD_INLINE
,NULL
,1,1,1},
806 {"zcard",zcardCommand
,2,REDIS_CMD_INLINE
,NULL
,1,1,1},
807 {"zscore",zscoreCommand
,3,REDIS_CMD_BULK
|REDIS_CMD_DENYOOM
,NULL
,1,1,1},
808 {"zrank",zrankCommand
,3,REDIS_CMD_BULK
,NULL
,1,1,1},
809 {"zrevrank",zrevrankCommand
,3,REDIS_CMD_BULK
,NULL
,1,1,1},
810 {"hset",hsetCommand
,4,REDIS_CMD_BULK
|REDIS_CMD_DENYOOM
,NULL
,1,1,1},
811 {"hsetnx",hsetnxCommand
,4,REDIS_CMD_BULK
|REDIS_CMD_DENYOOM
,NULL
,1,1,1},
812 {"hget",hgetCommand
,3,REDIS_CMD_BULK
,NULL
,1,1,1},
813 {"hmset",hmsetCommand
,-4,REDIS_CMD_BULK
|REDIS_CMD_DENYOOM
,NULL
,1,1,1},
814 {"hmget",hmgetCommand
,-3,REDIS_CMD_BULK
,NULL
,1,1,1},
815 {"hincrby",hincrbyCommand
,4,REDIS_CMD_INLINE
|REDIS_CMD_DENYOOM
,NULL
,1,1,1},
816 {"hdel",hdelCommand
,3,REDIS_CMD_BULK
,NULL
,1,1,1},
817 {"hlen",hlenCommand
,2,REDIS_CMD_INLINE
,NULL
,1,1,1},
818 {"hkeys",hkeysCommand
,2,REDIS_CMD_INLINE
,NULL
,1,1,1},
819 {"hvals",hvalsCommand
,2,REDIS_CMD_INLINE
,NULL
,1,1,1},
820 {"hgetall",hgetallCommand
,2,REDIS_CMD_INLINE
,NULL
,1,1,1},
821 {"hexists",hexistsCommand
,3,REDIS_CMD_BULK
,NULL
,1,1,1},
822 {"incrby",incrbyCommand
,3,REDIS_CMD_INLINE
|REDIS_CMD_DENYOOM
,NULL
,1,1,1},
823 {"decrby",decrbyCommand
,3,REDIS_CMD_INLINE
|REDIS_CMD_DENYOOM
,NULL
,1,1,1},
824 {"getset",getsetCommand
,3,REDIS_CMD_BULK
|REDIS_CMD_DENYOOM
,NULL
,1,1,1},
825 {"mset",msetCommand
,-3,REDIS_CMD_BULK
|REDIS_CMD_DENYOOM
,NULL
,1,-1,2},
826 {"msetnx",msetnxCommand
,-3,REDIS_CMD_BULK
|REDIS_CMD_DENYOOM
,NULL
,1,-1,2},
827 {"randomkey",randomkeyCommand
,1,REDIS_CMD_INLINE
,NULL
,0,0,0},
828 {"select",selectCommand
,2,REDIS_CMD_INLINE
,NULL
,0,0,0},
829 {"move",moveCommand
,3,REDIS_CMD_INLINE
,NULL
,1,1,1},
830 {"rename",renameCommand
,3,REDIS_CMD_INLINE
,NULL
,1,1,1},
831 {"renamenx",renamenxCommand
,3,REDIS_CMD_INLINE
,NULL
,1,1,1},
832 {"expire",expireCommand
,3,REDIS_CMD_INLINE
,NULL
,0,0,0},
833 {"expireat",expireatCommand
,3,REDIS_CMD_INLINE
,NULL
,0,0,0},
834 {"keys",keysCommand
,2,REDIS_CMD_INLINE
,NULL
,0,0,0},
835 {"dbsize",dbsizeCommand
,1,REDIS_CMD_INLINE
,NULL
,0,0,0},
836 {"auth",authCommand
,2,REDIS_CMD_INLINE
,NULL
,0,0,0},
837 {"ping",pingCommand
,1,REDIS_CMD_INLINE
,NULL
,0,0,0},
838 {"echo",echoCommand
,2,REDIS_CMD_BULK
,NULL
,0,0,0},
839 {"save",saveCommand
,1,REDIS_CMD_INLINE
,NULL
,0,0,0},
840 {"bgsave",bgsaveCommand
,1,REDIS_CMD_INLINE
,NULL
,0,0,0},
841 {"bgrewriteaof",bgrewriteaofCommand
,1,REDIS_CMD_INLINE
,NULL
,0,0,0},
842 {"shutdown",shutdownCommand
,1,REDIS_CMD_INLINE
,NULL
,0,0,0},
843 {"lastsave",lastsaveCommand
,1,REDIS_CMD_INLINE
,NULL
,0,0,0},
844 {"type",typeCommand
,2,REDIS_CMD_INLINE
,NULL
,1,1,1},
845 {"multi",multiCommand
,1,REDIS_CMD_INLINE
,NULL
,0,0,0},
846 {"exec",execCommand
,1,REDIS_CMD_INLINE
|REDIS_CMD_DENYOOM
,execBlockClientOnSwappedKeys
,0,0,0},
847 {"discard",discardCommand
,1,REDIS_CMD_INLINE
,NULL
,0,0,0},
848 {"sync",syncCommand
,1,REDIS_CMD_INLINE
,NULL
,0,0,0},
849 {"flushdb",flushdbCommand
,1,REDIS_CMD_INLINE
,NULL
,0,0,0},
850 {"flushall",flushallCommand
,1,REDIS_CMD_INLINE
,NULL
,0,0,0},
851 {"sort",sortCommand
,-2,REDIS_CMD_INLINE
|REDIS_CMD_DENYOOM
,NULL
,1,1,1},
852 {"info",infoCommand
,1,REDIS_CMD_INLINE
,NULL
,0,0,0},
853 {"monitor",monitorCommand
,1,REDIS_CMD_INLINE
,NULL
,0,0,0},
854 {"ttl",ttlCommand
,2,REDIS_CMD_INLINE
,NULL
,1,1,1},
855 {"slaveof",slaveofCommand
,3,REDIS_CMD_INLINE
,NULL
,0,0,0},
856 {"debug",debugCommand
,-2,REDIS_CMD_INLINE
,NULL
,0,0,0},
857 {"config",configCommand
,-2,REDIS_CMD_BULK
,NULL
,0,0,0},
858 {"subscribe",subscribeCommand
,-2,REDIS_CMD_INLINE
,NULL
,0,0,0},
859 {"unsubscribe",unsubscribeCommand
,-1,REDIS_CMD_INLINE
,NULL
,0,0,0},
860 {"psubscribe",psubscribeCommand
,-2,REDIS_CMD_INLINE
,NULL
,0,0,0},
861 {"punsubscribe",punsubscribeCommand
,-1,REDIS_CMD_INLINE
,NULL
,0,0,0},
862 {"publish",publishCommand
,3,REDIS_CMD_BULK
|REDIS_CMD_FORCE_REPLICATION
,NULL
,0,0,0},
863 {"watch",watchCommand
,-2,REDIS_CMD_INLINE
,NULL
,0,0,0},
864 {"unwatch",unwatchCommand
,1,REDIS_CMD_INLINE
,NULL
,0,0,0},
865 {NULL
,NULL
,0,0,NULL
,0,0,0}
868 /*============================ Utility functions ============================ */
870 /* Glob-style pattern matching. */
871 static int stringmatchlen(const char *pattern
, int patternLen
,
872 const char *string
, int stringLen
, int nocase
)
877 while (pattern
[1] == '*') {
882 return 1; /* match */
884 if (stringmatchlen(pattern
+1, patternLen
-1,
885 string
, stringLen
, nocase
))
886 return 1; /* match */
890 return 0; /* no match */
894 return 0; /* no match */
904 not = pattern
[0] == '^';
911 if (pattern
[0] == '\\') {
914 if (pattern
[0] == string
[0])
916 } else if (pattern
[0] == ']') {
918 } else if (patternLen
== 0) {
922 } else if (pattern
[1] == '-' && patternLen
>= 3) {
923 int start
= pattern
[0];
924 int end
= pattern
[2];
932 start
= tolower(start
);
938 if (c
>= start
&& c
<= end
)
942 if (pattern
[0] == string
[0])
945 if (tolower((int)pattern
[0]) == tolower((int)string
[0]))
955 return 0; /* no match */
961 if (patternLen
>= 2) {
968 if (pattern
[0] != string
[0])
969 return 0; /* no match */
971 if (tolower((int)pattern
[0]) != tolower((int)string
[0]))
972 return 0; /* no match */
980 if (stringLen
== 0) {
981 while(*pattern
== '*') {
988 if (patternLen
== 0 && stringLen
== 0)
/* Convenience wrapper around stringmatchlen() for NUL-terminated inputs:
 * the lengths of pattern and string are measured with strlen() and passed
 * on together with the nocase flag. The result of stringmatchlen() is
 * returned unchanged. */
993 static int stringmatch(const char *pattern
, const char *string
, int nocase
) {
994 return stringmatchlen(pattern
,strlen(pattern
),string
,strlen(string
),nocase
);
997 /* Convert a string representing an amount of memory into the number of
998 * bytes, so for instance memtoll("1Gi") will return 1073741824 that is
1001 * On parsing error, if *err is not NULL, it's set to 1, otherwise it's
1003 static long long memtoll(const char *p
, int *err
) {
1006 long mul
; /* unit multiplier */
1008 unsigned int digits
;
1011 /* Search the first non digit character. */
1014 while(*u
&& isdigit(*u
)) u
++;
1015 if (*u
== '\0' || !strcasecmp(u
,"b")) {
1017 } else if (!strcasecmp(u
,"k")) {
1019 } else if (!strcasecmp(u
,"kb")) {
1021 } else if (!strcasecmp(u
,"m")) {
1023 } else if (!strcasecmp(u
,"mb")) {
1025 } else if (!strcasecmp(u
,"g")) {
1026 mul
= 1000L*1000*1000;
1027 } else if (!strcasecmp(u
,"gb")) {
1028 mul
= 1024L*1024*1024;
1034 if (digits
>= sizeof(buf
)) {
1038 memcpy(buf
,p
,digits
);
1040 val
= strtoll(buf
,NULL
,10);
1044 /* Convert a long long into a string. Returns the number of
1045 * characters needed to represent the number, that can be shorter if passed
1046 * buffer length is not enough to store the whole number. */
/* Visible contract: a zero-length buffer short-circuits to a return value
 * of 0, and the length l is capped at len-1 so that the nul terminator
 * still fits in the caller buffer. */
1047 static int ll2string(char *s
, size_t len
, long long value
) {
1049 unsigned long long v
;
1052 if (len
== 0) return 0;
/* NOTE(review): -value is evaluated as a signed long long before the
 * assignment to the unsigned v, so value == LLONG_MIN overflows (undefined
 * behavior in C). Negating after a cast to unsigned long long would avoid
 * this - confirm and fix. */
1053 v
= (value
< 0) ? -value
: value
;
1054 p
= buf
+31; /* point to the last character */
/* The sign is stored through p with a post-decrement, i.e. p walks
 * backwards through buf. */
1059 if (value
< 0) *p
-- = '-';
1062 if (l
+1 > len
) l
= len
-1; /* Make sure it fits, including the nul term */
1068 static void redisLog(int level
, const char *fmt
, ...) {
1072 fp
= (server
.logfile
== NULL
) ? stdout
: fopen(server
.logfile
,"a");
1076 if (level
>= server
.verbosity
) {
1082 strftime(buf
,64,"%d %b %H:%M:%S",localtime(&now
));
1083 fprintf(fp
,"[%d] %s %c ",(int)getpid(),buf
,c
[level
]);
1084 vfprintf(fp
, fmt
, ap
);
1090 if (server
.logfile
) fclose(fp
);
1093 /*====================== Hash table type implementation ==================== */
1095 /* This is a hash table type that uses the SDS dynamic strings library as
1096 * keys and redis objects as values (objects can hold SDS strings,
1099 static void dictVanillaFree(void *privdata
, void *val
)
1101 DICT_NOTUSED(privdata
);
/* Dict destructor callback for entries that point to a list: val is cast
 * to list* and handed to listRelease(). privdata is not used. */
1105 static void dictListDestructor(void *privdata
, void *val
)
1107 DICT_NOTUSED(privdata
);
1108 listRelease((list
*)val
);
1111 static int sdsDictKeyCompare(void *privdata
, const void *key1
,
1115 DICT_NOTUSED(privdata
);
1117 l1
= sdslen((sds
)key1
);
1118 l2
= sdslen((sds
)key2
);
1119 if (l1
!= l2
) return 0;
1120 return memcmp(key1
, key2
, l1
) == 0;
1123 static void dictRedisObjectDestructor(void *privdata
, void *val
)
1125 DICT_NOTUSED(privdata
);
1127 if (val
== NULL
) return; /* Values of swapped out keys as set to NULL */
1131 static int dictObjKeyCompare(void *privdata
, const void *key1
,
1134 const robj
*o1
= key1
, *o2
= key2
;
1135 return sdsDictKeyCompare(privdata
,o1
->ptr
,o2
->ptr
);
/* Hash callback for keys that are robj pointers: feeds the sdslen() bytes
 * found at o->ptr to dictGenHashFunction() and returns its result.
 * NOTE(review): o->ptr is cast to sds unconditionally, so this presumably
 * expects raw (unencoded) string objects - confirm with the callers. */
1138 static unsigned int dictObjHash(const void *key
) {
1139 const robj
*o
= key
;
1140 return dictGenHashFunction(o
->ptr
, sdslen((sds
)o
->ptr
));
1143 static int dictEncObjKeyCompare(void *privdata
, const void *key1
,
1146 robj
*o1
= (robj
*) key1
, *o2
= (robj
*) key2
;
1149 if (o1
->encoding
== REDIS_ENCODING_INT
&&
1150 o2
->encoding
== REDIS_ENCODING_INT
)
1151 return o1
->ptr
== o2
->ptr
;
1153 o1
= getDecodedObject(o1
);
1154 o2
= getDecodedObject(o2
);
1155 cmp
= sdsDictKeyCompare(privdata
,o1
->ptr
,o2
->ptr
);
1161 static unsigned int dictEncObjHash(const void *key
) {
1162 robj
*o
= (robj
*) key
;
1164 if (o
->encoding
== REDIS_ENCODING_RAW
) {
1165 return dictGenHashFunction(o
->ptr
, sdslen((sds
)o
->ptr
));
1167 if (o
->encoding
== REDIS_ENCODING_INT
) {
1171 len
= ll2string(buf
,32,(long)o
->ptr
);
1172 return dictGenHashFunction((unsigned char*)buf
, len
);
1176 o
= getDecodedObject(o
);
1177 hash
= dictGenHashFunction(o
->ptr
, sdslen((sds
)o
->ptr
));
1184 /* Sets type and expires */
1185 static dictType setDictType
= {
1186 dictEncObjHash
, /* hash function */
1189 dictEncObjKeyCompare
, /* key compare */
1190 dictRedisObjectDestructor
, /* key destructor */
1191 NULL
/* val destructor */
1194 /* Sorted sets hash (note: a skiplist is used in addition to the hash table) */
1195 static dictType zsetDictType
= {
1196 dictEncObjHash
, /* hash function */
1199 dictEncObjKeyCompare
, /* key compare */
1200 dictRedisObjectDestructor
, /* key destructor */
1201 dictVanillaFree
/* val destructor of malloc(sizeof(double)) */
1205 static dictType dbDictType
= {
1206 dictObjHash
, /* hash function */
1209 dictObjKeyCompare
, /* key compare */
1210 dictRedisObjectDestructor
, /* key destructor */
1211 dictRedisObjectDestructor
/* val destructor */
1215 static dictType keyptrDictType
= {
1216 dictObjHash
, /* hash function */
1219 dictObjKeyCompare
, /* key compare */
1220 dictRedisObjectDestructor
, /* key destructor */
1221 NULL
/* val destructor */
1224 /* Hash type hash table (note that small hashes are represented with zipmaps) */
1225 static dictType hashDictType
= {
1226 dictEncObjHash
, /* hash function */
1229 dictEncObjKeyCompare
, /* key compare */
1230 dictRedisObjectDestructor
, /* key destructor */
1231 dictRedisObjectDestructor
/* val destructor */
1234 /* Keylist hash table type has unencoded redis objects as keys and
1235 * lists as values. It's used for blocking operations (BLPOP) and to
1236 * map swapped keys to a list of clients waiting for these keys to be loaded. */
1237 static dictType keylistDictType
= {
1238 dictObjHash
, /* hash function */
1241 dictObjKeyCompare
, /* key compare */
1242 dictRedisObjectDestructor
, /* key destructor */
1243 dictListDestructor
/* val destructor */
1246 static void version();
1248 /* ========================= Random utility functions ======================= */
1250 /* Redis generally does not try to recover from out of memory conditions
1251 * when allocating objects or strings, it is not clear if it will be possible
1252 * to report this condition to the client since the networking layer itself
1253 * is based on heap allocation for send buffers, so we simply abort.
1254 * At least the code will be simpler to read... */
1255 static void oom(const char *msg
) {
1256 redisLog(REDIS_WARNING
, "%s: Out of memory\n",msg
);
1261 /* ====================== Redis server networking stuff ===================== */
1262 static void closeTimedoutClients(void) {
1265 time_t now
= time(NULL
);
1268 listRewind(server
.clients
,&li
);
1269 while ((ln
= listNext(&li
)) != NULL
) {
1270 c
= listNodeValue(ln
);
1271 if (server
.maxidletime
&&
1272 !(c
->flags
& REDIS_SLAVE
) && /* no timeout for slaves */
1273 !(c
->flags
& REDIS_MASTER
) && /* no timeout for masters */
1274 dictSize(c
->pubsub_channels
) == 0 && /* no timeout for pubsub */
1275 listLength(c
->pubsub_patterns
) == 0 &&
1276 (now
- c
->lastinteraction
> server
.maxidletime
))
1278 redisLog(REDIS_VERBOSE
,"Closing idle client");
1280 } else if (c
->flags
& REDIS_BLOCKED
) {
1281 if (c
->blockingto
!= 0 && c
->blockingto
< now
) {
1282 addReply(c
,shared
.nullmultibulk
);
1283 unblockClientWaitingData(c
);
/* Tells whether a hash table should be resized because it is sparsely
 * filled. Returns nonzero only when the table has slots and entries, has
 * more than DICT_HT_INITIAL_SIZE slots, and its fill percentage
 * (used*100/size, integer division) is below REDIS_HT_MINFILL. */
1289 static int htNeedsResize(dict
*dict
) {
1290 long long size
, used
;
1292 size
= dictSlots(dict
);
1293 used
= dictSize(dict
);
1294 return (size
&& used
&& size
> DICT_HT_INITIAL_SIZE
&&
1295 (used
*100/size
< REDIS_HT_MINFILL
));
1298 /* If the percentage of used slots in the HT drops below REDIS_HT_MINFILL
1299 * we resize the hash table to save memory */
1300 static void tryResizeHashTables(void) {
1303 for (j
= 0; j
< server
.dbnum
; j
++) {
1304 if (htNeedsResize(server
.db
[j
].dict
))
1305 dictResize(server
.db
[j
].dict
);
1306 if (htNeedsResize(server
.db
[j
].expires
))
1307 dictResize(server
.db
[j
].expires
);
1311 /* Our hash table implementation performs rehashing incrementally while
1312 * we write/read from the hash table. Still if the server is idle, the hash
1313 * table will use two tables for a long time. So we try to use 1 millisecond
1314 * of CPU time at every serverCron() loop in order to rehash some key. */
1315 static void incrementallyRehash(void) {
1318 for (j
= 0; j
< server
.dbnum
; j
++) {
1319 if (dictIsRehashing(server
.db
[j
].dict
)) {
1320 dictRehashMilliseconds(server
.db
[j
].dict
,1);
1321 break; /* already used our millisecond for this loop... */
1326 /* A background saving child (BGSAVE) terminated its work. Handle this. */
1327 void backgroundSaveDoneHandler(int statloc
) {
1328 int exitcode
= WEXITSTATUS(statloc
);
1329 int bysignal
= WIFSIGNALED(statloc
);
1331 if (!bysignal
&& exitcode
== 0) {
1332 redisLog(REDIS_NOTICE
,
1333 "Background saving terminated with success");
1335 server
.lastsave
= time(NULL
);
1336 } else if (!bysignal
&& exitcode
!= 0) {
1337 redisLog(REDIS_WARNING
, "Background saving error");
1339 redisLog(REDIS_WARNING
,
1340 "Background saving terminated by signal %d", WTERMSIG(statloc
));
1341 rdbRemoveTempFile(server
.bgsavechildpid
);
1343 server
.bgsavechildpid
= -1;
1344 /* Possibly there are slaves waiting for a BGSAVE in order to be served
1345 * (the first stage of SYNC is a bulk transfer of dump.rdb) */
1346 updateSlavesWaitingBgsave(exitcode
== 0 ? REDIS_OK
: REDIS_ERR
);
1349 /* A background append only file rewriting (BGREWRITEAOF) terminated its work.
1351 void backgroundRewriteDoneHandler(int statloc
) {
1352 int exitcode
= WEXITSTATUS(statloc
);
1353 int bysignal
= WIFSIGNALED(statloc
);
1355 if (!bysignal
&& exitcode
== 0) {
1359 redisLog(REDIS_NOTICE
,
1360 "Background append only file rewriting terminated with success");
1361 /* Now it's time to flush the differences accumulated by the parent */
1362 snprintf(tmpfile
,256,"temp-rewriteaof-bg-%d.aof", (int) server
.bgrewritechildpid
);
1363 fd
= open(tmpfile
,O_WRONLY
|O_APPEND
);
1365 redisLog(REDIS_WARNING
, "Not able to open the temp append only file produced by the child: %s", strerror(errno
));
1368 /* Flush our data... */
1369 if (write(fd
,server
.bgrewritebuf
,sdslen(server
.bgrewritebuf
)) !=
1370 (signed) sdslen(server
.bgrewritebuf
)) {
1371 redisLog(REDIS_WARNING
, "Error or short write trying to flush the parent diff of the append log file in the child temp file: %s", strerror(errno
));
1375 redisLog(REDIS_NOTICE
,"Parent diff flushed into the new append log file with success (%lu bytes)",sdslen(server
.bgrewritebuf
));
1376 /* Now our work is to rename the temp file into the stable file. And
1377 * switch the file descriptor used by the server for append only. */
1378 if (rename(tmpfile
,server
.appendfilename
) == -1) {
1379 redisLog(REDIS_WARNING
,"Can't rename the temp append only file into the stable one: %s", strerror(errno
));
1383 /* Mission completed... almost */
1384 redisLog(REDIS_NOTICE
,"Append only file successfully rewritten.");
1385 if (server
.appendfd
!= -1) {
1386 /* If append only is actually enabled... */
1387 close(server
.appendfd
);
1388 server
.appendfd
= fd
;
1389 if (server
.appendfsync
!= APPENDFSYNC_NO
) aof_fsync(fd
);
1390 server
.appendseldb
= -1; /* Make sure it will issue SELECT */
1391 redisLog(REDIS_NOTICE
,"The new append only file was selected for future appends.");
1393 /* If append only is disabled we just generate a dump in this
1394 * format. Why not? */
1397 } else if (!bysignal
&& exitcode
!= 0) {
1398 redisLog(REDIS_WARNING
, "Background append only file rewriting error");
1400 redisLog(REDIS_WARNING
,
1401 "Background append only file rewriting terminated by signal %d",
1405 sdsfree(server
.bgrewritebuf
);
1406 server
.bgrewritebuf
= sdsempty();
1407 aofRemoveTempFile(server
.bgrewritechildpid
);
1408 server
.bgrewritechildpid
= -1;
1411 /* This function is called once a background process of some kind terminates,
1412 * as we want to avoid resizing the hash tables when there is a child in order
1413 * to play well with copy-on-write (otherwise when a resize happens lots of
1414 * memory pages are copied). The goal of this function is to update the ability
1415 * for dict.c to resize the hash tables according to whether or not we have
1416 * running children. */
1417 static void updateDictResizePolicy(void) {
1418 if (server
.bgsavechildpid
== -1 && server
.bgrewritechildpid
== -1)
1421 dictDisableResize();
1424 static int serverCron(struct aeEventLoop
*eventLoop
, long long id
, void *clientData
) {
1425 int j
, loops
= server
.cronloops
++;
1426 REDIS_NOTUSED(eventLoop
);
1428 REDIS_NOTUSED(clientData
);
1430 /* We take a cached value of the unix time in the global state because
1431 * with virtual memory and aging we need to store the current time
1432 * in objects at every object access, and accuracy is not needed.
1433 * To access a global var is faster than calling time(NULL) */
1434 server
.unixtime
= time(NULL
);
1436 /* We received a SIGTERM, shutting down here in a safe way, as it is
1437 * not ok doing so inside the signal handler. */
1438 if (server
.shutdown_asap
) {
1439 if (prepareForShutdown() == REDIS_OK
) exit(0);
1440 redisLog(REDIS_WARNING
,"SIGTERM received but errors trying to shut down the server, check the logs for more information");
1443 /* Show some info about non-empty databases */
1444 for (j
= 0; j
< server
.dbnum
; j
++) {
1445 long long size
, used
, vkeys
;
1447 size
= dictSlots(server
.db
[j
].dict
);
1448 used
= dictSize(server
.db
[j
].dict
);
1449 vkeys
= dictSize(server
.db
[j
].expires
);
1450 if (!(loops
% 50) && (used
|| vkeys
)) {
1451 redisLog(REDIS_VERBOSE
,"DB %d: %lld keys (%lld volatile) in %lld slots HT.",j
,used
,vkeys
,size
);
1452 /* dictPrintStats(server.dict); */
1456 /* We don't want to resize the hash tables while a background saving
1457 * is in progress: the saving child is created using fork() that is
1458 * implemented with a copy-on-write semantic in most modern systems, so
1459 * if we resize the HT while there is the saving child at work actually
1460 * a lot of memory movements in the parent will cause a lot of pages
1462 if (server
.bgsavechildpid
== -1 && server
.bgrewritechildpid
== -1) {
1463 if (!(loops
% 10)) tryResizeHashTables();
1464 if (server
.activerehashing
) incrementallyRehash();
1467 /* Show information about connected clients */
1468 if (!(loops
% 50)) {
1469 redisLog(REDIS_VERBOSE
,"%d clients connected (%d slaves), %zu bytes in use",
1470 listLength(server
.clients
)-listLength(server
.slaves
),
1471 listLength(server
.slaves
),
1472 zmalloc_used_memory());
1475 /* Close connections of timedout clients */
1476 if ((server
.maxidletime
&& !(loops
% 100)) || server
.blpop_blocked_clients
)
1477 closeTimedoutClients();
1479 /* Check if a background saving or AOF rewrite in progress terminated */
1480 if (server
.bgsavechildpid
!= -1 || server
.bgrewritechildpid
!= -1) {
1484 if ((pid
= wait3(&statloc
,WNOHANG
,NULL
)) != 0) {
1485 if (pid
== server
.bgsavechildpid
) {
1486 backgroundSaveDoneHandler(statloc
);
1488 backgroundRewriteDoneHandler(statloc
);
1490 updateDictResizePolicy();
1493 /* If there is not a background saving in progress check if
1494 * we have to save now */
1495 time_t now
= time(NULL
);
1496 for (j
= 0; j
< server
.saveparamslen
; j
++) {
1497 struct saveparam
*sp
= server
.saveparams
+j
;
1499 if (server
.dirty
>= sp
->changes
&&
1500 now
-server
.lastsave
> sp
->seconds
) {
1501 redisLog(REDIS_NOTICE
,"%d changes in %d seconds. Saving...",
1502 sp
->changes
, sp
->seconds
);
1503 rdbSaveBackground(server
.dbfilename
);
1509 /* Try to expire a few timed out keys. The algorithm used is adaptive and
1510 * will use few CPU cycles if there are few expiring keys, otherwise
1511 * it will get more aggressive to avoid that too much memory is used by
1512 * keys that can be removed from the keyspace. */
1513 for (j
= 0; j
< server
.dbnum
; j
++) {
1515 redisDb
*db
= server
.db
+j
;
1517 /* Continue to expire if at the end of the cycle more than 25%
1518 * of the keys were expired. */
1520 long num
= dictSize(db
->expires
);
1521 time_t now
= time(NULL
);
1524 if (num
> REDIS_EXPIRELOOKUPS_PER_CRON
)
1525 num
= REDIS_EXPIRELOOKUPS_PER_CRON
;
1530 if ((de
= dictGetRandomKey(db
->expires
)) == NULL
) break;
1531 t
= (time_t) dictGetEntryVal(de
);
1533 deleteKey(db
,dictGetEntryKey(de
));
1535 server
.stat_expiredkeys
++;
1538 } while (expired
> REDIS_EXPIRELOOKUPS_PER_CRON
/4);
1541 /* Swap a few keys on disk if we are over the memory limit and VM
1542 * is enabled. Try to free objects from the free list first. */
1543 if (vmCanSwapOut()) {
1544 while (server
.vm_enabled
&& zmalloc_used_memory() >
1545 server
.vm_max_memory
)
1549 if (tryFreeOneObjectFromFreelist() == REDIS_OK
) continue;
1550 retval
= (server
.vm_max_threads
== 0) ?
1551 vmSwapOneObjectBlocking() :
1552 vmSwapOneObjectThreaded();
1553 if (retval
== REDIS_ERR
&& !(loops
% 300) &&
1554 zmalloc_used_memory() >
1555 (server
.vm_max_memory
+server
.vm_max_memory
/10))
1557 redisLog(REDIS_WARNING
,"WARNING: vm-max-memory limit exceeded by more than 10%% but unable to swap more objects out!");
1559 /* Note that when using threaded I/O we free just one object,
1560 * because anyway when the I/O thread in charge of swapping this
1561 * object out will finish, the handler of completed jobs
1562 * will try to swap more objects if we are still out of memory. */
1563 if (retval
== REDIS_ERR
|| server
.vm_max_threads
> 0) break;
1567 /* Check if we should connect to a MASTER */
1568 if (server
.replstate
== REDIS_REPL_CONNECT
&& !(loops
% 10)) {
1569 redisLog(REDIS_NOTICE
,"Connecting to MASTER...");
1570 if (syncWithMaster() == REDIS_OK
) {
1571 redisLog(REDIS_NOTICE
,"MASTER <-> SLAVE sync succeeded");
1572 if (server
.appendonly
) rewriteAppendOnlyFileBackground();
1578 /* This function gets called every time Redis is entering the
1579 * main loop of the event driven library, that is, before sleeping
1580 * for ready file descriptors. */
1581 static void beforeSleep(struct aeEventLoop
*eventLoop
) {
1582 REDIS_NOTUSED(eventLoop
);
1584 /* Awake clients that got all the swapped keys they requested */
1585 if (server
.vm_enabled
&& listLength(server
.io_ready_clients
)) {
1589 listRewind(server
.io_ready_clients
,&li
);
1590 while((ln
= listNext(&li
))) {
1591 redisClient
*c
= ln
->value
;
1592 struct redisCommand
*cmd
;
1594 /* Resume the client. */
1595 listDelNode(server
.io_ready_clients
,ln
);
1596 c
->flags
&= (~REDIS_IO_WAIT
);
1597 server
.vm_blocked_clients
--;
1598 aeCreateFileEvent(server
.el
, c
->fd
, AE_READABLE
,
1599 readQueryFromClient
, c
);
1600 cmd
= lookupCommand(c
->argv
[0]->ptr
);
1601 assert(cmd
!= NULL
);
1604 /* There may be more data to process in the input buffer. */
1605 if (c
->querybuf
&& sdslen(c
->querybuf
) > 0)
1606 processInputBuffer(c
);
1609 /* Write the AOF buffer on disk */
1610 flushAppendOnlyFile();
1613 static void createSharedObjects(void) {
1616 shared
.crlf
= createObject(REDIS_STRING
,sdsnew("\r\n"));
1617 shared
.ok
= createObject(REDIS_STRING
,sdsnew("+OK\r\n"));
1618 shared
.err
= createObject(REDIS_STRING
,sdsnew("-ERR\r\n"));
1619 shared
.emptybulk
= createObject(REDIS_STRING
,sdsnew("$0\r\n\r\n"));
1620 shared
.czero
= createObject(REDIS_STRING
,sdsnew(":0\r\n"));
1621 shared
.cone
= createObject(REDIS_STRING
,sdsnew(":1\r\n"));
1622 shared
.nullbulk
= createObject(REDIS_STRING
,sdsnew("$-1\r\n"));
1623 shared
.nullmultibulk
= createObject(REDIS_STRING
,sdsnew("*-1\r\n"));
1624 shared
.emptymultibulk
= createObject(REDIS_STRING
,sdsnew("*0\r\n"));
1625 shared
.pong
= createObject(REDIS_STRING
,sdsnew("+PONG\r\n"));
1626 shared
.queued
= createObject(REDIS_STRING
,sdsnew("+QUEUED\r\n"));
1627 shared
.wrongtypeerr
= createObject(REDIS_STRING
,sdsnew(
1628 "-ERR Operation against a key holding the wrong kind of value\r\n"));
1629 shared
.nokeyerr
= createObject(REDIS_STRING
,sdsnew(
1630 "-ERR no such key\r\n"));
1631 shared
.syntaxerr
= createObject(REDIS_STRING
,sdsnew(
1632 "-ERR syntax error\r\n"));
1633 shared
.sameobjecterr
= createObject(REDIS_STRING
,sdsnew(
1634 "-ERR source and destination objects are the same\r\n"));
1635 shared
.outofrangeerr
= createObject(REDIS_STRING
,sdsnew(
1636 "-ERR index out of range\r\n"));
1637 shared
.space
= createObject(REDIS_STRING
,sdsnew(" "));
1638 shared
.colon
= createObject(REDIS_STRING
,sdsnew(":"));
1639 shared
.plus
= createObject(REDIS_STRING
,sdsnew("+"));
1640 shared
.select0
= createStringObject("select 0\r\n",10);
1641 shared
.select1
= createStringObject("select 1\r\n",10);
1642 shared
.select2
= createStringObject("select 2\r\n",10);
1643 shared
.select3
= createStringObject("select 3\r\n",10);
1644 shared
.select4
= createStringObject("select 4\r\n",10);
1645 shared
.select5
= createStringObject("select 5\r\n",10);
1646 shared
.select6
= createStringObject("select 6\r\n",10);
1647 shared
.select7
= createStringObject("select 7\r\n",10);
1648 shared
.select8
= createStringObject("select 8\r\n",10);
1649 shared
.select9
= createStringObject("select 9\r\n",10);
1650 shared
.messagebulk
= createStringObject("$7\r\nmessage\r\n",13);
1651 shared
.pmessagebulk
= createStringObject("$8\r\npmessage\r\n",14);
1652 shared
.subscribebulk
= createStringObject("$9\r\nsubscribe\r\n",15);
1653 shared
.unsubscribebulk
= createStringObject("$11\r\nunsubscribe\r\n",18);
1654 shared
.psubscribebulk
= createStringObject("$10\r\npsubscribe\r\n",17);
1655 shared
.punsubscribebulk
= createStringObject("$12\r\npunsubscribe\r\n",19);
1656 shared
.mbulk3
= createStringObject("*3\r\n",4);
1657 shared
.mbulk4
= createStringObject("*4\r\n",4);
1658 for (j
= 0; j
< REDIS_SHARED_INTEGERS
; j
++) {
1659 shared
.integers
[j
] = createObject(REDIS_STRING
,(void*)(long)j
);
1660 shared
.integers
[j
]->encoding
= REDIS_ENCODING_INT
;
/* Appends one save point to the server configuration.
 * The server.saveparams array is grown by one struct saveparam with
 * zrealloc(), the (seconds, changes) pair is stored in the new last slot,
 * and server.saveparamslen is incremented. */
1664 static void appendServerSaveParams(time_t seconds
, int changes
) {
1665 server
.saveparams
= zrealloc(server
.saveparams
,sizeof(struct saveparam
)*(server
.saveparamslen
+1));
1666 server
.saveparams
[server
.saveparamslen
].seconds
= seconds
;
1667 server
.saveparams
[server
.saveparamslen
].changes
= changes
;
1668 server
.saveparamslen
++;
/* Drops every configured save point: server.saveparams is released with
 * zfree(), then the pointer is reset to NULL and the count to 0. */
1671 static void resetServerSaveParams() {
1672 zfree(server
.saveparams
);
1673 server
.saveparams
= NULL
;
1674 server
.saveparamslen
= 0;
1677 static void initServerConfig() {
1678 server
.dbnum
= REDIS_DEFAULT_DBNUM
;
1679 server
.port
= REDIS_SERVERPORT
;
1680 server
.verbosity
= REDIS_VERBOSE
;
1681 server
.maxidletime
= REDIS_MAXIDLETIME
;
1682 server
.saveparams
= NULL
;
1683 server
.logfile
= NULL
; /* NULL = log on standard output */
1684 server
.bindaddr
= NULL
;
1685 server
.glueoutputbuf
= 1;
1686 server
.daemonize
= 0;
1687 server
.appendonly
= 0;
1688 server
.appendfsync
= APPENDFSYNC_EVERYSEC
;
1689 server
.no_appendfsync_on_rewrite
= 0;
1690 server
.lastfsync
= time(NULL
);
1691 server
.appendfd
= -1;
1692 server
.appendseldb
= -1; /* Make sure the first time will not match */
1693 server
.pidfile
= zstrdup("/var/run/redis.pid");
1694 server
.dbfilename
= zstrdup("dump.rdb");
1695 server
.appendfilename
= zstrdup("appendonly.aof");
1696 server
.requirepass
= NULL
;
1697 server
.rdbcompression
= 1;
1698 server
.activerehashing
= 1;
1699 server
.maxclients
= 0;
1700 server
.blpop_blocked_clients
= 0;
1701 server
.maxmemory
= 0;
1702 server
.vm_enabled
= 0;
1703 server
.vm_swap_file
= zstrdup("/tmp/redis-%p.vm");
1704 server
.vm_page_size
= 256; /* 256 bytes per page */
1705 server
.vm_pages
= 1024*1024*100; /* 104 millions of pages */
1706 server
.vm_max_memory
= 1024LL*1024*1024*1; /* 1 GB of RAM */
1707 server
.vm_max_threads
= 4;
1708 server
.vm_blocked_clients
= 0;
1709 server
.hash_max_zipmap_entries
= REDIS_HASH_MAX_ZIPMAP_ENTRIES
;
1710 server
.hash_max_zipmap_value
= REDIS_HASH_MAX_ZIPMAP_VALUE
;
1711 server
.shutdown_asap
= 0;
1713 resetServerSaveParams();
1715 appendServerSaveParams(60*60,1); /* save after 1 hour and 1 change */
1716 appendServerSaveParams(300,100); /* save after 5 minutes and 100 changes */
1717 appendServerSaveParams(60,10000); /* save after 1 minute and 10000 changes */
1718 /* Replication related */
1720 server
.masterauth
= NULL
;
1721 server
.masterhost
= NULL
;
1722 server
.masterport
= 6379;
1723 server
.master
= NULL
;
1724 server
.replstate
= REDIS_REPL_NONE
;
1726 /* Double constants initialization */
1728 R_PosInf
= 1.0/R_Zero
;
1729 R_NegInf
= -1.0/R_Zero
;
1730 R_Nan
= R_Zero
/R_Zero
;
1733 static void initServer() {
1736 signal(SIGHUP
, SIG_IGN
);
1737 signal(SIGPIPE
, SIG_IGN
);
1738 setupSigSegvAction();
1740 server
.devnull
= fopen("/dev/null","w");
1741 if (server
.devnull
== NULL
) {
1742 redisLog(REDIS_WARNING
, "Can't open /dev/null: %s", server
.neterr
);
1745 server
.clients
= listCreate();
1746 server
.slaves
= listCreate();
1747 server
.monitors
= listCreate();
1748 server
.objfreelist
= listCreate();
1749 createSharedObjects();
1750 server
.el
= aeCreateEventLoop();
1751 server
.db
= zmalloc(sizeof(redisDb
)*server
.dbnum
);
1752 server
.fd
= anetTcpServer(server
.neterr
, server
.port
, server
.bindaddr
);
1753 if (server
.fd
== -1) {
1754 redisLog(REDIS_WARNING
, "Opening TCP port: %s", server
.neterr
);
1757 for (j
= 0; j
< server
.dbnum
; j
++) {
1758 server
.db
[j
].dict
= dictCreate(&dbDictType
,NULL
);
1759 server
.db
[j
].expires
= dictCreate(&keyptrDictType
,NULL
);
1760 server
.db
[j
].blocking_keys
= dictCreate(&keylistDictType
,NULL
);
1761 server
.db
[j
].watched_keys
= dictCreate(&keylistDictType
,NULL
);
1762 if (server
.vm_enabled
)
1763 server
.db
[j
].io_keys
= dictCreate(&keylistDictType
,NULL
);
1764 server
.db
[j
].id
= j
;
1766 server
.pubsub_channels
= dictCreate(&keylistDictType
,NULL
);
1767 server
.pubsub_patterns
= listCreate();
1768 listSetFreeMethod(server
.pubsub_patterns
,freePubsubPattern
);
1769 listSetMatchMethod(server
.pubsub_patterns
,listMatchPubsubPattern
);
1770 server
.cronloops
= 0;
1771 server
.bgsavechildpid
= -1;
1772 server
.bgrewritechildpid
= -1;
1773 server
.bgrewritebuf
= sdsempty();
1774 server
.aofbuf
= sdsempty();
1775 server
.lastsave
= time(NULL
);
1777 server
.stat_numcommands
= 0;
1778 server
.stat_numconnections
= 0;
1779 server
.stat_expiredkeys
= 0;
1780 server
.stat_starttime
= time(NULL
);
1781 server
.unixtime
= time(NULL
);
1782 aeCreateTimeEvent(server
.el
, 1, serverCron
, NULL
, NULL
);
1783 if (aeCreateFileEvent(server
.el
, server
.fd
, AE_READABLE
,
1784 acceptHandler
, NULL
) == AE_ERR
) oom("creating file event");
1786 if (server
.appendonly
) {
1787 server
.appendfd
= open(server
.appendfilename
,O_WRONLY
|O_APPEND
|O_CREAT
,0644);
1788 if (server
.appendfd
== -1) {
1789 redisLog(REDIS_WARNING
, "Can't open the append-only file: %s",
1795 if (server
.vm_enabled
) vmInit();
1798 /* Empty the whole database */
1799 static long long emptyDb() {
1801 long long removed
= 0;
1803 for (j
= 0; j
< server
.dbnum
; j
++) {
1804 removed
+= dictSize(server
.db
[j
].dict
);
1805 dictEmpty(server
.db
[j
].dict
);
1806 dictEmpty(server
.db
[j
].expires
);
/* Map the strings "yes" / "no" (compared ignoring case) to 1 / 0.
 * Any other string yields -1. */
static int yesnotoi(char *s) {
    if (strcasecmp(s,"yes") == 0) return 1;
    if (strcasecmp(s,"no") == 0) return 0;
    return -1;
}
1817 /* I agree, this is a very rudimental way to load a configuration...
1818 will improve later if the config gets more complex */
1819 static void loadServerConfig(char *filename
) {
1821 char buf
[REDIS_CONFIGLINE_MAX
+1], *err
= NULL
;
1825 if (filename
[0] == '-' && filename
[1] == '\0')
1828 if ((fp
= fopen(filename
,"r")) == NULL
) {
1829 redisLog(REDIS_WARNING
, "Fatal error, can't open config file '%s'", filename
);
1834 while(fgets(buf
,REDIS_CONFIGLINE_MAX
+1,fp
) != NULL
) {
1840 line
= sdstrim(line
," \t\r\n");
1842 /* Skip comments and blank lines*/
1843 if (line
[0] == '#' || line
[0] == '\0') {
1848 /* Split into arguments */
1849 argv
= sdssplitlen(line
,sdslen(line
)," ",1,&argc
);
1850 sdstolower(argv
[0]);
1852 /* Execute config directives */
1853 if (!strcasecmp(argv
[0],"timeout") && argc
== 2) {
1854 server
.maxidletime
= atoi(argv
[1]);
1855 if (server
.maxidletime
< 0) {
1856 err
= "Invalid timeout value"; goto loaderr
;
1858 } else if (!strcasecmp(argv
[0],"port") && argc
== 2) {
1859 server
.port
= atoi(argv
[1]);
1860 if (server
.port
< 1 || server
.port
> 65535) {
1861 err
= "Invalid port"; goto loaderr
;
1863 } else if (!strcasecmp(argv
[0],"bind") && argc
== 2) {
1864 server
.bindaddr
= zstrdup(argv
[1]);
1865 } else if (!strcasecmp(argv
[0],"save") && argc
== 3) {
1866 int seconds
= atoi(argv
[1]);
1867 int changes
= atoi(argv
[2]);
1868 if (seconds
< 1 || changes
< 0) {
1869 err
= "Invalid save parameters"; goto loaderr
;
1871 appendServerSaveParams(seconds
,changes
);
1872 } else if (!strcasecmp(argv
[0],"dir") && argc
== 2) {
1873 if (chdir(argv
[1]) == -1) {
1874 redisLog(REDIS_WARNING
,"Can't chdir to '%s': %s",
1875 argv
[1], strerror(errno
));
1878 } else if (!strcasecmp(argv
[0],"loglevel") && argc
== 2) {
1879 if (!strcasecmp(argv
[1],"debug")) server
.verbosity
= REDIS_DEBUG
;
1880 else if (!strcasecmp(argv
[1],"verbose")) server
.verbosity
= REDIS_VERBOSE
;
1881 else if (!strcasecmp(argv
[1],"notice")) server
.verbosity
= REDIS_NOTICE
;
1882 else if (!strcasecmp(argv
[1],"warning")) server
.verbosity
= REDIS_WARNING
;
1884 err
= "Invalid log level. Must be one of debug, notice, warning";
1887 } else if (!strcasecmp(argv
[0],"logfile") && argc
== 2) {
1890 server
.logfile
= zstrdup(argv
[1]);
1891 if (!strcasecmp(server
.logfile
,"stdout")) {
1892 zfree(server
.logfile
);
1893 server
.logfile
= NULL
;
1895 if (server
.logfile
) {
1896 /* Test if we are able to open the file. The server will not
1897 * be able to abort just for this problem later... */
1898 logfp
= fopen(server
.logfile
,"a");
1899 if (logfp
== NULL
) {
1900 err
= sdscatprintf(sdsempty(),
1901 "Can't open the log file: %s", strerror(errno
));
1906 } else if (!strcasecmp(argv
[0],"databases") && argc
== 2) {
1907 server
.dbnum
= atoi(argv
[1]);
1908 if (server
.dbnum
< 1) {
1909 err
= "Invalid number of databases"; goto loaderr
;
1911 } else if (!strcasecmp(argv
[0],"include") && argc
== 2) {
1912 loadServerConfig(argv
[1]);
1913 } else if (!strcasecmp(argv
[0],"maxclients") && argc
== 2) {
1914 server
.maxclients
= atoi(argv
[1]);
1915 } else if (!strcasecmp(argv
[0],"maxmemory") && argc
== 2) {
1916 server
.maxmemory
= memtoll(argv
[1],NULL
);
1917 } else if (!strcasecmp(argv
[0],"slaveof") && argc
== 3) {
1918 server
.masterhost
= sdsnew(argv
[1]);
1919 server
.masterport
= atoi(argv
[2]);
1920 server
.replstate
= REDIS_REPL_CONNECT
;
1921 } else if (!strcasecmp(argv
[0],"masterauth") && argc
== 2) {
1922 server
.masterauth
= zstrdup(argv
[1]);
1923 } else if (!strcasecmp(argv
[0],"glueoutputbuf") && argc
== 2) {
1924 if ((server
.glueoutputbuf
= yesnotoi(argv
[1])) == -1) {
1925 err
= "argument must be 'yes' or 'no'"; goto loaderr
;
1927 } else if (!strcasecmp(argv
[0],"rdbcompression") && argc
== 2) {
1928 if ((server
.rdbcompression
= yesnotoi(argv
[1])) == -1) {
1929 err
= "argument must be 'yes' or 'no'"; goto loaderr
;
1931 } else if (!strcasecmp(argv
[0],"activerehashing") && argc
== 2) {
1932 if ((server
.activerehashing
= yesnotoi(argv
[1])) == -1) {
1933 err
= "argument must be 'yes' or 'no'"; goto loaderr
;
1935 } else if (!strcasecmp(argv
[0],"daemonize") && argc
== 2) {
1936 if ((server
.daemonize
= yesnotoi(argv
[1])) == -1) {
1937 err
= "argument must be 'yes' or 'no'"; goto loaderr
;
1939 } else if (!strcasecmp(argv
[0],"appendonly") && argc
== 2) {
1940 if ((server
.appendonly
= yesnotoi(argv
[1])) == -1) {
1941 err
= "argument must be 'yes' or 'no'"; goto loaderr
;
1943 } else if (!strcasecmp(argv
[0],"appendfilename") && argc
== 2) {
1944 zfree(server
.appendfilename
);
1945 server
.appendfilename
= zstrdup(argv
[1]);
1946 } else if (!strcasecmp(argv
[0],"no-appendfsync-on-rewrite")
1948 if ((server
.no_appendfsync_on_rewrite
= yesnotoi(argv
[1])) == -1) {
1949 err
= "argument must be 'yes' or 'no'"; goto loaderr
;
1951 } else if (!strcasecmp(argv
[0],"appendfsync") && argc
== 2) {
1952 if (!strcasecmp(argv
[1],"no")) {
1953 server
.appendfsync
= APPENDFSYNC_NO
;
1954 } else if (!strcasecmp(argv
[1],"always")) {
1955 server
.appendfsync
= APPENDFSYNC_ALWAYS
;
1956 } else if (!strcasecmp(argv
[1],"everysec")) {
1957 server
.appendfsync
= APPENDFSYNC_EVERYSEC
;
1959 err
= "argument must be 'no', 'always' or 'everysec'";
1962 } else if (!strcasecmp(argv
[0],"requirepass") && argc
== 2) {
1963 server
.requirepass
= zstrdup(argv
[1]);
1964 } else if (!strcasecmp(argv
[0],"pidfile") && argc
== 2) {
1965 zfree(server
.pidfile
);
1966 server
.pidfile
= zstrdup(argv
[1]);
1967 } else if (!strcasecmp(argv
[0],"dbfilename") && argc
== 2) {
1968 zfree(server
.dbfilename
);
1969 server
.dbfilename
= zstrdup(argv
[1]);
1970 } else if (!strcasecmp(argv
[0],"vm-enabled") && argc
== 2) {
1971 if ((server
.vm_enabled
= yesnotoi(argv
[1])) == -1) {
1972 err
= "argument must be 'yes' or 'no'"; goto loaderr
;
1974 } else if (!strcasecmp(argv
[0],"vm-swap-file") && argc
== 2) {
1975 zfree(server
.vm_swap_file
);
1976 server
.vm_swap_file
= zstrdup(argv
[1]);
1977 } else if (!strcasecmp(argv
[0],"vm-max-memory") && argc
== 2) {
1978 server
.vm_max_memory
= memtoll(argv
[1],NULL
);
1979 } else if (!strcasecmp(argv
[0],"vm-page-size") && argc
== 2) {
1980 server
.vm_page_size
= memtoll(argv
[1], NULL
);
1981 } else if (!strcasecmp(argv
[0],"vm-pages") && argc
== 2) {
1982 server
.vm_pages
= memtoll(argv
[1], NULL
);
1983 } else if (!strcasecmp(argv
[0],"vm-max-threads") && argc
== 2) {
1984 server
.vm_max_threads
= strtoll(argv
[1], NULL
, 10);
1985 } else if (!strcasecmp(argv
[0],"hash-max-zipmap-entries") && argc
== 2){
1986 server
.hash_max_zipmap_entries
= memtoll(argv
[1], NULL
);
1987 } else if (!strcasecmp(argv
[0],"hash-max-zipmap-value") && argc
== 2){
1988 server
.hash_max_zipmap_value
= memtoll(argv
[1], NULL
);
1990 err
= "Bad directive or wrong number of arguments"; goto loaderr
;
1992 for (j
= 0; j
< argc
; j
++)
1997 if (fp
!= stdin
) fclose(fp
);
2001 fprintf(stderr
, "\n*** FATAL CONFIG FILE ERROR ***\n");
2002 fprintf(stderr
, "Reading the configuration file, at line %d\n", linenum
);
2003 fprintf(stderr
, ">>> '%s'\n", line
);
2004 fprintf(stderr
, "%s\n", err
);
2008 static void freeClientArgv(redisClient
*c
) {
2011 for (j
= 0; j
< c
->argc
; j
++)
2012 decrRefCount(c
->argv
[j
]);
2013 for (j
= 0; j
< c
->mbargc
; j
++)
2014 decrRefCount(c
->mbargv
[j
]);
2019 static void freeClient(redisClient
*c
) {
2022 /* Note that if the client we are freeing is blocked into a blocking
2023 * call, we have to set querybuf to NULL *before* to call
2024 * unblockClientWaitingData() to avoid processInputBuffer() will get
2025 * called. Also it is important to remove the file events after
2026 * this, because this call adds the READABLE event. */
2027 sdsfree(c
->querybuf
);
2029 if (c
->flags
& REDIS_BLOCKED
)
2030 unblockClientWaitingData(c
);
2032 /* UNWATCH all the keys */
2034 listRelease(c
->watched_keys
);
2035 /* Unsubscribe from all the pubsub channels */
2036 pubsubUnsubscribeAllChannels(c
,0);
2037 pubsubUnsubscribeAllPatterns(c
,0);
2038 dictRelease(c
->pubsub_channels
);
2039 listRelease(c
->pubsub_patterns
);
2040 /* Obvious cleanup */
2041 aeDeleteFileEvent(server
.el
,c
->fd
,AE_READABLE
);
2042 aeDeleteFileEvent(server
.el
,c
->fd
,AE_WRITABLE
);
2043 listRelease(c
->reply
);
2046 /* Remove from the list of clients */
2047 ln
= listSearchKey(server
.clients
,c
);
2048 redisAssert(ln
!= NULL
);
2049 listDelNode(server
.clients
,ln
);
2050 /* Remove from the list of clients that are now ready to be restarted
2051 * after waiting for swapped keys */
2052 if (c
->flags
& REDIS_IO_WAIT
&& listLength(c
->io_keys
) == 0) {
2053 ln
= listSearchKey(server
.io_ready_clients
,c
);
2055 listDelNode(server
.io_ready_clients
,ln
);
2056 server
.vm_blocked_clients
--;
2059 /* Remove from the list of clients waiting for swapped keys */
2060 while (server
.vm_enabled
&& listLength(c
->io_keys
)) {
2061 ln
= listFirst(c
->io_keys
);
2062 dontWaitForSwappedKey(c
,ln
->value
);
2064 listRelease(c
->io_keys
);
2065 /* Master/slave cleanup */
2066 if (c
->flags
& REDIS_SLAVE
) {
2067 if (c
->replstate
== REDIS_REPL_SEND_BULK
&& c
->repldbfd
!= -1)
2069 list
*l
= (c
->flags
& REDIS_MONITOR
) ? server
.monitors
: server
.slaves
;
2070 ln
= listSearchKey(l
,c
);
2071 redisAssert(ln
!= NULL
);
2074 if (c
->flags
& REDIS_MASTER
) {
2075 server
.master
= NULL
;
2076 server
.replstate
= REDIS_REPL_CONNECT
;
2078 /* Release memory */
2081 freeClientMultiState(c
);
2085 #define GLUEREPLY_UP_TO (1024)
2086 static void glueReplyBuffersIfNeeded(redisClient
*c
) {
2088 char buf
[GLUEREPLY_UP_TO
];
2093 listRewind(c
->reply
,&li
);
2094 while((ln
= listNext(&li
))) {
2098 objlen
= sdslen(o
->ptr
);
2099 if (copylen
+ objlen
<= GLUEREPLY_UP_TO
) {
2100 memcpy(buf
+copylen
,o
->ptr
,objlen
);
2102 listDelNode(c
->reply
,ln
);
2104 if (copylen
== 0) return;
2108 /* Now the output buffer is empty, add the new single element */
2109 o
= createObject(REDIS_STRING
,sdsnewlen(buf
,copylen
));
2110 listAddNodeHead(c
->reply
,o
);
2113 static void sendReplyToClient(aeEventLoop
*el
, int fd
, void *privdata
, int mask
) {
2114 redisClient
*c
= privdata
;
2115 int nwritten
= 0, totwritten
= 0, objlen
;
2118 REDIS_NOTUSED(mask
);
2120 /* Use writev() if we have enough buffers to send */
2121 if (!server
.glueoutputbuf
&&
2122 listLength(c
->reply
) > REDIS_WRITEV_THRESHOLD
&&
2123 !(c
->flags
& REDIS_MASTER
))
2125 sendReplyToClientWritev(el
, fd
, privdata
, mask
);
2129 while(listLength(c
->reply
)) {
2130 if (server
.glueoutputbuf
&& listLength(c
->reply
) > 1)
2131 glueReplyBuffersIfNeeded(c
);
2133 o
= listNodeValue(listFirst(c
->reply
));
2134 objlen
= sdslen(o
->ptr
);
2137 listDelNode(c
->reply
,listFirst(c
->reply
));
2141 if (c
->flags
& REDIS_MASTER
) {
2142 /* Don't reply to a master */
2143 nwritten
= objlen
- c
->sentlen
;
2145 nwritten
= write(fd
, ((char*)o
->ptr
)+c
->sentlen
, objlen
- c
->sentlen
);
2146 if (nwritten
<= 0) break;
2148 c
->sentlen
+= nwritten
;
2149 totwritten
+= nwritten
;
2150 /* If we fully sent the object on head go to the next one */
2151 if (c
->sentlen
== objlen
) {
2152 listDelNode(c
->reply
,listFirst(c
->reply
));
2155 /* Note that we avoid to send more thank REDIS_MAX_WRITE_PER_EVENT
2156 * bytes, in a single threaded server it's a good idea to serve
2157 * other clients as well, even if a very large request comes from
2158 * super fast link that is always able to accept data (in real world
2159 * scenario think about 'KEYS *' against the loopback interfae) */
2160 if (totwritten
> REDIS_MAX_WRITE_PER_EVENT
) break;
2162 if (nwritten
== -1) {
2163 if (errno
== EAGAIN
) {
2166 redisLog(REDIS_VERBOSE
,
2167 "Error writing to client: %s", strerror(errno
));
2172 if (totwritten
> 0) c
->lastinteraction
= time(NULL
);
2173 if (listLength(c
->reply
) == 0) {
2175 aeDeleteFileEvent(server
.el
,c
->fd
,AE_WRITABLE
);
2179 static void sendReplyToClientWritev(aeEventLoop
*el
, int fd
, void *privdata
, int mask
)
2181 redisClient
*c
= privdata
;
2182 int nwritten
= 0, totwritten
= 0, objlen
, willwrite
;
2184 struct iovec iov
[REDIS_WRITEV_IOVEC_COUNT
];
2185 int offset
, ion
= 0;
2187 REDIS_NOTUSED(mask
);
2190 while (listLength(c
->reply
)) {
2191 offset
= c
->sentlen
;
2195 /* fill-in the iov[] array */
2196 for(node
= listFirst(c
->reply
); node
; node
= listNextNode(node
)) {
2197 o
= listNodeValue(node
);
2198 objlen
= sdslen(o
->ptr
);
2200 if (totwritten
+ objlen
- offset
> REDIS_MAX_WRITE_PER_EVENT
)
2203 if(ion
== REDIS_WRITEV_IOVEC_COUNT
)
2204 break; /* no more iovecs */
2206 iov
[ion
].iov_base
= ((char*)o
->ptr
) + offset
;
2207 iov
[ion
].iov_len
= objlen
- offset
;
2208 willwrite
+= objlen
- offset
;
2209 offset
= 0; /* just for the first item */
2216 /* write all collected blocks at once */
2217 if((nwritten
= writev(fd
, iov
, ion
)) < 0) {
2218 if (errno
!= EAGAIN
) {
2219 redisLog(REDIS_VERBOSE
,
2220 "Error writing to client: %s", strerror(errno
));
2227 totwritten
+= nwritten
;
2228 offset
= c
->sentlen
;
2230 /* remove written robjs from c->reply */
2231 while (nwritten
&& listLength(c
->reply
)) {
2232 o
= listNodeValue(listFirst(c
->reply
));
2233 objlen
= sdslen(o
->ptr
);
2235 if(nwritten
>= objlen
- offset
) {
2236 listDelNode(c
->reply
, listFirst(c
->reply
));
2237 nwritten
-= objlen
- offset
;
2241 c
->sentlen
+= nwritten
;
2249 c
->lastinteraction
= time(NULL
);
2251 if (listLength(c
->reply
) == 0) {
2253 aeDeleteFileEvent(server
.el
,c
->fd
,AE_WRITABLE
);
2257 static struct redisCommand
*lookupCommand(char *name
) {
2259 while(cmdTable
[j
].name
!= NULL
) {
2260 if (!strcasecmp(name
,cmdTable
[j
].name
)) return &cmdTable
[j
];
2266 /* resetClient prepare the client to process the next command */
2267 static void resetClient(redisClient
*c
) {
2273 /* Call() is the core of Redis execution of a command */
2274 static void call(redisClient
*c
, struct redisCommand
*cmd
) {
2277 dirty
= server
.dirty
;
2279 dirty
= server
.dirty
-dirty
;
2281 if (server
.appendonly
&& dirty
)
2282 feedAppendOnlyFile(cmd
,c
->db
->id
,c
->argv
,c
->argc
);
2283 if ((dirty
|| cmd
->flags
& REDIS_CMD_FORCE_REPLICATION
) &&
2284 listLength(server
.slaves
))
2285 replicationFeedSlaves(server
.slaves
,c
->db
->id
,c
->argv
,c
->argc
);
2286 if (listLength(server
.monitors
))
2287 replicationFeedMonitors(server
.monitors
,c
->db
->id
,c
->argv
,c
->argc
);
2288 server
.stat_numcommands
++;
2291 /* If this function gets called we already read a whole
2292 * command, argments are in the client argv/argc fields.
2293 * processCommand() execute the command or prepare the
2294 * server for a bulk read from the client.
2296 * If 1 is returned the client is still alive and valid and
2297 * and other operations can be performed by the caller. Otherwise
2298 * if 0 is returned the client was destroied (i.e. after QUIT). */
2299 static int processCommand(redisClient
*c
) {
2300 struct redisCommand
*cmd
;
2302 /* Free some memory if needed (maxmemory setting) */
2303 if (server
.maxmemory
) freeMemoryIfNeeded();
2305 /* Handle the multi bulk command type. This is an alternative protocol
2306 * supported by Redis in order to receive commands that are composed of
2307 * multiple binary-safe "bulk" arguments. The latency of processing is
2308 * a bit higher but this allows things like multi-sets, so if this
2309 * protocol is used only for MSET and similar commands this is a big win. */
2310 if (c
->multibulk
== 0 && c
->argc
== 1 && ((char*)(c
->argv
[0]->ptr
))[0] == '*') {
2311 c
->multibulk
= atoi(((char*)c
->argv
[0]->ptr
)+1);
2312 if (c
->multibulk
<= 0) {
2316 decrRefCount(c
->argv
[c
->argc
-1]);
2320 } else if (c
->multibulk
) {
2321 if (c
->bulklen
== -1) {
2322 if (((char*)c
->argv
[0]->ptr
)[0] != '$') {
2323 addReplySds(c
,sdsnew("-ERR multi bulk protocol error\r\n"));
2327 int bulklen
= atoi(((char*)c
->argv
[0]->ptr
)+1);
2328 decrRefCount(c
->argv
[0]);
2329 if (bulklen
< 0 || bulklen
> 1024*1024*1024) {
2331 addReplySds(c
,sdsnew("-ERR invalid bulk write count\r\n"));
2336 c
->bulklen
= bulklen
+2; /* add two bytes for CR+LF */
2340 c
->mbargv
= zrealloc(c
->mbargv
,(sizeof(robj
*))*(c
->mbargc
+1));
2341 c
->mbargv
[c
->mbargc
] = c
->argv
[0];
2345 if (c
->multibulk
== 0) {
2349 /* Here we need to swap the multi-bulk argc/argv with the
2350 * normal argc/argv of the client structure. */
2352 c
->argv
= c
->mbargv
;
2353 c
->mbargv
= auxargv
;
2356 c
->argc
= c
->mbargc
;
2357 c
->mbargc
= auxargc
;
2359 /* We need to set bulklen to something different than -1
2360 * in order for the code below to process the command without
2361 * to try to read the last argument of a bulk command as
2362 * a special argument. */
2364 /* continue below and process the command */
2371 /* -- end of multi bulk commands processing -- */
2373 /* The QUIT command is handled as a special case. Normal command
2374 * procs are unable to close the client connection safely */
2375 if (!strcasecmp(c
->argv
[0]->ptr
,"quit")) {
2380 /* Now lookup the command and check ASAP about trivial error conditions
2381 * such wrong arity, bad command name and so forth. */
2382 cmd
= lookupCommand(c
->argv
[0]->ptr
);
2385 sdscatprintf(sdsempty(), "-ERR unknown command '%s'\r\n",
2386 (char*)c
->argv
[0]->ptr
));
2389 } else if ((cmd
->arity
> 0 && cmd
->arity
!= c
->argc
) ||
2390 (c
->argc
< -cmd
->arity
)) {
2392 sdscatprintf(sdsempty(),
2393 "-ERR wrong number of arguments for '%s' command\r\n",
2397 } else if (cmd
->flags
& REDIS_CMD_BULK
&& c
->bulklen
== -1) {
2398 /* This is a bulk command, we have to read the last argument yet. */
2399 int bulklen
= atoi(c
->argv
[c
->argc
-1]->ptr
);
2401 decrRefCount(c
->argv
[c
->argc
-1]);
2402 if (bulklen
< 0 || bulklen
> 1024*1024*1024) {
2404 addReplySds(c
,sdsnew("-ERR invalid bulk write count\r\n"));
2409 c
->bulklen
= bulklen
+2; /* add two bytes for CR+LF */
2410 /* It is possible that the bulk read is already in the
2411 * buffer. Check this condition and handle it accordingly.
2412 * This is just a fast path, alternative to call processInputBuffer().
2413 * It's a good idea since the code is small and this condition
2414 * happens most of the times. */
2415 if ((signed)sdslen(c
->querybuf
) >= c
->bulklen
) {
2416 c
->argv
[c
->argc
] = createStringObject(c
->querybuf
,c
->bulklen
-2);
2418 c
->querybuf
= sdsrange(c
->querybuf
,c
->bulklen
,-1);
2420 /* Otherwise return... there is to read the last argument
2421 * from the socket. */
2425 /* Let's try to encode the bulk object to save space. */
2426 if (cmd
->flags
& REDIS_CMD_BULK
)
2427 c
->argv
[c
->argc
-1] = tryObjectEncoding(c
->argv
[c
->argc
-1]);
2429 /* Check if the user is authenticated */
2430 if (server
.requirepass
&& !c
->authenticated
&& cmd
->proc
!= authCommand
) {
2431 addReplySds(c
,sdsnew("-ERR operation not permitted\r\n"));
2436 /* Handle the maxmemory directive */
2437 if (server
.maxmemory
&& (cmd
->flags
& REDIS_CMD_DENYOOM
) &&
2438 zmalloc_used_memory() > server
.maxmemory
)
2440 addReplySds(c
,sdsnew("-ERR command not allowed when used memory > 'maxmemory'\r\n"));
2445 /* Only allow SUBSCRIBE and UNSUBSCRIBE in the context of Pub/Sub */
2446 if ((dictSize(c
->pubsub_channels
) > 0 || listLength(c
->pubsub_patterns
) > 0)
2448 cmd
->proc
!= subscribeCommand
&& cmd
->proc
!= unsubscribeCommand
&&
2449 cmd
->proc
!= psubscribeCommand
&& cmd
->proc
!= punsubscribeCommand
) {
2450 addReplySds(c
,sdsnew("-ERR only (P)SUBSCRIBE / (P)UNSUBSCRIBE / QUIT allowed in this context\r\n"));
2455 /* Exec the command */
2456 if (c
->flags
& REDIS_MULTI
&&
2457 cmd
->proc
!= execCommand
&& cmd
->proc
!= discardCommand
&&
2458 cmd
->proc
!= multiCommand
&& cmd
->proc
!= watchCommand
)
2460 queueMultiCommand(c
,cmd
);
2461 addReply(c
,shared
.queued
);
2463 if (server
.vm_enabled
&& server
.vm_max_threads
> 0 &&
2464 blockClientOnSwappedKeys(c
,cmd
)) return 1;
2468 /* Prepare the client for the next command */
2473 static void replicationFeedSlaves(list
*slaves
, int dictid
, robj
**argv
, int argc
) {
2478 /* We need 1+(ARGS*3) objects since commands are using the new protocol
2479 * and we one 1 object for the first "*<count>\r\n" multibulk count, then
2480 * for every additional object we have "$<count>\r\n" + object + "\r\n". */
2481 robj
*static_outv
[REDIS_STATIC_ARGS
*3+1];
2484 if (argc
<= REDIS_STATIC_ARGS
) {
2487 outv
= zmalloc(sizeof(robj
*)*(argc
*3+1));
2490 lenobj
= createObject(REDIS_STRING
,
2491 sdscatprintf(sdsempty(), "*%d\r\n", argc
));
2492 lenobj
->refcount
= 0;
2493 outv
[outc
++] = lenobj
;
2494 for (j
= 0; j
< argc
; j
++) {
2495 lenobj
= createObject(REDIS_STRING
,
2496 sdscatprintf(sdsempty(),"$%lu\r\n",
2497 (unsigned long) stringObjectLen(argv
[j
])));
2498 lenobj
->refcount
= 0;
2499 outv
[outc
++] = lenobj
;
2500 outv
[outc
++] = argv
[j
];
2501 outv
[outc
++] = shared
.crlf
;
2504 /* Increment all the refcounts at start and decrement at end in order to
2505 * be sure to free objects if there is no slave in a replication state
2506 * able to be feed with commands */
2507 for (j
= 0; j
< outc
; j
++) incrRefCount(outv
[j
]);
2508 listRewind(slaves
,&li
);
2509 while((ln
= listNext(&li
))) {
2510 redisClient
*slave
= ln
->value
;
2512 /* Don't feed slaves that are still waiting for BGSAVE to start */
2513 if (slave
->replstate
== REDIS_REPL_WAIT_BGSAVE_START
) continue;
2515 /* Feed all the other slaves, MONITORs and so on */
2516 if (slave
->slaveseldb
!= dictid
) {
2520 case 0: selectcmd
= shared
.select0
; break;
2521 case 1: selectcmd
= shared
.select1
; break;
2522 case 2: selectcmd
= shared
.select2
; break;
2523 case 3: selectcmd
= shared
.select3
; break;
2524 case 4: selectcmd
= shared
.select4
; break;
2525 case 5: selectcmd
= shared
.select5
; break;
2526 case 6: selectcmd
= shared
.select6
; break;
2527 case 7: selectcmd
= shared
.select7
; break;
2528 case 8: selectcmd
= shared
.select8
; break;
2529 case 9: selectcmd
= shared
.select9
; break;
2531 selectcmd
= createObject(REDIS_STRING
,
2532 sdscatprintf(sdsempty(),"select %d\r\n",dictid
));
2533 selectcmd
->refcount
= 0;
2536 addReply(slave
,selectcmd
);
2537 slave
->slaveseldb
= dictid
;
2539 for (j
= 0; j
< outc
; j
++) addReply(slave
,outv
[j
]);
2541 for (j
= 0; j
< outc
; j
++) decrRefCount(outv
[j
]);
2542 if (outv
!= static_outv
) zfree(outv
);
2545 static sds
sdscatrepr(sds s
, char *p
, size_t len
) {
2546 s
= sdscatlen(s
,"\"",1);
2551 s
= sdscatprintf(s
,"\\%c",*p
);
2553 case '\n': s
= sdscatlen(s
,"\\n",1); break;
2554 case '\r': s
= sdscatlen(s
,"\\r",1); break;
2555 case '\t': s
= sdscatlen(s
,"\\t",1); break;
2556 case '\a': s
= sdscatlen(s
,"\\a",1); break;
2557 case '\b': s
= sdscatlen(s
,"\\b",1); break;
2560 s
= sdscatprintf(s
,"%c",*p
);
2562 s
= sdscatprintf(s
,"\\x%02x",(unsigned char)*p
);
2567 return sdscatlen(s
,"\"",1);
2570 static void replicationFeedMonitors(list
*monitors
, int dictid
, robj
**argv
, int argc
) {
2574 sds cmdrepr
= sdsnew("+");
2578 gettimeofday(&tv
,NULL
);
2579 cmdrepr
= sdscatprintf(cmdrepr
,"%ld.%ld ",(long)tv
.tv_sec
,(long)tv
.tv_usec
);
2580 if (dictid
!= 0) cmdrepr
= sdscatprintf(cmdrepr
,"(db %d) ", dictid
);
2582 for (j
= 0; j
< argc
; j
++) {
2583 if (argv
[j
]->encoding
== REDIS_ENCODING_INT
) {
2584 cmdrepr
= sdscatprintf(cmdrepr
, "%ld", (long)argv
[j
]->ptr
);
2586 cmdrepr
= sdscatrepr(cmdrepr
,(char*)argv
[j
]->ptr
,
2587 sdslen(argv
[j
]->ptr
));
2590 cmdrepr
= sdscatlen(cmdrepr
," ",1);
2592 cmdrepr
= sdscatlen(cmdrepr
,"\r\n",2);
2593 cmdobj
= createObject(REDIS_STRING
,cmdrepr
);
2595 listRewind(monitors
,&li
);
2596 while((ln
= listNext(&li
))) {
2597 redisClient
*monitor
= ln
->value
;
2598 addReply(monitor
,cmdobj
);
2600 decrRefCount(cmdobj
);
2603 static void processInputBuffer(redisClient
*c
) {
2605 /* Before to process the input buffer, make sure the client is not
2606 * waitig for a blocking operation such as BLPOP. Note that the first
2607 * iteration the client is never blocked, otherwise the processInputBuffer
2608 * would not be called at all, but after the execution of the first commands
2609 * in the input buffer the client may be blocked, and the "goto again"
2610 * will try to reiterate. The following line will make it return asap. */
2611 if (c
->flags
& REDIS_BLOCKED
|| c
->flags
& REDIS_IO_WAIT
) return;
2612 if (c
->bulklen
== -1) {
2613 /* Read the first line of the query */
2614 char *p
= strchr(c
->querybuf
,'\n');
2621 query
= c
->querybuf
;
2622 c
->querybuf
= sdsempty();
2623 querylen
= 1+(p
-(query
));
2624 if (sdslen(query
) > querylen
) {
2625 /* leave data after the first line of the query in the buffer */
2626 c
->querybuf
= sdscatlen(c
->querybuf
,query
+querylen
,sdslen(query
)-querylen
);
2628 *p
= '\0'; /* remove "\n" */
2629 if (*(p
-1) == '\r') *(p
-1) = '\0'; /* and "\r" if any */
2630 sdsupdatelen(query
);
2632 /* Now we can split the query in arguments */
2633 argv
= sdssplitlen(query
,sdslen(query
)," ",1,&argc
);
2636 if (c
->argv
) zfree(c
->argv
);
2637 c
->argv
= zmalloc(sizeof(robj
*)*argc
);
2639 for (j
= 0; j
< argc
; j
++) {
2640 if (sdslen(argv
[j
])) {
2641 c
->argv
[c
->argc
] = createObject(REDIS_STRING
,argv
[j
]);
2649 /* Execute the command. If the client is still valid
2650 * after processCommand() return and there is something
2651 * on the query buffer try to process the next command. */
2652 if (processCommand(c
) && sdslen(c
->querybuf
)) goto again
;
2654 /* Nothing to process, argc == 0. Just process the query
2655 * buffer if it's not empty or return to the caller */
2656 if (sdslen(c
->querybuf
)) goto again
;
2659 } else if (sdslen(c
->querybuf
) >= REDIS_REQUEST_MAX_SIZE
) {
2660 redisLog(REDIS_VERBOSE
, "Client protocol error");
2665 /* Bulk read handling. Note that if we are at this point
2666 the client already sent a command terminated with a newline,
2667 we are reading the bulk data that is actually the last
2668 argument of the command. */
2669 int qbl
= sdslen(c
->querybuf
);
2671 if (c
->bulklen
<= qbl
) {
2672 /* Copy everything but the final CRLF as final argument */
2673 c
->argv
[c
->argc
] = createStringObject(c
->querybuf
,c
->bulklen
-2);
2675 c
->querybuf
= sdsrange(c
->querybuf
,c
->bulklen
,-1);
2676 /* Process the command. If the client is still valid after
2677 * the processing and there is more data in the buffer
2678 * try to parse it. */
2679 if (processCommand(c
) && sdslen(c
->querybuf
)) goto again
;
2685 static void readQueryFromClient(aeEventLoop
*el
, int fd
, void *privdata
, int mask
) {
2686 redisClient
*c
= (redisClient
*) privdata
;
2687 char buf
[REDIS_IOBUF_LEN
];
2690 REDIS_NOTUSED(mask
);
2692 nread
= read(fd
, buf
, REDIS_IOBUF_LEN
);
2694 if (errno
== EAGAIN
) {
2697 redisLog(REDIS_VERBOSE
, "Reading from client: %s",strerror(errno
));
2701 } else if (nread
== 0) {
2702 redisLog(REDIS_VERBOSE
, "Client closed connection");
2707 c
->querybuf
= sdscatlen(c
->querybuf
, buf
, nread
);
2708 c
->lastinteraction
= time(NULL
);
2712 processInputBuffer(c
);
2715 static int selectDb(redisClient
*c
, int id
) {
2716 if (id
< 0 || id
>= server
.dbnum
)
2718 c
->db
= &server
.db
[id
];
2722 static void *dupClientReplyValue(void *o
) {
2723 incrRefCount((robj
*)o
);
/* Match method for lists of string objects: the result is whatever
 * equalStringObjects() returns for the two values. */
static int listMatchObjects(void *a, void *b) {
    int match = equalStringObjects(a,b);

    return match;
}
2731 static redisClient
*createClient(int fd
) {
2732 redisClient
*c
= zmalloc(sizeof(*c
));
2734 anetNonBlock(NULL
,fd
);
2735 anetTcpNoDelay(NULL
,fd
);
2736 if (!c
) return NULL
;
2739 c
->querybuf
= sdsempty();
2748 c
->lastinteraction
= time(NULL
);
2749 c
->authenticated
= 0;
2750 c
->replstate
= REDIS_REPL_NONE
;
2751 c
->reply
= listCreate();
2752 listSetFreeMethod(c
->reply
,decrRefCount
);
2753 listSetDupMethod(c
->reply
,dupClientReplyValue
);
2754 c
->blocking_keys
= NULL
;
2755 c
->blocking_keys_num
= 0;
2756 c
->io_keys
= listCreate();
2757 c
->watched_keys
= listCreate();
2758 listSetFreeMethod(c
->io_keys
,decrRefCount
);
2759 c
->pubsub_channels
= dictCreate(&setDictType
,NULL
);
2760 c
->pubsub_patterns
= listCreate();
2761 listSetFreeMethod(c
->pubsub_patterns
,decrRefCount
);
2762 listSetMatchMethod(c
->pubsub_patterns
,listMatchObjects
);
2763 if (aeCreateFileEvent(server
.el
, c
->fd
, AE_READABLE
,
2764 readQueryFromClient
, c
) == AE_ERR
) {
2768 listAddNodeTail(server
.clients
,c
);
2769 initClientMultiState(c
);
2773 static void addReply(redisClient
*c
, robj
*obj
) {
2774 if (listLength(c
->reply
) == 0 &&
2775 (c
->replstate
== REDIS_REPL_NONE
||
2776 c
->replstate
== REDIS_REPL_ONLINE
) &&
2777 aeCreateFileEvent(server
.el
, c
->fd
, AE_WRITABLE
,
2778 sendReplyToClient
, c
) == AE_ERR
) return;
2780 if (server
.vm_enabled
&& obj
->storage
!= REDIS_VM_MEMORY
) {
2781 obj
= dupStringObject(obj
);
2782 obj
->refcount
= 0; /* getDecodedObject() will increment the refcount */
2784 listAddNodeTail(c
->reply
,getDecodedObject(obj
));
2787 static void addReplySds(redisClient
*c
, sds s
) {
2788 robj
*o
= createObject(REDIS_STRING
,s
);
2793 static void addReplyDouble(redisClient
*c
, double d
) {
2796 snprintf(buf
,sizeof(buf
),"%.17g",d
);
2797 addReplySds(c
,sdscatprintf(sdsempty(),"$%lu\r\n%s\r\n",
2798 (unsigned long) strlen(buf
),buf
));
2801 static void addReplyLongLong(redisClient
*c
, long long ll
) {
2806 addReply(c
,shared
.czero
);
2808 } else if (ll
== 1) {
2809 addReply(c
,shared
.cone
);
2813 len
= ll2string(buf
+1,sizeof(buf
)-1,ll
);
2816 addReplySds(c
,sdsnewlen(buf
,len
+3));
2819 static void addReplyUlong(redisClient
*c
, unsigned long ul
) {
2824 addReply(c
,shared
.czero
);
2826 } else if (ul
== 1) {
2827 addReply(c
,shared
.cone
);
2830 len
= snprintf(buf
,sizeof(buf
),":%lu\r\n",ul
);
2831 addReplySds(c
,sdsnewlen(buf
,len
));
2834 static void addReplyBulkLen(redisClient
*c
, robj
*obj
) {
2838 if (obj
->encoding
== REDIS_ENCODING_RAW
) {
2839 len
= sdslen(obj
->ptr
);
2841 long n
= (long)obj
->ptr
;
2843 /* Compute how many bytes will take this integer as a radix 10 string */
2849 while((n
= n
/10) != 0) {
2854 intlen
= ll2string(buf
+1,sizeof(buf
)-1,(long long)len
);
2855 buf
[intlen
+1] = '\r';
2856 buf
[intlen
+2] = '\n';
2857 addReplySds(c
,sdsnewlen(buf
,intlen
+3));
2860 static void addReplyBulk(redisClient
*c
, robj
*obj
) {
2861 addReplyBulkLen(c
,obj
);
2863 addReply(c
,shared
.crlf
);
/* addReplyBulkCString: bulk reply built from the plain C string 's'.
 * Two branches are visible: one answers with shared.nullbulk, the other
 * builds a string object with createStringObject(s,strlen(s)).
 * NOTE(review): the condition selecting the null-bulk branch (presumably
 * s == NULL) and the code that sends and releases 'o' are not present in
 * this listing. */
2866 /* In the CONFIG command we need to add vanilla C string as bulk replies */
2867 static void addReplyBulkCString(redisClient
*c
, char *s
) {
2869 addReply(c
,shared
.nullbulk
);
2871 robj
*o
= createStringObject(s
,strlen(s
));
/* acceptHandler: file-event callback for the listening socket 'fd'
 * ('mask' and 'privdata' are marked unused). Visible flow:
 *  - anetAccept() accepts the connection and fills 'cip'/'cport'; on AE_ERR
 *    the text in server.neterr is logged at verbose level.
 *  - createClient(cfd) builds the client; on NULL a warning is logged and
 *    the socket is closed.
 *  - when server.maxclients is set and the client list now exceeds it, a
 *    best-effort "-ERR max number of clients reached" is written to the
 *    client socket.
 *  - server.stat_numconnections is incremented.
 * NOTE(review): the local declarations, the early returns and the release
 * of a rejected client are not present in this listing. */
2877 static void acceptHandler(aeEventLoop
*el
, int fd
, void *privdata
, int mask
) {
2882 REDIS_NOTUSED(mask
);
2883 REDIS_NOTUSED(privdata
);
2885 cfd
= anetAccept(server
.neterr
, fd
, cip
, &cport
);
2886 if (cfd
== AE_ERR
) {
2887 redisLog(REDIS_VERBOSE
,"Accepting client connection: %s", server
.neterr
);
2890 redisLog(REDIS_VERBOSE
,"Accepted %s:%d", cip
, cport
);
2891 if ((c
= createClient(cfd
)) == NULL
) {
2892 redisLog(REDIS_WARNING
,"Error allocating resoures for the client");
2893 close(cfd
); /* May be already closed, just ignore errors */
2896 /* If maxclient directive is set and this is one client more... close the
2897 * connection. Note that we create the client instead to check before
2898 * for this condition, since now the socket is already set in nonblocking
2899 * mode and we can send an error for free using the Kernel I/O */
2900 if (server
.maxclients
&& listLength(server
.clients
) > server
.maxclients
) {
2901 char *err
= "-ERR max number of clients reached\r\n";
2903 /* That's a best effort error message, don't check write errors */
2904 if (write(c
->fd
,err
,strlen(err
)) == -1) {
2905 /* Nothing to do, Just to avoid the warning... */
2910 server
.stat_numconnections
++;
2913 /* ======================= Redis objects implementation ===================== */
/* createObject: returns a robj for the given 'type' / 'ptr'.
 * A recycled object is taken from the head of server.objfreelist when that
 * list is not empty; otherwise a new one is zmalloc()ed. With VM disabled
 * the allocation is shortened by sizeof(struct redisObjectVM). With VM
 * enabled the free list is accessed under server.obj_freelist_mutex and the
 * object gets vm.atime = server.unixtime and storage = REDIS_VM_MEMORY.
 * The encoding is set to REDIS_ENCODING_RAW.
 * NOTE(review): the declaration of 'o', the assignments of type/ptr/refcount
 * and the return statement are not present in this listing. */
2915 static robj
*createObject(int type
, void *ptr
) {
2918 if (server
.vm_enabled
) pthread_mutex_lock(&server
.obj_freelist_mutex
);
2919 if (listLength(server
.objfreelist
)) {
2920 listNode
*head
= listFirst(server
.objfreelist
);
2921 o
= listNodeValue(head
);
2922 listDelNode(server
.objfreelist
,head
);
2923 if (server
.vm_enabled
) pthread_mutex_unlock(&server
.obj_freelist_mutex
);
2925 if (server
.vm_enabled
) {
2926 pthread_mutex_unlock(&server
.obj_freelist_mutex
);
2927 o
= zmalloc(sizeof(*o
));
2929 o
= zmalloc(sizeof(*o
)-sizeof(struct redisObjectVM
));
2933 o
->encoding
= REDIS_ENCODING_RAW
;
2936 if (server
.vm_enabled
) {
2937 /* Note that this code may run in the context of an I/O thread
2938 * and accessing to server.unixtime in theory is an error
2939 * (no locks). But in practice this is safe, and even if we read
2940 * garbage Redis will not fail, as it's just a statistical info */
2941 o
->vm
.atime
= server
.unixtime
;
2942 o
->storage
= REDIS_VM_MEMORY
;
2947 static robj
*createStringObject(char *ptr
, size_t len
) {
2948 return createObject(REDIS_STRING
,sdsnewlen(ptr
,len
));
2951 static robj
*createStringObjectFromLongLong(long long value
) {
2953 if (value
>= 0 && value
< REDIS_SHARED_INTEGERS
) {
2954 incrRefCount(shared
.integers
[value
]);
2955 o
= shared
.integers
[value
];
2957 if (value
>= LONG_MIN
&& value
<= LONG_MAX
) {
2958 o
= createObject(REDIS_STRING
, NULL
);
2959 o
->encoding
= REDIS_ENCODING_INT
;
2960 o
->ptr
= (void*)((long)value
);
2962 o
= createObject(REDIS_STRING
,sdsfromlonglong(value
));
2968 static robj
*dupStringObject(robj
*o
) {
2969 assert(o
->encoding
== REDIS_ENCODING_RAW
);
2970 return createStringObject(o
->ptr
,sdslen(o
->ptr
));
2973 static robj
*createListObject(void) {
2974 list
*l
= listCreate();
2976 listSetFreeMethod(l
,decrRefCount
);
2977 return createObject(REDIS_LIST
,l
);
2980 static robj
*createSetObject(void) {
2981 dict
*d
= dictCreate(&setDictType
,NULL
);
2982 return createObject(REDIS_SET
,d
);
2985 static robj
*createHashObject(void) {
2986 /* All the Hashes start as zipmaps. Will be automatically converted
2987 * into hash tables if there are enough elements or big elements
2989 unsigned char *zm
= zipmapNew();
2990 robj
*o
= createObject(REDIS_HASH
,zm
);
2991 o
->encoding
= REDIS_ENCODING_ZIPMAP
;
2995 static robj
*createZsetObject(void) {
2996 zset
*zs
= zmalloc(sizeof(*zs
));
2998 zs
->dict
= dictCreate(&zsetDictType
,NULL
);
2999 zs
->zsl
= zslCreate();
3000 return createObject(REDIS_ZSET
,zs
);
/* freeStringObject: releases the payload of a string object. Only the
 * REDIS_ENCODING_RAW case is tested here.
 * NOTE(review): the statement executed inside the test (presumably the
 * release of the sds in o->ptr) is not present in this listing. */
3003 static void freeStringObject(robj
*o
) {
3004 if (o
->encoding
== REDIS_ENCODING_RAW
) {
3009 static void freeListObject(robj
*o
) {
3010 listRelease((list
*) o
->ptr
);
3013 static void freeSetObject(robj
*o
) {
3014 dictRelease((dict
*) o
->ptr
);
/* freeZsetObject: tears down a sorted set; the visible step releases the
 * element dictionary zs->dict with dictRelease().
 * NOTE(review): the declaration of 'zs' and the release of the remaining
 * members of the zset are not present in this listing. */
3017 static void freeZsetObject(robj
*o
) {
3020 dictRelease(zs
->dict
);
/* freeHashObject: releases a hash according to o->encoding.
 * REDIS_ENCODING_HT releases the dict with dictRelease(); an unexpected
 * encoding ends in redisPanic() (presumably under a default label).
 * NOTE(review): the body of the REDIS_ENCODING_ZIPMAP case, the break
 * statements and the closing braces are not present in this listing. */
3025 static void freeHashObject(robj
*o
) {
3026 switch (o
->encoding
) {
3027 case REDIS_ENCODING_HT
:
3028 dictRelease((dict
*) o
->ptr
);
3030 case REDIS_ENCODING_ZIPMAP
:
3034 redisPanic("Unknown hash encoding type");
/* incrRefCount: takes the object 'o'; going by its name it adds one
 * reference to it.
 * NOTE(review): the body of this function is not present in this listing. */
3039 static void incrRefCount(robj
*o
) {
/* decrRefCount: drops one reference to 'obj' (declared void* so that the
 * function can also be installed as a container free method).
 * Visible flow:
 *  - panic when the reference count is already <= 0.
 *  - VM enabled and storage SWAPPED or LOADING: a pending load job is
 *    cancelled, the object is asserted to be a string, its payload is freed,
 *    its swap pages (vm.page / vm.usedpages) are marked free, the object is
 *    offered to server.objfreelist under obj_freelist_mutex, and
 *    server.vm_stats_swapped_objects is decremented.
 *  - otherwise the count is decremented; at zero a pending swap-out job is
 *    cancelled (VM enabled, storage SWAPPING), the payload is freed by type
 *    and the object is offered to the free list (locking only when VM is
 *    enabled).
 * The object is not put on the free list when the list already holds more
 * than REDIS_OBJFREELIST_MAX entries or listAddNodeHead() fails.
 * NOTE(review): the cast of 'obj' to 'o', the switch header, the statement
 * run when the object is not recycled (presumably it is freed) and the
 * returns are not present in this listing. */
3043 static void decrRefCount(void *obj
) {
3046 if (o
->refcount
<= 0) redisPanic("decrRefCount against refcount <= 0");
3047 /* Object is a key of a swapped out value, or in the process of being
3049 if (server
.vm_enabled
&&
3050 (o
->storage
== REDIS_VM_SWAPPED
|| o
->storage
== REDIS_VM_LOADING
))
3052 if (o
->storage
== REDIS_VM_LOADING
) vmCancelThreadedIOJob(obj
);
3053 redisAssert(o
->type
== REDIS_STRING
);
3054 freeStringObject(o
);
3055 vmMarkPagesFree(o
->vm
.page
,o
->vm
.usedpages
);
3056 pthread_mutex_lock(&server
.obj_freelist_mutex
);
3057 if (listLength(server
.objfreelist
) > REDIS_OBJFREELIST_MAX
||
3058 !listAddNodeHead(server
.objfreelist
,o
))
3060 pthread_mutex_unlock(&server
.obj_freelist_mutex
);
3061 server
.vm_stats_swapped_objects
--;
3064 /* Object is in memory, or in the process of being swapped out. */
3065 if (--(o
->refcount
) == 0) {
3066 if (server
.vm_enabled
&& o
->storage
== REDIS_VM_SWAPPING
)
3067 vmCancelThreadedIOJob(obj
);
3069 case REDIS_STRING
: freeStringObject(o
); break;
3070 case REDIS_LIST
: freeListObject(o
); break;
3071 case REDIS_SET
: freeSetObject(o
); break;
3072 case REDIS_ZSET
: freeZsetObject(o
); break;
3073 case REDIS_HASH
: freeHashObject(o
); break;
3074 default: redisPanic("Unknown object type"); break;
3076 if (server
.vm_enabled
) pthread_mutex_lock(&server
.obj_freelist_mutex
);
3077 if (listLength(server
.objfreelist
) > REDIS_OBJFREELIST_MAX
||
3078 !listAddNodeHead(server
.objfreelist
,o
))
3080 if (server
.vm_enabled
) pthread_mutex_unlock(&server
.obj_freelist_mutex
);
/* lookupKey: looks 'key' up in db->dict and works on the stored key/value
 * pair of the entry found.
 * With VM enabled:
 *  - stored key in REDIS_VM_MEMORY or REDIS_VM_SWAPPING state: a pending
 *    swap-out job is cancelled and the key's vm.atime is set to
 *    server.unixtime (used by the aging algorithm);
 *  - otherwise the value (asserted to be NULL in the entry) is loaded with
 *    vmLoadObject(), written back into the entry, and clients blocked on
 *    the key are notified if it was in REDIS_VM_LOADING state.
 * NOTE(review): the inner 'robj *key' shadows the parameter of the same
 * name. The handling of a missing entry and the return statements are not
 * present in this listing. */
3084 static robj
*lookupKey(redisDb
*db
, robj
*key
) {
3085 dictEntry
*de
= dictFind(db
->dict
,key
);
3087 robj
*key
= dictGetEntryKey(de
);
3088 robj
*val
= dictGetEntryVal(de
);
3090 if (server
.vm_enabled
) {
3091 if (key
->storage
== REDIS_VM_MEMORY
||
3092 key
->storage
== REDIS_VM_SWAPPING
)
3094 /* If we were swapping the object out, stop it, this key
3096 if (key
->storage
== REDIS_VM_SWAPPING
)
3097 vmCancelThreadedIOJob(key
);
3098 /* Update the access time of the key for the aging algorithm. */
3099 key
->vm
.atime
= server
.unixtime
;
3101 int notify
= (key
->storage
== REDIS_VM_LOADING
);
3103 /* Our value was swapped on disk. Bring it at home. */
3104 redisAssert(val
== NULL
);
3105 val
= vmLoadObject(key
);
3106 dictGetEntryVal(de
) = val
;
3108 /* Clients blocked by the VM subsystem may be waiting for
3110 if (notify
) handleClientsBlockedOnSwappedKey(db
,key
);
3119 static robj
*lookupKeyRead(redisDb
*db
, robj
*key
) {
3120 expireIfNeeded(db
,key
);
3121 return lookupKey(db
,key
);
3124 static robj
*lookupKeyWrite(redisDb
*db
, robj
*key
) {
3125 deleteIfVolatile(db
,key
);
3126 touchWatchedKey(db
,key
);
3127 return lookupKey(db
,key
);
3130 static robj
*lookupKeyReadOrReply(redisClient
*c
, robj
*key
, robj
*reply
) {
3131 robj
*o
= lookupKeyRead(c
->db
, key
);
3132 if (!o
) addReply(c
,reply
);
3136 static robj
*lookupKeyWriteOrReply(redisClient
*c
, robj
*key
, robj
*reply
) {
3137 robj
*o
= lookupKeyWrite(c
->db
, key
);
3138 if (!o
) addReply(c
,reply
);
/* checkType: compares o->type with the expected 'type'; on mismatch the
 * shared wrong-type error is queued for client 'c'.
 * NOTE(review): the return statements are not present in this listing, so
 * the meaning of the int result must be confirmed against the callers. */
3142 static int checkType(redisClient
*c
, robj
*o
, int type
) {
3143 if (o
->type
!= type
) {
3144 addReply(c
,shared
.wrongtypeerr
);
/* deleteKey: removes 'key' from db->expires (only when that dict is not
 * empty) and from db->dict. The result is non-zero when dictDelete() on
 * db->dict returned DICT_OK.
 * NOTE(review): the declaration of 'retval' and the reference-count
 * protection of 'key' described by the inner comment are not present in
 * this listing. */
3150 static int deleteKey(redisDb
*db
, robj
*key
) {
3153 /* We need to protect key from destruction: after the first dictDelete()
3154 * it may happen that 'key' is no longer valid if we don't increment
3155 * its count. This may happen when we get the object reference directly
3156 * from the hash table with dictRandomKey() or dict iterators */
3158 if (dictSize(db
->expires
)) dictDelete(db
->expires
,key
);
3159 retval
= dictDelete(db
->dict
,key
);
3162 return retval
== DICT_OK
;
3165 /* Check if the nul-terminated string 's' can be represented by a long
3166 * (that is, is a number that fits into long without any other space or
3167 * character before or after the digits).
3169 * If so, the function returns REDIS_OK and *longval is set to the value
3170 * of the number. Otherwise REDIS_ERR is returned */
3171 static int isStringRepresentableAsLong(sds s
, long *longval
) {
3172 char buf
[32], *endptr
;
3176 value
= strtol(s
, &endptr
, 10);
3177 if (endptr
[0] != '\0') return REDIS_ERR
;
3178 slen
= ll2string(buf
,32,value
);
3180 /* If the number converted back into a string is not identical
3181 * then it's not possible to encode the string as integer */
3182 if (sdslen(s
) != (unsigned)slen
|| memcmp(buf
,s
,slen
)) return REDIS_ERR
;
3183 if (longval
) *longval
= value
;
/* tryObjectEncoding: attempts a more compact encoding for string object 'o'
 * and returns the object to use from now on. Visible flow:
 *  - objects that are not raw-encoded, or that have refcount > 1, are
 *    returned unchanged;
 *  - the object must be a REDIS_STRING (asserted);
 *  - when the payload is not representable as a long, 'o' is returned;
 *  - values in [0, REDIS_SHARED_INTEGERS) are replaced by the matching
 *    shared integer object, whose reference count is incremented;
 *  - otherwise the encoding becomes REDIS_ENCODING_INT and the number is
 *    stored in o->ptr.
 * NOTE(review): the declarations of 'value' and 's', the release of the
 * replaced object / old payload and the final return are not present in
 * this listing. */
3187 /* Try to encode a string object in order to save space */
3188 static robj
*tryObjectEncoding(robj
*o
) {
3192 if (o
->encoding
!= REDIS_ENCODING_RAW
)
3193 return o
; /* Already encoded */
3195 /* It's not safe to encode shared objects: shared objects can be shared
3196 * everywhere in the "object space" of Redis. Encoded objects can only
3197 * appear as "values" (and not, for instance, as keys) */
3198 if (o
->refcount
> 1) return o
;
3200 /* Currently we try to encode only strings */
3201 redisAssert(o
->type
== REDIS_STRING
);
3203 /* Check if we can represent this string as a long integer */
3204 if (isStringRepresentableAsLong(s
,&value
) == REDIS_ERR
) return o
;
3206 /* Ok, this object can be encoded */
3207 if (value
>= 0 && value
< REDIS_SHARED_INTEGERS
) {
3209 incrRefCount(shared
.integers
[value
]);
3210 return shared
.integers
[value
];
3212 o
->encoding
= REDIS_ENCODING_INT
;
3214 o
->ptr
= (void*) value
;
3219 /* Get a decoded version of an encoded object (returned as a new object).
3220 * If the object is already raw-encoded just increment the ref count. */
3221 static robj
*getDecodedObject(robj
*o
) {
3224 if (o
->encoding
== REDIS_ENCODING_RAW
) {
3228 if (o
->type
== REDIS_STRING
&& o
->encoding
== REDIS_ENCODING_INT
) {
3231 ll2string(buf
,32,(long)o
->ptr
);
3232 dec
= createStringObject(buf
,strlen(buf
));
3235 redisPanic("Unknown encoding type");
3239 /* Compare two string objects via strcmp() or alike.
3240 * Note that the objects may be integer-encoded. In such a case we
3241 * use ll2string() to get a string representation of the numbers on the stack
3242 * and compare the strings, it's much faster than calling getDecodedObject().
3244 * Important note: if objects are not integer encoded, but binary-safe strings,
3245 * sdscmp() from sds.c will apply memcmp() so this function ca be considered
3247 static int compareStringObjects(robj
*a
, robj
*b
) {
3248 redisAssert(a
->type
== REDIS_STRING
&& b
->type
== REDIS_STRING
);
3249 char bufa
[128], bufb
[128], *astr
, *bstr
;
3252 if (a
== b
) return 0;
3253 if (a
->encoding
!= REDIS_ENCODING_RAW
) {
3254 ll2string(bufa
,sizeof(bufa
),(long) a
->ptr
);
3260 if (b
->encoding
!= REDIS_ENCODING_RAW
) {
3261 ll2string(bufb
,sizeof(bufb
),(long) b
->ptr
);
3267 return bothsds
? sdscmp(astr
,bstr
) : strcmp(astr
,bstr
);
3270 /* Equal string objects return 1 if the two objects are the same from the
3271 * point of view of a string comparison, otherwise 0 is returned. Note that
3272 * this function is faster then checking for (compareStringObject(a,b) == 0)
3273 * because it can perform some more optimization. */
3274 static int equalStringObjects(robj
*a
, robj
*b
) {
3275 if (a
->encoding
!= REDIS_ENCODING_RAW
&& b
->encoding
!= REDIS_ENCODING_RAW
){
3276 return a
->ptr
== b
->ptr
;
3278 return compareStringObjects(a
,b
) == 0;
3282 static size_t stringObjectLen(robj
*o
) {
3283 redisAssert(o
->type
== REDIS_STRING
);
3284 if (o
->encoding
== REDIS_ENCODING_RAW
) {
3285 return sdslen(o
->ptr
);
3289 return ll2string(buf
,32,(long)o
->ptr
);
/* getDoubleFromObject: extracts a double from string object 'o'.
 * Raw strings are parsed with strtod() and rejected with REDIS_ERR when
 * trailing characters remain; integer-encoded strings are converted from
 * the long stored in o->ptr; any other encoding panics.
 * NOTE(review): the declarations, any handling that precedes the type
 * assertion, the store into *target and the success return are not present
 * in this listing. */
3293 static int getDoubleFromObject(robj
*o
, double *target
) {
3300 redisAssert(o
->type
== REDIS_STRING
);
3301 if (o
->encoding
== REDIS_ENCODING_RAW
) {
3302 value
= strtod(o
->ptr
, &eptr
);
3303 if (eptr
[0] != '\0') return REDIS_ERR
;
3304 } else if (o
->encoding
== REDIS_ENCODING_INT
) {
3305 value
= (long)o
->ptr
;
3307 redisPanic("Unknown string encoding");
3315 static int getDoubleFromObjectOrReply(redisClient
*c
, robj
*o
, double *target
, const char *msg
) {
3317 if (getDoubleFromObject(o
, &value
) != REDIS_OK
) {
3319 addReplySds(c
, sdscatprintf(sdsempty(), "-ERR %s\r\n", msg
));
3321 addReplySds(c
, sdsnew("-ERR value is not a double\r\n"));
/* getLongLongFromObject: extracts a long long from string object 'o'.
 * Raw strings are parsed with strtoll() in base 10 and rejected with
 * REDIS_ERR when trailing characters remain; integer-encoded strings are
 * converted from the long stored in o->ptr; any other encoding panics.
 * NOTE(review): the declarations, any handling that precedes the type
 * assertion, the store into *target and the success return are not present
 * in this listing. */
3330 static int getLongLongFromObject(robj
*o
, long long *target
) {
3337 redisAssert(o
->type
== REDIS_STRING
);
3338 if (o
->encoding
== REDIS_ENCODING_RAW
) {
3339 value
= strtoll(o
->ptr
, &eptr
, 10);
3340 if (eptr
[0] != '\0') return REDIS_ERR
;
3341 } else if (o
->encoding
== REDIS_ENCODING_INT
) {
3342 value
= (long)o
->ptr
;
3344 redisPanic("Unknown string encoding");
3352 static int getLongLongFromObjectOrReply(redisClient
*c
, robj
*o
, long long *target
, const char *msg
) {
3354 if (getLongLongFromObject(o
, &value
) != REDIS_OK
) {
3356 addReplySds(c
, sdscatprintf(sdsempty(), "-ERR %s\r\n", msg
));
3358 addReplySds(c
, sdsnew("-ERR value is not an integer\r\n"));
3367 static int getLongFromObjectOrReply(redisClient
*c
, robj
*o
, long *target
, const char *msg
) {
3370 if (getLongLongFromObjectOrReply(c
, o
, &value
, msg
) != REDIS_OK
) return REDIS_ERR
;
3371 if (value
< LONG_MIN
|| value
> LONG_MAX
) {
3373 addReplySds(c
, sdscatprintf(sdsempty(), "-ERR %s\r\n", msg
));
3375 addReplySds(c
, sdsnew("-ERR value is out of range\r\n"));
3384 /*============================ RDB saving/loading =========================== */
/* Write the one-byte 'type' opcode to 'fp'.
 * Returns 0 on success, -1 when the byte could not be written. */
static int rdbSaveType(FILE *fp, unsigned char type) {
    size_t written = fwrite(&type,1,1,fp);

    return (written == 0) ? -1 : 0;
}
/* Write the time 't' to 'fp' as a 4-byte signed integer in host byte order
 * (the value is narrowed to int32_t first).
 * Returns 0 on success, -1 when the write failed. */
static int rdbSaveTime(FILE *fp, time_t t) {
    int32_t seconds;

    seconds = (int32_t) t;
    return (fwrite(&seconds,4,1,fp) == 0) ? -1 : 0;
}
/* rdbSaveLen: writes the length 'len' to 'fp' using the variable-size RDB
 * length encoding; the top two bits of the first byte carry the format.
 * Visible branches:
 *  - 6 bit:  one byte, REDIS_RDB_6BITLEN<<6 or-ed with the length;
 *  - 14 bit (len < 1<<14): two bytes, the first holding
 *    REDIS_RDB_14BITLEN<<6 or-ed with the high bits of the length;
 *  - 32 bit: one marker byte (REDIS_RDB_32BITLEN<<6) followed by the four
 *    bytes of 'len'.
 * Every failed fwrite() yields -1.
 * NOTE(review): the condition guarding the 6 bit branch, the assignment of
 * buf[1], any byte-order conversion done before the 4-byte write and the
 * success return are not present in this listing. */
3397 /* check rdbLoadLen() comments for more info */
3398 static int rdbSaveLen(FILE *fp
, uint32_t len
) {
3399 unsigned char buf
[2];
3402 /* Save a 6 bit len */
3403 buf
[0] = (len
&0xFF)|(REDIS_RDB_6BITLEN
<<6);
3404 if (fwrite(buf
,1,1,fp
) == 0) return -1;
3405 } else if (len
< (1<<14)) {
3406 /* Save a 14 bit len */
3407 buf
[0] = ((len
>>8)&0xFF)|(REDIS_RDB_14BITLEN
<<6);
3409 if (fwrite(buf
,2,1,fp
) == 0) return -1;
3411 /* Save a 32 bit len */
3412 buf
[0] = (REDIS_RDB_32BITLEN
<<6);
3413 if (fwrite(buf
,1,1,fp
) == 0) return -1;
3415 if (fwrite(&len
,4,1,fp
) == 0) return -1;
3420 /* Encode 'value' as an integer if possible (if integer will fit the
3421 * supported range). If the function sucessful encoded the integer
3422 * then the (up to 5 bytes) encoded representation is written in the
3423 * string pointed by 'enc' and the length is returned. Otherwise
3425 static int rdbEncodeInteger(long long value
, unsigned char *enc
) {
3426 /* Finally check if it fits in our ranges */
3427 if (value
>= -(1<<7) && value
<= (1<<7)-1) {
3428 enc
[0] = (REDIS_RDB_ENCVAL
<<6)|REDIS_RDB_ENC_INT8
;
3429 enc
[1] = value
&0xFF;
3431 } else if (value
>= -(1<<15) && value
<= (1<<15)-1) {
3432 enc
[0] = (REDIS_RDB_ENCVAL
<<6)|REDIS_RDB_ENC_INT16
;
3433 enc
[1] = value
&0xFF;
3434 enc
[2] = (value
>>8)&0xFF;
3436 } else if (value
>= -((long long)1<<31) && value
<= ((long long)1<<31)-1) {
3437 enc
[0] = (REDIS_RDB_ENCVAL
<<6)|REDIS_RDB_ENC_INT32
;
3438 enc
[1] = value
&0xFF;
3439 enc
[2] = (value
>>8)&0xFF;
3440 enc
[3] = (value
>>16)&0xFF;
3441 enc
[4] = (value
>>24)&0xFF;
/* String objects in the form "2391" "-100" without any space and with a
 * range of values that can fit in an 8, 16 or 32 bit signed value can be
 * encoded as integers to save space.
 *
 * 's' points to 'len' bytes and does not need to be nul-terminated: the
 * bytes are copied into a local, terminated buffer before being parsed, so
 * nothing past s[len-1] is ever read and the result does not depend on
 * whatever follows the buffer in memory.
 *
 * Returns the length of the encoding written at 'enc' (see
 * rdbEncodeInteger()), or 0 when the string cannot be encoded. */
static int rdbTryIntegerEncoding(char *s, size_t len, unsigned char *enc) {
    long long value;
    char *endptr, copy[32], buf[32];

    /* A long long never needs 32 characters, so longer inputs (and the
     * empty string) can't be the canonical form of a number. */
    if (len == 0 || len >= sizeof(copy)) return 0;
    memcpy(copy,s,len);
    copy[len] = '\0';

    /* Check if it's possible to encode this value as a number */
    value = strtoll(copy, &endptr, 10);
    if (endptr[0] != '\0') return 0;
    ll2string(buf,32,value);

    /* If the number converted back into a string is not identical
     * then it's not possible to encode the string as integer */
    if (strlen(buf) != len || memcmp(buf,s,len)) return 0;

    return rdbEncodeInteger(value,enc);
}
/* rdbSaveLzfStringObject: tries to save the 'len' bytes at 's' compressed
 * with LZF. Visible flow:
 *  - inputs of 4 bytes or less are not compressed (returns 0);
 *  - an output buffer of outlen+1 bytes is allocated (0 is returned when
 *    the allocation fails) and lzf_compress() is run;
 *  - on success the marker byte (REDIS_RDB_ENCVAL<<6)|REDIS_RDB_ENC_LZF,
 *    the compressed length, the original length and the compressed bytes
 *    are written; any write failure jumps to the 'writeerr' label.
 * NOTE(review): the declarations of 'byte' and 'out', the computation of
 * 'outlen', the handling of comprlen == 0, the release of 'out', the
 * return values and the 'writeerr' label itself are not present in this
 * listing. */
3467 static int rdbSaveLzfStringObject(FILE *fp
, unsigned char *s
, size_t len
) {
3468 size_t comprlen
, outlen
;
3472 /* We require at least four bytes compression for this to be worth it */
3473 if (len
<= 4) return 0;
3475 if ((out
= zmalloc(outlen
+1)) == NULL
) return 0;
3476 comprlen
= lzf_compress(s
, len
, out
, outlen
);
3477 if (comprlen
== 0) {
3481 /* Data compressed! Let's save it on disk */
3482 byte
= (REDIS_RDB_ENCVAL
<<6)|REDIS_RDB_ENC_LZF
;
3483 if (fwrite(&byte
,1,1,fp
) == 0) goto writeerr
;
3484 if (rdbSaveLen(fp
,comprlen
) == -1) goto writeerr
;
3485 if (rdbSaveLen(fp
,len
) == -1) goto writeerr
;
3486 if (fwrite(out
,comprlen
,1,fp
) == 0) goto writeerr
;
/* rdbSaveRawString: writes the 'len' bytes at 's' to 'fp', trying the
 * cheapest representation first. Visible flow:
 *  - integer encoding through rdbTryIntegerEncoding(); when it yields a
 *    positive length those bytes are written;
 *  - LZF compression when server.rdbcompression is set and len > 20:
 *    -1 from rdbSaveLzfStringObject() is propagated, a positive result
 *    means the string was saved, 0 falls through;
 *  - verbatim: the length followed by the bytes (nothing is written for an
 *    empty string).
 * Returns -1 on write errors.
 * NOTE(review): the declarations, the condition guarding the integer
 * attempt and the success returns are not present in this listing. */
3495 /* Save a string object as [len][data] on disk. If the object is a string
3496 * representation of an integer value we try to save it in a special form */
3497 static int rdbSaveRawString(FILE *fp
, unsigned char *s
, size_t len
) {
3500 /* Try integer encoding */
3502 unsigned char buf
[5];
3503 if ((enclen
= rdbTryIntegerEncoding((char*)s
,len
,buf
)) > 0) {
3504 if (fwrite(buf
,enclen
,1,fp
) == 0) return -1;
3509 /* Try LZF compression - under 20 bytes it's unable to compress even
3510 * aaaaaaaaaaaaaaaaaa so skip it */
3511 if (server
.rdbcompression
&& len
> 20) {
3514 retval
= rdbSaveLzfStringObject(fp
,s
,len
);
3515 if (retval
== -1) return -1;
3516 if (retval
> 0) return 0;
3517 /* retval == 0 means data can't be compressed, save the old way */
3520 /* Store verbatim */
3521 if (rdbSaveLen(fp
,len
) == -1) return -1;
3522 if (len
&& fwrite(s
,len
,1,fp
) == 0) return -1;
/* rdbSaveStringObject: saves the string object 'obj' to 'fp', also
 * accepting encoded objects. Visible flow:
 *  - an integer-encoded object is first offered to rdbEncodeInteger(); when
 *    that yields a positive length the encoded bytes are written directly;
 *  - an object that is still not raw-encoded is decoded with
 *    getDecodedObject() and its sds is saved with rdbSaveRawString();
 *  - a raw object is saved with rdbSaveRawString() as it is.
 * Returns -1 on write errors.
 * NOTE(review): the declarations of 'retval' and 'enclen', the returns,
 * the else branch header and the release of the decoded object are not
 * present in this listing. */
3526 /* Like rdbSaveRawString() but handle encoded objects */
3527 static int rdbSaveStringObject(FILE *fp
, robj
*obj
) {
3530 /* Avoid to decode the object, then encode it again, if the
3531 * object is already integer encoded. */
3532 if (obj
->encoding
== REDIS_ENCODING_INT
) {
3533 long val
= (long) obj
->ptr
;
3534 unsigned char buf
[5];
3537 if ((enclen
= rdbEncodeInteger(val
,buf
)) > 0) {
3538 if (fwrite(buf
,enclen
,1,fp
) == 0) return -1;
3541 /* otherwise... fall through and continue with the usual
3545 /* Avoid incr/decr ref count business when possible.
3546 * This plays well with copy-on-write given that we are probably
3547 * in a child process (BGSAVE). Also this makes sure key objects
3548 * of swapped objects are not incRefCount-ed (an assert does not allow
3549 * this in order to avoid bugs) */
3550 if (obj
->encoding
!= REDIS_ENCODING_RAW
) {
3551 obj
= getDecodedObject(obj
);
3552 retval
= rdbSaveRawString(fp
,obj
->ptr
,sdslen(obj
->ptr
));
3555 retval
= rdbSaveRawString(fp
,obj
->ptr
,sdslen(obj
->ptr
));
/* rdbSaveDoubleValue: writes 'val' to 'fp' as a length-prefixed string.
 * Visible flow:
 *  - a non-finite value is stored as the single byte 255 (negative) or 254;
 *  - a finite value that has no fractional part and lies strictly between
 *    'min' and 'max' is printed with ll2string() (only when the
 *    preprocessor check on DBL_MANT_DIG / LLONG_MAX holds), otherwise with
 *    snprintf("%.17g"); in both cases the text starts at buf+1 and buf[0]
 *    receives its length;
 *  - 'len' bytes of 'buf' are written; -1 is returned on failure.
 * NOTE(review): ll2string() is given sizeof(buf) although it writes at
 * buf+1, while the snprintf() call uses sizeof(buf)-1 - the two limits
 * should probably match. The first branch of the if chain, the assignments
 * to 'len' and the success return are not present in this listing. */
3560 /* Save a double value. Doubles are saved as strings prefixed by an unsigned
3561 * 8 bit integer specifying the length of the representation.
3562 * This 8 bit integer has special values in order to specify the following
3568 static int rdbSaveDoubleValue(FILE *fp
, double val
) {
3569 unsigned char buf
[128];
3575 } else if (!isfinite(val
)) {
3577 buf
[0] = (val
< 0) ? 255 : 254;
3579 #if (DBL_MANT_DIG >= 52) && (LLONG_MAX == 0x7fffffffffffffffLL)
3580 /* Check if the float is in a safe range to be casted into a
3581 * long long. We are assuming that long long is 64 bit here.
3582 * Also we are assuming that there are no implementations around where
3583 * double has precision < 52 bit.
3585 * Under this assumptions we test if a double is inside an interval
3586 * where casting to long long is safe. Then using two castings we
3587 * make sure the decimal part is zero. If all this is true we use
3588 * integer printing function that is much faster. */
3589 double min
= -4503599627370495; /* -((2^52)-1) */
3590 double max
= 4503599627370496; /* 2^52 */
3591 if (val
> min
&& val
< max
&& val
== ((double)((long long)val
)))
3592 ll2string((char*)buf
+1,sizeof(buf
),(long long)val
);
3595 snprintf((char*)buf
+1,sizeof(buf
)-1,"%.17g",val
);
3596 buf
[0] = strlen((char*)buf
+1);
3599 if (fwrite(buf
,len
,1,fp
) == 0) return -1;
/* rdbSaveObject: serializes the value object 'o' to 'fp' according to its
 * type. Visible layout per type:
 *  - REDIS_STRING: the string itself (rdbSaveStringObject());
 *  - REDIS_LIST:   element count, then every element as a string;
 *  - REDIS_SET:    member count, then every dict key as a string;
 *  - REDIS_ZSET:   member count, then for each entry the member string
 *                  followed by its score (rdbSaveDoubleValue());
 *  - REDIS_HASH:   field count, then field/value pairs; zipmap-encoded
 *                  hashes are walked with zipmapNext() and saved as raw
 *                  buffers, hash-table encoded ones as string objects.
 * An unknown type panics. Every failed write returns -1.
 * NOTE(review): the early 'return -1' statements inside the dict loops
 * leave without calling dictReleaseIterator(). Several declarations, else
 * headers and the success return are not present in this listing. */
3603 /* Save a Redis object. */
3604 static int rdbSaveObject(FILE *fp
, robj
*o
) {
3605 if (o
->type
== REDIS_STRING
) {
3606 /* Save a string value */
3607 if (rdbSaveStringObject(fp
,o
) == -1) return -1;
3608 } else if (o
->type
== REDIS_LIST
) {
3609 /* Save a list value */
3610 list
*list
= o
->ptr
;
3614 if (rdbSaveLen(fp
,listLength(list
)) == -1) return -1;
3615 listRewind(list
,&li
);
3616 while((ln
= listNext(&li
))) {
3617 robj
*eleobj
= listNodeValue(ln
);
3619 if (rdbSaveStringObject(fp
,eleobj
) == -1) return -1;
3621 } else if (o
->type
== REDIS_SET
) {
3622 /* Save a set value */
3624 dictIterator
*di
= dictGetIterator(set
);
3627 if (rdbSaveLen(fp
,dictSize(set
)) == -1) return -1;
3628 while((de
= dictNext(di
)) != NULL
) {
3629 robj
*eleobj
= dictGetEntryKey(de
);
3631 if (rdbSaveStringObject(fp
,eleobj
) == -1) return -1;
3633 dictReleaseIterator(di
);
3634 } else if (o
->type
== REDIS_ZSET
) {
3635 /* Save a sorted set value */
3637 dictIterator
*di
= dictGetIterator(zs
->dict
);
3640 if (rdbSaveLen(fp
,dictSize(zs
->dict
)) == -1) return -1;
3641 while((de
= dictNext(di
)) != NULL
) {
3642 robj
*eleobj
= dictGetEntryKey(de
);
3643 double *score
= dictGetEntryVal(de
);
3645 if (rdbSaveStringObject(fp
,eleobj
) == -1) return -1;
3646 if (rdbSaveDoubleValue(fp
,*score
) == -1) return -1;
3648 dictReleaseIterator(di
);
3649 } else if (o
->type
== REDIS_HASH
) {
3650 /* Save a hash value */
3651 if (o
->encoding
== REDIS_ENCODING_ZIPMAP
) {
3652 unsigned char *p
= zipmapRewind(o
->ptr
);
3653 unsigned int count
= zipmapLen(o
->ptr
);
3654 unsigned char *key
, *val
;
3655 unsigned int klen
, vlen
;
3657 if (rdbSaveLen(fp
,count
) == -1) return -1;
3658 while((p
= zipmapNext(p
,&key
,&klen
,&val
,&vlen
)) != NULL
) {
3659 if (rdbSaveRawString(fp
,key
,klen
) == -1) return -1;
3660 if (rdbSaveRawString(fp
,val
,vlen
) == -1) return -1;
3663 dictIterator
*di
= dictGetIterator(o
->ptr
);
3666 if (rdbSaveLen(fp
,dictSize((dict
*)o
->ptr
)) == -1) return -1;
3667 while((de
= dictNext(di
)) != NULL
) {
3668 robj
*key
= dictGetEntryKey(de
);
3669 robj
*val
= dictGetEntryVal(de
);
3671 if (rdbSaveStringObject(fp
,key
) == -1) return -1;
3672 if (rdbSaveStringObject(fp
,val
) == -1) return -1;
3674 dictReleaseIterator(di
);
3677 redisPanic("Unknown object type");
3682 /* Return the length the object will have on disk if saved with
3683 * the rdbSaveObject() function. Currently we use a trick to get
3684 * this length with very little changes to the code. In the future
3685 * we could switch to a faster solution. */
3686 static off_t
rdbSavedObjectLen(robj
*o
, FILE *fp
) {
3687 if (fp
== NULL
) fp
= server
.devnull
;
3689 assert(rdbSaveObject(fp
,o
) != 1);
3693 /* Return the number of pages required to save this object in the swap file */
3694 static off_t
rdbSavedObjectPages(robj
*o
, FILE *fp
) {
3695 off_t bytes
= rdbSavedObjectLen(o
,fp
);
3697 return (bytes
+(server
.vm_page_size
-1))/server
.vm_page_size
;
/* rdbSave: dumps every database to 'filename'. Visible flow:
 *  - with VM enabled, waits for the I/O jobs queue to drain;
 *  - opens "temp-<pid>.rdb" for writing and logs a warning on failure;
 *  - writes the "REDIS0001" signature;
 *  - for each non-empty database: the SELECTDB opcode and the db index,
 *    then for every entry an optional EXPIRETIME opcode + time (entries
 *    whose expire time is already in the past are skipped), followed by
 *    type, key and value. Values that are swapped out or being loaded are
 *    obtained with vmPreviewObject() and saved with the type recorded in
 *    key->vtype;
 *  - writes the EOF opcode, renames the temp file over 'filename' (logging
 *    a warning on failure), logs "DB saved on disk" and updates
 *    server.lastsave.
 * Write failures jump to the 'werr' label, where the error is logged and a
 * live iterator is released.
 * NOTE(review): most local declarations, the flushing/closing of 'fp', the
 * removal of the temp file, the release of the previewed object, the
 * 'werr' label itself and all return statements are not present in this
 * listing. */
3700 /* Save the DB on disk. Return REDIS_ERR on error, REDIS_OK on success */
3701 static int rdbSave(char *filename
) {
3702 dictIterator
*di
= NULL
;
3707 time_t now
= time(NULL
);
3709 /* Wait for I/O threads to terminate, just in case this is a
3710 * foreground-saving, to avoid seeking the swap file descriptor at the
3712 if (server
.vm_enabled
)
3713 waitEmptyIOJobsQueue();
3715 snprintf(tmpfile
,256,"temp-%d.rdb", (int) getpid());
3716 fp
= fopen(tmpfile
,"w");
3718 redisLog(REDIS_WARNING
, "Failed saving the DB: %s", strerror(errno
));
3721 if (fwrite("REDIS0001",9,1,fp
) == 0) goto werr
;
3722 for (j
= 0; j
< server
.dbnum
; j
++) {
3723 redisDb
*db
= server
.db
+j
;
3725 if (dictSize(d
) == 0) continue;
3726 di
= dictGetIterator(d
);
3732 /* Write the SELECT DB opcode */
3733 if (rdbSaveType(fp
,REDIS_SELECTDB
) == -1) goto werr
;
3734 if (rdbSaveLen(fp
,j
) == -1) goto werr
;
3736 /* Iterate this DB writing every entry */
3737 while((de
= dictNext(di
)) != NULL
) {
3738 robj
*key
= dictGetEntryKey(de
);
3739 robj
*o
= dictGetEntryVal(de
);
3740 time_t expiretime
= getExpire(db
,key
);
3742 /* Save the expire time */
3743 if (expiretime
!= -1) {
3744 /* If this key is already expired skip it */
3745 if (expiretime
< now
) continue;
3746 if (rdbSaveType(fp
,REDIS_EXPIRETIME
) == -1) goto werr
;
3747 if (rdbSaveTime(fp
,expiretime
) == -1) goto werr
;
3749 /* Save the key and associated value. This requires special
3750 * handling if the value is swapped out. */
3751 if (!server
.vm_enabled
|| key
->storage
== REDIS_VM_MEMORY
||
3752 key
->storage
== REDIS_VM_SWAPPING
) {
3753 /* Save type, key, value */
3754 if (rdbSaveType(fp
,o
->type
) == -1) goto werr
;
3755 if (rdbSaveStringObject(fp
,key
) == -1) goto werr
;
3756 if (rdbSaveObject(fp
,o
) == -1) goto werr
;
3758 /* REDIS_VM_SWAPPED or REDIS_VM_LOADING */
3760 /* Get a preview of the object in memory */
3761 po
= vmPreviewObject(key
);
3762 /* Save type, key, value */
3763 if (rdbSaveType(fp
,key
->vtype
) == -1) goto werr
;
3764 if (rdbSaveStringObject(fp
,key
) == -1) goto werr
;
3765 if (rdbSaveObject(fp
,po
) == -1) goto werr
;
3766 /* Remove the loaded object from memory */
3770 dictReleaseIterator(di
);
3773 if (rdbSaveType(fp
,REDIS_EOF
) == -1) goto werr
;
3775 /* Make sure data will not remain on the OS's output buffers */
3780 /* Use RENAME to make sure the DB file is changed atomically only
3781 * if the generated DB file is ok. */
3782 if (rename(tmpfile
,filename
) == -1) {
3783 redisLog(REDIS_WARNING
,"Error moving temp DB file on the final destination: %s", strerror(errno
));
3787 redisLog(REDIS_NOTICE
,"DB saved on disk");
3789 server
.lastsave
= time(NULL
);
3795 redisLog(REDIS_WARNING
,"Write error saving DB on disk: %s", strerror(errno
));
3796 if (di
) dictReleaseIterator(di
);
/* rdbSaveBackground: starts a background save of the dataset to
 * 'filename' in a forked child. Visible flow:
 *  - REDIS_ERR when a background save child is already recorded in
 *    server.bgsavechildpid;
 *  - with VM enabled, the I/O jobs queue is drained before forking;
 *  - child (fork() == 0): reopens the swap file when VM is enabled and
 *    runs rdbSave(filename);
 *  - parent: logs a warning when fork() failed, otherwise logs the child
 *    pid, stores it in server.bgsavechildpid and refreshes the dict resize
 *    policy.
 * NOTE(review): the declaration of 'childpid', the child's exit calls, the
 * parent's return statements and the rest of the fork-failure log call are
 * not present in this listing. */
3800 static int rdbSaveBackground(char *filename
) {
3803 if (server
.bgsavechildpid
!= -1) return REDIS_ERR
;
3804 if (server
.vm_enabled
) waitEmptyIOJobsQueue();
3805 if ((childpid
= fork()) == 0) {
3807 if (server
.vm_enabled
) vmReopenSwapFile();
3809 if (rdbSave(filename
) == REDIS_OK
) {
3816 if (childpid
== -1) {
3817 redisLog(REDIS_WARNING
,"Can't save in background: fork: %s",
3821 redisLog(REDIS_NOTICE
,"Background saving started by pid %d",childpid
);
3822 server
.bgsavechildpid
= childpid
;
3823 updateDictResizePolicy();
3826 return REDIS_OK
; /* unreached */
/* rdbRemoveTempFile: builds the name "temp-<childpid>.rdb", i.e. the same
 * pattern rdbSave() uses for its temporary file, for the given child pid.
 * NOTE(review): the declaration of 'tmpfile' and the statement that acts on
 * the file (presumably its removal, going by the function name) are not
 * present in this listing. */
3829 static void rdbRemoveTempFile(pid_t childpid
) {
3832 snprintf(tmpfile
,256,"temp-%d.rdb", (int) childpid
);
/* Read a one-byte type opcode from 'fp'.
 * Returns the byte as a value in the 0-255 range, or -1 when nothing could
 * be read. */
static int rdbLoadType(FILE *fp) {
    unsigned char opcode;
    size_t got;

    got = fread(&opcode,1,1,fp);
    if (got == 0) return -1;
    return opcode;
}
/* Read a time value stored as a 4-byte signed integer in host byte order.
 * Returns the time, or -1 when the four bytes could not be read. */
static time_t rdbLoadTime(FILE *fp) {
    int32_t seconds;
    size_t got;

    got = fread(&seconds,4,1,fp);
    if (got == 0) return -1;
    return (time_t) seconds;
}
/* rdbLoadLen: reads one encoded length from 'fp'. The top two bits of the
 * first byte select the format:
 *  - REDIS_RDB_6BITLEN:  the length is carried by the first byte;
 *  - REDIS_RDB_ENCVAL:   the byte carries an encoding type, not a length,
 *                        and *isencoded is set to 1 (when not NULL);
 *  - REDIS_RDB_14BITLEN: a second byte is read and the result is
 *                        ((buf[0]&0x3F)<<8)|buf[1];
 *  - otherwise:          four more bytes are read into 'len'.
 * REDIS_RDB_LENERR is returned whenever a read fails.
 * NOTE(review): the declarations of 'len' and 'type', the return
 * statements of the 6 bit / encoded / 32 bit branches and any byte-order
 * conversion of the 32 bit value are not present in this listing. */
3848 /* Load an encoded length from the DB, see the REDIS_RDB_* defines on the top
3849 * of this file for a description of how this are stored on disk.
3851 * isencoded is set to 1 if the length read is not actually a length but
3852 * an "encoding type", check the above comments for more info */
3853 static uint32_t rdbLoadLen(FILE *fp
, int *isencoded
) {
3854 unsigned char buf
[2];
3858 if (isencoded
) *isencoded
= 0;
3859 if (fread(buf
,1,1,fp
) == 0) return REDIS_RDB_LENERR
;
3860 type
= (buf
[0]&0xC0)>>6;
3861 if (type
== REDIS_RDB_6BITLEN
) {
3862 /* Read a 6 bit len */
3864 } else if (type
== REDIS_RDB_ENCVAL
) {
3865 /* Read a 6 bit len encoding type */
3866 if (isencoded
) *isencoded
= 1;
3868 } else if (type
== REDIS_RDB_14BITLEN
) {
3869 /* Read a 14 bit len */
3870 if (fread(buf
+1,1,1,fp
) == 0) return REDIS_RDB_LENERR
;
3871 return ((buf
[0]&0x3F)<<8)|buf
[1];
3873 /* Read a 32 bit len */
3874 if (fread(&len
,4,1,fp
) == 0) return REDIS_RDB_LENERR
;
3879 /* Load an integer-encoded object from file 'fp', with the specified
3880 * encoding type 'enctype'. If encode is true the function may return
3881 * an integer-encoded object as reply, otherwise the returned object
3882 * will always be encoded as a raw string. */
3883 static robj
*rdbLoadIntegerObject(FILE *fp
, int enctype
, int encode
) {
3884 unsigned char enc
[4];
3887 if (enctype
== REDIS_RDB_ENC_INT8
) {
3888 if (fread(enc
,1,1,fp
) == 0) return NULL
;
3889 val
= (signed char)enc
[0];
3890 } else if (enctype
== REDIS_RDB_ENC_INT16
) {
3892 if (fread(enc
,2,1,fp
) == 0) return NULL
;
3893 v
= enc
[0]|(enc
[1]<<8);
3895 } else if (enctype
== REDIS_RDB_ENC_INT32
) {
3897 if (fread(enc
,4,1,fp
) == 0) return NULL
;
3898 v
= enc
[0]|(enc
[1]<<8)|(enc
[2]<<16)|(enc
[3]<<24);
3901 val
= 0; /* anti-warning */
3902 redisPanic("Unknown RDB integer encoding type");
3905 return createStringObjectFromLongLong(val
);
3907 return createObject(REDIS_STRING
,sdsfromlonglong(val
));
/* rdbLoadLzfStringObject: loads an LZF-compressed string from 'fp'.
 * Visible flow: the compressed length and then the uncompressed length are
 * read with rdbLoadLen() (NULL is returned when either read fails), a
 * buffer for the compressed bytes and an sds of the final length are
 * allocated, the compressed bytes are read and lzf_decompress() fills the
 * sds, which is returned wrapped in a REDIS_STRING object. Allocation,
 * read and decompression failures jump to the 'err' label.
 * NOTE(review): the declaration of 'val', the release of the compressed
 * buffer on success and the 'err' label with its cleanup are not present
 * in this listing. Both lengths come straight from the file and are used
 * as allocation sizes without further checks in the visible code. */
3910 static robj
*rdbLoadLzfStringObject(FILE*fp
) {
3911 unsigned int len
, clen
;
3912 unsigned char *c
= NULL
;
3915 if ((clen
= rdbLoadLen(fp
,NULL
)) == REDIS_RDB_LENERR
) return NULL
;
3916 if ((len
= rdbLoadLen(fp
,NULL
)) == REDIS_RDB_LENERR
) return NULL
;
3917 if ((c
= zmalloc(clen
)) == NULL
) goto err
;
3918 if ((val
= sdsnewlen(NULL
,len
)) == NULL
) goto err
;
3919 if (fread(c
,clen
,1,fp
) == 0) goto err
;
3920 if (lzf_decompress(c
,clen
,val
,len
) == 0) goto err
;
3922 return createObject(REDIS_STRING
,val
);
/* rdbGenericLoadStringObject: loads one string object from 'fp'.
 * rdbLoadLen() is read first; the visible case labels dispatch the special
 * encodings: the three integer encodings go to rdbLoadIntegerObject()
 * (forwarding 'encode'), REDIS_RDB_ENC_LZF goes to
 * rdbLoadLzfStringObject(), anything else panics. For a plain length, NULL
 * is returned on REDIS_RDB_LENERR, otherwise an sds of 'len' bytes is
 * allocated, filled from the file (skipped for empty strings) and returned
 * wrapped in a REDIS_STRING object.
 * NOTE(review): the declarations, the test on 'isencoded', the switch
 * header and the handling of a failed payload read are not present in this
 * listing. */
3929 static robj
*rdbGenericLoadStringObject(FILE*fp
, int encode
) {
3934 len
= rdbLoadLen(fp
,&isencoded
);
3937 case REDIS_RDB_ENC_INT8
:
3938 case REDIS_RDB_ENC_INT16
:
3939 case REDIS_RDB_ENC_INT32
:
3940 return rdbLoadIntegerObject(fp
,len
,encode
);
3941 case REDIS_RDB_ENC_LZF
:
3942 return rdbLoadLzfStringObject(fp
);
3944 redisPanic("Unknown RDB encoding type");
3948 if (len
== REDIS_RDB_LENERR
) return NULL
;
3949 val
= sdsnewlen(NULL
,len
);
3950 if (len
&& fread(val
,len
,1,fp
) == 0) {
3954 return createObject(REDIS_STRING
,val
);
3957 static robj
*rdbLoadStringObject(FILE *fp
) {
3958 return rdbGenericLoadStringObject(fp
,0);
3961 static robj
*rdbLoadEncodedStringObject(FILE *fp
) {
3962 return rdbGenericLoadStringObject(fp
,1);
3965 /* For information about double serialization check rdbSaveDoubleValue() */
3966 static int rdbLoadDoubleValue(FILE *fp
, double *val
) {
3970 if (fread(&len
,1,1,fp
) == 0) return -1;
3972 case 255: *val
= R_NegInf
; return 0;
3973 case 254: *val
= R_PosInf
; return 0;
3974 case 253: *val
= R_Nan
; return 0;
3976 if (fread(buf
,len
,1,fp
) == 0) return -1;
3978 sscanf(buf
, "%lg", val
);
3983 /* Load a Redis object of the specified type from the specified file.
3984 * On success a newly allocated object is returned, otherwise NULL. */
3985 static robj
*rdbLoadObject(int type
, FILE *fp
) {
3988 redisLog(REDIS_DEBUG
,"LOADING OBJECT %d (at %d)\n",type
,ftell(fp
));
3989 if (type
== REDIS_STRING
) {
3990 /* Read string value */
3991 if ((o
= rdbLoadEncodedStringObject(fp
)) == NULL
) return NULL
;
3992 o
= tryObjectEncoding(o
);
3993 } else if (type
== REDIS_LIST
|| type
== REDIS_SET
) {
3994 /* Read list/set value */
3997 if ((listlen
= rdbLoadLen(fp
,NULL
)) == REDIS_RDB_LENERR
) return NULL
;
3998 o
= (type
== REDIS_LIST
) ? createListObject() : createSetObject();
3999 /* It's faster to expand the dict to the right size asap in order
4000 * to avoid rehashing */
4001 if (type
== REDIS_SET
&& listlen
> DICT_HT_INITIAL_SIZE
)
4002 dictExpand(o
->ptr
,listlen
);
4003 /* Load every single element of the list/set */
4007 if ((ele
= rdbLoadEncodedStringObject(fp
)) == NULL
) return NULL
;
4008 ele
= tryObjectEncoding(ele
);
4009 if (type
== REDIS_LIST
) {
4010 listAddNodeTail((list
*)o
->ptr
,ele
);
4012 dictAdd((dict
*)o
->ptr
,ele
,NULL
);
4015 } else if (type
== REDIS_ZSET
) {
4016 /* Read sorted set value */
4020 if ((zsetlen
= rdbLoadLen(fp
,NULL
)) == REDIS_RDB_LENERR
) return NULL
;
4021 o
= createZsetObject();
4023 /* Load every single element of the sorted set */
4026 double *score
= zmalloc(sizeof(double));
4028 if ((ele
= rdbLoadEncodedStringObject(fp
)) == NULL
) return NULL
;
4029 ele
= tryObjectEncoding(ele
);
4030 if (rdbLoadDoubleValue(fp
,score
) == -1) return NULL
;
4031 dictAdd(zs
->dict
,ele
,score
);
4032 zslInsert(zs
->zsl
,*score
,ele
);
4033 incrRefCount(ele
); /* added to skiplist */
4035 } else if (type
== REDIS_HASH
) {
4038 if ((hashlen
= rdbLoadLen(fp
,NULL
)) == REDIS_RDB_LENERR
) return NULL
;
4039 o
= createHashObject();
4040 /* Too many entries? Use a hash table. */
4041 if (hashlen
> server
.hash_max_zipmap_entries
)
4042 convertToRealHash(o
);
4043 /* Load every key/value, then set it into the zipmap or hash
4044 * table, as needed. */
4048 if ((key
= rdbLoadStringObject(fp
)) == NULL
) return NULL
;
4049 if ((val
= rdbLoadStringObject(fp
)) == NULL
) return NULL
;
4050 /* If we are using a zipmap and there are too big values
4051 * the object is converted to real hash table encoding. */
4052 if (o
->encoding
!= REDIS_ENCODING_HT
&&
4053 (sdslen(key
->ptr
) > server
.hash_max_zipmap_value
||
4054 sdslen(val
->ptr
) > server
.hash_max_zipmap_value
))
4056 convertToRealHash(o
);
4059 if (o
->encoding
== REDIS_ENCODING_ZIPMAP
) {
4060 unsigned char *zm
= o
->ptr
;
4062 zm
= zipmapSet(zm
,key
->ptr
,sdslen(key
->ptr
),
4063 val
->ptr
,sdslen(val
->ptr
),NULL
);
4068 key
= tryObjectEncoding(key
);
4069 val
= tryObjectEncoding(val
);
4070 dictAdd((dict
*)o
->ptr
,key
,val
);
4074 redisPanic("Unknown object type");
4079 static int rdbLoad(char *filename
) {
4082 int type
, retval
, rdbver
;
4083 int swap_all_values
= 0;
4084 dict
*d
= server
.db
[0].dict
;
4085 redisDb
*db
= server
.db
+0;
4087 time_t expiretime
, now
= time(NULL
);
4088 long long loadedkeys
= 0;
4090 fp
= fopen(filename
,"r");
4091 if (!fp
) return REDIS_ERR
;
4092 if (fread(buf
,9,1,fp
) == 0) goto eoferr
;
4094 if (memcmp(buf
,"REDIS",5) != 0) {
4096 redisLog(REDIS_WARNING
,"Wrong signature trying to load DB from file");
4099 rdbver
= atoi(buf
+5);
4102 redisLog(REDIS_WARNING
,"Can't handle RDB format version %d",rdbver
);
4110 if ((type
= rdbLoadType(fp
)) == -1) goto eoferr
;
4111 if (type
== REDIS_EXPIRETIME
) {
4112 if ((expiretime
= rdbLoadTime(fp
)) == -1) goto eoferr
;
4113 /* We read the time so we need to read the object type again */
4114 if ((type
= rdbLoadType(fp
)) == -1) goto eoferr
;
4116 if (type
== REDIS_EOF
) break;
4117 /* Handle SELECT DB opcode as a special case */
4118 if (type
== REDIS_SELECTDB
) {
4119 if ((dbid
= rdbLoadLen(fp
,NULL
)) == REDIS_RDB_LENERR
)
4121 if (dbid
>= (unsigned)server
.dbnum
) {
4122 redisLog(REDIS_WARNING
,"FATAL: Data file was created with a Redis server configured to handle more than %d databases. Exiting\n", server
.dbnum
);
4125 db
= server
.db
+dbid
;
4130 if ((key
= rdbLoadStringObject(fp
)) == NULL
) goto eoferr
;
4132 if ((val
= rdbLoadObject(type
,fp
)) == NULL
) goto eoferr
;
4133 /* Check if the key already expired */
4134 if (expiretime
!= -1 && expiretime
< now
) {
4139 /* Add the new object in the hash table */
4140 retval
= dictAdd(d
,key
,val
);
4141 if (retval
== DICT_ERR
) {
4142 redisLog(REDIS_WARNING
,"Loading DB, duplicated key (%s) found! Unrecoverable error, exiting now.", key
->ptr
);
4146 /* Set the expire time if needed */
4147 if (expiretime
!= -1) setExpire(db
,key
,expiretime
);
4149 /* Handle swapping while loading big datasets when VM is on */
4151 /* If we detect that we are hopeless about fitting something in memory
4152 * we just swap every new key on disk. Directly...
4153 * Note that it's important to check for this condition before resorting
4154 * to random sampling, otherwise we may try to swap already
4156 if (swap_all_values
) {
4157 dictEntry
*de
= dictFind(d
,key
);
4159 /* de may be NULL since the key already expired */
4161 key
= dictGetEntryKey(de
);
4162 val
= dictGetEntryVal(de
);
4164 if (vmSwapObjectBlocking(key
,val
) == REDIS_OK
) {
4165 dictGetEntryVal(de
) = NULL
;
4171 /* If we have still some hope of having some value fitting memory
4172 * then we try random sampling. */
4173 if (!swap_all_values
&& server
.vm_enabled
&& (loadedkeys
% 5000) == 0) {
4174 while (zmalloc_used_memory() > server
.vm_max_memory
) {
4175 if (vmSwapOneObjectBlocking() == REDIS_ERR
) break;
4177 if (zmalloc_used_memory() > server
.vm_max_memory
)
4178 swap_all_values
= 1; /* We are already using too much mem */
4184 eoferr
: /* unexpected end of file is handled here with a fatal exit */
4185 redisLog(REDIS_WARNING
,"Short read or OOM loading DB. Unrecoverable error, aborting now.");
4187 return REDIS_ERR
; /* Just to avoid warning */
4190 /*================================== Shutdown =============================== */
4191 static int prepareForShutdown() {
4192 redisLog(REDIS_WARNING
,"User requested shutdown, saving DB...");
4193 /* Kill the saving child if there is a background saving in progress.
4194 We want to avoid race conditions, for instance our saving child may
4195 overwrite the synchronous saving done by SHUTDOWN. */
4196 if (server
.bgsavechildpid
!= -1) {
4197 redisLog(REDIS_WARNING
,"There is a live saving child. Killing it!");
4198 kill(server
.bgsavechildpid
,SIGKILL
);
4199 rdbRemoveTempFile(server
.bgsavechildpid
);
4201 if (server
.appendonly
) {
4202 /* Append only file: fsync() the AOF and exit */
4203 aof_fsync(server
.appendfd
);
4204 if (server
.vm_enabled
) unlink(server
.vm_swap_file
);
4206 /* Snapshotting. Perform a SYNC SAVE and exit */
4207 if (rdbSave(server
.dbfilename
) == REDIS_OK
) {
4208 if (server
.daemonize
)
4209 unlink(server
.pidfile
);
4210 redisLog(REDIS_WARNING
,"%zu bytes used at exit",zmalloc_used_memory());
4212 /* Ooops.. error saving! The best we can do is to continue
4213 * operating. Note that if there was a background saving process,
4214 * in the next cron() Redis will be notified that the background
4215 * saving aborted, handling special stuff like slaves pending for
4216 * synchronization... */
4217 redisLog(REDIS_WARNING
,"Error trying to save the DB, can't exit");
4221 redisLog(REDIS_WARNING
,"Server exit now, bye bye...");
4225 /*================================== Commands =============================== */
4227 static void authCommand(redisClient
*c
) {
4228 if (!server
.requirepass
|| !strcmp(c
->argv
[1]->ptr
, server
.requirepass
)) {
4229 c
->authenticated
= 1;
4230 addReply(c
,shared
.ok
);
4232 c
->authenticated
= 0;
4233 addReplySds(c
,sdscatprintf(sdsempty(),"-ERR invalid password\r\n"));
4237 static void pingCommand(redisClient
*c
) {
4238 addReply(c
,shared
.pong
);
4241 static void echoCommand(redisClient
*c
) {
4242 addReplyBulk(c
,c
->argv
[1]);
4245 /*=================================== Strings =============================== */
4247 static void setGenericCommand(redisClient
*c
, int nx
, robj
*key
, robj
*val
, robj
*expire
) {
4249 long seconds
= 0; /* initialized to avoid an harmness warning */
4252 if (getLongFromObjectOrReply(c
, expire
, &seconds
, NULL
) != REDIS_OK
)
4255 addReplySds(c
,sdsnew("-ERR invalid expire time in SETEX\r\n"));
4260 touchWatchedKey(c
->db
,key
);
4261 if (nx
) deleteIfVolatile(c
->db
,key
);
4262 retval
= dictAdd(c
->db
->dict
,key
,val
);
4263 if (retval
== DICT_ERR
) {
4265 /* If the key is about a swapped value, we want a new key object
4266 * to overwrite the old. So we delete the old key in the database.
4267 * This will also make sure that swap pages about the old object
4268 * will be marked as free. */
4269 if (server
.vm_enabled
&& deleteIfSwapped(c
->db
,key
))
4271 dictReplace(c
->db
->dict
,key
,val
);
4274 addReply(c
,shared
.czero
);
4282 removeExpire(c
->db
,key
);
4283 if (expire
) setExpire(c
->db
,key
,time(NULL
)+seconds
);
4284 addReply(c
, nx
? shared
.cone
: shared
.ok
);
4287 static void setCommand(redisClient
*c
) {
4288 setGenericCommand(c
,0,c
->argv
[1],c
->argv
[2],NULL
);
4291 static void setnxCommand(redisClient
*c
) {
4292 setGenericCommand(c
,1,c
->argv
[1],c
->argv
[2],NULL
);
4295 static void setexCommand(redisClient
*c
) {
4296 setGenericCommand(c
,0,c
->argv
[1],c
->argv
[3],c
->argv
[2]);
4299 static int getGenericCommand(redisClient
*c
) {
4302 if ((o
= lookupKeyReadOrReply(c
,c
->argv
[1],shared
.nullbulk
)) == NULL
)
4305 if (o
->type
!= REDIS_STRING
) {
4306 addReply(c
,shared
.wrongtypeerr
);
4314 static void getCommand(redisClient
*c
) {
4315 getGenericCommand(c
);
4318 static void getsetCommand(redisClient
*c
) {
4319 if (getGenericCommand(c
) == REDIS_ERR
) return;
4320 if (dictAdd(c
->db
->dict
,c
->argv
[1],c
->argv
[2]) == DICT_ERR
) {
4321 dictReplace(c
->db
->dict
,c
->argv
[1],c
->argv
[2]);
4323 incrRefCount(c
->argv
[1]);
4325 incrRefCount(c
->argv
[2]);
4327 removeExpire(c
->db
,c
->argv
[1]);
4330 static void mgetCommand(redisClient
*c
) {
4333 addReplySds(c
,sdscatprintf(sdsempty(),"*%d\r\n",c
->argc
-1));
4334 for (j
= 1; j
< c
->argc
; j
++) {
4335 robj
*o
= lookupKeyRead(c
->db
,c
->argv
[j
]);
4337 addReply(c
,shared
.nullbulk
);
4339 if (o
->type
!= REDIS_STRING
) {
4340 addReply(c
,shared
.nullbulk
);
4348 static void msetGenericCommand(redisClient
*c
, int nx
) {
4349 int j
, busykeys
= 0;
4351 if ((c
->argc
% 2) == 0) {
4352 addReplySds(c
,sdsnew("-ERR wrong number of arguments for MSET\r\n"));
4355 /* Handle the NX flag. The MSETNX semantics are to return zero and
4356 * set nothing at all if at least one key already exists. */
4358 for (j
= 1; j
< c
->argc
; j
+= 2) {
4359 if (lookupKeyWrite(c
->db
,c
->argv
[j
]) != NULL
) {
4365 addReply(c
, shared
.czero
);
4369 for (j
= 1; j
< c
->argc
; j
+= 2) {
4372 c
->argv
[j
+1] = tryObjectEncoding(c
->argv
[j
+1]);
4373 retval
= dictAdd(c
->db
->dict
,c
->argv
[j
],c
->argv
[j
+1]);
4374 if (retval
== DICT_ERR
) {
4375 dictReplace(c
->db
->dict
,c
->argv
[j
],c
->argv
[j
+1]);
4376 incrRefCount(c
->argv
[j
+1]);
4378 incrRefCount(c
->argv
[j
]);
4379 incrRefCount(c
->argv
[j
+1]);
4381 removeExpire(c
->db
,c
->argv
[j
]);
4383 server
.dirty
+= (c
->argc
-1)/2;
4384 addReply(c
, nx
? shared
.cone
: shared
.ok
);
4387 static void msetCommand(redisClient
*c
) {
4388 msetGenericCommand(c
,0);
4391 static void msetnxCommand(redisClient
*c
) {
4392 msetGenericCommand(c
,1);
4395 static void incrDecrCommand(redisClient
*c
, long long incr
) {
4400 o
= lookupKeyWrite(c
->db
,c
->argv
[1]);
4401 if (o
!= NULL
&& checkType(c
,o
,REDIS_STRING
)) return;
4402 if (getLongLongFromObjectOrReply(c
,o
,&value
,NULL
) != REDIS_OK
) return;
4405 o
= createStringObjectFromLongLong(value
);
4406 retval
= dictAdd(c
->db
->dict
,c
->argv
[1],o
);
4407 if (retval
== DICT_ERR
) {
4408 dictReplace(c
->db
->dict
,c
->argv
[1],o
);
4409 removeExpire(c
->db
,c
->argv
[1]);
4411 incrRefCount(c
->argv
[1]);
4414 addReply(c
,shared
.colon
);
4416 addReply(c
,shared
.crlf
);
4419 static void incrCommand(redisClient
*c
) {
4420 incrDecrCommand(c
,1);
4423 static void decrCommand(redisClient
*c
) {
4424 incrDecrCommand(c
,-1);
4427 static void incrbyCommand(redisClient
*c
) {
4430 if (getLongLongFromObjectOrReply(c
, c
->argv
[2], &incr
, NULL
) != REDIS_OK
) return;
4431 incrDecrCommand(c
,incr
);
4434 static void decrbyCommand(redisClient
*c
) {
4437 if (getLongLongFromObjectOrReply(c
, c
->argv
[2], &incr
, NULL
) != REDIS_OK
) return;
4438 incrDecrCommand(c
,-incr
);
4441 static void appendCommand(redisClient
*c
) {
4446 o
= lookupKeyWrite(c
->db
,c
->argv
[1]);
4448 /* Create the key */
4449 retval
= dictAdd(c
->db
->dict
,c
->argv
[1],c
->argv
[2]);
4450 incrRefCount(c
->argv
[1]);
4451 incrRefCount(c
->argv
[2]);
4452 totlen
= stringObjectLen(c
->argv
[2]);
4456 de
= dictFind(c
->db
->dict
,c
->argv
[1]);
4459 o
= dictGetEntryVal(de
);
4460 if (o
->type
!= REDIS_STRING
) {
4461 addReply(c
,shared
.wrongtypeerr
);
4464 /* If the object is specially encoded or shared we have to make
4466 if (o
->refcount
!= 1 || o
->encoding
!= REDIS_ENCODING_RAW
) {
4467 robj
*decoded
= getDecodedObject(o
);
4469 o
= createStringObject(decoded
->ptr
, sdslen(decoded
->ptr
));
4470 decrRefCount(decoded
);
4471 dictReplace(c
->db
->dict
,c
->argv
[1],o
);
4474 if (c
->argv
[2]->encoding
== REDIS_ENCODING_RAW
) {
4475 o
->ptr
= sdscatlen(o
->ptr
,
4476 c
->argv
[2]->ptr
, sdslen(c
->argv
[2]->ptr
));
4478 o
->ptr
= sdscatprintf(o
->ptr
, "%ld",
4479 (unsigned long) c
->argv
[2]->ptr
);
4481 totlen
= sdslen(o
->ptr
);
4484 addReplySds(c
,sdscatprintf(sdsempty(),":%lu\r\n",(unsigned long)totlen
));
4487 static void substrCommand(redisClient
*c
) {
4489 long start
= atoi(c
->argv
[2]->ptr
);
4490 long end
= atoi(c
->argv
[3]->ptr
);
4491 size_t rangelen
, strlen
;
4494 if ((o
= lookupKeyReadOrReply(c
,c
->argv
[1],shared
.nullbulk
)) == NULL
||
4495 checkType(c
,o
,REDIS_STRING
)) return;
4497 o
= getDecodedObject(o
);
4498 strlen
= sdslen(o
->ptr
);
4500 /* convert negative indexes */
4501 if (start
< 0) start
= strlen
+start
;
4502 if (end
< 0) end
= strlen
+end
;
4503 if (start
< 0) start
= 0;
4504 if (end
< 0) end
= 0;
4506 /* indexes sanity checks */
4507 if (start
> end
|| (size_t)start
>= strlen
) {
4508 /* Out of range start or start > end result in null reply */
4509 addReply(c
,shared
.nullbulk
);
4513 if ((size_t)end
>= strlen
) end
= strlen
-1;
4514 rangelen
= (end
-start
)+1;
4516 /* Return the result */
4517 addReplySds(c
,sdscatprintf(sdsempty(),"$%zu\r\n",rangelen
));
4518 range
= sdsnewlen((char*)o
->ptr
+start
,rangelen
);
4519 addReplySds(c
,range
);
4520 addReply(c
,shared
.crlf
);
4524 /* ========================= Type agnostic commands ========================= */
4526 static void delCommand(redisClient
*c
) {
4529 for (j
= 1; j
< c
->argc
; j
++) {
4530 if (deleteKey(c
->db
,c
->argv
[j
])) {
4531 touchWatchedKey(c
->db
,c
->argv
[j
]);
4536 addReplyLongLong(c
,deleted
);
4539 static void existsCommand(redisClient
*c
) {
4540 expireIfNeeded(c
->db
,c
->argv
[1]);
4541 if (dictFind(c
->db
->dict
,c
->argv
[1])) {
4542 addReply(c
, shared
.cone
);
4544 addReply(c
, shared
.czero
);
4548 static void selectCommand(redisClient
*c
) {
4549 int id
= atoi(c
->argv
[1]->ptr
);
4551 if (selectDb(c
,id
) == REDIS_ERR
) {
4552 addReplySds(c
,sdsnew("-ERR invalid DB index\r\n"));
4554 addReply(c
,shared
.ok
);
4558 static void randomkeyCommand(redisClient
*c
) {
4563 de
= dictGetRandomKey(c
->db
->dict
);
4564 if (!de
|| expireIfNeeded(c
->db
,dictGetEntryKey(de
)) == 0) break;
4568 addReply(c
,shared
.nullbulk
);
4572 key
= dictGetEntryKey(de
);
4573 if (server
.vm_enabled
) {
4574 key
= dupStringObject(key
);
4575 addReplyBulk(c
,key
);
4578 addReplyBulk(c
,key
);
4582 static void keysCommand(redisClient
*c
) {
4585 sds pattern
= c
->argv
[1]->ptr
;
4586 int plen
= sdslen(pattern
);
4587 unsigned long numkeys
= 0;
4588 robj
*lenobj
= createObject(REDIS_STRING
,NULL
);
4590 di
= dictGetIterator(c
->db
->dict
);
4592 decrRefCount(lenobj
);
4593 while((de
= dictNext(di
)) != NULL
) {
4594 robj
*keyobj
= dictGetEntryKey(de
);
4596 sds key
= keyobj
->ptr
;
4597 if ((pattern
[0] == '*' && pattern
[1] == '\0') ||
4598 stringmatchlen(pattern
,plen
,key
,sdslen(key
),0)) {
4599 if (expireIfNeeded(c
->db
,keyobj
) == 0) {
4600 addReplyBulk(c
,keyobj
);
4605 dictReleaseIterator(di
);
4606 lenobj
->ptr
= sdscatprintf(sdsempty(),"*%lu\r\n",numkeys
);
4609 static void dbsizeCommand(redisClient
*c
) {
4611 sdscatprintf(sdsempty(),":%lu\r\n",dictSize(c
->db
->dict
)));
4614 static void lastsaveCommand(redisClient
*c
) {
4616 sdscatprintf(sdsempty(),":%lu\r\n",server
.lastsave
));
4619 static void typeCommand(redisClient
*c
) {
4623 o
= lookupKeyRead(c
->db
,c
->argv
[1]);
4628 case REDIS_STRING
: type
= "+string"; break;
4629 case REDIS_LIST
: type
= "+list"; break;
4630 case REDIS_SET
: type
= "+set"; break;
4631 case REDIS_ZSET
: type
= "+zset"; break;
4632 case REDIS_HASH
: type
= "+hash"; break;
4633 default: type
= "+unknown"; break;
4636 addReplySds(c
,sdsnew(type
));
4637 addReply(c
,shared
.crlf
);
4640 static void saveCommand(redisClient
*c
) {
4641 if (server
.bgsavechildpid
!= -1) {
4642 addReplySds(c
,sdsnew("-ERR background save in progress\r\n"));
4645 if (rdbSave(server
.dbfilename
) == REDIS_OK
) {
4646 addReply(c
,shared
.ok
);
4648 addReply(c
,shared
.err
);
4652 static void bgsaveCommand(redisClient
*c
) {
4653 if (server
.bgsavechildpid
!= -1) {
4654 addReplySds(c
,sdsnew("-ERR background save already in progress\r\n"));
4657 if (rdbSaveBackground(server
.dbfilename
) == REDIS_OK
) {
4658 char *status
= "+Background saving started\r\n";
4659 addReplySds(c
,sdsnew(status
));
4661 addReply(c
,shared
.err
);
4665 static void shutdownCommand(redisClient
*c
) {
4666 if (prepareForShutdown() == REDIS_OK
)
4668 addReplySds(c
, sdsnew("-ERR Errors trying to SHUTDOWN. Check logs.\r\n"));
4671 static void renameGenericCommand(redisClient
*c
, int nx
) {
4674 /* To use the same key as src and dst is probably an error */
4675 if (sdscmp(c
->argv
[1]->ptr
,c
->argv
[2]->ptr
) == 0) {
4676 addReply(c
,shared
.sameobjecterr
);
4680 if ((o
= lookupKeyWriteOrReply(c
,c
->argv
[1],shared
.nokeyerr
)) == NULL
)
4684 deleteIfVolatile(c
->db
,c
->argv
[2]);
4685 if (dictAdd(c
->db
->dict
,c
->argv
[2],o
) == DICT_ERR
) {
4688 addReply(c
,shared
.czero
);
4691 dictReplace(c
->db
->dict
,c
->argv
[2],o
);
4693 incrRefCount(c
->argv
[2]);
4695 deleteKey(c
->db
,c
->argv
[1]);
4696 touchWatchedKey(c
->db
,c
->argv
[2]);
4698 addReply(c
,nx
? shared
.cone
: shared
.ok
);
4701 static void renameCommand(redisClient
*c
) {
4702 renameGenericCommand(c
,0);
4705 static void renamenxCommand(redisClient
*c
) {
4706 renameGenericCommand(c
,1);
4709 static void moveCommand(redisClient
*c
) {
4714 /* Obtain source and target DB pointers */
4717 if (selectDb(c
,atoi(c
->argv
[2]->ptr
)) == REDIS_ERR
) {
4718 addReply(c
,shared
.outofrangeerr
);
4722 selectDb(c
,srcid
); /* Back to the source DB */
4724 /* If the user specifies the source DB itself as the target
4725 * DB, it is probably an error. */
4727 addReply(c
,shared
.sameobjecterr
);
4731 /* Check if the element exists and get a reference */
4732 o
= lookupKeyWrite(c
->db
,c
->argv
[1]);
4734 addReply(c
,shared
.czero
);
4738 /* Try to add the element to the target DB */
4739 deleteIfVolatile(dst
,c
->argv
[1]);
4740 if (dictAdd(dst
->dict
,c
->argv
[1],o
) == DICT_ERR
) {
4741 addReply(c
,shared
.czero
);
4744 incrRefCount(c
->argv
[1]);
4747 /* OK! key moved, free the entry in the source DB */
4748 deleteKey(src
,c
->argv
[1]);
4750 addReply(c
,shared
.cone
);
4753 /* =================================== Lists ================================ */
4754 static void pushGenericCommand(redisClient
*c
, int where
) {
4758 lobj
= lookupKeyWrite(c
->db
,c
->argv
[1]);
4760 if (handleClientsWaitingListPush(c
,c
->argv
[1],c
->argv
[2])) {
4761 addReply(c
,shared
.cone
);
4764 lobj
= createListObject();
4766 if (where
== REDIS_HEAD
) {
4767 listAddNodeHead(list
,c
->argv
[2]);
4769 listAddNodeTail(list
,c
->argv
[2]);
4771 dictAdd(c
->db
->dict
,c
->argv
[1],lobj
);
4772 incrRefCount(c
->argv
[1]);
4773 incrRefCount(c
->argv
[2]);
4775 if (lobj
->type
!= REDIS_LIST
) {
4776 addReply(c
,shared
.wrongtypeerr
);
4779 if (handleClientsWaitingListPush(c
,c
->argv
[1],c
->argv
[2])) {
4780 addReply(c
,shared
.cone
);
4784 if (where
== REDIS_HEAD
) {
4785 listAddNodeHead(list
,c
->argv
[2]);
4787 listAddNodeTail(list
,c
->argv
[2]);
4789 incrRefCount(c
->argv
[2]);
4792 addReplyLongLong(c
,listLength(list
));
4795 static void lpushCommand(redisClient
*c
) {
4796 pushGenericCommand(c
,REDIS_HEAD
);
4799 static void rpushCommand(redisClient
*c
) {
4800 pushGenericCommand(c
,REDIS_TAIL
);
4803 static void llenCommand(redisClient
*c
) {
4807 if ((o
= lookupKeyReadOrReply(c
,c
->argv
[1],shared
.czero
)) == NULL
||
4808 checkType(c
,o
,REDIS_LIST
)) return;
4811 addReplyUlong(c
,listLength(l
));
4814 static void lindexCommand(redisClient
*c
) {
4816 int index
= atoi(c
->argv
[2]->ptr
);
4820 if ((o
= lookupKeyReadOrReply(c
,c
->argv
[1],shared
.nullbulk
)) == NULL
||
4821 checkType(c
,o
,REDIS_LIST
)) return;
4824 ln
= listIndex(list
, index
);
4826 addReply(c
,shared
.nullbulk
);
4828 robj
*ele
= listNodeValue(ln
);
4829 addReplyBulk(c
,ele
);
4833 static void lsetCommand(redisClient
*c
) {
4835 int index
= atoi(c
->argv
[2]->ptr
);
4839 if ((o
= lookupKeyWriteOrReply(c
,c
->argv
[1],shared
.nokeyerr
)) == NULL
||
4840 checkType(c
,o
,REDIS_LIST
)) return;
4843 ln
= listIndex(list
, index
);
4845 addReply(c
,shared
.outofrangeerr
);
4847 robj
*ele
= listNodeValue(ln
);
4850 listNodeValue(ln
) = c
->argv
[3];
4851 incrRefCount(c
->argv
[3]);
4852 addReply(c
,shared
.ok
);
4857 static void popGenericCommand(redisClient
*c
, int where
) {
4862 if ((o
= lookupKeyWriteOrReply(c
,c
->argv
[1],shared
.nullbulk
)) == NULL
||
4863 checkType(c
,o
,REDIS_LIST
)) return;
4866 if (where
== REDIS_HEAD
)
4867 ln
= listFirst(list
);
4869 ln
= listLast(list
);
4872 addReply(c
,shared
.nullbulk
);
4874 robj
*ele
= listNodeValue(ln
);
4875 addReplyBulk(c
,ele
);
4876 listDelNode(list
,ln
);
4877 if (listLength(list
) == 0) deleteKey(c
->db
,c
->argv
[1]);
4882 static void lpopCommand(redisClient
*c
) {
4883 popGenericCommand(c
,REDIS_HEAD
);
4886 static void rpopCommand(redisClient
*c
) {
4887 popGenericCommand(c
,REDIS_TAIL
);
4890 static void lrangeCommand(redisClient
*c
) {
4892 int start
= atoi(c
->argv
[2]->ptr
);
4893 int end
= atoi(c
->argv
[3]->ptr
);
4900 if ((o
= lookupKeyReadOrReply(c
,c
->argv
[1],shared
.emptymultibulk
)) == NULL
4901 || checkType(c
,o
,REDIS_LIST
)) return;
4903 llen
= listLength(list
);
4905 /* convert negative indexes */
4906 if (start
< 0) start
= llen
+start
;
4907 if (end
< 0) end
= llen
+end
;
4908 if (start
< 0) start
= 0;
4909 if (end
< 0) end
= 0;
4911 /* indexes sanity checks */
4912 if (start
> end
|| start
>= llen
) {
4913 /* Out of range start or start > end result in empty list */
4914 addReply(c
,shared
.emptymultibulk
);
4917 if (end
>= llen
) end
= llen
-1;
4918 rangelen
= (end
-start
)+1;
4920 /* Return the result in form of a multi-bulk reply */
4921 ln
= listIndex(list
, start
);
4922 addReplySds(c
,sdscatprintf(sdsempty(),"*%d\r\n",rangelen
));
4923 for (j
= 0; j
< rangelen
; j
++) {
4924 ele
= listNodeValue(ln
);
4925 addReplyBulk(c
,ele
);
4930 static void ltrimCommand(redisClient
*c
) {
4932 int start
= atoi(c
->argv
[2]->ptr
);
4933 int end
= atoi(c
->argv
[3]->ptr
);
4935 int j
, ltrim
, rtrim
;
4939 if ((o
= lookupKeyWriteOrReply(c
,c
->argv
[1],shared
.ok
)) == NULL
||
4940 checkType(c
,o
,REDIS_LIST
)) return;
4942 llen
= listLength(list
);
4944 /* convert negative indexes */
4945 if (start
< 0) start
= llen
+start
;
4946 if (end
< 0) end
= llen
+end
;
4947 if (start
< 0) start
= 0;
4948 if (end
< 0) end
= 0;
4950 /* indexes sanity checks */
4951 if (start
> end
|| start
>= llen
) {
4952 /* Out of range start or start > end result in empty list */
4956 if (end
>= llen
) end
= llen
-1;
4961 /* Remove list elements to perform the trim */
4962 for (j
= 0; j
< ltrim
; j
++) {
4963 ln
= listFirst(list
);
4964 listDelNode(list
,ln
);
4966 for (j
= 0; j
< rtrim
; j
++) {
4967 ln
= listLast(list
);
4968 listDelNode(list
,ln
);
4970 if (listLength(list
) == 0) deleteKey(c
->db
,c
->argv
[1]);
4972 addReply(c
,shared
.ok
);
4975 static void lremCommand(redisClient
*c
) {
4978 listNode
*ln
, *next
;
4979 int toremove
= atoi(c
->argv
[2]->ptr
);
4983 if ((o
= lookupKeyWriteOrReply(c
,c
->argv
[1],shared
.czero
)) == NULL
||
4984 checkType(c
,o
,REDIS_LIST
)) return;
4988 toremove
= -toremove
;
4991 ln
= fromtail
? list
->tail
: list
->head
;
4993 robj
*ele
= listNodeValue(ln
);
4995 next
= fromtail
? ln
->prev
: ln
->next
;
4996 if (equalStringObjects(ele
,c
->argv
[3])) {
4997 listDelNode(list
,ln
);
5000 if (toremove
&& removed
== toremove
) break;
5004 if (listLength(list
) == 0) deleteKey(c
->db
,c
->argv
[1]);
5005 addReplySds(c
,sdscatprintf(sdsempty(),":%d\r\n",removed
));
5008 /* This is the semantic of this command:
5009 * RPOPLPUSH srclist dstlist:
5010 * IF LLEN(srclist) > 0
5011 * element = RPOP srclist
5012 * LPUSH dstlist element
5019 * The idea is to be able to get an element from a list in a reliable way
5020 * since the element is not just returned but pushed against another list
5021 * as well. This command was originally proposed by Ezra Zygmuntowicz.
5023 static void rpoplpushcommand(redisClient
*c
) {
5028 if ((sobj
= lookupKeyWriteOrReply(c
,c
->argv
[1],shared
.nullbulk
)) == NULL
||
5029 checkType(c
,sobj
,REDIS_LIST
)) return;
5030 srclist
= sobj
->ptr
;
5031 ln
= listLast(srclist
);
5034 addReply(c
,shared
.nullbulk
);
5036 robj
*dobj
= lookupKeyWrite(c
->db
,c
->argv
[2]);
5037 robj
*ele
= listNodeValue(ln
);
5040 if (dobj
&& dobj
->type
!= REDIS_LIST
) {
5041 addReply(c
,shared
.wrongtypeerr
);
5045 /* Add the element to the target list (unless it's directly
5046 * passed to some BLPOP-ing client */
5047 if (!handleClientsWaitingListPush(c
,c
->argv
[2],ele
)) {
5049 /* Create the list if the key does not exist */
5050 dobj
= createListObject();
5051 dictAdd(c
->db
->dict
,c
->argv
[2],dobj
);
5052 incrRefCount(c
->argv
[2]);
5054 dstlist
= dobj
->ptr
;
5055 listAddNodeHead(dstlist
,ele
);
5059 /* Send the element to the client as reply as well */
5060 addReplyBulk(c
,ele
);
5062 /* Finally remove the element from the source list */
5063 listDelNode(srclist
,ln
);
5064 if (listLength(srclist
) == 0) deleteKey(c
->db
,c
->argv
[1]);
5069 /* ==================================== Sets ================================ */
5071 static void saddCommand(redisClient
*c
) {
5074 set
= lookupKeyWrite(c
->db
,c
->argv
[1]);
5076 set
= createSetObject();
5077 dictAdd(c
->db
->dict
,c
->argv
[1],set
);
5078 incrRefCount(c
->argv
[1]);
5080 if (set
->type
!= REDIS_SET
) {
5081 addReply(c
,shared
.wrongtypeerr
);
5085 if (dictAdd(set
->ptr
,c
->argv
[2],NULL
) == DICT_OK
) {
5086 incrRefCount(c
->argv
[2]);
5088 addReply(c
,shared
.cone
);
5090 addReply(c
,shared
.czero
);
5094 static void sremCommand(redisClient
*c
) {
5097 if ((set
= lookupKeyWriteOrReply(c
,c
->argv
[1],shared
.czero
)) == NULL
||
5098 checkType(c
,set
,REDIS_SET
)) return;
5100 if (dictDelete(set
->ptr
,c
->argv
[2]) == DICT_OK
) {
5102 if (htNeedsResize(set
->ptr
)) dictResize(set
->ptr
);
5103 if (dictSize((dict
*)set
->ptr
) == 0) deleteKey(c
->db
,c
->argv
[1]);
5104 addReply(c
,shared
.cone
);
5106 addReply(c
,shared
.czero
);
5110 static void smoveCommand(redisClient
*c
) {
5111 robj
*srcset
, *dstset
;
5113 srcset
= lookupKeyWrite(c
->db
,c
->argv
[1]);
5114 dstset
= lookupKeyWrite(c
->db
,c
->argv
[2]);
5116 /* If the source key does not exist return 0, if it's of the wrong type
5118 if (srcset
== NULL
|| srcset
->type
!= REDIS_SET
) {
5119 addReply(c
, srcset
? shared
.wrongtypeerr
: shared
.czero
);
5122 /* Error if the destination key is not a set as well */
5123 if (dstset
&& dstset
->type
!= REDIS_SET
) {
5124 addReply(c
,shared
.wrongtypeerr
);
5127 /* Remove the element from the source set */
5128 if (dictDelete(srcset
->ptr
,c
->argv
[3]) == DICT_ERR
) {
5129 /* Key not found in the src set! return zero */
5130 addReply(c
,shared
.czero
);
5133 if (dictSize((dict
*)srcset
->ptr
) == 0 && srcset
!= dstset
)
5134 deleteKey(c
->db
,c
->argv
[1]);
5136 /* Add the element to the destination set */
5138 dstset
= createSetObject();
5139 dictAdd(c
->db
->dict
,c
->argv
[2],dstset
);
5140 incrRefCount(c
->argv
[2]);
5142 if (dictAdd(dstset
->ptr
,c
->argv
[3],NULL
) == DICT_OK
)
5143 incrRefCount(c
->argv
[3]);
5144 addReply(c
,shared
.cone
);
5147 static void sismemberCommand(redisClient
*c
) {
5150 if ((set
= lookupKeyReadOrReply(c
,c
->argv
[1],shared
.czero
)) == NULL
||
5151 checkType(c
,set
,REDIS_SET
)) return;
5153 if (dictFind(set
->ptr
,c
->argv
[2]))
5154 addReply(c
,shared
.cone
);
5156 addReply(c
,shared
.czero
);
5159 static void scardCommand(redisClient
*c
) {
5163 if ((o
= lookupKeyReadOrReply(c
,c
->argv
[1],shared
.czero
)) == NULL
||
5164 checkType(c
,o
,REDIS_SET
)) return;
5167 addReplyUlong(c
,dictSize(s
));
5170 static void spopCommand(redisClient
*c
) {
5174 if ((set
= lookupKeyWriteOrReply(c
,c
->argv
[1],shared
.nullbulk
)) == NULL
||
5175 checkType(c
,set
,REDIS_SET
)) return;
5177 de
= dictGetRandomKey(set
->ptr
);
5179 addReply(c
,shared
.nullbulk
);
5181 robj
*ele
= dictGetEntryKey(de
);
5183 addReplyBulk(c
,ele
);
5184 dictDelete(set
->ptr
,ele
);
5185 if (htNeedsResize(set
->ptr
)) dictResize(set
->ptr
);
5186 if (dictSize((dict
*)set
->ptr
) == 0) deleteKey(c
->db
,c
->argv
[1]);
5191 static void srandmemberCommand(redisClient
*c
) {
5195 if ((set
= lookupKeyReadOrReply(c
,c
->argv
[1],shared
.nullbulk
)) == NULL
||
5196 checkType(c
,set
,REDIS_SET
)) return;
5198 de
= dictGetRandomKey(set
->ptr
);
5200 addReply(c
,shared
.nullbulk
);
5202 robj
*ele
= dictGetEntryKey(de
);
5204 addReplyBulk(c
,ele
);
5208 static int qsortCompareSetsByCardinality(const void *s1
, const void *s2
) {
5209 dict
**d1
= (void*) s1
, **d2
= (void*) s2
;
5211 return dictSize(*d1
)-dictSize(*d2
);
5214 static void sinterGenericCommand(redisClient
*c
, robj
**setskeys
, unsigned long setsnum
, robj
*dstkey
) {
5215 dict
**dv
= zmalloc(sizeof(dict
*)*setsnum
);
5218 robj
*lenobj
= NULL
, *dstset
= NULL
;
5219 unsigned long j
, cardinality
= 0;
5221 for (j
= 0; j
< setsnum
; j
++) {
5225 lookupKeyWrite(c
->db
,setskeys
[j
]) :
5226 lookupKeyRead(c
->db
,setskeys
[j
]);
5230 if (deleteKey(c
->db
,dstkey
))
5232 addReply(c
,shared
.czero
);
5234 addReply(c
,shared
.emptymultibulk
);
5238 if (setobj
->type
!= REDIS_SET
) {
5240 addReply(c
,shared
.wrongtypeerr
);
5243 dv
[j
] = setobj
->ptr
;
5245 /* Sort sets from the smallest to largest, this will improve our
5246 * algorithm's performance */
5247 qsort(dv
,setsnum
,sizeof(dict
*),qsortCompareSetsByCardinality
);
5249 /* The first thing we should output is the total number of elements...
5250 * since this is a multi-bulk write, but at this stage we don't know
5251 * the intersection set size, so we use a trick, append an empty object
5252 * to the output list and save the pointer to later modify it with the
5255 lenobj
= createObject(REDIS_STRING
,NULL
);
5257 decrRefCount(lenobj
);
5259 /* If we have a target key where to store the resulting set
5260 * create this key with an empty set inside */
5261 dstset
= createSetObject();
5264 /* Iterate all the elements of the first (smallest) set, and test
5265 * the element against all the other sets, if at least one set does
5266 * not include the element it is discarded */
5267 di
= dictGetIterator(dv
[0]);
5269 while((de
= dictNext(di
)) != NULL
) {
5272 for (j
= 1; j
< setsnum
; j
++)
5273 if (dictFind(dv
[j
],dictGetEntryKey(de
)) == NULL
) break;
5275 continue; /* at least one set does not contain the member */
5276 ele
= dictGetEntryKey(de
);
5278 addReplyBulk(c
,ele
);
5281 dictAdd(dstset
->ptr
,ele
,NULL
);
5285 dictReleaseIterator(di
);
5288 /* Store the resulting set into the target, if the intersection
5289 * is not an empty set. */
5290 deleteKey(c
->db
,dstkey
);
5291 if (dictSize((dict
*)dstset
->ptr
) > 0) {
5292 dictAdd(c
->db
->dict
,dstkey
,dstset
);
5293 incrRefCount(dstkey
);
5294 addReplyLongLong(c
,dictSize((dict
*)dstset
->ptr
));
5296 decrRefCount(dstset
);
5297 addReply(c
,shared
.czero
);
5301 lenobj
->ptr
= sdscatprintf(sdsempty(),"*%lu\r\n",cardinality
);
5306 static void sinterCommand(redisClient
*c
) {
5307 sinterGenericCommand(c
,c
->argv
+1,c
->argc
-1,NULL
);
5310 static void sinterstoreCommand(redisClient
*c
) {
5311 sinterGenericCommand(c
,c
->argv
+2,c
->argc
-2,c
->argv
[1]);
5314 #define REDIS_OP_UNION 0
5315 #define REDIS_OP_DIFF 1
5316 #define REDIS_OP_INTER 2
5318 static void sunionDiffGenericCommand(redisClient
*c
, robj
**setskeys
, int setsnum
, robj
*dstkey
, int op
) {
5319 dict
**dv
= zmalloc(sizeof(dict
*)*setsnum
);
5322 robj
*dstset
= NULL
;
5323 int j
, cardinality
= 0;
5325 for (j
= 0; j
< setsnum
; j
++) {
5329 lookupKeyWrite(c
->db
,setskeys
[j
]) :
5330 lookupKeyRead(c
->db
,setskeys
[j
]);
5335 if (setobj
->type
!= REDIS_SET
) {
5337 addReply(c
,shared
.wrongtypeerr
);
5340 dv
[j
] = setobj
->ptr
;
5343 /* We need a temp set object to store our union. If the dstkey
5344 * is not NULL (that is, we are inside an SUNIONSTORE operation) then
5345 * this set object will be the resulting object to set into the target key*/
5346 dstset
= createSetObject();
5348 /* Iterate all the elements of all the sets, add every element a single
5349 * time to the result set */
5350 for (j
= 0; j
< setsnum
; j
++) {
5351 if (op
== REDIS_OP_DIFF
&& j
== 0 && !dv
[j
]) break; /* result set is empty */
5352 if (!dv
[j
]) continue; /* non existing keys are like empty sets */
5354 di
= dictGetIterator(dv
[j
]);
5356 while((de
= dictNext(di
)) != NULL
) {
5359 /* dictAdd will not add the same element multiple times */
5360 ele
= dictGetEntryKey(de
);
5361 if (op
== REDIS_OP_UNION
|| j
== 0) {
5362 if (dictAdd(dstset
->ptr
,ele
,NULL
) == DICT_OK
) {
5366 } else if (op
== REDIS_OP_DIFF
) {
5367 if (dictDelete(dstset
->ptr
,ele
) == DICT_OK
) {
5372 dictReleaseIterator(di
);
5374 /* result set is empty? Exit asap. */
5375 if (op
== REDIS_OP_DIFF
&& cardinality
== 0) break;
5378 /* Output the content of the resulting set, if not in STORE mode */
5380 addReplySds(c
,sdscatprintf(sdsempty(),"*%d\r\n",cardinality
));
5381 di
= dictGetIterator(dstset
->ptr
);
5382 while((de
= dictNext(di
)) != NULL
) {
5385 ele
= dictGetEntryKey(de
);
5386 addReplyBulk(c
,ele
);
5388 dictReleaseIterator(di
);
5389 decrRefCount(dstset
);
5391 /* If we have a target key where to store the resulting set
5392 * create this key with the result set inside */
5393 deleteKey(c
->db
,dstkey
);
5394 if (dictSize((dict
*)dstset
->ptr
) > 0) {
5395 dictAdd(c
->db
->dict
,dstkey
,dstset
);
5396 incrRefCount(dstkey
);
5397 addReplyLongLong(c
,dictSize((dict
*)dstset
->ptr
));
5399 decrRefCount(dstset
);
5400 addReply(c
,shared
.czero
);
5407 static void sunionCommand(redisClient
*c
) {
5408 sunionDiffGenericCommand(c
,c
->argv
+1,c
->argc
-1,NULL
,REDIS_OP_UNION
);
5411 static void sunionstoreCommand(redisClient
*c
) {
5412 sunionDiffGenericCommand(c
,c
->argv
+2,c
->argc
-2,c
->argv
[1],REDIS_OP_UNION
);
5415 static void sdiffCommand(redisClient
*c
) {
5416 sunionDiffGenericCommand(c
,c
->argv
+1,c
->argc
-1,NULL
,REDIS_OP_DIFF
);
5419 static void sdiffstoreCommand(redisClient
*c
) {
5420 sunionDiffGenericCommand(c
,c
->argv
+2,c
->argc
-2,c
->argv
[1],REDIS_OP_DIFF
);
5423 /* ==================================== ZSets =============================== */
5425 /* ZSETs are ordered sets using two data structures to hold the same elements
5426 * in order to get O(log(N)) INSERT and REMOVE operations into a sorted data structure.
5429 * The elements are added to a hash table mapping Redis objects to scores.
5430 * At the same time the elements are added to a skip list mapping scores
5431 * to Redis objects (so objects are sorted by scores in this "view"). */
5433 /* This skiplist implementation is almost a C translation of the original
5434 * algorithm described by William Pugh in "Skip Lists: A Probabilistic
5435 * Alternative to Balanced Trees", modified in three ways:
5436 * a) this implementation allows for repeated values.
5437 * b) the comparison is not just by key (our 'score') but by satellite data.
5438 * c) there is a back pointer, so it's a doubly linked list with the back
5439 * pointers being only at "level 1". This allows to traverse the list
5440 * from tail to head, useful for ZREVRANGE. */
5442 static zskiplistNode
*zslCreateNode(int level
, double score
, robj
*obj
) {
5443 zskiplistNode
*zn
= zmalloc(sizeof(*zn
));
5445 zn
->forward
= zmalloc(sizeof(zskiplistNode
*) * level
);
5447 zn
->span
= zmalloc(sizeof(unsigned int) * (level
- 1));
5455 static zskiplist
*zslCreate(void) {
5459 zsl
= zmalloc(sizeof(*zsl
));
5462 zsl
->header
= zslCreateNode(ZSKIPLIST_MAXLEVEL
,0,NULL
);
5463 for (j
= 0; j
< ZSKIPLIST_MAXLEVEL
; j
++) {
5464 zsl
->header
->forward
[j
] = NULL
;
5466 /* span has space for ZSKIPLIST_MAXLEVEL-1 elements */
5467 if (j
< ZSKIPLIST_MAXLEVEL
-1)
5468 zsl
->header
->span
[j
] = 0;
5470 zsl
->header
->backward
= NULL
;
5475 static void zslFreeNode(zskiplistNode
*node
) {
5476 decrRefCount(node
->obj
);
5477 zfree(node
->forward
);
5482 static void zslFree(zskiplist
*zsl
) {
5483 zskiplistNode
*node
= zsl
->header
->forward
[0], *next
;
5485 zfree(zsl
->header
->forward
);
5486 zfree(zsl
->header
->span
);
5489 next
= node
->forward
[0];
5496 static int zslRandomLevel(void) {
5498 while ((random()&0xFFFF) < (ZSKIPLIST_P
* 0xFFFF))
5500 return (level
<ZSKIPLIST_MAXLEVEL
) ? level
: ZSKIPLIST_MAXLEVEL
;
5503 static void zslInsert(zskiplist
*zsl
, double score
, robj
*obj
) {
5504 zskiplistNode
*update
[ZSKIPLIST_MAXLEVEL
], *x
;
5505 unsigned int rank
[ZSKIPLIST_MAXLEVEL
];
5509 for (i
= zsl
->level
-1; i
>= 0; i
--) {
5510 /* store rank that is crossed to reach the insert position */
5511 rank
[i
] = i
== (zsl
->level
-1) ? 0 : rank
[i
+1];
5513 while (x
->forward
[i
] &&
5514 (x
->forward
[i
]->score
< score
||
5515 (x
->forward
[i
]->score
== score
&&
5516 compareStringObjects(x
->forward
[i
]->obj
,obj
) < 0))) {
5517 rank
[i
] += i
> 0 ? x
->span
[i
-1] : 1;
5522 /* we assume the key is not already inside, since we allow duplicated
5523 * scores, and the re-insertion of score and redis object should never
5524 * happen since the caller of zslInsert() should test in the hash table
5525 * if the element is already inside or not. */
5526 level
= zslRandomLevel();
5527 if (level
> zsl
->level
) {
5528 for (i
= zsl
->level
; i
< level
; i
++) {
5530 update
[i
] = zsl
->header
;
5531 update
[i
]->span
[i
-1] = zsl
->length
;
5535 x
= zslCreateNode(level
,score
,obj
);
5536 for (i
= 0; i
< level
; i
++) {
5537 x
->forward
[i
] = update
[i
]->forward
[i
];
5538 update
[i
]->forward
[i
] = x
;
5540 /* update span covered by update[i] as x is inserted here */
5542 x
->span
[i
-1] = update
[i
]->span
[i
-1] - (rank
[0] - rank
[i
]);
5543 update
[i
]->span
[i
-1] = (rank
[0] - rank
[i
]) + 1;
5547 /* increment span for untouched levels */
5548 for (i
= level
; i
< zsl
->level
; i
++) {
5549 update
[i
]->span
[i
-1]++;
5552 x
->backward
= (update
[0] == zsl
->header
) ? NULL
: update
[0];
5554 x
->forward
[0]->backward
= x
;
5560 /* Internal function used by zslDelete, zslDeleteByScore and zslDeleteByRank */
5561 void zslDeleteNode(zskiplist
*zsl
, zskiplistNode
*x
, zskiplistNode
**update
) {
5563 for (i
= 0; i
< zsl
->level
; i
++) {
5564 if (update
[i
]->forward
[i
] == x
) {
5566 update
[i
]->span
[i
-1] += x
->span
[i
-1] - 1;
5568 update
[i
]->forward
[i
] = x
->forward
[i
];
5570 /* invariant: i > 0, because update[0]->forward[0]
5571 * is always equal to x */
5572 update
[i
]->span
[i
-1] -= 1;
5575 if (x
->forward
[0]) {
5576 x
->forward
[0]->backward
= x
->backward
;
5578 zsl
->tail
= x
->backward
;
5580 while(zsl
->level
> 1 && zsl
->header
->forward
[zsl
->level
-1] == NULL
)
5585 /* Delete an element with matching score/object from the skiplist. */
5586 static int zslDelete(zskiplist
*zsl
, double score
, robj
*obj
) {
5587 zskiplistNode
*update
[ZSKIPLIST_MAXLEVEL
], *x
;
5591 for (i
= zsl
->level
-1; i
>= 0; i
--) {
5592 while (x
->forward
[i
] &&
5593 (x
->forward
[i
]->score
< score
||
5594 (x
->forward
[i
]->score
== score
&&
5595 compareStringObjects(x
->forward
[i
]->obj
,obj
) < 0)))
5599 /* We may have multiple elements with the same score, what we need
5600 * is to find the element with both the right score and object. */
5602 if (x
&& score
== x
->score
&& equalStringObjects(x
->obj
,obj
)) {
5603 zslDeleteNode(zsl
, x
, update
);
5607 return 0; /* not found */
5609 return 0; /* not found */
5612 /* Delete all the elements with score between min and max from the skiplist.
5613 * Min and max are inclusive, so every node with min <= score <= max is deleted.
5614 * Note that this function takes the reference to the hash table view of the
5615 * sorted set, in order to remove the elements from the hash table too. */
5616 static unsigned long zslDeleteRangeByScore(zskiplist
*zsl
, double min
, double max
, dict
*dict
) {
5617 zskiplistNode
*update
[ZSKIPLIST_MAXLEVEL
], *x
;
5618 unsigned long removed
= 0;
5622 for (i
= zsl
->level
-1; i
>= 0; i
--) {
5623 while (x
->forward
[i
] && x
->forward
[i
]->score
< min
)
5627 /* We may have multiple elements with the same score, what we need
5628 * is to find the element with both the right score and object. */
5630 while (x
&& x
->score
<= max
) {
5631 zskiplistNode
*next
= x
->forward
[0];
5632 zslDeleteNode(zsl
, x
, update
);
5633 dictDelete(dict
,x
->obj
);
5638 return removed
; /* not found */
5641 /* Delete all the elements with rank between start and end from the skiplist.
5642 * Start and end are inclusive. Note that start and end need to be 1-based */
5643 static unsigned long zslDeleteRangeByRank(zskiplist
*zsl
, unsigned int start
, unsigned int end
, dict
*dict
) {
5644 zskiplistNode
*update
[ZSKIPLIST_MAXLEVEL
], *x
;
5645 unsigned long traversed
= 0, removed
= 0;
5649 for (i
= zsl
->level
-1; i
>= 0; i
--) {
5650 while (x
->forward
[i
] && (traversed
+ (i
> 0 ? x
->span
[i
-1] : 1)) < start
) {
5651 traversed
+= i
> 0 ? x
->span
[i
-1] : 1;
5659 while (x
&& traversed
<= end
) {
5660 zskiplistNode
*next
= x
->forward
[0];
5661 zslDeleteNode(zsl
, x
, update
);
5662 dictDelete(dict
,x
->obj
);
5671 /* Find the first node having a score equal or greater than the specified one.
5672 * Returns NULL if there is no match. */
5673 static zskiplistNode
*zslFirstWithScore(zskiplist
*zsl
, double score
) {
5678 for (i
= zsl
->level
-1; i
>= 0; i
--) {
5679 while (x
->forward
[i
] && x
->forward
[i
]->score
< score
)
5682 /* We may have multiple elements with the same score, what we need
5683 * is to find the element with both the right score and object. */
5684 return x
->forward
[0];
5687 /* Find the rank for an element by both score and key.
5688 * Returns 0 when the element cannot be found, rank otherwise.
5689 * Note that the rank is 1-based due to the span of zsl->header to the
5691 static unsigned long zslGetRank(zskiplist
*zsl
, double score
, robj
*o
) {
5693 unsigned long rank
= 0;
5697 for (i
= zsl
->level
-1; i
>= 0; i
--) {
5698 while (x
->forward
[i
] &&
5699 (x
->forward
[i
]->score
< score
||
5700 (x
->forward
[i
]->score
== score
&&
5701 compareStringObjects(x
->forward
[i
]->obj
,o
) <= 0))) {
5702 rank
+= i
> 0 ? x
->span
[i
-1] : 1;
5706 /* x might be equal to zsl->header, so test if obj is non-NULL */
5707 if (x
->obj
&& equalStringObjects(x
->obj
,o
)) {
5714 /* Finds an element by its rank. The rank argument needs to be 1-based. */
5715 zskiplistNode
* zslGetElementByRank(zskiplist
*zsl
, unsigned long rank
) {
5717 unsigned long traversed
= 0;
5721 for (i
= zsl
->level
-1; i
>= 0; i
--) {
5722 while (x
->forward
[i
] && (traversed
+ (i
>0 ? x
->span
[i
-1] : 1)) <= rank
)
5724 traversed
+= i
> 0 ? x
->span
[i
-1] : 1;
5727 if (traversed
== rank
) {
5734 /* The actual Z-commands implementations */
5736 /* This generic command implements both ZADD and ZINCRBY.
5737 * scoreval is the score if the operation is a ZADD (doincrement == 0) or
5738 * the increment if the operation is a ZINCRBY (doincrement == 1). */
5739 static void zaddGenericCommand(redisClient
*c
, robj
*key
, robj
*ele
, double scoreval
, int doincrement
) {
5744 zsetobj
= lookupKeyWrite(c
->db
,key
);
5745 if (zsetobj
== NULL
) {
5746 zsetobj
= createZsetObject();
5747 dictAdd(c
->db
->dict
,key
,zsetobj
);
5750 if (zsetobj
->type
!= REDIS_ZSET
) {
5751 addReply(c
,shared
.wrongtypeerr
);
5757 /* Ok now since we implement both ZADD and ZINCRBY here the code
5758 * needs to handle the two different conditions. It's all about setting
5759 * '*score', that is, the new score to set, to the right value. */
5760 score
= zmalloc(sizeof(double));
5764 /* Read the old score. If the element was not present starts from 0 */
5765 de
= dictFind(zs
->dict
,ele
);
5767 double *oldscore
= dictGetEntryVal(de
);
5768 *score
= *oldscore
+ scoreval
;
5776 /* What follows is a simple remove and re-insert operation that is common
5777 * to both ZADD and ZINCRBY... */
5778 if (dictAdd(zs
->dict
,ele
,score
) == DICT_OK
) {
5779 /* case 1: New element */
5780 incrRefCount(ele
); /* added to hash */
5781 zslInsert(zs
->zsl
,*score
,ele
);
5782 incrRefCount(ele
); /* added to skiplist */
5785 addReplyDouble(c
,*score
);
5787 addReply(c
,shared
.cone
);
5792 /* case 2: Score update operation */
5793 de
= dictFind(zs
->dict
,ele
);
5794 redisAssert(de
!= NULL
);
5795 oldscore
= dictGetEntryVal(de
);
5796 if (*score
!= *oldscore
) {
5799 /* Remove and insert the element in the skip list with new score */
5800 deleted
= zslDelete(zs
->zsl
,*oldscore
,ele
);
5801 redisAssert(deleted
!= 0);
5802 zslInsert(zs
->zsl
,*score
,ele
);
5804 /* Update the score in the hash table */
5805 dictReplace(zs
->dict
,ele
,score
);
5811 addReplyDouble(c
,*score
);
5813 addReply(c
,shared
.czero
);
5817 static void zaddCommand(redisClient
*c
) {
5820 if (getDoubleFromObjectOrReply(c
, c
->argv
[2], &scoreval
, NULL
) != REDIS_OK
) return;
5821 zaddGenericCommand(c
,c
->argv
[1],c
->argv
[3],scoreval
,0);
5824 static void zincrbyCommand(redisClient
*c
) {
5827 if (getDoubleFromObjectOrReply(c
, c
->argv
[2], &scoreval
, NULL
) != REDIS_OK
) return;
5828 zaddGenericCommand(c
,c
->argv
[1],c
->argv
[3],scoreval
,1);
5831 static void zremCommand(redisClient
*c
) {
5838 if ((zsetobj
= lookupKeyWriteOrReply(c
,c
->argv
[1],shared
.czero
)) == NULL
||
5839 checkType(c
,zsetobj
,REDIS_ZSET
)) return;
5842 de
= dictFind(zs
->dict
,c
->argv
[2]);
5844 addReply(c
,shared
.czero
);
5847 /* Delete from the skiplist */
5848 oldscore
= dictGetEntryVal(de
);
5849 deleted
= zslDelete(zs
->zsl
,*oldscore
,c
->argv
[2]);
5850 redisAssert(deleted
!= 0);
5852 /* Delete from the hash table */
5853 dictDelete(zs
->dict
,c
->argv
[2]);
5854 if (htNeedsResize(zs
->dict
)) dictResize(zs
->dict
);
5855 if (dictSize(zs
->dict
) == 0) deleteKey(c
->db
,c
->argv
[1]);
5857 addReply(c
,shared
.cone
);
5860 static void zremrangebyscoreCommand(redisClient
*c
) {
5867 if ((getDoubleFromObjectOrReply(c
, c
->argv
[2], &min
, NULL
) != REDIS_OK
) ||
5868 (getDoubleFromObjectOrReply(c
, c
->argv
[3], &max
, NULL
) != REDIS_OK
)) return;
5870 if ((zsetobj
= lookupKeyWriteOrReply(c
,c
->argv
[1],shared
.czero
)) == NULL
||
5871 checkType(c
,zsetobj
,REDIS_ZSET
)) return;
5874 deleted
= zslDeleteRangeByScore(zs
->zsl
,min
,max
,zs
->dict
);
5875 if (htNeedsResize(zs
->dict
)) dictResize(zs
->dict
);
5876 if (dictSize(zs
->dict
) == 0) deleteKey(c
->db
,c
->argv
[1]);
5877 server
.dirty
+= deleted
;
5878 addReplyLongLong(c
,deleted
);
5881 static void zremrangebyrankCommand(redisClient
*c
) {
5889 if ((getLongFromObjectOrReply(c
, c
->argv
[2], &start
, NULL
) != REDIS_OK
) ||
5890 (getLongFromObjectOrReply(c
, c
->argv
[3], &end
, NULL
) != REDIS_OK
)) return;
5892 if ((zsetobj
= lookupKeyWriteOrReply(c
,c
->argv
[1],shared
.czero
)) == NULL
||
5893 checkType(c
,zsetobj
,REDIS_ZSET
)) return;
5895 llen
= zs
->zsl
->length
;
5897 /* convert negative indexes */
5898 if (start
< 0) start
= llen
+start
;
5899 if (end
< 0) end
= llen
+end
;
5900 if (start
< 0) start
= 0;
5901 if (end
< 0) end
= 0;
5903 /* indexes sanity checks */
5904 if (start
> end
|| start
>= llen
) {
5905 addReply(c
,shared
.czero
);
5908 if (end
>= llen
) end
= llen
-1;
5910 /* increment start and end because zsl*Rank functions
5911 * use 1-based rank */
5912 deleted
= zslDeleteRangeByRank(zs
->zsl
,start
+1,end
+1,zs
->dict
);
5913 if (htNeedsResize(zs
->dict
)) dictResize(zs
->dict
);
5914 if (dictSize(zs
->dict
) == 0) deleteKey(c
->db
,c
->argv
[1]);
5915 server
.dirty
+= deleted
;
5916 addReplyLongLong(c
, deleted
);
5924 static int qsortCompareZsetopsrcByCardinality(const void *s1
, const void *s2
) {
5925 zsetopsrc
*d1
= (void*) s1
, *d2
= (void*) s2
;
5926 unsigned long size1
, size2
;
5927 size1
= d1
->dict
? dictSize(d1
->dict
) : 0;
5928 size2
= d2
->dict
? dictSize(d2
->dict
) : 0;
5929 return size1
- size2
;
5932 #define REDIS_AGGR_SUM 1
5933 #define REDIS_AGGR_MIN 2
5934 #define REDIS_AGGR_MAX 3
5935 #define zunionInterDictValue(_e) (dictGetEntryVal(_e) == NULL ? 1.0 : *(double*)dictGetEntryVal(_e))
5937 inline static void zunionInterAggregate(double *target
, double val
, int aggregate
) {
5938 if (aggregate
== REDIS_AGGR_SUM
) {
5939 *target
= *target
+ val
;
5940 } else if (aggregate
== REDIS_AGGR_MIN
) {
5941 *target
= val
< *target
? val
: *target
;
5942 } else if (aggregate
== REDIS_AGGR_MAX
) {
5943 *target
= val
> *target
? val
: *target
;
5946 redisPanic("Unknown ZUNION/INTER aggregate type");
5950 static void zunionInterGenericCommand(redisClient
*c
, robj
*dstkey
, int op
) {
5952 int aggregate
= REDIS_AGGR_SUM
;
5959 /* expect setnum input keys to be given */
5960 setnum
= atoi(c
->argv
[2]->ptr
);
5962 addReplySds(c
,sdsnew("-ERR at least 1 input key is needed for ZUNIONSTORE/ZINTERSTORE\r\n"));
5966 /* test if the expected number of keys would overflow */
5967 if (3+setnum
> c
->argc
) {
5968 addReply(c
,shared
.syntaxerr
);
5972 /* read keys to be used for input */
5973 src
= zmalloc(sizeof(zsetopsrc
) * setnum
);
5974 for (i
= 0, j
= 3; i
< setnum
; i
++, j
++) {
5975 robj
*obj
= lookupKeyWrite(c
->db
,c
->argv
[j
]);
5979 if (obj
->type
== REDIS_ZSET
) {
5980 src
[i
].dict
= ((zset
*)obj
->ptr
)->dict
;
5981 } else if (obj
->type
== REDIS_SET
) {
5982 src
[i
].dict
= (obj
->ptr
);
5985 addReply(c
,shared
.wrongtypeerr
);
5990 /* default all weights to 1 */
5991 src
[i
].weight
= 1.0;
5994 /* parse optional extra arguments */
5996 int remaining
= c
->argc
- j
;
5999 if (remaining
>= (setnum
+ 1) && !strcasecmp(c
->argv
[j
]->ptr
,"weights")) {
6001 for (i
= 0; i
< setnum
; i
++, j
++, remaining
--) {
6002 if (getDoubleFromObjectOrReply(c
, c
->argv
[j
], &src
[i
].weight
, NULL
) != REDIS_OK
)
6005 } else if (remaining
>= 2 && !strcasecmp(c
->argv
[j
]->ptr
,"aggregate")) {
6007 if (!strcasecmp(c
->argv
[j
]->ptr
,"sum")) {
6008 aggregate
= REDIS_AGGR_SUM
;
6009 } else if (!strcasecmp(c
->argv
[j
]->ptr
,"min")) {
6010 aggregate
= REDIS_AGGR_MIN
;
6011 } else if (!strcasecmp(c
->argv
[j
]->ptr
,"max")) {
6012 aggregate
= REDIS_AGGR_MAX
;
6015 addReply(c
,shared
.syntaxerr
);
6021 addReply(c
,shared
.syntaxerr
);
6027 /* sort sets from the smallest to largest, this will improve our
6028 * algorithm's performance */
6029 qsort(src
,setnum
,sizeof(zsetopsrc
),qsortCompareZsetopsrcByCardinality
);
6031 dstobj
= createZsetObject();
6032 dstzset
= dstobj
->ptr
;
6034 if (op
== REDIS_OP_INTER
) {
6035 /* skip going over all entries if the smallest zset is NULL or empty */
6036 if (src
[0].dict
&& dictSize(src
[0].dict
) > 0) {
6037 /* precondition: as src[0].dict is non-empty and the zsets are ordered
6038 * from small to large, all src[i > 0].dict are non-empty too */
6039 di
= dictGetIterator(src
[0].dict
);
6040 while((de
= dictNext(di
)) != NULL
) {
6041 double *score
= zmalloc(sizeof(double)), value
;
6042 *score
= src
[0].weight
* zunionInterDictValue(de
);
6044 for (j
= 1; j
< setnum
; j
++) {
6045 dictEntry
*other
= dictFind(src
[j
].dict
,dictGetEntryKey(de
));
6047 value
= src
[j
].weight
* zunionInterDictValue(other
);
6048 zunionInterAggregate(score
, value
, aggregate
);
6054 /* skip entry when not present in every source dict */
6058 robj
*o
= dictGetEntryKey(de
);
6059 dictAdd(dstzset
->dict
,o
,score
);
6060 incrRefCount(o
); /* added to dictionary */
6061 zslInsert(dstzset
->zsl
,*score
,o
);
6062 incrRefCount(o
); /* added to skiplist */
6065 dictReleaseIterator(di
);
6067 } else if (op
== REDIS_OP_UNION
) {
6068 for (i
= 0; i
< setnum
; i
++) {
6069 if (!src
[i
].dict
) continue;
6071 di
= dictGetIterator(src
[i
].dict
);
6072 while((de
= dictNext(di
)) != NULL
) {
6073 /* skip key when already processed */
6074 if (dictFind(dstzset
->dict
,dictGetEntryKey(de
)) != NULL
) continue;
6076 double *score
= zmalloc(sizeof(double)), value
;
6077 *score
= src
[i
].weight
* zunionInterDictValue(de
);
6079 /* because the zsets are sorted by size, it's only possible
6080 * for sets at larger indices to hold this entry */
6081 for (j
= (i
+1); j
< setnum
; j
++) {
6082 dictEntry
*other
= dictFind(src
[j
].dict
,dictGetEntryKey(de
));
6084 value
= src
[j
].weight
* zunionInterDictValue(other
);
6085 zunionInterAggregate(score
, value
, aggregate
);
6089 robj
*o
= dictGetEntryKey(de
);
6090 dictAdd(dstzset
->dict
,o
,score
);
6091 incrRefCount(o
); /* added to dictionary */
6092 zslInsert(dstzset
->zsl
,*score
,o
);
6093 incrRefCount(o
); /* added to skiplist */
6095 dictReleaseIterator(di
);
6098 /* unknown operator */
6099 redisAssert(op
== REDIS_OP_INTER
|| op
== REDIS_OP_UNION
);
6102 deleteKey(c
->db
,dstkey
);
6103 if (dstzset
->zsl
->length
) {
6104 dictAdd(c
->db
->dict
,dstkey
,dstobj
);
6105 incrRefCount(dstkey
);
6106 addReplyLongLong(c
, dstzset
->zsl
->length
);
6109 decrRefCount(dstobj
);
6110 addReply(c
, shared
.czero
);
6115 static void zunionstoreCommand(redisClient
*c
) {
6116 zunionInterGenericCommand(c
,c
->argv
[1], REDIS_OP_UNION
);
6119 static void zinterstoreCommand(redisClient
*c
) {
6120 zunionInterGenericCommand(c
,c
->argv
[1], REDIS_OP_INTER
);
6123 static void zrangeGenericCommand(redisClient
*c
, int reverse
) {
6135 if ((getLongFromObjectOrReply(c
, c
->argv
[2], &start
, NULL
) != REDIS_OK
) ||
6136 (getLongFromObjectOrReply(c
, c
->argv
[3], &end
, NULL
) != REDIS_OK
)) return;
6138 if (c
->argc
== 5 && !strcasecmp(c
->argv
[4]->ptr
,"withscores")) {
6140 } else if (c
->argc
>= 5) {
6141 addReply(c
,shared
.syntaxerr
);
6145 if ((o
= lookupKeyReadOrReply(c
,c
->argv
[1],shared
.emptymultibulk
)) == NULL
6146 || checkType(c
,o
,REDIS_ZSET
)) return;
6151 /* convert negative indexes */
6152 if (start
< 0) start
= llen
+start
;
6153 if (end
< 0) end
= llen
+end
;
6154 if (start
< 0) start
= 0;
6155 if (end
< 0) end
= 0;
6157 /* indexes sanity checks */
6158 if (start
> end
|| start
>= llen
) {
6159 /* Out of range start or start > end result in empty list */
6160 addReply(c
,shared
.emptymultibulk
);
6163 if (end
>= llen
) end
= llen
-1;
6164 rangelen
= (end
-start
)+1;
6166 /* check if starting point is trivial, before searching
6167 * the element in log(N) time */
6169 ln
= start
== 0 ? zsl
->tail
: zslGetElementByRank(zsl
, llen
-start
);
6172 zsl
->header
->forward
[0] : zslGetElementByRank(zsl
, start
+1);
6175 /* Return the result in form of a multi-bulk reply */
6176 addReplySds(c
,sdscatprintf(sdsempty(),"*%d\r\n",
6177 withscores
? (rangelen
*2) : rangelen
));
6178 for (j
= 0; j
< rangelen
; j
++) {
6180 addReplyBulk(c
,ele
);
6182 addReplyDouble(c
,ln
->score
);
6183 ln
= reverse
? ln
->backward
: ln
->forward
[0];
6187 static void zrangeCommand(redisClient
*c
) {
6188 zrangeGenericCommand(c
,0);
6191 static void zrevrangeCommand(redisClient
*c
) {
6192 zrangeGenericCommand(c
,1);
6195 /* This command implements both ZRANGEBYSCORE and ZCOUNT.
6196 * If justcount is non-zero, just the count is returned. */
6197 static void genericZrangebyscoreCommand(redisClient
*c
, int justcount
) {
6200 int minex
= 0, maxex
= 0; /* are min or max exclusive? */
6201 int offset
= 0, limit
= -1;
6205 /* Parse the min-max interval. If one of the values is prefixed
6206 * by the "(" character, it's considered "open". For instance
6207 * ZRANGEBYSCORE zset (1.5 (2.5 will match min < x < max
6208 * ZRANGEBYSCORE zset 1.5 2.5 will instead match min <= x <= max */
6209 if (((char*)c
->argv
[2]->ptr
)[0] == '(') {
6210 min
= strtod((char*)c
->argv
[2]->ptr
+1,NULL
);
6213 min
= strtod(c
->argv
[2]->ptr
,NULL
);
6215 if (((char*)c
->argv
[3]->ptr
)[0] == '(') {
6216 max
= strtod((char*)c
->argv
[3]->ptr
+1,NULL
);
6219 max
= strtod(c
->argv
[3]->ptr
,NULL
);
6222 /* Parse "WITHSCORES": note that if the command was called with
6223 * the name ZCOUNT then we are sure that c->argc == 4, so we'll never
6224 * enter the following paths to parse WITHSCORES and LIMIT. */
6225 if (c
->argc
== 5 || c
->argc
== 8) {
6226 if (strcasecmp(c
->argv
[c
->argc
-1]->ptr
,"withscores") == 0)
6231 if (c
->argc
!= (4 + withscores
) && c
->argc
!= (7 + withscores
))
6235 sdsnew("-ERR wrong number of arguments for ZRANGEBYSCORE\r\n"));
6240 if (c
->argc
== (7 + withscores
) && strcasecmp(c
->argv
[4]->ptr
,"limit")) {
6241 addReply(c
,shared
.syntaxerr
);
6243 } else if (c
->argc
== (7 + withscores
)) {
6244 offset
= atoi(c
->argv
[5]->ptr
);
6245 limit
= atoi(c
->argv
[6]->ptr
);
6246 if (offset
< 0) offset
= 0;
6249 /* Ok, lookup the key and get the range */
6250 o
= lookupKeyRead(c
->db
,c
->argv
[1]);
6252 addReply(c
,justcount
? shared
.czero
: shared
.emptymultibulk
);
6254 if (o
->type
!= REDIS_ZSET
) {
6255 addReply(c
,shared
.wrongtypeerr
);
6257 zset
*zsetobj
= o
->ptr
;
6258 zskiplist
*zsl
= zsetobj
->zsl
;
6260 robj
*ele
, *lenobj
= NULL
;
6261 unsigned long rangelen
= 0;
6263 /* Get the first node with the score >= min, or with
6264 * score > min if 'minex' is true. */
6265 ln
= zslFirstWithScore(zsl
,min
);
6266 while (minex
&& ln
&& ln
->score
== min
) ln
= ln
->forward
[0];
6269 /* No element matching the specified interval */
6270 addReply(c
,justcount
? shared
.czero
: shared
.emptymultibulk
);
6274 /* We don't know in advance how many matching elements there
6275 * are in the list, so we push this object that will represent
6276 * the multi-bulk length in the output buffer, and will "fix"
6279 lenobj
= createObject(REDIS_STRING
,NULL
);
6281 decrRefCount(lenobj
);
6284 while(ln
&& (maxex
? (ln
->score
< max
) : (ln
->score
<= max
))) {
6287 ln
= ln
->forward
[0];
6290 if (limit
== 0) break;
6293 addReplyBulk(c
,ele
);
6295 addReplyDouble(c
,ln
->score
);
6297 ln
= ln
->forward
[0];
6299 if (limit
> 0) limit
--;
6302 addReplyLongLong(c
,(long)rangelen
);
6304 lenobj
->ptr
= sdscatprintf(sdsempty(),"*%lu\r\n",
6305 withscores
? (rangelen
*2) : rangelen
);
6311 static void zrangebyscoreCommand(redisClient
*c
) {
6312 genericZrangebyscoreCommand(c
,0);
6315 static void zcountCommand(redisClient
*c
) {
6316 genericZrangebyscoreCommand(c
,1);
6319 static void zcardCommand(redisClient
*c
) {
6323 if ((o
= lookupKeyReadOrReply(c
,c
->argv
[1],shared
.czero
)) == NULL
||
6324 checkType(c
,o
,REDIS_ZSET
)) return;
6327 addReplyUlong(c
,zs
->zsl
->length
);
6330 static void zscoreCommand(redisClient
*c
) {
6335 if ((o
= lookupKeyReadOrReply(c
,c
->argv
[1],shared
.nullbulk
)) == NULL
||
6336 checkType(c
,o
,REDIS_ZSET
)) return;
6339 de
= dictFind(zs
->dict
,c
->argv
[2]);
6341 addReply(c
,shared
.nullbulk
);
6343 double *score
= dictGetEntryVal(de
);
6345 addReplyDouble(c
,*score
);
6349 static void zrankGenericCommand(redisClient
*c
, int reverse
) {
6357 if ((o
= lookupKeyReadOrReply(c
,c
->argv
[1],shared
.nullbulk
)) == NULL
||
6358 checkType(c
,o
,REDIS_ZSET
)) return;
6362 de
= dictFind(zs
->dict
,c
->argv
[2]);
6364 addReply(c
,shared
.nullbulk
);
6368 score
= dictGetEntryVal(de
);
6369 rank
= zslGetRank(zsl
, *score
, c
->argv
[2]);
6372 addReplyLongLong(c
, zsl
->length
- rank
);
6374 addReplyLongLong(c
, rank
-1);
6377 addReply(c
,shared
.nullbulk
);
6381 static void zrankCommand(redisClient
*c
) {
6382 zrankGenericCommand(c
, 0);
6385 static void zrevrankCommand(redisClient
*c
) {
6386 zrankGenericCommand(c
, 1);
6389 /* ========================= Hashes utility functions ======================= */
6390 #define REDIS_HASH_KEY 1
6391 #define REDIS_HASH_VALUE 2
6393 /* Check the length of a number of objects to see if we need to convert a
6394 * zipmap to a real hash. Note that we only check string encoded objects
6395 * as their string length can be queried in constant time. */
6396 static void hashTryConversion(robj
*subject
, robj
**argv
, int start
, int end
) {
6398 if (subject
->encoding
!= REDIS_ENCODING_ZIPMAP
) return;
6400 for (i
= start
; i
<= end
; i
++) {
6401 if (argv
[i
]->encoding
== REDIS_ENCODING_RAW
&&
6402 sdslen(argv
[i
]->ptr
) > server
.hash_max_zipmap_value
)
6404 convertToRealHash(subject
);
6410 /* Encode given objects in-place when the hash uses a dict. */
6411 static void hashTryObjectEncoding(robj
*subject
, robj
**o1
, robj
**o2
) {
6412 if (subject
->encoding
== REDIS_ENCODING_HT
) {
6413 if (o1
) *o1
= tryObjectEncoding(*o1
);
6414 if (o2
) *o2
= tryObjectEncoding(*o2
);
6418 /* Get the value from a hash identified by key. Returns either a string
6419 * object or NULL if the value cannot be found. The refcount of the object
6420 * is always increased by 1 when the value was found. */
6421 static robj
*hashGet(robj
*o
, robj
*key
) {
6423 if (o
->encoding
== REDIS_ENCODING_ZIPMAP
) {
6426 key
= getDecodedObject(key
);
6427 if (zipmapGet(o
->ptr
,key
->ptr
,sdslen(key
->ptr
),&v
,&vlen
)) {
6428 value
= createStringObject((char*)v
,vlen
);
6432 dictEntry
*de
= dictFind(o
->ptr
,key
);
6434 value
= dictGetEntryVal(de
);
6435 incrRefCount(value
);
6441 /* Test if the key exists in the given hash. Returns 1 if the key
6442 * exists and 0 when it doesn't. */
6443 static int hashExists(robj
*o
, robj
*key
) {
6444 if (o
->encoding
== REDIS_ENCODING_ZIPMAP
) {
6445 key
= getDecodedObject(key
);
6446 if (zipmapExists(o
->ptr
,key
->ptr
,sdslen(key
->ptr
))) {
6452 if (dictFind(o
->ptr
,key
) != NULL
) {
6459 /* Add an element, discard the old if the key already exists.
6460 * Return 0 on insert and 1 on update. */
6461 static int hashSet(robj
*o
, robj
*key
, robj
*value
) {
6463 if (o
->encoding
== REDIS_ENCODING_ZIPMAP
) {
6464 key
= getDecodedObject(key
);
6465 value
= getDecodedObject(value
);
6466 o
->ptr
= zipmapSet(o
->ptr
,
6467 key
->ptr
,sdslen(key
->ptr
),
6468 value
->ptr
,sdslen(value
->ptr
), &update
);
6470 decrRefCount(value
);
6472 /* Check if the zipmap needs to be upgraded to a real hash table */
6473 if (zipmapLen(o
->ptr
) > server
.hash_max_zipmap_entries
)
6474 convertToRealHash(o
);
6476 if (dictReplace(o
->ptr
,key
,value
)) {
6483 incrRefCount(value
);
6488 /* Delete an element from a hash.
6489 * Return 1 on deleted and 0 on not found. */
6490 static int hashDelete(robj
*o
, robj
*key
) {
6492 if (o
->encoding
== REDIS_ENCODING_ZIPMAP
) {
6493 key
= getDecodedObject(key
);
6494 o
->ptr
= zipmapDel(o
->ptr
,key
->ptr
,sdslen(key
->ptr
), &deleted
);
6497 deleted
= dictDelete((dict
*)o
->ptr
,key
) == DICT_OK
;
6498 /* Always check if the dictionary needs a resize after a delete. */
6499 if (deleted
&& htNeedsResize(o
->ptr
)) dictResize(o
->ptr
);
6504 /* Return the number of elements in a hash. */
6505 static unsigned long hashLength(robj
*o
) {
6506 return (o
->encoding
== REDIS_ENCODING_ZIPMAP
) ?
6507 zipmapLen((unsigned char*)o
->ptr
) : dictSize((dict
*)o
->ptr
);
6510 /* Structure to hold hash iteration abstraction. Note that iteration over
6511 * hashes involves both fields and values. Because it is possible that
6512 * not both are required, store pointers in the iterator to avoid
6513 * unnecessary memory allocation for fields/values. */
6517 unsigned char *zk
, *zv
;
6518 unsigned int zklen
, zvlen
;
6524 static hashIterator
*hashInitIterator(robj
*subject
) {
6525 hashIterator
*hi
= zmalloc(sizeof(hashIterator
));
6526 hi
->encoding
= subject
->encoding
;
6527 if (hi
->encoding
== REDIS_ENCODING_ZIPMAP
) {
6528 hi
->zi
= zipmapRewind(subject
->ptr
);
6529 } else if (hi
->encoding
== REDIS_ENCODING_HT
) {
6530 hi
->di
= dictGetIterator(subject
->ptr
);
6537 static void hashReleaseIterator(hashIterator
*hi
) {
6538 if (hi
->encoding
== REDIS_ENCODING_HT
) {
6539 dictReleaseIterator(hi
->di
);
6544 /* Move to the next entry in the hash. Return REDIS_OK when the next entry
6545 * could be found and REDIS_ERR when the iterator reaches the end. */
6546 static int hashNext(hashIterator
*hi
) {
6547 if (hi
->encoding
== REDIS_ENCODING_ZIPMAP
) {
6548 if ((hi
->zi
= zipmapNext(hi
->zi
, &hi
->zk
, &hi
->zklen
,
6549 &hi
->zv
, &hi
->zvlen
)) == NULL
) return REDIS_ERR
;
6551 if ((hi
->de
= dictNext(hi
->di
)) == NULL
) return REDIS_ERR
;
6556 /* Get key or value object at current iteration position.
6557 * This increases the refcount of the field object by 1. */
6558 static robj
*hashCurrent(hashIterator
*hi
, int what
) {
6560 if (hi
->encoding
== REDIS_ENCODING_ZIPMAP
) {
6561 if (what
& REDIS_HASH_KEY
) {
6562 o
= createStringObject((char*)hi
->zk
,hi
->zklen
);
6564 o
= createStringObject((char*)hi
->zv
,hi
->zvlen
);
6567 if (what
& REDIS_HASH_KEY
) {
6568 o
= dictGetEntryKey(hi
->de
);
6570 o
= dictGetEntryVal(hi
->de
);
6577 static robj
*hashLookupWriteOrCreate(redisClient
*c
, robj
*key
) {
6578 robj
*o
= lookupKeyWrite(c
->db
,key
);
6580 o
= createHashObject();
6581 dictAdd(c
->db
->dict
,key
,o
);
6584 if (o
->type
!= REDIS_HASH
) {
6585 addReply(c
,shared
.wrongtypeerr
);
6592 /* ============================= Hash commands ============================== */
6593 static void hsetCommand(redisClient
*c
) {
6597 if ((o
= hashLookupWriteOrCreate(c
,c
->argv
[1])) == NULL
) return;
6598 hashTryConversion(o
,c
->argv
,2,3);
6599 hashTryObjectEncoding(o
,&c
->argv
[2], &c
->argv
[3]);
6600 update
= hashSet(o
,c
->argv
[2],c
->argv
[3]);
6601 addReply(c
, update
? shared
.czero
: shared
.cone
);
6605 static void hsetnxCommand(redisClient
*c
) {
6607 if ((o
= hashLookupWriteOrCreate(c
,c
->argv
[1])) == NULL
) return;
6608 hashTryConversion(o
,c
->argv
,2,3);
6610 if (hashExists(o
, c
->argv
[2])) {
6611 addReply(c
, shared
.czero
);
6613 hashTryObjectEncoding(o
,&c
->argv
[2], &c
->argv
[3]);
6614 hashSet(o
,c
->argv
[2],c
->argv
[3]);
6615 addReply(c
, shared
.cone
);
6620 static void hmsetCommand(redisClient
*c
) {
6624 if ((c
->argc
% 2) == 1) {
6625 addReplySds(c
,sdsnew("-ERR wrong number of arguments for HMSET\r\n"));
6629 if ((o
= hashLookupWriteOrCreate(c
,c
->argv
[1])) == NULL
) return;
6630 hashTryConversion(o
,c
->argv
,2,c
->argc
-1);
6631 for (i
= 2; i
< c
->argc
; i
+= 2) {
6632 hashTryObjectEncoding(o
,&c
->argv
[i
], &c
->argv
[i
+1]);
6633 hashSet(o
,c
->argv
[i
],c
->argv
[i
+1]);
6635 addReply(c
, shared
.ok
);
6639 static void hincrbyCommand(redisClient
*c
) {
6640 long long value
, incr
;
6641 robj
*o
, *current
, *new;
6643 if (getLongLongFromObjectOrReply(c
,c
->argv
[3],&incr
,NULL
) != REDIS_OK
) return;
6644 if ((o
= hashLookupWriteOrCreate(c
,c
->argv
[1])) == NULL
) return;
6645 if ((current
= hashGet(o
,c
->argv
[2])) != NULL
) {
6646 if (getLongLongFromObjectOrReply(c
,current
,&value
,
6647 "hash value is not an integer") != REDIS_OK
) {
6648 decrRefCount(current
);
6651 decrRefCount(current
);
6657 new = createStringObjectFromLongLong(value
);
6658 hashTryObjectEncoding(o
,&c
->argv
[2],NULL
);
6659 hashSet(o
,c
->argv
[2],new);
6661 addReplyLongLong(c
,value
);
6665 static void hgetCommand(redisClient
*c
) {
6667 if ((o
= lookupKeyReadOrReply(c
,c
->argv
[1],shared
.nullbulk
)) == NULL
||
6668 checkType(c
,o
,REDIS_HASH
)) return;
6670 if ((value
= hashGet(o
,c
->argv
[2])) != NULL
) {
6671 addReplyBulk(c
,value
);
6672 decrRefCount(value
);
6674 addReply(c
,shared
.nullbulk
);
6678 static void hmgetCommand(redisClient
*c
) {
6681 o
= lookupKeyRead(c
->db
,c
->argv
[1]);
6682 if (o
!= NULL
&& o
->type
!= REDIS_HASH
) {
6683 addReply(c
,shared
.wrongtypeerr
);
6686 /* Note the check for o != NULL happens inside the loop. This is
6687 * done because objects that cannot be found are considered to be
6688 * an empty hash. The reply should then be a series of NULLs. */
6689 addReplySds(c
,sdscatprintf(sdsempty(),"*%d\r\n",c
->argc
-2));
6690 for (i
= 2; i
< c
->argc
; i
++) {
6691 if (o
!= NULL
&& (value
= hashGet(o
,c
->argv
[i
])) != NULL
) {
6692 addReplyBulk(c
,value
);
6693 decrRefCount(value
);
6695 addReply(c
,shared
.nullbulk
);
6700 static void hdelCommand(redisClient
*c
) {
6702 if ((o
= lookupKeyWriteOrReply(c
,c
->argv
[1],shared
.czero
)) == NULL
||
6703 checkType(c
,o
,REDIS_HASH
)) return;
6705 if (hashDelete(o
,c
->argv
[2])) {
6706 if (hashLength(o
) == 0) deleteKey(c
->db
,c
->argv
[1]);
6707 addReply(c
,shared
.cone
);
6710 addReply(c
,shared
.czero
);
6714 static void hlenCommand(redisClient
*c
) {
6716 if ((o
= lookupKeyReadOrReply(c
,c
->argv
[1],shared
.czero
)) == NULL
||
6717 checkType(c
,o
,REDIS_HASH
)) return;
6719 addReplyUlong(c
,hashLength(o
));
6722 static void genericHgetallCommand(redisClient
*c
, int flags
) {
6723 robj
*o
, *lenobj
, *obj
;
6724 unsigned long count
= 0;
6727 if ((o
= lookupKeyReadOrReply(c
,c
->argv
[1],shared
.emptymultibulk
)) == NULL
6728 || checkType(c
,o
,REDIS_HASH
)) return;
6730 lenobj
= createObject(REDIS_STRING
,NULL
);
6732 decrRefCount(lenobj
);
6734 hi
= hashInitIterator(o
);
6735 while (hashNext(hi
) != REDIS_ERR
) {
6736 if (flags
& REDIS_HASH_KEY
) {
6737 obj
= hashCurrent(hi
,REDIS_HASH_KEY
);
6738 addReplyBulk(c
,obj
);
6742 if (flags
& REDIS_HASH_VALUE
) {
6743 obj
= hashCurrent(hi
,REDIS_HASH_VALUE
);
6744 addReplyBulk(c
,obj
);
6749 hashReleaseIterator(hi
);
6751 lenobj
->ptr
= sdscatprintf(sdsempty(),"*%lu\r\n",count
);
6754 static void hkeysCommand(redisClient
*c
) {
6755 genericHgetallCommand(c
,REDIS_HASH_KEY
);
6758 static void hvalsCommand(redisClient
*c
) {
6759 genericHgetallCommand(c
,REDIS_HASH_VALUE
);
6762 static void hgetallCommand(redisClient
*c
) {
6763 genericHgetallCommand(c
,REDIS_HASH_KEY
|REDIS_HASH_VALUE
);
6766 static void hexistsCommand(redisClient
*c
) {
6768 if ((o
= lookupKeyReadOrReply(c
,c
->argv
[1],shared
.czero
)) == NULL
||
6769 checkType(c
,o
,REDIS_HASH
)) return;
6771 addReply(c
, hashExists(o
,c
->argv
[2]) ? shared
.cone
: shared
.czero
);
6774 static void convertToRealHash(robj
*o
) {
6775 unsigned char *key
, *val
, *p
, *zm
= o
->ptr
;
6776 unsigned int klen
, vlen
;
6777 dict
*dict
= dictCreate(&hashDictType
,NULL
);
6779 assert(o
->type
== REDIS_HASH
&& o
->encoding
!= REDIS_ENCODING_HT
);
6780 p
= zipmapRewind(zm
);
6781 while((p
= zipmapNext(p
,&key
,&klen
,&val
,&vlen
)) != NULL
) {
6782 robj
*keyobj
, *valobj
;
6784 keyobj
= createStringObject((char*)key
,klen
);
6785 valobj
= createStringObject((char*)val
,vlen
);
6786 keyobj
= tryObjectEncoding(keyobj
);
6787 valobj
= tryObjectEncoding(valobj
);
6788 dictAdd(dict
,keyobj
,valobj
);
6790 o
->encoding
= REDIS_ENCODING_HT
;
6795 /* ========================= Non type-specific commands ==================== */
6797 static void flushdbCommand(redisClient
*c
) {
6798 server
.dirty
+= dictSize(c
->db
->dict
);
6799 touchWatchedKeysOnFlush(c
->db
->id
);
6800 dictEmpty(c
->db
->dict
);
6801 dictEmpty(c
->db
->expires
);
6802 addReply(c
,shared
.ok
);
6805 static void flushallCommand(redisClient
*c
) {
6806 touchWatchedKeysOnFlush(-1);
6807 server
.dirty
+= emptyDb();
6808 addReply(c
,shared
.ok
);
6809 if (server
.bgsavechildpid
!= -1) {
6810 kill(server
.bgsavechildpid
,SIGKILL
);
6811 rdbRemoveTempFile(server
.bgsavechildpid
);
6813 rdbSave(server
.dbfilename
);
6817 static redisSortOperation
*createSortOperation(int type
, robj
*pattern
) {
6818 redisSortOperation
*so
= zmalloc(sizeof(*so
));
6820 so
->pattern
= pattern
;
6824 /* Return the value associated to the key with a name obtained
6825 * substituting the first occurrence of '*' in 'pattern' with 'subst'.
6826 * The returned object will always have its refcount increased by 1
6827 * when it is non-NULL. */
6828 static robj
*lookupKeyByPattern(redisDb
*db
, robj
*pattern
, robj
*subst
) {
6831 robj keyobj
, fieldobj
, *o
;
6832 int prefixlen
, sublen
, postfixlen
, fieldlen
;
6833 /* Expoit the internal sds representation to create a sds string allocated on the stack in order to make this function faster */
6837 char buf
[REDIS_SORTKEY_MAX
+1];
6838 } keyname
, fieldname
;
6840 /* If the pattern is "#" return the substitution object itself in order
6841 * to implement the "SORT ... GET #" feature. */
6842 spat
= pattern
->ptr
;
6843 if (spat
[0] == '#' && spat
[1] == '\0') {
6844 incrRefCount(subst
);
6848 /* The substitution object may be specially encoded. If so we create
6849 * a decoded object on the fly. Otherwise getDecodedObject will just
6850 * increment the ref count, that we'll decrement later. */
6851 subst
= getDecodedObject(subst
);
6854 if (sdslen(spat
)+sdslen(ssub
)-1 > REDIS_SORTKEY_MAX
) return NULL
;
6855 p
= strchr(spat
,'*');
6857 decrRefCount(subst
);
6861 /* Find out if we're dealing with a hash dereference. */
6862 if ((f
= strstr(p
+1, "->")) != NULL
) {
6863 fieldlen
= sdslen(spat
)-(f
-spat
);
6864 /* this also copies \0 character */
6865 memcpy(fieldname
.buf
,f
+2,fieldlen
-1);
6866 fieldname
.len
= fieldlen
-2;
6872 sublen
= sdslen(ssub
);
6873 postfixlen
= sdslen(spat
)-(prefixlen
+1)-fieldlen
;
6874 memcpy(keyname
.buf
,spat
,prefixlen
);
6875 memcpy(keyname
.buf
+prefixlen
,ssub
,sublen
);
6876 memcpy(keyname
.buf
+prefixlen
+sublen
,p
+1,postfixlen
);
6877 keyname
.buf
[prefixlen
+sublen
+postfixlen
] = '\0';
6878 keyname
.len
= prefixlen
+sublen
+postfixlen
;
6879 decrRefCount(subst
);
6881 /* Lookup substituted key */
6882 initStaticStringObject(keyobj
,((char*)&keyname
)+(sizeof(long)*2));
6883 o
= lookupKeyRead(db
,&keyobj
);
6884 if (o
== NULL
) return NULL
;
6887 if (o
->type
!= REDIS_HASH
|| fieldname
.len
< 1) return NULL
;
6889 /* Retrieve value from hash by the field name. This operation
6890 * already increases the refcount of the returned object. */
6891 initStaticStringObject(fieldobj
,((char*)&fieldname
)+(sizeof(long)*2));
6892 o
= hashGet(o
, &fieldobj
);
6894 if (o
->type
!= REDIS_STRING
) return NULL
;
6896 /* Every object that this function returns needs to have its refcount
6897 * increased. sortCommand decreases it again. */
6904 /* sortCompare() is used by qsort in sortCommand(). Given that qsort_r with
6905 * the additional parameter is not standard but a BSD-specific we have to
6906 * pass sorting parameters via the global 'server' structure */
6907 static int sortCompare(const void *s1
, const void *s2
) {
6908 const redisSortObject
*so1
= s1
, *so2
= s2
;
6911 if (!server
.sort_alpha
) {
6912 /* Numeric sorting. Here it's trivial as we precomputed scores */
6913 if (so1
->u
.score
> so2
->u
.score
) {
6915 } else if (so1
->u
.score
< so2
->u
.score
) {
6921 /* Alphanumeric sorting */
6922 if (server
.sort_bypattern
) {
6923 if (!so1
->u
.cmpobj
|| !so2
->u
.cmpobj
) {
6924 /* At least one compare object is NULL */
6925 if (so1
->u
.cmpobj
== so2
->u
.cmpobj
)
6927 else if (so1
->u
.cmpobj
== NULL
)
6932 /* We have both the objects, use strcoll */
6933 cmp
= strcoll(so1
->u
.cmpobj
->ptr
,so2
->u
.cmpobj
->ptr
);
6936 /* Compare elements directly. */
6937 cmp
= compareStringObjects(so1
->obj
,so2
->obj
);
6940 return server
.sort_desc
? -cmp
: cmp
;
6943 /* The SORT command is the most complex command in Redis. Warning: this code
6944 * is optimized for speed and a bit less for readability */
6945 static void sortCommand(redisClient
*c
) {
6948 int desc
= 0, alpha
= 0;
6949 int limit_start
= 0, limit_count
= -1, start
, end
;
6950 int j
, dontsort
= 0, vectorlen
;
6951 int getop
= 0; /* GET operation counter */
6952 robj
*sortval
, *sortby
= NULL
, *storekey
= NULL
;
6953 redisSortObject
*vector
; /* Resulting vector to sort */
6955 /* Lookup the key to sort. It must be of the right types */
6956 sortval
= lookupKeyRead(c
->db
,c
->argv
[1]);
6957 if (sortval
== NULL
) {
6958 addReply(c
,shared
.emptymultibulk
);
6961 if (sortval
->type
!= REDIS_SET
&& sortval
->type
!= REDIS_LIST
&&
6962 sortval
->type
!= REDIS_ZSET
)
6964 addReply(c
,shared
.wrongtypeerr
);
6968 /* Create a list of operations to perform for every sorted element.
6969 * Operations can be GET/DEL/INCR/DECR */
6970 operations
= listCreate();
6971 listSetFreeMethod(operations
,zfree
);
6974 /* Now we need to protect sortval incrementing its count, in the future
6975 * SORT may have options able to overwrite/delete keys during the sorting
6976 * and the sorted key itself may get destroied */
6977 incrRefCount(sortval
);
6979 /* The SORT command has an SQL-alike syntax, parse it */
6980 while(j
< c
->argc
) {
6981 int leftargs
= c
->argc
-j
-1;
6982 if (!strcasecmp(c
->argv
[j
]->ptr
,"asc")) {
6984 } else if (!strcasecmp(c
->argv
[j
]->ptr
,"desc")) {
6986 } else if (!strcasecmp(c
->argv
[j
]->ptr
,"alpha")) {
6988 } else if (!strcasecmp(c
->argv
[j
]->ptr
,"limit") && leftargs
>= 2) {
6989 limit_start
= atoi(c
->argv
[j
+1]->ptr
);
6990 limit_count
= atoi(c
->argv
[j
+2]->ptr
);
6992 } else if (!strcasecmp(c
->argv
[j
]->ptr
,"store") && leftargs
>= 1) {
6993 storekey
= c
->argv
[j
+1];
6995 } else if (!strcasecmp(c
->argv
[j
]->ptr
,"by") && leftargs
>= 1) {
6996 sortby
= c
->argv
[j
+1];
6997 /* If the BY pattern does not contain '*', i.e. it is constant,
6998 * we don't need to sort nor to lookup the weight keys. */
6999 if (strchr(c
->argv
[j
+1]->ptr
,'*') == NULL
) dontsort
= 1;
7001 } else if (!strcasecmp(c
->argv
[j
]->ptr
,"get") && leftargs
>= 1) {
7002 listAddNodeTail(operations
,createSortOperation(
7003 REDIS_SORT_GET
,c
->argv
[j
+1]));
7007 decrRefCount(sortval
);
7008 listRelease(operations
);
7009 addReply(c
,shared
.syntaxerr
);
7015 /* Load the sorting vector with all the objects to sort */
7016 switch(sortval
->type
) {
7017 case REDIS_LIST
: vectorlen
= listLength((list
*)sortval
->ptr
); break;
7018 case REDIS_SET
: vectorlen
= dictSize((dict
*)sortval
->ptr
); break;
7019 case REDIS_ZSET
: vectorlen
= dictSize(((zset
*)sortval
->ptr
)->dict
); break;
7020 default: vectorlen
= 0; redisPanic("Bad SORT type"); /* Avoid GCC warning */
7022 vector
= zmalloc(sizeof(redisSortObject
)*vectorlen
);
7025 if (sortval
->type
== REDIS_LIST
) {
7026 list
*list
= sortval
->ptr
;
7030 listRewind(list
,&li
);
7031 while((ln
= listNext(&li
))) {
7032 robj
*ele
= ln
->value
;
7033 vector
[j
].obj
= ele
;
7034 vector
[j
].u
.score
= 0;
7035 vector
[j
].u
.cmpobj
= NULL
;
7043 if (sortval
->type
== REDIS_SET
) {
7046 zset
*zs
= sortval
->ptr
;
7050 di
= dictGetIterator(set
);
7051 while((setele
= dictNext(di
)) != NULL
) {
7052 vector
[j
].obj
= dictGetEntryKey(setele
);
7053 vector
[j
].u
.score
= 0;
7054 vector
[j
].u
.cmpobj
= NULL
;
7057 dictReleaseIterator(di
);
7059 redisAssert(j
== vectorlen
);
7061 /* Now it's time to load the right scores in the sorting vector */
7062 if (dontsort
== 0) {
7063 for (j
= 0; j
< vectorlen
; j
++) {
7066 /* lookup value to sort by */
7067 byval
= lookupKeyByPattern(c
->db
,sortby
,vector
[j
].obj
);
7068 if (!byval
) continue;
7070 /* use object itself to sort by */
7071 byval
= vector
[j
].obj
;
7075 if (sortby
) vector
[j
].u
.cmpobj
= getDecodedObject(byval
);
7077 if (byval
->encoding
== REDIS_ENCODING_RAW
) {
7078 vector
[j
].u
.score
= strtod(byval
->ptr
,NULL
);
7079 } else if (byval
->encoding
== REDIS_ENCODING_INT
) {
7080 /* Don't need to decode the object if it's
7081 * integer-encoded (the only encoding supported) so
7082 * far. We can just cast it */
7083 vector
[j
].u
.score
= (long)byval
->ptr
;
7085 redisAssert(1 != 1);
7089 /* when the object was retrieved using lookupKeyByPattern,
7090 * its refcount needs to be decreased. */
7092 decrRefCount(byval
);
7097 /* We are ready to sort the vector... perform a bit of sanity check
7098 * on the LIMIT option too. We'll use a partial version of quicksort. */
7099 start
= (limit_start
< 0) ? 0 : limit_start
;
7100 end
= (limit_count
< 0) ? vectorlen
-1 : start
+limit_count
-1;
7101 if (start
>= vectorlen
) {
7102 start
= vectorlen
-1;
7105 if (end
>= vectorlen
) end
= vectorlen
-1;
7107 if (dontsort
== 0) {
7108 server
.sort_desc
= desc
;
7109 server
.sort_alpha
= alpha
;
7110 server
.sort_bypattern
= sortby
? 1 : 0;
7111 if (sortby
&& (start
!= 0 || end
!= vectorlen
-1))
7112 pqsort(vector
,vectorlen
,sizeof(redisSortObject
),sortCompare
, start
,end
);
7114 qsort(vector
,vectorlen
,sizeof(redisSortObject
),sortCompare
);
7117 /* Send command output to the output buffer, performing the specified
7118 * GET/DEL/INCR/DECR operations if any. */
7119 outputlen
= getop
? getop
*(end
-start
+1) : end
-start
+1;
7120 if (storekey
== NULL
) {
7121 /* STORE option not specified, sent the sorting result to client */
7122 addReplySds(c
,sdscatprintf(sdsempty(),"*%d\r\n",outputlen
));
7123 for (j
= start
; j
<= end
; j
++) {
7127 if (!getop
) addReplyBulk(c
,vector
[j
].obj
);
7128 listRewind(operations
,&li
);
7129 while((ln
= listNext(&li
))) {
7130 redisSortOperation
*sop
= ln
->value
;
7131 robj
*val
= lookupKeyByPattern(c
->db
,sop
->pattern
,
7134 if (sop
->type
== REDIS_SORT_GET
) {
7136 addReply(c
,shared
.nullbulk
);
7138 addReplyBulk(c
,val
);
7142 redisAssert(sop
->type
== REDIS_SORT_GET
); /* always fails */
7147 robj
*listObject
= createListObject();
7148 list
*listPtr
= (list
*) listObject
->ptr
;
7150 /* STORE option specified, set the sorting result as a List object */
7151 for (j
= start
; j
<= end
; j
++) {
7156 listAddNodeTail(listPtr
,vector
[j
].obj
);
7157 incrRefCount(vector
[j
].obj
);
7159 listRewind(operations
,&li
);
7160 while((ln
= listNext(&li
))) {
7161 redisSortOperation
*sop
= ln
->value
;
7162 robj
*val
= lookupKeyByPattern(c
->db
,sop
->pattern
,
7165 if (sop
->type
== REDIS_SORT_GET
) {
7167 listAddNodeTail(listPtr
,createStringObject("",0));
7169 /* We should do a incrRefCount on val because it is
7170 * added to the list, but also a decrRefCount because
7171 * it is returned by lookupKeyByPattern. This results
7172 * in doing nothing at all. */
7173 listAddNodeTail(listPtr
,val
);
7176 redisAssert(sop
->type
== REDIS_SORT_GET
); /* always fails */
7180 if (dictReplace(c
->db
->dict
,storekey
,listObject
)) {
7181 incrRefCount(storekey
);
7183 /* Note: we add 1 because the DB is dirty anyway since even if the
7184 * SORT result is empty a new key is set and maybe the old content
7186 server
.dirty
+= 1+outputlen
;
7187 addReplySds(c
,sdscatprintf(sdsempty(),":%d\r\n",outputlen
));
7191 decrRefCount(sortval
);
7192 listRelease(operations
);
7193 for (j
= 0; j
< vectorlen
; j
++) {
7194 if (alpha
&& vector
[j
].u
.cmpobj
)
7195 decrRefCount(vector
[j
].u
.cmpobj
);
/* Convert an amount of bytes into a human readable string in the form
 * of 100B, 4.00K, 100.00M, 2.00G, and so forth.
 *
 * 's' is the destination buffer, 'n' the number of bytes. Values below
 * 1024 are printed as an integer followed by 'B'; larger values are scaled
 * by powers of 1024 and printed with two decimals followed by the unit
 * letter (K, M, G, T, P, E).
 *
 * Every possible value of 'n' produces output: previously amounts of 1 TiB
 * or more matched no branch and left 's' untouched, so the caller would
 * print whatever garbage the buffer contained. At most 9 bytes, including
 * the nul terminator, are written to 's'. */
static void bytesToHuman(char *s, unsigned long long n) {
    static const char units[] = "KMGTPE";
    double d;
    int i = 0;

    if (n < 1024) {
        /* Bytes */
        sprintf(s,"%lluB",n);
        return;
    }

    /* Scale down by 1024 until the value fits the unit, stopping at the
     * largest unit so that the loop never runs past the table. */
    d = (double)n/1024;
    while (d >= 1024 && units[i+1] != '\0') {
        d /= 1024;
        i++;
    }
    sprintf(s,"%.2f%c",d,units[i]);
}
7221 /* Create the string returned by the INFO command. This is decoupled
7222 * by the INFO command itself as we need to report the same information
7223 * on memory corruption problems. */
7224 static sds
genRedisInfoString(void) {
7226 time_t uptime
= time(NULL
)-server
.stat_starttime
;
7230 bytesToHuman(hmem
,zmalloc_used_memory());
7231 info
= sdscatprintf(sdsempty(),
7232 "redis_version:%s\r\n"
7233 "redis_git_sha1:%s\r\n"
7234 "redis_git_dirty:%d\r\n"
7236 "multiplexing_api:%s\r\n"
7237 "process_id:%ld\r\n"
7238 "uptime_in_seconds:%ld\r\n"
7239 "uptime_in_days:%ld\r\n"
7240 "connected_clients:%d\r\n"
7241 "connected_slaves:%d\r\n"
7242 "blocked_clients:%d\r\n"
7243 "used_memory:%zu\r\n"
7244 "used_memory_human:%s\r\n"
7245 "changes_since_last_save:%lld\r\n"
7246 "bgsave_in_progress:%d\r\n"
7247 "last_save_time:%ld\r\n"
7248 "bgrewriteaof_in_progress:%d\r\n"
7249 "total_connections_received:%lld\r\n"
7250 "total_commands_processed:%lld\r\n"
7251 "expired_keys:%lld\r\n"
7252 "hash_max_zipmap_entries:%zu\r\n"
7253 "hash_max_zipmap_value:%zu\r\n"
7254 "pubsub_channels:%ld\r\n"
7255 "pubsub_patterns:%u\r\n"
7260 strtol(REDIS_GIT_DIRTY
,NULL
,10) > 0,
7261 (sizeof(long) == 8) ? "64" : "32",
7266 listLength(server
.clients
)-listLength(server
.slaves
),
7267 listLength(server
.slaves
),
7268 server
.blpop_blocked_clients
,
7269 zmalloc_used_memory(),
7272 server
.bgsavechildpid
!= -1,
7274 server
.bgrewritechildpid
!= -1,
7275 server
.stat_numconnections
,
7276 server
.stat_numcommands
,
7277 server
.stat_expiredkeys
,
7278 server
.hash_max_zipmap_entries
,
7279 server
.hash_max_zipmap_value
,
7280 dictSize(server
.pubsub_channels
),
7281 listLength(server
.pubsub_patterns
),
7282 server
.vm_enabled
!= 0,
7283 server
.masterhost
== NULL
? "master" : "slave"
7285 if (server
.masterhost
) {
7286 info
= sdscatprintf(info
,
7287 "master_host:%s\r\n"
7288 "master_port:%d\r\n"
7289 "master_link_status:%s\r\n"
7290 "master_last_io_seconds_ago:%d\r\n"
7293 (server
.replstate
== REDIS_REPL_CONNECTED
) ?
7295 server
.master
? ((int)(time(NULL
)-server
.master
->lastinteraction
)) : -1
7298 if (server
.vm_enabled
) {
7300 info
= sdscatprintf(info
,
7301 "vm_conf_max_memory:%llu\r\n"
7302 "vm_conf_page_size:%llu\r\n"
7303 "vm_conf_pages:%llu\r\n"
7304 "vm_stats_used_pages:%llu\r\n"
7305 "vm_stats_swapped_objects:%llu\r\n"
7306 "vm_stats_swappin_count:%llu\r\n"
7307 "vm_stats_swappout_count:%llu\r\n"
7308 "vm_stats_io_newjobs_len:%lu\r\n"
7309 "vm_stats_io_processing_len:%lu\r\n"
7310 "vm_stats_io_processed_len:%lu\r\n"
7311 "vm_stats_io_active_threads:%lu\r\n"
7312 "vm_stats_blocked_clients:%lu\r\n"
7313 ,(unsigned long long) server
.vm_max_memory
,
7314 (unsigned long long) server
.vm_page_size
,
7315 (unsigned long long) server
.vm_pages
,
7316 (unsigned long long) server
.vm_stats_used_pages
,
7317 (unsigned long long) server
.vm_stats_swapped_objects
,
7318 (unsigned long long) server
.vm_stats_swapins
,
7319 (unsigned long long) server
.vm_stats_swapouts
,
7320 (unsigned long) listLength(server
.io_newjobs
),
7321 (unsigned long) listLength(server
.io_processing
),
7322 (unsigned long) listLength(server
.io_processed
),
7323 (unsigned long) server
.io_active_threads
,
7324 (unsigned long) server
.vm_blocked_clients
7328 for (j
= 0; j
< server
.dbnum
; j
++) {
7329 long long keys
, vkeys
;
7331 keys
= dictSize(server
.db
[j
].dict
);
7332 vkeys
= dictSize(server
.db
[j
].expires
);
7333 if (keys
|| vkeys
) {
7334 info
= sdscatprintf(info
, "db%d:keys=%lld,expires=%lld\r\n",
7341 static void infoCommand(redisClient
*c
) {
7342 sds info
= genRedisInfoString();
7343 addReplySds(c
,sdscatprintf(sdsempty(),"$%lu\r\n",
7344 (unsigned long)sdslen(info
)));
7345 addReplySds(c
,info
);
7346 addReply(c
,shared
.crlf
);
7349 static void monitorCommand(redisClient
*c
) {
7350 /* ignore MONITOR if aleady slave or in monitor mode */
7351 if (c
->flags
& REDIS_SLAVE
) return;
7353 c
->flags
|= (REDIS_SLAVE
|REDIS_MONITOR
);
7355 listAddNodeTail(server
.monitors
,c
);
7356 addReply(c
,shared
.ok
);
7359 /* ================================= Expire ================================= */
7360 static int removeExpire(redisDb
*db
, robj
*key
) {
7361 if (dictDelete(db
->expires
,key
) == DICT_OK
) {
7368 static int setExpire(redisDb
*db
, robj
*key
, time_t when
) {
7369 if (dictAdd(db
->expires
,key
,(void*)when
) == DICT_ERR
) {
7377 /* Return the expire time of the specified key, or -1 if no expire
7378 * is associated with this key (i.e. the key is non volatile) */
7379 static time_t getExpire(redisDb
*db
, robj
*key
) {
7382 /* No expire? return ASAP */
7383 if (dictSize(db
->expires
) == 0 ||
7384 (de
= dictFind(db
->expires
,key
)) == NULL
) return -1;
7386 return (time_t) dictGetEntryVal(de
);
7389 static int expireIfNeeded(redisDb
*db
, robj
*key
) {
7393 /* No expire? return ASAP */
7394 if (dictSize(db
->expires
) == 0 ||
7395 (de
= dictFind(db
->expires
,key
)) == NULL
) return 0;
7397 /* Lookup the expire */
7398 when
= (time_t) dictGetEntryVal(de
);
7399 if (time(NULL
) <= when
) return 0;
7401 /* Delete the key */
7402 dictDelete(db
->expires
,key
);
7403 server
.stat_expiredkeys
++;
7404 return dictDelete(db
->dict
,key
) == DICT_OK
;
7407 static int deleteIfVolatile(redisDb
*db
, robj
*key
) {
7410 /* No expire? return ASAP */
7411 if (dictSize(db
->expires
) == 0 ||
7412 (de
= dictFind(db
->expires
,key
)) == NULL
) return 0;
7414 /* Delete the key */
7416 server
.stat_expiredkeys
++;
7417 dictDelete(db
->expires
,key
);
7418 return dictDelete(db
->dict
,key
) == DICT_OK
;
7421 static void expireGenericCommand(redisClient
*c
, robj
*key
, robj
*param
, long offset
) {
7425 if (getLongFromObjectOrReply(c
, param
, &seconds
, NULL
) != REDIS_OK
) return;
7429 de
= dictFind(c
->db
->dict
,key
);
7431 addReply(c
,shared
.czero
);
7435 if (deleteKey(c
->db
,key
)) server
.dirty
++;
7436 addReply(c
, shared
.cone
);
7439 time_t when
= time(NULL
)+seconds
;
7440 if (setExpire(c
->db
,key
,when
)) {
7441 addReply(c
,shared
.cone
);
7444 addReply(c
,shared
.czero
);
7450 static void expireCommand(redisClient
*c
) {
7451 expireGenericCommand(c
,c
->argv
[1],c
->argv
[2],0);
7454 static void expireatCommand(redisClient
*c
) {
7455 expireGenericCommand(c
,c
->argv
[1],c
->argv
[2],time(NULL
));
7458 static void ttlCommand(redisClient
*c
) {
7462 expire
= getExpire(c
->db
,c
->argv
[1]);
7464 ttl
= (int) (expire
-time(NULL
));
7465 if (ttl
< 0) ttl
= -1;
7467 addReplySds(c
,sdscatprintf(sdsempty(),":%d\r\n",ttl
));
7470 /* ================================ MULTI/EXEC ============================== */
7472 /* Client state initialization for MULTI/EXEC */
7473 static void initClientMultiState(redisClient
*c
) {
7474 c
->mstate
.commands
= NULL
;
7475 c
->mstate
.count
= 0;
7478 /* Release all the resources associated with MULTI/EXEC state */
7479 static void freeClientMultiState(redisClient
*c
) {
7482 for (j
= 0; j
< c
->mstate
.count
; j
++) {
7484 multiCmd
*mc
= c
->mstate
.commands
+j
;
7486 for (i
= 0; i
< mc
->argc
; i
++)
7487 decrRefCount(mc
->argv
[i
]);
7490 zfree(c
->mstate
.commands
);
7493 /* Add a new command into the MULTI commands queue */
7494 static void queueMultiCommand(redisClient
*c
, struct redisCommand
*cmd
) {
7498 c
->mstate
.commands
= zrealloc(c
->mstate
.commands
,
7499 sizeof(multiCmd
)*(c
->mstate
.count
+1));
7500 mc
= c
->mstate
.commands
+c
->mstate
.count
;
7503 mc
->argv
= zmalloc(sizeof(robj
*)*c
->argc
);
7504 memcpy(mc
->argv
,c
->argv
,sizeof(robj
*)*c
->argc
);
7505 for (j
= 0; j
< c
->argc
; j
++)
7506 incrRefCount(mc
->argv
[j
]);
7510 static void multiCommand(redisClient
*c
) {
7511 if (c
->flags
& REDIS_MULTI
) {
7512 addReplySds(c
,sdsnew("-ERR MULTI calls can not be nested\r\n"));
7515 c
->flags
|= REDIS_MULTI
;
7516 addReply(c
,shared
.ok
);
7519 static void discardCommand(redisClient
*c
) {
7520 if (!(c
->flags
& REDIS_MULTI
)) {
7521 addReplySds(c
,sdsnew("-ERR DISCARD without MULTI\r\n"));
7525 freeClientMultiState(c
);
7526 initClientMultiState(c
);
7527 c
->flags
&= (~REDIS_MULTI
);
7528 addReply(c
,shared
.ok
);
7531 /* Send a MULTI command to all the slaves and AOF file. Check the execCommand
7532 * implememntation for more information. */
7533 static void execCommandReplicateMulti(redisClient
*c
) {
7534 struct redisCommand
*cmd
;
7535 robj
*multistring
= createStringObject("MULTI",5);
7537 cmd
= lookupCommand("multi");
7538 if (server
.appendonly
)
7539 feedAppendOnlyFile(cmd
,c
->db
->id
,&multistring
,1);
7540 if (listLength(server
.slaves
))
7541 replicationFeedSlaves(server
.slaves
,c
->db
->id
,&multistring
,1);
7542 decrRefCount(multistring
);
7545 static void execCommand(redisClient
*c
) {
7550 if (!(c
->flags
& REDIS_MULTI
)) {
7551 addReplySds(c
,sdsnew("-ERR EXEC without MULTI\r\n"));
7555 /* Check if we need to abort the EXEC if some WATCHed key was touched.
7556 * A failed EXEC will return a multi bulk nil object. */
7557 if (c
->flags
& REDIS_DIRTY_CAS
) {
7558 freeClientMultiState(c
);
7559 initClientMultiState(c
);
7560 c
->flags
&= ~(REDIS_MULTI
|REDIS_DIRTY_CAS
);
7562 addReply(c
,shared
.nullmultibulk
);
7566 /* Replicate a MULTI request now that we are sure the block is executed.
7567 * This way we'll deliver the MULTI/..../EXEC block as a whole and
7568 * both the AOF and the replication link will have the same consistency
7569 * and atomicity guarantees. */
7570 execCommandReplicateMulti(c
);
7572 /* Exec all the queued commands */
7573 unwatchAllKeys(c
); /* Unwatch ASAP otherwise we'll waste CPU cycles */
7574 orig_argv
= c
->argv
;
7575 orig_argc
= c
->argc
;
7576 addReplySds(c
,sdscatprintf(sdsempty(),"*%d\r\n",c
->mstate
.count
));
7577 for (j
= 0; j
< c
->mstate
.count
; j
++) {
7578 c
->argc
= c
->mstate
.commands
[j
].argc
;
7579 c
->argv
= c
->mstate
.commands
[j
].argv
;
7580 call(c
,c
->mstate
.commands
[j
].cmd
);
7582 c
->argv
= orig_argv
;
7583 c
->argc
= orig_argc
;
7584 freeClientMultiState(c
);
7585 initClientMultiState(c
);
7586 c
->flags
&= ~(REDIS_MULTI
|REDIS_DIRTY_CAS
);
7587 /* Make sure the EXEC command is always replicated / AOF, since we
7588 * always send the MULTI command (we can't know beforehand if the
7589 * next operations will contain at least a modification to the DB). */
7593 /* =========================== Blocking Operations ========================= */
7595 /* Currently Redis blocking operations support is limited to list POP ops,
7596 * so the current implementation is not fully generic, but it is also not
7597 * completely specific so it will not require a rewrite to support new
7598 * kind of blocking operations in the future.
7600 * Still it's important to note that list blocking operations can be already
7601 * used as a notification mechanism in order to implement other blocking
7602 * operations at application level, so there must be a very strong evidence
7603 * of usefulness and generality before new blocking operations are implemented.
7605 * This is how the current blocking POP works, we use BLPOP as example:
7606 * - If the user calls BLPOP and the key exists and contains a non empty list
7607 * then LPOP is called instead. So BLPOP is semantically the same as LPOP
7608 * if there is no need to block.
7609 * - If instead BLPOP is called and the key does not exist or the list is
7610 * empty we need to block. In order to do so we remove the notification for
7611 * new data to read in the client socket (so that we'll not serve new
7612 * requests if the blocking request is not served). Also we put the client
7613 * in a dictionary (db->blocking_keys) mapping keys to a list of clients
7614 * blocking for these keys.
7615 * - If a PUSH operation against a key with blocked clients waiting is
7616 * performed, we serve the first in the list: basically instead of pushing
7617 * the new element inside the list we return it to the (first / oldest)
7618 * blocking client, unblock the client, and remove it from the list.
7620 * The above comment and the source code should be enough in order to understand
7621 * the implementation and modify / fix it later.
7624 /* Set a client in blocking mode for the specified key, with the specified
7626 static void blockForKeys(redisClient
*c
, robj
**keys
, int numkeys
, time_t timeout
) {
7631 c
->blocking_keys
= zmalloc(sizeof(robj
*)*numkeys
);
7632 c
->blocking_keys_num
= numkeys
;
7633 c
->blockingto
= timeout
;
7634 for (j
= 0; j
< numkeys
; j
++) {
7635 /* Add the key in the client structure, to map clients -> keys */
7636 c
->blocking_keys
[j
] = keys
[j
];
7637 incrRefCount(keys
[j
]);
7639 /* And in the other "side", to map keys -> clients */
7640 de
= dictFind(c
->db
->blocking_keys
,keys
[j
]);
7644 /* For every key we take a list of clients blocked for it */
7646 retval
= dictAdd(c
->db
->blocking_keys
,keys
[j
],l
);
7647 incrRefCount(keys
[j
]);
7648 assert(retval
== DICT_OK
);
7650 l
= dictGetEntryVal(de
);
7652 listAddNodeTail(l
,c
);
7654 /* Mark the client as a blocked client */
7655 c
->flags
|= REDIS_BLOCKED
;
7656 server
.blpop_blocked_clients
++;
7659 /* Unblock a client that's waiting in a blocking operation such as BLPOP */
7660 static void unblockClientWaitingData(redisClient
*c
) {
7665 assert(c
->blocking_keys
!= NULL
);
7666 /* The client may wait for multiple keys, so unblock it for every key. */
7667 for (j
= 0; j
< c
->blocking_keys_num
; j
++) {
7668 /* Remove this client from the list of clients waiting for this key. */
7669 de
= dictFind(c
->db
->blocking_keys
,c
->blocking_keys
[j
]);
7671 l
= dictGetEntryVal(de
);
7672 listDelNode(l
,listSearchKey(l
,c
));
7673 /* If the list is empty we need to remove it to avoid wasting memory */
7674 if (listLength(l
) == 0)
7675 dictDelete(c
->db
->blocking_keys
,c
->blocking_keys
[j
]);
7676 decrRefCount(c
->blocking_keys
[j
]);
7678 /* Cleanup the client structure */
7679 zfree(c
->blocking_keys
);
7680 c
->blocking_keys
= NULL
;
7681 c
->flags
&= (~REDIS_BLOCKED
);
7682 server
.blpop_blocked_clients
--;
7683 /* We want to process data if there is some command waiting
7684 * in the input buffer. Note that this is safe even if
7685 * unblockClientWaitingData() gets called from freeClient() because
7686 * freeClient() will be smart enough to call this function
7687 * *after* c->querybuf was set to NULL. */
7688 if (c
->querybuf
&& sdslen(c
->querybuf
) > 0) processInputBuffer(c
);
7691 /* This should be called from any function PUSHing into lists.
7692 * 'c' is the "pushing client", 'key' is the key it is pushing data against,
7693 * 'ele' is the element pushed.
7695 * If the function returns 0 there was no client waiting for a list push
7698 * If the function returns 1 there was a client waiting for a list push
7699 * against this key, the element was passed to this client thus it's not
7700 * needed to actually add it to the list and the caller should return asap. */
7701 static int handleClientsWaitingListPush(redisClient
*c
, robj
*key
, robj
*ele
) {
7702 struct dictEntry
*de
;
7703 redisClient
*receiver
;
7707 de
= dictFind(c
->db
->blocking_keys
,key
);
7708 if (de
== NULL
) return 0;
7709 l
= dictGetEntryVal(de
);
7712 receiver
= ln
->value
;
7714 addReplySds(receiver
,sdsnew("*2\r\n"));
7715 addReplyBulk(receiver
,key
);
7716 addReplyBulk(receiver
,ele
);
7717 unblockClientWaitingData(receiver
);
7721 /* Blocking RPOP/LPOP */
7722 static void blockingPopGenericCommand(redisClient
*c
, int where
) {
7727 for (j
= 1; j
< c
->argc
-1; j
++) {
7728 o
= lookupKeyWrite(c
->db
,c
->argv
[j
]);
7730 if (o
->type
!= REDIS_LIST
) {
7731 addReply(c
,shared
.wrongtypeerr
);
7734 list
*list
= o
->ptr
;
7735 if (listLength(list
) != 0) {
7736 /* If the list contains elements fall back to the usual
7737 * non-blocking POP operation */
7738 robj
*argv
[2], **orig_argv
;
7741 /* We need to alter the command arguments before to call
7742 * popGenericCommand() as the command takes a single key. */
7743 orig_argv
= c
->argv
;
7744 orig_argc
= c
->argc
;
7745 argv
[1] = c
->argv
[j
];
7749 /* Also the return value is different, we need to output
7750 * the multi bulk reply header and the key name. The
7751 * "real" command will add the last element (the value)
7752 * for us. If this souds like an hack to you it's just
7753 * because it is... */
7754 addReplySds(c
,sdsnew("*2\r\n"));
7755 addReplyBulk(c
,argv
[1]);
7756 popGenericCommand(c
,where
);
7758 /* Fix the client structure with the original stuff */
7759 c
->argv
= orig_argv
;
7760 c
->argc
= orig_argc
;
7766 /* If the list is empty or the key does not exists we must block */
7767 timeout
= strtol(c
->argv
[c
->argc
-1]->ptr
,NULL
,10);
7768 if (timeout
> 0) timeout
+= time(NULL
);
7769 blockForKeys(c
,c
->argv
+1,c
->argc
-2,timeout
);
7772 static void blpopCommand(redisClient
*c
) {
7773 blockingPopGenericCommand(c
,REDIS_HEAD
);
7776 static void brpopCommand(redisClient
*c
) {
7777 blockingPopGenericCommand(c
,REDIS_TAIL
);
7780 /* =============================== Replication ============================= */
7782 static int syncWrite(int fd
, char *ptr
, ssize_t size
, int timeout
) {
7783 ssize_t nwritten
, ret
= size
;
7784 time_t start
= time(NULL
);
7788 if (aeWait(fd
,AE_WRITABLE
,1000) & AE_WRITABLE
) {
7789 nwritten
= write(fd
,ptr
,size
);
7790 if (nwritten
== -1) return -1;
7794 if ((time(NULL
)-start
) > timeout
) {
7802 static int syncRead(int fd
, char *ptr
, ssize_t size
, int timeout
) {
7803 ssize_t nread
, totread
= 0;
7804 time_t start
= time(NULL
);
7808 if (aeWait(fd
,AE_READABLE
,1000) & AE_READABLE
) {
7809 nread
= read(fd
,ptr
,size
);
7810 if (nread
== -1) return -1;
7815 if ((time(NULL
)-start
) > timeout
) {
/* Read a line from 'fd' one byte at a time, storing it in 'ptr' as a
 * null terminated string without the trailing "\n" (a "\r" right before the
 * newline is stripped as well).
 *
 * 'size' is the capacity of 'ptr' in bytes: at most size-1 characters are
 * stored, so the buffer is never written past its end even when the peer
 * sends an over-long line. 'timeout' is forwarded to syncRead() for every
 * single byte.
 *
 * Returns the number of characters stored (line terminator included in the
 * count when it was a "\r" that got stripped, as before), or -1 if
 * syncRead() fails. */
static int syncReadLine(int fd, char *ptr, ssize_t size, int timeout) {
    ssize_t nread = 0;

    if (size < 1) return 0; /* no room even for the terminator */
    *ptr = '\0';
    size--; /* reserve one byte for the terminator */
    while(size > 0) {
        char c;

        if (syncRead(fd,&c,1,timeout) == -1) return -1;
        if (c == '\n') {
            *ptr = '\0';
            if (nread && *(ptr-1) == '\r') *(ptr-1) = '\0';
            return nread;
        }
        *ptr++ = c;
        *ptr = '\0';
        nread++;
        size--; /* one less free slot in the caller's buffer */
    }
    return nread;
}
7844 static void syncCommand(redisClient
*c
) {
7845 /* ignore SYNC if aleady slave or in monitor mode */
7846 if (c
->flags
& REDIS_SLAVE
) return;
7848 /* SYNC can't be issued when the server has pending data to send to
7849 * the client about already issued commands. We need a fresh reply
7850 * buffer registering the differences between the BGSAVE and the current
7851 * dataset, so that we can copy to other slaves if needed. */
7852 if (listLength(c
->reply
) != 0) {
7853 addReplySds(c
,sdsnew("-ERR SYNC is invalid with pending input\r\n"));
7857 redisLog(REDIS_NOTICE
,"Slave ask for synchronization");
7858 /* Here we need to check if there is a background saving operation
7859 * in progress, or if it is required to start one */
7860 if (server
.bgsavechildpid
!= -1) {
7861 /* Ok a background save is in progress. Let's check if it is a good
7862 * one for replication, i.e. if there is another slave that is
7863 * registering differences since the server forked to save */
7868 listRewind(server
.slaves
,&li
);
7869 while((ln
= listNext(&li
))) {
7871 if (slave
->replstate
== REDIS_REPL_WAIT_BGSAVE_END
) break;
7874 /* Perfect, the server is already registering differences for
7875 * another slave. Set the right state, and copy the buffer. */
7876 listRelease(c
->reply
);
7877 c
->reply
= listDup(slave
->reply
);
7878 c
->replstate
= REDIS_REPL_WAIT_BGSAVE_END
;
7879 redisLog(REDIS_NOTICE
,"Waiting for end of BGSAVE for SYNC");
7881 /* No way, we need to wait for the next BGSAVE in order to
7882 * register differences */
7883 c
->replstate
= REDIS_REPL_WAIT_BGSAVE_START
;
7884 redisLog(REDIS_NOTICE
,"Waiting for next BGSAVE for SYNC");
7887 /* Ok we don't have a BGSAVE in progress, let's start one */
7888 redisLog(REDIS_NOTICE
,"Starting BGSAVE for SYNC");
7889 if (rdbSaveBackground(server
.dbfilename
) != REDIS_OK
) {
7890 redisLog(REDIS_NOTICE
,"Replication failed, can't BGSAVE");
7891 addReplySds(c
,sdsnew("-ERR Unalbe to perform background save\r\n"));
7894 c
->replstate
= REDIS_REPL_WAIT_BGSAVE_END
;
7897 c
->flags
|= REDIS_SLAVE
;
7899 listAddNodeTail(server
.slaves
,c
);
7903 static void sendBulkToSlave(aeEventLoop
*el
, int fd
, void *privdata
, int mask
) {
7904 redisClient
*slave
= privdata
;
7906 REDIS_NOTUSED(mask
);
7907 char buf
[REDIS_IOBUF_LEN
];
7908 ssize_t nwritten
, buflen
;
7910 if (slave
->repldboff
== 0) {
7911 /* Write the bulk write count before to transfer the DB. In theory here
7912 * we don't know how much room there is in the output buffer of the
7913 * socket, but in pratice SO_SNDLOWAT (the minimum count for output
7914 * operations) will never be smaller than the few bytes we need. */
7917 bulkcount
= sdscatprintf(sdsempty(),"$%lld\r\n",(unsigned long long)
7919 if (write(fd
,bulkcount
,sdslen(bulkcount
)) != (signed)sdslen(bulkcount
))
7927 lseek(slave
->repldbfd
,slave
->repldboff
,SEEK_SET
);
7928 buflen
= read(slave
->repldbfd
,buf
,REDIS_IOBUF_LEN
);
7930 redisLog(REDIS_WARNING
,"Read error sending DB to slave: %s",
7931 (buflen
== 0) ? "premature EOF" : strerror(errno
));
7935 if ((nwritten
= write(fd
,buf
,buflen
)) == -1) {
7936 redisLog(REDIS_VERBOSE
,"Write error sending DB to slave: %s",
7941 slave
->repldboff
+= nwritten
;
7942 if (slave
->repldboff
== slave
->repldbsize
) {
7943 close(slave
->repldbfd
);
7944 slave
->repldbfd
= -1;
7945 aeDeleteFileEvent(server
.el
,slave
->fd
,AE_WRITABLE
);
7946 slave
->replstate
= REDIS_REPL_ONLINE
;
7947 if (aeCreateFileEvent(server
.el
, slave
->fd
, AE_WRITABLE
,
7948 sendReplyToClient
, slave
) == AE_ERR
) {
7952 addReplySds(slave
,sdsempty());
7953 redisLog(REDIS_NOTICE
,"Synchronization with slave succeeded");
7957 /* This function is called at the end of every backgrond saving.
7958 * The argument bgsaveerr is REDIS_OK if the background saving succeeded
7959 * otherwise REDIS_ERR is passed to the function.
7961 * The goal of this function is to handle slaves waiting for a successful
7962 * background saving in order to perform non-blocking synchronization. */
7963 static void updateSlavesWaitingBgsave(int bgsaveerr
) {
7965 int startbgsave
= 0;
7968 listRewind(server
.slaves
,&li
);
7969 while((ln
= listNext(&li
))) {
7970 redisClient
*slave
= ln
->value
;
7972 if (slave
->replstate
== REDIS_REPL_WAIT_BGSAVE_START
) {
7974 slave
->replstate
= REDIS_REPL_WAIT_BGSAVE_END
;
7975 } else if (slave
->replstate
== REDIS_REPL_WAIT_BGSAVE_END
) {
7976 struct redis_stat buf
;
7978 if (bgsaveerr
!= REDIS_OK
) {
7980 redisLog(REDIS_WARNING
,"SYNC failed. BGSAVE child returned an error");
7983 if ((slave
->repldbfd
= open(server
.dbfilename
,O_RDONLY
)) == -1 ||
7984 redis_fstat(slave
->repldbfd
,&buf
) == -1) {
7986 redisLog(REDIS_WARNING
,"SYNC failed. Can't open/stat DB after BGSAVE: %s", strerror(errno
));
7989 slave
->repldboff
= 0;
7990 slave
->repldbsize
= buf
.st_size
;
7991 slave
->replstate
= REDIS_REPL_SEND_BULK
;
7992 aeDeleteFileEvent(server
.el
,slave
->fd
,AE_WRITABLE
);
7993 if (aeCreateFileEvent(server
.el
, slave
->fd
, AE_WRITABLE
, sendBulkToSlave
, slave
) == AE_ERR
) {
8000 if (rdbSaveBackground(server
.dbfilename
) != REDIS_OK
) {
8003 listRewind(server
.slaves
,&li
);
8004 redisLog(REDIS_WARNING
,"SYNC failed. BGSAVE failed");
8005 while((ln
= listNext(&li
))) {
8006 redisClient
*slave
= ln
->value
;
8008 if (slave
->replstate
== REDIS_REPL_WAIT_BGSAVE_START
)
8015 static int syncWithMaster(void) {
8016 char buf
[1024], tmpfile
[256], authcmd
[1024];
8018 int fd
= anetTcpConnect(NULL
,server
.masterhost
,server
.masterport
);
8019 int dfd
, maxtries
= 5;
8022 redisLog(REDIS_WARNING
,"Unable to connect to MASTER: %s",
8027 /* AUTH with the master if required. */
8028 if(server
.masterauth
) {
8029 snprintf(authcmd
, 1024, "AUTH %s\r\n", server
.masterauth
);
8030 if (syncWrite(fd
, authcmd
, strlen(server
.masterauth
)+7, 5) == -1) {
8032 redisLog(REDIS_WARNING
,"Unable to AUTH to MASTER: %s",
8036 /* Read the AUTH result. */
8037 if (syncReadLine(fd
,buf
,1024,3600) == -1) {
8039 redisLog(REDIS_WARNING
,"I/O error reading auth result from MASTER: %s",
8043 if (buf
[0] != '+') {
8045 redisLog(REDIS_WARNING
,"Cannot AUTH to MASTER, is the masterauth password correct?");
8050 /* Issue the SYNC command */
8051 if (syncWrite(fd
,"SYNC \r\n",7,5) == -1) {
8053 redisLog(REDIS_WARNING
,"I/O error writing to MASTER: %s",
8057 /* Read the bulk write count */
8058 if (syncReadLine(fd
,buf
,1024,3600) == -1) {
8060 redisLog(REDIS_WARNING
,"I/O error reading bulk count from MASTER: %s",
8064 if (buf
[0] != '$') {
8066 redisLog(REDIS_WARNING
,"Bad protocol from MASTER, the first byte is not '$', are you sure the host and port are right?");
8069 dumpsize
= strtol(buf
+1,NULL
,10);
8070 redisLog(REDIS_NOTICE
,"Receiving %ld bytes data dump from MASTER",dumpsize
);
8071 /* Read the bulk write data on a temp file */
8073 snprintf(tmpfile
,256,
8074 "temp-%d.%ld.rdb",(int)time(NULL
),(long int)getpid());
8075 dfd
= open(tmpfile
,O_CREAT
|O_WRONLY
|O_EXCL
,0644);
8076 if (dfd
!= -1) break;
8081 redisLog(REDIS_WARNING
,"Opening the temp file needed for MASTER <-> SLAVE synchronization: %s",strerror(errno
));
8085 int nread
, nwritten
;
8087 nread
= read(fd
,buf
,(dumpsize
< 1024)?dumpsize
:1024);
8089 redisLog(REDIS_WARNING
,"I/O error trying to sync with MASTER: %s",
8095 nwritten
= write(dfd
,buf
,nread
);
8096 if (nwritten
== -1) {
8097 redisLog(REDIS_WARNING
,"Write error writing to the DB dump file needed for MASTER <-> SLAVE synchrnonization: %s", strerror(errno
));
8105 if (rename(tmpfile
,server
.dbfilename
) == -1) {
8106 redisLog(REDIS_WARNING
,"Failed trying to rename the temp DB into dump.rdb in MASTER <-> SLAVE synchronization: %s", strerror(errno
));
8112 if (rdbLoad(server
.dbfilename
) != REDIS_OK
) {
8113 redisLog(REDIS_WARNING
,"Failed trying to load the MASTER synchronization DB from disk");
8117 server
.master
= createClient(fd
);
8118 server
.master
->flags
|= REDIS_MASTER
;
8119 server
.master
->authenticated
= 1;
8120 server
.replstate
= REDIS_REPL_CONNECTED
;
8124 static void slaveofCommand(redisClient
*c
) {
8125 if (!strcasecmp(c
->argv
[1]->ptr
,"no") &&
8126 !strcasecmp(c
->argv
[2]->ptr
,"one")) {
8127 if (server
.masterhost
) {
8128 sdsfree(server
.masterhost
);
8129 server
.masterhost
= NULL
;
8130 if (server
.master
) freeClient(server
.master
);
8131 server
.replstate
= REDIS_REPL_NONE
;
8132 redisLog(REDIS_NOTICE
,"MASTER MODE enabled (user request)");
8135 sdsfree(server
.masterhost
);
8136 server
.masterhost
= sdsdup(c
->argv
[1]->ptr
);
8137 server
.masterport
= atoi(c
->argv
[2]->ptr
);
8138 if (server
.master
) freeClient(server
.master
);
8139 server
.replstate
= REDIS_REPL_CONNECT
;
8140 redisLog(REDIS_NOTICE
,"SLAVE OF %s:%d enabled (user request)",
8141 server
.masterhost
, server
.masterport
);
8143 addReply(c
,shared
.ok
);
8146 /* ============================ Maxmemory directive ======================== */
8148 /* Try to free one object form the pre-allocated objects free list.
8149 * This is useful under low mem conditions as by default we take 1 million
8150 * free objects allocated. On success REDIS_OK is returned, otherwise
8152 static int tryFreeOneObjectFromFreelist(void) {
8155 if (server
.vm_enabled
) pthread_mutex_lock(&server
.obj_freelist_mutex
);
8156 if (listLength(server
.objfreelist
)) {
8157 listNode
*head
= listFirst(server
.objfreelist
);
8158 o
= listNodeValue(head
);
8159 listDelNode(server
.objfreelist
,head
);
8160 if (server
.vm_enabled
) pthread_mutex_unlock(&server
.obj_freelist_mutex
);
8164 if (server
.vm_enabled
) pthread_mutex_unlock(&server
.obj_freelist_mutex
);
8169 /* This function gets called when 'maxmemory' is set on the config file to limit
8170 * the max memory used by the server, and we are out of memory.
8171 * This function will try to, in order:
8173 * - Free objects from the free list
8174 * - Try to remove keys with an EXPIRE set
8176 * It is not possible to free enough memory to reach used-memory < maxmemory
8177 * the server will start refusing commands that will enlarge even more the
8180 static void freeMemoryIfNeeded(void) {
8181 while (server
.maxmemory
&& zmalloc_used_memory() > server
.maxmemory
) {
8182 int j
, k
, freed
= 0;
8184 if (tryFreeOneObjectFromFreelist() == REDIS_OK
) continue;
8185 for (j
= 0; j
< server
.dbnum
; j
++) {
8187 robj
*minkey
= NULL
;
8188 struct dictEntry
*de
;
8190 if (dictSize(server
.db
[j
].expires
)) {
8192 /* From a sample of three keys drop the one nearest to
8193 * the natural expire */
8194 for (k
= 0; k
< 3; k
++) {
8197 de
= dictGetRandomKey(server
.db
[j
].expires
);
8198 t
= (time_t) dictGetEntryVal(de
);
8199 if (minttl
== -1 || t
< minttl
) {
8200 minkey
= dictGetEntryKey(de
);
8204 deleteKey(server
.db
+j
,minkey
);
8207 if (!freed
) return; /* nothing to free... */
8211 /* ============================== Append Only file ========================== */
8213 /* Write the append only file buffer on disk.
8215 * Since we are required to write the AOF before replying to the client,
8216 * and the only way the client socket can get a write is entering when the
8217 * the event loop, we accumulate all the AOF writes in a memory
8218 * buffer and write it on disk using this function just before entering
8219 * the event loop again. */
8220 static void flushAppendOnlyFile(void) {
8224 if (sdslen(server
.aofbuf
) == 0) return;
8226 /* We want to perform a single write. This should be guaranteed atomic
8227 * at least if the filesystem we are writing is a real physical one.
8228 * While this will save us against the server being killed I don't think
8229 * there is much to do about the whole server stopping for power problems
8231 nwritten
= write(server
.appendfd
,server
.aofbuf
,sdslen(server
.aofbuf
));
8232 if (nwritten
!= (signed)sdslen(server
.aofbuf
)) {
8233 /* Ooops, we are in troubles. The best thing to do for now is
8234 * aborting instead of giving the illusion that everything is
8235 * working as expected. */
8236 if (nwritten
== -1) {
8237 redisLog(REDIS_WARNING
,"Exiting on error writing to the append-only file: %s",strerror(errno
));
8239 redisLog(REDIS_WARNING
,"Exiting on short write while writing to the append-only file: %s",strerror(errno
));
8243 sdsfree(server
.aofbuf
);
8244 server
.aofbuf
= sdsempty();
8246 /* Don't Fsync if no-appendfsync-on-rewrite is set to yes and we have
8247 * childs performing heavy I/O on disk. */
8248 if (server
.no_appendfsync_on_rewrite
&&
8249 (server
.bgrewritechildpid
!= -1 || server
.bgsavechildpid
!= -1))
8251 /* Fsync if needed */
8253 if (server
.appendfsync
== APPENDFSYNC_ALWAYS
||
8254 (server
.appendfsync
== APPENDFSYNC_EVERYSEC
&&
8255 now
-server
.lastfsync
> 1))
8257 /* aof_fsync is defined as fdatasync() for Linux in order to avoid
8258 * flushing metadata. */
8259 aof_fsync(server
.appendfd
); /* Let's try to get this data on the disk */
8260 server
.lastfsync
= now
;
8264 static sds
catAppendOnlyGenericCommand(sds buf
, int argc
, robj
**argv
) {
8266 buf
= sdscatprintf(buf
,"*%d\r\n",argc
);
8267 for (j
= 0; j
< argc
; j
++) {
8268 robj
*o
= getDecodedObject(argv
[j
]);
8269 buf
= sdscatprintf(buf
,"$%lu\r\n",(unsigned long)sdslen(o
->ptr
));
8270 buf
= sdscatlen(buf
,o
->ptr
,sdslen(o
->ptr
));
8271 buf
= sdscatlen(buf
,"\r\n",2);
8277 static sds
catAppendOnlyExpireAtCommand(sds buf
, robj
*key
, robj
*seconds
) {
8282 /* Make sure we can use strtol */
8283 seconds
= getDecodedObject(seconds
);
8284 when
= time(NULL
)+strtol(seconds
->ptr
,NULL
,10);
8285 decrRefCount(seconds
);
8287 argv
[0] = createStringObject("EXPIREAT",8);
8289 argv
[2] = createObject(REDIS_STRING
,
8290 sdscatprintf(sdsempty(),"%ld",when
));
8291 buf
= catAppendOnlyGenericCommand(buf
, argc
, argv
);
8292 decrRefCount(argv
[0]);
8293 decrRefCount(argv
[2]);
8297 static void feedAppendOnlyFile(struct redisCommand
*cmd
, int dictid
, robj
**argv
, int argc
) {
8298 sds buf
= sdsempty();
8301 /* The DB this command was targetting is not the same as the last command
8302 * we appendend. To issue a SELECT command is needed. */
8303 if (dictid
!= server
.appendseldb
) {
8306 snprintf(seldb
,sizeof(seldb
),"%d",dictid
);
8307 buf
= sdscatprintf(buf
,"*2\r\n$6\r\nSELECT\r\n$%lu\r\n%s\r\n",
8308 (unsigned long)strlen(seldb
),seldb
);
8309 server
.appendseldb
= dictid
;
8312 if (cmd
->proc
== expireCommand
) {
8313 /* Translate EXPIRE into EXPIREAT */
8314 buf
= catAppendOnlyExpireAtCommand(buf
,argv
[1],argv
[2]);
8315 } else if (cmd
->proc
== setexCommand
) {
8316 /* Translate SETEX to SET and EXPIREAT */
8317 tmpargv
[0] = createStringObject("SET",3);
8318 tmpargv
[1] = argv
[1];
8319 tmpargv
[2] = argv
[3];
8320 buf
= catAppendOnlyGenericCommand(buf
,3,tmpargv
);
8321 decrRefCount(tmpargv
[0]);
8322 buf
= catAppendOnlyExpireAtCommand(buf
,argv
[1],argv
[2]);
8324 buf
= catAppendOnlyGenericCommand(buf
,argc
,argv
);
8327 /* Append to the AOF buffer. This will be flushed on disk just before
8328 * of re-entering the event loop, so before the client will get a
8329 * positive reply about the operation performed. */
8330 server
.aofbuf
= sdscatlen(server
.aofbuf
,buf
,sdslen(buf
));
8332 /* If a background append only file rewriting is in progress we want to
8333 * accumulate the differences between the child DB and the current one
8334 * in a buffer, so that when the child process will do its work we
8335 * can append the differences to the new append only file. */
8336 if (server
.bgrewritechildpid
!= -1)
8337 server
.bgrewritebuf
= sdscatlen(server
.bgrewritebuf
,buf
,sdslen(buf
));
8342 /* In Redis commands are always executed in the context of a client, so in
8343 * order to load the append only file we need to create a fake client. */
/* The client structure is allocated with zmalloc(); freeFakeClient() is the
 * matching release function.
 * NOTE(review): only part of the field initialization is visible in this
 * listing -- confirm the remaining fields against the complete source. */
8344 static struct redisClient
*createFakeClient(void) {
8345 struct redisClient
*c
= zmalloc(sizeof(*c
));
/* Start with an empty query buffer. */
8349 c
->querybuf
= sdsempty();
8353 /* We set the fake client as a slave waiting for the synchronization
8354 * so that Redis will not try to send replies to this client. */
8355 c
->replstate
= REDIS_REPL_WAIT_BGSAVE_START
;
/* Reply list: elements are released with decrRefCount and duplicated with
 * dupClientReplyValue. */
8356 c
->reply
= listCreate();
8357 listSetFreeMethod(c
->reply
,decrRefCount
);
8358 listSetDupMethod(c
->reply
,dupClientReplyValue
);
/* Set up the MULTI state of the client. */
8359 initClientMultiState(c
);
8363 static void freeFakeClient(struct redisClient
*c
) {
8364 sdsfree(c
->querybuf
);
8365 listRelease(c
->reply
);
8366 freeClientMultiState(c
);
8370 /* Replay the append log file. On error REDIS_OK is returned. On non fatal
8371 * error (the append only file is zero-length) REDIS_ERR is returned. On
8372 * fatal error an error message is logged and the program exists. */
8373 int loadAppendOnlyFile(char *filename
) {
8374 struct redisClient
*fakeClient
;
8375 FILE *fp
= fopen(filename
,"r");
8376 struct redis_stat sb
;
8377 unsigned long long loadedkeys
= 0;
8378 int appendonly
= server
.appendonly
;
8380 if (redis_fstat(fileno(fp
),&sb
) != -1 && sb
.st_size
== 0)
8384 redisLog(REDIS_WARNING
,"Fatal error: can't open the append log file for reading: %s",strerror(errno
));
8388 /* Temporarily disable AOF, to prevent EXEC from feeding a MULTI
8389 * to the same file we're about to read. */
8390 server
.appendonly
= 0;
8392 fakeClient
= createFakeClient();
8399 struct redisCommand
*cmd
;
8401 if (fgets(buf
,sizeof(buf
),fp
) == NULL
) {
8407 if (buf
[0] != '*') goto fmterr
;
8409 argv
= zmalloc(sizeof(robj
*)*argc
);
8410 for (j
= 0; j
< argc
; j
++) {
8411 if (fgets(buf
,sizeof(buf
),fp
) == NULL
) goto readerr
;
8412 if (buf
[0] != '$') goto fmterr
;
8413 len
= strtol(buf
+1,NULL
,10);
8414 argsds
= sdsnewlen(NULL
,len
);
8415 if (len
&& fread(argsds
,len
,1,fp
) == 0) goto fmterr
;
8416 argv
[j
] = createObject(REDIS_STRING
,argsds
);
8417 if (fread(buf
,2,1,fp
) == 0) goto fmterr
; /* discard CRLF */
8420 /* Command lookup */
8421 cmd
= lookupCommand(argv
[0]->ptr
);
8423 redisLog(REDIS_WARNING
,"Unknown command '%s' reading the append only file", argv
[0]->ptr
);
8426 /* Try object encoding */
8427 if (cmd
->flags
& REDIS_CMD_BULK
)
8428 argv
[argc
-1] = tryObjectEncoding(argv
[argc
-1]);
8429 /* Run the command in the context of a fake client */
8430 fakeClient
->argc
= argc
;
8431 fakeClient
->argv
= argv
;
8432 cmd
->proc(fakeClient
);
8433 /* Discard the reply objects list from the fake client */
8434 while(listLength(fakeClient
->reply
))
8435 listDelNode(fakeClient
->reply
,listFirst(fakeClient
->reply
));
8436 /* Clean up, ready for the next command */
8437 for (j
= 0; j
< argc
; j
++) decrRefCount(argv
[j
]);
8439 /* Handle swapping while loading big datasets when VM is on */
8441 if (server
.vm_enabled
&& (loadedkeys
% 5000) == 0) {
8442 while (zmalloc_used_memory() > server
.vm_max_memory
) {
8443 if (vmSwapOneObjectBlocking() == REDIS_ERR
) break;
8448 /* This point can only be reached when EOF is reached without errors.
8449 * If the client is in the middle of a MULTI/EXEC, log error and quit. */
8450 if (fakeClient
->flags
& REDIS_MULTI
) goto readerr
;
8453 freeFakeClient(fakeClient
);
8454 server
.appendonly
= appendonly
;
8459 redisLog(REDIS_WARNING
,"Unexpected end of file reading the append only file");
8461 redisLog(REDIS_WARNING
,"Unrecoverable error reading the append only file: %s", strerror(errno
));
8465 redisLog(REDIS_WARNING
,"Bad file format reading the append only file");
8469 /* Write an object into a file in the bulk format $<count>\r\n<payload>\r\n */
8470 static int fwriteBulkObject(FILE *fp
, robj
*obj
) {
8474 /* Avoid the incr/decr ref count business if possible to help
8475 * copy-on-write (we are often in a child process when this function
8477 * Also makes sure that key objects don't get incrRefCount-ed when VM
8479 if (obj
->encoding
!= REDIS_ENCODING_RAW
) {
8480 obj
= getDecodedObject(obj
);
8483 snprintf(buf
,sizeof(buf
),"$%ld\r\n",(long)sdslen(obj
->ptr
));
8484 if (fwrite(buf
,strlen(buf
),1,fp
) == 0) goto err
;
8485 if (sdslen(obj
->ptr
) && fwrite(obj
->ptr
,sdslen(obj
->ptr
),1,fp
) == 0)
8487 if (fwrite("\r\n",2,1,fp
) == 0) goto err
;
8488 if (decrrc
) decrRefCount(obj
);
8491 if (decrrc
) decrRefCount(obj
);
/* Write the binary-safe string 's' of 'len' bytes to 'fp' in bulk format:
 * $<count>\r\n<payload>\r\n
 *
 * Returns 1 on success, 0 on write error. */
static int fwriteBulkString(FILE *fp, char *s, unsigned long len) {
    char buf[128];

    /* 'len' is unsigned: it must be printed with %lu, not %ld */
    snprintf(buf,sizeof(buf),"$%lu\r\n",len);
    if (fwrite(buf,strlen(buf),1,fp) == 0) return 0;
    if (len && fwrite(s,len,1,fp) == 0) return 0;
    if (fwrite("\r\n",2,1,fp) == 0) return 0;
    return 1;
}
/* Write the double 'd' to 'fp' in bulk format: $<count>\r\n<payload>\r\n
 * The payload is the value formatted with "%.17g".
 *
 * Returns 1 on success, 0 on write error. */
static int fwriteBulkDouble(FILE *fp, double d) {
    char payload[128], header[128];
    size_t paylen;

    /* The payload is formatted together with its trailing CRLF, which must
     * not be counted in the bulk length. */
    snprintf(payload,sizeof(payload),"%.17g\r\n",d);
    paylen = strlen(payload);
    snprintf(header,sizeof(header),"$%lu\r\n",(unsigned long)paylen-2);

    if (fwrite(header,strlen(header),1,fp) == 0 ||
        fwrite(payload,paylen,1,fp) == 0)
        return 0;
    return 1;
}
/* Write the long 'l' to 'fp' in bulk format: $<count>\r\n<payload>\r\n
 * The payload is the decimal representation of the value.
 *
 * Returns 1 on success, 0 on write error. */
static int fwriteBulkLong(FILE *fp, long l) {
    char payload[128], header[128];
    size_t paylen;

    /* The payload is formatted together with its trailing CRLF, which must
     * not be counted in the bulk length. */
    snprintf(payload,sizeof(payload),"%ld\r\n",l);
    paylen = strlen(payload);
    snprintf(header,sizeof(header),"$%lu\r\n",(unsigned long)paylen-2);

    if (fwrite(header,strlen(header),1,fp) == 0 ||
        fwrite(payload,paylen,1,fp) == 0)
        return 0;
    return 1;
}
8529 /* Write a sequence of commands able to fully rebuild the dataset into
8530 * "filename". Used both by REWRITEAOF and BGREWRITEAOF. */
8531 static int rewriteAppendOnlyFile(char *filename
) {
8532 dictIterator
*di
= NULL
;
8537 time_t now
= time(NULL
);
8539 /* Note that we have to use a different temp name here compared to the
8540 * one used by rewriteAppendOnlyFileBackground() function. */
8541 snprintf(tmpfile
,256,"temp-rewriteaof-%d.aof", (int) getpid());
8542 fp
= fopen(tmpfile
,"w");
8544 redisLog(REDIS_WARNING
, "Failed rewriting the append only file: %s", strerror(errno
));
8547 for (j
= 0; j
< server
.dbnum
; j
++) {
8548 char selectcmd
[] = "*2\r\n$6\r\nSELECT\r\n";
8549 redisDb
*db
= server
.db
+j
;
8551 if (dictSize(d
) == 0) continue;
8552 di
= dictGetIterator(d
);
8558 /* SELECT the new DB */
8559 if (fwrite(selectcmd
,sizeof(selectcmd
)-1,1,fp
) == 0) goto werr
;
8560 if (fwriteBulkLong(fp
,j
) == 0) goto werr
;
8562 /* Iterate this DB writing every entry */
8563 while((de
= dictNext(di
)) != NULL
) {
8568 key
= dictGetEntryKey(de
);
8569 /* If the value for this key is swapped, load a preview in memory.
8570 * We use a "swapped" flag to remember if we need to free the
8571 * value object instead to just increment the ref count anyway
8572 * in order to avoid copy-on-write of pages if we are forked() */
8573 if (!server
.vm_enabled
|| key
->storage
== REDIS_VM_MEMORY
||
8574 key
->storage
== REDIS_VM_SWAPPING
) {
8575 o
= dictGetEntryVal(de
);
8578 o
= vmPreviewObject(key
);
8581 expiretime
= getExpire(db
,key
);
8583 /* Save the key and associated value */
8584 if (o
->type
== REDIS_STRING
) {
8585 /* Emit a SET command */
8586 char cmd
[]="*3\r\n$3\r\nSET\r\n";
8587 if (fwrite(cmd
,sizeof(cmd
)-1,1,fp
) == 0) goto werr
;
8589 if (fwriteBulkObject(fp
,key
) == 0) goto werr
;
8590 if (fwriteBulkObject(fp
,o
) == 0) goto werr
;
8591 } else if (o
->type
== REDIS_LIST
) {
8592 /* Emit the RPUSHes needed to rebuild the list */
8593 list
*list
= o
->ptr
;
8597 listRewind(list
,&li
);
8598 while((ln
= listNext(&li
))) {
8599 char cmd
[]="*3\r\n$5\r\nRPUSH\r\n";
8600 robj
*eleobj
= listNodeValue(ln
);
8602 if (fwrite(cmd
,sizeof(cmd
)-1,1,fp
) == 0) goto werr
;
8603 if (fwriteBulkObject(fp
,key
) == 0) goto werr
;
8604 if (fwriteBulkObject(fp
,eleobj
) == 0) goto werr
;
8606 } else if (o
->type
== REDIS_SET
) {
8607 /* Emit the SADDs needed to rebuild the set */
8609 dictIterator
*di
= dictGetIterator(set
);
8612 while((de
= dictNext(di
)) != NULL
) {
8613 char cmd
[]="*3\r\n$4\r\nSADD\r\n";
8614 robj
*eleobj
= dictGetEntryKey(de
);
8616 if (fwrite(cmd
,sizeof(cmd
)-1,1,fp
) == 0) goto werr
;
8617 if (fwriteBulkObject(fp
,key
) == 0) goto werr
;
8618 if (fwriteBulkObject(fp
,eleobj
) == 0) goto werr
;
8620 dictReleaseIterator(di
);
8621 } else if (o
->type
== REDIS_ZSET
) {
8622 /* Emit the ZADDs needed to rebuild the sorted set */
8624 dictIterator
*di
= dictGetIterator(zs
->dict
);
8627 while((de
= dictNext(di
)) != NULL
) {
8628 char cmd
[]="*4\r\n$4\r\nZADD\r\n";
8629 robj
*eleobj
= dictGetEntryKey(de
);
8630 double *score
= dictGetEntryVal(de
);
8632 if (fwrite(cmd
,sizeof(cmd
)-1,1,fp
) == 0) goto werr
;
8633 if (fwriteBulkObject(fp
,key
) == 0) goto werr
;
8634 if (fwriteBulkDouble(fp
,*score
) == 0) goto werr
;
8635 if (fwriteBulkObject(fp
,eleobj
) == 0) goto werr
;
8637 dictReleaseIterator(di
);
8638 } else if (o
->type
== REDIS_HASH
) {
8639 char cmd
[]="*4\r\n$4\r\nHSET\r\n";
8641 /* Emit the HSETs needed to rebuild the hash */
8642 if (o
->encoding
== REDIS_ENCODING_ZIPMAP
) {
8643 unsigned char *p
= zipmapRewind(o
->ptr
);
8644 unsigned char *field
, *val
;
8645 unsigned int flen
, vlen
;
8647 while((p
= zipmapNext(p
,&field
,&flen
,&val
,&vlen
)) != NULL
) {
8648 if (fwrite(cmd
,sizeof(cmd
)-1,1,fp
) == 0) goto werr
;
8649 if (fwriteBulkObject(fp
,key
) == 0) goto werr
;
8650 if (fwriteBulkString(fp
,(char*)field
,flen
) == -1)
8652 if (fwriteBulkString(fp
,(char*)val
,vlen
) == -1)
8656 dictIterator
*di
= dictGetIterator(o
->ptr
);
8659 while((de
= dictNext(di
)) != NULL
) {
8660 robj
*field
= dictGetEntryKey(de
);
8661 robj
*val
= dictGetEntryVal(de
);
8663 if (fwrite(cmd
,sizeof(cmd
)-1,1,fp
) == 0) goto werr
;
8664 if (fwriteBulkObject(fp
,key
) == 0) goto werr
;
8665 if (fwriteBulkObject(fp
,field
) == -1) return -1;
8666 if (fwriteBulkObject(fp
,val
) == -1) return -1;
8668 dictReleaseIterator(di
);
8671 redisPanic("Unknown object type");
8673 /* Save the expire time */
8674 if (expiretime
!= -1) {
8675 char cmd
[]="*3\r\n$8\r\nEXPIREAT\r\n";
8676 /* If this key is already expired skip it */
8677 if (expiretime
< now
) continue;
8678 if (fwrite(cmd
,sizeof(cmd
)-1,1,fp
) == 0) goto werr
;
8679 if (fwriteBulkObject(fp
,key
) == 0) goto werr
;
8680 if (fwriteBulkLong(fp
,expiretime
) == 0) goto werr
;
8682 if (swapped
) decrRefCount(o
);
8684 dictReleaseIterator(di
);
8687 /* Make sure data will not remain on the OS's output buffers */
8689 aof_fsync(fileno(fp
));
8692 /* Use RENAME to make sure the DB file is changed atomically only
8693 * if the generate DB file is ok. */
8694 if (rename(tmpfile
,filename
) == -1) {
8695 redisLog(REDIS_WARNING
,"Error moving temp append only file on the final destination: %s", strerror(errno
));
8699 redisLog(REDIS_NOTICE
,"SYNC append only file rewrite performed");
8705 redisLog(REDIS_WARNING
,"Write error writing append only file on disk: %s", strerror(errno
));
8706 if (di
) dictReleaseIterator(di
);
8710 /* This is how rewriting of the append only file in background works:
8712 * 1) The user calls BGREWRITEAOF
8713 * 2) Redis calls this function, that forks():
8714 * 2a) the child rewrite the append only file in a temp file.
8715 * 2b) the parent accumulates differences in server.bgrewritebuf.
8716 * 3) When the child finishes '2a' it exits.
8717 * 4) The parent will trap the exit code, if it's OK, will append the
8718 * data accumulated into server.bgrewritebuf into the temp file, and
8719 * finally will rename(2) the temp file in the actual file name.
8720 * Then the new file is reopened as the new append only file. Profit!
/* Start a background rewrite of the append only file by forking a child
 * that calls rewriteAppendOnlyFile() on a per-pid temp file.
 * Returns REDIS_ERR if a rewrite is already in progress or (per the log
 * message below) when fork() fails; REDIS_OK once the child is started.
 * NOTE(review): several lines of this function (the child's exit path among
 * them) are not visible in this listing -- confirm against the full source. */
8722 static int rewriteAppendOnlyFileBackground(void) {
/* Only one background rewrite at a time. */
8725 if (server
.bgrewritechildpid
!= -1) return REDIS_ERR
;
/* With VM enabled, wait for the I/O jobs queue to be empty before forking. */
8726 if (server
.vm_enabled
) waitEmptyIOJobsQueue();
/* fork() returns 0 in the child: the branch below is the child side. */
8727 if ((childpid
= fork()) == 0) {
/* With VM enabled the child reopens the swap file. */
8731 if (server
.vm_enabled
) vmReopenSwapFile();
/* The temp file name embeds the child pid; aofRemoveTempFile() builds the
 * same name from the pid in order to remove the file. */
8733 snprintf(tmpfile
,256,"temp-rewriteaof-bg-%d.aof", (int) getpid());
8734 if (rewriteAppendOnlyFile(tmpfile
) == REDIS_OK
) {
/* Parent side: childpid == -1 means that fork() failed. */
8741 if (childpid
== -1) {
8742 redisLog(REDIS_WARNING
,
8743 "Can't rewrite append only file in background: fork: %s",
8747 redisLog(REDIS_NOTICE
,
8748 "Background append only file rewriting started by pid %d",childpid
);
/* Remember the child pid: a value different from -1 marks the rewrite as
 * in progress (see the check at the top of this function). */
8749 server
.bgrewritechildpid
= childpid
;
8750 updateDictResizePolicy();
8751 /* We set appendseldb to -1 in order to force the next call to the
8752 * feedAppendOnlyFile() to issue a SELECT command, so the differences
8753 * accumulated by the parent into server.bgrewritebuf will start
8754 * with a SELECT statement and it will be safe to merge. */
8755 server
.appendseldb
= -1;
8758 return REDIS_OK
; /* unreached */
8761 static void bgrewriteaofCommand(redisClient
*c
) {
8762 if (server
.bgrewritechildpid
!= -1) {
8763 addReplySds(c
,sdsnew("-ERR background append only file rewriting already in progress\r\n"));
8766 if (rewriteAppendOnlyFileBackground() == REDIS_OK
) {
8767 char *status
= "+Background append only file rewriting started\r\n";
8768 addReplySds(c
,sdsnew(status
));
8770 addReply(c
,shared
.err
);
/* Remove the temp file a background AOF rewrite child with pid 'childpid'
 * writes to. The name is built with the same pattern used by the child in
 * rewriteAppendOnlyFileBackground(). */
static void aofRemoveTempFile(pid_t childpid) {
    char path[256];

    snprintf(path,sizeof(path),"temp-rewriteaof-bg-%d.aof", (int) childpid);
    unlink(path);
}
8781 /* Virtual Memory is composed mainly of two subsystems:
8782 * - Blocking Virtual Memory
8783 * - Threaded Virtual Memory I/O
8784 * The two parts are not fully decoupled, but functions are split among two
8785 * different sections of the source code (delimited by comments) in order to
8786 * make more clear what functionality is about the blocking VM and what about
8787 * the threaded (not blocking) VM.
8791 * Redis VM is a blocking VM (one that blocks reading swapped values from
8792 * disk into memory when a value swapped out is needed in memory) that is made
8793 * unblocking by trying to examine the command argument vector in order to
8794 * load in background values that will likely be needed in order to exec
8795 * the command. The command is executed only once all the relevant keys
8796 * are loaded into memory.
8798 * This is basically almost as simple as a blocking VM, but almost as parallel
8799 * as a fully non-blocking VM.
8802 /* Called when the user switches from "appendonly yes" to "appendonly no"
8803 * at runtime using the CONFIG command. */
8804 static void stopAppendOnly(void) {
8805 flushAppendOnlyFile();
8806 aof_fsync(server
.appendfd
);
8807 close(server
.appendfd
);
8809 server
.appendfd
= -1;
8810 server
.appendseldb
= -1;
8811 server
.appendonly
= 0;
8812 /* rewrite operation in progress? kill it, wait child exit */
8813 if (server
.bgsavechildpid
!= -1) {
8816 if (kill(server
.bgsavechildpid
,SIGKILL
) != -1)
8817 wait3(&statloc
,0,NULL
);
8818 /* reset the buffer accumulating changes while the child saves */
8819 sdsfree(server
.bgrewritebuf
);
8820 server
.bgrewritebuf
= sdsempty();
8821 server
.bgsavechildpid
= -1;
8825 /* Called when the user switches from "appendonly no" to "appendonly yes"
8826 * at runtime using the CONFIG command. */
8827 static int startAppendOnly(void) {
8828 server
.appendonly
= 1;
8829 server
.lastfsync
= time(NULL
);
8830 server
.appendfd
= open(server
.appendfilename
,O_WRONLY
|O_APPEND
|O_CREAT
,0644);
8831 if (server
.appendfd
== -1) {
8832 redisLog(REDIS_WARNING
,"Used tried to switch on AOF via CONFIG, but I can't open the AOF file: %s",strerror(errno
));
8835 if (rewriteAppendOnlyFileBackground() == REDIS_ERR
) {
8836 server
.appendonly
= 0;
8837 close(server
.appendfd
);
8838 redisLog(REDIS_WARNING
,"Used tried to switch on AOF via CONFIG, I can't trigger a background AOF rewrite operation. Check the above logs for more info about the error.",strerror(errno
));
8844 /* =================== Virtual Memory - Blocking Side ====================== */
8846 static void vmInit(void) {
8852 if (server
.vm_max_threads
!= 0)
8853 zmalloc_enable_thread_safeness(); /* we need thread safe zmalloc() */
8855 redisLog(REDIS_NOTICE
,"Using '%s' as swap file",server
.vm_swap_file
);
8856 /* Try to open the old swap file, otherwise create it */
8857 if ((server
.vm_fp
= fopen(server
.vm_swap_file
,"r+b")) == NULL
) {
8858 server
.vm_fp
= fopen(server
.vm_swap_file
,"w+b");
8860 if (server
.vm_fp
== NULL
) {
8861 redisLog(REDIS_WARNING
,
8862 "Can't open the swap file: %s. Exiting.",
8866 server
.vm_fd
= fileno(server
.vm_fp
);
8867 /* Lock the swap file for writing, this is useful in order to avoid
8868 * another instance to use the same swap file for a config error. */
8869 fl
.l_type
= F_WRLCK
;
8870 fl
.l_whence
= SEEK_SET
;
8871 fl
.l_start
= fl
.l_len
= 0;
8872 if (fcntl(server
.vm_fd
,F_SETLK
,&fl
) == -1) {
8873 redisLog(REDIS_WARNING
,
8874 "Can't lock the swap file at '%s': %s. Make sure it is not used by another Redis instance.", server
.vm_swap_file
, strerror(errno
));
8878 server
.vm_next_page
= 0;
8879 server
.vm_near_pages
= 0;
8880 server
.vm_stats_used_pages
= 0;
8881 server
.vm_stats_swapped_objects
= 0;
8882 server
.vm_stats_swapouts
= 0;
8883 server
.vm_stats_swapins
= 0;
8884 totsize
= server
.vm_pages
*server
.vm_page_size
;
8885 redisLog(REDIS_NOTICE
,"Allocating %lld bytes of swap file",totsize
);
8886 if (ftruncate(server
.vm_fd
,totsize
) == -1) {
8887 redisLog(REDIS_WARNING
,"Can't ftruncate swap file: %s. Exiting.",
8891 redisLog(REDIS_NOTICE
,"Swap file allocated with success");
8893 server
.vm_bitmap
= zmalloc((server
.vm_pages
+7)/8);
8894 redisLog(REDIS_VERBOSE
,"Allocated %lld bytes page table for %lld pages",
8895 (long long) (server
.vm_pages
+7)/8, server
.vm_pages
);
8896 memset(server
.vm_bitmap
,0,(server
.vm_pages
+7)/8);
8898 /* Initialize threaded I/O (used by Virtual Memory) */
8899 server
.io_newjobs
= listCreate();
8900 server
.io_processing
= listCreate();
8901 server
.io_processed
= listCreate();
8902 server
.io_ready_clients
= listCreate();
8903 pthread_mutex_init(&server
.io_mutex
,NULL
);
8904 pthread_mutex_init(&server
.obj_freelist_mutex
,NULL
);
8905 pthread_mutex_init(&server
.io_swapfile_mutex
,NULL
);
8906 server
.io_active_threads
= 0;
8907 if (pipe(pipefds
) == -1) {
8908 redisLog(REDIS_WARNING
,"Unable to intialized VM: pipe(2): %s. Exiting."
8912 server
.io_ready_pipe_read
= pipefds
[0];
8913 server
.io_ready_pipe_write
= pipefds
[1];
8914 redisAssert(anetNonBlock(NULL
,server
.io_ready_pipe_read
) != ANET_ERR
);
8915 /* LZF requires a lot of stack */
8916 pthread_attr_init(&server
.io_threads_attr
);
8917 pthread_attr_getstacksize(&server
.io_threads_attr
, &stacksize
);
8918 while (stacksize
< REDIS_THREAD_STACK_SIZE
) stacksize
*= 2;
8919 pthread_attr_setstacksize(&server
.io_threads_attr
, stacksize
);
8920 /* Listen for events in the threaded I/O pipe */
8921 if (aeCreateFileEvent(server
.el
, server
.io_ready_pipe_read
, AE_READABLE
,
8922 vmThreadedIOCompletedJob
, NULL
) == AE_ERR
)
8923 oom("creating file event");
8926 /* Mark the page as used */
8927 static void vmMarkPageUsed(off_t page
) {
8928 off_t byte
= page
/8;
8930 redisAssert(vmFreePage(page
) == 1);
8931 server
.vm_bitmap
[byte
] |= 1<<bit
;
8934 /* Mark N contiguous pages as used, with 'page' being the first. */
8935 static void vmMarkPagesUsed(off_t page
, off_t count
) {
8938 for (j
= 0; j
< count
; j
++)
8939 vmMarkPageUsed(page
+j
);
8940 server
.vm_stats_used_pages
+= count
;
8941 redisLog(REDIS_DEBUG
,"Mark USED pages: %lld pages at %lld\n",
8942 (long long)count
, (long long)page
);
8945 /* Mark the page as free */
8946 static void vmMarkPageFree(off_t page
) {
8947 off_t byte
= page
/8;
8949 redisAssert(vmFreePage(page
) == 0);
8950 server
.vm_bitmap
[byte
] &= ~(1<<bit
);
8953 /* Mark N contiguous pages as free, with 'page' being the first. */
8954 static void vmMarkPagesFree(off_t page
, off_t count
) {
8957 for (j
= 0; j
< count
; j
++)
8958 vmMarkPageFree(page
+j
);
8959 server
.vm_stats_used_pages
-= count
;
8960 redisLog(REDIS_DEBUG
,"Mark FREE pages: %lld pages at %lld\n",
8961 (long long)count
, (long long)page
);
8964 /* Test if the page is free */
8965 static int vmFreePage(off_t page
) {
8966 off_t byte
= page
/8;
8968 return (server
.vm_bitmap
[byte
] & (1<<bit
)) == 0;
8971 /* Find N contiguous free pages storing the first page of the cluster in *first.
8972 * Returns REDIS_OK if it was able to find N contiguous pages, otherwise
8973 * REDIS_ERR is returned.
8975 * This function uses a simple algorithm: we try to allocate
8976 * REDIS_VM_MAX_NEAR_PAGES sequentially, when we reach this limit we start
8977 * again from the start of the swap file searching for free spaces.
8979 * If it looks pretty clear that there are no free pages near our offset
8980 * we try to find less populated places doing a forward jump of
8981 * REDIS_VM_MAX_RANDOM_JUMP, then we start scanning again a few pages
8982 * without hurry, and then we jump again and so forth...
8984 * This function can be improved using a free list to avoid to guess
8985 * too much, since we could collect data about freed pages.
8987 * note: I implemented this function just after watching an episode of
8988 * Battlestar Galactica, where the hybrid was continuing to say "JUMP!"
8990 static int vmFindContiguousPages(off_t
*first
, off_t n
) {
8991 off_t base
, offset
= 0, since_jump
= 0, numfree
= 0;
8993 if (server
.vm_near_pages
== REDIS_VM_MAX_NEAR_PAGES
) {
8994 server
.vm_near_pages
= 0;
8995 server
.vm_next_page
= 0;
8997 server
.vm_near_pages
++; /* Yet another try for pages near to the old ones */
8998 base
= server
.vm_next_page
;
9000 while(offset
< server
.vm_pages
) {
9001 off_t
this = base
+offset
;
9003 /* If we overflow, restart from page zero */
9004 if (this >= server
.vm_pages
) {
9005 this -= server
.vm_pages
;
9007 /* Just overflowed, what we found on tail is no longer
9008 * interesting, as it's no longer contiguous. */
9012 if (vmFreePage(this)) {
9013 /* This is a free page */
9015 /* Already got N free pages? Return to the caller, with success */
9017 *first
= this-(n
-1);
9018 server
.vm_next_page
= this+1;
9019 redisLog(REDIS_DEBUG
, "FOUND CONTIGUOUS PAGES: %lld pages at %lld\n", (long long) n
, (long long) *first
);
9023 /* The current one is not a free page */
9027 /* Fast-forward if the current page is not free and we already
9028 * searched enough near this place. */
9030 if (!numfree
&& since_jump
>= REDIS_VM_MAX_RANDOM_JUMP
/4) {
9031 offset
+= random() % REDIS_VM_MAX_RANDOM_JUMP
;
9033 /* Note that even if we rewind after the jump, we are don't need
9034 * to make sure numfree is set to zero as we only jump *if* it
9035 * is set to zero. */
9037 /* Otherwise just check the next page */
9044 /* Write the specified object at the specified page of the swap file */
9045 static int vmWriteObjectOnSwap(robj
*o
, off_t page
) {
9046 if (server
.vm_enabled
) pthread_mutex_lock(&server
.io_swapfile_mutex
);
9047 if (fseeko(server
.vm_fp
,page
*server
.vm_page_size
,SEEK_SET
) == -1) {
9048 if (server
.vm_enabled
) pthread_mutex_unlock(&server
.io_swapfile_mutex
);
9049 redisLog(REDIS_WARNING
,
9050 "Critical VM problem in vmWriteObjectOnSwap(): can't seek: %s",
9054 rdbSaveObject(server
.vm_fp
,o
);
9055 fflush(server
.vm_fp
);
9056 if (server
.vm_enabled
) pthread_mutex_unlock(&server
.io_swapfile_mutex
);
9060 /* Swap the 'val' object relative to 'key' into disk. Store all the information
9061 * needed to later retrieve the object into the key object.
9062 * If we can't find enough contiguous empty pages to swap the object on disk
9063 * REDIS_ERR is returned. */
9064 static int vmSwapObjectBlocking(robj
*key
, robj
*val
) {
9065 off_t pages
= rdbSavedObjectPages(val
,NULL
);
9068 assert(key
->storage
== REDIS_VM_MEMORY
);
9069 assert(key
->refcount
== 1);
9070 if (vmFindContiguousPages(&page
,pages
) == REDIS_ERR
) return REDIS_ERR
;
9071 if (vmWriteObjectOnSwap(val
,page
) == REDIS_ERR
) return REDIS_ERR
;
9072 key
->vm
.page
= page
;
9073 key
->vm
.usedpages
= pages
;
9074 key
->storage
= REDIS_VM_SWAPPED
;
9075 key
->vtype
= val
->type
;
9076 decrRefCount(val
); /* Deallocate the object from memory. */
9077 vmMarkPagesUsed(page
,pages
);
9078 redisLog(REDIS_DEBUG
,"VM: object %s swapped out at %lld (%lld pages)",
9079 (unsigned char*) key
->ptr
,
9080 (unsigned long long) page
, (unsigned long long) pages
);
9081 server
.vm_stats_swapped_objects
++;
9082 server
.vm_stats_swapouts
++;
9086 static robj
*vmReadObjectFromSwap(off_t page
, int type
) {
9089 if (server
.vm_enabled
) pthread_mutex_lock(&server
.io_swapfile_mutex
);
9090 if (fseeko(server
.vm_fp
,page
*server
.vm_page_size
,SEEK_SET
) == -1) {
9091 redisLog(REDIS_WARNING
,
9092 "Unrecoverable VM problem in vmReadObjectFromSwap(): can't seek: %s",
9096 o
= rdbLoadObject(type
,server
.vm_fp
);
9098 redisLog(REDIS_WARNING
, "Unrecoverable VM problem in vmReadObjectFromSwap(): can't load object from swap file: %s", strerror(errno
));
9101 if (server
.vm_enabled
) pthread_mutex_unlock(&server
.io_swapfile_mutex
);
9105 /* Load the value object relative to the 'key' object from swap to memory.
9106 * The newly allocated object is returned.
9108 * If preview is true the unserialized object is returned to the caller but
9109 * no changes are made to the key object, nor the pages are marked as freed */
9110 static robj
*vmGenericLoadObject(robj
*key
, int preview
) {
9113 redisAssert(key
->storage
== REDIS_VM_SWAPPED
|| key
->storage
== REDIS_VM_LOADING
);
9114 val
= vmReadObjectFromSwap(key
->vm
.page
,key
->vtype
);
9116 key
->storage
= REDIS_VM_MEMORY
;
9117 key
->vm
.atime
= server
.unixtime
;
9118 vmMarkPagesFree(key
->vm
.page
,key
->vm
.usedpages
);
9119 redisLog(REDIS_DEBUG
, "VM: object %s loaded from disk",
9120 (unsigned char*) key
->ptr
);
9121 server
.vm_stats_swapped_objects
--;
9123 redisLog(REDIS_DEBUG
, "VM: object %s previewed from disk",
9124 (unsigned char*) key
->ptr
);
9126 server
.vm_stats_swapins
++;
9130 /* Plain object loading, from swap to memory */
9131 static robj
*vmLoadObject(robj
*key
) {
9132 /* If we are loading the object in background, stop it, we
9133 * need to load this object synchronously ASAP. */
9134 if (key
->storage
== REDIS_VM_LOADING
)
9135 vmCancelThreadedIOJob(key
);
9136 return vmGenericLoadObject(key
,0);
9139 /* Just load the value on disk, without to modify the key.
9140 * This is useful when we want to perform some operation on the value
9141 * without to really bring it from swap to memory, like while saving the
9142 * dataset or rewriting the append only log. */
9143 static robj
*vmPreviewObject(robj
*key
) {
9144 return vmGenericLoadObject(key
,1);
9147 /* How good a candidate is this object for swapping?
9148 * The better candidate it is, the greater the returned value.
9150 * Currently we try to perform a fast estimation of the object size in
9151 * memory, and combine it with aging information.
9153 * Basically swappability = idle-time * log(estimated size)
9155 * Bigger objects are preferred over smaller objects, but not
9156 * proportionally, this is why we use the logarithm. This algorithm is
9157 * just a first try and will probably be tuned later. */
9158 static double computeObjectSwappability(robj
*o
) {
9159 time_t age
= server
.unixtime
- o
->vm
.atime
;
9163 struct dictEntry
*de
;
9166 if (age
<= 0) return 0;
9169 if (o
->encoding
!= REDIS_ENCODING_RAW
) {
9172 asize
= sdslen(o
->ptr
)+sizeof(*o
)+sizeof(long)*2;
9177 listNode
*ln
= listFirst(l
);
9179 asize
= sizeof(list
);
9181 robj
*ele
= ln
->value
;
9184 elesize
= (ele
->encoding
== REDIS_ENCODING_RAW
) ?
9185 (sizeof(*o
)+sdslen(ele
->ptr
)) :
9187 asize
+= (sizeof(listNode
)+elesize
)*listLength(l
);
9192 z
= (o
->type
== REDIS_ZSET
);
9193 d
= z
? ((zset
*)o
->ptr
)->dict
: o
->ptr
;
9195 asize
= sizeof(dict
)+(sizeof(struct dictEntry
*)*dictSlots(d
));
9196 if (z
) asize
+= sizeof(zset
)-sizeof(dict
);
9201 de
= dictGetRandomKey(d
);
9202 ele
= dictGetEntryKey(de
);
9203 elesize
= (ele
->encoding
== REDIS_ENCODING_RAW
) ?
9204 (sizeof(*o
)+sdslen(ele
->ptr
)) :
9206 asize
+= (sizeof(struct dictEntry
)+elesize
)*dictSize(d
);
9207 if (z
) asize
+= sizeof(zskiplistNode
)*dictSize(d
);
9211 if (o
->encoding
== REDIS_ENCODING_ZIPMAP
) {
9212 unsigned char *p
= zipmapRewind((unsigned char*)o
->ptr
);
9213 unsigned int len
= zipmapLen((unsigned char*)o
->ptr
);
9214 unsigned int klen
, vlen
;
9215 unsigned char *key
, *val
;
9217 if ((p
= zipmapNext(p
,&key
,&klen
,&val
,&vlen
)) == NULL
) {
9221 asize
= len
*(klen
+vlen
+3);
9222 } else if (o
->encoding
== REDIS_ENCODING_HT
) {
9224 asize
= sizeof(dict
)+(sizeof(struct dictEntry
*)*dictSlots(d
));
9229 de
= dictGetRandomKey(d
);
9230 ele
= dictGetEntryKey(de
);
9231 elesize
= (ele
->encoding
== REDIS_ENCODING_RAW
) ?
9232 (sizeof(*o
)+sdslen(ele
->ptr
)) :
9234 ele
= dictGetEntryVal(de
);
9235 elesize
= (ele
->encoding
== REDIS_ENCODING_RAW
) ?
9236 (sizeof(*o
)+sdslen(ele
->ptr
)) :
9238 asize
+= (sizeof(struct dictEntry
)+elesize
)*dictSize(d
);
9243 return (double)age
*log(1+asize
);
9246 /* Try to swap an object that's a good candidate for swapping.
9247 * Returns REDIS_OK if the object was swapped, REDIS_ERR if it's not possible
9248 * to swap any object at all.
9250 * If 'usethreads' is true, Redis will try to swap the object in background
9251 * using I/O threads. */
9252 static int vmSwapOneObject(int usethreads
) {
9254 struct dictEntry
*best
= NULL
;
9255 double best_swappability
= 0;
9256 redisDb
*best_db
= NULL
;
9259 for (j
= 0; j
< server
.dbnum
; j
++) {
9260 redisDb
*db
= server
.db
+j
;
9261 /* Why maxtries is set to 100?
9262 * Because this way (usually) we'll find 1 object even if just 1% - 2%
9263 * are swappable objects */
9266 if (dictSize(db
->dict
) == 0) continue;
9267 for (i
= 0; i
< 5; i
++) {
9269 double swappability
;
9271 if (maxtries
) maxtries
--;
9272 de
= dictGetRandomKey(db
->dict
);
9273 key
= dictGetEntryKey(de
);
9274 val
= dictGetEntryVal(de
);
9275 /* Only swap objects that are currently in memory.
9277 * Also don't swap shared objects if threaded VM is on, as we
9278 * try to ensure that the main thread does not touch the
9279 * object while the I/O thread is using it, but we can't
9280 * control other keys without adding additional mutex. */
9281 if (key
->storage
!= REDIS_VM_MEMORY
||
9282 (server
.vm_max_threads
!= 0 && val
->refcount
!= 1)) {
9283 if (maxtries
) i
--; /* don't count this try */
9286 swappability
= computeObjectSwappability(val
);
9287 if (!best
|| swappability
> best_swappability
) {
9289 best_swappability
= swappability
;
9294 if (best
== NULL
) return REDIS_ERR
;
9295 key
= dictGetEntryKey(best
);
9296 val
= dictGetEntryVal(best
);
9298 redisLog(REDIS_DEBUG
,"Key with best swappability: %s, %f",
9299 key
->ptr
, best_swappability
);
9301 /* Unshare the key if needed */
9302 if (key
->refcount
> 1) {
9303 robj
*newkey
= dupStringObject(key
);
9305 key
= dictGetEntryKey(best
) = newkey
;
9309 vmSwapObjectThreaded(key
,val
,best_db
);
9312 if (vmSwapObjectBlocking(key
,val
) == REDIS_OK
) {
9313 dictGetEntryVal(best
) = NULL
;
/* Try to swap one object out synchronously. Thin wrapper returning
 * whatever vmSwapOneObject(0) returns. Declared with a (void) parameter
 * list so that it is a real prototype. */
static int vmSwapOneObjectBlocking(void) {
    return vmSwapOneObject(0);
}
/* Try to swap one object out using the I/O threads. Thin wrapper returning
 * whatever vmSwapOneObject(1) returns. Declared with a (void) parameter
 * list so that it is a real prototype. */
static int vmSwapOneObjectThreaded(void) {
    return vmSwapOneObject(1);
}
9329 /* Return true if it's safe to swap out objects in a given moment.
9330 * Basically we don't want to swap objects out while there is a BGSAVE
9331 * or a BGAEOREWRITE running in backgroud. */
9332 static int vmCanSwapOut(void) {
9333 return (server
.bgsavechildpid
== -1 && server
.bgrewritechildpid
== -1);
9336 /* Delete a key if swapped. Returns 1 if the key was found, was swapped
9337 * and was deleted. Otherwise 0 is returned. */
9338 static int deleteIfSwapped(redisDb
*db
, robj
*key
) {
9342 if ((de
= dictFind(db
->dict
,key
)) == NULL
) return 0;
9343 foundkey
= dictGetEntryKey(de
);
9344 if (foundkey
->storage
== REDIS_VM_MEMORY
) return 0;
9349 /* =================== Virtual Memory - Threaded I/O ======================= */
9351 static void freeIOJob(iojob
*j
) {
9352 if ((j
->type
== REDIS_IOJOB_PREPARE_SWAP
||
9353 j
->type
== REDIS_IOJOB_DO_SWAP
||
9354 j
->type
== REDIS_IOJOB_LOAD
) && j
->val
!= NULL
)
9355 decrRefCount(j
->val
);
9356 /* We don't decrRefCount the j->key field as we did't incremented
9357 * the count creating IO Jobs. This is because the key field here is
9358 * just used as an indentifier and if a key is removed the Job should
9359 * never be touched again. */
9363 /* Every time a thread finished a Job, it writes a byte into the write side
9364 * of an unix pipe in order to "awake" the main thread, and this function
9366 static void vmThreadedIOCompletedJob(aeEventLoop
*el
, int fd
, void *privdata
,
9370 int retval
, processed
= 0, toprocess
= -1, trytoswap
= 1;
9372 REDIS_NOTUSED(mask
);
9373 REDIS_NOTUSED(privdata
);
9375 /* For every byte we read in the read side of the pipe, there is one
9376 * I/O job completed to process. */
9377 while((retval
= read(fd
,buf
,1)) == 1) {
9381 struct dictEntry
*de
;
9383 redisLog(REDIS_DEBUG
,"Processing I/O completed job");
9385 /* Get the processed element (the oldest one) */
9387 assert(listLength(server
.io_processed
) != 0);
9388 if (toprocess
== -1) {
9389 toprocess
= (listLength(server
.io_processed
)*REDIS_MAX_COMPLETED_JOBS_PROCESSED
)/100;
9390 if (toprocess
<= 0) toprocess
= 1;
9392 ln
= listFirst(server
.io_processed
);
9394 listDelNode(server
.io_processed
,ln
);
9396 /* If this job is marked as canceled, just ignore it */
9401 /* Post process it in the main thread, as there are things we
9402 * can do just here to avoid race conditions and/or invasive locks */
9403 redisLog(REDIS_DEBUG
,"Job %p type: %d, key at %p (%s) refcount: %d\n", (void*) j
, j
->type
, (void*)j
->key
, (char*)j
->key
->ptr
, j
->key
->refcount
);
9404 de
= dictFind(j
->db
->dict
,j
->key
);
9406 key
= dictGetEntryKey(de
);
9407 if (j
->type
== REDIS_IOJOB_LOAD
) {
9410 /* Key loaded, bring it at home */
9411 key
->storage
= REDIS_VM_MEMORY
;
9412 key
->vm
.atime
= server
.unixtime
;
9413 vmMarkPagesFree(key
->vm
.page
,key
->vm
.usedpages
);
9414 redisLog(REDIS_DEBUG
, "VM: object %s loaded from disk (threaded)",
9415 (unsigned char*) key
->ptr
);
9416 server
.vm_stats_swapped_objects
--;
9417 server
.vm_stats_swapins
++;
9418 dictGetEntryVal(de
) = j
->val
;
9419 incrRefCount(j
->val
);
9422 /* Handle clients waiting for this key to be loaded. */
9423 handleClientsBlockedOnSwappedKey(db
,key
);
9424 } else if (j
->type
== REDIS_IOJOB_PREPARE_SWAP
) {
9425 /* Now we know the amount of pages required to swap this object.
9426 * Let's find some space for it, and queue this task again
9427 * rebranded as REDIS_IOJOB_DO_SWAP. */
9428 if (!vmCanSwapOut() ||
9429 vmFindContiguousPages(&j
->page
,j
->pages
) == REDIS_ERR
)
9431 /* Ooops... no space or we can't swap as there is
9432 * a fork()ed Redis trying to save stuff on disk. */
9434 key
->storage
= REDIS_VM_MEMORY
; /* undo operation */
9436 /* Note that we need to mark this pages as used now,
9437 * if the job will be canceled, we'll mark them as freed
9439 vmMarkPagesUsed(j
->page
,j
->pages
);
9440 j
->type
= REDIS_IOJOB_DO_SWAP
;
9445 } else if (j
->type
== REDIS_IOJOB_DO_SWAP
) {
9448 /* Key swapped. We can finally free some memory. */
9449 if (key
->storage
!= REDIS_VM_SWAPPING
) {
9450 printf("key->storage: %d\n",key
->storage
);
9451 printf("key->name: %s\n",(char*)key
->ptr
);
9452 printf("key->refcount: %d\n",key
->refcount
);
9453 printf("val: %p\n",(void*)j
->val
);
9454 printf("val->type: %d\n",j
->val
->type
);
9455 printf("val->ptr: %s\n",(char*)j
->val
->ptr
);
9457 redisAssert(key
->storage
== REDIS_VM_SWAPPING
);
9458 val
= dictGetEntryVal(de
);
9459 key
->vm
.page
= j
->page
;
9460 key
->vm
.usedpages
= j
->pages
;
9461 key
->storage
= REDIS_VM_SWAPPED
;
9462 key
->vtype
= j
->val
->type
;
9463 decrRefCount(val
); /* Deallocate the object from memory. */
9464 dictGetEntryVal(de
) = NULL
;
9465 redisLog(REDIS_DEBUG
,
9466 "VM: object %s swapped out at %lld (%lld pages) (threaded)",
9467 (unsigned char*) key
->ptr
,
9468 (unsigned long long) j
->page
, (unsigned long long) j
->pages
);
9469 server
.vm_stats_swapped_objects
++;
9470 server
.vm_stats_swapouts
++;
9472 /* Put a few more swap requests in queue if we are still
9474 if (trytoswap
&& vmCanSwapOut() &&
9475 zmalloc_used_memory() > server
.vm_max_memory
)
9480 more
= listLength(server
.io_newjobs
) <
9481 (unsigned) server
.vm_max_threads
;
9483 /* Don't waste CPU time if swappable objects are rare. */
9484 if (vmSwapOneObjectThreaded() == REDIS_ERR
) {
9492 if (processed
== toprocess
) return;
9494 if (retval
< 0 && errno
!= EAGAIN
) {
9495 redisLog(REDIS_WARNING
,
9496 "WARNING: read(2) error in vmThreadedIOCompletedJob() %s",
9501 static void lockThreadedIO(void) {
9502 pthread_mutex_lock(&server
.io_mutex
);
9505 static void unlockThreadedIO(void) {
9506 pthread_mutex_unlock(&server
.io_mutex
);
9509 /* Remove the specified object from the threaded I/O queue if still not
9510 * processed, otherwise make sure to flag it as canceled. */
9511 static void vmCancelThreadedIOJob(robj
*o
) {
9513 server
.io_newjobs
, /* 0 */
9514 server
.io_processing
, /* 1 */
9515 server
.io_processed
/* 2 */
9519 assert(o
->storage
== REDIS_VM_LOADING
|| o
->storage
== REDIS_VM_SWAPPING
);
9522 /* Search for a matching key in one of the queues */
9523 for (i
= 0; i
< 3; i
++) {
9527 listRewind(lists
[i
],&li
);
9528 while ((ln
= listNext(&li
)) != NULL
) {
9529 iojob
*job
= ln
->value
;
9531 if (job
->canceled
) continue; /* Skip this, already canceled. */
9532 if (job
->key
== o
) {
9533 redisLog(REDIS_DEBUG
,"*** CANCELED %p (%s) (type %d) (LIST ID %d)\n",
9534 (void*)job
, (char*)o
->ptr
, job
->type
, i
);
9535 /* Mark the pages as free since the swap didn't happen
9536 * or happened but is now discarded. */
9537 if (i
!= 1 && job
->type
== REDIS_IOJOB_DO_SWAP
)
9538 vmMarkPagesFree(job
->page
,job
->pages
);
9539 /* Cancel the job. It depends on the list the job is
9542 case 0: /* io_newjobs */
9543 /* If the job was yet not processed the best thing to do
9544 * is to remove it from the queue at all */
9546 listDelNode(lists
[i
],ln
);
9548 case 1: /* io_processing */
9549 /* Oh Shi- the thread is messing with the Job:
9551 * Probably it's accessing the object if this is a
9552 * PREPARE_SWAP or DO_SWAP job.
9553 * If it's a LOAD job it may be reading from disk and
9554 * if we don't wait for the job to terminate before to
9555 * cancel it, maybe in a few microseconds data can be
9556 * corrupted in this pages. So the short story is:
9558 * Better to wait for the job to move into the
9559 * next queue (processed)... */
9561 /* We try again and again until the job is completed. */
9563 /* But let's wait some time for the I/O thread
9564 * to finish with this job. After all this condition
9565 * should be very rare. */
9568 case 2: /* io_processed */
9569 /* The job was already processed, that's easy...
9570 * just mark it as canceled so that we'll ignore it
9571 * when processing completed jobs. */
9575 /* Finally we have to adjust the storage type of the object
9576 * in order to "UNDO" the operation. */
9577 if (o
->storage
== REDIS_VM_LOADING
)
9578 o
->storage
= REDIS_VM_SWAPPED
;
9579 else if (o
->storage
== REDIS_VM_SWAPPING
)
9580 o
->storage
= REDIS_VM_MEMORY
;
9587 assert(1 != 1); /* We should never reach this */
9590 static void *IOThreadEntryPoint(void *arg
) {
9595 pthread_detach(pthread_self());
9597 /* Get a new job to process */
9599 if (listLength(server
.io_newjobs
) == 0) {
9600 /* No new jobs in queue, exit. */
9601 redisLog(REDIS_DEBUG
,"Thread %ld exiting, nothing to do",
9602 (long) pthread_self());
9603 server
.io_active_threads
--;
9607 ln
= listFirst(server
.io_newjobs
);
9609 listDelNode(server
.io_newjobs
,ln
);
9610 /* Add the job in the processing queue */
9611 j
->thread
= pthread_self();
9612 listAddNodeTail(server
.io_processing
,j
);
9613 ln
= listLast(server
.io_processing
); /* We use ln later to remove it */
9615 redisLog(REDIS_DEBUG
,"Thread %ld got a new job (type %d): %p about key '%s'",
9616 (long) pthread_self(), j
->type
, (void*)j
, (char*)j
->key
->ptr
);
9618 /* Process the Job */
9619 if (j
->type
== REDIS_IOJOB_LOAD
) {
9620 j
->val
= vmReadObjectFromSwap(j
->page
,j
->key
->vtype
);
9621 } else if (j
->type
== REDIS_IOJOB_PREPARE_SWAP
) {
9622 FILE *fp
= fopen("/dev/null","w+");
9623 j
->pages
= rdbSavedObjectPages(j
->val
,fp
);
9625 } else if (j
->type
== REDIS_IOJOB_DO_SWAP
) {
9626 if (vmWriteObjectOnSwap(j
->val
,j
->page
) == REDIS_ERR
)
9630 /* Done: insert the job into the processed queue */
9631 redisLog(REDIS_DEBUG
,"Thread %ld completed the job: %p (key %s)",
9632 (long) pthread_self(), (void*)j
, (char*)j
->key
->ptr
);
9634 listDelNode(server
.io_processing
,ln
);
9635 listAddNodeTail(server
.io_processed
,j
);
9638 /* Signal the main thread there is new stuff to process */
9639 assert(write(server
.io_ready_pipe_write
,"x",1) == 1);
9641 return NULL
; /* never reached */
9644 static void spawnIOThread(void) {
9646 sigset_t mask
, omask
;
9650 sigaddset(&mask
,SIGCHLD
);
9651 sigaddset(&mask
,SIGHUP
);
9652 sigaddset(&mask
,SIGPIPE
);
9653 pthread_sigmask(SIG_SETMASK
, &mask
, &omask
);
9654 while ((err
= pthread_create(&thread
,&server
.io_threads_attr
,IOThreadEntryPoint
,NULL
)) != 0) {
9655 redisLog(REDIS_WARNING
,"Unable to spawn an I/O thread: %s",
9659 pthread_sigmask(SIG_SETMASK
, &omask
, NULL
);
9660 server
.io_active_threads
++;
9663 /* We need to wait for the last thread to exit before we are able to
9664 * fork() in order to BGSAVE or BGREWRITEAOF. */
9665 static void waitEmptyIOJobsQueue(void) {
9667 int io_processed_len
;
9670 if (listLength(server
.io_newjobs
) == 0 &&
9671 listLength(server
.io_processing
) == 0 &&
9672 server
.io_active_threads
== 0)
9677 /* While waiting for empty jobs queue condition we post-process some
9678 * finshed job, as I/O threads may be hanging trying to write against
9679 * the io_ready_pipe_write FD but there are so much pending jobs that
9681 io_processed_len
= listLength(server
.io_processed
);
9683 if (io_processed_len
) {
9684 vmThreadedIOCompletedJob(NULL
,server
.io_ready_pipe_read
,NULL
,0);
9685 usleep(1000); /* 1 millisecond */
9687 usleep(10000); /* 10 milliseconds */
9692 static void vmReopenSwapFile(void) {
9693 /* Note: we don't close the old one as we are in the child process
9694 * and don't want to mess at all with the original file object. */
9695 server
.vm_fp
= fopen(server
.vm_swap_file
,"r+b");
9696 if (server
.vm_fp
== NULL
) {
9697 redisLog(REDIS_WARNING
,"Can't re-open the VM swap file: %s. Exiting.",
9698 server
.vm_swap_file
);
9701 server
.vm_fd
= fileno(server
.vm_fp
);
9704 /* This function must be called while with threaded IO locked */
9705 static void queueIOJob(iojob
*j
) {
9706 redisLog(REDIS_DEBUG
,"Queued IO Job %p type %d about key '%s'\n",
9707 (void*)j
, j
->type
, (char*)j
->key
->ptr
);
9708 listAddNodeTail(server
.io_newjobs
,j
);
9709 if (server
.io_active_threads
< server
.vm_max_threads
)
9713 static int vmSwapObjectThreaded(robj
*key
, robj
*val
, redisDb
*db
) {
9716 assert(key
->storage
== REDIS_VM_MEMORY
);
9717 assert(key
->refcount
== 1);
9719 j
= zmalloc(sizeof(*j
));
9720 j
->type
= REDIS_IOJOB_PREPARE_SWAP
;
9726 j
->thread
= (pthread_t
) -1;
9727 key
->storage
= REDIS_VM_SWAPPING
;
9735 /* ============ Virtual Memory - Blocking clients on missing keys =========== */
9737 /* This function makes the client 'c' wait for the key 'key' to be loaded.
9738 * If there is not already a job loading the key, it is created.
9739 * The key is added to the io_keys list in the client structure, and also
9740 * in the hash table mapping swapped keys to waiting clients, that is,
9741 * server.io_waited_keys. */
9742 static int waitForSwappedKey(redisClient
*c
, robj
*key
) {
9743 struct dictEntry
*de
;
9747 /* If the key does not exist or is already in RAM we don't need to
9748 * block the client at all. */
9749 de
= dictFind(c
->db
->dict
,key
);
9750 if (de
== NULL
) return 0;
9751 o
= dictGetEntryKey(de
);
9752 if (o
->storage
== REDIS_VM_MEMORY
) {
9754 } else if (o
->storage
== REDIS_VM_SWAPPING
) {
9755 /* We were swapping the key, undo it! */
9756 vmCancelThreadedIOJob(o
);
9760 /* OK: the key is either swapped, or being loaded just now. */
9762 /* Add the key to the list of keys this client is waiting for.
9763 * This maps clients to keys they are waiting for. */
9764 listAddNodeTail(c
->io_keys
,key
);
9767 /* Add the client to the swapped keys => clients waiting map. */
9768 de
= dictFind(c
->db
->io_keys
,key
);
9772 /* For every key we take a list of clients blocked for it */
9774 retval
= dictAdd(c
->db
->io_keys
,key
,l
);
9776 assert(retval
== DICT_OK
);
9778 l
= dictGetEntryVal(de
);
9780 listAddNodeTail(l
,c
);
9782 /* Are we already loading the key from disk? If not create a job */
9783 if (o
->storage
== REDIS_VM_SWAPPED
) {
9786 o
->storage
= REDIS_VM_LOADING
;
9787 j
= zmalloc(sizeof(*j
));
9788 j
->type
= REDIS_IOJOB_LOAD
;
9791 j
->key
->vtype
= o
->vtype
;
9792 j
->page
= o
->vm
.page
;
9795 j
->thread
= (pthread_t
) -1;
9803 /* Preload keys for any command with first, last and step values for
9804 * the command keys prototype, as defined in the command table. */
9805 static void waitForMultipleSwappedKeys(redisClient
*c
, struct redisCommand
*cmd
, int argc
, robj
**argv
) {
9807 if (cmd
->vm_firstkey
== 0) return;
9808 last
= cmd
->vm_lastkey
;
9809 if (last
< 0) last
= argc
+last
;
9810 for (j
= cmd
->vm_firstkey
; j
<= last
; j
+= cmd
->vm_keystep
) {
9811 redisAssert(j
< argc
);
9812 waitForSwappedKey(c
,argv
[j
]);
9816 /* Preload keys needed for the ZUNIONSTORE and ZINTERSTORE commands.
9817 * Note that the number of keys to preload is user-defined, so we need to
9818 * apply a sanity check against argc. */
9819 static void zunionInterBlockClientOnSwappedKeys(redisClient
*c
, struct redisCommand
*cmd
, int argc
, robj
**argv
) {
9823 num
= atoi(argv
[2]->ptr
);
9824 if (num
> (argc
-3)) return;
9825 for (i
= 0; i
< num
; i
++) {
9826 waitForSwappedKey(c
,argv
[3+i
]);
9830 /* Preload keys needed to execute the entire MULTI/EXEC block.
9832 * This function is called by blockClientOnSwappedKeys when EXEC is issued,
9833 * and will block the client when any command requires a swapped out value. */
9834 static void execBlockClientOnSwappedKeys(redisClient
*c
, struct redisCommand
*cmd
, int argc
, robj
**argv
) {
9836 struct redisCommand
*mcmd
;
9839 REDIS_NOTUSED(argc
);
9840 REDIS_NOTUSED(argv
);
9842 if (!(c
->flags
& REDIS_MULTI
)) return;
9843 for (i
= 0; i
< c
->mstate
.count
; i
++) {
9844 mcmd
= c
->mstate
.commands
[i
].cmd
;
9845 margc
= c
->mstate
.commands
[i
].argc
;
9846 margv
= c
->mstate
.commands
[i
].argv
;
9848 if (mcmd
->vm_preload_proc
!= NULL
) {
9849 mcmd
->vm_preload_proc(c
,mcmd
,margc
,margv
);
9851 waitForMultipleSwappedKeys(c
,mcmd
,margc
,margv
);
9856 /* Is this client attempting to run a command against swapped keys?
9857 * If so, block it ASAP, load the keys in background, then resume it.
9859 * The important idea about this function is that it can fail! If keys will
9860 * still be swapped when the client is resumed, this key lookups will
9861 * just block loading keys from disk. In practical terms this should only
9862 * happen with SORT BY command or if there is a bug in this function.
9864 * Return 1 if the client is marked as blocked, 0 if the client can
9865 * continue as the keys it is going to access appear to be in memory. */
9866 static int blockClientOnSwappedKeys(redisClient
*c
, struct redisCommand
*cmd
) {
9867 if (cmd
->vm_preload_proc
!= NULL
) {
9868 cmd
->vm_preload_proc(c
,cmd
,c
->argc
,c
->argv
);
9870 waitForMultipleSwappedKeys(c
,cmd
,c
->argc
,c
->argv
);
9873 /* If the client was blocked for at least one key, mark it as blocked. */
9874 if (listLength(c
->io_keys
)) {
9875 c
->flags
|= REDIS_IO_WAIT
;
9876 aeDeleteFileEvent(server
.el
,c
->fd
,AE_READABLE
);
9877 server
.vm_blocked_clients
++;
9884 /* Remove the 'key' from the list of blocked keys for a given client.
9886 * The function returns 1 when there are no longer blocking keys after
9887 * the current one was removed (and the client can be unblocked). */
9888 static int dontWaitForSwappedKey(redisClient
*c
, robj
*key
) {
9892 struct dictEntry
*de
;
9894 /* Remove the key from the list of keys this client is waiting for. */
9895 listRewind(c
->io_keys
,&li
);
9896 while ((ln
= listNext(&li
)) != NULL
) {
9897 if (equalStringObjects(ln
->value
,key
)) {
9898 listDelNode(c
->io_keys
,ln
);
9904 /* Remove the client form the key => waiting clients map. */
9905 de
= dictFind(c
->db
->io_keys
,key
);
9907 l
= dictGetEntryVal(de
);
9908 ln
= listSearchKey(l
,c
);
9911 if (listLength(l
) == 0)
9912 dictDelete(c
->db
->io_keys
,key
);
9914 return listLength(c
->io_keys
) == 0;
9917 static void handleClientsBlockedOnSwappedKey(redisDb
*db
, robj
*key
) {
9918 struct dictEntry
*de
;
9923 de
= dictFind(db
->io_keys
,key
);
9926 l
= dictGetEntryVal(de
);
9927 len
= listLength(l
);
9928 /* Note: we can't use something like while(listLength(l)) as the list
9929 * can be freed by the calling function when we remove the last element. */
9932 redisClient
*c
= ln
->value
;
9934 if (dontWaitForSwappedKey(c
,key
)) {
9935 /* Put the client in the list of clients ready to go as we
9936 * loaded all the keys about it. */
9937 listAddNodeTail(server
.io_ready_clients
,c
);
9942 /* =========================== Remote Configuration ========================= */
9944 static void configSetCommand(redisClient
*c
) {
9945 robj
*o
= getDecodedObject(c
->argv
[3]);
9948 if (!strcasecmp(c
->argv
[2]->ptr
,"dbfilename")) {
9949 zfree(server
.dbfilename
);
9950 server
.dbfilename
= zstrdup(o
->ptr
);
9951 } else if (!strcasecmp(c
->argv
[2]->ptr
,"requirepass")) {
9952 zfree(server
.requirepass
);
9953 server
.requirepass
= zstrdup(o
->ptr
);
9954 } else if (!strcasecmp(c
->argv
[2]->ptr
,"masterauth")) {
9955 zfree(server
.masterauth
);
9956 server
.masterauth
= zstrdup(o
->ptr
);
9957 } else if (!strcasecmp(c
->argv
[2]->ptr
,"maxmemory")) {
9958 if (getLongLongFromObject(o
,&ll
) == REDIS_ERR
||
9959 ll
< 0) goto badfmt
;
9960 server
.maxmemory
= ll
;
9961 } else if (!strcasecmp(c
->argv
[2]->ptr
,"timeout")) {
9962 if (getLongLongFromObject(o
,&ll
) == REDIS_ERR
||
9963 ll
< 0 || ll
> LONG_MAX
) goto badfmt
;
9964 server
.maxidletime
= ll
;
9965 } else if (!strcasecmp(c
->argv
[2]->ptr
,"appendfsync")) {
9966 if (!strcasecmp(o
->ptr
,"no")) {
9967 server
.appendfsync
= APPENDFSYNC_NO
;
9968 } else if (!strcasecmp(o
->ptr
,"everysec")) {
9969 server
.appendfsync
= APPENDFSYNC_EVERYSEC
;
9970 } else if (!strcasecmp(o
->ptr
,"always")) {
9971 server
.appendfsync
= APPENDFSYNC_ALWAYS
;
9975 } else if (!strcasecmp(c
->argv
[2]->ptr
,"no-appendfsync-on-rewrite")) {
9976 int yn
= yesnotoi(o
->ptr
);
9978 if (yn
== -1) goto badfmt
;
9979 server
.no_appendfsync_on_rewrite
= yn
;
9980 } else if (!strcasecmp(c
->argv
[2]->ptr
,"appendonly")) {
9981 int old
= server
.appendonly
;
9982 int new = yesnotoi(o
->ptr
);
9984 if (new == -1) goto badfmt
;
9989 if (startAppendOnly() == REDIS_ERR
) {
9990 addReplySds(c
,sdscatprintf(sdsempty(),
9991 "-ERR Unable to turn on AOF. Check server logs.\r\n"));
9997 } else if (!strcasecmp(c
->argv
[2]->ptr
,"save")) {
9999 sds
*v
= sdssplitlen(o
->ptr
,sdslen(o
->ptr
)," ",1,&vlen
);
10001 /* Perform sanity check before setting the new config:
10002 * - Even number of args
10003 * - Seconds >= 1, changes >= 0 */
10005 sdsfreesplitres(v
,vlen
);
10008 for (j
= 0; j
< vlen
; j
++) {
10012 val
= strtoll(v
[j
], &eptr
, 10);
10013 if (eptr
[0] != '\0' ||
10014 ((j
& 1) == 0 && val
< 1) ||
10015 ((j
& 1) == 1 && val
< 0)) {
10016 sdsfreesplitres(v
,vlen
);
10020 /* Finally set the new config */
10021 resetServerSaveParams();
10022 for (j
= 0; j
< vlen
; j
+= 2) {
10026 seconds
= strtoll(v
[j
],NULL
,10);
10027 changes
= strtoll(v
[j
+1],NULL
,10);
10028 appendServerSaveParams(seconds
, changes
);
10030 sdsfreesplitres(v
,vlen
);
10032 addReplySds(c
,sdscatprintf(sdsempty(),
10033 "-ERR not supported CONFIG parameter %s\r\n",
10034 (char*)c
->argv
[2]->ptr
));
10039 addReply(c
,shared
.ok
);
10042 badfmt
: /* Bad format errors */
10043 addReplySds(c
,sdscatprintf(sdsempty(),
10044 "-ERR invalid argument '%s' for CONFIG SET '%s'\r\n",
10046 (char*)c
->argv
[2]->ptr
));
10050 static void configGetCommand(redisClient
*c
) {
10051 robj
*o
= getDecodedObject(c
->argv
[2]);
10052 robj
*lenobj
= createObject(REDIS_STRING
,NULL
);
10053 char *pattern
= o
->ptr
;
10056 addReply(c
,lenobj
);
10057 decrRefCount(lenobj
);
10059 if (stringmatch(pattern
,"dbfilename",0)) {
10060 addReplyBulkCString(c
,"dbfilename");
10061 addReplyBulkCString(c
,server
.dbfilename
);
10064 if (stringmatch(pattern
,"requirepass",0)) {
10065 addReplyBulkCString(c
,"requirepass");
10066 addReplyBulkCString(c
,server
.requirepass
);
10069 if (stringmatch(pattern
,"masterauth",0)) {
10070 addReplyBulkCString(c
,"masterauth");
10071 addReplyBulkCString(c
,server
.masterauth
);
10074 if (stringmatch(pattern
,"maxmemory",0)) {
10077 ll2string(buf
,128,server
.maxmemory
);
10078 addReplyBulkCString(c
,"maxmemory");
10079 addReplyBulkCString(c
,buf
);
10082 if (stringmatch(pattern
,"timeout",0)) {
10085 ll2string(buf
,128,server
.maxidletime
);
10086 addReplyBulkCString(c
,"timeout");
10087 addReplyBulkCString(c
,buf
);
10090 if (stringmatch(pattern
,"appendonly",0)) {
10091 addReplyBulkCString(c
,"appendonly");
10092 addReplyBulkCString(c
,server
.appendonly
? "yes" : "no");
10095 if (stringmatch(pattern
,"no-appendfsync-on-rewrite",0)) {
10096 addReplyBulkCString(c
,"no-appendfsync-on-rewrite");
10097 addReplyBulkCString(c
,server
.no_appendfsync_on_rewrite
? "yes" : "no");
10100 if (stringmatch(pattern
,"appendfsync",0)) {
10103 switch(server
.appendfsync
) {
10104 case APPENDFSYNC_NO
: policy
= "no"; break;
10105 case APPENDFSYNC_EVERYSEC
: policy
= "everysec"; break;
10106 case APPENDFSYNC_ALWAYS
: policy
= "always"; break;
10107 default: policy
= "unknown"; break; /* too harmless to panic */
10109 addReplyBulkCString(c
,"appendfsync");
10110 addReplyBulkCString(c
,policy
);
10113 if (stringmatch(pattern
,"save",0)) {
10114 sds buf
= sdsempty();
10117 for (j
= 0; j
< server
.saveparamslen
; j
++) {
10118 buf
= sdscatprintf(buf
,"%ld %d",
10119 server
.saveparams
[j
].seconds
,
10120 server
.saveparams
[j
].changes
);
10121 if (j
!= server
.saveparamslen
-1)
10122 buf
= sdscatlen(buf
," ",1);
10124 addReplyBulkCString(c
,"save");
10125 addReplyBulkCString(c
,buf
);
10130 lenobj
->ptr
= sdscatprintf(sdsempty(),"*%d\r\n",matches
*2);
10133 static void configCommand(redisClient
*c
) {
10134 if (!strcasecmp(c
->argv
[1]->ptr
,"set")) {
10135 if (c
->argc
!= 4) goto badarity
;
10136 configSetCommand(c
);
10137 } else if (!strcasecmp(c
->argv
[1]->ptr
,"get")) {
10138 if (c
->argc
!= 3) goto badarity
;
10139 configGetCommand(c
);
10140 } else if (!strcasecmp(c
->argv
[1]->ptr
,"resetstat")) {
10141 if (c
->argc
!= 2) goto badarity
;
10142 server
.stat_numcommands
= 0;
10143 server
.stat_numconnections
= 0;
10144 server
.stat_expiredkeys
= 0;
10145 server
.stat_starttime
= time(NULL
);
10146 addReply(c
,shared
.ok
);
10148 addReplySds(c
,sdscatprintf(sdsempty(),
10149 "-ERR CONFIG subcommand must be one of GET, SET, RESETSTAT\r\n"));
10154 addReplySds(c
,sdscatprintf(sdsempty(),
10155 "-ERR Wrong number of arguments for CONFIG %s\r\n",
10156 (char*) c
->argv
[1]->ptr
));
10159 /* =========================== Pubsub implementation ======================== */
10161 static void freePubsubPattern(void *p
) {
10162 pubsubPattern
*pat
= p
;
10164 decrRefCount(pat
->pattern
);
10168 static int listMatchPubsubPattern(void *a
, void *b
) {
10169 pubsubPattern
*pa
= a
, *pb
= b
;
10171 return (pa
->client
== pb
->client
) &&
10172 (equalStringObjects(pa
->pattern
,pb
->pattern
));
10175 /* Subscribe a client to a channel. Returns 1 if the operation succeeded, or
10176 * 0 if the client was already subscribed to that channel. */
10177 static int pubsubSubscribeChannel(redisClient
*c
, robj
*channel
) {
10178 struct dictEntry
*de
;
10179 list
*clients
= NULL
;
10182 /* Add the channel to the client -> channels hash table */
10183 if (dictAdd(c
->pubsub_channels
,channel
,NULL
) == DICT_OK
) {
10185 incrRefCount(channel
);
10186 /* Add the client to the channel -> list of clients hash table */
10187 de
= dictFind(server
.pubsub_channels
,channel
);
10189 clients
= listCreate();
10190 dictAdd(server
.pubsub_channels
,channel
,clients
);
10191 incrRefCount(channel
);
10193 clients
= dictGetEntryVal(de
);
10195 listAddNodeTail(clients
,c
);
10197 /* Notify the client */
10198 addReply(c
,shared
.mbulk3
);
10199 addReply(c
,shared
.subscribebulk
);
10200 addReplyBulk(c
,channel
);
10201 addReplyLongLong(c
,dictSize(c
->pubsub_channels
)+listLength(c
->pubsub_patterns
));
10205 /* Unsubscribe a client from a channel. Returns 1 if the operation succeeded, or
10206 * 0 if the client was not subscribed to the specified channel. */
10207 static int pubsubUnsubscribeChannel(redisClient
*c
, robj
*channel
, int notify
) {
10208 struct dictEntry
*de
;
10213 /* Remove the channel from the client -> channels hash table */
10214 incrRefCount(channel
); /* channel may be just a pointer to the same object
10215 we have in the hash tables. Protect it... */
10216 if (dictDelete(c
->pubsub_channels
,channel
) == DICT_OK
) {
10218 /* Remove the client from the channel -> clients list hash table */
10219 de
= dictFind(server
.pubsub_channels
,channel
);
10220 assert(de
!= NULL
);
10221 clients
= dictGetEntryVal(de
);
10222 ln
= listSearchKey(clients
,c
);
10223 assert(ln
!= NULL
);
10224 listDelNode(clients
,ln
);
10225 if (listLength(clients
) == 0) {
10226 /* Free the list and associated hash entry at all if this was
10227 * the latest client, so that it will be possible to abuse
10228 * Redis PUBSUB creating millions of channels. */
10229 dictDelete(server
.pubsub_channels
,channel
);
10232 /* Notify the client */
10234 addReply(c
,shared
.mbulk3
);
10235 addReply(c
,shared
.unsubscribebulk
);
10236 addReplyBulk(c
,channel
);
10237 addReplyLongLong(c
,dictSize(c
->pubsub_channels
)+
10238 listLength(c
->pubsub_patterns
));
10241 decrRefCount(channel
); /* it is finally safe to release it */
10245 /* Subscribe a client to a pattern. Returns 1 if the operation succeeded, or 0 if the clinet was already subscribed to that pattern. */
10246 static int pubsubSubscribePattern(redisClient
*c
, robj
*pattern
) {
10249 if (listSearchKey(c
->pubsub_patterns
,pattern
) == NULL
) {
10251 pubsubPattern
*pat
;
10252 listAddNodeTail(c
->pubsub_patterns
,pattern
);
10253 incrRefCount(pattern
);
10254 pat
= zmalloc(sizeof(*pat
));
10255 pat
->pattern
= getDecodedObject(pattern
);
10257 listAddNodeTail(server
.pubsub_patterns
,pat
);
10259 /* Notify the client */
10260 addReply(c
,shared
.mbulk3
);
10261 addReply(c
,shared
.psubscribebulk
);
10262 addReplyBulk(c
,pattern
);
10263 addReplyLongLong(c
,dictSize(c
->pubsub_channels
)+listLength(c
->pubsub_patterns
));
10267 /* Unsubscribe a client from a channel. Returns 1 if the operation succeeded, or
10268 * 0 if the client was not subscribed to the specified channel. */
10269 static int pubsubUnsubscribePattern(redisClient
*c
, robj
*pattern
, int notify
) {
10274 incrRefCount(pattern
); /* Protect the object. May be the same we remove */
10275 if ((ln
= listSearchKey(c
->pubsub_patterns
,pattern
)) != NULL
) {
10277 listDelNode(c
->pubsub_patterns
,ln
);
10279 pat
.pattern
= pattern
;
10280 ln
= listSearchKey(server
.pubsub_patterns
,&pat
);
10281 listDelNode(server
.pubsub_patterns
,ln
);
10283 /* Notify the client */
10285 addReply(c
,shared
.mbulk3
);
10286 addReply(c
,shared
.punsubscribebulk
);
10287 addReplyBulk(c
,pattern
);
10288 addReplyLongLong(c
,dictSize(c
->pubsub_channels
)+
10289 listLength(c
->pubsub_patterns
));
10291 decrRefCount(pattern
);
10295 /* Unsubscribe from all the channels. Return the number of channels the
10296 * client was subscribed from. */
10297 static int pubsubUnsubscribeAllChannels(redisClient
*c
, int notify
) {
10298 dictIterator
*di
= dictGetIterator(c
->pubsub_channels
);
10302 while((de
= dictNext(di
)) != NULL
) {
10303 robj
*channel
= dictGetEntryKey(de
);
10305 count
+= pubsubUnsubscribeChannel(c
,channel
,notify
);
10307 dictReleaseIterator(di
);
10311 /* Unsubscribe from all the patterns. Return the number of patterns the
10312 * client was subscribed from. */
10313 static int pubsubUnsubscribeAllPatterns(redisClient
*c
, int notify
) {
10318 listRewind(c
->pubsub_patterns
,&li
);
10319 while ((ln
= listNext(&li
)) != NULL
) {
10320 robj
*pattern
= ln
->value
;
10322 count
+= pubsubUnsubscribePattern(c
,pattern
,notify
);
10327 /* Publish a message */
10328 static int pubsubPublishMessage(robj
*channel
, robj
*message
) {
10330 struct dictEntry
*de
;
10334 /* Send to clients listening for that channel */
10335 de
= dictFind(server
.pubsub_channels
,channel
);
10337 list
*list
= dictGetEntryVal(de
);
10341 listRewind(list
,&li
);
10342 while ((ln
= listNext(&li
)) != NULL
) {
10343 redisClient
*c
= ln
->value
;
10345 addReply(c
,shared
.mbulk3
);
10346 addReply(c
,shared
.messagebulk
);
10347 addReplyBulk(c
,channel
);
10348 addReplyBulk(c
,message
);
10352 /* Send to clients listening to matching channels */
10353 if (listLength(server
.pubsub_patterns
)) {
10354 listRewind(server
.pubsub_patterns
,&li
);
10355 channel
= getDecodedObject(channel
);
10356 while ((ln
= listNext(&li
)) != NULL
) {
10357 pubsubPattern
*pat
= ln
->value
;
10359 if (stringmatchlen((char*)pat
->pattern
->ptr
,
10360 sdslen(pat
->pattern
->ptr
),
10361 (char*)channel
->ptr
,
10362 sdslen(channel
->ptr
),0)) {
10363 addReply(pat
->client
,shared
.mbulk4
);
10364 addReply(pat
->client
,shared
.pmessagebulk
);
10365 addReplyBulk(pat
->client
,pat
->pattern
);
10366 addReplyBulk(pat
->client
,channel
);
10367 addReplyBulk(pat
->client
,message
);
10371 decrRefCount(channel
);
10376 static void subscribeCommand(redisClient
*c
) {
10379 for (j
= 1; j
< c
->argc
; j
++)
10380 pubsubSubscribeChannel(c
,c
->argv
[j
]);
10383 static void unsubscribeCommand(redisClient
*c
) {
10384 if (c
->argc
== 1) {
10385 pubsubUnsubscribeAllChannels(c
,1);
10390 for (j
= 1; j
< c
->argc
; j
++)
10391 pubsubUnsubscribeChannel(c
,c
->argv
[j
],1);
10395 static void psubscribeCommand(redisClient
*c
) {
10398 for (j
= 1; j
< c
->argc
; j
++)
10399 pubsubSubscribePattern(c
,c
->argv
[j
]);
10402 static void punsubscribeCommand(redisClient
*c
) {
10403 if (c
->argc
== 1) {
10404 pubsubUnsubscribeAllPatterns(c
,1);
10409 for (j
= 1; j
< c
->argc
; j
++)
10410 pubsubUnsubscribePattern(c
,c
->argv
[j
],1);
10414 static void publishCommand(redisClient
*c
) {
10415 int receivers
= pubsubPublishMessage(c
->argv
[1],c
->argv
[2]);
10416 addReplyLongLong(c
,receivers
);
10419 /* ===================== WATCH (CAS alike for MULTI/EXEC) ===================
10421 * The implementation uses a per-DB hash table mapping keys to list of clients
10422 * WATCHing those keys, so that given a key that is going to be modified
10423 * we can mark all the associated clients as dirty.
10425 * Also every client contains a list of WATCHed keys so that it is possible to
10426 * un-watch such keys when the client is freed or when UNWATCH is called. */
10428 /* In the client->watched_keys list we need to use watchedKey structures
10429 * as in order to identify a key in Redis we need both the key name and the
10431 typedef struct watchedKey
{
10436 /* Watch for the specified key */
10437 static void watchForKey(redisClient
*c
, robj
*key
) {
10438 list
*clients
= NULL
;
10443 /* Check if we are already watching for this key */
10444 listRewind(c
->watched_keys
,&li
);
10445 while((ln
= listNext(&li
))) {
10446 wk
= listNodeValue(ln
);
10447 if (wk
->db
== c
->db
&& equalStringObjects(key
,wk
->key
))
10448 return; /* Key already watched */
10450 /* This key is not already watched in this DB. Let's add it */
10451 clients
= dictFetchValue(c
->db
->watched_keys
,key
);
10453 clients
= listCreate();
10454 dictAdd(c
->db
->watched_keys
,key
,clients
);
10457 listAddNodeTail(clients
,c
);
10458 /* Add the new key to the lits of keys watched by this client */
10459 wk
= zmalloc(sizeof(*wk
));
10463 listAddNodeTail(c
->watched_keys
,wk
);
10466 /* Unwatch all the keys watched by this client. To clean the EXEC dirty
10467 * flag is up to the caller. */
10468 static void unwatchAllKeys(redisClient
*c
) {
10472 if (listLength(c
->watched_keys
) == 0) return;
10473 listRewind(c
->watched_keys
,&li
);
10474 while((ln
= listNext(&li
))) {
10478 /* Lookup the watched key -> clients list and remove the client
10480 wk
= listNodeValue(ln
);
10481 clients
= dictFetchValue(wk
->db
->watched_keys
, wk
->key
);
10482 assert(clients
!= NULL
);
10483 listDelNode(clients
,listSearchKey(clients
,c
));
10484 /* Kill the entry at all if this was the only client */
10485 if (listLength(clients
) == 0)
10486 dictDelete(wk
->db
->watched_keys
, wk
->key
);
10487 /* Remove this watched key from the client->watched list */
10488 listDelNode(c
->watched_keys
,ln
);
10489 decrRefCount(wk
->key
);
10494 /* "Touch" a key, so that if this key is being WATCHed by some client the
10495 * next EXEC will fail. */
10496 static void touchWatchedKey(redisDb
*db
, robj
*key
) {
10501 if (dictSize(db
->watched_keys
) == 0) return;
10502 clients
= dictFetchValue(db
->watched_keys
, key
);
10503 if (!clients
) return;
10505 /* Mark all the clients watching this key as REDIS_DIRTY_CAS */
10506 /* Check if we are already watching for this key */
10507 listRewind(clients
,&li
);
10508 while((ln
= listNext(&li
))) {
10509 redisClient
*c
= listNodeValue(ln
);
10511 c
->flags
|= REDIS_DIRTY_CAS
;
10515 /* On FLUSHDB or FLUSHALL all the watched keys that are present before the
10516 * flush but will be deleted as effect of the flushing operation should
10517 * be touched. "dbid" is the DB that's getting the flush. -1 if it is
10518 * a FLUSHALL operation (all the DBs flushed). */
10519 static void touchWatchedKeysOnFlush(int dbid
) {
10523 /* For every client, check all the waited keys */
10524 listRewind(server
.clients
,&li1
);
10525 while((ln
= listNext(&li1
))) {
10526 redisClient
*c
= listNodeValue(ln
);
10527 listRewind(c
->watched_keys
,&li2
);
10528 while((ln
= listNext(&li2
))) {
10529 watchedKey
*wk
= listNodeValue(ln
);
10531 /* For every watched key matching the specified DB, if the
10532 * key exists, mark the client as dirty, as the key will be
10534 if (dbid
== -1 || wk
->db
->id
== dbid
) {
10535 if (dictFind(wk
->db
->dict
, wk
->key
) != NULL
)
10536 c
->flags
|= REDIS_DIRTY_CAS
;
10542 static void watchCommand(redisClient
*c
) {
10545 if (c
->flags
& REDIS_MULTI
) {
10546 addReplySds(c
,sdsnew("-ERR WATCH inside MULTI is not allowed\r\n"));
10549 for (j
= 1; j
< c
->argc
; j
++)
10550 watchForKey(c
,c
->argv
[j
]);
10551 addReply(c
,shared
.ok
);
10554 static void unwatchCommand(redisClient
*c
) {
10556 c
->flags
&= (~REDIS_DIRTY_CAS
);
10557 addReply(c
,shared
.ok
);
10560 /* ================================= Debugging ============================== */
10562 /* Compute the sha1 of string at 's' with 'len' bytes long.
10563 * The SHA1 is then xored againt the string pointed by digest.
10564 * Since xor is commutative, this operation is used in order to
10565 * "add" digests relative to unordered elements.
10567 * So digest(a,b,c,d) will be the same of digest(b,a,c,d) */
10568 static void xorDigest(unsigned char *digest
, void *ptr
, size_t len
) {
10570 unsigned char hash
[20], *s
= ptr
;
10574 SHA1Update(&ctx
,s
,len
);
10575 SHA1Final(hash
,&ctx
);
10577 for (j
= 0; j
< 20; j
++)
10578 digest
[j
] ^= hash
[j
];
10581 static void xorObjectDigest(unsigned char *digest
, robj
*o
) {
10582 o
= getDecodedObject(o
);
10583 xorDigest(digest
,o
->ptr
,sdslen(o
->ptr
));
10587 /* This function instead of just computing the SHA1 and xoring it
10588 * against diget, also perform the digest of "digest" itself and
10589 * replace the old value with the new one.
10591 * So the final digest will be:
10593 * digest = SHA1(digest xor SHA1(data))
10595 * This function is used every time we want to preserve the order so
10596 * that digest(a,b,c,d) will be different than digest(b,c,d,a)
10598 * Also note that mixdigest("foo") followed by mixdigest("bar")
10599 * will lead to a different digest compared to "fo", "obar".
10601 static void mixDigest(unsigned char *digest
, void *ptr
, size_t len
) {
10605 xorDigest(digest
,s
,len
);
10607 SHA1Update(&ctx
,digest
,20);
10608 SHA1Final(digest
,&ctx
);
10611 static void mixObjectDigest(unsigned char *digest
, robj
*o
) {
10612 o
= getDecodedObject(o
);
10613 mixDigest(digest
,o
->ptr
,sdslen(o
->ptr
));
10617 /* Compute the dataset digest. Since keys, sets elements, hashes elements
10618 * are not ordered, we use a trick: every aggregate digest is the xor
10619 * of the digests of their elements. This way the order will not change
10620 * the result. For list instead we use a feedback entering the output digest
10621 * as input in order to ensure that a different ordered list will result in
10622 * a different digest. */
10623 static void computeDatasetDigest(unsigned char *final
) {
10624 unsigned char digest
[20];
10626 dictIterator
*di
= NULL
;
10631 memset(final
,0,20); /* Start with a clean result */
10633 for (j
= 0; j
< server
.dbnum
; j
++) {
10634 redisDb
*db
= server
.db
+j
;
10636 if (dictSize(db
->dict
) == 0) continue;
10637 di
= dictGetIterator(db
->dict
);
10639 /* hash the DB id, so the same dataset moved in a different
10640 * DB will lead to a different digest */
10642 mixDigest(final
,&aux
,sizeof(aux
));
10644 /* Iterate this DB writing every entry */
10645 while((de
= dictNext(di
)) != NULL
) {
10646 robj
*key
, *o
, *kcopy
;
10649 memset(digest
,0,20); /* This key-val digest */
10650 key
= dictGetEntryKey(de
);
10652 if (!server
.vm_enabled
) {
10653 mixObjectDigest(digest
,key
);
10654 o
= dictGetEntryVal(de
);
10656 /* Don't work with the key directly as when VM is active
10657 * this is unsafe: TODO: fix decrRefCount to check if the
10658 * count really reached 0 to avoid this mess */
10659 kcopy
= dupStringObject(key
);
10660 mixObjectDigest(digest
,kcopy
);
10661 o
= lookupKeyRead(db
,kcopy
);
10662 decrRefCount(kcopy
);
10664 aux
= htonl(o
->type
);
10665 mixDigest(digest
,&aux
,sizeof(aux
));
10666 expiretime
= getExpire(db
,key
);
10668 /* Save the key and associated value */
10669 if (o
->type
== REDIS_STRING
) {
10670 mixObjectDigest(digest
,o
);
10671 } else if (o
->type
== REDIS_LIST
) {
10672 list
*list
= o
->ptr
;
10676 listRewind(list
,&li
);
10677 while((ln
= listNext(&li
))) {
10678 robj
*eleobj
= listNodeValue(ln
);
10680 mixObjectDigest(digest
,eleobj
);
10682 } else if (o
->type
== REDIS_SET
) {
10683 dict
*set
= o
->ptr
;
10684 dictIterator
*di
= dictGetIterator(set
);
10687 while((de
= dictNext(di
)) != NULL
) {
10688 robj
*eleobj
= dictGetEntryKey(de
);
10690 xorObjectDigest(digest
,eleobj
);
10692 dictReleaseIterator(di
);
10693 } else if (o
->type
== REDIS_ZSET
) {
10695 dictIterator
*di
= dictGetIterator(zs
->dict
);
10698 while((de
= dictNext(di
)) != NULL
) {
10699 robj
*eleobj
= dictGetEntryKey(de
);
10700 double *score
= dictGetEntryVal(de
);
10701 unsigned char eledigest
[20];
10703 snprintf(buf
,sizeof(buf
),"%.17g",*score
);
10704 memset(eledigest
,0,20);
10705 mixObjectDigest(eledigest
,eleobj
);
10706 mixDigest(eledigest
,buf
,strlen(buf
));
10707 xorDigest(digest
,eledigest
,20);
10709 dictReleaseIterator(di
);
10710 } else if (o
->type
== REDIS_HASH
) {
10714 hi
= hashInitIterator(o
);
10715 while (hashNext(hi
) != REDIS_ERR
) {
10716 unsigned char eledigest
[20];
10718 memset(eledigest
,0,20);
10719 obj
= hashCurrent(hi
,REDIS_HASH_KEY
);
10720 mixObjectDigest(eledigest
,obj
);
10722 obj
= hashCurrent(hi
,REDIS_HASH_VALUE
);
10723 mixObjectDigest(eledigest
,obj
);
10725 xorDigest(digest
,eledigest
,20);
10727 hashReleaseIterator(hi
);
10729 redisPanic("Unknown object type");
10731 /* If the key has an expire, add it to the mix */
10732 if (expiretime
!= -1) xorDigest(digest
,"!!expire!!",10);
10733 /* We can finally xor the key-val digest to the final digest */
10734 xorDigest(final
,digest
,20);
10736 dictReleaseIterator(di
);
10740 static void debugCommand(redisClient
*c
) {
10741 if (!strcasecmp(c
->argv
[1]->ptr
,"segfault")) {
10742 *((char*)-1) = 'x';
10743 } else if (!strcasecmp(c
->argv
[1]->ptr
,"reload")) {
10744 if (rdbSave(server
.dbfilename
) != REDIS_OK
) {
10745 addReply(c
,shared
.err
);
10749 if (rdbLoad(server
.dbfilename
) != REDIS_OK
) {
10750 addReply(c
,shared
.err
);
10753 redisLog(REDIS_WARNING
,"DB reloaded by DEBUG RELOAD");
10754 addReply(c
,shared
.ok
);
10755 } else if (!strcasecmp(c
->argv
[1]->ptr
,"loadaof")) {
10757 if (loadAppendOnlyFile(server
.appendfilename
) != REDIS_OK
) {
10758 addReply(c
,shared
.err
);
10761 redisLog(REDIS_WARNING
,"Append Only File loaded by DEBUG LOADAOF");
10762 addReply(c
,shared
.ok
);
10763 } else if (!strcasecmp(c
->argv
[1]->ptr
,"object") && c
->argc
== 3) {
10764 dictEntry
*de
= dictFind(c
->db
->dict
,c
->argv
[2]);
10768 addReply(c
,shared
.nokeyerr
);
10771 key
= dictGetEntryKey(de
);
10772 val
= dictGetEntryVal(de
);
10773 if (!server
.vm_enabled
|| (key
->storage
== REDIS_VM_MEMORY
||
10774 key
->storage
== REDIS_VM_SWAPPING
)) {
10778 if (val
->encoding
< (sizeof(strencoding
)/sizeof(char*))) {
10779 strenc
= strencoding
[val
->encoding
];
10781 snprintf(buf
,64,"unknown encoding %d\n", val
->encoding
);
10784 addReplySds(c
,sdscatprintf(sdsempty(),
10785 "+Key at:%p refcount:%d, value at:%p refcount:%d "
10786 "encoding:%s serializedlength:%lld\r\n",
10787 (void*)key
, key
->refcount
, (void*)val
, val
->refcount
,
10788 strenc
, (long long) rdbSavedObjectLen(val
,NULL
)));
10790 addReplySds(c
,sdscatprintf(sdsempty(),
10791 "+Key at:%p refcount:%d, value swapped at: page %llu "
10792 "using %llu pages\r\n",
10793 (void*)key
, key
->refcount
, (unsigned long long) key
->vm
.page
,
10794 (unsigned long long) key
->vm
.usedpages
));
10796 } else if (!strcasecmp(c
->argv
[1]->ptr
,"swapin") && c
->argc
== 3) {
10797 lookupKeyRead(c
->db
,c
->argv
[2]);
10798 addReply(c
,shared
.ok
);
10799 } else if (!strcasecmp(c
->argv
[1]->ptr
,"swapout") && c
->argc
== 3) {
10800 dictEntry
*de
= dictFind(c
->db
->dict
,c
->argv
[2]);
10803 if (!server
.vm_enabled
) {
10804 addReplySds(c
,sdsnew("-ERR Virtual Memory is disabled\r\n"));
10808 addReply(c
,shared
.nokeyerr
);
10811 key
= dictGetEntryKey(de
);
10812 val
= dictGetEntryVal(de
);
10813 /* If the key is shared we want to create a copy */
10814 if (key
->refcount
> 1) {
10815 robj
*newkey
= dupStringObject(key
);
10817 key
= dictGetEntryKey(de
) = newkey
;
10820 if (key
->storage
!= REDIS_VM_MEMORY
) {
10821 addReplySds(c
,sdsnew("-ERR This key is not in memory\r\n"));
10822 } else if (vmSwapObjectBlocking(key
,val
) == REDIS_OK
) {
10823 dictGetEntryVal(de
) = NULL
;
10824 addReply(c
,shared
.ok
);
10826 addReply(c
,shared
.err
);
10828 } else if (!strcasecmp(c
->argv
[1]->ptr
,"populate") && c
->argc
== 3) {
10833 if (getLongFromObjectOrReply(c
, c
->argv
[2], &keys
, NULL
) != REDIS_OK
)
10835 for (j
= 0; j
< keys
; j
++) {
10836 snprintf(buf
,sizeof(buf
),"key:%lu",j
);
10837 key
= createStringObject(buf
,strlen(buf
));
10838 if (lookupKeyRead(c
->db
,key
) != NULL
) {
10842 snprintf(buf
,sizeof(buf
),"value:%lu",j
);
10843 val
= createStringObject(buf
,strlen(buf
));
10844 dictAdd(c
->db
->dict
,key
,val
);
10846 addReply(c
,shared
.ok
);
10847 } else if (!strcasecmp(c
->argv
[1]->ptr
,"digest") && c
->argc
== 2) {
10848 unsigned char digest
[20];
10849 sds d
= sdsnew("+");
10852 computeDatasetDigest(digest
);
10853 for (j
= 0; j
< 20; j
++)
10854 d
= sdscatprintf(d
, "%02x",digest
[j
]);
10856 d
= sdscatlen(d
,"\r\n",2);
10859 addReplySds(c
,sdsnew(
10860 "-ERR Syntax error, try DEBUG [SEGFAULT|OBJECT <key>|SWAPIN <key>|SWAPOUT <key>|RELOAD]\r\n"));
10864 static void _redisAssert(char *estr
, char *file
, int line
) {
10865 redisLog(REDIS_WARNING
,"=== ASSERTION FAILED ===");
10866 redisLog(REDIS_WARNING
,"==> %s:%d '%s' is not true",file
,line
,estr
);
10867 #ifdef HAVE_BACKTRACE
10868 redisLog(REDIS_WARNING
,"(forcing SIGSEGV in order to print the stack trace)");
10869 *((char*)-1) = 'x';
10873 static void _redisPanic(char *msg
, char *file
, int line
) {
10874 redisLog(REDIS_WARNING
,"!!! Software Failure. Press left mouse button to continue");
10875 redisLog(REDIS_WARNING
,"Guru Meditation: %s #%s:%d",msg
,file
,line
);
10876 #ifdef HAVE_BACKTRACE
10877 redisLog(REDIS_WARNING
,"(forcing SIGSEGV in order to print the stack trace)");
10878 *((char*)-1) = 'x';
10882 /* =================================== Main! ================================ */
/* Read the overcommit policy from /proc/sys/vm/overcommit_memory.
 *
 * Returns the integer stored in the file, or -1 when the file cannot be
 * opened, cannot be read, or does not start with a number. */
int linuxOvercommitMemoryValue(void) {
    FILE *fp = fopen("/proc/sys/vm/overcommit_memory","r");
    char buf[64];
    char *end;
    long value;

    if (!fp) return -1;
    /* Bound the read by the real buffer size rather than a repeated literal. */
    if (fgets(buf,sizeof(buf),fp) == NULL) {
        fclose(fp);
        return -1;
    }
    fclose(fp);

    /* Unparsable content must not be mistaken for the value 0. */
    value = strtol(buf,&end,10);
    if (end == buf) return -1;
    return (int)value;
}
10899 void linuxOvercommitMemoryWarning(void) {
10900 if (linuxOvercommitMemoryValue() == 0) {
10901 redisLog(REDIS_WARNING
,"WARNING overcommit_memory is set to 0! Background save may fail under low memory condition. To fix this issue add 'vm.overcommit_memory = 1' to /etc/sysctl.conf and then reboot or run the command 'sysctl vm.overcommit_memory=1' for this to take effect.");
10904 #endif /* __linux__ */
10906 static void daemonize(void) {
10910 if (fork() != 0) exit(0); /* parent exits */
10911 setsid(); /* create a new session */
10913 /* Every output goes to /dev/null. If Redis is daemonized but
10914 * the 'logfile' is set to 'stdout' in the configuration file
10915 * it will not log at all. */
10916 if ((fd
= open("/dev/null", O_RDWR
, 0)) != -1) {
10917 dup2(fd
, STDIN_FILENO
);
10918 dup2(fd
, STDOUT_FILENO
);
10919 dup2(fd
, STDERR_FILENO
);
10920 if (fd
> STDERR_FILENO
) close(fd
);
10922 /* Try to write the pid file */
10923 fp
= fopen(server
.pidfile
,"w");
10925 fprintf(fp
,"%d\n",getpid());
10930 static void version() {
10931 printf("Redis server version %s (%s:%d)\n", REDIS_VERSION
,
10932 REDIS_GIT_SHA1
, atoi(REDIS_GIT_DIRTY
) > 0);
/* Print the command line synopsis on stderr, then exit with status 1. */
static void usage(void) {
    fprintf(stderr,"Usage: ./redis-server [/path/to/redis.conf]\n");
    fprintf(stderr," ./redis-server - (read config from stdin)\n");
    exit(1);
}
/* Program entry point.
 *
 * Steps visible here, in order: set up the default configuration; handle the
 * "-v"/"--version" and "--help" arguments, otherwise reset the save
 * parameters and load the configuration named by argv[1]; log a warning
 * that the default config is used when no file was given; daemonize when
 * server.daemonize is set; log the startup banner and the Linux overcommit
 * warning; load the dataset with loadAppendOnlyFile() when
 * server.appendonly is set (rdbLoad() presumably being the alternative
 * branch) and log how long it took; register beforeSleep on the event loop;
 * finally delete the event loop. */
10942 int main(int argc
, char **argv
) {
10945 initServerConfig();
10947 if (strcmp(argv
[1], "-v") == 0 ||
10948 strcmp(argv
[1], "--version") == 0) version();
10949 if (strcmp(argv
[1], "--help") == 0) usage();
10950 resetServerSaveParams();
10951 loadServerConfig(argv
[1]);
10952 } else if ((argc
> 2)) {
10955 redisLog(REDIS_WARNING
,"Warning: no config file specified, using the default config. In order to specify a config file use 'redis-server /path/to/redis.conf'");
10957 if (server
.daemonize
) daemonize();
10959 redisLog(REDIS_NOTICE
,"Server started, Redis version " REDIS_VERSION
);
10961 linuxOvercommitMemoryWarning();
/* Timestamp taken before loading so the elapsed seconds can be logged. */
10963 start
= time(NULL
);
10964 if (server
.appendonly
) {
/* NOTE(review): "%ld" receives the result of time(NULL)-start; confirm
 * that this type is long on every supported target. */
10965 if (loadAppendOnlyFile(server
.appendfilename
) == REDIS_OK
)
10966 redisLog(REDIS_NOTICE
,"DB loaded from append only file: %ld seconds",time(NULL
)-start
);
10968 if (rdbLoad(server
.dbfilename
) == REDIS_OK
)
10969 redisLog(REDIS_NOTICE
,"DB loaded from disk: %ld seconds",time(NULL
)-start
);
10971 redisLog(REDIS_NOTICE
,"The server is now ready to accept connections on port %d", server
.port
);
10972 aeSetBeforeSleepProc(server
.el
,beforeSleep
);
10974 aeDeleteEventLoop(server
.el
);
10978 /* ============================= Backtrace support ========================= */
10980 #ifdef HAVE_BACKTRACE
10981 static char *findFuncName(void *pointer
, unsigned long *offset
);
/* Return, as a void pointer, the instruction-pointer field stored in the
 * machine context of 'uc'. The field that is read is selected at compile
 * time: FreeBSD (mc_eip), dietlibc (eip), Apple (__ss.__rip or __ss.__eip,
 * with separate branches depending on MAC_OS_X_VERSION_10_6), Linux on x86
 * (gregs[REG_EIP]) and Linux on IA64 (sc_ip). */
10983 static void *getMcontextEip(ucontext_t
*uc
) {
10984 #if defined(__FreeBSD__)
10985 return (void*) uc
->uc_mcontext
.mc_eip
;
10986 #elif defined(__dietlibc__)
10987 return (void*) uc
->uc_mcontext
.eip
;
10988 #elif defined(__APPLE__) && !defined(MAC_OS_X_VERSION_10_6)
10990 return (void*) uc
->uc_mcontext
->__ss
.__rip
;
10992 return (void*) uc
->uc_mcontext
->__ss
.__eip
;
10994 #elif defined(__APPLE__) && defined(MAC_OS_X_VERSION_10_6)
10995 #if defined(_STRUCT_X86_THREAD_STATE64) && !defined(__i386__)
10996 return (void*) uc
->uc_mcontext
->__ss
.__rip
;
10998 return (void*) uc
->uc_mcontext
->__ss
.__eip
;
/* NOTE(review): this branch also covers 64 bit x86 builds and indexes
 * gregs with REG_EIP; confirm that REG_EIP is defined (or mapped to the
 * 64 bit register index) on those targets. */
11000 #elif defined(__i386__) || defined(__X86_64__) || defined(__x86_64__)
11001 return (void*) uc
->uc_mcontext
.gregs
[REG_EIP
]; /* Linux 32/64 bit */
11002 #elif defined(__ia64__) /* Linux IA64 */
11003 return (void*) uc
->uc_mcontext
.sc_ip
;
/* Crash signal handler (installed through sa_sigaction by
 * setupSigSegvAction).
 *
 * sig    - number of the signal received, reported in the log.
 * info   - unused.
 * secret - the signal context, read as a ucontext_t pointer.
 *
 * Logs a banner with the Redis version and the signal number, then the
 * string produced by genRedisInfoString(), and finally one log line per
 * captured stack frame starting from frame 1. */
11009 static void segvHandler(int sig
, siginfo_t
*info
, void *secret
) {
11011 char **messages
= NULL
;
11012 int i
, trace_size
= 0;
11013 unsigned long offset
=0;
11014 ucontext_t
*uc
= (ucontext_t
*) secret
;
11016 REDIS_NOTUSED(info
);
11018 redisLog(REDIS_WARNING
,
11019 "======= Ooops! Redis %s got signal: -%d- =======", REDIS_VERSION
, sig
);
11020 infostring
= genRedisInfoString();
11021 redisLog(REDIS_WARNING
, "%s",infostring
);
11022 /* It's not safe to sdsfree() the returned string under memory
11023 * corruption conditions. Let it leak as we are going to abort */
/* Capture at most 100 frames into 'trace'. */
11025 trace_size
= backtrace(trace
, 100);
11026 /* overwrite sigaction with caller's address */
11027 if (getMcontextEip(uc
) != NULL
) {
11028 trace
[1] = getMcontextEip(uc
);
11030 messages
= backtrace_symbols(trace
, trace_size
);
/* For every frame, look the address up with findFuncName(). The string
 * from backtrace_symbols() is logged as is when no name was found, or when
 * the number following '+' in that string is smaller than the offset
 * computed by findFuncName(); otherwise a line built from the frame index,
 * the address, the name found and its offset is logged. */
11032 for (i
=1; i
<trace_size
; ++i
) {
11033 char *fn
= findFuncName(trace
[i
], &offset
), *p
;
11035 p
= strchr(messages
[i
],'+');
11036 if (!fn
|| (p
&& ((unsigned long)strtol(p
+1,NULL
,10)) < offset
)) {
11037 redisLog(REDIS_WARNING
,"%s", messages
[i
]);
11039 redisLog(REDIS_WARNING
,"%d redis-server %p %s + %d", i
, trace
[i
], fn
, (unsigned int)offset
);
11042 /* free(messages); Don't call free() with possibly corrupted memory. */
11046 static void sigtermHandler(int sig
) {
11047 REDIS_NOTUSED(sig
);
11049 redisLog(REDIS_WARNING
,"SIGTERM received, scheduling shutting down...");
11050 server
.shutdown_asap
= 1;
11053 static void setupSigSegvAction(void) {
11054 struct sigaction act
;
11056 sigemptyset (&act
.sa_mask
);
11057 /* When the SA_SIGINFO flag is set in sa_flags then sa_sigaction
11058 * is used. Otherwise, sa_handler is used */
11059 act
.sa_flags
= SA_NODEFER
| SA_ONSTACK
| SA_RESETHAND
| SA_SIGINFO
;
11060 act
.sa_sigaction
= segvHandler
;
11061 sigaction (SIGSEGV
, &act
, NULL
);
11062 sigaction (SIGBUS
, &act
, NULL
);
11063 sigaction (SIGFPE
, &act
, NULL
);
11064 sigaction (SIGILL
, &act
, NULL
);
11065 sigaction (SIGBUS
, &act
, NULL
);
11067 act
.sa_flags
= SA_NODEFER
| SA_ONSTACK
| SA_RESETHAND
;
11068 act
.sa_handler
= sigtermHandler
;
11069 sigaction (SIGTERM
, &act
, NULL
);
11073 #include "staticsymbols.h"
11074 /* This function try to convert a pointer into a function name. It's used in
11075 * oreder to provide a backtrace under segmentation fault that's able to
11076 * display functions declared as static (otherwise the backtrace is useless). */
11077 static char *findFuncName(void *pointer
, unsigned long *offset
){
11079 unsigned long off
, minoff
= 0;
11081 /* Try to match against the Symbol with the smallest offset */
11082 for (i
=0; symsTable
[i
].pointer
; i
++) {
11083 unsigned long lp
= (unsigned long) pointer
;
11085 if (lp
!= (unsigned long)-1 && lp
>= symsTable
[i
].pointer
) {
11086 off
=lp
-symsTable
[i
].pointer
;
11087 if (ret
< 0 || off
< minoff
) {
11093 if (ret
== -1) return NULL
;
11095 return symsTable
[ret
].name
;
11097 #else /* HAVE_BACKTRACE */
/* Variant of setupSigSegvAction() compiled when HAVE_BACKTRACE is not
 * defined.
 * NOTE(review): the HAVE_BACKTRACE variant is also where the SIGTERM handler
 * gets registered; confirm whether this variant is expected to install it
 * as well. */
11098 static void setupSigSegvAction(void) {
11100 #endif /* HAVE_BACKTRACE */