2 * Copyright (c) 2009-2010, Salvatore Sanfilippo <antirez at gmail dot com>
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions are met:
8 * * Redistributions of source code must retain the above copyright notice,
9 * this list of conditions and the following disclaimer.
10 * * Redistributions in binary form must reproduce the above copyright
11 * notice, this list of conditions and the following disclaimer in the
12 * documentation and/or other materials provided with the distribution.
13 * * Neither the name of Redis nor the names of its contributors may be used
14 * to endorse or promote products derived from this software without
15 * specific prior written permission.
17 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
18 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
21 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
22 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
23 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
24 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
25 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
26 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
27 * POSSIBILITY OF SUCH DAMAGE.
30 #define REDIS_VERSION "2.1.1"
45 #endif /* HAVE_BACKTRACE */
53 #include <arpa/inet.h>
57 #include <sys/resource.h>
65 #include "solarisfixes.h"
69 #include "ae.h" /* Event driven programming library */
70 #include "sds.h" /* Dynamic safe strings */
71 #include "anet.h" /* Networking the easy way */
72 #include "dict.h" /* Hash tables */
73 #include "adlist.h" /* Linked lists */
74 #include "zmalloc.h" /* total memory usage aware version of malloc/free */
75 #include "lzf.h" /* LZF compression library */
76 #include "pqsort.h" /* Partial qsort for SORT+LIMIT */
77 #include "zipmap.h" /* Compact dictionary-alike data structure */
78 #include "sha1.h" /* SHA1 is used for DEBUG DIGEST */
79 #include "release.h" /* Release and/or git repository information */
/* Static server configuration */
#define REDIS_SERVERPORT             6379            /* TCP port */
#define REDIS_MAXIDLETIME            (60*5)          /* default client timeout */
#define REDIS_IOBUF_LEN              1024
#define REDIS_LOADBUF_LEN            1024
#define REDIS_STATIC_ARGS            8
#define REDIS_DEFAULT_DBNUM          16
#define REDIS_CONFIGLINE_MAX         1024
#define REDIS_OBJFREELIST_MAX        1000000         /* Max number of objects to cache */
#define REDIS_MAX_SYNC_TIME          60              /* A slave can't take longer than this to sync */
#define REDIS_EXPIRELOOKUPS_PER_CRON 10              /* look up 10 expires per loop */
#define REDIS_MAX_WRITE_PER_EVENT    (1024*64)
#define REDIS_REQUEST_MAX_SIZE       (1024*1024*256) /* max bytes in inline command */

/* writev is used when more than REDIS_WRITEV_THRESHOLD write packets are pending */
#define REDIS_WRITEV_THRESHOLD       3
/* Max number of iovecs used for each writev call */
#define REDIS_WRITEV_IOVEC_COUNT     256

/* Hash table parameters */
#define REDIS_HT_MINFILL             10              /* Minimal hash table fill 10% */
/* Command flags. These are OR-ed together in the command table entries. */
#define REDIS_CMD_BULK              (1<<0) /* Bulk write command */
#define REDIS_CMD_INLINE            (1<<1) /* Inline command */
/* REDIS_CMD_DENYOOM deserves a longer comment: every command marked with this
   flag returns an error when the 'maxmemory' option is set in the config file
   and the server is using more than maxmemory bytes of memory.
   In short, these commands are denied on low memory conditions. */
#define REDIS_CMD_DENYOOM           (1<<2)
#define REDIS_CMD_FORCE_REPLICATION (1<<3) /* Force replication even if dirty is 0 */

/* Value stored in an object's 'type' field by initStaticStringObject() */
#define REDIS_STRING 0
/* Object encodings. Some kinds of objects, such as Strings and Hashes, can be
 * represented internally in more than one way. The 'encoding' field of the
 * object is set to one of the following values. */
#define REDIS_ENCODING_RAW    0 /* Raw representation */
#define REDIS_ENCODING_INT    1 /* Encoded as integer */
#define REDIS_ENCODING_ZIPMAP 2 /* Encoded as zipmap */
#define REDIS_ENCODING_HT     3 /* Encoded as a hash table */
132 static char* strencoding
[] = {
133 "raw", "int", "zipmap", "hashtable"
/* Object types only used for dumping to disk */
#define REDIS_EXPIRETIME 253
#define REDIS_SELECTDB   254
#define REDIS_EOF        255

/* Defines related to the dump file format. Storing a 32 bit length for short
 * keys would take a lot of space, so the two most significant bits of the
 * first byte tell how to interpret the length:
 *
 * 00|000000 => the len is the remaining 6 bits of this byte
 * 01|000000 00000000 => the len is 14 bits: 6 bits + 8 bits of the next byte
 * 10|000000 [32 bit integer] => a full 32 bit len will follow
 * 11|000000 => a specially encoded object will follow. The six bits
 *              specify the kind of object that follows.
 *              See the REDIS_RDB_ENC_* defines.
 *
 * Lengths up to 63 are stored using a single byte; most DB keys, and many
 * values, will fit inside. */
#define REDIS_RDB_6BITLEN  0
#define REDIS_RDB_14BITLEN 1
#define REDIS_RDB_32BITLEN 2
#define REDIS_RDB_ENCVAL   3
#define REDIS_RDB_LENERR   UINT_MAX

/* When the length of a string object stored on disk has its first two bits
 * set, the remaining six bits specify a special encoding for the object
 * according to the following defines: */
#define REDIS_RDB_ENC_INT8  0 /* 8 bit signed integer */
#define REDIS_RDB_ENC_INT16 1 /* 16 bit signed integer */
#define REDIS_RDB_ENC_INT32 2 /* 32 bit signed integer */
#define REDIS_RDB_ENC_LZF   3 /* string compressed with FASTLZ */
/* Virtual memory: where an object currently is (kept in its 'storage' field) */
#define REDIS_VM_MEMORY   0 /* The object is in memory */
#define REDIS_VM_SWAPPED  1 /* The object is on disk */
#define REDIS_VM_SWAPPING 2 /* Redis is swapping this object to disk */
#define REDIS_VM_LOADING  3 /* Redis is loading this object from disk */

/* Virtual memory static configuration.
 * See vmFindContiguousPages() to know more about these magic numbers. */
#define REDIS_VM_MAX_NEAR_PAGES  65536
#define REDIS_VM_MAX_RANDOM_JUMP 4096
#define REDIS_VM_MAX_THREADS     32
#define REDIS_THREAD_STACK_SIZE  (1024*1024*4)
/* The *percentage* of completed I/O jobs to process each time the handler is
 * called. Virtual Memory I/O operations are performed by threads, but once
 * completed they must be processed by the main thread in order to take
 * effect. */
#define REDIS_MAX_COMPLETED_JOBS_PROCESSED 1
/* Client flags, combined with | in the client's 'flags' field */
#define REDIS_SLAVE     (1<<0) /* This client is a slave server */
#define REDIS_MASTER    (1<<1) /* This client is a master server */
#define REDIS_MONITOR   (1<<2) /* This client is a slave monitor, see MONITOR */
#define REDIS_MULTI     (1<<3) /* This client is in a MULTI context */
#define REDIS_BLOCKED   (1<<4) /* The client is waiting in a blocking operation */
#define REDIS_IO_WAIT   (1<<5) /* The client is waiting for Virtual Memory I/O */
#define REDIS_DIRTY_CAS (1<<6) /* Watched keys modified. EXEC will fail. */
/* Slave replication state - slave side */
#define REDIS_REPL_NONE      0 /* No active replication */
#define REDIS_REPL_CONNECT   1 /* Must connect to master */
#define REDIS_REPL_CONNECTED 2 /* Connected to master */

/* Slave replication state - from the point of view of the master.
 * In the SEND_BULK and ONLINE states the slave receives new updates in its
 * output queue. In the WAIT_BGSAVE state, instead, the server is waiting to
 * start the next background saving in order to send updates to it. */
#define REDIS_REPL_WAIT_BGSAVE_START 3 /* master waits bgsave to start feeding it */
#define REDIS_REPL_WAIT_BGSAVE_END   4 /* master waits bgsave to start bulk DB transmission */
#define REDIS_REPL_SEND_BULK         5 /* master is sending the bulk DB */
#define REDIS_REPL_ONLINE            6 /* bulk DB already transmitted, receive updates */
209 /* List related stuff */
/* Sort operations */
#define REDIS_SORT_GET    0
#define REDIS_SORT_ASC    1
#define REDIS_SORT_DESC   2
#define REDIS_SORTKEY_MAX 1024

#define REDIS_DEBUG   0
#define REDIS_VERBOSE 1
#define REDIS_NOTICE  2
#define REDIS_WARNING 3
/* Anti-warning macro: evaluates V and discards the result.
 * The argument is parenthesized so that the cast applies to the whole
 * expression; without the parentheses REDIS_NOTUSED(a + b) would expand to
 * ((void) a + b), which does not compile. */
#define REDIS_NOTUSED(V) ((void)(V))

#define ZSKIPLIST_MAXLEVEL 32 /* Should be enough for 2^32 elements */
#define ZSKIPLIST_P 0.25      /* Skiplist P = 1/4 */
/* Append only defines */
#define APPENDFSYNC_NO       0
#define APPENDFSYNC_ALWAYS   1
#define APPENDFSYNC_EVERYSEC 2

/* Hashes related defaults */
#define REDIS_HASH_MAX_ZIPMAP_ENTRIES 64
#define REDIS_HASH_MAX_ZIPMAP_VALUE   512
/* Custom assert/panic macros. The original note on these macros says a stack
 * trace can be printed, which is why the standard assert() is not used.
 *
 * redisAssert(_e): when _e is false, passes the stringified expression plus
 *   __FILE__ and __LINE__ to _redisAssert() and then calls _exit(1).
 * redisPanic(_e):  unconditionally passes the stringified argument plus
 *   __FILE__ and __LINE__ to _redisPanic() and then calls _exit(1).
 *
 * Both expansions are fully parenthesized so that the comma expression stays
 * a single operand: without the outer parentheses, using redisPanic() as the
 * last operand of ?: would leave the _exit(1) outside the conditional and it
 * would always run. */
#define redisAssert(_e) ((_e) ? (void)0 : (_redisAssert(#_e,__FILE__,__LINE__),_exit(1)))
#define redisPanic(_e) (_redisPanic(#_e,__FILE__,__LINE__),_exit(1))
static void _redisAssert(char *estr, char *file, int line);
static void _redisPanic(char *msg, char *file, int line);
246 /*================================= Data types ============================== */
248 /* A redis object, that is a type able to hold a string / list / set */
250 /* The VM object structure */
251 struct redisObjectVM
{
252 off_t page
; /* the page at which the object is stored on disk */
253 off_t usedpages
; /* number of pages used on disk */
254 time_t atime
; /* Last access time */
257 /* The actual Redis Object */
258 typedef struct redisObject
{
261 unsigned char encoding
;
262 unsigned char storage
; /* If this object is a key, where is the value?
263 * REDIS_VM_MEMORY, REDIS_VM_SWAPPED, ... */
264 unsigned char vtype
; /* If this object is a key, and value is swapped out,
265 * this is the type of the swapped out object. */
267 /* VM fields, these are only allocated if VM is active, otherwise the
268 * object allocation function will just allocate
269 * sizeof(redisObject) minus sizeof(redisObjectVM), so using
270 * Redis without VM active will not have any overhead. */
271 struct redisObjectVM vm
;
274 /* Macro used to initialize a Redis object allocated on the stack.
275 * Note that this macro is kept near the structure definition to make sure
276 * we'll update it when the structure is changed, to avoid bugs like
277 * bug #85 introduced exactly in this way. */
278 #define initStaticStringObject(_var,_ptr) do { \
280 _var.type = REDIS_STRING; \
281 _var.encoding = REDIS_ENCODING_RAW; \
283 if (server.vm_enabled) _var.storage = REDIS_VM_MEMORY; \
286 typedef struct redisDb
{
287 dict
*dict
; /* The keyspace for this DB */
288 dict
*expires
; /* Timeout of keys with a timeout set */
289 dict
*blocking_keys
; /* Keys with clients waiting for data (BLPOP) */
290 dict
*io_keys
; /* Keys with clients waiting for VM I/O */
291 dict
*watched_keys
; /* WATCHED keys for MULTI/EXEC CAS */
295 /* Client MULTI/EXEC state */
296 typedef struct multiCmd
{
299 struct redisCommand
*cmd
;
302 typedef struct multiState
{
303 multiCmd
*commands
; /* Array of MULTI commands */
304 int count
; /* Total number of MULTI commands */
307 /* With multiplexing we need to keep per-client state.
308 * Clients are kept in a linked list. */
309 typedef struct redisClient
{
314 robj
**argv
, **mbargv
;
316 int bulklen
; /* bulk read len. -1 if not in bulk read mode */
317 int multibulk
; /* multi bulk command format active */
320 time_t lastinteraction
; /* time of the last interaction, used for timeout */
321 int flags
; /* REDIS_SLAVE | REDIS_MONITOR | REDIS_MULTI ... */
322 int slaveseldb
; /* slave selected db, if this client is a slave */
323 int authenticated
; /* when requirepass is non-NULL */
324 int replstate
; /* replication state if this is a slave */
325 int repldbfd
; /* replication DB file descriptor */
326 long repldboff
; /* replication DB file offset */
327 off_t repldbsize
; /* replication DB file size */
328 multiState mstate
; /* MULTI/EXEC state */
329 robj
**blocking_keys
; /* The key we are waiting to terminate a blocking
330 * operation such as BLPOP. Otherwise NULL. */
331 int blocking_keys_num
; /* Number of blocking keys */
332 time_t blockingto
; /* Blocking operation timeout. If UNIX current time
333 * is >= blockingto then the operation timed out. */
334 list
*io_keys
; /* Keys this client is waiting to be loaded from the
335 * swap file in order to continue. */
336 list
*watched_keys
; /* Keys WATCHED for MULTI/EXEC CAS */
337 dict
*pubsub_channels
; /* channels a client is interested in (SUBSCRIBE) */
338 list
*pubsub_patterns
; /* patterns a client is interested in (SUBSCRIBE) */
346 /* Global server state structure */
351 long long dirty
; /* changes to DB from the last save */
353 list
*slaves
, *monitors
;
354 char neterr
[ANET_ERR_LEN
];
356 int cronloops
; /* number of times the cron function run */
357 list
*objfreelist
; /* A list of freed objects to avoid malloc() */
358 time_t lastsave
; /* Unix time of the last successful save */
359 /* Fields used only for stats */
360 time_t stat_starttime
; /* server start time */
361 long long stat_numcommands
; /* number of processed commands */
362 long long stat_numconnections
; /* number of connections received */
363 long long stat_expiredkeys
; /* number of expired keys */
377 pid_t bgsavechildpid
;
378 pid_t bgrewritechildpid
;
379 sds bgrewritebuf
; /* buffer taken by parent during append only rewrite */
380 sds aofbuf
; /* AOF buffer, written before entering the event loop */
381 struct saveparam
*saveparams
;
386 char *appendfilename
;
390 /* Replication related */
395 redisClient
*master
; /* client that is master for this slave */
397 unsigned int maxclients
;
398 unsigned long long maxmemory
;
399 unsigned int blpop_blocked_clients
;
400 unsigned int vm_blocked_clients
;
401 /* Sort parameters - qsort_r() is only available under BSD so we
402 * have to take this state global, in order to pass it to sortCompare() */
406 /* Virtual memory configuration */
411 unsigned long long vm_max_memory
;
413 size_t hash_max_zipmap_entries
;
414 size_t hash_max_zipmap_value
;
415 /* Virtual memory state */
418 off_t vm_next_page
; /* Next probably empty page */
419 off_t vm_near_pages
; /* Number of pages allocated sequentially */
420 unsigned char *vm_bitmap
; /* Bitmap of free/used pages */
421 time_t unixtime
; /* Unix time sampled every second. */
422 /* Virtual memory I/O threads stuff */
423 /* An I/O thread process an element taken from the io_jobs queue and
424 * put the result of the operation in the io_done list. While the
425 * job is being processed, it's put on io_processing queue. */
426 list
*io_newjobs
; /* List of VM I/O jobs yet to be processed */
427 list
*io_processing
; /* List of VM I/O jobs being processed */
428 list
*io_processed
; /* List of VM I/O jobs already processed */
429 list
*io_ready_clients
; /* Clients ready to be unblocked. All keys loaded */
430 pthread_mutex_t io_mutex
; /* lock to access io_jobs/io_done/io_thread_job */
431 pthread_mutex_t obj_freelist_mutex
; /* safe redis objects creation/free */
432 pthread_mutex_t io_swapfile_mutex
; /* So we can lseek + write */
433 pthread_attr_t io_threads_attr
; /* attributes for threads creation */
434 int io_active_threads
; /* Number of running I/O threads */
435 int vm_max_threads
; /* Max number of I/O threads running at the same time */
436 /* Our main thread is blocked on the event loop, looking for sockets ready
437 * to be read or written, so when a threaded I/O operation is ready to be
438 * processed by the main thread, the I/O thread will use a unix pipe to
439 * awake the main thread. The following are the two pipe FDs. */
440 int io_ready_pipe_read
;
441 int io_ready_pipe_write
;
442 /* Virtual memory stats */
443 unsigned long long vm_stats_used_pages
;
444 unsigned long long vm_stats_swapped_objects
;
445 unsigned long long vm_stats_swapouts
;
446 unsigned long long vm_stats_swapins
;
448 dict
*pubsub_channels
; /* Map channels to list of subscribed clients */
449 list
*pubsub_patterns
; /* A list of pubsub_patterns */
454 typedef struct pubsubPattern
{
459 typedef void redisCommandProc(redisClient
*c
);
460 typedef void redisVmPreloadProc(redisClient
*c
, struct redisCommand
*cmd
, int argc
, robj
**argv
);
461 struct redisCommand
{
463 redisCommandProc
*proc
;
466 /* Use a function to determine which keys need to be loaded
467 * in the background prior to executing this command. Takes precedence
468 * over vm_firstkey and others, ignored when NULL */
469 redisVmPreloadProc
*vm_preload_proc
;
470 /* What keys should be loaded in background when calling this command? */
471 int vm_firstkey
; /* The first argument that's a key (0 = no keys) */
472 int vm_lastkey
; /* The last argument that's a key */
473 int vm_keystep
; /* The step between first and last key */
476 struct redisFunctionSym
{
478 unsigned long pointer
;
481 typedef struct _redisSortObject
{
489 typedef struct _redisSortOperation
{
492 } redisSortOperation
;
494 /* ZSETs use a specialized version of Skiplists */
496 typedef struct zskiplistNode
{
497 struct zskiplistNode
**forward
;
498 struct zskiplistNode
*backward
;
504 typedef struct zskiplist
{
505 struct zskiplistNode
*header
, *tail
;
506 unsigned long length
;
510 typedef struct zset
{
515 /* Our shared "common" objects */
517 #define REDIS_SHARED_INTEGERS 10000
518 struct sharedObjectsStruct
{
519 robj
*crlf
, *ok
, *err
, *emptybulk
, *czero
, *cone
, *pong
, *space
,
520 *colon
, *nullbulk
, *nullmultibulk
, *queued
,
521 *emptymultibulk
, *wrongtypeerr
, *nokeyerr
, *syntaxerr
, *sameobjecterr
,
522 *outofrangeerr
, *plus
,
523 *select0
, *select1
, *select2
, *select3
, *select4
,
524 *select5
, *select6
, *select7
, *select8
, *select9
,
525 *messagebulk
, *pmessagebulk
, *subscribebulk
, *unsubscribebulk
, *mbulk3
,
526 *mbulk4
, *psubscribebulk
, *punsubscribebulk
,
527 *integers
[REDIS_SHARED_INTEGERS
];
/* Global vars that are actually used as constants. The following double
 * values are used for on-disk serialization of doubles, and are initialized
 * at runtime to avoid strange compiler optimizations. */
static double R_Zero;
static double R_PosInf;
static double R_NegInf;
static double R_Nan;
/* VM threaded I/O request message: request types */
#define REDIS_IOJOB_LOAD         0 /* Load from disk to memory */
#define REDIS_IOJOB_PREPARE_SWAP 1 /* Compute needed pages */
#define REDIS_IOJOB_DO_SWAP      2 /* Swap from memory to disk */
540 typedef struct iojob
{
541 int type
; /* Request type, REDIS_IOJOB_* */
542 redisDb
*db
;/* Redis database */
543 robj
*key
; /* This I/O request is about swapping this key */
544 robj
*val
; /* the value to swap for REDIS_IOREQ_*_SWAP, otherwise this
545 * field is populated by the I/O thread for REDIS_IOREQ_LOAD. */
546 off_t page
; /* Swap page where to read/write the object */
547 off_t pages
; /* Swap pages needed to save object. PREPARE_SWAP return val */
548 int canceled
; /* True if this command was canceled by blocking side of VM */
549 pthread_t thread
; /* ID of the thread processing this entry */
552 /*================================ Prototypes =============================== */
554 static void freeStringObject(robj
*o
);
555 static void freeListObject(robj
*o
);
556 static void freeSetObject(robj
*o
);
557 static void decrRefCount(void *o
);
558 static robj
*createObject(int type
, void *ptr
);
559 static void freeClient(redisClient
*c
);
560 static int rdbLoad(char *filename
);
561 static void addReply(redisClient
*c
, robj
*obj
);
562 static void addReplySds(redisClient
*c
, sds s
);
563 static void incrRefCount(robj
*o
);
564 static int rdbSaveBackground(char *filename
);
565 static robj
*createStringObject(char *ptr
, size_t len
);
566 static robj
*dupStringObject(robj
*o
);
567 static void replicationFeedSlaves(list
*slaves
, int dictid
, robj
**argv
, int argc
);
568 static void replicationFeedMonitors(list
*monitors
, int dictid
, robj
**argv
, int argc
);
569 static void flushAppendOnlyFile(void);
570 static void feedAppendOnlyFile(struct redisCommand
*cmd
, int dictid
, robj
**argv
, int argc
);
571 static int syncWithMaster(void);
572 static robj
*tryObjectEncoding(robj
*o
);
573 static robj
*getDecodedObject(robj
*o
);
574 static int removeExpire(redisDb
*db
, robj
*key
);
575 static int expireIfNeeded(redisDb
*db
, robj
*key
);
576 static int deleteIfVolatile(redisDb
*db
, robj
*key
);
577 static int deleteIfSwapped(redisDb
*db
, robj
*key
);
578 static int deleteKey(redisDb
*db
, robj
*key
);
579 static time_t getExpire(redisDb
*db
, robj
*key
);
580 static int setExpire(redisDb
*db
, robj
*key
, time_t when
);
581 static void updateSlavesWaitingBgsave(int bgsaveerr
);
582 static void freeMemoryIfNeeded(void);
583 static int processCommand(redisClient
*c
);
584 static void setupSigSegvAction(void);
585 static void rdbRemoveTempFile(pid_t childpid
);
586 static void aofRemoveTempFile(pid_t childpid
);
587 static size_t stringObjectLen(robj
*o
);
588 static void processInputBuffer(redisClient
*c
);
589 static zskiplist
*zslCreate(void);
590 static void zslFree(zskiplist
*zsl
);
591 static void zslInsert(zskiplist
*zsl
, double score
, robj
*obj
);
592 static void sendReplyToClientWritev(aeEventLoop
*el
, int fd
, void *privdata
, int mask
);
593 static void initClientMultiState(redisClient
*c
);
594 static void freeClientMultiState(redisClient
*c
);
595 static void queueMultiCommand(redisClient
*c
, struct redisCommand
*cmd
);
596 static void unblockClientWaitingData(redisClient
*c
);
597 static int handleClientsWaitingListPush(redisClient
*c
, robj
*key
, robj
*ele
);
598 static void vmInit(void);
599 static void vmMarkPagesFree(off_t page
, off_t count
);
600 static robj
*vmLoadObject(robj
*key
);
601 static robj
*vmPreviewObject(robj
*key
);
602 static int vmSwapOneObjectBlocking(void);
603 static int vmSwapOneObjectThreaded(void);
604 static int vmCanSwapOut(void);
605 static int tryFreeOneObjectFromFreelist(void);
606 static void acceptHandler(aeEventLoop
*el
, int fd
, void *privdata
, int mask
);
607 static void vmThreadedIOCompletedJob(aeEventLoop
*el
, int fd
, void *privdata
, int mask
);
608 static void vmCancelThreadedIOJob(robj
*o
);
609 static void lockThreadedIO(void);
610 static void unlockThreadedIO(void);
611 static int vmSwapObjectThreaded(robj
*key
, robj
*val
, redisDb
*db
);
612 static void freeIOJob(iojob
*j
);
613 static void queueIOJob(iojob
*j
);
614 static int vmWriteObjectOnSwap(robj
*o
, off_t page
);
615 static robj
*vmReadObjectFromSwap(off_t page
, int type
);
616 static void waitEmptyIOJobsQueue(void);
617 static void vmReopenSwapFile(void);
618 static int vmFreePage(off_t page
);
619 static void zunionInterBlockClientOnSwappedKeys(redisClient
*c
, struct redisCommand
*cmd
, int argc
, robj
**argv
);
620 static void execBlockClientOnSwappedKeys(redisClient
*c
, struct redisCommand
*cmd
, int argc
, robj
**argv
);
621 static int blockClientOnSwappedKeys(redisClient
*c
, struct redisCommand
*cmd
);
622 static int dontWaitForSwappedKey(redisClient
*c
, robj
*key
);
623 static void handleClientsBlockedOnSwappedKey(redisDb
*db
, robj
*key
);
624 static void readQueryFromClient(aeEventLoop
*el
, int fd
, void *privdata
, int mask
);
625 static struct redisCommand
*lookupCommand(char *name
);
626 static void call(redisClient
*c
, struct redisCommand
*cmd
);
627 static void resetClient(redisClient
*c
);
628 static void convertToRealHash(robj
*o
);
629 static int pubsubUnsubscribeAllChannels(redisClient
*c
, int notify
);
630 static int pubsubUnsubscribeAllPatterns(redisClient
*c
, int notify
);
631 static void freePubsubPattern(void *p
);
632 static int listMatchPubsubPattern(void *a
, void *b
);
633 static int compareStringObjects(robj
*a
, robj
*b
);
634 static int equalStringObjects(robj
*a
, robj
*b
);
636 static int rewriteAppendOnlyFileBackground(void);
637 static int vmSwapObjectBlocking(robj
*key
, robj
*val
);
638 static int prepareForShutdown();
639 static void touchWatchedKey(redisDb
*db
, robj
*key
);
640 static void touchWatchedKeysOnFlush(int dbid
);
641 static void unwatchAllKeys(redisClient
*c
);
643 static void authCommand(redisClient
*c
);
644 static void pingCommand(redisClient
*c
);
645 static void echoCommand(redisClient
*c
);
646 static void setCommand(redisClient
*c
);
647 static void setnxCommand(redisClient
*c
);
648 static void setexCommand(redisClient
*c
);
649 static void getCommand(redisClient
*c
);
650 static void delCommand(redisClient
*c
);
651 static void existsCommand(redisClient
*c
);
652 static void incrCommand(redisClient
*c
);
653 static void decrCommand(redisClient
*c
);
654 static void incrbyCommand(redisClient
*c
);
655 static void decrbyCommand(redisClient
*c
);
656 static void selectCommand(redisClient
*c
);
657 static void randomkeyCommand(redisClient
*c
);
658 static void keysCommand(redisClient
*c
);
659 static void dbsizeCommand(redisClient
*c
);
660 static void lastsaveCommand(redisClient
*c
);
661 static void saveCommand(redisClient
*c
);
662 static void bgsaveCommand(redisClient
*c
);
663 static void bgrewriteaofCommand(redisClient
*c
);
664 static void shutdownCommand(redisClient
*c
);
665 static void moveCommand(redisClient
*c
);
666 static void renameCommand(redisClient
*c
);
667 static void renamenxCommand(redisClient
*c
);
668 static void lpushCommand(redisClient
*c
);
669 static void rpushCommand(redisClient
*c
);
670 static void lpopCommand(redisClient
*c
);
671 static void rpopCommand(redisClient
*c
);
672 static void llenCommand(redisClient
*c
);
673 static void lindexCommand(redisClient
*c
);
674 static void lrangeCommand(redisClient
*c
);
675 static void ltrimCommand(redisClient
*c
);
676 static void typeCommand(redisClient
*c
);
677 static void lsetCommand(redisClient
*c
);
678 static void saddCommand(redisClient
*c
);
679 static void sremCommand(redisClient
*c
);
680 static void smoveCommand(redisClient
*c
);
681 static void sismemberCommand(redisClient
*c
);
682 static void scardCommand(redisClient
*c
);
683 static void spopCommand(redisClient
*c
);
684 static void srandmemberCommand(redisClient
*c
);
685 static void sinterCommand(redisClient
*c
);
686 static void sinterstoreCommand(redisClient
*c
);
687 static void sunionCommand(redisClient
*c
);
688 static void sunionstoreCommand(redisClient
*c
);
689 static void sdiffCommand(redisClient
*c
);
690 static void sdiffstoreCommand(redisClient
*c
);
691 static void syncCommand(redisClient
*c
);
692 static void flushdbCommand(redisClient
*c
);
693 static void flushallCommand(redisClient
*c
);
694 static void sortCommand(redisClient
*c
);
695 static void lremCommand(redisClient
*c
);
696 static void rpoplpushcommand(redisClient
*c
);
697 static void infoCommand(redisClient
*c
);
698 static void mgetCommand(redisClient
*c
);
699 static void monitorCommand(redisClient
*c
);
700 static void expireCommand(redisClient
*c
);
701 static void expireatCommand(redisClient
*c
);
702 static void getsetCommand(redisClient
*c
);
703 static void ttlCommand(redisClient
*c
);
704 static void slaveofCommand(redisClient
*c
);
705 static void debugCommand(redisClient
*c
);
706 static void msetCommand(redisClient
*c
);
707 static void msetnxCommand(redisClient
*c
);
708 static void zaddCommand(redisClient
*c
);
709 static void zincrbyCommand(redisClient
*c
);
710 static void zrangeCommand(redisClient
*c
);
711 static void zrangebyscoreCommand(redisClient
*c
);
712 static void zcountCommand(redisClient
*c
);
713 static void zrevrangeCommand(redisClient
*c
);
714 static void zcardCommand(redisClient
*c
);
715 static void zremCommand(redisClient
*c
);
716 static void zscoreCommand(redisClient
*c
);
717 static void zremrangebyscoreCommand(redisClient
*c
);
718 static void multiCommand(redisClient
*c
);
719 static void execCommand(redisClient
*c
);
720 static void discardCommand(redisClient
*c
);
721 static void blpopCommand(redisClient
*c
);
722 static void brpopCommand(redisClient
*c
);
723 static void appendCommand(redisClient
*c
);
724 static void substrCommand(redisClient
*c
);
725 static void zrankCommand(redisClient
*c
);
726 static void zrevrankCommand(redisClient
*c
);
727 static void hsetCommand(redisClient
*c
);
728 static void hsetnxCommand(redisClient
*c
);
729 static void hgetCommand(redisClient
*c
);
730 static void hmsetCommand(redisClient
*c
);
731 static void hmgetCommand(redisClient
*c
);
732 static void hdelCommand(redisClient
*c
);
733 static void hlenCommand(redisClient
*c
);
734 static void zremrangebyrankCommand(redisClient
*c
);
735 static void zunionstoreCommand(redisClient
*c
);
736 static void zinterstoreCommand(redisClient
*c
);
737 static void hkeysCommand(redisClient
*c
);
738 static void hvalsCommand(redisClient
*c
);
739 static void hgetallCommand(redisClient
*c
);
740 static void hexistsCommand(redisClient
*c
);
741 static void configCommand(redisClient
*c
);
742 static void hincrbyCommand(redisClient
*c
);
743 static void subscribeCommand(redisClient
*c
);
744 static void unsubscribeCommand(redisClient
*c
);
745 static void psubscribeCommand(redisClient
*c
);
746 static void punsubscribeCommand(redisClient
*c
);
747 static void publishCommand(redisClient
*c
);
748 static void watchCommand(redisClient
*c
);
749 static void unwatchCommand(redisClient
*c
);
751 /*================================= Globals ================================= */
754 static struct redisServer server
; /* server global state */
755 static struct redisCommand cmdTable
[] = {
756 {"get",getCommand
,2,REDIS_CMD_INLINE
,NULL
,1,1,1},
757 {"set",setCommand
,3,REDIS_CMD_BULK
|REDIS_CMD_DENYOOM
,NULL
,0,0,0},
758 {"setnx",setnxCommand
,3,REDIS_CMD_BULK
|REDIS_CMD_DENYOOM
,NULL
,0,0,0},
759 {"setex",setexCommand
,4,REDIS_CMD_BULK
|REDIS_CMD_DENYOOM
,NULL
,0,0,0},
760 {"append",appendCommand
,3,REDIS_CMD_BULK
|REDIS_CMD_DENYOOM
,NULL
,1,1,1},
761 {"substr",substrCommand
,4,REDIS_CMD_INLINE
,NULL
,1,1,1},
762 {"del",delCommand
,-2,REDIS_CMD_INLINE
,NULL
,0,0,0},
763 {"exists",existsCommand
,2,REDIS_CMD_INLINE
,NULL
,1,1,1},
764 {"incr",incrCommand
,2,REDIS_CMD_INLINE
|REDIS_CMD_DENYOOM
,NULL
,1,1,1},
765 {"decr",decrCommand
,2,REDIS_CMD_INLINE
|REDIS_CMD_DENYOOM
,NULL
,1,1,1},
766 {"mget",mgetCommand
,-2,REDIS_CMD_INLINE
,NULL
,1,-1,1},
767 {"rpush",rpushCommand
,3,REDIS_CMD_BULK
|REDIS_CMD_DENYOOM
,NULL
,1,1,1},
768 {"lpush",lpushCommand
,3,REDIS_CMD_BULK
|REDIS_CMD_DENYOOM
,NULL
,1,1,1},
769 {"rpop",rpopCommand
,2,REDIS_CMD_INLINE
,NULL
,1,1,1},
770 {"lpop",lpopCommand
,2,REDIS_CMD_INLINE
,NULL
,1,1,1},
771 {"brpop",brpopCommand
,-3,REDIS_CMD_INLINE
,NULL
,1,1,1},
772 {"blpop",blpopCommand
,-3,REDIS_CMD_INLINE
,NULL
,1,1,1},
773 {"llen",llenCommand
,2,REDIS_CMD_INLINE
,NULL
,1,1,1},
774 {"lindex",lindexCommand
,3,REDIS_CMD_INLINE
,NULL
,1,1,1},
775 {"lset",lsetCommand
,4,REDIS_CMD_BULK
|REDIS_CMD_DENYOOM
,NULL
,1,1,1},
776 {"lrange",lrangeCommand
,4,REDIS_CMD_INLINE
,NULL
,1,1,1},
777 {"ltrim",ltrimCommand
,4,REDIS_CMD_INLINE
,NULL
,1,1,1},
778 {"lrem",lremCommand
,4,REDIS_CMD_BULK
,NULL
,1,1,1},
779 {"rpoplpush",rpoplpushcommand
,3,REDIS_CMD_INLINE
|REDIS_CMD_DENYOOM
,NULL
,1,2,1},
780 {"sadd",saddCommand
,3,REDIS_CMD_BULK
|REDIS_CMD_DENYOOM
,NULL
,1,1,1},
781 {"srem",sremCommand
,3,REDIS_CMD_BULK
,NULL
,1,1,1},
782 {"smove",smoveCommand
,4,REDIS_CMD_BULK
,NULL
,1,2,1},
783 {"sismember",sismemberCommand
,3,REDIS_CMD_BULK
,NULL
,1,1,1},
784 {"scard",scardCommand
,2,REDIS_CMD_INLINE
,NULL
,1,1,1},
785 {"spop",spopCommand
,2,REDIS_CMD_INLINE
,NULL
,1,1,1},
786 {"srandmember",srandmemberCommand
,2,REDIS_CMD_INLINE
,NULL
,1,1,1},
787 {"sinter",sinterCommand
,-2,REDIS_CMD_INLINE
|REDIS_CMD_DENYOOM
,NULL
,1,-1,1},
788 {"sinterstore",sinterstoreCommand
,-3,REDIS_CMD_INLINE
|REDIS_CMD_DENYOOM
,NULL
,2,-1,1},
789 {"sunion",sunionCommand
,-2,REDIS_CMD_INLINE
|REDIS_CMD_DENYOOM
,NULL
,1,-1,1},
790 {"sunionstore",sunionstoreCommand
,-3,REDIS_CMD_INLINE
|REDIS_CMD_DENYOOM
,NULL
,2,-1,1},
791 {"sdiff",sdiffCommand
,-2,REDIS_CMD_INLINE
|REDIS_CMD_DENYOOM
,NULL
,1,-1,1},
792 {"sdiffstore",sdiffstoreCommand
,-3,REDIS_CMD_INLINE
|REDIS_CMD_DENYOOM
,NULL
,2,-1,1},
793 {"smembers",sinterCommand
,2,REDIS_CMD_INLINE
,NULL
,1,1,1},
794 {"zadd",zaddCommand
,4,REDIS_CMD_BULK
|REDIS_CMD_DENYOOM
,NULL
,1,1,1},
795 {"zincrby",zincrbyCommand
,4,REDIS_CMD_BULK
|REDIS_CMD_DENYOOM
,NULL
,1,1,1},
796 {"zrem",zremCommand
,3,REDIS_CMD_BULK
,NULL
,1,1,1},
797 {"zremrangebyscore",zremrangebyscoreCommand
,4,REDIS_CMD_INLINE
,NULL
,1,1,1},
798 {"zremrangebyrank",zremrangebyrankCommand
,4,REDIS_CMD_INLINE
,NULL
,1,1,1},
799 {"zunionstore",zunionstoreCommand
,-4,REDIS_CMD_INLINE
|REDIS_CMD_DENYOOM
,zunionInterBlockClientOnSwappedKeys
,0,0,0},
800 {"zinterstore",zinterstoreCommand
,-4,REDIS_CMD_INLINE
|REDIS_CMD_DENYOOM
,zunionInterBlockClientOnSwappedKeys
,0,0,0},
801 {"zrange",zrangeCommand
,-4,REDIS_CMD_INLINE
,NULL
,1,1,1},
802 {"zrangebyscore",zrangebyscoreCommand
,-4,REDIS_CMD_INLINE
,NULL
,1,1,1},
803 {"zcount",zcountCommand
,4,REDIS_CMD_INLINE
,NULL
,1,1,1},
804 {"zrevrange",zrevrangeCommand
,-4,REDIS_CMD_INLINE
,NULL
,1,1,1},
805 {"zcard",zcardCommand
,2,REDIS_CMD_INLINE
,NULL
,1,1,1},
806 {"zscore",zscoreCommand
,3,REDIS_CMD_BULK
|REDIS_CMD_DENYOOM
,NULL
,1,1,1},
807 {"zrank",zrankCommand
,3,REDIS_CMD_BULK
,NULL
,1,1,1},
808 {"zrevrank",zrevrankCommand
,3,REDIS_CMD_BULK
,NULL
,1,1,1},
809 {"hset",hsetCommand
,4,REDIS_CMD_BULK
|REDIS_CMD_DENYOOM
,NULL
,1,1,1},
810 {"hsetnx",hsetnxCommand
,4,REDIS_CMD_BULK
|REDIS_CMD_DENYOOM
,NULL
,1,1,1},
811 {"hget",hgetCommand
,3,REDIS_CMD_BULK
,NULL
,1,1,1},
812 {"hmset",hmsetCommand
,-4,REDIS_CMD_BULK
|REDIS_CMD_DENYOOM
,NULL
,1,1,1},
813 {"hmget",hmgetCommand
,-3,REDIS_CMD_BULK
,NULL
,1,1,1},
814 {"hincrby",hincrbyCommand
,4,REDIS_CMD_INLINE
|REDIS_CMD_DENYOOM
,NULL
,1,1,1},
815 {"hdel",hdelCommand
,3,REDIS_CMD_BULK
,NULL
,1,1,1},
816 {"hlen",hlenCommand
,2,REDIS_CMD_INLINE
,NULL
,1,1,1},
817 {"hkeys",hkeysCommand
,2,REDIS_CMD_INLINE
,NULL
,1,1,1},
818 {"hvals",hvalsCommand
,2,REDIS_CMD_INLINE
,NULL
,1,1,1},
819 {"hgetall",hgetallCommand
,2,REDIS_CMD_INLINE
,NULL
,1,1,1},
820 {"hexists",hexistsCommand
,3,REDIS_CMD_BULK
,NULL
,1,1,1},
821 {"incrby",incrbyCommand
,3,REDIS_CMD_INLINE
|REDIS_CMD_DENYOOM
,NULL
,1,1,1},
822 {"decrby",decrbyCommand
,3,REDIS_CMD_INLINE
|REDIS_CMD_DENYOOM
,NULL
,1,1,1},
823 {"getset",getsetCommand
,3,REDIS_CMD_BULK
|REDIS_CMD_DENYOOM
,NULL
,1,1,1},
824 {"mset",msetCommand
,-3,REDIS_CMD_BULK
|REDIS_CMD_DENYOOM
,NULL
,1,-1,2},
825 {"msetnx",msetnxCommand
,-3,REDIS_CMD_BULK
|REDIS_CMD_DENYOOM
,NULL
,1,-1,2},
826 {"randomkey",randomkeyCommand
,1,REDIS_CMD_INLINE
,NULL
,0,0,0},
827 {"select",selectCommand
,2,REDIS_CMD_INLINE
,NULL
,0,0,0},
828 {"move",moveCommand
,3,REDIS_CMD_INLINE
,NULL
,1,1,1},
829 {"rename",renameCommand
,3,REDIS_CMD_INLINE
,NULL
,1,1,1},
830 {"renamenx",renamenxCommand
,3,REDIS_CMD_INLINE
,NULL
,1,1,1},
831 {"expire",expireCommand
,3,REDIS_CMD_INLINE
,NULL
,0,0,0},
832 {"expireat",expireatCommand
,3,REDIS_CMD_INLINE
,NULL
,0,0,0},
833 {"keys",keysCommand
,2,REDIS_CMD_INLINE
,NULL
,0,0,0},
834 {"dbsize",dbsizeCommand
,1,REDIS_CMD_INLINE
,NULL
,0,0,0},
835 {"auth",authCommand
,2,REDIS_CMD_INLINE
,NULL
,0,0,0},
836 {"ping",pingCommand
,1,REDIS_CMD_INLINE
,NULL
,0,0,0},
837 {"echo",echoCommand
,2,REDIS_CMD_BULK
,NULL
,0,0,0},
838 {"save",saveCommand
,1,REDIS_CMD_INLINE
,NULL
,0,0,0},
839 {"bgsave",bgsaveCommand
,1,REDIS_CMD_INLINE
,NULL
,0,0,0},
840 {"bgrewriteaof",bgrewriteaofCommand
,1,REDIS_CMD_INLINE
,NULL
,0,0,0},
841 {"shutdown",shutdownCommand
,1,REDIS_CMD_INLINE
,NULL
,0,0,0},
842 {"lastsave",lastsaveCommand
,1,REDIS_CMD_INLINE
,NULL
,0,0,0},
843 {"type",typeCommand
,2,REDIS_CMD_INLINE
,NULL
,1,1,1},
844 {"multi",multiCommand
,1,REDIS_CMD_INLINE
,NULL
,0,0,0},
845 {"exec",execCommand
,1,REDIS_CMD_INLINE
|REDIS_CMD_DENYOOM
,execBlockClientOnSwappedKeys
,0,0,0},
846 {"discard",discardCommand
,1,REDIS_CMD_INLINE
,NULL
,0,0,0},
847 {"sync",syncCommand
,1,REDIS_CMD_INLINE
,NULL
,0,0,0},
848 {"flushdb",flushdbCommand
,1,REDIS_CMD_INLINE
,NULL
,0,0,0},
849 {"flushall",flushallCommand
,1,REDIS_CMD_INLINE
,NULL
,0,0,0},
850 {"sort",sortCommand
,-2,REDIS_CMD_INLINE
|REDIS_CMD_DENYOOM
,NULL
,1,1,1},
851 {"info",infoCommand
,1,REDIS_CMD_INLINE
,NULL
,0,0,0},
852 {"monitor",monitorCommand
,1,REDIS_CMD_INLINE
,NULL
,0,0,0},
853 {"ttl",ttlCommand
,2,REDIS_CMD_INLINE
,NULL
,1,1,1},
854 {"slaveof",slaveofCommand
,3,REDIS_CMD_INLINE
,NULL
,0,0,0},
855 {"debug",debugCommand
,-2,REDIS_CMD_INLINE
,NULL
,0,0,0},
856 {"config",configCommand
,-2,REDIS_CMD_BULK
,NULL
,0,0,0},
857 {"subscribe",subscribeCommand
,-2,REDIS_CMD_INLINE
,NULL
,0,0,0},
858 {"unsubscribe",unsubscribeCommand
,-1,REDIS_CMD_INLINE
,NULL
,0,0,0},
859 {"psubscribe",psubscribeCommand
,-2,REDIS_CMD_INLINE
,NULL
,0,0,0},
860 {"punsubscribe",punsubscribeCommand
,-1,REDIS_CMD_INLINE
,NULL
,0,0,0},
861 {"publish",publishCommand
,3,REDIS_CMD_BULK
|REDIS_CMD_FORCE_REPLICATION
,NULL
,0,0,0},
862 {"watch",watchCommand
,-2,REDIS_CMD_INLINE
,NULL
,0,0,0},
863 {"unwatch",unwatchCommand
,1,REDIS_CMD_INLINE
,NULL
,0,0,0},
864 {NULL
,NULL
,0,0,NULL
,0,0,0}
867 /*============================ Utility functions ============================ */
869 /* Glob-style pattern matching. */
870 static int stringmatchlen(const char *pattern
, int patternLen
,
871 const char *string
, int stringLen
, int nocase
)
876 while (pattern
[1] == '*') {
881 return 1; /* match */
883 if (stringmatchlen(pattern
+1, patternLen
-1,
884 string
, stringLen
, nocase
))
885 return 1; /* match */
889 return 0; /* no match */
893 return 0; /* no match */
903 not = pattern
[0] == '^';
910 if (pattern
[0] == '\\') {
913 if (pattern
[0] == string
[0])
915 } else if (pattern
[0] == ']') {
917 } else if (patternLen
== 0) {
921 } else if (pattern
[1] == '-' && patternLen
>= 3) {
922 int start
= pattern
[0];
923 int end
= pattern
[2];
931 start
= tolower(start
);
937 if (c
>= start
&& c
<= end
)
941 if (pattern
[0] == string
[0])
944 if (tolower((int)pattern
[0]) == tolower((int)string
[0]))
954 return 0; /* no match */
960 if (patternLen
>= 2) {
967 if (pattern
[0] != string
[0])
968 return 0; /* no match */
970 if (tolower((int)pattern
[0]) != tolower((int)string
[0]))
971 return 0; /* no match */
979 if (stringLen
== 0) {
980 while(*pattern
== '*') {
987 if (patternLen
== 0 && stringLen
== 0)
992 static int stringmatch(const char *pattern
, const char *string
, int nocase
) {
993 return stringmatchlen(pattern
,strlen(pattern
),string
,strlen(string
),nocase
);
996 /* Convert a string representing an amount of memory into the number of
997 * bytes, so for instance memtoll("1gb") will return 1073741824 that is
1000 * On parsing error, if *err is not NULL, it's set to 1, otherwise it's
1002 static long long memtoll(const char *p
, int *err
) {
1005 long mul
; /* unit multiplier */
1007 unsigned int digits
;
1010 /* Search the first non digit character. */
1013 while(*u
&& isdigit(*u
)) u
++;
1014 if (*u
== '\0' || !strcasecmp(u
,"b")) {
1016 } else if (!strcasecmp(u
,"k")) {
1018 } else if (!strcasecmp(u
,"kb")) {
1020 } else if (!strcasecmp(u
,"m")) {
1022 } else if (!strcasecmp(u
,"mb")) {
1024 } else if (!strcasecmp(u
,"g")) {
1025 mul
= 1000L*1000*1000;
1026 } else if (!strcasecmp(u
,"gb")) {
1027 mul
= 1024L*1024*1024;
1033 if (digits
>= sizeof(buf
)) {
1037 memcpy(buf
,p
,digits
);
1039 val
= strtoll(buf
,NULL
,10);
/* Convert a long long into its decimal string representation.
 *
 * s     - destination buffer
 * len   - size of the destination buffer in bytes, including the room
 *         needed for the nul terminator
 * value - the number to convert
 *
 * When len > 0 the output is always nul terminated. If the buffer is too
 * small to hold the whole number, only the first len-1 characters of the
 * representation are stored. Returns the number of characters stored in s,
 * not counting the nul terminator (0 when len is 0, in which case s is not
 * touched). */
static int ll2string(char *s, size_t len, long long value) {
    char buf[32], *p;
    unsigned long long v;
    size_t l;

    if (len == 0) return 0;
    /* Compute the absolute value in the unsigned domain: negating LLONG_MIN
     * as a signed long long overflows, which is undefined behavior. */
    v = (value < 0) ? 0ULL-(unsigned long long)value
                    : (unsigned long long)value;
    p = buf+31; /* point to the last character */
    /* Emit the digits right to left, least significant first. */
    do {
        *p-- = '0'+(v%10);
        v /= 10;
    } while(v);
    if (value < 0) *p-- = '-';
    p++; /* p now points to the first character of the representation */
    l = 32-(p-buf);
    if (l+1 > len) l = len-1; /* Make sure it fits, including the nul term */
    memcpy(s,p,l);
    s[l] = '\0';
    return (int)l;
}
1067 static void redisLog(int level
, const char *fmt
, ...) {
1071 fp
= (server
.logfile
== NULL
) ? stdout
: fopen(server
.logfile
,"a");
1075 if (level
>= server
.verbosity
) {
1081 strftime(buf
,64,"%d %b %H:%M:%S",localtime(&now
));
1082 fprintf(fp
,"[%d] %s %c ",(int)getpid(),buf
,c
[level
]);
1083 vfprintf(fp
, fmt
, ap
);
1089 if (server
.logfile
) fclose(fp
);
1092 /*====================== Hash table type implementation ==================== */
1094 /* This is a hash table type that uses the SDS dynamic strings library as
1095 * keys and redis objects as values (objects can hold SDS strings,
1098 static void dictVanillaFree(void *privdata
, void *val
)
1100 DICT_NOTUSED(privdata
);
1104 static void dictListDestructor(void *privdata
, void *val
)
1106 DICT_NOTUSED(privdata
);
1107 listRelease((list
*)val
);
1110 static int sdsDictKeyCompare(void *privdata
, const void *key1
,
1114 DICT_NOTUSED(privdata
);
1116 l1
= sdslen((sds
)key1
);
1117 l2
= sdslen((sds
)key2
);
1118 if (l1
!= l2
) return 0;
1119 return memcmp(key1
, key2
, l1
) == 0;
1122 static void dictRedisObjectDestructor(void *privdata
, void *val
)
1124 DICT_NOTUSED(privdata
);
1126 if (val
== NULL
) return; /* Values of swapped out keys as set to NULL */
1130 static int dictObjKeyCompare(void *privdata
, const void *key1
,
1133 const robj
*o1
= key1
, *o2
= key2
;
1134 return sdsDictKeyCompare(privdata
,o1
->ptr
,o2
->ptr
);
1137 static unsigned int dictObjHash(const void *key
) {
1138 const robj
*o
= key
;
1139 return dictGenHashFunction(o
->ptr
, sdslen((sds
)o
->ptr
));
1142 static int dictEncObjKeyCompare(void *privdata
, const void *key1
,
1145 robj
*o1
= (robj
*) key1
, *o2
= (robj
*) key2
;
1148 if (o1
->encoding
== REDIS_ENCODING_INT
&&
1149 o2
->encoding
== REDIS_ENCODING_INT
)
1150 return o1
->ptr
== o2
->ptr
;
1152 o1
= getDecodedObject(o1
);
1153 o2
= getDecodedObject(o2
);
1154 cmp
= sdsDictKeyCompare(privdata
,o1
->ptr
,o2
->ptr
);
1160 static unsigned int dictEncObjHash(const void *key
) {
1161 robj
*o
= (robj
*) key
;
1163 if (o
->encoding
== REDIS_ENCODING_RAW
) {
1164 return dictGenHashFunction(o
->ptr
, sdslen((sds
)o
->ptr
));
1166 if (o
->encoding
== REDIS_ENCODING_INT
) {
1170 len
= ll2string(buf
,32,(long)o
->ptr
);
1171 return dictGenHashFunction((unsigned char*)buf
, len
);
1175 o
= getDecodedObject(o
);
1176 hash
= dictGenHashFunction(o
->ptr
, sdslen((sds
)o
->ptr
));
1183 /* Sets type and expires */
1184 static dictType setDictType
= {
1185 dictEncObjHash
, /* hash function */
1188 dictEncObjKeyCompare
, /* key compare */
1189 dictRedisObjectDestructor
, /* key destructor */
1190 NULL
/* val destructor */
1193 /* Sorted sets hash (note: a skiplist is used in addition to the hash table) */
1194 static dictType zsetDictType
= {
1195 dictEncObjHash
, /* hash function */
1198 dictEncObjKeyCompare
, /* key compare */
1199 dictRedisObjectDestructor
, /* key destructor */
1200 dictVanillaFree
/* val destructor of malloc(sizeof(double)) */
1204 static dictType dbDictType
= {
1205 dictObjHash
, /* hash function */
1208 dictObjKeyCompare
, /* key compare */
1209 dictRedisObjectDestructor
, /* key destructor */
1210 dictRedisObjectDestructor
/* val destructor */
1214 static dictType keyptrDictType
= {
1215 dictObjHash
, /* hash function */
1218 dictObjKeyCompare
, /* key compare */
1219 dictRedisObjectDestructor
, /* key destructor */
1220 NULL
/* val destructor */
1223 /* Hash type hash table (note that small hashes are represented with zipmaps) */
1224 static dictType hashDictType
= {
1225 dictEncObjHash
, /* hash function */
1228 dictEncObjKeyCompare
, /* key compare */
1229 dictRedisObjectDestructor
, /* key destructor */
1230 dictRedisObjectDestructor
/* val destructor */
1233 /* Keylist hash table type has unencoded redis objects as keys and
1234 * lists as values. It's used for blocking operations (BLPOP) and to
1235 * map swapped keys to a list of clients waiting for these keys to be loaded. */
1236 static dictType keylistDictType
= {
1237 dictObjHash
, /* hash function */
1240 dictObjKeyCompare
, /* key compare */
1241 dictRedisObjectDestructor
, /* key destructor */
1242 dictListDestructor
/* val destructor */
1245 static void version();
1247 /* ========================= Random utility functions ======================= */
1249 /* Redis generally does not try to recover from out of memory conditions
1250 * when allocating objects or strings, it is not clear if it will be possible
1251 * to report this condition to the client since the networking layer itself
1252 * is based on heap allocation for send buffers, so we simply abort.
1253 * At least the code will be simpler to read... */
1254 static void oom(const char *msg
) {
1255 redisLog(REDIS_WARNING
, "%s: Out of memory\n",msg
);
1260 /* ====================== Redis server networking stuff ===================== */
1261 static void closeTimedoutClients(void) {
1264 time_t now
= time(NULL
);
1267 listRewind(server
.clients
,&li
);
1268 while ((ln
= listNext(&li
)) != NULL
) {
1269 c
= listNodeValue(ln
);
1270 if (server
.maxidletime
&&
1271 !(c
->flags
& REDIS_SLAVE
) && /* no timeout for slaves */
1272 !(c
->flags
& REDIS_MASTER
) && /* no timeout for masters */
1273 dictSize(c
->pubsub_channels
) == 0 && /* no timeout for pubsub */
1274 listLength(c
->pubsub_patterns
) == 0 &&
1275 (now
- c
->lastinteraction
> server
.maxidletime
))
1277 redisLog(REDIS_VERBOSE
,"Closing idle client");
1279 } else if (c
->flags
& REDIS_BLOCKED
) {
1280 if (c
->blockingto
!= 0 && c
->blockingto
< now
) {
1281 addReply(c
,shared
.nullmultibulk
);
1282 unblockClientWaitingData(c
);
1288 static int htNeedsResize(dict
*dict
) {
1289 long long size
, used
;
1291 size
= dictSlots(dict
);
1292 used
= dictSize(dict
);
1293 return (size
&& used
&& size
> DICT_HT_INITIAL_SIZE
&&
1294 (used
*100/size
< REDIS_HT_MINFILL
));
1297 /* If the percentage of used slots in the HT drops below REDIS_HT_MINFILL
1298 * we resize the hash table to save memory */
1299 static void tryResizeHashTables(void) {
1302 for (j
= 0; j
< server
.dbnum
; j
++) {
1303 if (htNeedsResize(server
.db
[j
].dict
))
1304 dictResize(server
.db
[j
].dict
);
1305 if (htNeedsResize(server
.db
[j
].expires
))
1306 dictResize(server
.db
[j
].expires
);
1310 /* Our hash table implementation performs rehashing incrementally while
1311 * we write/read from the hash table. Still if the server is idle, the hash
1312 * table will use two tables for a long time. So we try to use 1 millisecond
1313 * of CPU time at every serverCron() loop in order to rehash some key. */
1314 static void incrementallyRehash(void) {
1317 for (j
= 0; j
< server
.dbnum
; j
++) {
1318 if (dictIsRehashing(server
.db
[j
].dict
)) {
1319 dictRehashMilliseconds(server
.db
[j
].dict
,1);
1320 break; /* already used our millisecond for this loop... */
1325 /* A background saving child (BGSAVE) terminated its work. Handle this. */
1326 void backgroundSaveDoneHandler(int statloc
) {
1327 int exitcode
= WEXITSTATUS(statloc
);
1328 int bysignal
= WIFSIGNALED(statloc
);
1330 if (!bysignal
&& exitcode
== 0) {
1331 redisLog(REDIS_NOTICE
,
1332 "Background saving terminated with success");
1334 server
.lastsave
= time(NULL
);
1335 } else if (!bysignal
&& exitcode
!= 0) {
1336 redisLog(REDIS_WARNING
, "Background saving error");
1338 redisLog(REDIS_WARNING
,
1339 "Background saving terminated by signal %d", WTERMSIG(statloc
));
1340 rdbRemoveTempFile(server
.bgsavechildpid
);
1342 server
.bgsavechildpid
= -1;
1343 /* Possibly there are slaves waiting for a BGSAVE in order to be served
1344 * (the first stage of SYNC is a bulk transfer of dump.rdb) */
1345 updateSlavesWaitingBgsave(exitcode
== 0 ? REDIS_OK
: REDIS_ERR
);
1348 /* A background append only file rewriting (BGREWRITEAOF) terminated its work.
1350 void backgroundRewriteDoneHandler(int statloc
) {
1351 int exitcode
= WEXITSTATUS(statloc
);
1352 int bysignal
= WIFSIGNALED(statloc
);
1354 if (!bysignal
&& exitcode
== 0) {
1358 redisLog(REDIS_NOTICE
,
1359 "Background append only file rewriting terminated with success");
1360 /* Now it's time to flush the differences accumulated by the parent */
1361 snprintf(tmpfile
,256,"temp-rewriteaof-bg-%d.aof", (int) server
.bgrewritechildpid
);
1362 fd
= open(tmpfile
,O_WRONLY
|O_APPEND
);
1364 redisLog(REDIS_WARNING
, "Not able to open the temp append only file produced by the child: %s", strerror(errno
));
1367 /* Flush our data... */
1368 if (write(fd
,server
.bgrewritebuf
,sdslen(server
.bgrewritebuf
)) !=
1369 (signed) sdslen(server
.bgrewritebuf
)) {
1370 redisLog(REDIS_WARNING
, "Error or short write trying to flush the parent diff of the append log file in the child temp file: %s", strerror(errno
));
1374 redisLog(REDIS_NOTICE
,"Parent diff flushed into the new append log file with success (%lu bytes)",sdslen(server
.bgrewritebuf
));
1375 /* Now our work is to rename the temp file into the stable file. And
1376 * switch the file descriptor used by the server for append only. */
1377 if (rename(tmpfile
,server
.appendfilename
) == -1) {
1378 redisLog(REDIS_WARNING
,"Can't rename the temp append only file into the stable one: %s", strerror(errno
));
1382 /* Mission completed... almost */
1383 redisLog(REDIS_NOTICE
,"Append only file successfully rewritten.");
1384 if (server
.appendfd
!= -1) {
1385 /* If append only is actually enabled... */
1386 close(server
.appendfd
);
1387 server
.appendfd
= fd
;
1389 server
.appendseldb
= -1; /* Make sure it will issue SELECT */
1390 redisLog(REDIS_NOTICE
,"The new append only file was selected for future appends.");
1392 /* If append only is disabled we just generate a dump in this
1393 * format. Why not? */
1396 } else if (!bysignal
&& exitcode
!= 0) {
1397 redisLog(REDIS_WARNING
, "Background append only file rewriting error");
1399 redisLog(REDIS_WARNING
,
1400 "Background append only file rewriting terminated by signal %d",
1404 sdsfree(server
.bgrewritebuf
);
1405 server
.bgrewritebuf
= sdsempty();
1406 aofRemoveTempFile(server
.bgrewritechildpid
);
1407 server
.bgrewritechildpid
= -1;
1410 /* This function is called once a background process of some kind terminates,
1411 * as we want to avoid resizing the hash tables when there is a child in order
1412 * to play well with copy-on-write (otherwise when a resize happens lots of
1413 * memory pages are copied). The goal of this function is to update the ability
1414 * for dict.c to resize the hash tables according to whether or not we have
1415 * running children. */
1416 static void updateDictResizePolicy(void) {
1417 if (server
.bgsavechildpid
== -1 && server
.bgrewritechildpid
== -1)
1420 dictDisableResize();
1423 static int serverCron(struct aeEventLoop
*eventLoop
, long long id
, void *clientData
) {
1424 int j
, loops
= server
.cronloops
++;
1425 REDIS_NOTUSED(eventLoop
);
1427 REDIS_NOTUSED(clientData
);
1429 /* We take a cached value of the unix time in the global state because
1430 * with virtual memory and aging we need to store the current time
1431 * in objects at every object access, and accuracy is not needed.
1432 * To access a global var is faster than calling time(NULL) */
1433 server
.unixtime
= time(NULL
);
1435 /* We received a SIGTERM, shutting down here in a safe way, as it is
1436 * not ok doing so inside the signal handler. */
1437 if (server
.shutdown_asap
) {
1438 if (prepareForShutdown() == REDIS_OK
) exit(0);
1439 redisLog(REDIS_WARNING
,"SIGTERM received but errors trying to shut down the server, check the logs for more information");
1442 /* Show some info about non-empty databases */
1443 for (j
= 0; j
< server
.dbnum
; j
++) {
1444 long long size
, used
, vkeys
;
1446 size
= dictSlots(server
.db
[j
].dict
);
1447 used
= dictSize(server
.db
[j
].dict
);
1448 vkeys
= dictSize(server
.db
[j
].expires
);
1449 if (!(loops
% 50) && (used
|| vkeys
)) {
1450 redisLog(REDIS_VERBOSE
,"DB %d: %lld keys (%lld volatile) in %lld slots HT.",j
,used
,vkeys
,size
);
1451 /* dictPrintStats(server.dict); */
1455 /* We don't want to resize the hash tables while a background saving
1456 * is in progress: the saving child is created using fork() that is
1457 * implemented with a copy-on-write semantic in most modern systems, so
1458 * if we resize the HT while there is the saving child at work actually
1459 * a lot of memory movements in the parent will cause a lot of pages
1461 if (server
.bgsavechildpid
== -1 && server
.bgrewritechildpid
== -1) {
1462 if (!(loops
% 10)) tryResizeHashTables();
1463 if (server
.activerehashing
) incrementallyRehash();
1466 /* Show information about connected clients */
1467 if (!(loops
% 50)) {
1468 redisLog(REDIS_VERBOSE
,"%d clients connected (%d slaves), %zu bytes in use",
1469 listLength(server
.clients
)-listLength(server
.slaves
),
1470 listLength(server
.slaves
),
1471 zmalloc_used_memory());
1474 /* Close connections of timedout clients */
1475 if ((server
.maxidletime
&& !(loops
% 100)) || server
.blpop_blocked_clients
)
1476 closeTimedoutClients();
1478 /* Check if a background saving or AOF rewrite in progress terminated */
1479 if (server
.bgsavechildpid
!= -1 || server
.bgrewritechildpid
!= -1) {
1483 if ((pid
= wait3(&statloc
,WNOHANG
,NULL
)) != 0) {
1484 if (pid
== server
.bgsavechildpid
) {
1485 backgroundSaveDoneHandler(statloc
);
1487 backgroundRewriteDoneHandler(statloc
);
1489 updateDictResizePolicy();
1492 /* If there is not a background saving in progress check if
1493 * we have to save now */
1494 time_t now
= time(NULL
);
1495 for (j
= 0; j
< server
.saveparamslen
; j
++) {
1496 struct saveparam
*sp
= server
.saveparams
+j
;
1498 if (server
.dirty
>= sp
->changes
&&
1499 now
-server
.lastsave
> sp
->seconds
) {
1500 redisLog(REDIS_NOTICE
,"%d changes in %d seconds. Saving...",
1501 sp
->changes
, sp
->seconds
);
1502 rdbSaveBackground(server
.dbfilename
);
1508 /* Try to expire a few timed out keys. The algorithm used is adaptive and
1509 * will use few CPU cycles if there are few expiring keys, otherwise
1510 * it will get more aggressive to avoid that too much memory is used by
1511 * keys that can be removed from the keyspace. */
1512 for (j
= 0; j
< server
.dbnum
; j
++) {
1514 redisDb
*db
= server
.db
+j
;
1516 /* Continue to expire if at the end of the cycle more than 25%
1517 * of the keys were expired. */
1519 long num
= dictSize(db
->expires
);
1520 time_t now
= time(NULL
);
1523 if (num
> REDIS_EXPIRELOOKUPS_PER_CRON
)
1524 num
= REDIS_EXPIRELOOKUPS_PER_CRON
;
1529 if ((de
= dictGetRandomKey(db
->expires
)) == NULL
) break;
1530 t
= (time_t) dictGetEntryVal(de
);
1532 deleteKey(db
,dictGetEntryKey(de
));
1534 server
.stat_expiredkeys
++;
1537 } while (expired
> REDIS_EXPIRELOOKUPS_PER_CRON
/4);
1540 /* Swap a few keys on disk if we are over the memory limit and VM
1541 * is enabled. Try to free objects from the free list first. */
1542 if (vmCanSwapOut()) {
1543 while (server
.vm_enabled
&& zmalloc_used_memory() >
1544 server
.vm_max_memory
)
1548 if (tryFreeOneObjectFromFreelist() == REDIS_OK
) continue;
1549 retval
= (server
.vm_max_threads
== 0) ?
1550 vmSwapOneObjectBlocking() :
1551 vmSwapOneObjectThreaded();
1552 if (retval
== REDIS_ERR
&& !(loops
% 300) &&
1553 zmalloc_used_memory() >
1554 (server
.vm_max_memory
+server
.vm_max_memory
/10))
1556 redisLog(REDIS_WARNING
,"WARNING: vm-max-memory limit exceeded by more than 10%% but unable to swap more objects out!");
1558 /* Note that when using threaded I/O we free just one object,
1559 * because anyway when the I/O thread in charge to swap this
1560 * object out will finish, the handler of completed jobs
1561 * will try to swap more objects if we are still out of memory. */
1562 if (retval
== REDIS_ERR
|| server
.vm_max_threads
> 0) break;
1566 /* Check if we should connect to a MASTER */
1567 if (server
.replstate
== REDIS_REPL_CONNECT
&& !(loops
% 10)) {
1568 redisLog(REDIS_NOTICE
,"Connecting to MASTER...");
1569 if (syncWithMaster() == REDIS_OK
) {
1570 redisLog(REDIS_NOTICE
,"MASTER <-> SLAVE sync succeeded");
1571 if (server
.appendonly
) rewriteAppendOnlyFileBackground();
1577 /* This function gets called every time Redis is entering the
1578 * main loop of the event driven library, that is, before going to sleep
1579 * waiting for ready file descriptors. */
1580 static void beforeSleep(struct aeEventLoop
*eventLoop
) {
1581 REDIS_NOTUSED(eventLoop
);
1583 /* Awake clients that got all the swapped keys they requested */
1584 if (server
.vm_enabled
&& listLength(server
.io_ready_clients
)) {
1588 listRewind(server
.io_ready_clients
,&li
);
1589 while((ln
= listNext(&li
))) {
1590 redisClient
*c
= ln
->value
;
1591 struct redisCommand
*cmd
;
1593 /* Resume the client. */
1594 listDelNode(server
.io_ready_clients
,ln
);
1595 c
->flags
&= (~REDIS_IO_WAIT
);
1596 server
.vm_blocked_clients
--;
1597 aeCreateFileEvent(server
.el
, c
->fd
, AE_READABLE
,
1598 readQueryFromClient
, c
);
1599 cmd
= lookupCommand(c
->argv
[0]->ptr
);
1600 assert(cmd
!= NULL
);
1603 /* There may be more data to process in the input buffer. */
1604 if (c
->querybuf
&& sdslen(c
->querybuf
) > 0)
1605 processInputBuffer(c
);
1608 /* Write the AOF buffer on disk */
1609 flushAppendOnlyFile();
1612 static void createSharedObjects(void) {
1615 shared
.crlf
= createObject(REDIS_STRING
,sdsnew("\r\n"));
1616 shared
.ok
= createObject(REDIS_STRING
,sdsnew("+OK\r\n"));
1617 shared
.err
= createObject(REDIS_STRING
,sdsnew("-ERR\r\n"));
1618 shared
.emptybulk
= createObject(REDIS_STRING
,sdsnew("$0\r\n\r\n"));
1619 shared
.czero
= createObject(REDIS_STRING
,sdsnew(":0\r\n"));
1620 shared
.cone
= createObject(REDIS_STRING
,sdsnew(":1\r\n"));
1621 shared
.nullbulk
= createObject(REDIS_STRING
,sdsnew("$-1\r\n"));
1622 shared
.nullmultibulk
= createObject(REDIS_STRING
,sdsnew("*-1\r\n"));
1623 shared
.emptymultibulk
= createObject(REDIS_STRING
,sdsnew("*0\r\n"));
1624 shared
.pong
= createObject(REDIS_STRING
,sdsnew("+PONG\r\n"));
1625 shared
.queued
= createObject(REDIS_STRING
,sdsnew("+QUEUED\r\n"));
1626 shared
.wrongtypeerr
= createObject(REDIS_STRING
,sdsnew(
1627 "-ERR Operation against a key holding the wrong kind of value\r\n"));
1628 shared
.nokeyerr
= createObject(REDIS_STRING
,sdsnew(
1629 "-ERR no such key\r\n"));
1630 shared
.syntaxerr
= createObject(REDIS_STRING
,sdsnew(
1631 "-ERR syntax error\r\n"));
1632 shared
.sameobjecterr
= createObject(REDIS_STRING
,sdsnew(
1633 "-ERR source and destination objects are the same\r\n"));
1634 shared
.outofrangeerr
= createObject(REDIS_STRING
,sdsnew(
1635 "-ERR index out of range\r\n"));
1636 shared
.space
= createObject(REDIS_STRING
,sdsnew(" "));
1637 shared
.colon
= createObject(REDIS_STRING
,sdsnew(":"));
1638 shared
.plus
= createObject(REDIS_STRING
,sdsnew("+"));
1639 shared
.select0
= createStringObject("select 0\r\n",10);
1640 shared
.select1
= createStringObject("select 1\r\n",10);
1641 shared
.select2
= createStringObject("select 2\r\n",10);
1642 shared
.select3
= createStringObject("select 3\r\n",10);
1643 shared
.select4
= createStringObject("select 4\r\n",10);
1644 shared
.select5
= createStringObject("select 5\r\n",10);
1645 shared
.select6
= createStringObject("select 6\r\n",10);
1646 shared
.select7
= createStringObject("select 7\r\n",10);
1647 shared
.select8
= createStringObject("select 8\r\n",10);
1648 shared
.select9
= createStringObject("select 9\r\n",10);
1649 shared
.messagebulk
= createStringObject("$7\r\nmessage\r\n",13);
1650 shared
.pmessagebulk
= createStringObject("$8\r\npmessage\r\n",14);
1651 shared
.subscribebulk
= createStringObject("$9\r\nsubscribe\r\n",15);
1652 shared
.unsubscribebulk
= createStringObject("$11\r\nunsubscribe\r\n",18);
1653 shared
.psubscribebulk
= createStringObject("$10\r\npsubscribe\r\n",17);
1654 shared
.punsubscribebulk
= createStringObject("$12\r\npunsubscribe\r\n",19);
1655 shared
.mbulk3
= createStringObject("*3\r\n",4);
1656 shared
.mbulk4
= createStringObject("*4\r\n",4);
1657 for (j
= 0; j
< REDIS_SHARED_INTEGERS
; j
++) {
1658 shared
.integers
[j
] = createObject(REDIS_STRING
,(void*)(long)j
);
1659 shared
.integers
[j
]->encoding
= REDIS_ENCODING_INT
;
1663 static void appendServerSaveParams(time_t seconds
, int changes
) {
1664 server
.saveparams
= zrealloc(server
.saveparams
,sizeof(struct saveparam
)*(server
.saveparamslen
+1));
1665 server
.saveparams
[server
.saveparamslen
].seconds
= seconds
;
1666 server
.saveparams
[server
.saveparamslen
].changes
= changes
;
1667 server
.saveparamslen
++;
1670 static void resetServerSaveParams() {
1671 zfree(server
.saveparams
);
1672 server
.saveparams
= NULL
;
1673 server
.saveparamslen
= 0;
1676 static void initServerConfig() {
1677 server
.dbnum
= REDIS_DEFAULT_DBNUM
;
1678 server
.port
= REDIS_SERVERPORT
;
1679 server
.verbosity
= REDIS_VERBOSE
;
1680 server
.maxidletime
= REDIS_MAXIDLETIME
;
1681 server
.saveparams
= NULL
;
1682 server
.logfile
= NULL
; /* NULL = log on standard output */
1683 server
.bindaddr
= NULL
;
1684 server
.glueoutputbuf
= 1;
1685 server
.daemonize
= 0;
1686 server
.appendonly
= 0;
1687 server
.appendfsync
= APPENDFSYNC_EVERYSEC
;
1688 server
.lastfsync
= time(NULL
);
1689 server
.appendfd
= -1;
1690 server
.appendseldb
= -1; /* Make sure the first time will not match */
1691 server
.pidfile
= zstrdup("/var/run/redis.pid");
1692 server
.dbfilename
= zstrdup("dump.rdb");
1693 server
.appendfilename
= zstrdup("appendonly.aof");
1694 server
.requirepass
= NULL
;
1695 server
.rdbcompression
= 1;
1696 server
.activerehashing
= 1;
1697 server
.maxclients
= 0;
1698 server
.blpop_blocked_clients
= 0;
1699 server
.maxmemory
= 0;
1700 server
.vm_enabled
= 0;
1701 server
.vm_swap_file
= zstrdup("/tmp/redis-%p.vm");
1702 server
.vm_page_size
= 256; /* 256 bytes per page */
1703 server
.vm_pages
= 1024*1024*100; /* 104 millions of pages */
1704 server
.vm_max_memory
= 1024LL*1024*1024*1; /* 1 GB of RAM */
1705 server
.vm_max_threads
= 4;
1706 server
.vm_blocked_clients
= 0;
1707 server
.hash_max_zipmap_entries
= REDIS_HASH_MAX_ZIPMAP_ENTRIES
;
1708 server
.hash_max_zipmap_value
= REDIS_HASH_MAX_ZIPMAP_VALUE
;
1709 server
.shutdown_asap
= 0;
1711 resetServerSaveParams();
1713 appendServerSaveParams(60*60,1); /* save after 1 hour and 1 change */
1714 appendServerSaveParams(300,100); /* save after 5 minutes and 100 changes */
1715 appendServerSaveParams(60,10000); /* save after 1 minute and 10000 changes */
1716 /* Replication related */
1718 server
.masterauth
= NULL
;
1719 server
.masterhost
= NULL
;
1720 server
.masterport
= 6379;
1721 server
.master
= NULL
;
1722 server
.replstate
= REDIS_REPL_NONE
;
1724 /* Double constants initialization */
1726 R_PosInf
= 1.0/R_Zero
;
1727 R_NegInf
= -1.0/R_Zero
;
1728 R_Nan
= R_Zero
/R_Zero
;
1731 static void initServer() {
1734 signal(SIGHUP
, SIG_IGN
);
1735 signal(SIGPIPE
, SIG_IGN
);
1736 setupSigSegvAction();
1738 server
.devnull
= fopen("/dev/null","w");
1739 if (server
.devnull
== NULL
) {
1740 redisLog(REDIS_WARNING
, "Can't open /dev/null: %s", server
.neterr
);
1743 server
.clients
= listCreate();
1744 server
.slaves
= listCreate();
1745 server
.monitors
= listCreate();
1746 server
.objfreelist
= listCreate();
1747 createSharedObjects();
1748 server
.el
= aeCreateEventLoop();
1749 server
.db
= zmalloc(sizeof(redisDb
)*server
.dbnum
);
1750 server
.fd
= anetTcpServer(server
.neterr
, server
.port
, server
.bindaddr
);
1751 if (server
.fd
== -1) {
1752 redisLog(REDIS_WARNING
, "Opening TCP port: %s", server
.neterr
);
1755 for (j
= 0; j
< server
.dbnum
; j
++) {
1756 server
.db
[j
].dict
= dictCreate(&dbDictType
,NULL
);
1757 server
.db
[j
].expires
= dictCreate(&keyptrDictType
,NULL
);
1758 server
.db
[j
].blocking_keys
= dictCreate(&keylistDictType
,NULL
);
1759 server
.db
[j
].watched_keys
= dictCreate(&keylistDictType
,NULL
);
1760 if (server
.vm_enabled
)
1761 server
.db
[j
].io_keys
= dictCreate(&keylistDictType
,NULL
);
1762 server
.db
[j
].id
= j
;
1764 server
.pubsub_channels
= dictCreate(&keylistDictType
,NULL
);
1765 server
.pubsub_patterns
= listCreate();
1766 listSetFreeMethod(server
.pubsub_patterns
,freePubsubPattern
);
1767 listSetMatchMethod(server
.pubsub_patterns
,listMatchPubsubPattern
);
1768 server
.cronloops
= 0;
1769 server
.bgsavechildpid
= -1;
1770 server
.bgrewritechildpid
= -1;
1771 server
.bgrewritebuf
= sdsempty();
1772 server
.aofbuf
= sdsempty();
1773 server
.lastsave
= time(NULL
);
1775 server
.stat_numcommands
= 0;
1776 server
.stat_numconnections
= 0;
1777 server
.stat_expiredkeys
= 0;
1778 server
.stat_starttime
= time(NULL
);
1779 server
.unixtime
= time(NULL
);
1780 aeCreateTimeEvent(server
.el
, 1, serverCron
, NULL
, NULL
);
1781 if (aeCreateFileEvent(server
.el
, server
.fd
, AE_READABLE
,
1782 acceptHandler
, NULL
) == AE_ERR
) oom("creating file event");
1784 if (server
.appendonly
) {
1785 server
.appendfd
= open(server
.appendfilename
,O_WRONLY
|O_APPEND
|O_CREAT
,0644);
1786 if (server
.appendfd
== -1) {
1787 redisLog(REDIS_WARNING
, "Can't open the append-only file: %s",
1793 if (server
.vm_enabled
) vmInit();
1796 /* Empty the whole database */
1797 static long long emptyDb() {
1799 long long removed
= 0;
1801 for (j
= 0; j
< server
.dbnum
; j
++) {
1802 removed
+= dictSize(server
.db
[j
].dict
);
1803 dictEmpty(server
.db
[j
].dict
);
1804 dictEmpty(server
.db
[j
].expires
);
1809 static int yesnotoi(char *s
) {
1810 if (!strcasecmp(s
,"yes")) return 1;
1811 else if (!strcasecmp(s
,"no")) return 0;
1815 /* I agree, this is a very rudimentary way to load a configuration...
1816 it will be improved later if the config gets more complex */
1817 static void loadServerConfig(char *filename
) {
1819 char buf
[REDIS_CONFIGLINE_MAX
+1], *err
= NULL
;
1823 if (filename
[0] == '-' && filename
[1] == '\0')
1826 if ((fp
= fopen(filename
,"r")) == NULL
) {
1827 redisLog(REDIS_WARNING
, "Fatal error, can't open config file '%s'", filename
);
1832 while(fgets(buf
,REDIS_CONFIGLINE_MAX
+1,fp
) != NULL
) {
1838 line
= sdstrim(line
," \t\r\n");
1840 /* Skip comments and blank lines*/
1841 if (line
[0] == '#' || line
[0] == '\0') {
1846 /* Split into arguments */
1847 argv
= sdssplitlen(line
,sdslen(line
)," ",1,&argc
);
1848 sdstolower(argv
[0]);
1850 /* Execute config directives */
1851 if (!strcasecmp(argv
[0],"timeout") && argc
== 2) {
1852 server
.maxidletime
= atoi(argv
[1]);
1853 if (server
.maxidletime
< 0) {
1854 err
= "Invalid timeout value"; goto loaderr
;
1856 } else if (!strcasecmp(argv
[0],"port") && argc
== 2) {
1857 server
.port
= atoi(argv
[1]);
1858 if (server
.port
< 1 || server
.port
> 65535) {
1859 err
= "Invalid port"; goto loaderr
;
1861 } else if (!strcasecmp(argv
[0],"bind") && argc
== 2) {
1862 server
.bindaddr
= zstrdup(argv
[1]);
1863 } else if (!strcasecmp(argv
[0],"save") && argc
== 3) {
1864 int seconds
= atoi(argv
[1]);
1865 int changes
= atoi(argv
[2]);
1866 if (seconds
< 1 || changes
< 0) {
1867 err
= "Invalid save parameters"; goto loaderr
;
1869 appendServerSaveParams(seconds
,changes
);
1870 } else if (!strcasecmp(argv
[0],"dir") && argc
== 2) {
1871 if (chdir(argv
[1]) == -1) {
1872 redisLog(REDIS_WARNING
,"Can't chdir to '%s': %s",
1873 argv
[1], strerror(errno
));
1876 } else if (!strcasecmp(argv
[0],"loglevel") && argc
== 2) {
1877 if (!strcasecmp(argv
[1],"debug")) server
.verbosity
= REDIS_DEBUG
;
1878 else if (!strcasecmp(argv
[1],"verbose")) server
.verbosity
= REDIS_VERBOSE
;
1879 else if (!strcasecmp(argv
[1],"notice")) server
.verbosity
= REDIS_NOTICE
;
1880 else if (!strcasecmp(argv
[1],"warning")) server
.verbosity
= REDIS_WARNING
;
1882 err
= "Invalid log level. Must be one of debug, notice, warning";
1885 } else if (!strcasecmp(argv
[0],"logfile") && argc
== 2) {
1888 server
.logfile
= zstrdup(argv
[1]);
1889 if (!strcasecmp(server
.logfile
,"stdout")) {
1890 zfree(server
.logfile
);
1891 server
.logfile
= NULL
;
1893 if (server
.logfile
) {
1894 /* Test if we are able to open the file. The server will not
1895 * be able to abort just for this problem later... */
1896 logfp
= fopen(server
.logfile
,"a");
1897 if (logfp
== NULL
) {
1898 err
= sdscatprintf(sdsempty(),
1899 "Can't open the log file: %s", strerror(errno
));
1904 } else if (!strcasecmp(argv
[0],"databases") && argc
== 2) {
1905 server
.dbnum
= atoi(argv
[1]);
1906 if (server
.dbnum
< 1) {
1907 err
= "Invalid number of databases"; goto loaderr
;
1909 } else if (!strcasecmp(argv
[0],"include") && argc
== 2) {
1910 loadServerConfig(argv
[1]);
1911 } else if (!strcasecmp(argv
[0],"maxclients") && argc
== 2) {
1912 server
.maxclients
= atoi(argv
[1]);
1913 } else if (!strcasecmp(argv
[0],"maxmemory") && argc
== 2) {
1914 server
.maxmemory
= memtoll(argv
[1],NULL
);
1915 } else if (!strcasecmp(argv
[0],"slaveof") && argc
== 3) {
1916 server
.masterhost
= sdsnew(argv
[1]);
1917 server
.masterport
= atoi(argv
[2]);
1918 server
.replstate
= REDIS_REPL_CONNECT
;
1919 } else if (!strcasecmp(argv
[0],"masterauth") && argc
== 2) {
1920 server
.masterauth
= zstrdup(argv
[1]);
1921 } else if (!strcasecmp(argv
[0],"glueoutputbuf") && argc
== 2) {
1922 if ((server
.glueoutputbuf
= yesnotoi(argv
[1])) == -1) {
1923 err
= "argument must be 'yes' or 'no'"; goto loaderr
;
1925 } else if (!strcasecmp(argv
[0],"rdbcompression") && argc
== 2) {
1926 if ((server
.rdbcompression
= yesnotoi(argv
[1])) == -1) {
1927 err
= "argument must be 'yes' or 'no'"; goto loaderr
;
1929 } else if (!strcasecmp(argv
[0],"activerehashing") && argc
== 2) {
1930 if ((server
.activerehashing
= yesnotoi(argv
[1])) == -1) {
1931 err
= "argument must be 'yes' or 'no'"; goto loaderr
;
1933 } else if (!strcasecmp(argv
[0],"daemonize") && argc
== 2) {
1934 if ((server
.daemonize
= yesnotoi(argv
[1])) == -1) {
1935 err
= "argument must be 'yes' or 'no'"; goto loaderr
;
1937 } else if (!strcasecmp(argv
[0],"appendonly") && argc
== 2) {
1938 if ((server
.appendonly
= yesnotoi(argv
[1])) == -1) {
1939 err
= "argument must be 'yes' or 'no'"; goto loaderr
;
1941 } else if (!strcasecmp(argv
[0],"appendfilename") && argc
== 2) {
1942 zfree(server
.appendfilename
);
1943 server
.appendfilename
= zstrdup(argv
[1]);
1944 } else if (!strcasecmp(argv
[0],"appendfsync") && argc
== 2) {
1945 if (!strcasecmp(argv
[1],"no")) {
1946 server
.appendfsync
= APPENDFSYNC_NO
;
1947 } else if (!strcasecmp(argv
[1],"always")) {
1948 server
.appendfsync
= APPENDFSYNC_ALWAYS
;
1949 } else if (!strcasecmp(argv
[1],"everysec")) {
1950 server
.appendfsync
= APPENDFSYNC_EVERYSEC
;
1952 err
= "argument must be 'no', 'always' or 'everysec'";
1955 } else if (!strcasecmp(argv
[0],"requirepass") && argc
== 2) {
1956 server
.requirepass
= zstrdup(argv
[1]);
1957 } else if (!strcasecmp(argv
[0],"pidfile") && argc
== 2) {
1958 zfree(server
.pidfile
);
1959 server
.pidfile
= zstrdup(argv
[1]);
1960 } else if (!strcasecmp(argv
[0],"dbfilename") && argc
== 2) {
1961 zfree(server
.dbfilename
);
1962 server
.dbfilename
= zstrdup(argv
[1]);
1963 } else if (!strcasecmp(argv
[0],"vm-enabled") && argc
== 2) {
1964 if ((server
.vm_enabled
= yesnotoi(argv
[1])) == -1) {
1965 err
= "argument must be 'yes' or 'no'"; goto loaderr
;
1967 } else if (!strcasecmp(argv
[0],"vm-swap-file") && argc
== 2) {
1968 zfree(server
.vm_swap_file
);
1969 server
.vm_swap_file
= zstrdup(argv
[1]);
1970 } else if (!strcasecmp(argv
[0],"vm-max-memory") && argc
== 2) {
1971 server
.vm_max_memory
= memtoll(argv
[1],NULL
);
1972 } else if (!strcasecmp(argv
[0],"vm-page-size") && argc
== 2) {
1973 server
.vm_page_size
= memtoll(argv
[1], NULL
);
1974 } else if (!strcasecmp(argv
[0],"vm-pages") && argc
== 2) {
1975 server
.vm_pages
= memtoll(argv
[1], NULL
);
1976 } else if (!strcasecmp(argv
[0],"vm-max-threads") && argc
== 2) {
1977 server
.vm_max_threads
= strtoll(argv
[1], NULL
, 10);
1978 } else if (!strcasecmp(argv
[0],"hash-max-zipmap-entries") && argc
== 2){
1979 server
.hash_max_zipmap_entries
= memtoll(argv
[1], NULL
);
1980 } else if (!strcasecmp(argv
[0],"hash-max-zipmap-value") && argc
== 2){
1981 server
.hash_max_zipmap_value
= memtoll(argv
[1], NULL
);
1983 err
= "Bad directive or wrong number of arguments"; goto loaderr
;
1985 for (j
= 0; j
< argc
; j
++)
1990 if (fp
!= stdin
) fclose(fp
);
1994 fprintf(stderr
, "\n*** FATAL CONFIG FILE ERROR ***\n");
1995 fprintf(stderr
, "Reading the configuration file, at line %d\n", linenum
);
1996 fprintf(stderr
, ">>> '%s'\n", line
);
1997 fprintf(stderr
, "%s\n", err
);
2001 static void freeClientArgv(redisClient
*c
) {
2004 for (j
= 0; j
< c
->argc
; j
++)
2005 decrRefCount(c
->argv
[j
]);
2006 for (j
= 0; j
< c
->mbargc
; j
++)
2007 decrRefCount(c
->mbargv
[j
]);
2012 static void freeClient(redisClient
*c
) {
2015 /* Note that if the client we are freeing is blocked in a blocking
2016 * call, we have to set querybuf to NULL *before* calling
2017 * unblockClientWaitingData(), so that processInputBuffer() does not get
2018 * called. It is also important to remove the file events after
2019 * this, because this call adds the READABLE event. */
2020 sdsfree(c
->querybuf
);
2022 if (c
->flags
& REDIS_BLOCKED
)
2023 unblockClientWaitingData(c
);
2025 /* UNWATCH all the keys */
2027 listRelease(c
->watched_keys
);
2028 /* Unsubscribe from all the pubsub channels */
2029 pubsubUnsubscribeAllChannels(c
,0);
2030 pubsubUnsubscribeAllPatterns(c
,0);
2031 dictRelease(c
->pubsub_channels
);
2032 listRelease(c
->pubsub_patterns
);
2033 /* Obvious cleanup */
2034 aeDeleteFileEvent(server
.el
,c
->fd
,AE_READABLE
);
2035 aeDeleteFileEvent(server
.el
,c
->fd
,AE_WRITABLE
);
2036 listRelease(c
->reply
);
2039 /* Remove from the list of clients */
2040 ln
= listSearchKey(server
.clients
,c
);
2041 redisAssert(ln
!= NULL
);
2042 listDelNode(server
.clients
,ln
);
2043 /* Remove from the list of clients that are now ready to be restarted
2044 * after waiting for swapped keys */
2045 if (c
->flags
& REDIS_IO_WAIT
&& listLength(c
->io_keys
) == 0) {
2046 ln
= listSearchKey(server
.io_ready_clients
,c
);
2048 listDelNode(server
.io_ready_clients
,ln
);
2049 server
.vm_blocked_clients
--;
2052 /* Remove from the list of clients waiting for swapped keys */
2053 while (server
.vm_enabled
&& listLength(c
->io_keys
)) {
2054 ln
= listFirst(c
->io_keys
);
2055 dontWaitForSwappedKey(c
,ln
->value
);
2057 listRelease(c
->io_keys
);
2058 /* Master/slave cleanup */
2059 if (c
->flags
& REDIS_SLAVE
) {
2060 if (c
->replstate
== REDIS_REPL_SEND_BULK
&& c
->repldbfd
!= -1)
2062 list
*l
= (c
->flags
& REDIS_MONITOR
) ? server
.monitors
: server
.slaves
;
2063 ln
= listSearchKey(l
,c
);
2064 redisAssert(ln
!= NULL
);
2067 if (c
->flags
& REDIS_MASTER
) {
2068 server
.master
= NULL
;
2069 server
.replstate
= REDIS_REPL_CONNECT
;
2071 /* Release memory */
2074 freeClientMultiState(c
);
2078 #define GLUEREPLY_UP_TO (1024)
2079 static void glueReplyBuffersIfNeeded(redisClient
*c
) {
2081 char buf
[GLUEREPLY_UP_TO
];
2086 listRewind(c
->reply
,&li
);
2087 while((ln
= listNext(&li
))) {
2091 objlen
= sdslen(o
->ptr
);
2092 if (copylen
+ objlen
<= GLUEREPLY_UP_TO
) {
2093 memcpy(buf
+copylen
,o
->ptr
,objlen
);
2095 listDelNode(c
->reply
,ln
);
2097 if (copylen
== 0) return;
2101 /* Now the output buffer is empty, add the new single element */
2102 o
= createObject(REDIS_STRING
,sdsnewlen(buf
,copylen
));
2103 listAddNodeHead(c
->reply
,o
);
2106 static void sendReplyToClient(aeEventLoop
*el
, int fd
, void *privdata
, int mask
) {
2107 redisClient
*c
= privdata
;
2108 int nwritten
= 0, totwritten
= 0, objlen
;
2111 REDIS_NOTUSED(mask
);
2113 /* Use writev() if we have enough buffers to send */
2114 if (!server
.glueoutputbuf
&&
2115 listLength(c
->reply
) > REDIS_WRITEV_THRESHOLD
&&
2116 !(c
->flags
& REDIS_MASTER
))
2118 sendReplyToClientWritev(el
, fd
, privdata
, mask
);
2122 while(listLength(c
->reply
)) {
2123 if (server
.glueoutputbuf
&& listLength(c
->reply
) > 1)
2124 glueReplyBuffersIfNeeded(c
);
2126 o
= listNodeValue(listFirst(c
->reply
));
2127 objlen
= sdslen(o
->ptr
);
2130 listDelNode(c
->reply
,listFirst(c
->reply
));
2134 if (c
->flags
& REDIS_MASTER
) {
2135 /* Don't reply to a master */
2136 nwritten
= objlen
- c
->sentlen
;
2138 nwritten
= write(fd
, ((char*)o
->ptr
)+c
->sentlen
, objlen
- c
->sentlen
);
2139 if (nwritten
<= 0) break;
2141 c
->sentlen
+= nwritten
;
2142 totwritten
+= nwritten
;
2143 /* If we fully sent the object on head go to the next one */
2144 if (c
->sentlen
== objlen
) {
2145 listDelNode(c
->reply
,listFirst(c
->reply
));
2148 /* Note that we avoid sending more than REDIS_MAX_WRITE_PER_EVENT
2149 * bytes: in a single threaded server it's a good idea to serve
2150 * other clients as well, even if a very large request comes from a
2151 * super fast link that is always able to accept data (in a real world
2152 * scenario think about 'KEYS *' against the loopback interface) */
2153 if (totwritten
> REDIS_MAX_WRITE_PER_EVENT
) break;
2155 if (nwritten
== -1) {
2156 if (errno
== EAGAIN
) {
2159 redisLog(REDIS_VERBOSE
,
2160 "Error writing to client: %s", strerror(errno
));
2165 if (totwritten
> 0) c
->lastinteraction
= time(NULL
);
2166 if (listLength(c
->reply
) == 0) {
2168 aeDeleteFileEvent(server
.el
,c
->fd
,AE_WRITABLE
);
2172 static void sendReplyToClientWritev(aeEventLoop
*el
, int fd
, void *privdata
, int mask
)
2174 redisClient
*c
= privdata
;
2175 int nwritten
= 0, totwritten
= 0, objlen
, willwrite
;
2177 struct iovec iov
[REDIS_WRITEV_IOVEC_COUNT
];
2178 int offset
, ion
= 0;
2180 REDIS_NOTUSED(mask
);
2183 while (listLength(c
->reply
)) {
2184 offset
= c
->sentlen
;
2188 /* fill-in the iov[] array */
2189 for(node
= listFirst(c
->reply
); node
; node
= listNextNode(node
)) {
2190 o
= listNodeValue(node
);
2191 objlen
= sdslen(o
->ptr
);
2193 if (totwritten
+ objlen
- offset
> REDIS_MAX_WRITE_PER_EVENT
)
2196 if(ion
== REDIS_WRITEV_IOVEC_COUNT
)
2197 break; /* no more iovecs */
2199 iov
[ion
].iov_base
= ((char*)o
->ptr
) + offset
;
2200 iov
[ion
].iov_len
= objlen
- offset
;
2201 willwrite
+= objlen
- offset
;
2202 offset
= 0; /* just for the first item */
2209 /* write all collected blocks at once */
2210 if((nwritten
= writev(fd
, iov
, ion
)) < 0) {
2211 if (errno
!= EAGAIN
) {
2212 redisLog(REDIS_VERBOSE
,
2213 "Error writing to client: %s", strerror(errno
));
2220 totwritten
+= nwritten
;
2221 offset
= c
->sentlen
;
2223 /* remove written robjs from c->reply */
2224 while (nwritten
&& listLength(c
->reply
)) {
2225 o
= listNodeValue(listFirst(c
->reply
));
2226 objlen
= sdslen(o
->ptr
);
2228 if(nwritten
>= objlen
- offset
) {
2229 listDelNode(c
->reply
, listFirst(c
->reply
));
2230 nwritten
-= objlen
- offset
;
2234 c
->sentlen
+= nwritten
;
2242 c
->lastinteraction
= time(NULL
);
2244 if (listLength(c
->reply
) == 0) {
2246 aeDeleteFileEvent(server
.el
,c
->fd
,AE_WRITABLE
);
2250 static struct redisCommand
*lookupCommand(char *name
) {
2252 while(cmdTable
[j
].name
!= NULL
) {
2253 if (!strcasecmp(name
,cmdTable
[j
].name
)) return &cmdTable
[j
];
2259 /* resetClient prepare the client to process the next command */
2260 static void resetClient(redisClient
*c
) {
2266 /* Call() is the core of Redis execution of a command */
2267 static void call(redisClient
*c
, struct redisCommand
*cmd
) {
2270 dirty
= server
.dirty
;
2272 dirty
= server
.dirty
-dirty
;
2274 if (server
.appendonly
&& dirty
)
2275 feedAppendOnlyFile(cmd
,c
->db
->id
,c
->argv
,c
->argc
);
2276 if ((dirty
|| cmd
->flags
& REDIS_CMD_FORCE_REPLICATION
) &&
2277 listLength(server
.slaves
))
2278 replicationFeedSlaves(server
.slaves
,c
->db
->id
,c
->argv
,c
->argc
);
2279 if (listLength(server
.monitors
))
2280 replicationFeedMonitors(server
.monitors
,c
->db
->id
,c
->argv
,c
->argc
);
2281 server
.stat_numcommands
++;
2284 /* If this function gets called we already read a whole
2285 * command, arguments are in the client argv/argc fields.
2286 * processCommand() executes the command or prepares the
2287 * server for a bulk read from the client.
2289 * If 1 is returned the client is still alive and valid and
2290 * other operations can be performed by the caller. Otherwise
2291 * if 0 is returned the client was destroyed (i.e. after QUIT). */
2292 static int processCommand(redisClient
*c
) {
2293 struct redisCommand
*cmd
;
2295 /* Free some memory if needed (maxmemory setting) */
2296 if (server
.maxmemory
) freeMemoryIfNeeded();
2298 /* Handle the multi bulk command type. This is an alternative protocol
2299 * supported by Redis in order to receive commands that are composed of
2300 * multiple binary-safe "bulk" arguments. The latency of processing is
2301 * a bit higher but this allows things like multi-sets, so if this
2302 * protocol is used only for MSET and similar commands this is a big win. */
2303 if (c
->multibulk
== 0 && c
->argc
== 1 && ((char*)(c
->argv
[0]->ptr
))[0] == '*') {
2304 c
->multibulk
= atoi(((char*)c
->argv
[0]->ptr
)+1);
2305 if (c
->multibulk
<= 0) {
2309 decrRefCount(c
->argv
[c
->argc
-1]);
2313 } else if (c
->multibulk
) {
2314 if (c
->bulklen
== -1) {
2315 if (((char*)c
->argv
[0]->ptr
)[0] != '$') {
2316 addReplySds(c
,sdsnew("-ERR multi bulk protocol error\r\n"));
2320 int bulklen
= atoi(((char*)c
->argv
[0]->ptr
)+1);
2321 decrRefCount(c
->argv
[0]);
2322 if (bulklen
< 0 || bulklen
> 1024*1024*1024) {
2324 addReplySds(c
,sdsnew("-ERR invalid bulk write count\r\n"));
2329 c
->bulklen
= bulklen
+2; /* add two bytes for CR+LF */
2333 c
->mbargv
= zrealloc(c
->mbargv
,(sizeof(robj
*))*(c
->mbargc
+1));
2334 c
->mbargv
[c
->mbargc
] = c
->argv
[0];
2338 if (c
->multibulk
== 0) {
2342 /* Here we need to swap the multi-bulk argc/argv with the
2343 * normal argc/argv of the client structure. */
2345 c
->argv
= c
->mbargv
;
2346 c
->mbargv
= auxargv
;
2349 c
->argc
= c
->mbargc
;
2350 c
->mbargc
= auxargc
;
2352 /* We need to set bulklen to something different than -1
2353 * in order for the code below to process the command without
2354 * trying to read the last argument of a bulk command as
2355 * a special argument. */
2357 /* continue below and process the command */
2364 /* -- end of multi bulk commands processing -- */
2366 /* The QUIT command is handled as a special case. Normal command
2367 * procs are unable to close the client connection safely */
2368 if (!strcasecmp(c
->argv
[0]->ptr
,"quit")) {
2373 /* Now lookup the command and check ASAP about trivial error conditions
2374 * such as wrong arity, bad command name and so forth. */
2375 cmd
= lookupCommand(c
->argv
[0]->ptr
);
2378 sdscatprintf(sdsempty(), "-ERR unknown command '%s'\r\n",
2379 (char*)c
->argv
[0]->ptr
));
2382 } else if ((cmd
->arity
> 0 && cmd
->arity
!= c
->argc
) ||
2383 (c
->argc
< -cmd
->arity
)) {
2385 sdscatprintf(sdsempty(),
2386 "-ERR wrong number of arguments for '%s' command\r\n",
2390 } else if (cmd
->flags
& REDIS_CMD_BULK
&& c
->bulklen
== -1) {
2391 /* This is a bulk command, we have to read the last argument yet. */
2392 int bulklen
= atoi(c
->argv
[c
->argc
-1]->ptr
);
2394 decrRefCount(c
->argv
[c
->argc
-1]);
2395 if (bulklen
< 0 || bulklen
> 1024*1024*1024) {
2397 addReplySds(c
,sdsnew("-ERR invalid bulk write count\r\n"));
2402 c
->bulklen
= bulklen
+2; /* add two bytes for CR+LF */
2403 /* It is possible that the bulk read is already in the
2404 * buffer. Check this condition and handle it accordingly.
2405 * This is just a fast path, an alternative to calling processInputBuffer().
2406 * It's a good idea since the code is small and this condition
2407 * happens most of the time. */
2408 if ((signed)sdslen(c
->querybuf
) >= c
->bulklen
) {
2409 c
->argv
[c
->argc
] = createStringObject(c
->querybuf
,c
->bulklen
-2);
2411 c
->querybuf
= sdsrange(c
->querybuf
,c
->bulklen
,-1);
2413 /* Otherwise return... the last argument still has to be read
2414 * from the socket. */
2418 /* Let's try to encode the bulk object to save space. */
2419 if (cmd
->flags
& REDIS_CMD_BULK
)
2420 c
->argv
[c
->argc
-1] = tryObjectEncoding(c
->argv
[c
->argc
-1]);
2422 /* Check if the user is authenticated */
2423 if (server
.requirepass
&& !c
->authenticated
&& cmd
->proc
!= authCommand
) {
2424 addReplySds(c
,sdsnew("-ERR operation not permitted\r\n"));
2429 /* Handle the maxmemory directive */
2430 if (server
.maxmemory
&& (cmd
->flags
& REDIS_CMD_DENYOOM
) &&
2431 zmalloc_used_memory() > server
.maxmemory
)
2433 addReplySds(c
,sdsnew("-ERR command not allowed when used memory > 'maxmemory'\r\n"));
2438 /* Only allow SUBSCRIBE and UNSUBSCRIBE in the context of Pub/Sub */
2439 if ((dictSize(c
->pubsub_channels
) > 0 || listLength(c
->pubsub_patterns
) > 0)
2441 cmd
->proc
!= subscribeCommand
&& cmd
->proc
!= unsubscribeCommand
&&
2442 cmd
->proc
!= psubscribeCommand
&& cmd
->proc
!= punsubscribeCommand
) {
2443 addReplySds(c
,sdsnew("-ERR only (P)SUBSCRIBE / (P)UNSUBSCRIBE / QUIT allowed in this context\r\n"));
2448 /* Exec the command */
2449 if (c
->flags
& REDIS_MULTI
&& cmd
->proc
!= execCommand
&& cmd
->proc
!= discardCommand
) {
2450 queueMultiCommand(c
,cmd
);
2451 addReply(c
,shared
.queued
);
2453 if (server
.vm_enabled
&& server
.vm_max_threads
> 0 &&
2454 blockClientOnSwappedKeys(c
,cmd
)) return 1;
2458 /* Prepare the client for the next command */
2463 static void replicationFeedSlaves(list
*slaves
, int dictid
, robj
**argv
, int argc
) {
2468 /* We need 1+(ARGS*3) objects since commands are using the new protocol
2469 * and we need 1 object for the first "*<count>\r\n" multibulk count, then
2470 * for every additional object we have "$<count>\r\n" + object + "\r\n". */
2471 robj
*static_outv
[REDIS_STATIC_ARGS
*3+1];
2474 if (argc
<= REDIS_STATIC_ARGS
) {
2477 outv
= zmalloc(sizeof(robj
*)*(argc
*3+1));
2480 lenobj
= createObject(REDIS_STRING
,
2481 sdscatprintf(sdsempty(), "*%d\r\n", argc
));
2482 lenobj
->refcount
= 0;
2483 outv
[outc
++] = lenobj
;
2484 for (j
= 0; j
< argc
; j
++) {
2485 lenobj
= createObject(REDIS_STRING
,
2486 sdscatprintf(sdsempty(),"$%lu\r\n",
2487 (unsigned long) stringObjectLen(argv
[j
])));
2488 lenobj
->refcount
= 0;
2489 outv
[outc
++] = lenobj
;
2490 outv
[outc
++] = argv
[j
];
2491 outv
[outc
++] = shared
.crlf
;
2494 /* Increment all the refcounts at start and decrement at end in order to
2495 * be sure to free objects if there is no slave in a replication state
2496 * able to be fed with commands */
2497 for (j
= 0; j
< outc
; j
++) incrRefCount(outv
[j
]);
2498 listRewind(slaves
,&li
);
2499 while((ln
= listNext(&li
))) {
2500 redisClient
*slave
= ln
->value
;
2502 /* Don't feed slaves that are still waiting for BGSAVE to start */
2503 if (slave
->replstate
== REDIS_REPL_WAIT_BGSAVE_START
) continue;
2505 /* Feed all the other slaves, MONITORs and so on */
2506 if (slave
->slaveseldb
!= dictid
) {
2510 case 0: selectcmd
= shared
.select0
; break;
2511 case 1: selectcmd
= shared
.select1
; break;
2512 case 2: selectcmd
= shared
.select2
; break;
2513 case 3: selectcmd
= shared
.select3
; break;
2514 case 4: selectcmd
= shared
.select4
; break;
2515 case 5: selectcmd
= shared
.select5
; break;
2516 case 6: selectcmd
= shared
.select6
; break;
2517 case 7: selectcmd
= shared
.select7
; break;
2518 case 8: selectcmd
= shared
.select8
; break;
2519 case 9: selectcmd
= shared
.select9
; break;
2521 selectcmd
= createObject(REDIS_STRING
,
2522 sdscatprintf(sdsempty(),"select %d\r\n",dictid
));
2523 selectcmd
->refcount
= 0;
2526 addReply(slave
,selectcmd
);
2527 slave
->slaveseldb
= dictid
;
2529 for (j
= 0; j
< outc
; j
++) addReply(slave
,outv
[j
]);
2531 for (j
= 0; j
< outc
; j
++) decrRefCount(outv
[j
]);
2532 if (outv
!= static_outv
) zfree(outv
);
2535 static sds
sdscatrepr(sds s
, char *p
, size_t len
) {
2536 s
= sdscatlen(s
,"\"",1);
2541 s
= sdscatprintf(s
,"\\%c",*p
);
2543 case '\n': s
= sdscatlen(s
,"\\n",1); break;
2544 case '\r': s
= sdscatlen(s
,"\\r",1); break;
2545 case '\t': s
= sdscatlen(s
,"\\t",1); break;
2546 case '\a': s
= sdscatlen(s
,"\\a",1); break;
2547 case '\b': s
= sdscatlen(s
,"\\b",1); break;
2550 s
= sdscatprintf(s
,"%c",*p
);
2552 s
= sdscatprintf(s
,"\\x%02x",(unsigned char)*p
);
2557 return sdscatlen(s
,"\"",1);
2560 static void replicationFeedMonitors(list
*monitors
, int dictid
, robj
**argv
, int argc
) {
2564 sds cmdrepr
= sdsnew("+");
2568 gettimeofday(&tv
,NULL
);
2569 cmdrepr
= sdscatprintf(cmdrepr
,"%ld.%ld ",(long)tv
.tv_sec
,(long)tv
.tv_usec
);
2570 if (dictid
!= 0) cmdrepr
= sdscatprintf(cmdrepr
,"(db %d) ", dictid
);
2572 for (j
= 0; j
< argc
; j
++) {
2573 if (argv
[j
]->encoding
== REDIS_ENCODING_INT
) {
2574 cmdrepr
= sdscatprintf(cmdrepr
, "%ld", (long)argv
[j
]->ptr
);
2576 cmdrepr
= sdscatrepr(cmdrepr
,(char*)argv
[j
]->ptr
,
2577 sdslen(argv
[j
]->ptr
));
2580 cmdrepr
= sdscatlen(cmdrepr
," ",1);
2582 cmdrepr
= sdscatlen(cmdrepr
,"\r\n",2);
2583 cmdobj
= createObject(REDIS_STRING
,cmdrepr
);
2585 listRewind(monitors
,&li
);
2586 while((ln
= listNext(&li
))) {
2587 redisClient
*monitor
= ln
->value
;
2588 addReply(monitor
,cmdobj
);
2590 decrRefCount(cmdobj
);
2593 static void processInputBuffer(redisClient
*c
) {
2595 /* Before processing the input buffer, make sure the client is not
2596 * waiting for a blocking operation such as BLPOP. Note that on the first
2597 * iteration the client is never blocked, otherwise processInputBuffer
2598 * would not be called at all, but after the execution of the first commands
2599 * in the input buffer the client may be blocked, and the "goto again"
2600 * will try to reiterate. The following line will make it return asap. */
2601 if (c
->flags
& REDIS_BLOCKED
|| c
->flags
& REDIS_IO_WAIT
) return;
2602 if (c
->bulklen
== -1) {
2603 /* Read the first line of the query */
2604 char *p
= strchr(c
->querybuf
,'\n');
2611 query
= c
->querybuf
;
2612 c
->querybuf
= sdsempty();
2613 querylen
= 1+(p
-(query
));
2614 if (sdslen(query
) > querylen
) {
2615 /* leave data after the first line of the query in the buffer */
2616 c
->querybuf
= sdscatlen(c
->querybuf
,query
+querylen
,sdslen(query
)-querylen
);
2618 *p
= '\0'; /* remove "\n" */
2619 if (*(p
-1) == '\r') *(p
-1) = '\0'; /* and "\r" if any */
2620 sdsupdatelen(query
);
2622 /* Now we can split the query in arguments */
2623 argv
= sdssplitlen(query
,sdslen(query
)," ",1,&argc
);
2626 if (c
->argv
) zfree(c
->argv
);
2627 c
->argv
= zmalloc(sizeof(robj
*)*argc
);
2629 for (j
= 0; j
< argc
; j
++) {
2630 if (sdslen(argv
[j
])) {
2631 c
->argv
[c
->argc
] = createObject(REDIS_STRING
,argv
[j
]);
2639 /* Execute the command. If the client is still valid
2640 * after processCommand() return and there is something
2641 * on the query buffer try to process the next command. */
2642 if (processCommand(c
) && sdslen(c
->querybuf
)) goto again
;
2644 /* Nothing to process, argc == 0. Just process the query
2645 * buffer if it's not empty or return to the caller */
2646 if (sdslen(c
->querybuf
)) goto again
;
2649 } else if (sdslen(c
->querybuf
) >= REDIS_REQUEST_MAX_SIZE
) {
2650 redisLog(REDIS_VERBOSE
, "Client protocol error");
2655 /* Bulk read handling. Note that if we are at this point
2656 the client already sent a command terminated with a newline,
2657 we are reading the bulk data that is actually the last
2658 argument of the command. */
2659 int qbl
= sdslen(c
->querybuf
);
2661 if (c
->bulklen
<= qbl
) {
2662 /* Copy everything but the final CRLF as final argument */
2663 c
->argv
[c
->argc
] = createStringObject(c
->querybuf
,c
->bulklen
-2);
2665 c
->querybuf
= sdsrange(c
->querybuf
,c
->bulklen
,-1);
2666 /* Process the command. If the client is still valid after
2667 * the processing and there is more data in the buffer
2668 * try to parse it. */
2669 if (processCommand(c
) && sdslen(c
->querybuf
)) goto again
;
2675 static void readQueryFromClient(aeEventLoop
*el
, int fd
, void *privdata
, int mask
) {
2676 redisClient
*c
= (redisClient
*) privdata
;
2677 char buf
[REDIS_IOBUF_LEN
];
2680 REDIS_NOTUSED(mask
);
2682 nread
= read(fd
, buf
, REDIS_IOBUF_LEN
);
2684 if (errno
== EAGAIN
) {
2687 redisLog(REDIS_VERBOSE
, "Reading from client: %s",strerror(errno
));
2691 } else if (nread
== 0) {
2692 redisLog(REDIS_VERBOSE
, "Client closed connection");
2697 c
->querybuf
= sdscatlen(c
->querybuf
, buf
, nread
);
2698 c
->lastinteraction
= time(NULL
);
2702 processInputBuffer(c
);
2705 static int selectDb(redisClient
*c
, int id
) {
2706 if (id
< 0 || id
>= server
.dbnum
)
2708 c
->db
= &server
.db
[id
];
2712 static void *dupClientReplyValue(void *o
) {
2713 incrRefCount((robj
*)o
);
2717 static int listMatchObjects(void *a
, void *b
) {
2718 return equalStringObjects(a
,b
);
2721 static redisClient
*createClient(int fd
) {
2722 redisClient
*c
= zmalloc(sizeof(*c
));
2724 anetNonBlock(NULL
,fd
);
2725 anetTcpNoDelay(NULL
,fd
);
2726 if (!c
) return NULL
;
2729 c
->querybuf
= sdsempty();
2738 c
->lastinteraction
= time(NULL
);
2739 c
->authenticated
= 0;
2740 c
->replstate
= REDIS_REPL_NONE
;
2741 c
->reply
= listCreate();
2742 listSetFreeMethod(c
->reply
,decrRefCount
);
2743 listSetDupMethod(c
->reply
,dupClientReplyValue
);
2744 c
->blocking_keys
= NULL
;
2745 c
->blocking_keys_num
= 0;
2746 c
->io_keys
= listCreate();
2747 c
->watched_keys
= listCreate();
2748 listSetFreeMethod(c
->io_keys
,decrRefCount
);
2749 c
->pubsub_channels
= dictCreate(&setDictType
,NULL
);
2750 c
->pubsub_patterns
= listCreate();
2751 listSetFreeMethod(c
->pubsub_patterns
,decrRefCount
);
2752 listSetMatchMethod(c
->pubsub_patterns
,listMatchObjects
);
2753 if (aeCreateFileEvent(server
.el
, c
->fd
, AE_READABLE
,
2754 readQueryFromClient
, c
) == AE_ERR
) {
2758 listAddNodeTail(server
.clients
,c
);
2759 initClientMultiState(c
);
2763 static void addReply(redisClient
*c
, robj
*obj
) {
2764 if (listLength(c
->reply
) == 0 &&
2765 (c
->replstate
== REDIS_REPL_NONE
||
2766 c
->replstate
== REDIS_REPL_ONLINE
) &&
2767 aeCreateFileEvent(server
.el
, c
->fd
, AE_WRITABLE
,
2768 sendReplyToClient
, c
) == AE_ERR
) return;
2770 if (server
.vm_enabled
&& obj
->storage
!= REDIS_VM_MEMORY
) {
2771 obj
= dupStringObject(obj
);
2772 obj
->refcount
= 0; /* getDecodedObject() will increment the refcount */
2774 listAddNodeTail(c
->reply
,getDecodedObject(obj
));
2777 static void addReplySds(redisClient
*c
, sds s
) {
2778 robj
*o
= createObject(REDIS_STRING
,s
);
2783 static void addReplyDouble(redisClient
*c
, double d
) {
2786 snprintf(buf
,sizeof(buf
),"%.17g",d
);
2787 addReplySds(c
,sdscatprintf(sdsempty(),"$%lu\r\n%s\r\n",
2788 (unsigned long) strlen(buf
),buf
));
2791 static void addReplyLongLong(redisClient
*c
, long long ll
) {
2796 addReply(c
,shared
.czero
);
2798 } else if (ll
== 1) {
2799 addReply(c
,shared
.cone
);
2803 len
= ll2string(buf
+1,sizeof(buf
)-1,ll
);
2806 addReplySds(c
,sdsnewlen(buf
,len
+3));
2809 static void addReplyUlong(redisClient
*c
, unsigned long ul
) {
2814 addReply(c
,shared
.czero
);
2816 } else if (ul
== 1) {
2817 addReply(c
,shared
.cone
);
2820 len
= snprintf(buf
,sizeof(buf
),":%lu\r\n",ul
);
2821 addReplySds(c
,sdsnewlen(buf
,len
));
2824 static void addReplyBulkLen(redisClient
*c
, robj
*obj
) {
2828 if (obj
->encoding
== REDIS_ENCODING_RAW
) {
2829 len
= sdslen(obj
->ptr
);
2831 long n
= (long)obj
->ptr
;
2833 /* Compute how many bytes will take this integer as a radix 10 string */
2839 while((n
= n
/10) != 0) {
2844 intlen
= ll2string(buf
+1,sizeof(buf
)-1,(long long)len
);
2845 buf
[intlen
+1] = '\r';
2846 buf
[intlen
+2] = '\n';
2847 addReplySds(c
,sdsnewlen(buf
,intlen
+3));
2850 static void addReplyBulk(redisClient
*c
, robj
*obj
) {
2851 addReplyBulkLen(c
,obj
);
2853 addReply(c
,shared
.crlf
);
2856 /* In the CONFIG command we need to add vanilla C string as bulk replies */
2857 static void addReplyBulkCString(redisClient
*c
, char *s
) {
2859 addReply(c
,shared
.nullbulk
);
2861 robj
*o
= createStringObject(s
,strlen(s
));
2867 static void acceptHandler(aeEventLoop
*el
, int fd
, void *privdata
, int mask
) {
2872 REDIS_NOTUSED(mask
);
2873 REDIS_NOTUSED(privdata
);
2875 cfd
= anetAccept(server
.neterr
, fd
, cip
, &cport
);
2876 if (cfd
== AE_ERR
) {
2877 redisLog(REDIS_VERBOSE
,"Accepting client connection: %s", server
.neterr
);
2880 redisLog(REDIS_VERBOSE
,"Accepted %s:%d", cip
, cport
);
2881 if ((c
= createClient(cfd
)) == NULL
) {
2882 redisLog(REDIS_WARNING
,"Error allocating resoures for the client");
2883 close(cfd
); /* May be already closed, just ingore errors */
2886 /* If maxclient directive is set and this is one client more... close the
2887 * connection. Note that we create the client instead to check before
2888 * for this condition, since now the socket is already set in nonblocking
2889 * mode and we can send an error for free using the Kernel I/O */
2890 if (server
.maxclients
&& listLength(server
.clients
) > server
.maxclients
) {
2891 char *err
= "-ERR max number of clients reached\r\n";
2893 /* That's a best effort error message, don't check write errors */
2894 if (write(c
->fd
,err
,strlen(err
)) == -1) {
2895 /* Nothing to do, Just to avoid the warning... */
2900 server
.stat_numconnections
++;
2903 /* ======================= Redis objects implementation ===================== */
2905 static robj
*createObject(int type
, void *ptr
) {
2908 if (server
.vm_enabled
) pthread_mutex_lock(&server
.obj_freelist_mutex
);
2909 if (listLength(server
.objfreelist
)) {
2910 listNode
*head
= listFirst(server
.objfreelist
);
2911 o
= listNodeValue(head
);
2912 listDelNode(server
.objfreelist
,head
);
2913 if (server
.vm_enabled
) pthread_mutex_unlock(&server
.obj_freelist_mutex
);
2915 if (server
.vm_enabled
) {
2916 pthread_mutex_unlock(&server
.obj_freelist_mutex
);
2917 o
= zmalloc(sizeof(*o
));
2919 o
= zmalloc(sizeof(*o
)-sizeof(struct redisObjectVM
));
2923 o
->encoding
= REDIS_ENCODING_RAW
;
2926 if (server
.vm_enabled
) {
2927 /* Note that this code may run in the context of an I/O thread
2928 * and accessing to server.unixtime in theory is an error
2929 * (no locks). But in practice this is safe, and even if we read
2930 * garbage Redis will not fail, as it's just a statistical info */
2931 o
->vm
.atime
= server
.unixtime
;
2932 o
->storage
= REDIS_VM_MEMORY
;
2937 static robj
*createStringObject(char *ptr
, size_t len
) {
2938 return createObject(REDIS_STRING
,sdsnewlen(ptr
,len
));
2941 static robj
*createStringObjectFromLongLong(long long value
) {
2943 if (value
>= 0 && value
< REDIS_SHARED_INTEGERS
) {
2944 incrRefCount(shared
.integers
[value
]);
2945 o
= shared
.integers
[value
];
2947 if (value
>= LONG_MIN
&& value
<= LONG_MAX
) {
2948 o
= createObject(REDIS_STRING
, NULL
);
2949 o
->encoding
= REDIS_ENCODING_INT
;
2950 o
->ptr
= (void*)((long)value
);
2952 o
= createObject(REDIS_STRING
,sdsfromlonglong(value
));
2958 static robj
*dupStringObject(robj
*o
) {
2959 assert(o
->encoding
== REDIS_ENCODING_RAW
);
2960 return createStringObject(o
->ptr
,sdslen(o
->ptr
));
2963 static robj
*createListObject(void) {
2964 list
*l
= listCreate();
2966 listSetFreeMethod(l
,decrRefCount
);
2967 return createObject(REDIS_LIST
,l
);
2970 static robj
*createSetObject(void) {
2971 dict
*d
= dictCreate(&setDictType
,NULL
);
2972 return createObject(REDIS_SET
,d
);
2975 static robj
*createHashObject(void) {
2976 /* All the Hashes start as zipmaps. Will be automatically converted
2977 * into hash tables if there are enough elements or big elements
2979 unsigned char *zm
= zipmapNew();
2980 robj
*o
= createObject(REDIS_HASH
,zm
);
2981 o
->encoding
= REDIS_ENCODING_ZIPMAP
;
2985 static robj
*createZsetObject(void) {
2986 zset
*zs
= zmalloc(sizeof(*zs
));
2988 zs
->dict
= dictCreate(&zsetDictType
,NULL
);
2989 zs
->zsl
= zslCreate();
2990 return createObject(REDIS_ZSET
,zs
);
2993 static void freeStringObject(robj
*o
) {
2994 if (o
->encoding
== REDIS_ENCODING_RAW
) {
2999 static void freeListObject(robj
*o
) {
3000 listRelease((list
*) o
->ptr
);
3003 static void freeSetObject(robj
*o
) {
3004 dictRelease((dict
*) o
->ptr
);
3007 static void freeZsetObject(robj
*o
) {
3010 dictRelease(zs
->dict
);
3015 static void freeHashObject(robj
*o
) {
3016 switch (o
->encoding
) {
3017 case REDIS_ENCODING_HT
:
3018 dictRelease((dict
*) o
->ptr
);
3020 case REDIS_ENCODING_ZIPMAP
:
3024 redisPanic("Unknown hash encoding type");
3029 static void incrRefCount(robj
*o
) {
3033 static void decrRefCount(void *obj
) {
3036 if (o
->refcount
<= 0) redisPanic("decrRefCount against refcount <= 0");
3037 /* Object is a key of a swapped out value, or in the process of being
3039 if (server
.vm_enabled
&&
3040 (o
->storage
== REDIS_VM_SWAPPED
|| o
->storage
== REDIS_VM_LOADING
))
3042 if (o
->storage
== REDIS_VM_LOADING
) vmCancelThreadedIOJob(obj
);
3043 redisAssert(o
->type
== REDIS_STRING
);
3044 freeStringObject(o
);
3045 vmMarkPagesFree(o
->vm
.page
,o
->vm
.usedpages
);
3046 pthread_mutex_lock(&server
.obj_freelist_mutex
);
3047 if (listLength(server
.objfreelist
) > REDIS_OBJFREELIST_MAX
||
3048 !listAddNodeHead(server
.objfreelist
,o
))
3050 pthread_mutex_unlock(&server
.obj_freelist_mutex
);
3051 server
.vm_stats_swapped_objects
--;
3054 /* Object is in memory, or in the process of being swapped out. */
3055 if (--(o
->refcount
) == 0) {
3056 if (server
.vm_enabled
&& o
->storage
== REDIS_VM_SWAPPING
)
3057 vmCancelThreadedIOJob(obj
);
3059 case REDIS_STRING
: freeStringObject(o
); break;
3060 case REDIS_LIST
: freeListObject(o
); break;
3061 case REDIS_SET
: freeSetObject(o
); break;
3062 case REDIS_ZSET
: freeZsetObject(o
); break;
3063 case REDIS_HASH
: freeHashObject(o
); break;
3064 default: redisPanic("Unknown object type"); break;
3066 if (server
.vm_enabled
) pthread_mutex_lock(&server
.obj_freelist_mutex
);
3067 if (listLength(server
.objfreelist
) > REDIS_OBJFREELIST_MAX
||
3068 !listAddNodeHead(server
.objfreelist
,o
))
3070 if (server
.vm_enabled
) pthread_mutex_unlock(&server
.obj_freelist_mutex
);
3074 static robj
*lookupKey(redisDb
*db
, robj
*key
) {
3075 dictEntry
*de
= dictFind(db
->dict
,key
);
3077 robj
*key
= dictGetEntryKey(de
);
3078 robj
*val
= dictGetEntryVal(de
);
3080 if (server
.vm_enabled
) {
3081 if (key
->storage
== REDIS_VM_MEMORY
||
3082 key
->storage
== REDIS_VM_SWAPPING
)
3084 /* If we were swapping the object out, stop it, this key
3086 if (key
->storage
== REDIS_VM_SWAPPING
)
3087 vmCancelThreadedIOJob(key
);
3088 /* Update the access time of the key for the aging algorithm. */
3089 key
->vm
.atime
= server
.unixtime
;
3091 int notify
= (key
->storage
== REDIS_VM_LOADING
);
3093 /* Our value was swapped on disk. Bring it at home. */
3094 redisAssert(val
== NULL
);
3095 val
= vmLoadObject(key
);
3096 dictGetEntryVal(de
) = val
;
3098 /* Clients blocked by the VM subsystem may be waiting for
3100 if (notify
) handleClientsBlockedOnSwappedKey(db
,key
);
3109 static robj
*lookupKeyRead(redisDb
*db
, robj
*key
) {
3110 expireIfNeeded(db
,key
);
3111 return lookupKey(db
,key
);
3114 static robj
*lookupKeyWrite(redisDb
*db
, robj
*key
) {
3115 deleteIfVolatile(db
,key
);
3116 touchWatchedKey(db
,key
);
3117 return lookupKey(db
,key
);
3120 static robj
*lookupKeyReadOrReply(redisClient
*c
, robj
*key
, robj
*reply
) {
3121 robj
*o
= lookupKeyRead(c
->db
, key
);
3122 if (!o
) addReply(c
,reply
);
3126 static robj
*lookupKeyWriteOrReply(redisClient
*c
, robj
*key
, robj
*reply
) {
3127 robj
*o
= lookupKeyWrite(c
->db
, key
);
3128 if (!o
) addReply(c
,reply
);
3132 static int checkType(redisClient
*c
, robj
*o
, int type
) {
3133 if (o
->type
!= type
) {
3134 addReply(c
,shared
.wrongtypeerr
);
3140 static int deleteKey(redisDb
*db
, robj
*key
) {
3143 /* We need to protect key from destruction: after the first dictDelete()
3144 * it may happen that 'key' is no longer valid if we don't increment
3145 * it's count. This may happen when we get the object reference directly
3146 * from the hash table with dictRandomKey() or dict iterators */
3148 if (dictSize(db
->expires
)) dictDelete(db
->expires
,key
);
3149 retval
= dictDelete(db
->dict
,key
);
3152 return retval
== DICT_OK
;
3155 /* Check if the nul-terminated string 's' can be represented by a long
3156 * (that is, is a number that fits into long without any other space or
3157 * character before or after the digits).
3159 * If so, the function returns REDIS_OK and *longval is set to the value
3160 * of the number. Otherwise REDIS_ERR is returned */
3161 static int isStringRepresentableAsLong(sds s
, long *longval
) {
3162 char buf
[32], *endptr
;
3166 value
= strtol(s
, &endptr
, 10);
3167 if (endptr
[0] != '\0') return REDIS_ERR
;
3168 slen
= ll2string(buf
,32,value
);
3170 /* If the number converted back into a string is not identical
3171 * then it's not possible to encode the string as integer */
3172 if (sdslen(s
) != (unsigned)slen
|| memcmp(buf
,s
,slen
)) return REDIS_ERR
;
3173 if (longval
) *longval
= value
;
3177 /* Try to encode a string object in order to save space */
3178 static robj
*tryObjectEncoding(robj
*o
) {
3182 if (o
->encoding
!= REDIS_ENCODING_RAW
)
3183 return o
; /* Already encoded */
3185 /* It's not safe to encode shared objects: shared objects can be shared
3186 * everywhere in the "object space" of Redis. Encoded objects can only
3187 * appear as "values" (and not, for instance, as keys) */
3188 if (o
->refcount
> 1) return o
;
3190 /* Currently we try to encode only strings */
3191 redisAssert(o
->type
== REDIS_STRING
);
3193 /* Check if we can represent this string as a long integer */
3194 if (isStringRepresentableAsLong(s
,&value
) == REDIS_ERR
) return o
;
3196 /* Ok, this object can be encoded */
3197 if (value
>= 0 && value
< REDIS_SHARED_INTEGERS
) {
3199 incrRefCount(shared
.integers
[value
]);
3200 return shared
.integers
[value
];
3202 o
->encoding
= REDIS_ENCODING_INT
;
3204 o
->ptr
= (void*) value
;
3209 /* Get a decoded version of an encoded object (returned as a new object).
3210 * If the object is already raw-encoded just increment the ref count. */
3211 static robj
*getDecodedObject(robj
*o
) {
3214 if (o
->encoding
== REDIS_ENCODING_RAW
) {
3218 if (o
->type
== REDIS_STRING
&& o
->encoding
== REDIS_ENCODING_INT
) {
3221 ll2string(buf
,32,(long)o
->ptr
);
3222 dec
= createStringObject(buf
,strlen(buf
));
3225 redisPanic("Unknown encoding type");
3229 /* Compare two string objects via strcmp() or alike.
3230 * Note that the objects may be integer-encoded. In such a case we
3231 * use ll2string() to get a string representation of the numbers on the stack
3232 * and compare the strings, it's much faster than calling getDecodedObject().
3234 * Important note: if objects are not integer encoded, but binary-safe strings,
3235 * sdscmp() from sds.c will apply memcmp() so this function ca be considered
3237 static int compareStringObjects(robj
*a
, robj
*b
) {
3238 redisAssert(a
->type
== REDIS_STRING
&& b
->type
== REDIS_STRING
);
3239 char bufa
[128], bufb
[128], *astr
, *bstr
;
3242 if (a
== b
) return 0;
3243 if (a
->encoding
!= REDIS_ENCODING_RAW
) {
3244 ll2string(bufa
,sizeof(bufa
),(long) a
->ptr
);
3250 if (b
->encoding
!= REDIS_ENCODING_RAW
) {
3251 ll2string(bufb
,sizeof(bufb
),(long) b
->ptr
);
3257 return bothsds
? sdscmp(astr
,bstr
) : strcmp(astr
,bstr
);
3260 /* Equal string objects return 1 if the two objects are the same from the
3261 * point of view of a string comparison, otherwise 0 is returned. Note that
3262 * this function is faster then checking for (compareStringObject(a,b) == 0)
3263 * because it can perform some more optimization. */
3264 static int equalStringObjects(robj
*a
, robj
*b
) {
3265 if (a
->encoding
!= REDIS_ENCODING_RAW
&& b
->encoding
!= REDIS_ENCODING_RAW
){
3266 return a
->ptr
== b
->ptr
;
3268 return compareStringObjects(a
,b
) == 0;
3272 static size_t stringObjectLen(robj
*o
) {
3273 redisAssert(o
->type
== REDIS_STRING
);
3274 if (o
->encoding
== REDIS_ENCODING_RAW
) {
3275 return sdslen(o
->ptr
);
3279 return ll2string(buf
,32,(long)o
->ptr
);
3283 static int getDoubleFromObject(robj
*o
, double *target
) {
3290 redisAssert(o
->type
== REDIS_STRING
);
3291 if (o
->encoding
== REDIS_ENCODING_RAW
) {
3292 value
= strtod(o
->ptr
, &eptr
);
3293 if (eptr
[0] != '\0') return REDIS_ERR
;
3294 } else if (o
->encoding
== REDIS_ENCODING_INT
) {
3295 value
= (long)o
->ptr
;
3297 redisPanic("Unknown string encoding");
3305 static int getDoubleFromObjectOrReply(redisClient
*c
, robj
*o
, double *target
, const char *msg
) {
3307 if (getDoubleFromObject(o
, &value
) != REDIS_OK
) {
3309 addReplySds(c
, sdscatprintf(sdsempty(), "-ERR %s\r\n", msg
));
3311 addReplySds(c
, sdsnew("-ERR value is not a double\r\n"));
3320 static int getLongLongFromObject(robj
*o
, long long *target
) {
3327 redisAssert(o
->type
== REDIS_STRING
);
3328 if (o
->encoding
== REDIS_ENCODING_RAW
) {
3329 value
= strtoll(o
->ptr
, &eptr
, 10);
3330 if (eptr
[0] != '\0') return REDIS_ERR
;
3331 } else if (o
->encoding
== REDIS_ENCODING_INT
) {
3332 value
= (long)o
->ptr
;
3334 redisPanic("Unknown string encoding");
3342 static int getLongLongFromObjectOrReply(redisClient
*c
, robj
*o
, long long *target
, const char *msg
) {
3344 if (getLongLongFromObject(o
, &value
) != REDIS_OK
) {
3346 addReplySds(c
, sdscatprintf(sdsempty(), "-ERR %s\r\n", msg
));
3348 addReplySds(c
, sdsnew("-ERR value is not an integer\r\n"));
3357 static int getLongFromObjectOrReply(redisClient
*c
, robj
*o
, long *target
, const char *msg
) {
3360 if (getLongLongFromObjectOrReply(c
, o
, &value
, msg
) != REDIS_OK
) return REDIS_ERR
;
3361 if (value
< LONG_MIN
|| value
> LONG_MAX
) {
3363 addReplySds(c
, sdscatprintf(sdsempty(), "-ERR %s\r\n", msg
));
3365 addReplySds(c
, sdsnew("-ERR value is out of range\r\n"));
3374 /*============================ RDB saving/loading =========================== */
/* Write the single byte 'type' to 'fp'.
 * Returns 0 on success, -1 if the byte could not be written. */
static int rdbSaveType(FILE *fp, unsigned char type) {
    size_t written = fwrite(&type,1,1,fp);

    if (written == 0) {
        return -1;
    }
    return 0;
}
/* Write 't' to 'fp' as a 4 byte int32_t, in the byte order of the host.
 * Returns 0 on success, -1 if the value could not be written. */
static int rdbSaveTime(FILE *fp, time_t t) {
    int32_t stamp = (int32_t) t;
    size_t written = fwrite(&stamp,4,1,fp);

    if (written == 0) {
        return -1;
    }
    return 0;
}
3387 /* check rdbLoadLen() comments for more info */
3388 static int rdbSaveLen(FILE *fp
, uint32_t len
) {
3389 unsigned char buf
[2];
3392 /* Save a 6 bit len */
3393 buf
[0] = (len
&0xFF)|(REDIS_RDB_6BITLEN
<<6);
3394 if (fwrite(buf
,1,1,fp
) == 0) return -1;
3395 } else if (len
< (1<<14)) {
3396 /* Save a 14 bit len */
3397 buf
[0] = ((len
>>8)&0xFF)|(REDIS_RDB_14BITLEN
<<6);
3399 if (fwrite(buf
,2,1,fp
) == 0) return -1;
3401 /* Save a 32 bit len */
3402 buf
[0] = (REDIS_RDB_32BITLEN
<<6);
3403 if (fwrite(buf
,1,1,fp
) == 0) return -1;
3405 if (fwrite(&len
,4,1,fp
) == 0) return -1;
3410 /* Encode 'value' as an integer if possible (if integer will fit the
3411 * supported range). If the function sucessful encoded the integer
3412 * then the (up to 5 bytes) encoded representation is written in the
3413 * string pointed by 'enc' and the length is returned. Otherwise
3415 static int rdbEncodeInteger(long long value
, unsigned char *enc
) {
3416 /* Finally check if it fits in our ranges */
3417 if (value
>= -(1<<7) && value
<= (1<<7)-1) {
3418 enc
[0] = (REDIS_RDB_ENCVAL
<<6)|REDIS_RDB_ENC_INT8
;
3419 enc
[1] = value
&0xFF;
3421 } else if (value
>= -(1<<15) && value
<= (1<<15)-1) {
3422 enc
[0] = (REDIS_RDB_ENCVAL
<<6)|REDIS_RDB_ENC_INT16
;
3423 enc
[1] = value
&0xFF;
3424 enc
[2] = (value
>>8)&0xFF;
3426 } else if (value
>= -((long long)1<<31) && value
<= ((long long)1<<31)-1) {
3427 enc
[0] = (REDIS_RDB_ENCVAL
<<6)|REDIS_RDB_ENC_INT32
;
3428 enc
[1] = value
&0xFF;
3429 enc
[2] = (value
>>8)&0xFF;
3430 enc
[3] = (value
>>16)&0xFF;
3431 enc
[4] = (value
>>24)&0xFF;
/* Strings such as "2391" or "-100", with no spaces and a value fitting in a
 * signed 8, 16 or 32 bit integer, can be saved as integers to save space.
 *
 * Returns the result of rdbEncodeInteger() when 's' (of length 'len') is
 * exactly the decimal form of a number, 0 when it is not. */
static int rdbTryIntegerEncoding(char *s, size_t len, unsigned char *enc) {
    char roundtrip[32];
    char *stop;
    long long parsed = strtoll(s, &stop, 10);

    /* Anything left after what strtoll() consumed: not a plain number. */
    if (*stop != '\0') return 0;

    /* Print the number back: unless the result is byte-for-byte the
     * original string, the string can't be encoded as an integer. */
    ll2string(roundtrip,32,parsed);
    if (strlen(roundtrip) != len) return 0;
    if (memcmp(roundtrip,s,len) != 0) return 0;

    return rdbEncodeInteger(parsed,enc);
}
3457 static int rdbSaveLzfStringObject(FILE *fp
, unsigned char *s
, size_t len
) {
3458 size_t comprlen
, outlen
;
3462 /* We require at least four bytes compression for this to be worth it */
3463 if (len
<= 4) return 0;
3465 if ((out
= zmalloc(outlen
+1)) == NULL
) return 0;
3466 comprlen
= lzf_compress(s
, len
, out
, outlen
);
3467 if (comprlen
== 0) {
3471 /* Data compressed! Let's save it on disk */
3472 byte
= (REDIS_RDB_ENCVAL
<<6)|REDIS_RDB_ENC_LZF
;
3473 if (fwrite(&byte
,1,1,fp
) == 0) goto writeerr
;
3474 if (rdbSaveLen(fp
,comprlen
) == -1) goto writeerr
;
3475 if (rdbSaveLen(fp
,len
) == -1) goto writeerr
;
3476 if (fwrite(out
,comprlen
,1,fp
) == 0) goto writeerr
;
/* Save a string object as [len][data] on disk. If the object is a string
 * representation of an integer value we try to save it in a special form */
3487 static int rdbSaveRawString(FILE *fp
, unsigned char *s
, size_t len
) {
3490 /* Try integer encoding */
3492 unsigned char buf
[5];
3493 if ((enclen
= rdbTryIntegerEncoding((char*)s
,len
,buf
)) > 0) {
3494 if (fwrite(buf
,enclen
,1,fp
) == 0) return -1;
3499 /* Try LZF compression - under 20 bytes it's unable to compress even
3500 * aaaaaaaaaaaaaaaaaa so skip it */
3501 if (server
.rdbcompression
&& len
> 20) {
3504 retval
= rdbSaveLzfStringObject(fp
,s
,len
);
3505 if (retval
== -1) return -1;
3506 if (retval
> 0) return 0;
3507 /* retval == 0 means data can't be compressed, save the old way */
3510 /* Store verbatim */
3511 if (rdbSaveLen(fp
,len
) == -1) return -1;
3512 if (len
&& fwrite(s
,len
,1,fp
) == 0) return -1;
/* Like rdbSaveRawString() but handles encoded objects */
3517 static int rdbSaveStringObject(FILE *fp
, robj
*obj
) {
3520 /* Avoid to decode the object, then encode it again, if the
3521 * object is alrady integer encoded. */
3522 if (obj
->encoding
== REDIS_ENCODING_INT
) {
3523 long val
= (long) obj
->ptr
;
3524 unsigned char buf
[5];
3527 if ((enclen
= rdbEncodeInteger(val
,buf
)) > 0) {
3528 if (fwrite(buf
,enclen
,1,fp
) == 0) return -1;
3531 /* otherwise... fall throught and continue with the usual
3535 /* Avoid incr/decr ref count business when possible.
3536 * This plays well with copy-on-write given that we are probably
3537 * in a child process (BGSAVE). Also this makes sure key objects
3538 * of swapped objects are not incRefCount-ed (an assert does not allow
3539 * this in order to avoid bugs) */
3540 if (obj
->encoding
!= REDIS_ENCODING_RAW
) {
3541 obj
= getDecodedObject(obj
);
3542 retval
= rdbSaveRawString(fp
,obj
->ptr
,sdslen(obj
->ptr
));
3545 retval
= rdbSaveRawString(fp
,obj
->ptr
,sdslen(obj
->ptr
));
/* Save a double value. Doubles are saved as strings prefixed by an unsigned
 * 8 bit integer specifying the length of the representation.
 * This 8 bit integer has special values that stand for a value on their own,
 * with no string following:
 *
 * 253: not a number
 * 254: positive infinity
 * 255: negative infinity
 *
 * Returns 0 on success, -1 on write error. */
static int rdbSaveDoubleValue(FILE *fp, double val) {
    unsigned char buf[128];
    int len;

    if (isnan(val)) {
        buf[0] = 253;
        len = 1;
    } else if (!isfinite(val)) {
        len = 1;
        buf[0] = (val < 0) ? 255 : 254;
    } else {
#if (DBL_MANT_DIG >= 52) && (LLONG_MAX == 0x7fffffffffffffffLL)
        /* Check if the float is in a safe range to be casted into a
         * long long. We are assuming that long long is 64 bit here.
         * Also we are assuming that there are no implementations around where
         * double has precision < 52 bit.
         *
         * Under these assumptions we test if a double is inside an interval
         * where casting to long long is safe. Then using two castings we
         * make sure the decimal part is zero. If all this is true we use
         * the integer printing function that is much faster. */
        double min = -4503599627370495; /* -(2^52)+1 */
        double max = 4503599627370496; /* 2^52 */
        if (val > min && val < max && val == ((double)((long long)val)))
            /* The string starts at buf+1, so one byte less is available. */
            ll2string((char*)buf+1,sizeof(buf)-1,(long long)val);
        else
#endif
            snprintf((char*)buf+1,sizeof(buf)-1,"%.17g",val);
        buf[0] = strlen((char*)buf+1);
        len = buf[0]+1;
    }
    if (fwrite(buf,len,1,fp) == 0) return -1;
    return 0;
}
3593 /* Save a Redis object. */
3594 static int rdbSaveObject(FILE *fp
, robj
*o
) {
3595 if (o
->type
== REDIS_STRING
) {
3596 /* Save a string value */
3597 if (rdbSaveStringObject(fp
,o
) == -1) return -1;
3598 } else if (o
->type
== REDIS_LIST
) {
3599 /* Save a list value */
3600 list
*list
= o
->ptr
;
3604 if (rdbSaveLen(fp
,listLength(list
)) == -1) return -1;
3605 listRewind(list
,&li
);
3606 while((ln
= listNext(&li
))) {
3607 robj
*eleobj
= listNodeValue(ln
);
3609 if (rdbSaveStringObject(fp
,eleobj
) == -1) return -1;
3611 } else if (o
->type
== REDIS_SET
) {
3612 /* Save a set value */
3614 dictIterator
*di
= dictGetIterator(set
);
3617 if (rdbSaveLen(fp
,dictSize(set
)) == -1) return -1;
3618 while((de
= dictNext(di
)) != NULL
) {
3619 robj
*eleobj
= dictGetEntryKey(de
);
3621 if (rdbSaveStringObject(fp
,eleobj
) == -1) return -1;
3623 dictReleaseIterator(di
);
3624 } else if (o
->type
== REDIS_ZSET
) {
3625 /* Save a set value */
3627 dictIterator
*di
= dictGetIterator(zs
->dict
);
3630 if (rdbSaveLen(fp
,dictSize(zs
->dict
)) == -1) return -1;
3631 while((de
= dictNext(di
)) != NULL
) {
3632 robj
*eleobj
= dictGetEntryKey(de
);
3633 double *score
= dictGetEntryVal(de
);
3635 if (rdbSaveStringObject(fp
,eleobj
) == -1) return -1;
3636 if (rdbSaveDoubleValue(fp
,*score
) == -1) return -1;
3638 dictReleaseIterator(di
);
3639 } else if (o
->type
== REDIS_HASH
) {
3640 /* Save a hash value */
3641 if (o
->encoding
== REDIS_ENCODING_ZIPMAP
) {
3642 unsigned char *p
= zipmapRewind(o
->ptr
);
3643 unsigned int count
= zipmapLen(o
->ptr
);
3644 unsigned char *key
, *val
;
3645 unsigned int klen
, vlen
;
3647 if (rdbSaveLen(fp
,count
) == -1) return -1;
3648 while((p
= zipmapNext(p
,&key
,&klen
,&val
,&vlen
)) != NULL
) {
3649 if (rdbSaveRawString(fp
,key
,klen
) == -1) return -1;
3650 if (rdbSaveRawString(fp
,val
,vlen
) == -1) return -1;
3653 dictIterator
*di
= dictGetIterator(o
->ptr
);
3656 if (rdbSaveLen(fp
,dictSize((dict
*)o
->ptr
)) == -1) return -1;
3657 while((de
= dictNext(di
)) != NULL
) {
3658 robj
*key
= dictGetEntryKey(de
);
3659 robj
*val
= dictGetEntryVal(de
);
3661 if (rdbSaveStringObject(fp
,key
) == -1) return -1;
3662 if (rdbSaveStringObject(fp
,val
) == -1) return -1;
3664 dictReleaseIterator(di
);
3667 redisPanic("Unknown object type");
3672 /* Return the length the object will have on disk if saved with
3673 * the rdbSaveObject() function. Currently we use a trick to get
3674 * this length with very little changes to the code. In the future
3675 * we could switch to a faster solution. */
3676 static off_t
rdbSavedObjectLen(robj
*o
, FILE *fp
) {
3677 if (fp
== NULL
) fp
= server
.devnull
;
3679 assert(rdbSaveObject(fp
,o
) != 1);
3683 /* Return the number of pages required to save this object in the swap file */
3684 static off_t
rdbSavedObjectPages(robj
*o
, FILE *fp
) {
3685 off_t bytes
= rdbSavedObjectLen(o
,fp
);
3687 return (bytes
+(server
.vm_page_size
-1))/server
.vm_page_size
;
3690 /* Save the DB on disk. Return REDIS_ERR on error, REDIS_OK on success */
3691 static int rdbSave(char *filename
) {
3692 dictIterator
*di
= NULL
;
3697 time_t now
= time(NULL
);
3699 /* Wait for I/O therads to terminate, just in case this is a
3700 * foreground-saving, to avoid seeking the swap file descriptor at the
3702 if (server
.vm_enabled
)
3703 waitEmptyIOJobsQueue();
3705 snprintf(tmpfile
,256,"temp-%d.rdb", (int) getpid());
3706 fp
= fopen(tmpfile
,"w");
3708 redisLog(REDIS_WARNING
, "Failed saving the DB: %s", strerror(errno
));
3711 if (fwrite("REDIS0001",9,1,fp
) == 0) goto werr
;
3712 for (j
= 0; j
< server
.dbnum
; j
++) {
3713 redisDb
*db
= server
.db
+j
;
3715 if (dictSize(d
) == 0) continue;
3716 di
= dictGetIterator(d
);
3722 /* Write the SELECT DB opcode */
3723 if (rdbSaveType(fp
,REDIS_SELECTDB
) == -1) goto werr
;
3724 if (rdbSaveLen(fp
,j
) == -1) goto werr
;
3726 /* Iterate this DB writing every entry */
3727 while((de
= dictNext(di
)) != NULL
) {
3728 robj
*key
= dictGetEntryKey(de
);
3729 robj
*o
= dictGetEntryVal(de
);
3730 time_t expiretime
= getExpire(db
,key
);
3732 /* Save the expire time */
3733 if (expiretime
!= -1) {
3734 /* If this key is already expired skip it */
3735 if (expiretime
< now
) continue;
3736 if (rdbSaveType(fp
,REDIS_EXPIRETIME
) == -1) goto werr
;
3737 if (rdbSaveTime(fp
,expiretime
) == -1) goto werr
;
3739 /* Save the key and associated value. This requires special
3740 * handling if the value is swapped out. */
3741 if (!server
.vm_enabled
|| key
->storage
== REDIS_VM_MEMORY
||
3742 key
->storage
== REDIS_VM_SWAPPING
) {
3743 /* Save type, key, value */
3744 if (rdbSaveType(fp
,o
->type
) == -1) goto werr
;
3745 if (rdbSaveStringObject(fp
,key
) == -1) goto werr
;
3746 if (rdbSaveObject(fp
,o
) == -1) goto werr
;
3748 /* REDIS_VM_SWAPPED or REDIS_VM_LOADING */
3750 /* Get a preview of the object in memory */
3751 po
= vmPreviewObject(key
);
3752 /* Save type, key, value */
3753 if (rdbSaveType(fp
,key
->vtype
) == -1) goto werr
;
3754 if (rdbSaveStringObject(fp
,key
) == -1) goto werr
;
3755 if (rdbSaveObject(fp
,po
) == -1) goto werr
;
3756 /* Remove the loaded object from memory */
3760 dictReleaseIterator(di
);
3763 if (rdbSaveType(fp
,REDIS_EOF
) == -1) goto werr
;
3765 /* Make sure data will not remain on the OS's output buffers */
3770 /* Use RENAME to make sure the DB file is changed atomically only
3771 * if the generate DB file is ok. */
3772 if (rename(tmpfile
,filename
) == -1) {
3773 redisLog(REDIS_WARNING
,"Error moving temp DB file on the final destination: %s", strerror(errno
));
3777 redisLog(REDIS_NOTICE
,"DB saved on disk");
3779 server
.lastsave
= time(NULL
);
3785 redisLog(REDIS_WARNING
,"Write error saving DB on disk: %s", strerror(errno
));
3786 if (di
) dictReleaseIterator(di
);
3790 static int rdbSaveBackground(char *filename
) {
3793 if (server
.bgsavechildpid
!= -1) return REDIS_ERR
;
3794 if (server
.vm_enabled
) waitEmptyIOJobsQueue();
3795 if ((childpid
= fork()) == 0) {
3797 if (server
.vm_enabled
) vmReopenSwapFile();
3799 if (rdbSave(filename
) == REDIS_OK
) {
3806 if (childpid
== -1) {
3807 redisLog(REDIS_WARNING
,"Can't save in background: fork: %s",
3811 redisLog(REDIS_NOTICE
,"Background saving started by pid %d",childpid
);
3812 server
.bgsavechildpid
= childpid
;
3813 updateDictResizePolicy();
3816 return REDIS_OK
; /* unreached */
3819 static void rdbRemoveTempFile(pid_t childpid
) {
3822 snprintf(tmpfile
,256,"temp-%d.rdb", (int) childpid
);
/* Read a single byte from 'fp' and return it as a type.
 * Returns -1 if the byte could not be read. */
static int rdbLoadType(FILE *fp) {
    unsigned char byte;
    size_t got = fread(&byte,1,1,fp);

    if (got == 0) {
        return -1;
    }
    return byte;
}
/* Read a 4 byte int32_t (in the byte order of the host) from 'fp' and
 * return it as a time_t. Returns -1 if the value could not be read. */
static time_t rdbLoadTime(FILE *fp) {
    int32_t stamp;
    size_t got = fread(&stamp,4,1,fp);

    if (got == 0) {
        return -1;
    }
    return (time_t) stamp;
}
/* Load an encoded length from the DB, see the REDIS_RDB_* defines on the top
 * of this file for a description of how these are stored on disk.
 *
 * isencoded is set to 1 if the value that was read is not actually a length
 * but an "encoding type", check the above comments for more info */
3843 static uint32_t rdbLoadLen(FILE *fp
, int *isencoded
) {
3844 unsigned char buf
[2];
3848 if (isencoded
) *isencoded
= 0;
3849 if (fread(buf
,1,1,fp
) == 0) return REDIS_RDB_LENERR
;
3850 type
= (buf
[0]&0xC0)>>6;
3851 if (type
== REDIS_RDB_6BITLEN
) {
3852 /* Read a 6 bit len */
3854 } else if (type
== REDIS_RDB_ENCVAL
) {
3855 /* Read a 6 bit len encoding type */
3856 if (isencoded
) *isencoded
= 1;
3858 } else if (type
== REDIS_RDB_14BITLEN
) {
3859 /* Read a 14 bit len */
3860 if (fread(buf
+1,1,1,fp
) == 0) return REDIS_RDB_LENERR
;
3861 return ((buf
[0]&0x3F)<<8)|buf
[1];
3863 /* Read a 32 bit len */
3864 if (fread(&len
,4,1,fp
) == 0) return REDIS_RDB_LENERR
;
3869 /* Load an integer-encoded object from file 'fp', with the specified
3870 * encoding type 'enctype'. If encode is true the function may return
3871 * an integer-encoded object as reply, otherwise the returned object
3872 * will always be encoded as a raw string. */
3873 static robj
*rdbLoadIntegerObject(FILE *fp
, int enctype
, int encode
) {
3874 unsigned char enc
[4];
3877 if (enctype
== REDIS_RDB_ENC_INT8
) {
3878 if (fread(enc
,1,1,fp
) == 0) return NULL
;
3879 val
= (signed char)enc
[0];
3880 } else if (enctype
== REDIS_RDB_ENC_INT16
) {
3882 if (fread(enc
,2,1,fp
) == 0) return NULL
;
3883 v
= enc
[0]|(enc
[1]<<8);
3885 } else if (enctype
== REDIS_RDB_ENC_INT32
) {
3887 if (fread(enc
,4,1,fp
) == 0) return NULL
;
3888 v
= enc
[0]|(enc
[1]<<8)|(enc
[2]<<16)|(enc
[3]<<24);
3891 val
= 0; /* anti-warning */
3892 redisPanic("Unknown RDB integer encoding type");
3895 return createStringObjectFromLongLong(val
);
3897 return createObject(REDIS_STRING
,sdsfromlonglong(val
));
3900 static robj
*rdbLoadLzfStringObject(FILE*fp
) {
3901 unsigned int len
, clen
;
3902 unsigned char *c
= NULL
;
3905 if ((clen
= rdbLoadLen(fp
,NULL
)) == REDIS_RDB_LENERR
) return NULL
;
3906 if ((len
= rdbLoadLen(fp
,NULL
)) == REDIS_RDB_LENERR
) return NULL
;
3907 if ((c
= zmalloc(clen
)) == NULL
) goto err
;
3908 if ((val
= sdsnewlen(NULL
,len
)) == NULL
) goto err
;
3909 if (fread(c
,clen
,1,fp
) == 0) goto err
;
3910 if (lzf_decompress(c
,clen
,val
,len
) == 0) goto err
;
3912 return createObject(REDIS_STRING
,val
);
3919 static robj
*rdbGenericLoadStringObject(FILE*fp
, int encode
) {
3924 len
= rdbLoadLen(fp
,&isencoded
);
3927 case REDIS_RDB_ENC_INT8
:
3928 case REDIS_RDB_ENC_INT16
:
3929 case REDIS_RDB_ENC_INT32
:
3930 return rdbLoadIntegerObject(fp
,len
,encode
);
3931 case REDIS_RDB_ENC_LZF
:
3932 return rdbLoadLzfStringObject(fp
);
3934 redisPanic("Unknown RDB encoding type");
3938 if (len
== REDIS_RDB_LENERR
) return NULL
;
3939 val
= sdsnewlen(NULL
,len
);
3940 if (len
&& fread(val
,len
,1,fp
) == 0) {
3944 return createObject(REDIS_STRING
,val
);
3947 static robj
*rdbLoadStringObject(FILE *fp
) {
3948 return rdbGenericLoadStringObject(fp
,0);
3951 static robj
*rdbLoadEncodedStringObject(FILE *fp
) {
3952 return rdbGenericLoadStringObject(fp
,1);
3955 /* For information about double serialization check rdbSaveDoubleValue() */
3956 static int rdbLoadDoubleValue(FILE *fp
, double *val
) {
3960 if (fread(&len
,1,1,fp
) == 0) return -1;
3962 case 255: *val
= R_NegInf
; return 0;
3963 case 254: *val
= R_PosInf
; return 0;
3964 case 253: *val
= R_Nan
; return 0;
3966 if (fread(buf
,len
,1,fp
) == 0) return -1;
3968 sscanf(buf
, "%lg", val
);
3973 /* Load a Redis object of the specified type from the specified file.
3974 * On success a newly allocated object is returned, otherwise NULL. */
3975 static robj
*rdbLoadObject(int type
, FILE *fp
) {
3978 redisLog(REDIS_DEBUG
,"LOADING OBJECT %d (at %d)\n",type
,ftell(fp
));
3979 if (type
== REDIS_STRING
) {
3980 /* Read string value */
3981 if ((o
= rdbLoadEncodedStringObject(fp
)) == NULL
) return NULL
;
3982 o
= tryObjectEncoding(o
);
3983 } else if (type
== REDIS_LIST
|| type
== REDIS_SET
) {
3984 /* Read list/set value */
3987 if ((listlen
= rdbLoadLen(fp
,NULL
)) == REDIS_RDB_LENERR
) return NULL
;
3988 o
= (type
== REDIS_LIST
) ? createListObject() : createSetObject();
3989 /* It's faster to expand the dict to the right size asap in order
3990 * to avoid rehashing */
3991 if (type
== REDIS_SET
&& listlen
> DICT_HT_INITIAL_SIZE
)
3992 dictExpand(o
->ptr
,listlen
);
3993 /* Load every single element of the list/set */
3997 if ((ele
= rdbLoadEncodedStringObject(fp
)) == NULL
) return NULL
;
3998 ele
= tryObjectEncoding(ele
);
3999 if (type
== REDIS_LIST
) {
4000 listAddNodeTail((list
*)o
->ptr
,ele
);
4002 dictAdd((dict
*)o
->ptr
,ele
,NULL
);
4005 } else if (type
== REDIS_ZSET
) {
4006 /* Read list/set value */
4010 if ((zsetlen
= rdbLoadLen(fp
,NULL
)) == REDIS_RDB_LENERR
) return NULL
;
4011 o
= createZsetObject();
4013 /* Load every single element of the list/set */
4016 double *score
= zmalloc(sizeof(double));
4018 if ((ele
= rdbLoadEncodedStringObject(fp
)) == NULL
) return NULL
;
4019 ele
= tryObjectEncoding(ele
);
4020 if (rdbLoadDoubleValue(fp
,score
) == -1) return NULL
;
4021 dictAdd(zs
->dict
,ele
,score
);
4022 zslInsert(zs
->zsl
,*score
,ele
);
4023 incrRefCount(ele
); /* added to skiplist */
4025 } else if (type
== REDIS_HASH
) {
4028 if ((hashlen
= rdbLoadLen(fp
,NULL
)) == REDIS_RDB_LENERR
) return NULL
;
4029 o
= createHashObject();
4030 /* Too many entries? Use a hash table. */
4031 if (hashlen
> server
.hash_max_zipmap_entries
)
4032 convertToRealHash(o
);
4033 /* Load every key/value, then set it into the zipmap or hash
4034 * table, as needed. */
4038 if ((key
= rdbLoadStringObject(fp
)) == NULL
) return NULL
;
4039 if ((val
= rdbLoadStringObject(fp
)) == NULL
) return NULL
;
4040 /* If we are using a zipmap and there are too big values
4041 * the object is converted to real hash table encoding. */
4042 if (o
->encoding
!= REDIS_ENCODING_HT
&&
4043 (sdslen(key
->ptr
) > server
.hash_max_zipmap_value
||
4044 sdslen(val
->ptr
) > server
.hash_max_zipmap_value
))
4046 convertToRealHash(o
);
4049 if (o
->encoding
== REDIS_ENCODING_ZIPMAP
) {
4050 unsigned char *zm
= o
->ptr
;
4052 zm
= zipmapSet(zm
,key
->ptr
,sdslen(key
->ptr
),
4053 val
->ptr
,sdslen(val
->ptr
),NULL
);
4058 key
= tryObjectEncoding(key
);
4059 val
= tryObjectEncoding(val
);
4060 dictAdd((dict
*)o
->ptr
,key
,val
);
4064 redisPanic("Unknown object type");
4069 static int rdbLoad(char *filename
) {
4072 int type
, retval
, rdbver
;
4073 int swap_all_values
= 0;
4074 dict
*d
= server
.db
[0].dict
;
4075 redisDb
*db
= server
.db
+0;
4077 time_t expiretime
, now
= time(NULL
);
4078 long long loadedkeys
= 0;
4080 fp
= fopen(filename
,"r");
4081 if (!fp
) return REDIS_ERR
;
4082 if (fread(buf
,9,1,fp
) == 0) goto eoferr
;
4084 if (memcmp(buf
,"REDIS",5) != 0) {
4086 redisLog(REDIS_WARNING
,"Wrong signature trying to load DB from file");
4089 rdbver
= atoi(buf
+5);
4092 redisLog(REDIS_WARNING
,"Can't handle RDB format version %d",rdbver
);
4100 if ((type
= rdbLoadType(fp
)) == -1) goto eoferr
;
4101 if (type
== REDIS_EXPIRETIME
) {
4102 if ((expiretime
= rdbLoadTime(fp
)) == -1) goto eoferr
;
4103 /* We read the time so we need to read the object type again */
4104 if ((type
= rdbLoadType(fp
)) == -1) goto eoferr
;
4106 if (type
== REDIS_EOF
) break;
4107 /* Handle SELECT DB opcode as a special case */
4108 if (type
== REDIS_SELECTDB
) {
4109 if ((dbid
= rdbLoadLen(fp
,NULL
)) == REDIS_RDB_LENERR
)
4111 if (dbid
>= (unsigned)server
.dbnum
) {
4112 redisLog(REDIS_WARNING
,"FATAL: Data file was created with a Redis server configured to handle more than %d databases. Exiting\n", server
.dbnum
);
4115 db
= server
.db
+dbid
;
4120 if ((key
= rdbLoadStringObject(fp
)) == NULL
) goto eoferr
;
4122 if ((val
= rdbLoadObject(type
,fp
)) == NULL
) goto eoferr
;
4123 /* Check if the key already expired */
4124 if (expiretime
!= -1 && expiretime
< now
) {
4129 /* Add the new object in the hash table */
4130 retval
= dictAdd(d
,key
,val
);
4131 if (retval
== DICT_ERR
) {
4132 redisLog(REDIS_WARNING
,"Loading DB, duplicated key (%s) found! Unrecoverable error, exiting now.", key
->ptr
);
4136 /* Set the expire time if needed */
4137 if (expiretime
!= -1) setExpire(db
,key
,expiretime
);
4139 /* Handle swapping while loading big datasets when VM is on */
4141 /* If we detect we are hopeless about fitting something in memory
4142 * we just swap every new key on disk. Directly...
4143 * Note that's important to check for this condition before resorting
4144 * to random sampling, otherwise we may try to swap already
4146 if (swap_all_values
) {
4147 dictEntry
*de
= dictFind(d
,key
);
4149 /* de may be NULL since the key already expired */
4151 key
= dictGetEntryKey(de
);
4152 val
= dictGetEntryVal(de
);
4154 if (vmSwapObjectBlocking(key
,val
) == REDIS_OK
) {
4155 dictGetEntryVal(de
) = NULL
;
4161 /* If we have still some hope of having some value fitting memory
4162 * then we try random sampling. */
4163 if (!swap_all_values
&& server
.vm_enabled
&& (loadedkeys
% 5000) == 0) {
4164 while (zmalloc_used_memory() > server
.vm_max_memory
) {
4165 if (vmSwapOneObjectBlocking() == REDIS_ERR
) break;
4167 if (zmalloc_used_memory() > server
.vm_max_memory
)
4168 swap_all_values
= 1; /* We are already using too much mem */
4174 eoferr
: /* unexpected end of file is handled here with a fatal exit */
4175 redisLog(REDIS_WARNING
,"Short read or OOM loading DB. Unrecoverable error, aborting now.");
4177 return REDIS_ERR
; /* Just to avoid warning */
4180 /*================================== Shutdown =============================== */
4181 static int prepareForShutdown() {
4182 redisLog(REDIS_WARNING
,"User requested shutdown, saving DB...");
4183 /* Kill the saving child if there is a background saving in progress.
4184 We want to avoid race conditions, for instance our saving child may
4185 overwrite the synchronous saving did by SHUTDOWN. */
4186 if (server
.bgsavechildpid
!= -1) {
4187 redisLog(REDIS_WARNING
,"There is a live saving child. Killing it!");
4188 kill(server
.bgsavechildpid
,SIGKILL
);
4189 rdbRemoveTempFile(server
.bgsavechildpid
);
4191 if (server
.appendonly
) {
4192 /* Append only file: fsync() the AOF and exit */
4193 fsync(server
.appendfd
);
4194 if (server
.vm_enabled
) unlink(server
.vm_swap_file
);
4196 /* Snapshotting. Perform a SYNC SAVE and exit */
4197 if (rdbSave(server
.dbfilename
) == REDIS_OK
) {
4198 if (server
.daemonize
)
4199 unlink(server
.pidfile
);
4200 redisLog(REDIS_WARNING
,"%zu bytes used at exit",zmalloc_used_memory());
4202 /* Ooops.. error saving! The best we can do is to continue
4203 * operating. Note that if there was a background saving process,
4204 * in the next cron() Redis will be notified that the background
4205 * saving aborted, handling special stuff like slaves pending for
4206 * synchronization... */
4207 redisLog(REDIS_WARNING
,"Error trying to save the DB, can't exit");
4211 redisLog(REDIS_WARNING
,"Server exit now, bye bye...");
4215 /*================================== Commands =============================== */
4217 static void authCommand(redisClient
*c
) {
4218 if (!server
.requirepass
|| !strcmp(c
->argv
[1]->ptr
, server
.requirepass
)) {
4219 c
->authenticated
= 1;
4220 addReply(c
,shared
.ok
);
4222 c
->authenticated
= 0;
4223 addReplySds(c
,sdscatprintf(sdsempty(),"-ERR invalid password\r\n"));
4227 static void pingCommand(redisClient
*c
) {
4228 addReply(c
,shared
.pong
);
4231 static void echoCommand(redisClient
*c
) {
4232 addReplyBulk(c
,c
->argv
[1]);
4235 /*=================================== Strings =============================== */
4237 static void setGenericCommand(redisClient
*c
, int nx
, robj
*key
, robj
*val
, robj
*expire
) {
4239 long seconds
= 0; /* initialized to avoid an harmness warning */
4242 if (getLongFromObjectOrReply(c
, expire
, &seconds
, NULL
) != REDIS_OK
)
4245 addReplySds(c
,sdsnew("-ERR invalid expire time in SETEX\r\n"));
4250 touchWatchedKey(c
->db
,key
);
4251 if (nx
) deleteIfVolatile(c
->db
,key
);
4252 retval
= dictAdd(c
->db
->dict
,key
,val
);
4253 if (retval
== DICT_ERR
) {
4255 /* If the key is about a swapped value, we want a new key object
4256 * to overwrite the old. So we delete the old key in the database.
4257 * This will also make sure that swap pages about the old object
4258 * will be marked as free. */
4259 if (server
.vm_enabled
&& deleteIfSwapped(c
->db
,key
))
4261 dictReplace(c
->db
->dict
,key
,val
);
4264 addReply(c
,shared
.czero
);
4272 removeExpire(c
->db
,key
);
4273 if (expire
) setExpire(c
->db
,key
,time(NULL
)+seconds
);
4274 addReply(c
, nx
? shared
.cone
: shared
.ok
);
4277 static void setCommand(redisClient
*c
) {
4278 setGenericCommand(c
,0,c
->argv
[1],c
->argv
[2],NULL
);
4281 static void setnxCommand(redisClient
*c
) {
4282 setGenericCommand(c
,1,c
->argv
[1],c
->argv
[2],NULL
);
4285 static void setexCommand(redisClient
*c
) {
4286 setGenericCommand(c
,0,c
->argv
[1],c
->argv
[3],c
->argv
[2]);
4289 static int getGenericCommand(redisClient
*c
) {
4292 if ((o
= lookupKeyReadOrReply(c
,c
->argv
[1],shared
.nullbulk
)) == NULL
)
4295 if (o
->type
!= REDIS_STRING
) {
4296 addReply(c
,shared
.wrongtypeerr
);
4304 static void getCommand(redisClient
*c
) {
4305 getGenericCommand(c
);
4308 static void getsetCommand(redisClient
*c
) {
4309 if (getGenericCommand(c
) == REDIS_ERR
) return;
4310 if (dictAdd(c
->db
->dict
,c
->argv
[1],c
->argv
[2]) == DICT_ERR
) {
4311 dictReplace(c
->db
->dict
,c
->argv
[1],c
->argv
[2]);
4313 incrRefCount(c
->argv
[1]);
4315 incrRefCount(c
->argv
[2]);
4317 removeExpire(c
->db
,c
->argv
[1]);
4320 static void mgetCommand(redisClient
*c
) {
4323 addReplySds(c
,sdscatprintf(sdsempty(),"*%d\r\n",c
->argc
-1));
4324 for (j
= 1; j
< c
->argc
; j
++) {
4325 robj
*o
= lookupKeyRead(c
->db
,c
->argv
[j
]);
4327 addReply(c
,shared
.nullbulk
);
4329 if (o
->type
!= REDIS_STRING
) {
4330 addReply(c
,shared
.nullbulk
);
4338 static void msetGenericCommand(redisClient
*c
, int nx
) {
4339 int j
, busykeys
= 0;
4341 if ((c
->argc
% 2) == 0) {
4342 addReplySds(c
,sdsnew("-ERR wrong number of arguments for MSET\r\n"));
4345 /* Handle the NX flag. The MSETNX semantic is to return zero and set
4346 * nothing at all if at least one key already exists. */
4348 for (j
= 1; j
< c
->argc
; j
+= 2) {
4349 if (lookupKeyWrite(c
->db
,c
->argv
[j
]) != NULL
) {
4355 addReply(c
, shared
.czero
);
4359 for (j
= 1; j
< c
->argc
; j
+= 2) {
4362 c
->argv
[j
+1] = tryObjectEncoding(c
->argv
[j
+1]);
4363 retval
= dictAdd(c
->db
->dict
,c
->argv
[j
],c
->argv
[j
+1]);
4364 if (retval
== DICT_ERR
) {
4365 dictReplace(c
->db
->dict
,c
->argv
[j
],c
->argv
[j
+1]);
4366 incrRefCount(c
->argv
[j
+1]);
4368 incrRefCount(c
->argv
[j
]);
4369 incrRefCount(c
->argv
[j
+1]);
4371 removeExpire(c
->db
,c
->argv
[j
]);
4373 server
.dirty
+= (c
->argc
-1)/2;
4374 addReply(c
, nx
? shared
.cone
: shared
.ok
);
4377 static void msetCommand(redisClient
*c
) {
4378 msetGenericCommand(c
,0);
4381 static void msetnxCommand(redisClient
*c
) {
4382 msetGenericCommand(c
,1);
4385 static void incrDecrCommand(redisClient
*c
, long long incr
) {
4390 o
= lookupKeyWrite(c
->db
,c
->argv
[1]);
4391 if (o
!= NULL
&& checkType(c
,o
,REDIS_STRING
)) return;
4392 if (getLongLongFromObjectOrReply(c
,o
,&value
,NULL
) != REDIS_OK
) return;
4395 o
= createStringObjectFromLongLong(value
);
4396 retval
= dictAdd(c
->db
->dict
,c
->argv
[1],o
);
4397 if (retval
== DICT_ERR
) {
4398 dictReplace(c
->db
->dict
,c
->argv
[1],o
);
4399 removeExpire(c
->db
,c
->argv
[1]);
4401 incrRefCount(c
->argv
[1]);
4404 addReply(c
,shared
.colon
);
4406 addReply(c
,shared
.crlf
);
4409 static void incrCommand(redisClient
*c
) {
4410 incrDecrCommand(c
,1);
4413 static void decrCommand(redisClient
*c
) {
4414 incrDecrCommand(c
,-1);
4417 static void incrbyCommand(redisClient
*c
) {
4420 if (getLongLongFromObjectOrReply(c
, c
->argv
[2], &incr
, NULL
) != REDIS_OK
) return;
4421 incrDecrCommand(c
,incr
);
4424 static void decrbyCommand(redisClient
*c
) {
4427 if (getLongLongFromObjectOrReply(c
, c
->argv
[2], &incr
, NULL
) != REDIS_OK
) return;
4428 incrDecrCommand(c
,-incr
);
4431 static void appendCommand(redisClient
*c
) {
4436 o
= lookupKeyWrite(c
->db
,c
->argv
[1]);
4438 /* Create the key */
4439 retval
= dictAdd(c
->db
->dict
,c
->argv
[1],c
->argv
[2]);
4440 incrRefCount(c
->argv
[1]);
4441 incrRefCount(c
->argv
[2]);
4442 totlen
= stringObjectLen(c
->argv
[2]);
4446 de
= dictFind(c
->db
->dict
,c
->argv
[1]);
4449 o
= dictGetEntryVal(de
);
4450 if (o
->type
!= REDIS_STRING
) {
4451 addReply(c
,shared
.wrongtypeerr
);
4454 /* If the object is specially encoded or shared we have to make
4456 if (o
->refcount
!= 1 || o
->encoding
!= REDIS_ENCODING_RAW
) {
4457 robj
*decoded
= getDecodedObject(o
);
4459 o
= createStringObject(decoded
->ptr
, sdslen(decoded
->ptr
));
4460 decrRefCount(decoded
);
4461 dictReplace(c
->db
->dict
,c
->argv
[1],o
);
4464 if (c
->argv
[2]->encoding
== REDIS_ENCODING_RAW
) {
4465 o
->ptr
= sdscatlen(o
->ptr
,
4466 c
->argv
[2]->ptr
, sdslen(c
->argv
[2]->ptr
));
4468 o
->ptr
= sdscatprintf(o
->ptr
, "%ld",
4469 (unsigned long) c
->argv
[2]->ptr
);
4471 totlen
= sdslen(o
->ptr
);
4474 addReplySds(c
,sdscatprintf(sdsempty(),":%lu\r\n",(unsigned long)totlen
));
4477 static void substrCommand(redisClient
*c
) {
4479 long start
= atoi(c
->argv
[2]->ptr
);
4480 long end
= atoi(c
->argv
[3]->ptr
);
4481 size_t rangelen
, strlen
;
4484 if ((o
= lookupKeyReadOrReply(c
,c
->argv
[1],shared
.nullbulk
)) == NULL
||
4485 checkType(c
,o
,REDIS_STRING
)) return;
4487 o
= getDecodedObject(o
);
4488 strlen
= sdslen(o
->ptr
);
4490 /* convert negative indexes */
4491 if (start
< 0) start
= strlen
+start
;
4492 if (end
< 0) end
= strlen
+end
;
4493 if (start
< 0) start
= 0;
4494 if (end
< 0) end
= 0;
4496 /* indexes sanity checks */
4497 if (start
> end
|| (size_t)start
>= strlen
) {
4498 /* Out of range start or start > end result in null reply */
4499 addReply(c
,shared
.nullbulk
);
4503 if ((size_t)end
>= strlen
) end
= strlen
-1;
4504 rangelen
= (end
-start
)+1;
4506 /* Return the result */
4507 addReplySds(c
,sdscatprintf(sdsempty(),"$%zu\r\n",rangelen
));
4508 range
= sdsnewlen((char*)o
->ptr
+start
,rangelen
);
4509 addReplySds(c
,range
);
4510 addReply(c
,shared
.crlf
);
4514 /* ========================= Type agnostic commands ========================= */
4516 static void delCommand(redisClient
*c
) {
4519 for (j
= 1; j
< c
->argc
; j
++) {
4520 if (deleteKey(c
->db
,c
->argv
[j
])) {
4521 touchWatchedKey(c
->db
,c
->argv
[j
]);
4526 addReplyLongLong(c
,deleted
);
4529 static void existsCommand(redisClient
*c
) {
4530 expireIfNeeded(c
->db
,c
->argv
[1]);
4531 if (dictFind(c
->db
->dict
,c
->argv
[1])) {
4532 addReply(c
, shared
.cone
);
4534 addReply(c
, shared
.czero
);
4538 static void selectCommand(redisClient
*c
) {
4539 int id
= atoi(c
->argv
[1]->ptr
);
4541 if (selectDb(c
,id
) == REDIS_ERR
) {
4542 addReplySds(c
,sdsnew("-ERR invalid DB index\r\n"));
4544 addReply(c
,shared
.ok
);
4548 static void randomkeyCommand(redisClient
*c
) {
4553 de
= dictGetRandomKey(c
->db
->dict
);
4554 if (!de
|| expireIfNeeded(c
->db
,dictGetEntryKey(de
)) == 0) break;
4558 addReply(c
,shared
.nullbulk
);
4562 key
= dictGetEntryKey(de
);
4563 if (server
.vm_enabled
) {
4564 key
= dupStringObject(key
);
4565 addReplyBulk(c
,key
);
4568 addReplyBulk(c
,key
);
4572 static void keysCommand(redisClient
*c
) {
4575 sds pattern
= c
->argv
[1]->ptr
;
4576 int plen
= sdslen(pattern
);
4577 unsigned long numkeys
= 0;
4578 robj
*lenobj
= createObject(REDIS_STRING
,NULL
);
4580 di
= dictGetIterator(c
->db
->dict
);
4582 decrRefCount(lenobj
);
4583 while((de
= dictNext(di
)) != NULL
) {
4584 robj
*keyobj
= dictGetEntryKey(de
);
4586 sds key
= keyobj
->ptr
;
4587 if ((pattern
[0] == '*' && pattern
[1] == '\0') ||
4588 stringmatchlen(pattern
,plen
,key
,sdslen(key
),0)) {
4589 if (expireIfNeeded(c
->db
,keyobj
) == 0) {
4590 addReplyBulk(c
,keyobj
);
4595 dictReleaseIterator(di
);
4596 lenobj
->ptr
= sdscatprintf(sdsempty(),"*%lu\r\n",numkeys
);
4599 static void dbsizeCommand(redisClient
*c
) {
4601 sdscatprintf(sdsempty(),":%lu\r\n",dictSize(c
->db
->dict
)));
4604 static void lastsaveCommand(redisClient
*c
) {
4606 sdscatprintf(sdsempty(),":%lu\r\n",server
.lastsave
));
4609 static void typeCommand(redisClient
*c
) {
4613 o
= lookupKeyRead(c
->db
,c
->argv
[1]);
4618 case REDIS_STRING
: type
= "+string"; break;
4619 case REDIS_LIST
: type
= "+list"; break;
4620 case REDIS_SET
: type
= "+set"; break;
4621 case REDIS_ZSET
: type
= "+zset"; break;
4622 case REDIS_HASH
: type
= "+hash"; break;
4623 default: type
= "+unknown"; break;
4626 addReplySds(c
,sdsnew(type
));
4627 addReply(c
,shared
.crlf
);
4630 static void saveCommand(redisClient
*c
) {
4631 if (server
.bgsavechildpid
!= -1) {
4632 addReplySds(c
,sdsnew("-ERR background save in progress\r\n"));
4635 if (rdbSave(server
.dbfilename
) == REDIS_OK
) {
4636 addReply(c
,shared
.ok
);
4638 addReply(c
,shared
.err
);
4642 static void bgsaveCommand(redisClient
*c
) {
4643 if (server
.bgsavechildpid
!= -1) {
4644 addReplySds(c
,sdsnew("-ERR background save already in progress\r\n"));
4647 if (rdbSaveBackground(server
.dbfilename
) == REDIS_OK
) {
4648 char *status
= "+Background saving started\r\n";
4649 addReplySds(c
,sdsnew(status
));
4651 addReply(c
,shared
.err
);
4655 static void shutdownCommand(redisClient
*c
) {
4656 if (prepareForShutdown() == REDIS_OK
)
4658 addReplySds(c
, sdsnew("-ERR Errors trying to SHUTDOWN. Check logs.\r\n"));
4661 static void renameGenericCommand(redisClient
*c
, int nx
) {
4664 /* To use the same key as src and dst is probably an error */
4665 if (sdscmp(c
->argv
[1]->ptr
,c
->argv
[2]->ptr
) == 0) {
4666 addReply(c
,shared
.sameobjecterr
);
4670 if ((o
= lookupKeyWriteOrReply(c
,c
->argv
[1],shared
.nokeyerr
)) == NULL
)
4674 deleteIfVolatile(c
->db
,c
->argv
[2]);
4675 if (dictAdd(c
->db
->dict
,c
->argv
[2],o
) == DICT_ERR
) {
4678 addReply(c
,shared
.czero
);
4681 dictReplace(c
->db
->dict
,c
->argv
[2],o
);
4683 incrRefCount(c
->argv
[2]);
4685 deleteKey(c
->db
,c
->argv
[1]);
4686 touchWatchedKey(c
->db
,c
->argv
[2]);
4688 addReply(c
,nx
? shared
.cone
: shared
.ok
);
4691 static void renameCommand(redisClient
*c
) {
4692 renameGenericCommand(c
,0);
4695 static void renamenxCommand(redisClient
*c
) {
4696 renameGenericCommand(c
,1);
4699 static void moveCommand(redisClient
*c
) {
4704 /* Obtain source and target DB pointers */
4707 if (selectDb(c
,atoi(c
->argv
[2]->ptr
)) == REDIS_ERR
) {
4708 addReply(c
,shared
.outofrangeerr
);
4712 selectDb(c
,srcid
); /* Back to the source DB */
4714 /* If the user is moving using as target the same
4715 * DB as the source DB it is probably an error. */
4717 addReply(c
,shared
.sameobjecterr
);
4721 /* Check if the element exists and get a reference */
4722 o
= lookupKeyWrite(c
->db
,c
->argv
[1]);
4724 addReply(c
,shared
.czero
);
4728 /* Try to add the element to the target DB */
4729 deleteIfVolatile(dst
,c
->argv
[1]);
4730 if (dictAdd(dst
->dict
,c
->argv
[1],o
) == DICT_ERR
) {
4731 addReply(c
,shared
.czero
);
4734 incrRefCount(c
->argv
[1]);
4737 /* OK! key moved, free the entry in the source DB */
4738 deleteKey(src
,c
->argv
[1]);
4740 addReply(c
,shared
.cone
);
4743 /* =================================== Lists ================================ */
4744 static void pushGenericCommand(redisClient
*c
, int where
) {
4748 lobj
= lookupKeyWrite(c
->db
,c
->argv
[1]);
4750 if (handleClientsWaitingListPush(c
,c
->argv
[1],c
->argv
[2])) {
4751 addReply(c
,shared
.cone
);
4754 lobj
= createListObject();
4756 if (where
== REDIS_HEAD
) {
4757 listAddNodeHead(list
,c
->argv
[2]);
4759 listAddNodeTail(list
,c
->argv
[2]);
4761 dictAdd(c
->db
->dict
,c
->argv
[1],lobj
);
4762 incrRefCount(c
->argv
[1]);
4763 incrRefCount(c
->argv
[2]);
4765 if (lobj
->type
!= REDIS_LIST
) {
4766 addReply(c
,shared
.wrongtypeerr
);
4769 if (handleClientsWaitingListPush(c
,c
->argv
[1],c
->argv
[2])) {
4770 addReply(c
,shared
.cone
);
4774 if (where
== REDIS_HEAD
) {
4775 listAddNodeHead(list
,c
->argv
[2]);
4777 listAddNodeTail(list
,c
->argv
[2]);
4779 incrRefCount(c
->argv
[2]);
4782 addReplyLongLong(c
,listLength(list
));
4785 static void lpushCommand(redisClient
*c
) {
4786 pushGenericCommand(c
,REDIS_HEAD
);
4789 static void rpushCommand(redisClient
*c
) {
4790 pushGenericCommand(c
,REDIS_TAIL
);
4793 static void llenCommand(redisClient
*c
) {
4797 if ((o
= lookupKeyReadOrReply(c
,c
->argv
[1],shared
.czero
)) == NULL
||
4798 checkType(c
,o
,REDIS_LIST
)) return;
4801 addReplyUlong(c
,listLength(l
));
4804 static void lindexCommand(redisClient
*c
) {
4806 int index
= atoi(c
->argv
[2]->ptr
);
4810 if ((o
= lookupKeyReadOrReply(c
,c
->argv
[1],shared
.nullbulk
)) == NULL
||
4811 checkType(c
,o
,REDIS_LIST
)) return;
4814 ln
= listIndex(list
, index
);
4816 addReply(c
,shared
.nullbulk
);
4818 robj
*ele
= listNodeValue(ln
);
4819 addReplyBulk(c
,ele
);
4823 static void lsetCommand(redisClient
*c
) {
4825 int index
= atoi(c
->argv
[2]->ptr
);
4829 if ((o
= lookupKeyWriteOrReply(c
,c
->argv
[1],shared
.nokeyerr
)) == NULL
||
4830 checkType(c
,o
,REDIS_LIST
)) return;
4833 ln
= listIndex(list
, index
);
4835 addReply(c
,shared
.outofrangeerr
);
4837 robj
*ele
= listNodeValue(ln
);
4840 listNodeValue(ln
) = c
->argv
[3];
4841 incrRefCount(c
->argv
[3]);
4842 addReply(c
,shared
.ok
);
4847 static void popGenericCommand(redisClient
*c
, int where
) {
4852 if ((o
= lookupKeyWriteOrReply(c
,c
->argv
[1],shared
.nullbulk
)) == NULL
||
4853 checkType(c
,o
,REDIS_LIST
)) return;
4856 if (where
== REDIS_HEAD
)
4857 ln
= listFirst(list
);
4859 ln
= listLast(list
);
4862 addReply(c
,shared
.nullbulk
);
4864 robj
*ele
= listNodeValue(ln
);
4865 addReplyBulk(c
,ele
);
4866 listDelNode(list
,ln
);
4867 if (listLength(list
) == 0) deleteKey(c
->db
,c
->argv
[1]);
4872 static void lpopCommand(redisClient
*c
) {
4873 popGenericCommand(c
,REDIS_HEAD
);
4876 static void rpopCommand(redisClient
*c
) {
4877 popGenericCommand(c
,REDIS_TAIL
);
4880 static void lrangeCommand(redisClient
*c
) {
4882 int start
= atoi(c
->argv
[2]->ptr
);
4883 int end
= atoi(c
->argv
[3]->ptr
);
4890 if ((o
= lookupKeyReadOrReply(c
,c
->argv
[1],shared
.emptymultibulk
)) == NULL
4891 || checkType(c
,o
,REDIS_LIST
)) return;
4893 llen
= listLength(list
);
4895 /* convert negative indexes */
4896 if (start
< 0) start
= llen
+start
;
4897 if (end
< 0) end
= llen
+end
;
4898 if (start
< 0) start
= 0;
4899 if (end
< 0) end
= 0;
4901 /* indexes sanity checks */
4902 if (start
> end
|| start
>= llen
) {
4903 /* Out of range start or start > end result in empty list */
4904 addReply(c
,shared
.emptymultibulk
);
4907 if (end
>= llen
) end
= llen
-1;
4908 rangelen
= (end
-start
)+1;
4910 /* Return the result in form of a multi-bulk reply */
4911 ln
= listIndex(list
, start
);
4912 addReplySds(c
,sdscatprintf(sdsempty(),"*%d\r\n",rangelen
));
4913 for (j
= 0; j
< rangelen
; j
++) {
4914 ele
= listNodeValue(ln
);
4915 addReplyBulk(c
,ele
);
4920 static void ltrimCommand(redisClient
*c
) {
4922 int start
= atoi(c
->argv
[2]->ptr
);
4923 int end
= atoi(c
->argv
[3]->ptr
);
4925 int j
, ltrim
, rtrim
;
4929 if ((o
= lookupKeyWriteOrReply(c
,c
->argv
[1],shared
.ok
)) == NULL
||
4930 checkType(c
,o
,REDIS_LIST
)) return;
4932 llen
= listLength(list
);
4934 /* convert negative indexes */
4935 if (start
< 0) start
= llen
+start
;
4936 if (end
< 0) end
= llen
+end
;
4937 if (start
< 0) start
= 0;
4938 if (end
< 0) end
= 0;
4940 /* indexes sanity checks */
4941 if (start
> end
|| start
>= llen
) {
4942 /* Out of range start or start > end result in empty list */
4946 if (end
>= llen
) end
= llen
-1;
4951 /* Remove list elements to perform the trim */
4952 for (j
= 0; j
< ltrim
; j
++) {
4953 ln
= listFirst(list
);
4954 listDelNode(list
,ln
);
4956 for (j
= 0; j
< rtrim
; j
++) {
4957 ln
= listLast(list
);
4958 listDelNode(list
,ln
);
4960 if (listLength(list
) == 0) deleteKey(c
->db
,c
->argv
[1]);
4962 addReply(c
,shared
.ok
);
4965 static void lremCommand(redisClient
*c
) {
4968 listNode
*ln
, *next
;
4969 int toremove
= atoi(c
->argv
[2]->ptr
);
4973 if ((o
= lookupKeyWriteOrReply(c
,c
->argv
[1],shared
.czero
)) == NULL
||
4974 checkType(c
,o
,REDIS_LIST
)) return;
4978 toremove
= -toremove
;
4981 ln
= fromtail
? list
->tail
: list
->head
;
4983 robj
*ele
= listNodeValue(ln
);
4985 next
= fromtail
? ln
->prev
: ln
->next
;
4986 if (equalStringObjects(ele
,c
->argv
[3])) {
4987 listDelNode(list
,ln
);
4990 if (toremove
&& removed
== toremove
) break;
4994 if (listLength(list
) == 0) deleteKey(c
->db
,c
->argv
[1]);
4995 addReplySds(c
,sdscatprintf(sdsempty(),":%d\r\n",removed
));
4998 /* This is the semantic of this command:
4999 * RPOPLPUSH srclist dstlist:
5000 * IF LLEN(srclist) > 0
5001 * element = RPOP srclist
5002 * LPUSH dstlist element
5009 * The idea is to be able to get an element from a list in a reliable way
5010 * since the element is not just returned but pushed against another list
5011 * as well. This command was originally proposed by Ezra Zygmuntowicz.
5013 static void rpoplpushcommand(redisClient
*c
) {
5018 if ((sobj
= lookupKeyWriteOrReply(c
,c
->argv
[1],shared
.nullbulk
)) == NULL
||
5019 checkType(c
,sobj
,REDIS_LIST
)) return;
5020 srclist
= sobj
->ptr
;
5021 ln
= listLast(srclist
);
5024 addReply(c
,shared
.nullbulk
);
5026 robj
*dobj
= lookupKeyWrite(c
->db
,c
->argv
[2]);
5027 robj
*ele
= listNodeValue(ln
);
5030 if (dobj
&& dobj
->type
!= REDIS_LIST
) {
5031 addReply(c
,shared
.wrongtypeerr
);
5035 /* Add the element to the target list (unless it's directly
5036 * passed to some BLPOP-ing client */
5037 if (!handleClientsWaitingListPush(c
,c
->argv
[2],ele
)) {
5039 /* Create the list if the key does not exist */
5040 dobj
= createListObject();
5041 dictAdd(c
->db
->dict
,c
->argv
[2],dobj
);
5042 incrRefCount(c
->argv
[2]);
5044 dstlist
= dobj
->ptr
;
5045 listAddNodeHead(dstlist
,ele
);
5049 /* Send the element to the client as reply as well */
5050 addReplyBulk(c
,ele
);
5052 /* Finally remove the element from the source list */
5053 listDelNode(srclist
,ln
);
5054 if (listLength(srclist
) == 0) deleteKey(c
->db
,c
->argv
[1]);
5059 /* ==================================== Sets ================================ */
5061 static void saddCommand(redisClient
*c
) {
5064 set
= lookupKeyWrite(c
->db
,c
->argv
[1]);
5066 set
= createSetObject();
5067 dictAdd(c
->db
->dict
,c
->argv
[1],set
);
5068 incrRefCount(c
->argv
[1]);
5070 if (set
->type
!= REDIS_SET
) {
5071 addReply(c
,shared
.wrongtypeerr
);
5075 if (dictAdd(set
->ptr
,c
->argv
[2],NULL
) == DICT_OK
) {
5076 incrRefCount(c
->argv
[2]);
5078 addReply(c
,shared
.cone
);
5080 addReply(c
,shared
.czero
);
5084 static void sremCommand(redisClient
*c
) {
5087 if ((set
= lookupKeyWriteOrReply(c
,c
->argv
[1],shared
.czero
)) == NULL
||
5088 checkType(c
,set
,REDIS_SET
)) return;
5090 if (dictDelete(set
->ptr
,c
->argv
[2]) == DICT_OK
) {
5092 if (htNeedsResize(set
->ptr
)) dictResize(set
->ptr
);
5093 if (dictSize((dict
*)set
->ptr
) == 0) deleteKey(c
->db
,c
->argv
[1]);
5094 addReply(c
,shared
.cone
);
5096 addReply(c
,shared
.czero
);
5100 static void smoveCommand(redisClient
*c
) {
5101 robj
*srcset
, *dstset
;
5103 srcset
= lookupKeyWrite(c
->db
,c
->argv
[1]);
5104 dstset
= lookupKeyWrite(c
->db
,c
->argv
[2]);
5106 /* If the source key does not exist return 0, if it's of the wrong type
5108 if (srcset
== NULL
|| srcset
->type
!= REDIS_SET
) {
5109 addReply(c
, srcset
? shared
.wrongtypeerr
: shared
.czero
);
5112 /* Error if the destination key is not a set as well */
5113 if (dstset
&& dstset
->type
!= REDIS_SET
) {
5114 addReply(c
,shared
.wrongtypeerr
);
5117 /* Remove the element from the source set */
5118 if (dictDelete(srcset
->ptr
,c
->argv
[3]) == DICT_ERR
) {
5119 /* Key not found in the src set! return zero */
5120 addReply(c
,shared
.czero
);
5123 if (dictSize((dict
*)srcset
->ptr
) == 0 && srcset
!= dstset
)
5124 deleteKey(c
->db
,c
->argv
[1]);
5126 /* Add the element to the destination set */
5128 dstset
= createSetObject();
5129 dictAdd(c
->db
->dict
,c
->argv
[2],dstset
);
5130 incrRefCount(c
->argv
[2]);
5132 if (dictAdd(dstset
->ptr
,c
->argv
[3],NULL
) == DICT_OK
)
5133 incrRefCount(c
->argv
[3]);
5134 addReply(c
,shared
.cone
);
5137 static void sismemberCommand(redisClient
*c
) {
5140 if ((set
= lookupKeyReadOrReply(c
,c
->argv
[1],shared
.czero
)) == NULL
||
5141 checkType(c
,set
,REDIS_SET
)) return;
5143 if (dictFind(set
->ptr
,c
->argv
[2]))
5144 addReply(c
,shared
.cone
);
5146 addReply(c
,shared
.czero
);
5149 static void scardCommand(redisClient
*c
) {
5153 if ((o
= lookupKeyReadOrReply(c
,c
->argv
[1],shared
.czero
)) == NULL
||
5154 checkType(c
,o
,REDIS_SET
)) return;
5157 addReplyUlong(c
,dictSize(s
));
5160 static void spopCommand(redisClient
*c
) {
5164 if ((set
= lookupKeyWriteOrReply(c
,c
->argv
[1],shared
.nullbulk
)) == NULL
||
5165 checkType(c
,set
,REDIS_SET
)) return;
5167 de
= dictGetRandomKey(set
->ptr
);
5169 addReply(c
,shared
.nullbulk
);
5171 robj
*ele
= dictGetEntryKey(de
);
5173 addReplyBulk(c
,ele
);
5174 dictDelete(set
->ptr
,ele
);
5175 if (htNeedsResize(set
->ptr
)) dictResize(set
->ptr
);
5176 if (dictSize((dict
*)set
->ptr
) == 0) deleteKey(c
->db
,c
->argv
[1]);
5181 static void srandmemberCommand(redisClient
*c
) {
5185 if ((set
= lookupKeyReadOrReply(c
,c
->argv
[1],shared
.nullbulk
)) == NULL
||
5186 checkType(c
,set
,REDIS_SET
)) return;
5188 de
= dictGetRandomKey(set
->ptr
);
5190 addReply(c
,shared
.nullbulk
);
5192 robj
*ele
= dictGetEntryKey(de
);
5194 addReplyBulk(c
,ele
);
5198 static int qsortCompareSetsByCardinality(const void *s1
, const void *s2
) {
5199 dict
**d1
= (void*) s1
, **d2
= (void*) s2
;
5201 return dictSize(*d1
)-dictSize(*d2
);
5204 static void sinterGenericCommand(redisClient
*c
, robj
**setskeys
, unsigned long setsnum
, robj
*dstkey
) {
5205 dict
**dv
= zmalloc(sizeof(dict
*)*setsnum
);
5208 robj
*lenobj
= NULL
, *dstset
= NULL
;
5209 unsigned long j
, cardinality
= 0;
5211 for (j
= 0; j
< setsnum
; j
++) {
5215 lookupKeyWrite(c
->db
,setskeys
[j
]) :
5216 lookupKeyRead(c
->db
,setskeys
[j
]);
5220 if (deleteKey(c
->db
,dstkey
))
5222 addReply(c
,shared
.czero
);
5224 addReply(c
,shared
.emptymultibulk
);
5228 if (setobj
->type
!= REDIS_SET
) {
5230 addReply(c
,shared
.wrongtypeerr
);
5233 dv
[j
] = setobj
->ptr
;
5235 /* Sort sets from the smallest to largest, this will improve our
5236 * algorithm's performance */
5237 qsort(dv
,setsnum
,sizeof(dict
*),qsortCompareSetsByCardinality
);
5239 /* The first thing we should output is the total number of elements...
5240 * since this is a multi-bulk write, but at this stage we don't know
5241 * the intersection set size, so we use a trick, append an empty object
5242 * to the output list and save the pointer to later modify it with the
5245 lenobj
= createObject(REDIS_STRING
,NULL
);
5247 decrRefCount(lenobj
);
5249 /* If we have a target key where to store the resulting set
5250 * create this key with an empty set inside */
5251 dstset
= createSetObject();
5254 /* Iterate all the elements of the first (smallest) set, and test
5255 * the element against all the other sets, if at least one set does
5256 * not include the element it is discarded */
5257 di
= dictGetIterator(dv
[0]);
5259 while((de
= dictNext(di
)) != NULL
) {
5262 for (j
= 1; j
< setsnum
; j
++)
5263 if (dictFind(dv
[j
],dictGetEntryKey(de
)) == NULL
) break;
5265 continue; /* at least one set does not contain the member */
5266 ele
= dictGetEntryKey(de
);
5268 addReplyBulk(c
,ele
);
5271 dictAdd(dstset
->ptr
,ele
,NULL
);
5275 dictReleaseIterator(di
);
5278 /* Store the resulting set into the target, if the intersection
5279 * is not an empty set. */
5280 deleteKey(c
->db
,dstkey
);
5281 if (dictSize((dict
*)dstset
->ptr
) > 0) {
5282 dictAdd(c
->db
->dict
,dstkey
,dstset
);
5283 incrRefCount(dstkey
);
5284 addReplyLongLong(c
,dictSize((dict
*)dstset
->ptr
));
5286 decrRefCount(dstset
);
5287 addReply(c
,shared
.czero
);
5291 lenobj
->ptr
= sdscatprintf(sdsempty(),"*%lu\r\n",cardinality
);
5296 static void sinterCommand(redisClient
*c
) {
5297 sinterGenericCommand(c
,c
->argv
+1,c
->argc
-1,NULL
);
5300 static void sinterstoreCommand(redisClient
*c
) {
5301 sinterGenericCommand(c
,c
->argv
+2,c
->argc
-2,c
->argv
[1]);
5304 #define REDIS_OP_UNION 0
5305 #define REDIS_OP_DIFF 1
5306 #define REDIS_OP_INTER 2
5308 static void sunionDiffGenericCommand(redisClient
*c
, robj
**setskeys
, int setsnum
, robj
*dstkey
, int op
) {
5309 dict
**dv
= zmalloc(sizeof(dict
*)*setsnum
);
5312 robj
*dstset
= NULL
;
5313 int j
, cardinality
= 0;
5315 for (j
= 0; j
< setsnum
; j
++) {
5319 lookupKeyWrite(c
->db
,setskeys
[j
]) :
5320 lookupKeyRead(c
->db
,setskeys
[j
]);
5325 if (setobj
->type
!= REDIS_SET
) {
5327 addReply(c
,shared
.wrongtypeerr
);
5330 dv
[j
] = setobj
->ptr
;
5333 /* We need a temp set object to store our union. If the dstkey
5334 * is not NULL (that is, we are inside an SUNIONSTORE operation) then
5335 * this set object will be the resulting object to set into the target key*/
5336 dstset
= createSetObject();
5338 /* Iterate all the elements of all the sets, add every element a single
5339 * time to the result set */
5340 for (j
= 0; j
< setsnum
; j
++) {
5341 if (op
== REDIS_OP_DIFF
&& j
== 0 && !dv
[j
]) break; /* result set is empty */
5342 if (!dv
[j
]) continue; /* non existing keys are like empty sets */
5344 di
= dictGetIterator(dv
[j
]);
5346 while((de
= dictNext(di
)) != NULL
) {
5349 /* dictAdd will not add the same element multiple times */
5350 ele
= dictGetEntryKey(de
);
5351 if (op
== REDIS_OP_UNION
|| j
== 0) {
5352 if (dictAdd(dstset
->ptr
,ele
,NULL
) == DICT_OK
) {
5356 } else if (op
== REDIS_OP_DIFF
) {
5357 if (dictDelete(dstset
->ptr
,ele
) == DICT_OK
) {
5362 dictReleaseIterator(di
);
5364 /* result set is empty? Exit asap. */
5365 if (op
== REDIS_OP_DIFF
&& cardinality
== 0) break;
5368 /* Output the content of the resulting set, if not in STORE mode */
5370 addReplySds(c
,sdscatprintf(sdsempty(),"*%d\r\n",cardinality
));
5371 di
= dictGetIterator(dstset
->ptr
);
5372 while((de
= dictNext(di
)) != NULL
) {
5375 ele
= dictGetEntryKey(de
);
5376 addReplyBulk(c
,ele
);
5378 dictReleaseIterator(di
);
5379 decrRefCount(dstset
);
5381 /* If we have a target key where to store the resulting set
5382 * create this key with the result set inside */
5383 deleteKey(c
->db
,dstkey
);
5384 if (dictSize((dict
*)dstset
->ptr
) > 0) {
5385 dictAdd(c
->db
->dict
,dstkey
,dstset
);
5386 incrRefCount(dstkey
);
5387 addReplyLongLong(c
,dictSize((dict
*)dstset
->ptr
));
5389 decrRefCount(dstset
);
5390 addReply(c
,shared
.czero
);
5397 static void sunionCommand(redisClient
*c
) {
5398 sunionDiffGenericCommand(c
,c
->argv
+1,c
->argc
-1,NULL
,REDIS_OP_UNION
);
5401 static void sunionstoreCommand(redisClient
*c
) {
5402 sunionDiffGenericCommand(c
,c
->argv
+2,c
->argc
-2,c
->argv
[1],REDIS_OP_UNION
);
5405 static void sdiffCommand(redisClient
*c
) {
5406 sunionDiffGenericCommand(c
,c
->argv
+1,c
->argc
-1,NULL
,REDIS_OP_DIFF
);
5409 static void sdiffstoreCommand(redisClient
*c
) {
5410 sunionDiffGenericCommand(c
,c
->argv
+2,c
->argc
-2,c
->argv
[1],REDIS_OP_DIFF
);
5413 /* ==================================== ZSets =============================== */
5415 /* ZSETs are ordered sets using two data structures to hold the same elements
5416 * in order to get O(log(N)) INSERT and REMOVE operations into a sorted
5419 * The elements are added to an hash table mapping Redis objects to scores.
5420 * At the same time the elements are added to a skip list mapping scores
5421 * to Redis objects (so objects are sorted by scores in this "view"). */
5423 /* This skiplist implementation is almost a C translation of the original
5424 * algorithm described by William Pugh in "Skip Lists: A Probabilistic
5425 * Alternative to Balanced Trees", modified in three ways:
5426 * a) this implementation allows for repeated values.
5427 * b) the comparison is not just by key (our 'score') but by satellite data.
5428 * c) there is a back pointer, so it's a doubly linked list with the back
5429 * pointers being only at "level 1". This allows to traverse the list
5430 * from tail to head, useful for ZREVRANGE. */
5432 static zskiplistNode
*zslCreateNode(int level
, double score
, robj
*obj
) {
5433 zskiplistNode
*zn
= zmalloc(sizeof(*zn
));
5435 zn
->forward
= zmalloc(sizeof(zskiplistNode
*) * level
);
5437 zn
->span
= zmalloc(sizeof(unsigned int) * (level
- 1));
5445 static zskiplist
*zslCreate(void) {
5449 zsl
= zmalloc(sizeof(*zsl
));
5452 zsl
->header
= zslCreateNode(ZSKIPLIST_MAXLEVEL
,0,NULL
);
5453 for (j
= 0; j
< ZSKIPLIST_MAXLEVEL
; j
++) {
5454 zsl
->header
->forward
[j
] = NULL
;
5456 /* span has space for ZSKIPLIST_MAXLEVEL-1 elements */
5457 if (j
< ZSKIPLIST_MAXLEVEL
-1)
5458 zsl
->header
->span
[j
] = 0;
5460 zsl
->header
->backward
= NULL
;
5465 static void zslFreeNode(zskiplistNode
*node
) {
5466 decrRefCount(node
->obj
);
5467 zfree(node
->forward
);
5472 static void zslFree(zskiplist
*zsl
) {
5473 zskiplistNode
*node
= zsl
->header
->forward
[0], *next
;
5475 zfree(zsl
->header
->forward
);
5476 zfree(zsl
->header
->span
);
5479 next
= node
->forward
[0];
5486 static int zslRandomLevel(void) {
5488 while ((random()&0xFFFF) < (ZSKIPLIST_P
* 0xFFFF))
5490 return (level
<ZSKIPLIST_MAXLEVEL
) ? level
: ZSKIPLIST_MAXLEVEL
;
5493 static void zslInsert(zskiplist
*zsl
, double score
, robj
*obj
) {
5494 zskiplistNode
*update
[ZSKIPLIST_MAXLEVEL
], *x
;
5495 unsigned int rank
[ZSKIPLIST_MAXLEVEL
];
5499 for (i
= zsl
->level
-1; i
>= 0; i
--) {
5500 /* store rank that is crossed to reach the insert position */
5501 rank
[i
] = i
== (zsl
->level
-1) ? 0 : rank
[i
+1];
5503 while (x
->forward
[i
] &&
5504 (x
->forward
[i
]->score
< score
||
5505 (x
->forward
[i
]->score
== score
&&
5506 compareStringObjects(x
->forward
[i
]->obj
,obj
) < 0))) {
5507 rank
[i
] += i
> 0 ? x
->span
[i
-1] : 1;
5512 /* we assume the key is not already inside, since we allow duplicated
5513 * scores, and the re-insertion of score and redis object should never
5514 * happen since the caller of zslInsert() should test in the hash table
5515 * if the element is already inside or not. */
5516 level
= zslRandomLevel();
5517 if (level
> zsl
->level
) {
5518 for (i
= zsl
->level
; i
< level
; i
++) {
5520 update
[i
] = zsl
->header
;
5521 update
[i
]->span
[i
-1] = zsl
->length
;
5525 x
= zslCreateNode(level
,score
,obj
);
5526 for (i
= 0; i
< level
; i
++) {
5527 x
->forward
[i
] = update
[i
]->forward
[i
];
5528 update
[i
]->forward
[i
] = x
;
5530 /* update span covered by update[i] as x is inserted here */
5532 x
->span
[i
-1] = update
[i
]->span
[i
-1] - (rank
[0] - rank
[i
]);
5533 update
[i
]->span
[i
-1] = (rank
[0] - rank
[i
]) + 1;
5537 /* increment span for untouched levels */
5538 for (i
= level
; i
< zsl
->level
; i
++) {
5539 update
[i
]->span
[i
-1]++;
5542 x
->backward
= (update
[0] == zsl
->header
) ? NULL
: update
[0];
5544 x
->forward
[0]->backward
= x
;
5550 /* Internal function used by zslDelete, zslDeleteByScore and zslDeleteByRank */
5551 void zslDeleteNode(zskiplist
*zsl
, zskiplistNode
*x
, zskiplistNode
**update
) {
5553 for (i
= 0; i
< zsl
->level
; i
++) {
5554 if (update
[i
]->forward
[i
] == x
) {
5556 update
[i
]->span
[i
-1] += x
->span
[i
-1] - 1;
5558 update
[i
]->forward
[i
] = x
->forward
[i
];
5560 /* invariant: i > 0, because update[0]->forward[0]
5561 * is always equal to x */
5562 update
[i
]->span
[i
-1] -= 1;
5565 if (x
->forward
[0]) {
5566 x
->forward
[0]->backward
= x
->backward
;
5568 zsl
->tail
= x
->backward
;
5570 while(zsl
->level
> 1 && zsl
->header
->forward
[zsl
->level
-1] == NULL
)
5575 /* Delete an element with matching score/object from the skiplist. */
5576 static int zslDelete(zskiplist
*zsl
, double score
, robj
*obj
) {
5577 zskiplistNode
*update
[ZSKIPLIST_MAXLEVEL
], *x
;
5581 for (i
= zsl
->level
-1; i
>= 0; i
--) {
5582 while (x
->forward
[i
] &&
5583 (x
->forward
[i
]->score
< score
||
5584 (x
->forward
[i
]->score
== score
&&
5585 compareStringObjects(x
->forward
[i
]->obj
,obj
) < 0)))
5589 /* We may have multiple elements with the same score, what we need
5590 * is to find the element with both the right score and object. */
5592 if (x
&& score
== x
->score
&& equalStringObjects(x
->obj
,obj
)) {
5593 zslDeleteNode(zsl
, x
, update
);
5597 return 0; /* not found */
5599 return 0; /* not found */
5602 /* Delete all the elements with score between min and max from the skiplist.
5603 * Min and max are inclusive, so any element with min <= score <= max is deleted.
5604 * Note that this function takes the reference to the hash table view of the
5605 * sorted set, in order to remove the elements from the hash table too. */
5606 static unsigned long zslDeleteRangeByScore(zskiplist
*zsl
, double min
, double max
, dict
*dict
) {
5607 zskiplistNode
*update
[ZSKIPLIST_MAXLEVEL
], *x
;
5608 unsigned long removed
= 0;
5612 for (i
= zsl
->level
-1; i
>= 0; i
--) {
5613 while (x
->forward
[i
] && x
->forward
[i
]->score
< min
)
5617 /* We may have multiple elements with the same score, what we need
5618 * is to find the element with both the right score and object. */
5620 while (x
&& x
->score
<= max
) {
5621 zskiplistNode
*next
= x
->forward
[0];
5622 zslDeleteNode(zsl
, x
, update
);
5623 dictDelete(dict
,x
->obj
);
5628 return removed
; /* not found */
5631 /* Delete all the elements with rank between start and end from the skiplist.
5632 * Start and end are inclusive. Note that start and end need to be 1-based */
5633 static unsigned long zslDeleteRangeByRank(zskiplist
*zsl
, unsigned int start
, unsigned int end
, dict
*dict
) {
5634 zskiplistNode
*update
[ZSKIPLIST_MAXLEVEL
], *x
;
5635 unsigned long traversed
= 0, removed
= 0;
5639 for (i
= zsl
->level
-1; i
>= 0; i
--) {
5640 while (x
->forward
[i
] && (traversed
+ (i
> 0 ? x
->span
[i
-1] : 1)) < start
) {
5641 traversed
+= i
> 0 ? x
->span
[i
-1] : 1;
5649 while (x
&& traversed
<= end
) {
5650 zskiplistNode
*next
= x
->forward
[0];
5651 zslDeleteNode(zsl
, x
, update
);
5652 dictDelete(dict
,x
->obj
);
5661 /* Find the first node having a score equal or greater than the specified one.
5662 * Returns NULL if there is no match. */
5663 static zskiplistNode
*zslFirstWithScore(zskiplist
*zsl
, double score
) {
5668 for (i
= zsl
->level
-1; i
>= 0; i
--) {
5669 while (x
->forward
[i
] && x
->forward
[i
]->score
< score
)
5672 /* We may have multiple elements with the same score, what we need
5673 * is to find the element with both the right score and object. */
5674 return x
->forward
[0];
5677 /* Find the rank for an element by both score and key.
5678 * Returns 0 when the element cannot be found, rank otherwise.
5679 * Note that the rank is 1-based due to the span of zsl->header to the
5681 static unsigned long zslGetRank(zskiplist
*zsl
, double score
, robj
*o
) {
5683 unsigned long rank
= 0;
5687 for (i
= zsl
->level
-1; i
>= 0; i
--) {
5688 while (x
->forward
[i
] &&
5689 (x
->forward
[i
]->score
< score
||
5690 (x
->forward
[i
]->score
== score
&&
5691 compareStringObjects(x
->forward
[i
]->obj
,o
) <= 0))) {
5692 rank
+= i
> 0 ? x
->span
[i
-1] : 1;
5696 /* x might be equal to zsl->header, so test if obj is non-NULL */
5697 if (x
->obj
&& equalStringObjects(x
->obj
,o
)) {
5704 /* Finds an element by its rank. The rank argument needs to be 1-based. */
5705 zskiplistNode
* zslGetElementByRank(zskiplist
*zsl
, unsigned long rank
) {
5707 unsigned long traversed
= 0;
5711 for (i
= zsl
->level
-1; i
>= 0; i
--) {
5712 while (x
->forward
[i
] && (traversed
+ (i
>0 ? x
->span
[i
-1] : 1)) <= rank
)
5714 traversed
+= i
> 0 ? x
->span
[i
-1] : 1;
5717 if (traversed
== rank
) {
5724 /* The actual Z-commands implementations */
5726 /* This generic command implements both ZADD and ZINCRBY.
5727 * scoreval is the score if the operation is a ZADD (doincrement == 0) or
5728 * the increment if the operation is a ZINCRBY (doincrement == 1). */
5729 static void zaddGenericCommand(redisClient
*c
, robj
*key
, robj
*ele
, double scoreval
, int doincrement
) {
5734 zsetobj
= lookupKeyWrite(c
->db
,key
);
5735 if (zsetobj
== NULL
) {
5736 zsetobj
= createZsetObject();
5737 dictAdd(c
->db
->dict
,key
,zsetobj
);
5740 if (zsetobj
->type
!= REDIS_ZSET
) {
5741 addReply(c
,shared
.wrongtypeerr
);
5747 /* Ok now since we implement both ZADD and ZINCRBY here the code
5748 * needs to handle the two different conditions. It's all about setting
5749 * '*score', that is, the new score to set, to the right value. */
5750 score
= zmalloc(sizeof(double));
5754 /* Read the old score. If the element was not present starts from 0 */
5755 de
= dictFind(zs
->dict
,ele
);
5757 double *oldscore
= dictGetEntryVal(de
);
5758 *score
= *oldscore
+ scoreval
;
5766 /* What follows is a simple remove and re-insert operation that is common
5767 * to both ZADD and ZINCRBY... */
5768 if (dictAdd(zs
->dict
,ele
,score
) == DICT_OK
) {
5769 /* case 1: New element */
5770 incrRefCount(ele
); /* added to hash */
5771 zslInsert(zs
->zsl
,*score
,ele
);
5772 incrRefCount(ele
); /* added to skiplist */
5775 addReplyDouble(c
,*score
);
5777 addReply(c
,shared
.cone
);
5782 /* case 2: Score update operation */
5783 de
= dictFind(zs
->dict
,ele
);
5784 redisAssert(de
!= NULL
);
5785 oldscore
= dictGetEntryVal(de
);
5786 if (*score
!= *oldscore
) {
5789 /* Remove and insert the element in the skip list with new score */
5790 deleted
= zslDelete(zs
->zsl
,*oldscore
,ele
);
5791 redisAssert(deleted
!= 0);
5792 zslInsert(zs
->zsl
,*score
,ele
);
5794 /* Update the score in the hash table */
5795 dictReplace(zs
->dict
,ele
,score
);
5801 addReplyDouble(c
,*score
);
5803 addReply(c
,shared
.czero
);
5807 static void zaddCommand(redisClient
*c
) {
5810 if (getDoubleFromObjectOrReply(c
, c
->argv
[2], &scoreval
, NULL
) != REDIS_OK
) return;
5811 zaddGenericCommand(c
,c
->argv
[1],c
->argv
[3],scoreval
,0);
5814 static void zincrbyCommand(redisClient
*c
) {
5817 if (getDoubleFromObjectOrReply(c
, c
->argv
[2], &scoreval
, NULL
) != REDIS_OK
) return;
5818 zaddGenericCommand(c
,c
->argv
[1],c
->argv
[3],scoreval
,1);
5821 static void zremCommand(redisClient
*c
) {
5828 if ((zsetobj
= lookupKeyWriteOrReply(c
,c
->argv
[1],shared
.czero
)) == NULL
||
5829 checkType(c
,zsetobj
,REDIS_ZSET
)) return;
5832 de
= dictFind(zs
->dict
,c
->argv
[2]);
5834 addReply(c
,shared
.czero
);
5837 /* Delete from the skiplist */
5838 oldscore
= dictGetEntryVal(de
);
5839 deleted
= zslDelete(zs
->zsl
,*oldscore
,c
->argv
[2]);
5840 redisAssert(deleted
!= 0);
5842 /* Delete from the hash table */
5843 dictDelete(zs
->dict
,c
->argv
[2]);
5844 if (htNeedsResize(zs
->dict
)) dictResize(zs
->dict
);
5845 if (dictSize(zs
->dict
) == 0) deleteKey(c
->db
,c
->argv
[1]);
5847 addReply(c
,shared
.cone
);
5850 static void zremrangebyscoreCommand(redisClient
*c
) {
5857 if ((getDoubleFromObjectOrReply(c
, c
->argv
[2], &min
, NULL
) != REDIS_OK
) ||
5858 (getDoubleFromObjectOrReply(c
, c
->argv
[3], &max
, NULL
) != REDIS_OK
)) return;
5860 if ((zsetobj
= lookupKeyWriteOrReply(c
,c
->argv
[1],shared
.czero
)) == NULL
||
5861 checkType(c
,zsetobj
,REDIS_ZSET
)) return;
5864 deleted
= zslDeleteRangeByScore(zs
->zsl
,min
,max
,zs
->dict
);
5865 if (htNeedsResize(zs
->dict
)) dictResize(zs
->dict
);
5866 if (dictSize(zs
->dict
) == 0) deleteKey(c
->db
,c
->argv
[1]);
5867 server
.dirty
+= deleted
;
5868 addReplyLongLong(c
,deleted
);
5871 static void zremrangebyrankCommand(redisClient
*c
) {
5879 if ((getLongFromObjectOrReply(c
, c
->argv
[2], &start
, NULL
) != REDIS_OK
) ||
5880 (getLongFromObjectOrReply(c
, c
->argv
[3], &end
, NULL
) != REDIS_OK
)) return;
5882 if ((zsetobj
= lookupKeyWriteOrReply(c
,c
->argv
[1],shared
.czero
)) == NULL
||
5883 checkType(c
,zsetobj
,REDIS_ZSET
)) return;
5885 llen
= zs
->zsl
->length
;
5887 /* convert negative indexes */
5888 if (start
< 0) start
= llen
+start
;
5889 if (end
< 0) end
= llen
+end
;
5890 if (start
< 0) start
= 0;
5891 if (end
< 0) end
= 0;
5893 /* indexes sanity checks */
5894 if (start
> end
|| start
>= llen
) {
5895 addReply(c
,shared
.czero
);
5898 if (end
>= llen
) end
= llen
-1;
5900 /* increment start and end because zsl*Rank functions
5901 * use 1-based rank */
5902 deleted
= zslDeleteRangeByRank(zs
->zsl
,start
+1,end
+1,zs
->dict
);
5903 if (htNeedsResize(zs
->dict
)) dictResize(zs
->dict
);
5904 if (dictSize(zs
->dict
) == 0) deleteKey(c
->db
,c
->argv
[1]);
5905 server
.dirty
+= deleted
;
5906 addReplyLongLong(c
, deleted
);
5914 static int qsortCompareZsetopsrcByCardinality(const void *s1
, const void *s2
) {
5915 zsetopsrc
*d1
= (void*) s1
, *d2
= (void*) s2
;
5916 unsigned long size1
, size2
;
5917 size1
= d1
->dict
? dictSize(d1
->dict
) : 0;
5918 size2
= d2
->dict
? dictSize(d2
->dict
) : 0;
5919 return size1
- size2
;
5922 #define REDIS_AGGR_SUM 1
5923 #define REDIS_AGGR_MIN 2
5924 #define REDIS_AGGR_MAX 3
5926 inline static void zunionInterAggregate(double *target
, double val
, int aggregate
) {
5927 if (aggregate
== REDIS_AGGR_SUM
) {
5928 *target
= *target
+ val
;
5929 } else if (aggregate
== REDIS_AGGR_MIN
) {
5930 *target
= val
< *target
? val
: *target
;
5931 } else if (aggregate
== REDIS_AGGR_MAX
) {
5932 *target
= val
> *target
? val
: *target
;
5935 redisPanic("Unknown ZUNION/INTER aggregate type");
5939 static void zunionInterGenericCommand(redisClient
*c
, robj
*dstkey
, int op
) {
5941 int aggregate
= REDIS_AGGR_SUM
;
5948 /* expect zsetnum input keys to be given */
5949 zsetnum
= atoi(c
->argv
[2]->ptr
);
5951 addReplySds(c
,sdsnew("-ERR at least 1 input key is needed for ZUNIONSTORE/ZINTERSTORE\r\n"));
5955 /* test if the expected number of keys would overflow */
5956 if (3+zsetnum
> c
->argc
) {
5957 addReply(c
,shared
.syntaxerr
);
5961 /* read keys to be used for input */
5962 src
= zmalloc(sizeof(zsetopsrc
) * zsetnum
);
5963 for (i
= 0, j
= 3; i
< zsetnum
; i
++, j
++) {
5964 robj
*zsetobj
= lookupKeyWrite(c
->db
,c
->argv
[j
]);
5968 if (zsetobj
->type
!= REDIS_ZSET
) {
5970 addReply(c
,shared
.wrongtypeerr
);
5973 src
[i
].dict
= ((zset
*)zsetobj
->ptr
)->dict
;
5976 /* default all weights to 1 */
5977 src
[i
].weight
= 1.0;
5980 /* parse optional extra arguments */
5982 int remaining
= c
->argc
- j
;
5985 if (remaining
>= (zsetnum
+ 1) && !strcasecmp(c
->argv
[j
]->ptr
,"weights")) {
5987 for (i
= 0; i
< zsetnum
; i
++, j
++, remaining
--) {
5988 if (getDoubleFromObjectOrReply(c
, c
->argv
[j
], &src
[i
].weight
, NULL
) != REDIS_OK
)
5991 } else if (remaining
>= 2 && !strcasecmp(c
->argv
[j
]->ptr
,"aggregate")) {
5993 if (!strcasecmp(c
->argv
[j
]->ptr
,"sum")) {
5994 aggregate
= REDIS_AGGR_SUM
;
5995 } else if (!strcasecmp(c
->argv
[j
]->ptr
,"min")) {
5996 aggregate
= REDIS_AGGR_MIN
;
5997 } else if (!strcasecmp(c
->argv
[j
]->ptr
,"max")) {
5998 aggregate
= REDIS_AGGR_MAX
;
6001 addReply(c
,shared
.syntaxerr
);
6007 addReply(c
,shared
.syntaxerr
);
6013 /* sort sets from the smallest to largest, this will improve our
6014 * algorithm's performance */
6015 qsort(src
,zsetnum
,sizeof(zsetopsrc
), qsortCompareZsetopsrcByCardinality
);
6017 dstobj
= createZsetObject();
6018 dstzset
= dstobj
->ptr
;
6020 if (op
== REDIS_OP_INTER
) {
6021 /* skip going over all entries if the smallest zset is NULL or empty */
6022 if (src
[0].dict
&& dictSize(src
[0].dict
) > 0) {
6023 /* precondition: as src[0].dict is non-empty and the zsets are ordered
6024 * from small to large, all src[i > 0].dict are non-empty too */
6025 di
= dictGetIterator(src
[0].dict
);
6026 while((de
= dictNext(di
)) != NULL
) {
6027 double *score
= zmalloc(sizeof(double)), value
;
6028 *score
= src
[0].weight
* (*(double*)dictGetEntryVal(de
));
6030 for (j
= 1; j
< zsetnum
; j
++) {
6031 dictEntry
*other
= dictFind(src
[j
].dict
,dictGetEntryKey(de
));
6033 value
= src
[j
].weight
* (*(double*)dictGetEntryVal(other
));
6034 zunionInterAggregate(score
, value
, aggregate
);
6040 /* skip entry when not present in every source dict */
6044 robj
*o
= dictGetEntryKey(de
);
6045 dictAdd(dstzset
->dict
,o
,score
);
6046 incrRefCount(o
); /* added to dictionary */
6047 zslInsert(dstzset
->zsl
,*score
,o
);
6048 incrRefCount(o
); /* added to skiplist */
6051 dictReleaseIterator(di
);
6053 } else if (op
== REDIS_OP_UNION
) {
6054 for (i
= 0; i
< zsetnum
; i
++) {
6055 if (!src
[i
].dict
) continue;
6057 di
= dictGetIterator(src
[i
].dict
);
6058 while((de
= dictNext(di
)) != NULL
) {
6059 /* skip key when already processed */
6060 if (dictFind(dstzset
->dict
,dictGetEntryKey(de
)) != NULL
) continue;
6062 double *score
= zmalloc(sizeof(double)), value
;
6063 *score
= src
[i
].weight
* (*(double*)dictGetEntryVal(de
));
6065 /* because the zsets are sorted by size, its only possible
6066 * for sets at larger indices to hold this entry */
6067 for (j
= (i
+1); j
< zsetnum
; j
++) {
6068 dictEntry
*other
= dictFind(src
[j
].dict
,dictGetEntryKey(de
));
6070 value
= src
[j
].weight
* (*(double*)dictGetEntryVal(other
));
6071 zunionInterAggregate(score
, value
, aggregate
);
6075 robj
*o
= dictGetEntryKey(de
);
6076 dictAdd(dstzset
->dict
,o
,score
);
6077 incrRefCount(o
); /* added to dictionary */
6078 zslInsert(dstzset
->zsl
,*score
,o
);
6079 incrRefCount(o
); /* added to skiplist */
6081 dictReleaseIterator(di
);
6084 /* unknown operator */
6085 redisAssert(op
== REDIS_OP_INTER
|| op
== REDIS_OP_UNION
);
6088 deleteKey(c
->db
,dstkey
);
6089 if (dstzset
->zsl
->length
) {
6090 dictAdd(c
->db
->dict
,dstkey
,dstobj
);
6091 incrRefCount(dstkey
);
6092 addReplyLongLong(c
, dstzset
->zsl
->length
);
6095 decrRefCount(dstobj
);
6096 addReply(c
, shared
.czero
);
6101 static void zunionstoreCommand(redisClient
*c
) {
6102 zunionInterGenericCommand(c
,c
->argv
[1], REDIS_OP_UNION
);
6105 static void zinterstoreCommand(redisClient
*c
) {
6106 zunionInterGenericCommand(c
,c
->argv
[1], REDIS_OP_INTER
);
6109 static void zrangeGenericCommand(redisClient
*c
, int reverse
) {
6121 if ((getLongFromObjectOrReply(c
, c
->argv
[2], &start
, NULL
) != REDIS_OK
) ||
6122 (getLongFromObjectOrReply(c
, c
->argv
[3], &end
, NULL
) != REDIS_OK
)) return;
6124 if (c
->argc
== 5 && !strcasecmp(c
->argv
[4]->ptr
,"withscores")) {
6126 } else if (c
->argc
>= 5) {
6127 addReply(c
,shared
.syntaxerr
);
6131 if ((o
= lookupKeyReadOrReply(c
,c
->argv
[1],shared
.emptymultibulk
)) == NULL
6132 || checkType(c
,o
,REDIS_ZSET
)) return;
6137 /* convert negative indexes */
6138 if (start
< 0) start
= llen
+start
;
6139 if (end
< 0) end
= llen
+end
;
6140 if (start
< 0) start
= 0;
6141 if (end
< 0) end
= 0;
6143 /* indexes sanity checks */
6144 if (start
> end
|| start
>= llen
) {
6145 /* Out of range start or start > end result in empty list */
6146 addReply(c
,shared
.emptymultibulk
);
6149 if (end
>= llen
) end
= llen
-1;
6150 rangelen
= (end
-start
)+1;
6152 /* check if starting point is trivial, before searching
6153 * the element in log(N) time */
6155 ln
= start
== 0 ? zsl
->tail
: zslGetElementByRank(zsl
, llen
-start
);
6158 zsl
->header
->forward
[0] : zslGetElementByRank(zsl
, start
+1);
6161 /* Return the result in form of a multi-bulk reply */
6162 addReplySds(c
,sdscatprintf(sdsempty(),"*%d\r\n",
6163 withscores
? (rangelen
*2) : rangelen
));
6164 for (j
= 0; j
< rangelen
; j
++) {
6166 addReplyBulk(c
,ele
);
6168 addReplyDouble(c
,ln
->score
);
6169 ln
= reverse
? ln
->backward
: ln
->forward
[0];
6173 static void zrangeCommand(redisClient
*c
) {
6174 zrangeGenericCommand(c
,0);
6177 static void zrevrangeCommand(redisClient
*c
) {
6178 zrangeGenericCommand(c
,1);
6181 /* This command implements both ZRANGEBYSCORE and ZCOUNT.
6182 * If justcount is non-zero, just the count is returned. */
6183 static void genericZrangebyscoreCommand(redisClient
*c
, int justcount
) {
6186 int minex
= 0, maxex
= 0; /* are min or max exclusive? */
6187 int offset
= 0, limit
= -1;
6191 /* Parse the min-max interval. If one of the values is prefixed
6192 * by the "(" character, it's considered "open". For instance
6193 * ZRANGEBYSCORE zset (1.5 (2.5 will match min < x < max
6194 * ZRANGEBYSCORE zset 1.5 2.5 will instead match min <= x <= max */
6195 if (((char*)c
->argv
[2]->ptr
)[0] == '(') {
6196 min
= strtod((char*)c
->argv
[2]->ptr
+1,NULL
);
6199 min
= strtod(c
->argv
[2]->ptr
,NULL
);
6201 if (((char*)c
->argv
[3]->ptr
)[0] == '(') {
6202 max
= strtod((char*)c
->argv
[3]->ptr
+1,NULL
);
6205 max
= strtod(c
->argv
[3]->ptr
,NULL
);
6208 /* Parse "WITHSCORES": note that if the command was called with
6209 * the name ZCOUNT then we are sure that c->argc == 4, so we'll never
6210 * enter the following paths to parse WITHSCORES and LIMIT. */
6211 if (c
->argc
== 5 || c
->argc
== 8) {
6212 if (strcasecmp(c
->argv
[c
->argc
-1]->ptr
,"withscores") == 0)
6217 if (c
->argc
!= (4 + withscores
) && c
->argc
!= (7 + withscores
))
6221 sdsnew("-ERR wrong number of arguments for ZRANGEBYSCORE\r\n"));
6226 if (c
->argc
== (7 + withscores
) && strcasecmp(c
->argv
[4]->ptr
,"limit")) {
6227 addReply(c
,shared
.syntaxerr
);
6229 } else if (c
->argc
== (7 + withscores
)) {
6230 offset
= atoi(c
->argv
[5]->ptr
);
6231 limit
= atoi(c
->argv
[6]->ptr
);
6232 if (offset
< 0) offset
= 0;
6235 /* Ok, lookup the key and get the range */
6236 o
= lookupKeyRead(c
->db
,c
->argv
[1]);
6238 addReply(c
,justcount
? shared
.czero
: shared
.emptymultibulk
);
6240 if (o
->type
!= REDIS_ZSET
) {
6241 addReply(c
,shared
.wrongtypeerr
);
6243 zset
*zsetobj
= o
->ptr
;
6244 zskiplist
*zsl
= zsetobj
->zsl
;
6246 robj
*ele
, *lenobj
= NULL
;
6247 unsigned long rangelen
= 0;
6249 /* Get the first node with the score >= min, or with
6250 * score > min if 'minex' is true. */
6251 ln
= zslFirstWithScore(zsl
,min
);
6252 while (minex
&& ln
&& ln
->score
== min
) ln
= ln
->forward
[0];
6255 /* No element matching the specified interval */
6256 addReply(c
,justcount
? shared
.czero
: shared
.emptymultibulk
);
6260 /* We don't know in advance how many matching elements there
6261 * are in the list, so we push this object that will represent
6262 * the multi-bulk length in the output buffer, and will "fix"
6265 lenobj
= createObject(REDIS_STRING
,NULL
);
6267 decrRefCount(lenobj
);
6270 while(ln
&& (maxex
? (ln
->score
< max
) : (ln
->score
<= max
))) {
6273 ln
= ln
->forward
[0];
6276 if (limit
== 0) break;
6279 addReplyBulk(c
,ele
);
6281 addReplyDouble(c
,ln
->score
);
6283 ln
= ln
->forward
[0];
6285 if (limit
> 0) limit
--;
6288 addReplyLongLong(c
,(long)rangelen
);
6290 lenobj
->ptr
= sdscatprintf(sdsempty(),"*%lu\r\n",
6291 withscores
? (rangelen
*2) : rangelen
);
6297 static void zrangebyscoreCommand(redisClient
*c
) {
6298 genericZrangebyscoreCommand(c
,0);
6301 static void zcountCommand(redisClient
*c
) {
6302 genericZrangebyscoreCommand(c
,1);
6305 static void zcardCommand(redisClient
*c
) {
6309 if ((o
= lookupKeyReadOrReply(c
,c
->argv
[1],shared
.czero
)) == NULL
||
6310 checkType(c
,o
,REDIS_ZSET
)) return;
6313 addReplyUlong(c
,zs
->zsl
->length
);
6316 static void zscoreCommand(redisClient
*c
) {
6321 if ((o
= lookupKeyReadOrReply(c
,c
->argv
[1],shared
.nullbulk
)) == NULL
||
6322 checkType(c
,o
,REDIS_ZSET
)) return;
6325 de
= dictFind(zs
->dict
,c
->argv
[2]);
6327 addReply(c
,shared
.nullbulk
);
6329 double *score
= dictGetEntryVal(de
);
6331 addReplyDouble(c
,*score
);
6335 static void zrankGenericCommand(redisClient
*c
, int reverse
) {
6343 if ((o
= lookupKeyReadOrReply(c
,c
->argv
[1],shared
.nullbulk
)) == NULL
||
6344 checkType(c
,o
,REDIS_ZSET
)) return;
6348 de
= dictFind(zs
->dict
,c
->argv
[2]);
6350 addReply(c
,shared
.nullbulk
);
6354 score
= dictGetEntryVal(de
);
6355 rank
= zslGetRank(zsl
, *score
, c
->argv
[2]);
6358 addReplyLongLong(c
, zsl
->length
- rank
);
6360 addReplyLongLong(c
, rank
-1);
6363 addReply(c
,shared
.nullbulk
);
6367 static void zrankCommand(redisClient
*c
) {
6368 zrankGenericCommand(c
, 0);
6371 static void zrevrankCommand(redisClient
*c
) {
6372 zrankGenericCommand(c
, 1);
6375 /* ========================= Hashes utility functions ======================= */
6376 #define REDIS_HASH_KEY 1
6377 #define REDIS_HASH_VALUE 2
6379 /* Check the length of a number of objects to see if we need to convert a
6380 * zipmap to a real hash. Note that we only check string encoded objects
6381 * as their string length can be queried in constant time. */
6382 static void hashTryConversion(robj
*subject
, robj
**argv
, int start
, int end
) {
6384 if (subject
->encoding
!= REDIS_ENCODING_ZIPMAP
) return;
6386 for (i
= start
; i
<= end
; i
++) {
6387 if (argv
[i
]->encoding
== REDIS_ENCODING_RAW
&&
6388 sdslen(argv
[i
]->ptr
) > server
.hash_max_zipmap_value
)
6390 convertToRealHash(subject
);
6396 /* Encode given objects in-place when the hash uses a dict. */
6397 static void hashTryObjectEncoding(robj
*subject
, robj
**o1
, robj
**o2
) {
6398 if (subject
->encoding
== REDIS_ENCODING_HT
) {
6399 if (o1
) *o1
= tryObjectEncoding(*o1
);
6400 if (o2
) *o2
= tryObjectEncoding(*o2
);
6404 /* Get the value from a hash identified by key. Returns either a string
6405 * object or NULL if the value cannot be found. The refcount of the object
6406 * is always increased by 1 when the value was found. */
6407 static robj
*hashGet(robj
*o
, robj
*key
) {
6409 if (o
->encoding
== REDIS_ENCODING_ZIPMAP
) {
6412 key
= getDecodedObject(key
);
6413 if (zipmapGet(o
->ptr
,key
->ptr
,sdslen(key
->ptr
),&v
,&vlen
)) {
6414 value
= createStringObject((char*)v
,vlen
);
6418 dictEntry
*de
= dictFind(o
->ptr
,key
);
6420 value
= dictGetEntryVal(de
);
6421 incrRefCount(value
);
6427 /* Test if the key exists in the given hash. Returns 1 if the key
6428 * exists and 0 when it doesn't. */
6429 static int hashExists(robj
*o
, robj
*key
) {
6430 if (o
->encoding
== REDIS_ENCODING_ZIPMAP
) {
6431 key
= getDecodedObject(key
);
6432 if (zipmapExists(o
->ptr
,key
->ptr
,sdslen(key
->ptr
))) {
6438 if (dictFind(o
->ptr
,key
) != NULL
) {
6445 /* Add an element, discard the old if the key already exists.
6446 * Return 0 on insert and 1 on update. */
6447 static int hashSet(robj
*o
, robj
*key
, robj
*value
) {
6449 if (o
->encoding
== REDIS_ENCODING_ZIPMAP
) {
6450 key
= getDecodedObject(key
);
6451 value
= getDecodedObject(value
);
6452 o
->ptr
= zipmapSet(o
->ptr
,
6453 key
->ptr
,sdslen(key
->ptr
),
6454 value
->ptr
,sdslen(value
->ptr
), &update
);
6456 decrRefCount(value
);
6458 /* Check if the zipmap needs to be upgraded to a real hash table */
6459 if (zipmapLen(o
->ptr
) > server
.hash_max_zipmap_entries
)
6460 convertToRealHash(o
);
6462 if (dictReplace(o
->ptr
,key
,value
)) {
6469 incrRefCount(value
);
6474 /* Delete an element from a hash.
6475 * Return 1 on deleted and 0 on not found. */
6476 static int hashDelete(robj
*o
, robj
*key
) {
6478 if (o
->encoding
== REDIS_ENCODING_ZIPMAP
) {
6479 key
= getDecodedObject(key
);
6480 o
->ptr
= zipmapDel(o
->ptr
,key
->ptr
,sdslen(key
->ptr
), &deleted
);
6483 deleted
= dictDelete((dict
*)o
->ptr
,key
) == DICT_OK
;
6484 /* Always check if the dictionary needs a resize after a delete. */
6485 if (deleted
&& htNeedsResize(o
->ptr
)) dictResize(o
->ptr
);
6490 /* Return the number of elements in a hash. */
6491 static unsigned long hashLength(robj
*o
) {
6492 return (o
->encoding
== REDIS_ENCODING_ZIPMAP
) ?
6493 zipmapLen((unsigned char*)o
->ptr
) : dictSize((dict
*)o
->ptr
);
6496 /* Structure to hold hash iteration abstraction. Note that iteration over
6497 * hashes involves both fields and values. Because it is possible that
6498 * not both are required, store pointers in the iterator to avoid
6499 * unnecessary memory allocation for fields/values. */
6503 unsigned char *zk
, *zv
;
6504 unsigned int zklen
, zvlen
;
6510 static hashIterator
*hashInitIterator(robj
*subject
) {
6511 hashIterator
*hi
= zmalloc(sizeof(hashIterator
));
6512 hi
->encoding
= subject
->encoding
;
6513 if (hi
->encoding
== REDIS_ENCODING_ZIPMAP
) {
6514 hi
->zi
= zipmapRewind(subject
->ptr
);
6515 } else if (hi
->encoding
== REDIS_ENCODING_HT
) {
6516 hi
->di
= dictGetIterator(subject
->ptr
);
6523 static void hashReleaseIterator(hashIterator
*hi
) {
6524 if (hi
->encoding
== REDIS_ENCODING_HT
) {
6525 dictReleaseIterator(hi
->di
);
6530 /* Move to the next entry in the hash. Return REDIS_OK when the next entry
6531 * could be found and REDIS_ERR when the iterator reaches the end. */
6532 static int hashNext(hashIterator
*hi
) {
6533 if (hi
->encoding
== REDIS_ENCODING_ZIPMAP
) {
6534 if ((hi
->zi
= zipmapNext(hi
->zi
, &hi
->zk
, &hi
->zklen
,
6535 &hi
->zv
, &hi
->zvlen
)) == NULL
) return REDIS_ERR
;
6537 if ((hi
->de
= dictNext(hi
->di
)) == NULL
) return REDIS_ERR
;
6542 /* Get key or value object at current iteration position.
6543 * This increases the refcount of the field object by 1.
 *
 * 'what' is tested against REDIS_HASH_KEY to choose between the key and
 * the value. With zipmap encoding a new string object is built from the
 * bytes captured by hashNext() (zk/zklen or zv/zvlen); in the remaining
 * branch the object is read from the current dict entry hi->de. */
6544 static robj
*hashCurrent(hashIterator
*hi
, int what
) {
6546 if (hi
->encoding
== REDIS_ENCODING_ZIPMAP
) {
6547 if (what
& REDIS_HASH_KEY
) {
6548 o
= createStringObject((char*)hi
->zk
,hi
->zklen
);
6550 o
= createStringObject((char*)hi
->zv
,hi
->zvlen
);
6553 if (what
& REDIS_HASH_KEY
) {
6554 o
= dictGetEntryKey(hi
->de
);
6556 o
= dictGetEntryVal(hi
->de
);
/* Look up 'key' for writing in c->db and hand back the hash stored there.
 * The visible code also creates a fresh hash with createHashObject() and
 * adds it to c->db->dict under 'key' (presumably only when the lookup found
 * nothing; the guarding condition is not visible in this excerpt), and
 * replies shared.wrongtypeerr when the object is not a REDIS_HASH.
 * The hash commands in this file simply return when this yields NULL. */
6563 static robj
*hashLookupWriteOrCreate(redisClient
*c
, robj
*key
) {
6564 robj
*o
= lookupKeyWrite(c
->db
,key
);
6566 o
= createHashObject();
6567 dictAdd(c
->db
->dict
,key
,o
);
6570 if (o
->type
!= REDIS_HASH
) {
6571 addReply(c
,shared
.wrongtypeerr
);
6578 /* ============================= Hash commands ============================== */
/* HSET: c->argv[1] is the key, c->argv[2] the field, c->argv[3] the value.
 * The hash is obtained (or created) through hashLookupWriteOrCreate().
 * Replies shared.czero when hashSet() returns non-zero (kept in 'update')
 * and shared.cone otherwise. */
6579 static void hsetCommand(redisClient
*c
) {
6583 if ((o
= hashLookupWriteOrCreate(c
,c
->argv
[1])) == NULL
) return;
/* Field and value are argv[2]..argv[3]; both may be replaced in place by
 * hashTryObjectEncoding(), which receives their addresses. */
6584 hashTryConversion(o
,c
->argv
,2,3);
6585 hashTryObjectEncoding(o
,&c
->argv
[2], &c
->argv
[3]);
6586 update
= hashSet(o
,c
->argv
[2],c
->argv
[3]);
6587 addReply(c
, update
? shared
.czero
: shared
.cone
);
/* HSETNX: same arguments as HSET (key, field, value in c->argv[1..3]).
 * When hashExists() finds the field the reply is shared.czero; the code
 * that encodes and stores the pair replies shared.cone. */
6591 static void hsetnxCommand(redisClient
*c
) {
6593 if ((o
= hashLookupWriteOrCreate(c
,c
->argv
[1])) == NULL
) return;
6594 hashTryConversion(o
,c
->argv
,2,3);
6596 if (hashExists(o
, c
->argv
[2])) {
6597 addReply(c
, shared
.czero
);
6599 hashTryObjectEncoding(o
,&c
->argv
[2], &c
->argv
[3]);
6600 hashSet(o
,c
->argv
[2],c
->argv
[3]);
6601 addReply(c
, shared
.cone
);
/* HMSET: set several field/value pairs in the hash at key c->argv[1].
 * Pairs start at c->argv[2], so a well-formed call has an even c->argc;
 * an odd c->argc produces the "wrong number of arguments" error reply.
 * Replies shared.ok. */
6606 static void hmsetCommand(redisClient
*c
) {
6610 if ((c
->argc
% 2) == 1) {
6611 addReplySds(c
,sdsnew("-ERR wrong number of arguments for HMSET\r\n"));
6615 if ((o
= hashLookupWriteOrCreate(c
,c
->argv
[1])) == NULL
) return;
6616 hashTryConversion(o
,c
->argv
,2,c
->argc
-1);
/* Walk the arguments two at a time: argv[i] is the field, argv[i+1] its
 * value. */
6617 for (i
= 2; i
< c
->argc
; i
+= 2) {
6618 hashTryObjectEncoding(o
,&c
->argv
[i
], &c
->argv
[i
+1]);
6619 hashSet(o
,c
->argv
[i
],c
->argv
[i
+1]);
6621 addReply(c
, shared
.ok
);
/* HINCRBY: c->argv[1] is the key, c->argv[2] the field and c->argv[3] the
 * increment, parsed into 'incr'. When the field already exists its value is
 * parsed into 'value', passing "hash value is not an integer" as the
 * message argument; the reference returned by hashGet() is released with
 * decrRefCount() on both visible paths. 'value' is then stored back as a
 * string object and sent to the client with addReplyLongLong().
 * NOTE(review): the statement combining 'value' and 'incr' is not visible
 * in this excerpt. */
6625 static void hincrbyCommand(redisClient
*c
) {
6626 long long value
, incr
;
6627 robj
*o
, *current
, *new;
6629 if (getLongLongFromObjectOrReply(c
,c
->argv
[3],&incr
,NULL
) != REDIS_OK
) return;
6630 if ((o
= hashLookupWriteOrCreate(c
,c
->argv
[1])) == NULL
) return;
6631 if ((current
= hashGet(o
,c
->argv
[2])) != NULL
) {
6632 if (getLongLongFromObjectOrReply(c
,current
,&value
,
6633 "hash value is not an integer") != REDIS_OK
) {
6634 decrRefCount(current
);
6637 decrRefCount(current
);
6643 new = createStringObjectFromLongLong(value
);
/* Only the field may be re-encoded here; NULL is passed for the value. */
6644 hashTryObjectEncoding(o
,&c
->argv
[2],NULL
);
6645 hashSet(o
,c
->argv
[2],new);
6647 addReplyLongLong(c
,value
);
/* HGET: reply with the value of field c->argv[2] in the hash at key
 * c->argv[1]. shared.nullbulk is handed to lookupKeyReadOrReply() as the
 * fallback reply and is also sent when hashGet() returns NULL. A value
 * returned by hashGet() is released with decrRefCount() after it has been
 * queued with addReplyBulk(). */
6651 static void hgetCommand(redisClient
*c
) {
6653 if ((o
= lookupKeyReadOrReply(c
,c
->argv
[1],shared
.nullbulk
)) == NULL
||
6654 checkType(c
,o
,REDIS_HASH
)) return;
6656 if ((value
= hashGet(o
,c
->argv
[2])) != NULL
) {
6657 addReplyBulk(c
,value
);
6658 decrRefCount(value
);
6660 addReply(c
,shared
.nullbulk
);
/* HMGET: reply with the values of fields c->argv[2..argc-1] in the hash at
 * key c->argv[1]. The reply is a multi-bulk of c->argc-2 items; each item
 * is the field value, or shared.nullbulk when the key is missing or
 * hashGet() returns NULL. A key holding a non-hash value gets
 * shared.wrongtypeerr. */
6664 static void hmgetCommand(redisClient
*c
) {
6667 o
= lookupKeyRead(c
->db
,c
->argv
[1]);
6668 if (o
!= NULL
&& o
->type
!= REDIS_HASH
) {
6669 addReply(c
,shared
.wrongtypeerr
);
6672 /* Note the check for o != NULL happens inside the loop. This is
6673 * done because objects that cannot be found are considered to be
6674 * an empty hash. The reply should then be a series of NULLs. */
6675 addReplySds(c
,sdscatprintf(sdsempty(),"*%d\r\n",c
->argc
-2));
6676 for (i
= 2; i
< c
->argc
; i
++) {
6677 if (o
!= NULL
&& (value
= hashGet(o
,c
->argv
[i
])) != NULL
) {
6678 addReplyBulk(c
,value
);
6679 decrRefCount(value
);
6681 addReply(c
,shared
.nullbulk
);
/* HDEL: delete field c->argv[2] from the hash at key c->argv[1].
 * shared.czero is handed to lookupKeyWriteOrReply() as the fallback reply.
 * When hashDelete() succeeds the reply is shared.cone, and the key itself
 * is removed with deleteKey() if the hash has become empty; the other
 * visible reply is shared.czero. */
6686 static void hdelCommand(redisClient
*c
) {
6688 if ((o
= lookupKeyWriteOrReply(c
,c
->argv
[1],shared
.czero
)) == NULL
||
6689 checkType(c
,o
,REDIS_HASH
)) return;
6691 if (hashDelete(o
,c
->argv
[2])) {
6692 if (hashLength(o
) == 0) deleteKey(c
->db
,c
->argv
[1]);
6693 addReply(c
,shared
.cone
);
6696 addReply(c
,shared
.czero
);
/* HLEN: reply with hashLength() of the hash at key c->argv[1].
 * shared.czero is handed to lookupKeyReadOrReply() as the fallback reply;
 * checkType() guards against non-hash values. */
6700 static void hlenCommand(redisClient
*c
) {
6702 if ((o
= lookupKeyReadOrReply(c
,c
->argv
[1],shared
.czero
)) == NULL
||
6703 checkType(c
,o
,REDIS_HASH
)) return;
6705 addReplyUlong(c
,hashLength(o
));
/* Shared implementation behind HKEYS, HVALS and HGETALL. 'flags' is a
 * bitmask of REDIS_HASH_KEY and/or REDIS_HASH_VALUE selecting what is
 * emitted for every entry of the hash at key c->argv[1].
 * shared.emptymultibulk is handed to lookupKeyReadOrReply() as the fallback
 * reply.
 *
 * 'lenobj' is created with a NULL payload before the iteration and only
 * receives its "*<count>\r\n" text after the loop, once 'count' is known.
 * NOTE(review): the lines that queue 'lenobj' and update 'count' are not
 * visible in this excerpt. */
6708 static void genericHgetallCommand(redisClient
*c
, int flags
) {
6709 robj
*o
, *lenobj
, *obj
;
6710 unsigned long count
= 0;
6713 if ((o
= lookupKeyReadOrReply(c
,c
->argv
[1],shared
.emptymultibulk
)) == NULL
6714 || checkType(c
,o
,REDIS_HASH
)) return;
6716 lenobj
= createObject(REDIS_STRING
,NULL
);
6718 decrRefCount(lenobj
);
6720 hi
= hashInitIterator(o
);
6721 while (hashNext(hi
) != REDIS_ERR
) {
6722 if (flags
& REDIS_HASH_KEY
) {
6723 obj
= hashCurrent(hi
,REDIS_HASH_KEY
);
6724 addReplyBulk(c
,obj
);
6728 if (flags
& REDIS_HASH_VALUE
) {
6729 obj
= hashCurrent(hi
,REDIS_HASH_VALUE
);
6730 addReplyBulk(c
,obj
);
6735 hashReleaseIterator(hi
);
6737 lenobj
->ptr
= sdscatprintf(sdsempty(),"*%lu\r\n",count
);
/* HKEYS: genericHgetallCommand() restricted to field names. */
6740 static void hkeysCommand(redisClient
*c
) {
6741 genericHgetallCommand(c
,REDIS_HASH_KEY
);
/* HVALS: genericHgetallCommand() restricted to values. */
6744 static void hvalsCommand(redisClient
*c
) {
6745 genericHgetallCommand(c
,REDIS_HASH_VALUE
);
/* HGETALL: genericHgetallCommand() with both the key and the value flag
 * set, so each entry contributes its field name and its value. */
6748 static void hgetallCommand(redisClient
*c
) {
6749 genericHgetallCommand(c
,REDIS_HASH_KEY
|REDIS_HASH_VALUE
);
/* HEXISTS: reply shared.cone when hashExists() finds field c->argv[2] in
 * the hash at key c->argv[1], shared.czero otherwise. shared.czero is also
 * handed to lookupKeyReadOrReply() as the fallback reply. */
6752 static void hexistsCommand(redisClient
*c
) {
6754 if ((o
= lookupKeyReadOrReply(c
,c
->argv
[1],shared
.czero
)) == NULL
||
6755 checkType(c
,o
,REDIS_HASH
)) return;
6757 addReply(c
, hashExists(o
,c
->argv
[2]) ? shared
.cone
: shared
.czero
);
/* Convert a hash that is not yet hash-table encoded (asserted below) into
 * REDIS_ENCODING_HT. Every zipmap entry found in o->ptr is copied into a
 * new dict created with hashDictType: key and value become string objects,
 * are passed through tryObjectEncoding(), and are added with dictAdd().
 * Finally o->encoding is switched to REDIS_ENCODING_HT.
 * NOTE(review): the lines that install the new dict in o->ptr and dispose
 * of the old zipmap are not visible in this excerpt. */
6760 static void convertToRealHash(robj
*o
) {
6761 unsigned char *key
, *val
, *p
, *zm
= o
->ptr
;
6762 unsigned int klen
, vlen
;
/* Note: the local variable below has the same name as its type. */
6763 dict
*dict
= dictCreate(&hashDictType
,NULL
);
6765 assert(o
->type
== REDIS_HASH
&& o
->encoding
!= REDIS_ENCODING_HT
);
6766 p
= zipmapRewind(zm
);
6767 while((p
= zipmapNext(p
,&key
,&klen
,&val
,&vlen
)) != NULL
) {
6768 robj
*keyobj
, *valobj
;
6770 keyobj
= createStringObject((char*)key
,klen
);
6771 valobj
= createStringObject((char*)val
,vlen
);
6772 keyobj
= tryObjectEncoding(keyobj
);
6773 valobj
= tryObjectEncoding(valobj
);
6774 dictAdd(dict
,keyobj
,valobj
);
6776 o
->encoding
= REDIS_ENCODING_HT
;
6781 /* ========================= Non type-specific commands ==================== */
/* FLUSHDB: empty the client's currently selected database. server.dirty is
 * increased by the number of keys present before the flush, watched keys of
 * this database are touched, and both the main dict and the expires dict
 * are emptied. Replies shared.ok. */
6783 static void flushdbCommand(redisClient
*c
) {
6784 server
.dirty
+= dictSize(c
->db
->dict
);
6785 touchWatchedKeysOnFlush(c
->db
->id
);
6786 dictEmpty(c
->db
->dict
);
6787 dictEmpty(c
->db
->expires
);
6788 addReply(c
,shared
.ok
);
/* FLUSHALL: touchWatchedKeysOnFlush() is called with -1 instead of a
 * database id, server.dirty is increased by the value returned from
 * emptyDb(), and shared.ok is queued. If a background save child exists
 * (bgsavechildpid != -1) it is killed with SIGKILL and its temp file is
 * removed; the RDB file is then written with rdbSave().
 * Note the reply is queued before the save runs. */
6791 static void flushallCommand(redisClient
*c
) {
6792 touchWatchedKeysOnFlush(-1);
6793 server
.dirty
+= emptyDb();
6794 addReply(c
,shared
.ok
);
6795 if (server
.bgsavechildpid
!= -1) {
6796 kill(server
.bgsavechildpid
,SIGKILL
);
6797 rdbRemoveTempFile(server
.bgsavechildpid
);
6799 rdbSave(server
.dbfilename
);
/* Allocate a redisSortOperation with zmalloc() and store 'pattern' in it.
 * The pointer is stored as-is; no reference count is changed in the visible
 * code. sortCommand() installs zfree as the free method of the list that
 * holds these operations.
 * NOTE(review): the use of 'type' is not visible in this excerpt. */
6803 static redisSortOperation
*createSortOperation(int type
, robj
*pattern
) {
6804 redisSortOperation
*so
= zmalloc(sizeof(*so
));
6806 so
->pattern
= pattern
;
6810 /* Return the value associated to the key with a name obtained
6811 * substituting the first occurrence of '*' in 'pattern' with 'subst'.
6812 * The returned object will always have its refcount increased by 1
6813 * when it is non-NULL.
 *
 * A pattern of the form "prefix*postfix->field" additionally dereferences
 * 'field' inside the hash stored at the substituted key. */
6814 static robj
*lookupKeyByPattern(redisDb
*db
, robj
*pattern
, robj
*subst
) {
6817 robj keyobj
, fieldobj
, *o
;
6818 int prefixlen
, sublen
, postfixlen
, fieldlen
;
6819 /* Exploit the internal sds representation to create a sds string allocated on the stack in order to make this function faster */
6823 char buf
[REDIS_SORTKEY_MAX
+1];
6824 } keyname
, fieldname
;
6826 /* If the pattern is "#" return the substitution object itself in order
6827 * to implement the "SORT ... GET #" feature. */
6828 spat
= pattern
->ptr
;
6829 if (spat
[0] == '#' && spat
[1] == '\0') {
6830 incrRefCount(subst
);
6834 /* The substitution object may be specially encoded. If so we create
6835 * a decoded object on the fly. Otherwise getDecodedObject will just
6836 * increment the ref count, that we'll decrement later. */
6837 subst
= getDecodedObject(subst
);
/* NOTE(review): the early return below happens after getDecodedObject()
 * and no decrRefCount(subst) is visible on this path, unlike the paths
 * further down -- possible reference leak, confirm. */
6840 if (sdslen(spat
)+sdslen(ssub
)-1 > REDIS_SORTKEY_MAX
) return NULL
;
6841 p
= strchr(spat
,'*');
6843 decrRefCount(subst
);
6847 /* Find out if we're dealing with a hash dereference. */
6848 if ((f
= strstr(p
+1, "->")) != NULL
) {
/* fieldlen counts everything from the "->" marker to the end of the
 * pattern, so the field name itself is fieldlen-2 characters long and
 * copying fieldlen-1 bytes from f+2 includes the terminator. */
6849 fieldlen
= sdslen(spat
)-(f
-spat
);
6850 /* this also copies \0 character */
6851 memcpy(fieldname
.buf
,f
+2,fieldlen
-1);
6852 fieldname
.len
= fieldlen
-2;
/* Assemble the key name as <prefix><subst><postfix>, where the postfix
 * excludes the '*' and any "->field" part. */
6858 sublen
= sdslen(ssub
);
6859 postfixlen
= sdslen(spat
)-(prefixlen
+1)-fieldlen
;
6860 memcpy(keyname
.buf
,spat
,prefixlen
);
6861 memcpy(keyname
.buf
+prefixlen
,ssub
,sublen
);
6862 memcpy(keyname
.buf
+prefixlen
+sublen
,p
+1,postfixlen
);
6863 keyname
.buf
[prefixlen
+sublen
+postfixlen
] = '\0';
6864 keyname
.len
= prefixlen
+sublen
+postfixlen
;
6865 decrRefCount(subst
);
6867 /* Lookup substituted key */
/* The pointer handed over skips sizeof(long)*2 bytes at the start of
 * 'keyname' -- presumably the header fields that precede buf; confirm
 * against the struct definition. */
6868 initStaticStringObject(keyobj
,((char*)&keyname
)+(sizeof(long)*2));
6869 o
= lookupKeyRead(db
,&keyobj
);
6870 if (o
== NULL
) return NULL
;
6873 if (o
->type
!= REDIS_HASH
|| fieldname
.len
< 1) return NULL
;
6875 /* Retrieve value from hash by the field name. This operation
6876 * already increases the refcount of the returned object. */
6877 initStaticStringObject(fieldobj
,((char*)&fieldname
)+(sizeof(long)*2));
6878 o
= hashGet(o
, &fieldobj
);
6880 if (o
->type
!= REDIS_STRING
) return NULL
;
6882 /* Every object that this function returns needs to have its refcount
6883 * increased. sortCommand decreases it again. */
6890 /* sortCompare() is used by qsort in sortCommand(). Given that qsort_r with
6891 * the additional parameter is not standard but BSD-specific, we have to
6892 * pass sorting parameters via the global 'server' structure
 *
 * Reads server.sort_alpha (numeric vs. alphanumeric comparison),
 * server.sort_bypattern (compare the precomputed cmpobj instead of the
 * element itself) and server.sort_desc (negate the result). */
6893 static int sortCompare(const void *s1
, const void *s2
) {
6894 const redisSortObject
*so1
= s1
, *so2
= s2
;
6897 if (!server
.sort_alpha
) {
6898 /* Numeric sorting. Here it's trivial as we precomputed scores */
6899 if (so1
->u
.score
> so2
->u
.score
) {
6901 } else if (so1
->u
.score
< so2
->u
.score
) {
6907 /* Alphanumeric sorting */
6908 if (server
.sort_bypattern
) {
6909 if (!so1
->u
.cmpobj
|| !so2
->u
.cmpobj
) {
6910 /* At least one compare object is NULL */
6911 if (so1
->u
.cmpobj
== so2
->u
.cmpobj
)
6913 else if (so1
->u
.cmpobj
== NULL
)
6918 /* We have both the objects, use strcoll */
6919 cmp
= strcoll(so1
->u
.cmpobj
->ptr
,so2
->u
.cmpobj
->ptr
);
6922 /* Compare elements directly. */
6923 cmp
= compareStringObjects(so1
->obj
,so2
->obj
);
/* Descending order is obtained by flipping the sign of the result. */
6926 return server
.sort_desc
? -cmp
: cmp
;
6929 /* The SORT command is the most complex command in Redis. Warning: this code
6930 * is optimized for speed and a bit less for readability
 *
 * Options recognized below (matched case-insensitively): ASC, DESC, ALPHA,
 * LIMIT <start> <count>, STORE <key>, BY <pattern>, GET <pattern>.
 * Without STORE the result is sent to the client as a multi-bulk reply;
 * with STORE it is saved as a list under the given key and the number of
 * stored elements is sent back as an integer reply. */
6931 static void sortCommand(redisClient
*c
) {
6934 int desc
= 0, alpha
= 0;
6935 int limit_start
= 0, limit_count
= -1, start
, end
;
6936 int j
, dontsort
= 0, vectorlen
;
6937 int getop
= 0; /* GET operation counter */
6938 robj
*sortval
, *sortby
= NULL
, *storekey
= NULL
;
6939 redisSortObject
*vector
; /* Resulting vector to sort */
6941 /* Lookup the key to sort. It must be of the right types */
6942 sortval
= lookupKeyRead(c
->db
,c
->argv
[1]);
6943 if (sortval
== NULL
) {
6944 addReply(c
,shared
.emptymultibulk
);
6947 if (sortval
->type
!= REDIS_SET
&& sortval
->type
!= REDIS_LIST
&&
6948 sortval
->type
!= REDIS_ZSET
)
6950 addReply(c
,shared
.wrongtypeerr
);
6954 /* Create a list of operations to perform for every sorted element.
6955 * Operations can be GET/DEL/INCR/DECR */
6956 operations
= listCreate();
6957 listSetFreeMethod(operations
,zfree
);
6960 /* Now we need to protect sortval incrementing its count, in the future
6961 * SORT may have options able to overwrite/delete keys during the sorting
6962 * and the sorted key itself may get destroyed */
6963 incrRefCount(sortval
);
6965 /* The SORT command has an SQL-alike syntax, parse it */
6966 while(j
< c
->argc
) {
/* Number of arguments that follow the one being examined. */
6967 int leftargs
= c
->argc
-j
-1;
6968 if (!strcasecmp(c
->argv
[j
]->ptr
,"asc")) {
6970 } else if (!strcasecmp(c
->argv
[j
]->ptr
,"desc")) {
6972 } else if (!strcasecmp(c
->argv
[j
]->ptr
,"alpha")) {
6974 } else if (!strcasecmp(c
->argv
[j
]->ptr
,"limit") && leftargs
>= 2) {
/* NOTE(review): atoi() performs no error checking, so malformed LIMIT
 * arguments are not rejected here. */
6975 limit_start
= atoi(c
->argv
[j
+1]->ptr
);
6976 limit_count
= atoi(c
->argv
[j
+2]->ptr
);
6978 } else if (!strcasecmp(c
->argv
[j
]->ptr
,"store") && leftargs
>= 1) {
6979 storekey
= c
->argv
[j
+1];
6981 } else if (!strcasecmp(c
->argv
[j
]->ptr
,"by") && leftargs
>= 1) {
6982 sortby
= c
->argv
[j
+1];
6983 /* If the BY pattern does not contain '*', i.e. it is constant,
6984 * we don't need to sort nor to lookup the weight keys. */
6985 if (strchr(c
->argv
[j
+1]->ptr
,'*') == NULL
) dontsort
= 1;
6987 } else if (!strcasecmp(c
->argv
[j
]->ptr
,"get") && leftargs
>= 1) {
6988 listAddNodeTail(operations
,createSortOperation(
6989 REDIS_SORT_GET
,c
->argv
[j
+1]));
/* Syntax error path: undo the protection taken above, drop the operations
 * list and report the error. */
6993 decrRefCount(sortval
);
6994 listRelease(operations
);
6995 addReply(c
,shared
.syntaxerr
);
7001 /* Load the sorting vector with all the objects to sort */
7002 switch(sortval
->type
) {
7003 case REDIS_LIST
: vectorlen
= listLength((list
*)sortval
->ptr
); break;
7004 case REDIS_SET
: vectorlen
= dictSize((dict
*)sortval
->ptr
); break;
7005 case REDIS_ZSET
: vectorlen
= dictSize(((zset
*)sortval
->ptr
)->dict
); break;
7006 default: vectorlen
= 0; redisPanic("Bad SORT type"); /* Avoid GCC warning */
7008 vector
= zmalloc(sizeof(redisSortObject
)*vectorlen
);
7011 if (sortval
->type
== REDIS_LIST
) {
7012 list
*list
= sortval
->ptr
;
7016 listRewind(list
,&li
);
7017 while((ln
= listNext(&li
))) {
7018 robj
*ele
= ln
->value
;
7019 vector
[j
].obj
= ele
;
7020 vector
[j
].u
.score
= 0;
7021 vector
[j
].u
.cmpobj
= NULL
;
7029 if (sortval
->type
== REDIS_SET
) {
7032 zset
*zs
= sortval
->ptr
;
7036 di
= dictGetIterator(set
);
7037 while((setele
= dictNext(di
)) != NULL
) {
7038 vector
[j
].obj
= dictGetEntryKey(setele
);
7039 vector
[j
].u
.score
= 0;
7040 vector
[j
].u
.cmpobj
= NULL
;
7043 dictReleaseIterator(di
);
/* Every element must have been loaded exactly once. */
7045 redisAssert(j
== vectorlen
);
7047 /* Now it's time to load the right scores in the sorting vector */
7048 if (dontsort
== 0) {
7049 for (j
= 0; j
< vectorlen
; j
++) {
7052 /* lookup value to sort by */
7053 byval
= lookupKeyByPattern(c
->db
,sortby
,vector
[j
].obj
);
7054 if (!byval
) continue;
7056 /* use object itself to sort by */
7057 byval
= vector
[j
].obj
;
7061 if (sortby
) vector
[j
].u
.cmpobj
= getDecodedObject(byval
);
7063 if (byval
->encoding
== REDIS_ENCODING_RAW
) {
7064 vector
[j
].u
.score
= strtod(byval
->ptr
,NULL
);
7065 } else if (byval
->encoding
== REDIS_ENCODING_INT
) {
7066 /* Don't need to decode the object if it's
7067 * integer-encoded (the only encoding supported) so
7068 * far. We can just cast it */
7069 vector
[j
].u
.score
= (long)byval
->ptr
;
7071 redisAssert(1 != 1);
7075 /* when the object was retrieved using lookupKeyByPattern,
7076 * its refcount needs to be decreased. */
7078 decrRefCount(byval
);
7083 /* We are ready to sort the vector... perform a bit of sanity check
7084 * on the LIMIT option too. We'll use a partial version of quicksort. */
/* A negative start is treated as 0; a negative count means "up to the last
 * element". 'end' is an inclusive index. */
7085 start
= (limit_start
< 0) ? 0 : limit_start
;
7086 end
= (limit_count
< 0) ? vectorlen
-1 : start
+limit_count
-1;
7087 if (start
>= vectorlen
) {
7088 start
= vectorlen
-1;
7091 if (end
>= vectorlen
) end
= vectorlen
-1;
7093 if (dontsort
== 0) {
/* Publish the sort parameters where sortCompare() can read them. */
7094 server
.sort_desc
= desc
;
7095 server
.sort_alpha
= alpha
;
7096 server
.sort_bypattern
= sortby
? 1 : 0;
/* pqsort() is given the start/end range and is only used when a BY
 * pattern is present and the requested range is not the whole vector. */
7097 if (sortby
&& (start
!= 0 || end
!= vectorlen
-1))
7098 pqsort(vector
,vectorlen
,sizeof(redisSortObject
),sortCompare
, start
,end
);
7100 qsort(vector
,vectorlen
,sizeof(redisSortObject
),sortCompare
);
7103 /* Send command output to the output buffer, performing the specified
7104 * GET/DEL/INCR/DECR operations if any. */
/* With GET operations each selected element yields 'getop' items,
 * otherwise exactly one. */
7105 outputlen
= getop
? getop
*(end
-start
+1) : end
-start
+1;
7106 if (storekey
== NULL
) {
7107 /* STORE option not specified, send the sorting result to client */
7108 addReplySds(c
,sdscatprintf(sdsempty(),"*%d\r\n",outputlen
));
7109 for (j
= start
; j
<= end
; j
++) {
7113 if (!getop
) addReplyBulk(c
,vector
[j
].obj
);
7114 listRewind(operations
,&li
);
7115 while((ln
= listNext(&li
))) {
7116 redisSortOperation
*sop
= ln
->value
;
7117 robj
*val
= lookupKeyByPattern(c
->db
,sop
->pattern
,
7120 if (sop
->type
== REDIS_SORT_GET
) {
7122 addReply(c
,shared
.nullbulk
);
7124 addReplyBulk(c
,val
);
7128 redisAssert(sop
->type
== REDIS_SORT_GET
); /* always fails */
7133 robj
*listObject
= createListObject();
7134 list
*listPtr
= (list
*) listObject
->ptr
;
7136 /* STORE option specified, set the sorting result as a List object */
7137 for (j
= start
; j
<= end
; j
++) {
7142 listAddNodeTail(listPtr
,vector
[j
].obj
);
7143 incrRefCount(vector
[j
].obj
);
7145 listRewind(operations
,&li
);
7146 while((ln
= listNext(&li
))) {
7147 redisSortOperation
*sop
= ln
->value
;
7148 robj
*val
= lookupKeyByPattern(c
->db
,sop
->pattern
,
7151 if (sop
->type
== REDIS_SORT_GET
) {
7153 listAddNodeTail(listPtr
,createStringObject("",0));
7155 /* We should do a incrRefCount on val because it is
7156 * added to the list, but also a decrRefCount because
7157 * it is returned by lookupKeyByPattern. This results
7158 * in doing nothing at all. */
7159 listAddNodeTail(listPtr
,val
);
7162 redisAssert(sop
->type
== REDIS_SORT_GET
); /* always fails */
7166 if (dictReplace(c
->db
->dict
,storekey
,listObject
)) {
7167 incrRefCount(storekey
);
7169 /* Note: we add 1 because the DB is dirty anyway since even if the
7170 * SORT result is empty a new key is set and maybe the old content
7172 server
.dirty
+= 1+outputlen
;
7173 addReplySds(c
,sdscatprintf(sdsempty(),":%d\r\n",outputlen
));
7177 decrRefCount(sortval
);
7178 listRelease(operations
);
7179 for (j
= 0; j
< vectorlen
; j
++) {
7180 if (alpha
&& vector
[j
].u
.cmpobj
)
7181 decrRefCount(vector
[j
].u
.cmpobj
);
7186 /* Convert an amount of bytes into a human readable string in the form
7187 * of 100B, 2G, 100M, 4K, and so forth.
 *
 * The result is written into 's'. The visible branches use "%lluB" for
 * the plain byte count and "%.2fK" / "%.2fM" / "%.2fG" for values below
 * 1024^2, 1024^3 and 1024^4 respectively.
 * NOTE(review): sprintf() is unbounded, so 's' must be large enough for
 * the formatted text; the caller's buffer size is not visible here. */
7188 static void bytesToHuman(char *s
, unsigned long long n
) {
7193 sprintf(s
,"%lluB",n
);
7195 } else if (n
< (1024*1024)) {
7196 d
= (double)n
/(1024);
7197 sprintf(s
,"%.2fK",d
);
7198 } else if (n
< (1024LL*1024*1024)) {
7199 d
= (double)n
/(1024*1024);
7200 sprintf(s
,"%.2fM",d
);
7201 } else if (n
< (1024LL*1024*1024*1024)) {
7202 d
= (double)n
/(1024LL*1024*1024);
7203 sprintf(s
,"%.2fG",d
);
7207 /* Create the string returned by the INFO command. This is decoupled
7208 * by the INFO command itself as we need to report the same information
7209 * on memory corruption problems.
 *
 * The text is a sequence of "name:value\r\n" lines built with
 * sdscatprintf(). Replication details are appended only when
 * server.masterhost is set, virtual memory statistics only when
 * server.vm_enabled is set, and one "dbN:keys=...,expires=..." line is
 * appended for every database that has keys or expires.
 * NOTE(review): several format lines and arguments are not visible in this
 * excerpt, so the pairing of conversions and arguments cannot be checked
 * from here. */
7210 static sds
genRedisInfoString(void) {
7212 time_t uptime
= time(NULL
)-server
.stat_starttime
;
7216 bytesToHuman(hmem
,zmalloc_used_memory());
7217 info
= sdscatprintf(sdsempty(),
7218 "redis_version:%s\r\n"
7219 "redis_git_sha1:%s\r\n"
7220 "redis_git_dirty:%d\r\n"
7222 "multiplexing_api:%s\r\n"
7223 "process_id:%ld\r\n"
7224 "uptime_in_seconds:%ld\r\n"
7225 "uptime_in_days:%ld\r\n"
7226 "connected_clients:%d\r\n"
7227 "connected_slaves:%d\r\n"
7228 "blocked_clients:%d\r\n"
7229 "used_memory:%zu\r\n"
7230 "used_memory_human:%s\r\n"
7231 "changes_since_last_save:%lld\r\n"
7232 "bgsave_in_progress:%d\r\n"
7233 "last_save_time:%ld\r\n"
7234 "bgrewriteaof_in_progress:%d\r\n"
7235 "total_connections_received:%lld\r\n"
7236 "total_commands_processed:%lld\r\n"
7237 "expired_keys:%lld\r\n"
7238 "hash_max_zipmap_entries:%zu\r\n"
7239 "hash_max_zipmap_value:%zu\r\n"
7240 "pubsub_channels:%ld\r\n"
7241 "pubsub_patterns:%u\r\n"
7246 strtol(REDIS_GIT_DIRTY
,NULL
,10) > 0,
7247 (sizeof(long) == 8) ? "64" : "32",
7252 listLength(server
.clients
)-listLength(server
.slaves
),
7253 listLength(server
.slaves
),
7254 server
.blpop_blocked_clients
,
7255 zmalloc_used_memory(),
7258 server
.bgsavechildpid
!= -1,
7260 server
.bgrewritechildpid
!= -1,
7261 server
.stat_numconnections
,
7262 server
.stat_numcommands
,
7263 server
.stat_expiredkeys
,
7264 server
.hash_max_zipmap_entries
,
7265 server
.hash_max_zipmap_value
,
7266 dictSize(server
.pubsub_channels
),
7267 listLength(server
.pubsub_patterns
),
7268 server
.vm_enabled
!= 0,
7269 server
.masterhost
== NULL
? "master" : "slave"
7271 if (server
.masterhost
) {
7272 info
= sdscatprintf(info
,
7273 "master_host:%s\r\n"
7274 "master_port:%d\r\n"
7275 "master_link_status:%s\r\n"
7276 "master_last_io_seconds_ago:%d\r\n"
7279 (server
.replstate
== REDIS_REPL_CONNECTED
) ?
7281 server
.master
? ((int)(time(NULL
)-server
.master
->lastinteraction
)) : -1
7284 if (server
.vm_enabled
) {
7286 info
= sdscatprintf(info
,
7287 "vm_conf_max_memory:%llu\r\n"
7288 "vm_conf_page_size:%llu\r\n"
7289 "vm_conf_pages:%llu\r\n"
7290 "vm_stats_used_pages:%llu\r\n"
7291 "vm_stats_swapped_objects:%llu\r\n"
7292 "vm_stats_swappin_count:%llu\r\n"
7293 "vm_stats_swappout_count:%llu\r\n"
7294 "vm_stats_io_newjobs_len:%lu\r\n"
7295 "vm_stats_io_processing_len:%lu\r\n"
7296 "vm_stats_io_processed_len:%lu\r\n"
7297 "vm_stats_io_active_threads:%lu\r\n"
7298 "vm_stats_blocked_clients:%lu\r\n"
7299 ,(unsigned long long) server
.vm_max_memory
,
7300 (unsigned long long) server
.vm_page_size
,
7301 (unsigned long long) server
.vm_pages
,
7302 (unsigned long long) server
.vm_stats_used_pages
,
7303 (unsigned long long) server
.vm_stats_swapped_objects
,
7304 (unsigned long long) server
.vm_stats_swapins
,
7305 (unsigned long long) server
.vm_stats_swapouts
,
7306 (unsigned long) listLength(server
.io_newjobs
),
7307 (unsigned long) listLength(server
.io_processing
),
7308 (unsigned long) listLength(server
.io_processed
),
7309 (unsigned long) server
.io_active_threads
,
7310 (unsigned long) server
.vm_blocked_clients
7314 for (j
= 0; j
< server
.dbnum
; j
++) {
7315 long long keys
, vkeys
;
7317 keys
= dictSize(server
.db
[j
].dict
);
7318 vkeys
= dictSize(server
.db
[j
].expires
);
7319 if (keys
|| vkeys
) {
7320 info
= sdscatprintf(info
, "db%d:keys=%lld,expires=%lld\r\n",
/* INFO: send the string built by genRedisInfoString() as a bulk reply,
 * i.e. a "$<length>\r\n" header, the text itself, and a trailing CRLF.
 * The info sds is handed over to addReplySds(). */
7327 static void infoCommand(redisClient
*c
) {
7328 sds info
= genRedisInfoString();
7329 addReplySds(c
,sdscatprintf(sdsempty(),"$%lu\r\n",
7330 (unsigned long)sdslen(info
)));
7331 addReplySds(c
,info
);
7332 addReply(c
,shared
.crlf
);
/* MONITOR: flag the client with both REDIS_SLAVE and REDIS_MONITOR, append
 * it to server.monitors and reply shared.ok. A client that already carries
 * REDIS_SLAVE is left untouched and gets no reply from this function. */
7335 static void monitorCommand(redisClient
*c
) {
7336 /* ignore MONITOR if already slave or in monitor mode */
7337 if (c
->flags
& REDIS_SLAVE
) return;
7339 c
->flags
|= (REDIS_SLAVE
|REDIS_MONITOR
);
7341 listAddNodeTail(server
.monitors
,c
);
7342 addReply(c
,shared
.ok
);
7345 /* ================================= Expire ================================= */
/* Remove the expire associated with 'key' by deleting its entry from
 * db->expires. The branch below is taken when dictDelete() reports DICT_OK.
 * NOTE(review): the return statements are not visible in this excerpt. */
7346 static int removeExpire(redisDb
*db
, robj
*key
) {
7347 if (dictDelete(db
->expires
,key
) == DICT_OK
) {
/* Associate the expire time 'when' with 'key' in db->expires. The time is
 * stored directly in the entry's value slot, cast to void*, rather than in
 * a separately allocated object. The branch below handles dictAdd()
 * returning DICT_ERR.
 * NOTE(review): the return statements are not visible in this excerpt;
 * expireGenericCommand() replies shared.cone when the result is non-zero. */
7354 static int setExpire(redisDb
*db
, robj
*key
, time_t when
) {
7355 if (dictAdd(db
->expires
,key
,(void*)when
) == DICT_ERR
) {
7363 /* Return the expire time of the specified key, or -1 if no expire
7364 * is associated with this key (i.e. the key is non volatile) */
7365 static time_t getExpire(redisDb
*db
, robj
*key
) {
7368 /* No expire? return ASAP */
7369 if (dictSize(db
->expires
) == 0 ||
7370 (de
= dictFind(db
->expires
,key
)) == NULL
) return -1;
/* The entry value is the expire time itself, stored as a pointer-sized
 * integer by setExpire(). */
7372 return (time_t) dictGetEntryVal(de
);
/* Delete 'key' from 'db' if it has an expire time that is already in the
 * past. Returns 0 when the key has no expire or the expire time has not
 * passed yet; otherwise removes the key from both db->expires and db->dict,
 * bumps server.stat_expiredkeys, and returns whether the deletion from
 * db->dict succeeded. */
7375 static int expireIfNeeded(redisDb
*db
, robj
*key
) {
7379 /* No expire? return ASAP */
7380 if (dictSize(db
->expires
) == 0 ||
7381 (de
= dictFind(db
->expires
,key
)) == NULL
) return 0;
7383 /* Lookup the expire */
7384 when
= (time_t) dictGetEntryVal(de
);
/* Still valid as long as the current time has not gone past 'when'. */
7385 if (time(NULL
) <= when
) return 0;
7387 /* Delete the key */
7388 dictDelete(db
->expires
,key
);
7389 server
.stat_expiredkeys
++;
7390 return dictDelete(db
->dict
,key
) == DICT_OK
;
/* Delete 'key' from 'db' if it has an expire set. Unlike expireIfNeeded(),
 * no comparison against the current time is visible here: any key found in
 * db->expires is removed from both db->expires and db->dict, and
 * server.stat_expiredkeys is incremented. Returns 0 when the key has no
 * expire, otherwise whether the deletion from db->dict succeeded. */
7393 static int deleteIfVolatile(redisDb
*db
, robj
*key
) {
7396 /* No expire? return ASAP */
7397 if (dictSize(db
->expires
) == 0 ||
7398 (de
= dictFind(db
->expires
,key
)) == NULL
) return 0;
7400 /* Delete the key */
7402 server
.stat_expiredkeys
++;
7403 dictDelete(db
->expires
,key
);
7404 return dictDelete(db
->dict
,key
) == DICT_OK
;
/* Shared implementation of EXPIRE and EXPIREAT. 'param' is parsed into
 * 'seconds'. expireCommand() passes 0 as 'offset' and expireatCommand()
 * passes the current time.
 * NOTE(review): the statement that applies 'offset' and the conditions
 * separating the replies below are not visible in this excerpt.
 *
 * Visible replies: shared.czero and shared.cone around the key lookup and
 * deleteKey() path (which also bumps server.dirty on success), then
 * shared.cone when setExpire() returns non-zero and shared.czero after it. */
7407 static void expireGenericCommand(redisClient
*c
, robj
*key
, robj
*param
, long offset
) {
7411 if (getLongFromObjectOrReply(c
, param
, &seconds
, NULL
) != REDIS_OK
) return;
7415 de
= dictFind(c
->db
->dict
,key
);
7417 addReply(c
,shared
.czero
);
7421 if (deleteKey(c
->db
,key
)) server
.dirty
++;
7422 addReply(c
, shared
.cone
);
/* The stored expire is an absolute time: now plus 'seconds'. */
7425 time_t when
= time(NULL
)+seconds
;
7426 if (setExpire(c
->db
,key
,when
)) {
7427 addReply(c
,shared
.cone
);
7430 addReply(c
,shared
.czero
);
/* EXPIRE: c->argv[1] is the key and c->argv[2] the timeout argument;
 * delegates to expireGenericCommand() with an offset of 0. */
7436 static void expireCommand(redisClient
*c
) {
7437 expireGenericCommand(c
,c
->argv
[1],c
->argv
[2],0);
/* EXPIREAT: c->argv[1] is the key and c->argv[2] the time argument;
 * delegates to expireGenericCommand() with the current time as offset. */
7440 static void expireatCommand(redisClient
*c
) {
7441 expireGenericCommand(c
,c
->argv
[1],c
->argv
[2],time(NULL
));
/* TTL: reply with an integer. The visible code computes the difference
 * between the expire time of key c->argv[1] and the current time, and
 * maps any negative result to -1.
 * NOTE(review): the initial value of 'ttl' and the handling of keys
 * without an expire are not visible in this excerpt. */
7444 static void ttlCommand(redisClient
*c
) {
7448 expire
= getExpire(c
->db
,c
->argv
[1]);
7450 ttl
= (int) (expire
-time(NULL
));
7451 if (ttl
< 0) ttl
= -1;
7453 addReplySds(c
,sdscatprintf(sdsempty(),":%d\r\n",ttl
));
7456 /* ================================ MULTI/EXEC ============================== */
7458 /* Client state initialization for MULTI/EXEC: start with no queued
 * commands (NULL array, zero count). */
7459 static void initClientMultiState(redisClient
*c
) {
7460 c
->mstate
.commands
= NULL
;
7461 c
->mstate
.count
= 0;
7464 /* Release all the resources associated with MULTI/EXEC state: drop the
 * reference queueMultiCommand() took on every argument of every queued
 * command, then free the commands array itself. */
7465 static void freeClientMultiState(redisClient
*c
) {
7468 for (j
= 0; j
< c
->mstate
.count
; j
++) {
7470 multiCmd
*mc
= c
->mstate
.commands
+j
;
7472 for (i
= 0; i
< mc
->argc
; i
++)
7473 decrRefCount(mc
->argv
[i
]);
7476 zfree(c
->mstate
.commands
);
7479 /* Add a new command into the MULTI commands queue.
 *
 * The commands array is grown by one slot with zrealloc(); the new slot
 * gets its own copy of the client's argv pointer array, and every argument
 * object has its reference count incremented so that it outlives the
 * current request. */
7480 static void queueMultiCommand(redisClient
*c
, struct redisCommand
*cmd
) {
7484 c
->mstate
.commands
= zrealloc(c
->mstate
.commands
,
7485 sizeof(multiCmd
)*(c
->mstate
.count
+1));
/* 'mc' is the freshly added last slot. */
7486 mc
= c
->mstate
.commands
+c
->mstate
.count
;
7489 mc
->argv
= zmalloc(sizeof(robj
*)*c
->argc
);
7490 memcpy(mc
->argv
,c
->argv
,sizeof(robj
*)*c
->argc
);
7491 for (j
= 0; j
< c
->argc
; j
++)
7492 incrRefCount(mc
->argv
[j
]);
/* MULTI: set REDIS_MULTI on the client and reply shared.ok. */
7496 static void multiCommand(redisClient
*c
) {
7497 c
->flags
|= REDIS_MULTI
;
7498 addReply(c
,shared
.ok
);
/* DISCARD: drop the queued commands and clear REDIS_MULTI on the client.
 * A client without REDIS_MULTI set gets the "DISCARD without MULTI" error;
 * otherwise the MULTI state is freed and re-initialized and the reply is
 * shared.ok. */
7501 static void discardCommand(redisClient
*c
) {
7502 if (!(c
->flags
& REDIS_MULTI
)) {
7503 addReplySds(c
,sdsnew("-ERR DISCARD without MULTI\r\n"));
7507 freeClientMultiState(c
);
7508 initClientMultiState(c
);
7509 c
->flags
&= (~REDIS_MULTI
);
7510 addReply(c
,shared
.ok
);
7513 /* Send a MULTI command to all the slaves and AOF file. Check the execCommand
7514 * implementation for more information.
 *
 * A temporary "MULTI" string object serves as the single argument: it is
 * fed to the append only file when server.appendonly is set and to the
 * slaves when there is at least one, then released. */
7515 static void execCommandReplicateMulti(redisClient
*c
) {
7516 struct redisCommand
*cmd
;
7517 robj
*multistring
= createStringObject("MULTI",5);
7519 cmd
= lookupCommand("multi");
7520 if (server
.appendonly
)
7521 feedAppendOnlyFile(cmd
,c
->db
->id
,&multistring
,1);
7522 if (listLength(server
.slaves
))
7523 replicationFeedSlaves(server
.slaves
,c
->db
->id
,&multistring
,1);
7524 decrRefCount(multistring
);
/* EXEC: run every command queued since MULTI and send their replies
 * wrapped in one multi-bulk reply of c->mstate.count items.
 * A client without REDIS_MULTI set gets the "EXEC without MULTI" error.
 * When REDIS_DIRTY_CAS is set the queue is discarded, both flags are
 * cleared and shared.nullmultibulk is sent. */
7527 static void execCommand(redisClient
*c
) {
7532 if (!(c
->flags
& REDIS_MULTI
)) {
7533 addReplySds(c
,sdsnew("-ERR EXEC without MULTI\r\n"));
7537 /* Check if we need to abort the EXEC if some WATCHed key was touched.
7538 * A failed EXEC will return a multi bulk nil object. */
7539 if (c
->flags
& REDIS_DIRTY_CAS
) {
7540 freeClientMultiState(c
);
7541 initClientMultiState(c
);
7542 c
->flags
&= ~(REDIS_MULTI
|REDIS_DIRTY_CAS
);
7544 addReply(c
,shared
.nullmultibulk
);
7548 /* Replicate a MULTI request now that we are sure the block is executed.
7549 * This way we'll deliver the MULTI/..../EXEC block as a whole and
7550 * both the AOF and the replication link will have the same consistency
7551 * and atomicity guarantees. */
7552 execCommandReplicateMulti(c
);
7554 /* Exec all the queued commands */
/* The client's own argv/argc are saved here and restored after the loop,
 * because each queued command is run by temporarily installing its
 * argument vector on the client. */
7555 orig_argv
= c
->argv
;
7556 orig_argc
= c
->argc
;
7557 addReplySds(c
,sdscatprintf(sdsempty(),"*%d\r\n",c
->mstate
.count
));
7558 for (j
= 0; j
< c
->mstate
.count
; j
++) {
7559 c
->argc
= c
->mstate
.commands
[j
].argc
;
7560 c
->argv
= c
->mstate
.commands
[j
].argv
;
7561 call(c
,c
->mstate
.commands
[j
].cmd
);
7563 c
->argv
= orig_argv
;
7564 c
->argc
= orig_argc
;
7565 freeClientMultiState(c
);
7566 initClientMultiState(c
);
7567 c
->flags
&= (~REDIS_MULTI
);
7569 /* Make sure the EXEC command is always replicated / AOF, since we
7570 * always send the MULTI command (we can't know beforehand if the
7571 * next operations will contain at least a modification to the DB). */
7575 /* =========================== Blocking Operations ========================= */
7577 /* Currently Redis blocking operations support is limited to list POP ops,
7578 * so the current implementation is not fully generic, but it is also not
7579 * completely specific so it will not require a rewrite to support new
7580 * kinds of blocking operations in the future.
7582 * Still it's important to note that list blocking operations can already be
7583 * used as a notification mechanism in order to implement other blocking
7584 * operations at application level, so there must be very strong evidence
7585 * of usefulness and generality before new blocking operations are implemented.
7587 * This is how the current blocking POP works, we use BLPOP as example:
7588 * - If the user calls BLPOP and the key exists and contains a non empty list
7589 * then LPOP is called instead. So BLPOP is semantically the same as LPOP
7590 * if there is no need to block.
7591 * - If instead BLPOP is called and the key does not exist or the list is
7592 * empty we need to block. In order to do so we remove the notification for
7593 * new data to read in the client socket (so that we'll not serve new
7594 * requests if the blocking request is not served). Also we put the client
7595 * in a dictionary (db->blocking_keys) mapping keys to a list of clients
7596 * blocked on these keys.
7597 * - If a PUSH operation against a key with blocked clients waiting is
7598 * performed, we serve the first in the list: basically instead of pushing
7599 * the new element inside the list we return it to the (first / oldest)
7600 * blocking client, unblock the client, and remove it from the list.
7602 * The above comment and the source code should be enough in order to understand
7603 * the implementation and modify / fix it later.
/* blockForKeys(): register client 'c' as blocked on each of the 'numkeys'
 * keys in 'keys'. The keys are recorded in the client (c->blocking_keys)
 * and the client is appended to the per-key list kept in
 * c->db->blocking_keys. Finally the client is flagged REDIS_BLOCKED and
 * server.blpop_blocked_clients is incremented. 'timeout' is stored
 * unchanged in c->blockingto. */
7606 /* Set a client in blocking mode for the specified key, with the specified
7608 static void blockForKeys(redisClient
*c
, robj
**keys
, int numkeys
, time_t timeout
) {
7613 c
->blocking_keys
= zmalloc(sizeof(robj
*)*numkeys
);
7614 c
->blocking_keys_num
= numkeys
;
7615 c
->blockingto
= timeout
;
7616 for (j
= 0; j
< numkeys
; j
++) {
7617 /* Add the key in the client structure, to map clients -> keys */
7618 c
->blocking_keys
[j
] = keys
[j
];
7619 incrRefCount(keys
[j
]);
7621 /* And in the other "side", to map keys -> clients */
7622 de
= dictFind(c
->db
->blocking_keys
,keys
[j
]);
7626 /* For every key we take a list of clients blocked for it */
/* The key gets a second reference here because the dictionary entry
 * holds it as well. */
7628 retval
= dictAdd(c
->db
->blocking_keys
,keys
[j
],l
);
7629 incrRefCount(keys
[j
]);
7630 assert(retval
== DICT_OK
);
7632 l
= dictGetEntryVal(de
);
7634 listAddNodeTail(l
,c
);
7636 /* Mark the client as a blocked client */
7637 c
->flags
|= REDIS_BLOCKED
;
7638 server
.blpop_blocked_clients
++;
7641 /* Unblock a client that's waiting in a blocking operation such as BLPOP.
 *
 * Reverses blockForKeys(): the client is removed from the waiting list of
 * every key it was blocked on, the references on those keys are dropped,
 * the client's key array is freed, REDIS_BLOCKED is cleared and
 * server.blpop_blocked_clients is decremented. */
7642 static void unblockClientWaitingData(redisClient
*c
) {
7647 assert(c
->blocking_keys
!= NULL
);
7648 /* The client may wait for multiple keys, so unblock it for every key. */
7649 for (j
= 0; j
< c
->blocking_keys_num
; j
++) {
7650 /* Remove this client from the list of clients waiting for this key. */
7651 de
= dictFind(c
->db
->blocking_keys
,c
->blocking_keys
[j
]);
7653 l
= dictGetEntryVal(de
);
7654 listDelNode(l
,listSearchKey(l
,c
));
7655 /* If the list is empty we need to remove it to avoid wasting memory */
7656 if (listLength(l
) == 0)
7657 dictDelete(c
->db
->blocking_keys
,c
->blocking_keys
[j
]);
7658 decrRefCount(c
->blocking_keys
[j
]);
7660 /* Cleanup the client structure */
7661 zfree(c
->blocking_keys
);
7662 c
->blocking_keys
= NULL
;
7663 c
->flags
&= (~REDIS_BLOCKED
);
7664 server
.blpop_blocked_clients
--;
7665 /* We want to process data if there is some command waiting
7666 * in the input buffer. Note that this is safe even if
7667 * unblockClientWaitingData() gets called from freeClient() because
7668 * freeClient() will be smart enough to call this function
7669 * *after* c->querybuf was set to NULL. */
7670 if (c
->querybuf
&& sdslen(c
->querybuf
) > 0) processInputBuffer(c
);
7673 /* This should be called from any function PUSHing into lists.
7674 * 'c' is the "pushing client", 'key' is the key it is pushing data against,
7675 * 'ele' is the element pushed.
7677 * If the function returns 0 there was no client waiting for a list push
7680 * If the function returns 1 there was a client waiting for a list push
7681 * against this key, the element was passed to this client thus it's not
7682 * needed to actually add it to the list and the caller should return asap. */
7683 static int handleClientsWaitingListPush(redisClient
*c
, robj
*key
, robj
*ele
) {
7684 struct dictEntry
*de
;
7685 redisClient
*receiver
;
7689 de
= dictFind(c
->db
->blocking_keys
,key
);
7690 if (de
== NULL
) return 0;
7691 l
= dictGetEntryVal(de
);
7694 receiver
= ln
->value
;
7696 addReplySds(receiver
,sdsnew("*2\r\n"));
7697 addReplyBulk(receiver
,key
);
7698 addReplyBulk(receiver
,ele
);
7699 unblockClientWaitingData(receiver
);
7703 /* Blocking RPOP/LPOP */
7704 static void blockingPopGenericCommand(redisClient
*c
, int where
) {
7709 for (j
= 1; j
< c
->argc
-1; j
++) {
7710 o
= lookupKeyWrite(c
->db
,c
->argv
[j
]);
7712 if (o
->type
!= REDIS_LIST
) {
7713 addReply(c
,shared
.wrongtypeerr
);
7716 list
*list
= o
->ptr
;
7717 if (listLength(list
) != 0) {
7718 /* If the list contains elements fall back to the usual
7719 * non-blocking POP operation */
7720 robj
*argv
[2], **orig_argv
;
7723 /* We need to alter the command arguments before to call
7724 * popGenericCommand() as the command takes a single key. */
7725 orig_argv
= c
->argv
;
7726 orig_argc
= c
->argc
;
7727 argv
[1] = c
->argv
[j
];
7731 /* Also the return value is different, we need to output
7732 * the multi bulk reply header and the key name. The
7733 * "real" command will add the last element (the value)
7734 * for us. If this souds like an hack to you it's just
7735 * because it is... */
7736 addReplySds(c
,sdsnew("*2\r\n"));
7737 addReplyBulk(c
,argv
[1]);
7738 popGenericCommand(c
,where
);
7740 /* Fix the client structure with the original stuff */
7741 c
->argv
= orig_argv
;
7742 c
->argc
= orig_argc
;
7748 /* If the list is empty or the key does not exists we must block */
7749 timeout
= strtol(c
->argv
[c
->argc
-1]->ptr
,NULL
,10);
7750 if (timeout
> 0) timeout
+= time(NULL
);
7751 blockForKeys(c
,c
->argv
+1,c
->argc
-2,timeout
);
7754 static void blpopCommand(redisClient
*c
) {
7755 blockingPopGenericCommand(c
,REDIS_HEAD
);
7758 static void brpopCommand(redisClient
*c
) {
7759 blockingPopGenericCommand(c
,REDIS_TAIL
);
7762 /* =============================== Replication ============================= */
7764 static int syncWrite(int fd
, char *ptr
, ssize_t size
, int timeout
) {
7765 ssize_t nwritten
, ret
= size
;
7766 time_t start
= time(NULL
);
7770 if (aeWait(fd
,AE_WRITABLE
,1000) & AE_WRITABLE
) {
7771 nwritten
= write(fd
,ptr
,size
);
7772 if (nwritten
== -1) return -1;
7776 if ((time(NULL
)-start
) > timeout
) {
7784 static int syncRead(int fd
, char *ptr
, ssize_t size
, int timeout
) {
7785 ssize_t nread
, totread
= 0;
7786 time_t start
= time(NULL
);
7790 if (aeWait(fd
,AE_READABLE
,1000) & AE_READABLE
) {
7791 nread
= read(fd
,ptr
,size
);
7792 if (nread
== -1) return -1;
7797 if ((time(NULL
)-start
) > timeout
) {
/* Read a line from 'fd' into 'ptr', one byte at a time through syncRead()
 * ('timeout' is passed to every single-byte read).
 *
 * At most 'size'-1 bytes are stored and the buffer is always
 * NUL-terminated. The terminating '\n' is not stored, and a '\r' right
 * before it is replaced by the terminator. Returns the number of bytes
 * stored before the newline (a stripped '\r' is still counted), or -1 on
 * error. A line longer than the buffer is returned truncated. */
static int syncReadLine(int fd, char *ptr, ssize_t size, int timeout) {
    ssize_t nread = 0;

    if (size <= 0) {
        /* No room even for the terminator. */
        errno = EINVAL;
        return -1;
    }
    *ptr = '\0';

    /* One byte is always kept for the terminator. */
    while(nread < size-1) {
        char c;

        if (syncRead(fd,&c,1,timeout) == -1) return -1;
        if (c == '\n') {
            *ptr = '\0';
            if (nread && *(ptr-1) == '\r') *(ptr-1) = '\0';
            return nread;
        }
        *ptr++ = c;
        *ptr = '\0';
        nread++;
    }
    return nread;
}
7826 static void syncCommand(redisClient
*c
) {
7827 /* ignore SYNC if aleady slave or in monitor mode */
7828 if (c
->flags
& REDIS_SLAVE
) return;
7830 /* SYNC can't be issued when the server has pending data to send to
7831 * the client about already issued commands. We need a fresh reply
7832 * buffer registering the differences between the BGSAVE and the current
7833 * dataset, so that we can copy to other slaves if needed. */
7834 if (listLength(c
->reply
) != 0) {
7835 addReplySds(c
,sdsnew("-ERR SYNC is invalid with pending input\r\n"));
7839 redisLog(REDIS_NOTICE
,"Slave ask for synchronization");
7840 /* Here we need to check if there is a background saving operation
7841 * in progress, or if it is required to start one */
7842 if (server
.bgsavechildpid
!= -1) {
7843 /* Ok a background save is in progress. Let's check if it is a good
7844 * one for replication, i.e. if there is another slave that is
7845 * registering differences since the server forked to save */
7850 listRewind(server
.slaves
,&li
);
7851 while((ln
= listNext(&li
))) {
7853 if (slave
->replstate
== REDIS_REPL_WAIT_BGSAVE_END
) break;
7856 /* Perfect, the server is already registering differences for
7857 * another slave. Set the right state, and copy the buffer. */
7858 listRelease(c
->reply
);
7859 c
->reply
= listDup(slave
->reply
);
7860 c
->replstate
= REDIS_REPL_WAIT_BGSAVE_END
;
7861 redisLog(REDIS_NOTICE
,"Waiting for end of BGSAVE for SYNC");
7863 /* No way, we need to wait for the next BGSAVE in order to
7864 * register differences */
7865 c
->replstate
= REDIS_REPL_WAIT_BGSAVE_START
;
7866 redisLog(REDIS_NOTICE
,"Waiting for next BGSAVE for SYNC");
7869 /* Ok we don't have a BGSAVE in progress, let's start one */
7870 redisLog(REDIS_NOTICE
,"Starting BGSAVE for SYNC");
7871 if (rdbSaveBackground(server
.dbfilename
) != REDIS_OK
) {
7872 redisLog(REDIS_NOTICE
,"Replication failed, can't BGSAVE");
7873 addReplySds(c
,sdsnew("-ERR Unalbe to perform background save\r\n"));
7876 c
->replstate
= REDIS_REPL_WAIT_BGSAVE_END
;
7879 c
->flags
|= REDIS_SLAVE
;
7881 listAddNodeTail(server
.slaves
,c
);
7885 static void sendBulkToSlave(aeEventLoop
*el
, int fd
, void *privdata
, int mask
) {
7886 redisClient
*slave
= privdata
;
7888 REDIS_NOTUSED(mask
);
7889 char buf
[REDIS_IOBUF_LEN
];
7890 ssize_t nwritten
, buflen
;
7892 if (slave
->repldboff
== 0) {
7893 /* Write the bulk write count before to transfer the DB. In theory here
7894 * we don't know how much room there is in the output buffer of the
7895 * socket, but in pratice SO_SNDLOWAT (the minimum count for output
7896 * operations) will never be smaller than the few bytes we need. */
7899 bulkcount
= sdscatprintf(sdsempty(),"$%lld\r\n",(unsigned long long)
7901 if (write(fd
,bulkcount
,sdslen(bulkcount
)) != (signed)sdslen(bulkcount
))
7909 lseek(slave
->repldbfd
,slave
->repldboff
,SEEK_SET
);
7910 buflen
= read(slave
->repldbfd
,buf
,REDIS_IOBUF_LEN
);
7912 redisLog(REDIS_WARNING
,"Read error sending DB to slave: %s",
7913 (buflen
== 0) ? "premature EOF" : strerror(errno
));
7917 if ((nwritten
= write(fd
,buf
,buflen
)) == -1) {
7918 redisLog(REDIS_VERBOSE
,"Write error sending DB to slave: %s",
7923 slave
->repldboff
+= nwritten
;
7924 if (slave
->repldboff
== slave
->repldbsize
) {
7925 close(slave
->repldbfd
);
7926 slave
->repldbfd
= -1;
7927 aeDeleteFileEvent(server
.el
,slave
->fd
,AE_WRITABLE
);
7928 slave
->replstate
= REDIS_REPL_ONLINE
;
7929 if (aeCreateFileEvent(server
.el
, slave
->fd
, AE_WRITABLE
,
7930 sendReplyToClient
, slave
) == AE_ERR
) {
7934 addReplySds(slave
,sdsempty());
7935 redisLog(REDIS_NOTICE
,"Synchronization with slave succeeded");
7939 /* This function is called at the end of every backgrond saving.
7940 * The argument bgsaveerr is REDIS_OK if the background saving succeeded
7941 * otherwise REDIS_ERR is passed to the function.
7943 * The goal of this function is to handle slaves waiting for a successful
7944 * background saving in order to perform non-blocking synchronization. */
7945 static void updateSlavesWaitingBgsave(int bgsaveerr
) {
7947 int startbgsave
= 0;
7950 listRewind(server
.slaves
,&li
);
7951 while((ln
= listNext(&li
))) {
7952 redisClient
*slave
= ln
->value
;
7954 if (slave
->replstate
== REDIS_REPL_WAIT_BGSAVE_START
) {
7956 slave
->replstate
= REDIS_REPL_WAIT_BGSAVE_END
;
7957 } else if (slave
->replstate
== REDIS_REPL_WAIT_BGSAVE_END
) {
7958 struct redis_stat buf
;
7960 if (bgsaveerr
!= REDIS_OK
) {
7962 redisLog(REDIS_WARNING
,"SYNC failed. BGSAVE child returned an error");
7965 if ((slave
->repldbfd
= open(server
.dbfilename
,O_RDONLY
)) == -1 ||
7966 redis_fstat(slave
->repldbfd
,&buf
) == -1) {
7968 redisLog(REDIS_WARNING
,"SYNC failed. Can't open/stat DB after BGSAVE: %s", strerror(errno
));
7971 slave
->repldboff
= 0;
7972 slave
->repldbsize
= buf
.st_size
;
7973 slave
->replstate
= REDIS_REPL_SEND_BULK
;
7974 aeDeleteFileEvent(server
.el
,slave
->fd
,AE_WRITABLE
);
7975 if (aeCreateFileEvent(server
.el
, slave
->fd
, AE_WRITABLE
, sendBulkToSlave
, slave
) == AE_ERR
) {
7982 if (rdbSaveBackground(server
.dbfilename
) != REDIS_OK
) {
7985 listRewind(server
.slaves
,&li
);
7986 redisLog(REDIS_WARNING
,"SYNC failed. BGSAVE failed");
7987 while((ln
= listNext(&li
))) {
7988 redisClient
*slave
= ln
->value
;
7990 if (slave
->replstate
== REDIS_REPL_WAIT_BGSAVE_START
)
7997 static int syncWithMaster(void) {
7998 char buf
[1024], tmpfile
[256], authcmd
[1024];
8000 int fd
= anetTcpConnect(NULL
,server
.masterhost
,server
.masterport
);
8001 int dfd
, maxtries
= 5;
8004 redisLog(REDIS_WARNING
,"Unable to connect to MASTER: %s",
8009 /* AUTH with the master if required. */
8010 if(server
.masterauth
) {
8011 snprintf(authcmd
, 1024, "AUTH %s\r\n", server
.masterauth
);
8012 if (syncWrite(fd
, authcmd
, strlen(server
.masterauth
)+7, 5) == -1) {
8014 redisLog(REDIS_WARNING
,"Unable to AUTH to MASTER: %s",
8018 /* Read the AUTH result. */
8019 if (syncReadLine(fd
,buf
,1024,3600) == -1) {
8021 redisLog(REDIS_WARNING
,"I/O error reading auth result from MASTER: %s",
8025 if (buf
[0] != '+') {
8027 redisLog(REDIS_WARNING
,"Cannot AUTH to MASTER, is the masterauth password correct?");
8032 /* Issue the SYNC command */
8033 if (syncWrite(fd
,"SYNC \r\n",7,5) == -1) {
8035 redisLog(REDIS_WARNING
,"I/O error writing to MASTER: %s",
8039 /* Read the bulk write count */
8040 if (syncReadLine(fd
,buf
,1024,3600) == -1) {
8042 redisLog(REDIS_WARNING
,"I/O error reading bulk count from MASTER: %s",
8046 if (buf
[0] != '$') {
8048 redisLog(REDIS_WARNING
,"Bad protocol from MASTER, the first byte is not '$', are you sure the host and port are right?");
8051 dumpsize
= strtol(buf
+1,NULL
,10);
8052 redisLog(REDIS_NOTICE
,"Receiving %ld bytes data dump from MASTER",dumpsize
);
8053 /* Read the bulk write data on a temp file */
8055 snprintf(tmpfile
,256,
8056 "temp-%d.%ld.rdb",(int)time(NULL
),(long int)getpid());
8057 dfd
= open(tmpfile
,O_CREAT
|O_WRONLY
|O_EXCL
,0644);
8058 if (dfd
!= -1) break;
8063 redisLog(REDIS_WARNING
,"Opening the temp file needed for MASTER <-> SLAVE synchronization: %s",strerror(errno
));
8067 int nread
, nwritten
;
8069 nread
= read(fd
,buf
,(dumpsize
< 1024)?dumpsize
:1024);
8071 redisLog(REDIS_WARNING
,"I/O error trying to sync with MASTER: %s",
8077 nwritten
= write(dfd
,buf
,nread
);
8078 if (nwritten
== -1) {
8079 redisLog(REDIS_WARNING
,"Write error writing to the DB dump file needed for MASTER <-> SLAVE synchrnonization: %s", strerror(errno
));
8087 if (rename(tmpfile
,server
.dbfilename
) == -1) {
8088 redisLog(REDIS_WARNING
,"Failed trying to rename the temp DB into dump.rdb in MASTER <-> SLAVE synchronization: %s", strerror(errno
));
8094 if (rdbLoad(server
.dbfilename
) != REDIS_OK
) {
8095 redisLog(REDIS_WARNING
,"Failed trying to load the MASTER synchronization DB from disk");
8099 server
.master
= createClient(fd
);
8100 server
.master
->flags
|= REDIS_MASTER
;
8101 server
.master
->authenticated
= 1;
8102 server
.replstate
= REDIS_REPL_CONNECTED
;
8106 static void slaveofCommand(redisClient
*c
) {
8107 if (!strcasecmp(c
->argv
[1]->ptr
,"no") &&
8108 !strcasecmp(c
->argv
[2]->ptr
,"one")) {
8109 if (server
.masterhost
) {
8110 sdsfree(server
.masterhost
);
8111 server
.masterhost
= NULL
;
8112 if (server
.master
) freeClient(server
.master
);
8113 server
.replstate
= REDIS_REPL_NONE
;
8114 redisLog(REDIS_NOTICE
,"MASTER MODE enabled (user request)");
8117 sdsfree(server
.masterhost
);
8118 server
.masterhost
= sdsdup(c
->argv
[1]->ptr
);
8119 server
.masterport
= atoi(c
->argv
[2]->ptr
);
8120 if (server
.master
) freeClient(server
.master
);
8121 server
.replstate
= REDIS_REPL_CONNECT
;
8122 redisLog(REDIS_NOTICE
,"SLAVE OF %s:%d enabled (user request)",
8123 server
.masterhost
, server
.masterport
);
8125 addReply(c
,shared
.ok
);
8128 /* ============================ Maxmemory directive ======================== */
8130 /* Try to free one object form the pre-allocated objects free list.
8131 * This is useful under low mem conditions as by default we take 1 million
8132 * free objects allocated. On success REDIS_OK is returned, otherwise
8134 static int tryFreeOneObjectFromFreelist(void) {
8137 if (server
.vm_enabled
) pthread_mutex_lock(&server
.obj_freelist_mutex
);
8138 if (listLength(server
.objfreelist
)) {
8139 listNode
*head
= listFirst(server
.objfreelist
);
8140 o
= listNodeValue(head
);
8141 listDelNode(server
.objfreelist
,head
);
8142 if (server
.vm_enabled
) pthread_mutex_unlock(&server
.obj_freelist_mutex
);
8146 if (server
.vm_enabled
) pthread_mutex_unlock(&server
.obj_freelist_mutex
);
8151 /* This function gets called when 'maxmemory' is set on the config file to limit
8152 * the max memory used by the server, and we are out of memory.
8153 * This function will try to, in order:
8155 * - Free objects from the free list
8156 * - Try to remove keys with an EXPIRE set
8158 * It is not possible to free enough memory to reach used-memory < maxmemory
8159 * the server will start refusing commands that will enlarge even more the
8162 static void freeMemoryIfNeeded(void) {
8163 while (server
.maxmemory
&& zmalloc_used_memory() > server
.maxmemory
) {
8164 int j
, k
, freed
= 0;
8166 if (tryFreeOneObjectFromFreelist() == REDIS_OK
) continue;
8167 for (j
= 0; j
< server
.dbnum
; j
++) {
8169 robj
*minkey
= NULL
;
8170 struct dictEntry
*de
;
8172 if (dictSize(server
.db
[j
].expires
)) {
8174 /* From a sample of three keys drop the one nearest to
8175 * the natural expire */
8176 for (k
= 0; k
< 3; k
++) {
8179 de
= dictGetRandomKey(server
.db
[j
].expires
);
8180 t
= (time_t) dictGetEntryVal(de
);
8181 if (minttl
== -1 || t
< minttl
) {
8182 minkey
= dictGetEntryKey(de
);
8186 deleteKey(server
.db
+j
,minkey
);
8189 if (!freed
) return; /* nothing to free... */
8193 /* ============================== Append Only file ========================== */
8195 /* Write the append only file buffer on disk.
8197 * Since we are required to write the AOF before replying to the client,
8198 * and the only way the client socket can get a write is entering when the
8199 * the event loop, we accumulate all the AOF writes in a memory
8200 * buffer and write it on disk using this function just before entering
8201 * the event loop again. */
8202 static void flushAppendOnlyFile(void) {
8206 if (sdslen(server
.aofbuf
) == 0) return;
8208 /* We want to perform a single write. This should be guaranteed atomic
8209 * at least if the filesystem we are writing is a real physical one.
8210 * While this will save us against the server being killed I don't think
8211 * there is much to do about the whole server stopping for power problems
8213 nwritten
= write(server
.appendfd
,server
.aofbuf
,sdslen(server
.aofbuf
));
8214 if (nwritten
!= (signed)sdslen(server
.aofbuf
)) {
8215 /* Ooops, we are in troubles. The best thing to do for now is
8216 * aborting instead of giving the illusion that everything is
8217 * working as expected. */
8218 if (nwritten
== -1) {
8219 redisLog(REDIS_WARNING
,"Exiting on error writing to the append-only file: %s",strerror(errno
));
8221 redisLog(REDIS_WARNING
,"Exiting on short write while writing to the append-only file: %s",strerror(errno
));
8225 sdsfree(server
.aofbuf
);
8226 server
.aofbuf
= sdsempty();
8228 /* Fsync if needed */
8230 if (server
.appendfsync
== APPENDFSYNC_ALWAYS
||
8231 (server
.appendfsync
== APPENDFSYNC_EVERYSEC
&&
8232 now
-server
.lastfsync
> 1))
8234 /* aof_fsync is defined as fdatasync() for Linux in order to avoid
8235 * flushing metadata. */
8236 aof_fsync(server
.appendfd
); /* Let's try to get this data on the disk */
8237 server
.lastfsync
= now
;
8241 static sds
catAppendOnlyGenericCommand(sds buf
, int argc
, robj
**argv
) {
8243 buf
= sdscatprintf(buf
,"*%d\r\n",argc
);
8244 for (j
= 0; j
< argc
; j
++) {
8245 robj
*o
= getDecodedObject(argv
[j
]);
8246 buf
= sdscatprintf(buf
,"$%lu\r\n",(unsigned long)sdslen(o
->ptr
));
8247 buf
= sdscatlen(buf
,o
->ptr
,sdslen(o
->ptr
));
8248 buf
= sdscatlen(buf
,"\r\n",2);
8254 static sds
catAppendOnlyExpireAtCommand(sds buf
, robj
*key
, robj
*seconds
) {
8259 /* Make sure we can use strtol */
8260 seconds
= getDecodedObject(seconds
);
8261 when
= time(NULL
)+strtol(seconds
->ptr
,NULL
,10);
8262 decrRefCount(seconds
);
8264 argv
[0] = createStringObject("EXPIREAT",8);
8266 argv
[2] = createObject(REDIS_STRING
,
8267 sdscatprintf(sdsempty(),"%ld",when
));
8268 buf
= catAppendOnlyGenericCommand(buf
, argc
, argv
);
8269 decrRefCount(argv
[0]);
8270 decrRefCount(argv
[2]);
8274 static void feedAppendOnlyFile(struct redisCommand
*cmd
, int dictid
, robj
**argv
, int argc
) {
8275 sds buf
= sdsempty();
8278 /* The DB this command was targetting is not the same as the last command
8279 * we appendend. To issue a SELECT command is needed. */
8280 if (dictid
!= server
.appendseldb
) {
8283 snprintf(seldb
,sizeof(seldb
),"%d",dictid
);
8284 buf
= sdscatprintf(buf
,"*2\r\n$6\r\nSELECT\r\n$%lu\r\n%s\r\n",
8285 (unsigned long)strlen(seldb
),seldb
);
8286 server
.appendseldb
= dictid
;
8289 if (cmd
->proc
== expireCommand
) {
8290 /* Translate EXPIRE into EXPIREAT */
8291 buf
= catAppendOnlyExpireAtCommand(buf
,argv
[1],argv
[2]);
8292 } else if (cmd
->proc
== setexCommand
) {
8293 /* Translate SETEX to SET and EXPIREAT */
8294 tmpargv
[0] = createStringObject("SET",3);
8295 tmpargv
[1] = argv
[1];
8296 tmpargv
[2] = argv
[3];
8297 buf
= catAppendOnlyGenericCommand(buf
,3,tmpargv
);
8298 decrRefCount(tmpargv
[0]);
8299 buf
= catAppendOnlyExpireAtCommand(buf
,argv
[1],argv
[2]);
8301 buf
= catAppendOnlyGenericCommand(buf
,argc
,argv
);
8304 /* Append to the AOF buffer. This will be flushed on disk just before
8305 * of re-entering the event loop, so before the client will get a
8306 * positive reply about the operation performed. */
8307 server
.aofbuf
= sdscatlen(server
.aofbuf
,buf
,sdslen(buf
));
8309 /* If a background append only file rewriting is in progress we want to
8310 * accumulate the differences between the child DB and the current one
8311 * in a buffer, so that when the child process will do its work we
8312 * can append the differences to the new append only file. */
8313 if (server
.bgrewritechildpid
!= -1)
8314 server
.bgrewritebuf
= sdscatlen(server
.bgrewritebuf
,buf
,sdslen(buf
));
8319 /* In Redis commands are always executed in the context of a client, so in
8320 * order to load the append only file we need to create a fake client. */
8321 static struct redisClient
*createFakeClient(void) {
8322 struct redisClient
*c
= zmalloc(sizeof(*c
));
8326 c
->querybuf
= sdsempty();
8330 /* We set the fake client as a slave waiting for the synchronization
8331 * so that Redis will not try to send replies to this client. */
8332 c
->replstate
= REDIS_REPL_WAIT_BGSAVE_START
;
8333 c
->reply
= listCreate();
8334 listSetFreeMethod(c
->reply
,decrRefCount
);
8335 listSetDupMethod(c
->reply
,dupClientReplyValue
);
8336 initClientMultiState(c
);
8340 static void freeFakeClient(struct redisClient
*c
) {
8341 sdsfree(c
->querybuf
);
8342 listRelease(c
->reply
);
8343 freeClientMultiState(c
);
8347 /* Replay the append log file. On error REDIS_OK is returned. On non fatal
8348 * error (the append only file is zero-length) REDIS_ERR is returned. On
8349 * fatal error an error message is logged and the program exists. */
8350 int loadAppendOnlyFile(char *filename
) {
8351 struct redisClient
*fakeClient
;
8352 FILE *fp
= fopen(filename
,"r");
8353 struct redis_stat sb
;
8354 unsigned long long loadedkeys
= 0;
8355 int appendonly
= server
.appendonly
;
8357 if (redis_fstat(fileno(fp
),&sb
) != -1 && sb
.st_size
== 0)
8361 redisLog(REDIS_WARNING
,"Fatal error: can't open the append log file for reading: %s",strerror(errno
));
8365 /* Temporarily disable AOF, to prevent EXEC from feeding a MULTI
8366 * to the same file we're about to read. */
8367 server
.appendonly
= 0;
8369 fakeClient
= createFakeClient();
8376 struct redisCommand
*cmd
;
8378 if (fgets(buf
,sizeof(buf
),fp
) == NULL
) {
8384 if (buf
[0] != '*') goto fmterr
;
8386 argv
= zmalloc(sizeof(robj
*)*argc
);
8387 for (j
= 0; j
< argc
; j
++) {
8388 if (fgets(buf
,sizeof(buf
),fp
) == NULL
) goto readerr
;
8389 if (buf
[0] != '$') goto fmterr
;
8390 len
= strtol(buf
+1,NULL
,10);
8391 argsds
= sdsnewlen(NULL
,len
);
8392 if (len
&& fread(argsds
,len
,1,fp
) == 0) goto fmterr
;
8393 argv
[j
] = createObject(REDIS_STRING
,argsds
);
8394 if (fread(buf
,2,1,fp
) == 0) goto fmterr
; /* discard CRLF */
8397 /* Command lookup */
8398 cmd
= lookupCommand(argv
[0]->ptr
);
8400 redisLog(REDIS_WARNING
,"Unknown command '%s' reading the append only file", argv
[0]->ptr
);
8403 /* Try object encoding */
8404 if (cmd
->flags
& REDIS_CMD_BULK
)
8405 argv
[argc
-1] = tryObjectEncoding(argv
[argc
-1]);
8406 /* Run the command in the context of a fake client */
8407 fakeClient
->argc
= argc
;
8408 fakeClient
->argv
= argv
;
8409 cmd
->proc(fakeClient
);
8410 /* Discard the reply objects list from the fake client */
8411 while(listLength(fakeClient
->reply
))
8412 listDelNode(fakeClient
->reply
,listFirst(fakeClient
->reply
));
8413 /* Clean up, ready for the next command */
8414 for (j
= 0; j
< argc
; j
++) decrRefCount(argv
[j
]);
8416 /* Handle swapping while loading big datasets when VM is on */
8418 if (server
.vm_enabled
&& (loadedkeys
% 5000) == 0) {
8419 while (zmalloc_used_memory() > server
.vm_max_memory
) {
8420 if (vmSwapOneObjectBlocking() == REDIS_ERR
) break;
8425 /* This point can only be reached when EOF is reached without errors.
8426 * If the client is in the middle of a MULTI/EXEC, log error and quit. */
8427 if (fakeClient
->flags
& REDIS_MULTI
) goto readerr
;
8430 freeFakeClient(fakeClient
);
8431 server
.appendonly
= appendonly
;
8436 redisLog(REDIS_WARNING
,"Unexpected end of file reading the append only file");
8438 redisLog(REDIS_WARNING
,"Unrecoverable error reading the append only file: %s", strerror(errno
));
8442 redisLog(REDIS_WARNING
,"Bad file format reading the append only file");
8446 /* Write an object into a file in the bulk format $<count>\r\n<payload>\r\n */
8447 static int fwriteBulkObject(FILE *fp
, robj
*obj
) {
8451 /* Avoid the incr/decr ref count business if possible to help
8452 * copy-on-write (we are often in a child process when this function
8454 * Also makes sure that key objects don't get incrRefCount-ed when VM
8456 if (obj
->encoding
!= REDIS_ENCODING_RAW
) {
8457 obj
= getDecodedObject(obj
);
8460 snprintf(buf
,sizeof(buf
),"$%ld\r\n",(long)sdslen(obj
->ptr
));
8461 if (fwrite(buf
,strlen(buf
),1,fp
) == 0) goto err
;
8462 if (sdslen(obj
->ptr
) && fwrite(obj
->ptr
,sdslen(obj
->ptr
),1,fp
) == 0)
8464 if (fwrite("\r\n",2,1,fp
) == 0) goto err
;
8465 if (decrrc
) decrRefCount(obj
);
8468 if (decrrc
) decrRefCount(obj
);
/* Write the binary-safe string 's' of 'len' bytes to 'fp' in the bulk
 * format $<count>\r\n<payload>\r\n.
 * Returns 1 on success, 0 on write error. */
static int fwriteBulkString(FILE *fp, char *s, unsigned long len) {
    char buf[128];

    /* 'len' is unsigned: it must be formatted with %lu, not %ld. */
    snprintf(buf,sizeof(buf),"$%lu\r\n",len);
    if (fwrite(buf,strlen(buf),1,fp) == 0) return 0;
    /* fwrite() returns 0 for a zero-sized item, so skip empty payloads. */
    if (len && fwrite(s,len,1,fp) == 0) return 0;
    if (fwrite("\r\n",2,1,fp) == 0) return 0;
    return 1;
}
/* Write the double 'd' to 'fp' in the bulk format
 * $<count>\r\n<payload>\r\n, where the payload is 'd' printed with "%.17g".
 * Returns 1 on success, 0 on write error. */
static int fwriteBulkDouble(FILE *fp, double d) {
    char header[128], payload[128];
    size_t paylen;

    /* The payload buffer already carries the trailing CRLF, which is not
     * part of the advertised length. */
    snprintf(payload,sizeof(payload),"%.17g\r\n",d);
    paylen = strlen(payload);
    snprintf(header,sizeof(header),"$%lu\r\n",(unsigned long)paylen-2);

    if (fwrite(header,strlen(header),1,fp) == 0) return 0;
    if (fwrite(payload,paylen,1,fp) == 0) return 0;
    return 1;
}
/* Write the long 'l' to 'fp' in the bulk format $<count>\r\n<payload>\r\n,
 * where the payload is the decimal representation of 'l'.
 * Returns 1 on success, 0 on write error. */
static int fwriteBulkLong(FILE *fp, long l) {
    char header[128], payload[128];
    size_t paylen;

    /* The payload buffer already carries the trailing CRLF, which is not
     * part of the advertised length. */
    snprintf(payload,sizeof(payload),"%ld\r\n",l);
    paylen = strlen(payload);
    snprintf(header,sizeof(header),"$%lu\r\n",(unsigned long)paylen-2);

    if (fwrite(header,strlen(header),1,fp) == 0) return 0;
    if (fwrite(payload,paylen,1,fp) == 0) return 0;
    return 1;
}
8506 /* Write a sequence of commands able to fully rebuild the dataset into
8507 * "filename". Used both by REWRITEAOF and BGREWRITEAOF. */
8508 static int rewriteAppendOnlyFile(char *filename
) {
8509 dictIterator
*di
= NULL
;
8514 time_t now
= time(NULL
);
8516 /* Note that we have to use a different temp name here compared to the
8517 * one used by rewriteAppendOnlyFileBackground() function. */
8518 snprintf(tmpfile
,256,"temp-rewriteaof-%d.aof", (int) getpid());
8519 fp
= fopen(tmpfile
,"w");
8521 redisLog(REDIS_WARNING
, "Failed rewriting the append only file: %s", strerror(errno
));
8524 for (j
= 0; j
< server
.dbnum
; j
++) {
8525 char selectcmd
[] = "*2\r\n$6\r\nSELECT\r\n";
8526 redisDb
*db
= server
.db
+j
;
8528 if (dictSize(d
) == 0) continue;
8529 di
= dictGetIterator(d
);
8535 /* SELECT the new DB */
8536 if (fwrite(selectcmd
,sizeof(selectcmd
)-1,1,fp
) == 0) goto werr
;
8537 if (fwriteBulkLong(fp
,j
) == 0) goto werr
;
8539 /* Iterate this DB writing every entry */
8540 while((de
= dictNext(di
)) != NULL
) {
8545 key
= dictGetEntryKey(de
);
8546 /* If the value for this key is swapped, load a preview in memory.
8547 * We use a "swapped" flag to remember if we need to free the
8548 * value object instead to just increment the ref count anyway
8549 * in order to avoid copy-on-write of pages if we are forked() */
8550 if (!server
.vm_enabled
|| key
->storage
== REDIS_VM_MEMORY
||
8551 key
->storage
== REDIS_VM_SWAPPING
) {
8552 o
= dictGetEntryVal(de
);
8555 o
= vmPreviewObject(key
);
8558 expiretime
= getExpire(db
,key
);
8560 /* Save the key and associated value */
8561 if (o
->type
== REDIS_STRING
) {
8562 /* Emit a SET command */
8563 char cmd
[]="*3\r\n$3\r\nSET\r\n";
8564 if (fwrite(cmd
,sizeof(cmd
)-1,1,fp
) == 0) goto werr
;
8566 if (fwriteBulkObject(fp
,key
) == 0) goto werr
;
8567 if (fwriteBulkObject(fp
,o
) == 0) goto werr
;
8568 } else if (o
->type
== REDIS_LIST
) {
8569 /* Emit the RPUSHes needed to rebuild the list */
8570 list
*list
= o
->ptr
;
8574 listRewind(list
,&li
);
8575 while((ln
= listNext(&li
))) {
8576 char cmd
[]="*3\r\n$5\r\nRPUSH\r\n";
8577 robj
*eleobj
= listNodeValue(ln
);
8579 if (fwrite(cmd
,sizeof(cmd
)-1,1,fp
) == 0) goto werr
;
8580 if (fwriteBulkObject(fp
,key
) == 0) goto werr
;
8581 if (fwriteBulkObject(fp
,eleobj
) == 0) goto werr
;
8583 } else if (o
->type
== REDIS_SET
) {
8584 /* Emit the SADDs needed to rebuild the set */
8586 dictIterator
*di
= dictGetIterator(set
);
8589 while((de
= dictNext(di
)) != NULL
) {
8590 char cmd
[]="*3\r\n$4\r\nSADD\r\n";
8591 robj
*eleobj
= dictGetEntryKey(de
);
8593 if (fwrite(cmd
,sizeof(cmd
)-1,1,fp
) == 0) goto werr
;
8594 if (fwriteBulkObject(fp
,key
) == 0) goto werr
;
8595 if (fwriteBulkObject(fp
,eleobj
) == 0) goto werr
;
8597 dictReleaseIterator(di
);
8598 } else if (o
->type
== REDIS_ZSET
) {
8599 /* Emit the ZADDs needed to rebuild the sorted set */
8601 dictIterator
*di
= dictGetIterator(zs
->dict
);
8604 while((de
= dictNext(di
)) != NULL
) {
8605 char cmd
[]="*4\r\n$4\r\nZADD\r\n";
8606 robj
*eleobj
= dictGetEntryKey(de
);
8607 double *score
= dictGetEntryVal(de
);
8609 if (fwrite(cmd
,sizeof(cmd
)-1,1,fp
) == 0) goto werr
;
8610 if (fwriteBulkObject(fp
,key
) == 0) goto werr
;
8611 if (fwriteBulkDouble(fp
,*score
) == 0) goto werr
;
8612 if (fwriteBulkObject(fp
,eleobj
) == 0) goto werr
;
8614 dictReleaseIterator(di
);
8615 } else if (o
->type
== REDIS_HASH
) {
8616 char cmd
[]="*4\r\n$4\r\nHSET\r\n";
8618 /* Emit the HSETs needed to rebuild the hash */
8619 if (o
->encoding
== REDIS_ENCODING_ZIPMAP
) {
8620 unsigned char *p
= zipmapRewind(o
->ptr
);
8621 unsigned char *field
, *val
;
8622 unsigned int flen
, vlen
;
8624 while((p
= zipmapNext(p
,&field
,&flen
,&val
,&vlen
)) != NULL
) {
8625 if (fwrite(cmd
,sizeof(cmd
)-1,1,fp
) == 0) goto werr
;
8626 if (fwriteBulkObject(fp
,key
) == 0) goto werr
;
8627 if (fwriteBulkString(fp
,(char*)field
,flen
) == -1)
8629 if (fwriteBulkString(fp
,(char*)val
,vlen
) == -1)
8633 dictIterator
*di
= dictGetIterator(o
->ptr
);
8636 while((de
= dictNext(di
)) != NULL
) {
8637 robj
*field
= dictGetEntryKey(de
);
8638 robj
*val
= dictGetEntryVal(de
);
8640 if (fwrite(cmd
,sizeof(cmd
)-1,1,fp
) == 0) goto werr
;
8641 if (fwriteBulkObject(fp
,key
) == 0) goto werr
;
8642 if (fwriteBulkObject(fp
,field
) == -1) return -1;
8643 if (fwriteBulkObject(fp
,val
) == -1) return -1;
8645 dictReleaseIterator(di
);
8648 redisPanic("Unknown object type");
8650 /* Save the expire time */
8651 if (expiretime
!= -1) {
8652 char cmd
[]="*3\r\n$8\r\nEXPIREAT\r\n";
8653 /* If this key is already expired skip it */
8654 if (expiretime
< now
) continue;
8655 if (fwrite(cmd
,sizeof(cmd
)-1,1,fp
) == 0) goto werr
;
8656 if (fwriteBulkObject(fp
,key
) == 0) goto werr
;
8657 if (fwriteBulkLong(fp
,expiretime
) == 0) goto werr
;
8659 if (swapped
) decrRefCount(o
);
8661 dictReleaseIterator(di
);
8664 /* Make sure data will not remain on the OS's output buffers */
8669 /* Use RENAME to make sure the DB file is changed atomically only
8670 * if the generate DB file is ok. */
8671 if (rename(tmpfile
,filename
) == -1) {
8672 redisLog(REDIS_WARNING
,"Error moving temp append only file on the final destination: %s", strerror(errno
));
8676 redisLog(REDIS_NOTICE
,"SYNC append only file rewrite performed");
8682 redisLog(REDIS_WARNING
,"Write error writing append only file on disk: %s", strerror(errno
));
8683 if (di
) dictReleaseIterator(di
);
8687 /* This is how rewriting of the append only file in background works:
8689 * 1) The user calls BGREWRITEAOF
8690 * 2) Redis calls this function, that forks():
8691 * 2a) the child rewrite the append only file in a temp file.
8692 * 2b) the parent accumulates differences in server.bgrewritebuf.
8693 * 3) When the child finished '2a' exists.
8694 * 4) The parent will trap the exit code, if it's OK, will append the
8695 * data accumulated into server.bgrewritebuf into the temp file, and
8696 * finally will rename(2) the temp file in the actual file name.
8697 * The the new file is reopened as the new append only file. Profit!
8699 static int rewriteAppendOnlyFileBackground(void) {
8702 if (server
.bgrewritechildpid
!= -1) return REDIS_ERR
;
8703 if (server
.vm_enabled
) waitEmptyIOJobsQueue();
8704 if ((childpid
= fork()) == 0) {
8708 if (server
.vm_enabled
) vmReopenSwapFile();
8710 snprintf(tmpfile
,256,"temp-rewriteaof-bg-%d.aof", (int) getpid());
8711 if (rewriteAppendOnlyFile(tmpfile
) == REDIS_OK
) {
8718 if (childpid
== -1) {
8719 redisLog(REDIS_WARNING
,
8720 "Can't rewrite append only file in background: fork: %s",
8724 redisLog(REDIS_NOTICE
,
8725 "Background append only file rewriting started by pid %d",childpid
);
8726 server
.bgrewritechildpid
= childpid
;
8727 updateDictResizePolicy();
8728 /* We set appendseldb to -1 in order to force the next call to the
8729 * feedAppendOnlyFile() to issue a SELECT command, so the differences
8730 * accumulated by the parent into server.bgrewritebuf will start
8731 * with a SELECT statement and it will be safe to merge. */
8732 server
.appendseldb
= -1;
8735 return REDIS_OK
; /* unreached */
8738 static void bgrewriteaofCommand(redisClient
*c
) {
8739 if (server
.bgrewritechildpid
!= -1) {
8740 addReplySds(c
,sdsnew("-ERR background append only file rewriting already in progress\r\n"));
8743 if (rewriteAppendOnlyFileBackground() == REDIS_OK
) {
8744 char *status
= "+Background append only file rewriting started\r\n";
8745 addReplySds(c
,sdsnew(status
));
8747 addReply(c
,shared
.err
);
8751 static void aofRemoveTempFile(pid_t childpid
) {
8754 snprintf(tmpfile
,256,"temp-rewriteaof-bg-%d.aof", (int) childpid
);
8758 /* Virtual Memory is composed mainly of two subsystems:
8759 * - Blocking Virtual Memory
8760 * - Threaded Virtual Memory I/O
8761 * The two parts are not fully decoupled, but functions are split among two
8762 * different sections of the source code (delimited by comments) in order to
8763 * make more clear what functionality is about the blocking VM and what about
8764 * the threaded (not blocking) VM.
8768 * Redis VM is a blocking VM (one that blocks reading swapped values from
8769 * disk into memory when a value swapped out is needed in memory) that is made
8770 * unblocking by trying to examine the command argument vector in order to
8771 * load in background values that will likely be needed in order to exec
8772 * the command. The command is executed only once all the relevant keys
8773 * are loaded into memory.
8775 * This is basically almost as simple as a blocking VM, but almost as parallel
8776 * as a fully non-blocking VM.
8779 /* Called when the user switches from "appendonly yes" to "appendonly no"
8780 * at runtime using the CONFIG command. */
8781 static void stopAppendOnly(void) {
8782 flushAppendOnlyFile();
8783 fsync(server
.appendfd
);
8784 close(server
.appendfd
);
8786 server
.appendfd
= -1;
8787 server
.appendseldb
= -1;
8788 server
.appendonly
= 0;
8789 /* rewrite operation in progress? kill it, wait child exit */
8790 if (server
.bgsavechildpid
!= -1) {
8793 if (kill(server
.bgsavechildpid
,SIGKILL
) != -1)
8794 wait3(&statloc
,0,NULL
);
8795 /* reset the buffer accumulating changes while the child saves */
8796 sdsfree(server
.bgrewritebuf
);
8797 server
.bgrewritebuf
= sdsempty();
8798 server
.bgsavechildpid
= -1;
8802 /* Called when the user switches from "appendonly no" to "appendonly yes"
8803 * at runtime using the CONFIG command. */
8804 static int startAppendOnly(void) {
8805 server
.appendonly
= 1;
8806 server
.lastfsync
= time(NULL
);
8807 server
.appendfd
= open(server
.appendfilename
,O_WRONLY
|O_APPEND
|O_CREAT
,0644);
8808 if (server
.appendfd
== -1) {
8809 redisLog(REDIS_WARNING
,"Used tried to switch on AOF via CONFIG, but I can't open the AOF file: %s",strerror(errno
));
8812 if (rewriteAppendOnlyFileBackground() == REDIS_ERR
) {
8813 server
.appendonly
= 0;
8814 close(server
.appendfd
);
8815 redisLog(REDIS_WARNING
,"Used tried to switch on AOF via CONFIG, I can't trigger a background AOF rewrite operation. Check the above logs for more info about the error.",strerror(errno
));
8821 /* =================== Virtual Memory - Blocking Side ====================== */
8823 static void vmInit(void) {
8829 if (server
.vm_max_threads
!= 0)
8830 zmalloc_enable_thread_safeness(); /* we need thread safe zmalloc() */
8832 redisLog(REDIS_NOTICE
,"Using '%s' as swap file",server
.vm_swap_file
);
8833 /* Try to open the old swap file, otherwise create it */
8834 if ((server
.vm_fp
= fopen(server
.vm_swap_file
,"r+b")) == NULL
) {
8835 server
.vm_fp
= fopen(server
.vm_swap_file
,"w+b");
8837 if (server
.vm_fp
== NULL
) {
8838 redisLog(REDIS_WARNING
,
8839 "Can't open the swap file: %s. Exiting.",
8843 server
.vm_fd
= fileno(server
.vm_fp
);
8844 /* Lock the swap file for writing, this is useful in order to avoid
8845 * another instance to use the same swap file for a config error. */
8846 fl
.l_type
= F_WRLCK
;
8847 fl
.l_whence
= SEEK_SET
;
8848 fl
.l_start
= fl
.l_len
= 0;
8849 if (fcntl(server
.vm_fd
,F_SETLK
,&fl
) == -1) {
8850 redisLog(REDIS_WARNING
,
8851 "Can't lock the swap file at '%s': %s. Make sure it is not used by another Redis instance.", server
.vm_swap_file
, strerror(errno
));
8855 server
.vm_next_page
= 0;
8856 server
.vm_near_pages
= 0;
8857 server
.vm_stats_used_pages
= 0;
8858 server
.vm_stats_swapped_objects
= 0;
8859 server
.vm_stats_swapouts
= 0;
8860 server
.vm_stats_swapins
= 0;
8861 totsize
= server
.vm_pages
*server
.vm_page_size
;
8862 redisLog(REDIS_NOTICE
,"Allocating %lld bytes of swap file",totsize
);
8863 if (ftruncate(server
.vm_fd
,totsize
) == -1) {
8864 redisLog(REDIS_WARNING
,"Can't ftruncate swap file: %s. Exiting.",
8868 redisLog(REDIS_NOTICE
,"Swap file allocated with success");
8870 server
.vm_bitmap
= zmalloc((server
.vm_pages
+7)/8);
8871 redisLog(REDIS_VERBOSE
,"Allocated %lld bytes page table for %lld pages",
8872 (long long) (server
.vm_pages
+7)/8, server
.vm_pages
);
8873 memset(server
.vm_bitmap
,0,(server
.vm_pages
+7)/8);
8875 /* Initialize threaded I/O (used by Virtual Memory) */
8876 server
.io_newjobs
= listCreate();
8877 server
.io_processing
= listCreate();
8878 server
.io_processed
= listCreate();
8879 server
.io_ready_clients
= listCreate();
8880 pthread_mutex_init(&server
.io_mutex
,NULL
);
8881 pthread_mutex_init(&server
.obj_freelist_mutex
,NULL
);
8882 pthread_mutex_init(&server
.io_swapfile_mutex
,NULL
);
8883 server
.io_active_threads
= 0;
8884 if (pipe(pipefds
) == -1) {
8885 redisLog(REDIS_WARNING
,"Unable to intialized VM: pipe(2): %s. Exiting."
8889 server
.io_ready_pipe_read
= pipefds
[0];
8890 server
.io_ready_pipe_write
= pipefds
[1];
8891 redisAssert(anetNonBlock(NULL
,server
.io_ready_pipe_read
) != ANET_ERR
);
8892 /* LZF requires a lot of stack */
8893 pthread_attr_init(&server
.io_threads_attr
);
8894 pthread_attr_getstacksize(&server
.io_threads_attr
, &stacksize
);
8895 while (stacksize
< REDIS_THREAD_STACK_SIZE
) stacksize
*= 2;
8896 pthread_attr_setstacksize(&server
.io_threads_attr
, stacksize
);
8897 /* Listen for events in the threaded I/O pipe */
8898 if (aeCreateFileEvent(server
.el
, server
.io_ready_pipe_read
, AE_READABLE
,
8899 vmThreadedIOCompletedJob
, NULL
) == AE_ERR
)
8900 oom("creating file event");
8903 /* Mark the page as used */
8904 static void vmMarkPageUsed(off_t page
) {
8905 off_t byte
= page
/8;
8907 redisAssert(vmFreePage(page
) == 1);
8908 server
.vm_bitmap
[byte
] |= 1<<bit
;
8911 /* Mark N contiguous pages as used, with 'page' being the first. */
8912 static void vmMarkPagesUsed(off_t page
, off_t count
) {
8915 for (j
= 0; j
< count
; j
++)
8916 vmMarkPageUsed(page
+j
);
8917 server
.vm_stats_used_pages
+= count
;
8918 redisLog(REDIS_DEBUG
,"Mark USED pages: %lld pages at %lld\n",
8919 (long long)count
, (long long)page
);
8922 /* Mark the page as free */
8923 static void vmMarkPageFree(off_t page
) {
8924 off_t byte
= page
/8;
8926 redisAssert(vmFreePage(page
) == 0);
8927 server
.vm_bitmap
[byte
] &= ~(1<<bit
);
8930 /* Mark N contiguous pages as free, with 'page' being the first. */
8931 static void vmMarkPagesFree(off_t page
, off_t count
) {
8934 for (j
= 0; j
< count
; j
++)
8935 vmMarkPageFree(page
+j
);
8936 server
.vm_stats_used_pages
-= count
;
8937 redisLog(REDIS_DEBUG
,"Mark FREE pages: %lld pages at %lld\n",
8938 (long long)count
, (long long)page
);
8941 /* Test if the page is free */
8942 static int vmFreePage(off_t page
) {
8943 off_t byte
= page
/8;
8945 return (server
.vm_bitmap
[byte
] & (1<<bit
)) == 0;
8948 /* Find N contiguous free pages storing the first page of the cluster in *first.
8949 * Returns REDIS_OK if it was able to find N contiguous pages, otherwise
8950 * REDIS_ERR is returned.
8952 * This function uses a simple algorithm: we try to allocate
8953 * REDIS_VM_MAX_NEAR_PAGES sequentially, when we reach this limit we start
8954 * again from the start of the swap file searching for free spaces.
8956 * If it looks pretty clear that there are no free pages near our offset
8957 * we try to find less populated places doing a forward jump of
8958 * REDIS_VM_MAX_RANDOM_JUMP, then we start scanning again a few pages
8959 * without hurry, and then we jump again and so forth...
8961 * This function can be improved using a free list to avoid to guess
8962 * too much, since we could collect data about freed pages.
8964 * note: I implemented this function just after watching an episode of
8965 * Battlestar Galactica, where the hybrid was continuing to say "JUMP!"
8967 static int vmFindContiguousPages(off_t
*first
, off_t n
) {
8968 off_t base
, offset
= 0, since_jump
= 0, numfree
= 0;
8970 if (server
.vm_near_pages
== REDIS_VM_MAX_NEAR_PAGES
) {
8971 server
.vm_near_pages
= 0;
8972 server
.vm_next_page
= 0;
8974 server
.vm_near_pages
++; /* Yet another try for pages near to the old ones */
8975 base
= server
.vm_next_page
;
8977 while(offset
< server
.vm_pages
) {
8978 off_t
this = base
+offset
;
8980 /* If we overflow, restart from page zero */
8981 if (this >= server
.vm_pages
) {
8982 this -= server
.vm_pages
;
8984 /* Just overflowed, what we found on tail is no longer
8985 * interesting, as it's no longer contiguous. */
8989 if (vmFreePage(this)) {
8990 /* This is a free page */
8992 /* Already got N free pages? Return to the caller, with success */
8994 *first
= this-(n
-1);
8995 server
.vm_next_page
= this+1;
8996 redisLog(REDIS_DEBUG
, "FOUND CONTIGUOUS PAGES: %lld pages at %lld\n", (long long) n
, (long long) *first
);
9000 /* The current one is not a free page */
9004 /* Fast-forward if the current page is not free and we already
9005 * searched enough near this place. */
9007 if (!numfree
&& since_jump
>= REDIS_VM_MAX_RANDOM_JUMP
/4) {
9008 offset
+= random() % REDIS_VM_MAX_RANDOM_JUMP
;
9010 /* Note that even if we rewind after the jump, we are don't need
9011 * to make sure numfree is set to zero as we only jump *if* it
9012 * is set to zero. */
9014 /* Otherwise just check the next page */
9021 /* Write the specified object at the specified page of the swap file */
9022 static int vmWriteObjectOnSwap(robj
*o
, off_t page
) {
9023 if (server
.vm_enabled
) pthread_mutex_lock(&server
.io_swapfile_mutex
);
9024 if (fseeko(server
.vm_fp
,page
*server
.vm_page_size
,SEEK_SET
) == -1) {
9025 if (server
.vm_enabled
) pthread_mutex_unlock(&server
.io_swapfile_mutex
);
9026 redisLog(REDIS_WARNING
,
9027 "Critical VM problem in vmWriteObjectOnSwap(): can't seek: %s",
9031 rdbSaveObject(server
.vm_fp
,o
);
9032 fflush(server
.vm_fp
);
9033 if (server
.vm_enabled
) pthread_mutex_unlock(&server
.io_swapfile_mutex
);
9037 /* Swap the 'val' object relative to 'key' into disk. Store all the information
9038 * needed to later retrieve the object into the key object.
9039 * If we can't find enough contiguous empty pages to swap the object on disk
9040 * REDIS_ERR is returned. */
9041 static int vmSwapObjectBlocking(robj
*key
, robj
*val
) {
9042 off_t pages
= rdbSavedObjectPages(val
,NULL
);
9045 assert(key
->storage
== REDIS_VM_MEMORY
);
9046 assert(key
->refcount
== 1);
9047 if (vmFindContiguousPages(&page
,pages
) == REDIS_ERR
) return REDIS_ERR
;
9048 if (vmWriteObjectOnSwap(val
,page
) == REDIS_ERR
) return REDIS_ERR
;
9049 key
->vm
.page
= page
;
9050 key
->vm
.usedpages
= pages
;
9051 key
->storage
= REDIS_VM_SWAPPED
;
9052 key
->vtype
= val
->type
;
9053 decrRefCount(val
); /* Deallocate the object from memory. */
9054 vmMarkPagesUsed(page
,pages
);
9055 redisLog(REDIS_DEBUG
,"VM: object %s swapped out at %lld (%lld pages)",
9056 (unsigned char*) key
->ptr
,
9057 (unsigned long long) page
, (unsigned long long) pages
);
9058 server
.vm_stats_swapped_objects
++;
9059 server
.vm_stats_swapouts
++;
9063 static robj
*vmReadObjectFromSwap(off_t page
, int type
) {
9066 if (server
.vm_enabled
) pthread_mutex_lock(&server
.io_swapfile_mutex
);
9067 if (fseeko(server
.vm_fp
,page
*server
.vm_page_size
,SEEK_SET
) == -1) {
9068 redisLog(REDIS_WARNING
,
9069 "Unrecoverable VM problem in vmReadObjectFromSwap(): can't seek: %s",
9073 o
= rdbLoadObject(type
,server
.vm_fp
);
9075 redisLog(REDIS_WARNING
, "Unrecoverable VM problem in vmReadObjectFromSwap(): can't load object from swap file: %s", strerror(errno
));
9078 if (server
.vm_enabled
) pthread_mutex_unlock(&server
.io_swapfile_mutex
);
9082 /* Load the value object relative to the 'key' object from swap to memory.
9083 * The newly allocated object is returned.
9085 * If preview is true the unserialized object is returned to the caller but
9086 * no changes are made to the key object, nor the pages are marked as freed */
9087 static robj
*vmGenericLoadObject(robj
*key
, int preview
) {
9090 redisAssert(key
->storage
== REDIS_VM_SWAPPED
|| key
->storage
== REDIS_VM_LOADING
);
9091 val
= vmReadObjectFromSwap(key
->vm
.page
,key
->vtype
);
9093 key
->storage
= REDIS_VM_MEMORY
;
9094 key
->vm
.atime
= server
.unixtime
;
9095 vmMarkPagesFree(key
->vm
.page
,key
->vm
.usedpages
);
9096 redisLog(REDIS_DEBUG
, "VM: object %s loaded from disk",
9097 (unsigned char*) key
->ptr
);
9098 server
.vm_stats_swapped_objects
--;
9100 redisLog(REDIS_DEBUG
, "VM: object %s previewed from disk",
9101 (unsigned char*) key
->ptr
);
9103 server
.vm_stats_swapins
++;
9107 /* Plain object loading, from swap to memory */
9108 static robj
*vmLoadObject(robj
*key
) {
9109 /* If we are loading the object in background, stop it, we
9110 * need to load this object synchronously ASAP. */
9111 if (key
->storage
== REDIS_VM_LOADING
)
9112 vmCancelThreadedIOJob(key
);
9113 return vmGenericLoadObject(key
,0);
9116 /* Just load the value on disk, without to modify the key.
9117 * This is useful when we want to perform some operation on the value
9118 * without to really bring it from swap to memory, like while saving the
9119 * dataset or rewriting the append only log. */
9120 static robj
*vmPreviewObject(robj
*key
) {
9121 return vmGenericLoadObject(key
,1);
9124 /* How good a candidate is this object for swapping?
9125 * The better candidate it is, the greater the returned value.
9127 * Currently we try to perform a fast estimation of the object size in
9128 * memory, and combine it with aging information.
9130 * Basically swappability = idle-time * log(estimated size)
9132 * Bigger objects are preferred over smaller objects, but not
9133 * proportionally, this is why we use the logarithm. This algorithm is
9134 * just a first try and will probably be tuned later. */
9135 static double computeObjectSwappability(robj
*o
) {
9136 time_t age
= server
.unixtime
- o
->vm
.atime
;
9140 struct dictEntry
*de
;
9143 if (age
<= 0) return 0;
9146 if (o
->encoding
!= REDIS_ENCODING_RAW
) {
9149 asize
= sdslen(o
->ptr
)+sizeof(*o
)+sizeof(long)*2;
9154 listNode
*ln
= listFirst(l
);
9156 asize
= sizeof(list
);
9158 robj
*ele
= ln
->value
;
9161 elesize
= (ele
->encoding
== REDIS_ENCODING_RAW
) ?
9162 (sizeof(*o
)+sdslen(ele
->ptr
)) :
9164 asize
+= (sizeof(listNode
)+elesize
)*listLength(l
);
9169 z
= (o
->type
== REDIS_ZSET
);
9170 d
= z
? ((zset
*)o
->ptr
)->dict
: o
->ptr
;
9172 asize
= sizeof(dict
)+(sizeof(struct dictEntry
*)*dictSlots(d
));
9173 if (z
) asize
+= sizeof(zset
)-sizeof(dict
);
9178 de
= dictGetRandomKey(d
);
9179 ele
= dictGetEntryKey(de
);
9180 elesize
= (ele
->encoding
== REDIS_ENCODING_RAW
) ?
9181 (sizeof(*o
)+sdslen(ele
->ptr
)) :
9183 asize
+= (sizeof(struct dictEntry
)+elesize
)*dictSize(d
);
9184 if (z
) asize
+= sizeof(zskiplistNode
)*dictSize(d
);
9188 if (o
->encoding
== REDIS_ENCODING_ZIPMAP
) {
9189 unsigned char *p
= zipmapRewind((unsigned char*)o
->ptr
);
9190 unsigned int len
= zipmapLen((unsigned char*)o
->ptr
);
9191 unsigned int klen
, vlen
;
9192 unsigned char *key
, *val
;
9194 if ((p
= zipmapNext(p
,&key
,&klen
,&val
,&vlen
)) == NULL
) {
9198 asize
= len
*(klen
+vlen
+3);
9199 } else if (o
->encoding
== REDIS_ENCODING_HT
) {
9201 asize
= sizeof(dict
)+(sizeof(struct dictEntry
*)*dictSlots(d
));
9206 de
= dictGetRandomKey(d
);
9207 ele
= dictGetEntryKey(de
);
9208 elesize
= (ele
->encoding
== REDIS_ENCODING_RAW
) ?
9209 (sizeof(*o
)+sdslen(ele
->ptr
)) :
9211 ele
= dictGetEntryVal(de
);
9212 elesize
= (ele
->encoding
== REDIS_ENCODING_RAW
) ?
9213 (sizeof(*o
)+sdslen(ele
->ptr
)) :
9215 asize
+= (sizeof(struct dictEntry
)+elesize
)*dictSize(d
);
9220 return (double)age
*log(1+asize
);
9223 /* Try to swap an object that's a good candidate for swapping.
9224 * Returns REDIS_OK if the object was swapped, REDIS_ERR if it's not possible
9225 * to swap any object at all.
9227 * If 'usethreads' is true, Redis will try to swap the object in background
9228 * using I/O threads. */
9229 static int vmSwapOneObject(int usethreads
) {
9231 struct dictEntry
*best
= NULL
;
9232 double best_swappability
= 0;
9233 redisDb
*best_db
= NULL
;
9236 for (j
= 0; j
< server
.dbnum
; j
++) {
9237 redisDb
*db
= server
.db
+j
;
9238 /* Why maxtries is set to 100?
9239 * Because this way (usually) we'll find 1 object even if just 1% - 2%
9240 * are swappable objects */
9243 if (dictSize(db
->dict
) == 0) continue;
9244 for (i
= 0; i
< 5; i
++) {
9246 double swappability
;
9248 if (maxtries
) maxtries
--;
9249 de
= dictGetRandomKey(db
->dict
);
9250 key
= dictGetEntryKey(de
);
9251 val
= dictGetEntryVal(de
);
9252 /* Only swap objects that are currently in memory.
9254 * Also don't swap shared objects if threaded VM is on, as we
9255 * try to ensure that the main thread does not touch the
9256 * object while the I/O thread is using it, but we can't
9257 * control other keys without adding additional mutex. */
9258 if (key
->storage
!= REDIS_VM_MEMORY
||
9259 (server
.vm_max_threads
!= 0 && val
->refcount
!= 1)) {
9260 if (maxtries
) i
--; /* don't count this try */
9263 swappability
= computeObjectSwappability(val
);
9264 if (!best
|| swappability
> best_swappability
) {
9266 best_swappability
= swappability
;
9271 if (best
== NULL
) return REDIS_ERR
;
9272 key
= dictGetEntryKey(best
);
9273 val
= dictGetEntryVal(best
);
9275 redisLog(REDIS_DEBUG
,"Key with best swappability: %s, %f",
9276 key
->ptr
, best_swappability
);
9278 /* Unshare the key if needed */
9279 if (key
->refcount
> 1) {
9280 robj
*newkey
= dupStringObject(key
);
9282 key
= dictGetEntryKey(best
) = newkey
;
9286 vmSwapObjectThreaded(key
,val
,best_db
);
9289 if (vmSwapObjectBlocking(key
,val
) == REDIS_OK
) {
9290 dictGetEntryVal(best
) = NULL
;
/* Try to swap one object out without using the I/O threads.
 * This is vmSwapOneObject() called with 'usethreads' set to 0, and the
 * return value is the one of that call.
 *
 * The parameter list is spelled (void) so that this is a real prototype,
 * like the other argument-less functions of this file. */
static int vmSwapOneObjectBlocking(void) {
    return vmSwapOneObject(0);
}
/* Try to swap one object out using the I/O threads.
 * This is vmSwapOneObject() called with 'usethreads' set to 1, and the
 * return value is the one of that call.
 *
 * The parameter list is spelled (void) so that this is a real prototype,
 * like the other argument-less functions of this file. */
static int vmSwapOneObjectThreaded(void) {
    return vmSwapOneObject(1);
}
9306 /* Return true if it's safe to swap out objects in a given moment.
9307 * Basically we don't want to swap objects out while there is a BGSAVE
9308 * or a BGAEOREWRITE running in backgroud. */
9309 static int vmCanSwapOut(void) {
9310 return (server
.bgsavechildpid
== -1 && server
.bgrewritechildpid
== -1);
9313 /* Delete a key if swapped. Returns 1 if the key was found, was swapped
9314 * and was deleted. Otherwise 0 is returned. */
9315 static int deleteIfSwapped(redisDb
*db
, robj
*key
) {
9319 if ((de
= dictFind(db
->dict
,key
)) == NULL
) return 0;
9320 foundkey
= dictGetEntryKey(de
);
9321 if (foundkey
->storage
== REDIS_VM_MEMORY
) return 0;
9326 /* =================== Virtual Memory - Threaded I/O ======================= */
9328 static void freeIOJob(iojob
*j
) {
9329 if ((j
->type
== REDIS_IOJOB_PREPARE_SWAP
||
9330 j
->type
== REDIS_IOJOB_DO_SWAP
||
9331 j
->type
== REDIS_IOJOB_LOAD
) && j
->val
!= NULL
)
9332 decrRefCount(j
->val
);
9333 /* We don't decrRefCount the j->key field as we did't incremented
9334 * the count creating IO Jobs. This is because the key field here is
9335 * just used as an indentifier and if a key is removed the Job should
9336 * never be touched again. */
9340 /* Every time a thread finished a Job, it writes a byte into the write side
9341 * of an unix pipe in order to "awake" the main thread, and this function
9343 static void vmThreadedIOCompletedJob(aeEventLoop
*el
, int fd
, void *privdata
,
9347 int retval
, processed
= 0, toprocess
= -1, trytoswap
= 1;
9349 REDIS_NOTUSED(mask
);
9350 REDIS_NOTUSED(privdata
);
9352 /* For every byte we read in the read side of the pipe, there is one
9353 * I/O job completed to process. */
9354 while((retval
= read(fd
,buf
,1)) == 1) {
9358 struct dictEntry
*de
;
9360 redisLog(REDIS_DEBUG
,"Processing I/O completed job");
9362 /* Get the processed element (the oldest one) */
9364 assert(listLength(server
.io_processed
) != 0);
9365 if (toprocess
== -1) {
9366 toprocess
= (listLength(server
.io_processed
)*REDIS_MAX_COMPLETED_JOBS_PROCESSED
)/100;
9367 if (toprocess
<= 0) toprocess
= 1;
9369 ln
= listFirst(server
.io_processed
);
9371 listDelNode(server
.io_processed
,ln
);
9373 /* If this job is marked as canceled, just ignore it */
9378 /* Post process it in the main thread, as there are things we
9379 * can do just here to avoid race conditions and/or invasive locks */
9380 redisLog(REDIS_DEBUG
,"Job %p type: %d, key at %p (%s) refcount: %d\n", (void*) j
, j
->type
, (void*)j
->key
, (char*)j
->key
->ptr
, j
->key
->refcount
);
9381 de
= dictFind(j
->db
->dict
,j
->key
);
9383 key
= dictGetEntryKey(de
);
9384 if (j
->type
== REDIS_IOJOB_LOAD
) {
9387 /* Key loaded, bring it at home */
9388 key
->storage
= REDIS_VM_MEMORY
;
9389 key
->vm
.atime
= server
.unixtime
;
9390 vmMarkPagesFree(key
->vm
.page
,key
->vm
.usedpages
);
9391 redisLog(REDIS_DEBUG
, "VM: object %s loaded from disk (threaded)",
9392 (unsigned char*) key
->ptr
);
9393 server
.vm_stats_swapped_objects
--;
9394 server
.vm_stats_swapins
++;
9395 dictGetEntryVal(de
) = j
->val
;
9396 incrRefCount(j
->val
);
9399 /* Handle clients waiting for this key to be loaded. */
9400 handleClientsBlockedOnSwappedKey(db
,key
);
9401 } else if (j
->type
== REDIS_IOJOB_PREPARE_SWAP
) {
9402 /* Now we know the amount of pages required to swap this object.
9403 * Let's find some space for it, and queue this task again
9404 * rebranded as REDIS_IOJOB_DO_SWAP. */
9405 if (!vmCanSwapOut() ||
9406 vmFindContiguousPages(&j
->page
,j
->pages
) == REDIS_ERR
)
9408 /* Ooops... no space or we can't swap as there is
9409 * a fork()ed Redis trying to save stuff on disk. */
9411 key
->storage
= REDIS_VM_MEMORY
; /* undo operation */
9413 /* Note that we need to mark this pages as used now,
9414 * if the job will be canceled, we'll mark them as freed
9416 vmMarkPagesUsed(j
->page
,j
->pages
);
9417 j
->type
= REDIS_IOJOB_DO_SWAP
;
9422 } else if (j
->type
== REDIS_IOJOB_DO_SWAP
) {
9425 /* Key swapped. We can finally free some memory. */
9426 if (key
->storage
!= REDIS_VM_SWAPPING
) {
9427 printf("key->storage: %d\n",key
->storage
);
9428 printf("key->name: %s\n",(char*)key
->ptr
);
9429 printf("key->refcount: %d\n",key
->refcount
);
9430 printf("val: %p\n",(void*)j
->val
);
9431 printf("val->type: %d\n",j
->val
->type
);
9432 printf("val->ptr: %s\n",(char*)j
->val
->ptr
);
9434 redisAssert(key
->storage
== REDIS_VM_SWAPPING
);
9435 val
= dictGetEntryVal(de
);
9436 key
->vm
.page
= j
->page
;
9437 key
->vm
.usedpages
= j
->pages
;
9438 key
->storage
= REDIS_VM_SWAPPED
;
9439 key
->vtype
= j
->val
->type
;
9440 decrRefCount(val
); /* Deallocate the object from memory. */
9441 dictGetEntryVal(de
) = NULL
;
9442 redisLog(REDIS_DEBUG
,
9443 "VM: object %s swapped out at %lld (%lld pages) (threaded)",
9444 (unsigned char*) key
->ptr
,
9445 (unsigned long long) j
->page
, (unsigned long long) j
->pages
);
9446 server
.vm_stats_swapped_objects
++;
9447 server
.vm_stats_swapouts
++;
9449 /* Put a few more swap requests in queue if we are still
9451 if (trytoswap
&& vmCanSwapOut() &&
9452 zmalloc_used_memory() > server
.vm_max_memory
)
9457 more
= listLength(server
.io_newjobs
) <
9458 (unsigned) server
.vm_max_threads
;
9460 /* Don't waste CPU time if swappable objects are rare. */
9461 if (vmSwapOneObjectThreaded() == REDIS_ERR
) {
9469 if (processed
== toprocess
) return;
9471 if (retval
< 0 && errno
!= EAGAIN
) {
9472 redisLog(REDIS_WARNING
,
9473 "WARNING: read(2) error in vmThreadedIOCompletedJob() %s",
9478 static void lockThreadedIO(void) {
9479 pthread_mutex_lock(&server
.io_mutex
);
9482 static void unlockThreadedIO(void) {
9483 pthread_mutex_unlock(&server
.io_mutex
);
9486 /* Remove the specified object from the threaded I/O queue if still not
9487 * processed, otherwise make sure to flag it as canceled. */
9488 static void vmCancelThreadedIOJob(robj
*o
) {
9490 server
.io_newjobs
, /* 0 */
9491 server
.io_processing
, /* 1 */
9492 server
.io_processed
/* 2 */
9496 assert(o
->storage
== REDIS_VM_LOADING
|| o
->storage
== REDIS_VM_SWAPPING
);
9499 /* Search for a matching key in one of the queues */
9500 for (i
= 0; i
< 3; i
++) {
9504 listRewind(lists
[i
],&li
);
9505 while ((ln
= listNext(&li
)) != NULL
) {
9506 iojob
*job
= ln
->value
;
9508 if (job
->canceled
) continue; /* Skip this, already canceled. */
9509 if (job
->key
== o
) {
9510 redisLog(REDIS_DEBUG
,"*** CANCELED %p (%s) (type %d) (LIST ID %d)\n",
9511 (void*)job
, (char*)o
->ptr
, job
->type
, i
);
9512 /* Mark the pages as free since the swap didn't happened
9513 * or happened but is now discarded. */
9514 if (i
!= 1 && job
->type
== REDIS_IOJOB_DO_SWAP
)
9515 vmMarkPagesFree(job
->page
,job
->pages
);
9516 /* Cancel the job. It depends on the list the job is
9519 case 0: /* io_newjobs */
9520 /* If the job was yet not processed the best thing to do
9521 * is to remove it from the queue at all */
9523 listDelNode(lists
[i
],ln
);
9525 case 1: /* io_processing */
9526 /* Oh Shi- the thread is messing with the Job:
9528 * Probably it's accessing the object if this is a
9529 * PREPARE_SWAP or DO_SWAP job.
9530 * If it's a LOAD job it may be reading from disk and
9531 * if we don't wait for the job to terminate before to
9532 * cancel it, maybe in a few microseconds data can be
9533 * corrupted in this pages. So the short story is:
9535 * Better to wait for the job to move into the
9536 * next queue (processed)... */
9538 /* We try again and again until the job is completed. */
9540 /* But let's wait some time for the I/O thread
9541 * to finish with this job. After all this condition
9542 * should be very rare. */
9545 case 2: /* io_processed */
9546 /* The job was already processed, that's easy...
9547 * just mark it as canceled so that we'll ignore it
9548 * when processing completed jobs. */
9552 /* Finally we have to adjust the storage type of the object
9553 * in order to "UNDO" the operaiton. */
9554 if (o
->storage
== REDIS_VM_LOADING
)
9555 o
->storage
= REDIS_VM_SWAPPED
;
9556 else if (o
->storage
== REDIS_VM_SWAPPING
)
9557 o
->storage
= REDIS_VM_MEMORY
;
9564 assert(1 != 1); /* We should never reach this */
9567 static void *IOThreadEntryPoint(void *arg
) {
9572 pthread_detach(pthread_self());
9574 /* Get a new job to process */
9576 if (listLength(server
.io_newjobs
) == 0) {
9577 /* No new jobs in queue, exit. */
9578 redisLog(REDIS_DEBUG
,"Thread %ld exiting, nothing to do",
9579 (long) pthread_self());
9580 server
.io_active_threads
--;
9584 ln
= listFirst(server
.io_newjobs
);
9586 listDelNode(server
.io_newjobs
,ln
);
9587 /* Add the job in the processing queue */
9588 j
->thread
= pthread_self();
9589 listAddNodeTail(server
.io_processing
,j
);
9590 ln
= listLast(server
.io_processing
); /* We use ln later to remove it */
9592 redisLog(REDIS_DEBUG
,"Thread %ld got a new job (type %d): %p about key '%s'",
9593 (long) pthread_self(), j
->type
, (void*)j
, (char*)j
->key
->ptr
);
9595 /* Process the Job */
9596 if (j
->type
== REDIS_IOJOB_LOAD
) {
9597 j
->val
= vmReadObjectFromSwap(j
->page
,j
->key
->vtype
);
9598 } else if (j
->type
== REDIS_IOJOB_PREPARE_SWAP
) {
9599 FILE *fp
= fopen("/dev/null","w+");
9600 j
->pages
= rdbSavedObjectPages(j
->val
,fp
);
9602 } else if (j
->type
== REDIS_IOJOB_DO_SWAP
) {
9603 if (vmWriteObjectOnSwap(j
->val
,j
->page
) == REDIS_ERR
)
9607 /* Done: insert the job into the processed queue */
9608 redisLog(REDIS_DEBUG
,"Thread %ld completed the job: %p (key %s)",
9609 (long) pthread_self(), (void*)j
, (char*)j
->key
->ptr
);
9611 listDelNode(server
.io_processing
,ln
);
9612 listAddNodeTail(server
.io_processed
,j
);
9615 /* Signal the main thread there is new stuff to process */
9616 assert(write(server
.io_ready_pipe_write
,"x",1) == 1);
9618 return NULL
; /* never reached */
9621 static void spawnIOThread(void) {
9623 sigset_t mask
, omask
;
9627 sigaddset(&mask
,SIGCHLD
);
9628 sigaddset(&mask
,SIGHUP
);
9629 sigaddset(&mask
,SIGPIPE
);
9630 pthread_sigmask(SIG_SETMASK
, &mask
, &omask
);
9631 while ((err
= pthread_create(&thread
,&server
.io_threads_attr
,IOThreadEntryPoint
,NULL
)) != 0) {
9632 redisLog(REDIS_WARNING
,"Unable to spawn an I/O thread: %s",
9636 pthread_sigmask(SIG_SETMASK
, &omask
, NULL
);
9637 server
.io_active_threads
++;
9640 /* We need to wait for the last thread to exit before we are able to
9641 * fork() in order to BGSAVE or BGREWRITEAOF. */
9642 static void waitEmptyIOJobsQueue(void) {
9644 int io_processed_len
;
9647 if (listLength(server
.io_newjobs
) == 0 &&
9648 listLength(server
.io_processing
) == 0 &&
9649 server
.io_active_threads
== 0)
9654 /* While waiting for empty jobs queue condition we post-process some
9655 * finshed job, as I/O threads may be hanging trying to write against
9656 * the io_ready_pipe_write FD but there are so much pending jobs that
9658 io_processed_len
= listLength(server
.io_processed
);
9660 if (io_processed_len
) {
9661 vmThreadedIOCompletedJob(NULL
,server
.io_ready_pipe_read
,NULL
,0);
9662 usleep(1000); /* 1 millisecond */
9664 usleep(10000); /* 10 milliseconds */
9669 static void vmReopenSwapFile(void) {
9670 /* Note: we don't close the old one as we are in the child process
9671 * and don't want to mess at all with the original file object. */
9672 server
.vm_fp
= fopen(server
.vm_swap_file
,"r+b");
9673 if (server
.vm_fp
== NULL
) {
9674 redisLog(REDIS_WARNING
,"Can't re-open the VM swap file: %s. Exiting.",
9675 server
.vm_swap_file
);
9678 server
.vm_fd
= fileno(server
.vm_fp
);
9681 /* This function must be called while with threaded IO locked */
9682 static void queueIOJob(iojob
*j
) {
9683 redisLog(REDIS_DEBUG
,"Queued IO Job %p type %d about key '%s'\n",
9684 (void*)j
, j
->type
, (char*)j
->key
->ptr
);
9685 listAddNodeTail(server
.io_newjobs
,j
);
9686 if (server
.io_active_threads
< server
.vm_max_threads
)
9690 static int vmSwapObjectThreaded(robj
*key
, robj
*val
, redisDb
*db
) {
9693 assert(key
->storage
== REDIS_VM_MEMORY
);
9694 assert(key
->refcount
== 1);
9696 j
= zmalloc(sizeof(*j
));
9697 j
->type
= REDIS_IOJOB_PREPARE_SWAP
;
9703 j
->thread
= (pthread_t
) -1;
9704 key
->storage
= REDIS_VM_SWAPPING
;
9712 /* ============ Virtual Memory - Blocking clients on missing keys =========== */
9714 /* This function makes the client 'c' wait for the key 'key' to be loaded.
9715 * If there is not already a job loading the key, it is created.
9716 * The key is added to the io_keys list in the client structure, and also
9717 * in the hash table mapping swapped keys to waiting clients, that is,
9718 * server.io_waited_keys. */
9719 static int waitForSwappedKey(redisClient
*c
, robj
*key
) {
9720 struct dictEntry
*de
;
9724 /* If the key does not exist or is already in RAM we don't need to
9725 * block the client at all. */
9726 de
= dictFind(c
->db
->dict
,key
);
9727 if (de
== NULL
) return 0;
9728 o
= dictGetEntryKey(de
);
9729 if (o
->storage
== REDIS_VM_MEMORY
) {
9731 } else if (o
->storage
== REDIS_VM_SWAPPING
) {
9732 /* We were swapping the key, undo it! */
9733 vmCancelThreadedIOJob(o
);
9737 /* OK: the key is either swapped, or being loaded just now. */
9739 /* Add the key to the list of keys this client is waiting for.
9740 * This maps clients to keys they are waiting for. */
9741 listAddNodeTail(c
->io_keys
,key
);
9744 /* Add the client to the swapped keys => clients waiting map. */
9745 de
= dictFind(c
->db
->io_keys
,key
);
9749 /* For every key we take a list of clients blocked for it */
9751 retval
= dictAdd(c
->db
->io_keys
,key
,l
);
9753 assert(retval
== DICT_OK
);
9755 l
= dictGetEntryVal(de
);
9757 listAddNodeTail(l
,c
);
9759 /* Are we already loading the key from disk? If not create a job */
9760 if (o
->storage
== REDIS_VM_SWAPPED
) {
9763 o
->storage
= REDIS_VM_LOADING
;
9764 j
= zmalloc(sizeof(*j
));
9765 j
->type
= REDIS_IOJOB_LOAD
;
9768 j
->key
->vtype
= o
->vtype
;
9769 j
->page
= o
->vm
.page
;
9772 j
->thread
= (pthread_t
) -1;
9780 /* Preload keys for any command with first, last and step values for
9781 * the command keys prototype, as defined in the command table. */
9782 static void waitForMultipleSwappedKeys(redisClient
*c
, struct redisCommand
*cmd
, int argc
, robj
**argv
) {
9784 if (cmd
->vm_firstkey
== 0) return;
9785 last
= cmd
->vm_lastkey
;
9786 if (last
< 0) last
= argc
+last
;
9787 for (j
= cmd
->vm_firstkey
; j
<= last
; j
+= cmd
->vm_keystep
) {
9788 redisAssert(j
< argc
);
9789 waitForSwappedKey(c
,argv
[j
]);
9793 /* Preload keys needed for the ZUNIONSTORE and ZINTERSTORE commands.
9794 * Note that the number of keys to preload is user-defined, so we need to
9795 * apply a sanity check against argc. */
9796 static void zunionInterBlockClientOnSwappedKeys(redisClient
*c
, struct redisCommand
*cmd
, int argc
, robj
**argv
) {
9800 num
= atoi(argv
[2]->ptr
);
9801 if (num
> (argc
-3)) return;
9802 for (i
= 0; i
< num
; i
++) {
9803 waitForSwappedKey(c
,argv
[3+i
]);
9807 /* Preload keys needed to execute the entire MULTI/EXEC block.
9809 * This function is called by blockClientOnSwappedKeys when EXEC is issued,
9810 * and will block the client when any command requires a swapped out value. */
9811 static void execBlockClientOnSwappedKeys(redisClient
*c
, struct redisCommand
*cmd
, int argc
, robj
**argv
) {
9813 struct redisCommand
*mcmd
;
9816 REDIS_NOTUSED(argc
);
9817 REDIS_NOTUSED(argv
);
9819 if (!(c
->flags
& REDIS_MULTI
)) return;
9820 for (i
= 0; i
< c
->mstate
.count
; i
++) {
9821 mcmd
= c
->mstate
.commands
[i
].cmd
;
9822 margc
= c
->mstate
.commands
[i
].argc
;
9823 margv
= c
->mstate
.commands
[i
].argv
;
9825 if (mcmd
->vm_preload_proc
!= NULL
) {
9826 mcmd
->vm_preload_proc(c
,mcmd
,margc
,margv
);
9828 waitForMultipleSwappedKeys(c
,mcmd
,margc
,margv
);
9833 /* Is this client attempting to run a command against swapped keys?
9834 * If so, block it ASAP, load the keys in background, then resume it.
9836 * The important idea about this function is that it can fail! If keys will
9837 * still be swapped when the client is resumed, the key lookups will
9838 * just block loading keys from disk. In practical terms this should only
9839 * happen with SORT BY command or if there is a bug in this function.
9841 * Return 1 if the client is marked as blocked, 0 if the client can
9842 * continue as the keys it is going to access appear to be in memory. */
9843 static int blockClientOnSwappedKeys(redisClient
*c
, struct redisCommand
*cmd
) {
9844 if (cmd
->vm_preload_proc
!= NULL
) {
9845 cmd
->vm_preload_proc(c
,cmd
,c
->argc
,c
->argv
);
9847 waitForMultipleSwappedKeys(c
,cmd
,c
->argc
,c
->argv
);
9850 /* If the client was blocked for at least one key, mark it as blocked. */
9851 if (listLength(c
->io_keys
)) {
9852 c
->flags
|= REDIS_IO_WAIT
;
9853 aeDeleteFileEvent(server
.el
,c
->fd
,AE_READABLE
);
9854 server
.vm_blocked_clients
++;
9861 /* Remove the 'key' from the list of blocked keys for a given client.
9863 * The function returns 1 when there are no longer blocking keys after
9864 * the current one was removed (and the client can be unblocked). */
9865 static int dontWaitForSwappedKey(redisClient
*c
, robj
*key
) {
9869 struct dictEntry
*de
;
9871 /* Remove the key from the list of keys this client is waiting for. */
9872 listRewind(c
->io_keys
,&li
);
9873 while ((ln
= listNext(&li
)) != NULL
) {
9874 if (equalStringObjects(ln
->value
,key
)) {
9875 listDelNode(c
->io_keys
,ln
);
9881 /* Remove the client from the key => waiting clients map. */
9882 de
= dictFind(c
->db
->io_keys
,key
);
9884 l
= dictGetEntryVal(de
);
9885 ln
= listSearchKey(l
,c
);
9888 if (listLength(l
) == 0)
9889 dictDelete(c
->db
->io_keys
,key
);
9891 return listLength(c
->io_keys
) == 0;
9894 static void handleClientsBlockedOnSwappedKey(redisDb
*db
, robj
*key
) {
9895 struct dictEntry
*de
;
9900 de
= dictFind(db
->io_keys
,key
);
9903 l
= dictGetEntryVal(de
);
9904 len
= listLength(l
);
9905 /* Note: we can't use something like while(listLength(l)) as the list
9906 * can be freed by the calling function when we remove the last element. */
9909 redisClient
*c
= ln
->value
;
9911 if (dontWaitForSwappedKey(c
,key
)) {
9912 /* Put the client in the list of clients ready to go as we
9913 * loaded all the keys about it. */
9914 listAddNodeTail(server
.io_ready_clients
,c
);
9919 /* =========================== Remote Configuration ========================= */
9921 static void configSetCommand(redisClient
*c
) {
9922 robj
*o
= getDecodedObject(c
->argv
[3]);
9925 if (!strcasecmp(c
->argv
[2]->ptr
,"dbfilename")) {
9926 zfree(server
.dbfilename
);
9927 server
.dbfilename
= zstrdup(o
->ptr
);
9928 } else if (!strcasecmp(c
->argv
[2]->ptr
,"requirepass")) {
9929 zfree(server
.requirepass
);
9930 server
.requirepass
= zstrdup(o
->ptr
);
9931 } else if (!strcasecmp(c
->argv
[2]->ptr
,"masterauth")) {
9932 zfree(server
.masterauth
);
9933 server
.masterauth
= zstrdup(o
->ptr
);
9934 } else if (!strcasecmp(c
->argv
[2]->ptr
,"maxmemory")) {
9935 if (getLongLongFromObject(o
,&ll
) == REDIS_ERR
||
9936 ll
< 0) goto badfmt
;
9937 server
.maxmemory
= ll
;
9938 } else if (!strcasecmp(c
->argv
[2]->ptr
,"timeout")) {
9939 if (getLongLongFromObject(o
,&ll
) == REDIS_ERR
||
9940 ll
< 0 || ll
> LONG_MAX
) goto badfmt
;
9941 server
.maxidletime
= ll
;
9942 } else if (!strcasecmp(c
->argv
[2]->ptr
,"appendfsync")) {
9943 if (!strcasecmp(o
->ptr
,"no")) {
9944 server
.appendfsync
= APPENDFSYNC_NO
;
9945 } else if (!strcasecmp(o
->ptr
,"everysec")) {
9946 server
.appendfsync
= APPENDFSYNC_EVERYSEC
;
9947 } else if (!strcasecmp(o
->ptr
,"always")) {
9948 server
.appendfsync
= APPENDFSYNC_ALWAYS
;
9952 } else if (!strcasecmp(c
->argv
[2]->ptr
,"appendonly")) {
9953 int old
= server
.appendonly
;
9954 int new = yesnotoi(o
->ptr
);
9956 if (new == -1) goto badfmt
;
9961 if (startAppendOnly() == REDIS_ERR
) {
9962 addReplySds(c
,sdscatprintf(sdsempty(),
9963 "-ERR Unable to turn on AOF. Check server logs.\r\n"));
9969 } else if (!strcasecmp(c
->argv
[2]->ptr
,"save")) {
9971 sds
*v
= sdssplitlen(o
->ptr
,sdslen(o
->ptr
)," ",1,&vlen
);
9973 /* Perform sanity check before setting the new config:
9974 * - Even number of args
9975 * - Seconds >= 1, changes >= 0 */
9977 sdsfreesplitres(v
,vlen
);
9980 for (j
= 0; j
< vlen
; j
++) {
9984 val
= strtoll(v
[j
], &eptr
, 10);
9985 if (eptr
[0] != '\0' ||
9986 ((j
& 1) == 0 && val
< 1) ||
9987 ((j
& 1) == 1 && val
< 0)) {
9988 sdsfreesplitres(v
,vlen
);
9992 /* Finally set the new config */
9993 resetServerSaveParams();
9994 for (j
= 0; j
< vlen
; j
+= 2) {
9998 seconds
= strtoll(v
[j
],NULL
,10);
9999 changes
= strtoll(v
[j
+1],NULL
,10);
10000 appendServerSaveParams(seconds
, changes
);
10002 sdsfreesplitres(v
,vlen
);
10004 addReplySds(c
,sdscatprintf(sdsempty(),
10005 "-ERR not supported CONFIG parameter %s\r\n",
10006 (char*)c
->argv
[2]->ptr
));
10011 addReply(c
,shared
.ok
);
10014 badfmt
: /* Bad format errors */
10015 addReplySds(c
,sdscatprintf(sdsempty(),
10016 "-ERR invalid argument '%s' for CONFIG SET '%s'\r\n",
10018 (char*)c
->argv
[2]->ptr
));
10022 static void configGetCommand(redisClient
*c
) {
10023 robj
*o
= getDecodedObject(c
->argv
[2]);
10024 robj
*lenobj
= createObject(REDIS_STRING
,NULL
);
10025 char *pattern
= o
->ptr
;
10028 addReply(c
,lenobj
);
10029 decrRefCount(lenobj
);
10031 if (stringmatch(pattern
,"dbfilename",0)) {
10032 addReplyBulkCString(c
,"dbfilename");
10033 addReplyBulkCString(c
,server
.dbfilename
);
10036 if (stringmatch(pattern
,"requirepass",0)) {
10037 addReplyBulkCString(c
,"requirepass");
10038 addReplyBulkCString(c
,server
.requirepass
);
10041 if (stringmatch(pattern
,"masterauth",0)) {
10042 addReplyBulkCString(c
,"masterauth");
10043 addReplyBulkCString(c
,server
.masterauth
);
10046 if (stringmatch(pattern
,"maxmemory",0)) {
10049 ll2string(buf
,128,server
.maxmemory
);
10050 addReplyBulkCString(c
,"maxmemory");
10051 addReplyBulkCString(c
,buf
);
10054 if (stringmatch(pattern
,"timeout",0)) {
10057 ll2string(buf
,128,server
.maxidletime
);
10058 addReplyBulkCString(c
,"timeout");
10059 addReplyBulkCString(c
,buf
);
10062 if (stringmatch(pattern
,"appendonly",0)) {
10063 addReplyBulkCString(c
,"appendonly");
10064 addReplyBulkCString(c
,server
.appendonly
? "yes" : "no");
10067 if (stringmatch(pattern
,"appendfsync",0)) {
10070 switch(server
.appendfsync
) {
10071 case APPENDFSYNC_NO
: policy
= "no"; break;
10072 case APPENDFSYNC_EVERYSEC
: policy
= "everysec"; break;
10073 case APPENDFSYNC_ALWAYS
: policy
= "always"; break;
10074 default: policy
= "unknown"; break; /* too harmless to panic */
10076 addReplyBulkCString(c
,"appendfsync");
10077 addReplyBulkCString(c
,policy
);
10080 if (stringmatch(pattern
,"save",0)) {
10081 sds buf
= sdsempty();
10084 for (j
= 0; j
< server
.saveparamslen
; j
++) {
10085 buf
= sdscatprintf(buf
,"%ld %d",
10086 server
.saveparams
[j
].seconds
,
10087 server
.saveparams
[j
].changes
);
10088 if (j
!= server
.saveparamslen
-1)
10089 buf
= sdscatlen(buf
," ",1);
10091 addReplyBulkCString(c
,"save");
10092 addReplyBulkCString(c
,buf
);
10097 lenobj
->ptr
= sdscatprintf(sdsempty(),"*%d\r\n",matches
*2);
10100 static void configCommand(redisClient
*c
) {
10101 if (!strcasecmp(c
->argv
[1]->ptr
,"set")) {
10102 if (c
->argc
!= 4) goto badarity
;
10103 configSetCommand(c
);
10104 } else if (!strcasecmp(c
->argv
[1]->ptr
,"get")) {
10105 if (c
->argc
!= 3) goto badarity
;
10106 configGetCommand(c
);
10107 } else if (!strcasecmp(c
->argv
[1]->ptr
,"resetstat")) {
10108 if (c
->argc
!= 2) goto badarity
;
10109 server
.stat_numcommands
= 0;
10110 server
.stat_numconnections
= 0;
10111 server
.stat_expiredkeys
= 0;
10112 server
.stat_starttime
= time(NULL
);
10113 addReply(c
,shared
.ok
);
10115 addReplySds(c
,sdscatprintf(sdsempty(),
10116 "-ERR CONFIG subcommand must be one of GET, SET, RESETSTAT\r\n"));
10121 addReplySds(c
,sdscatprintf(sdsempty(),
10122 "-ERR Wrong number of arguments for CONFIG %s\r\n",
10123 (char*) c
->argv
[1]->ptr
));
10126 /* =========================== Pubsub implementation ======================== */
10128 static void freePubsubPattern(void *p
) {
10129 pubsubPattern
*pat
= p
;
10131 decrRefCount(pat
->pattern
);
/* Equality predicate for two pubsubPattern entries passed as opaque
 * pointers 'a' and 'b'. Returns non-zero only when both entries refer to
 * the same client AND their pattern objects compare equal according to
 * equalStringObjects(); returns 0 otherwise.
 * NOTE(review): presumably installed as the match callback of the list
 * holding pubsubPattern entries -- confirm at the list setup site. */
10135 static int listMatchPubsubPattern(void *a
, void *b
) {
10136 pubsubPattern
*pa
= a
, *pb
= b
;
/* Compare the client pointers first (cheap identity test); the pattern
 * objects are compared only if the clients match. */
10138 return (pa
->client
== pb
->client
) &&
10139 (equalStringObjects(pa
->pattern
,pb
->pattern
));
10142 /* Subscribe a client to a channel. Returns 1 if the operation succeeded, or
10143 * 0 if the client was already subscribed to that channel. */
10144 static int pubsubSubscribeChannel(redisClient
*c
, robj
*channel
) {
10145 struct dictEntry
*de
;
10146 list
*clients
= NULL
;
10149 /* Add the channel to the client -> channels hash table */
10150 if (dictAdd(c
->pubsub_channels
,channel
,NULL
) == DICT_OK
) {
10152 incrRefCount(channel
);
10153 /* Add the client to the channel -> list of clients hash table */
10154 de
= dictFind(server
.pubsub_channels
,channel
);
10156 clients
= listCreate();
10157 dictAdd(server
.pubsub_channels
,channel
,clients
);
10158 incrRefCount(channel
);
10160 clients
= dictGetEntryVal(de
);
10162 listAddNodeTail(clients
,c
);
10164 /* Notify the client */
10165 addReply(c
,shared
.mbulk3
);
10166 addReply(c
,shared
.subscribebulk
);
10167 addReplyBulk(c
,channel
);
10168 addReplyLongLong(c
,dictSize(c
->pubsub_channels
)+listLength(c
->pubsub_patterns
));
10172 /* Unsubscribe a client from a channel. Returns 1 if the operation succeeded, or
10173 * 0 if the client was not subscribed to the specified channel. */
10174 static int pubsubUnsubscribeChannel(redisClient
*c
, robj
*channel
, int notify
) {
10175 struct dictEntry
*de
;
10180 /* Remove the channel from the client -> channels hash table */
10181 incrRefCount(channel
); /* channel may be just a pointer to the same object
10182 we have in the hash tables. Protect it... */
10183 if (dictDelete(c
->pubsub_channels
,channel
) == DICT_OK
) {
10185 /* Remove the client from the channel -> clients list hash table */
10186 de
= dictFind(server
.pubsub_channels
,channel
);
10187 assert(de
!= NULL
);
10188 clients
= dictGetEntryVal(de
);
10189 ln
= listSearchKey(clients
,c
);
10190 assert(ln
!= NULL
);
10191 listDelNode(clients
,ln
);
10192 if (listLength(clients
) == 0) {
10193 /* Free the list and the associated hash entry entirely if this was
10194 * the last client, so that it will not be possible to abuse
10195 * Redis PUBSUB by creating millions of channels. */
10196 dictDelete(server
.pubsub_channels
,channel
);
10199 /* Notify the client */
10201 addReply(c
,shared
.mbulk3
);
10202 addReply(c
,shared
.unsubscribebulk
);
10203 addReplyBulk(c
,channel
);
10204 addReplyLongLong(c
,dictSize(c
->pubsub_channels
)+
10205 listLength(c
->pubsub_patterns
));
10208 decrRefCount(channel
); /* it is finally safe to release it */
10212 /* Subscribe a client to a pattern. Returns 1 if the operation succeeded, or 0 if the client was already subscribed to that pattern. */
10213 static int pubsubSubscribePattern(redisClient
*c
, robj
*pattern
) {
10216 if (listSearchKey(c
->pubsub_patterns
,pattern
) == NULL
) {
10218 pubsubPattern
*pat
;
10219 listAddNodeTail(c
->pubsub_patterns
,pattern
);
10220 incrRefCount(pattern
);
10221 pat
= zmalloc(sizeof(*pat
));
10222 pat
->pattern
= getDecodedObject(pattern
);
10224 listAddNodeTail(server
.pubsub_patterns
,pat
);
10226 /* Notify the client */
10227 addReply(c
,shared
.mbulk3
);
10228 addReply(c
,shared
.psubscribebulk
);
10229 addReplyBulk(c
,pattern
);
10230 addReplyLongLong(c
,dictSize(c
->pubsub_channels
)+listLength(c
->pubsub_patterns
));
10234 /* Unsubscribe a client from a pattern. Returns 1 if the operation succeeded, or
10235 * 0 if the client was not subscribed to the specified pattern. */
10236 static int pubsubUnsubscribePattern(redisClient
*c
, robj
*pattern
, int notify
) {
10241 incrRefCount(pattern
); /* Protect the object. May be the same we remove */
10242 if ((ln
= listSearchKey(c
->pubsub_patterns
,pattern
)) != NULL
) {
10244 listDelNode(c
->pubsub_patterns
,ln
);
10246 pat
.pattern
= pattern
;
10247 ln
= listSearchKey(server
.pubsub_patterns
,&pat
);
10248 listDelNode(server
.pubsub_patterns
,ln
);
10250 /* Notify the client */
10252 addReply(c
,shared
.mbulk3
);
10253 addReply(c
,shared
.punsubscribebulk
);
10254 addReplyBulk(c
,pattern
);
10255 addReplyLongLong(c
,dictSize(c
->pubsub_channels
)+
10256 listLength(c
->pubsub_patterns
));
10258 decrRefCount(pattern
);
10262 /* Unsubscribe from all the channels. Return the number of channels the
10263 * client was subscribed to. */
10264 static int pubsubUnsubscribeAllChannels(redisClient
*c
, int notify
) {
10265 dictIterator
*di
= dictGetIterator(c
->pubsub_channels
);
10269 while((de
= dictNext(di
)) != NULL
) {
10270 robj
*channel
= dictGetEntryKey(de
);
10272 count
+= pubsubUnsubscribeChannel(c
,channel
,notify
);
10274 dictReleaseIterator(di
);
10278 /* Unsubscribe from all the patterns. Return the number of patterns the
10279 * client was subscribed to. */
10280 static int pubsubUnsubscribeAllPatterns(redisClient
*c
, int notify
) {
10285 listRewind(c
->pubsub_patterns
,&li
);
10286 while ((ln
= listNext(&li
)) != NULL
) {
10287 robj
*pattern
= ln
->value
;
10289 count
+= pubsubUnsubscribePattern(c
,pattern
,notify
);
10294 /* Publish a message */
10295 static int pubsubPublishMessage(robj
*channel
, robj
*message
) {
10297 struct dictEntry
*de
;
10301 /* Send to clients listening for that channel */
10302 de
= dictFind(server
.pubsub_channels
,channel
);
10304 list
*list
= dictGetEntryVal(de
);
10308 listRewind(list
,&li
);
10309 while ((ln
= listNext(&li
)) != NULL
) {
10310 redisClient
*c
= ln
->value
;
10312 addReply(c
,shared
.mbulk3
);
10313 addReply(c
,shared
.messagebulk
);
10314 addReplyBulk(c
,channel
);
10315 addReplyBulk(c
,message
);
10319 /* Send to clients subscribed to patterns matching the channel */
10320 if (listLength(server
.pubsub_patterns
)) {
10321 listRewind(server
.pubsub_patterns
,&li
);
10322 channel
= getDecodedObject(channel
);
10323 while ((ln
= listNext(&li
)) != NULL
) {
10324 pubsubPattern
*pat
= ln
->value
;
10326 if (stringmatchlen((char*)pat
->pattern
->ptr
,
10327 sdslen(pat
->pattern
->ptr
),
10328 (char*)channel
->ptr
,
10329 sdslen(channel
->ptr
),0)) {
10330 addReply(pat
->client
,shared
.mbulk4
);
10331 addReply(pat
->client
,shared
.pmessagebulk
);
10332 addReplyBulk(pat
->client
,pat
->pattern
);
10333 addReplyBulk(pat
->client
,channel
);
10334 addReplyBulk(pat
->client
,message
);
10338 decrRefCount(channel
);
/* Command handler that subscribes client 'c' to channels: every argument
 * after the command name (argv[1] .. argv[argc-1]) is passed, one at a
 * time, to pubsubSubscribeChannel(). The per-channel return value is not
 * inspected here. */
10343 static void subscribeCommand(redisClient
*c
) {
/* Start at 1: argv[0] is the command name itself. */
10346 for (j
= 1; j
< c
->argc
; j
++)
10347 pubsubSubscribeChannel(c
,c
->argv
[j
]);
10350 static void unsubscribeCommand(redisClient
*c
) {
10351 if (c
->argc
== 1) {
10352 pubsubUnsubscribeAllChannels(c
,1);
10357 for (j
= 1; j
< c
->argc
; j
++)
10358 pubsubUnsubscribeChannel(c
,c
->argv
[j
],1);
/* Command handler that subscribes client 'c' to patterns: every argument
 * after the command name (argv[1] .. argv[argc-1]) is passed, one at a
 * time, to pubsubSubscribePattern(). The per-pattern return value is not
 * inspected here. */
10362 static void psubscribeCommand(redisClient
*c
) {
/* Start at 1: argv[0] is the command name itself. */
10365 for (j
= 1; j
< c
->argc
; j
++)
10366 pubsubSubscribePattern(c
,c
->argv
[j
]);
10369 static void punsubscribeCommand(redisClient
*c
) {
10370 if (c
->argc
== 1) {
10371 pubsubUnsubscribeAllPatterns(c
,1);
10376 for (j
= 1; j
< c
->argc
; j
++)
10377 pubsubUnsubscribePattern(c
,c
->argv
[j
],1);
/* Command handler that publishes a message: argv[1] is passed as the
 * channel and argv[2] as the message to pubsubPublishMessage(); the
 * integer it returns (stored in 'receivers') is sent back to the client
 * as the reply. */
10381 static void publishCommand(redisClient
*c
) {
10382 int receivers
= pubsubPublishMessage(c
->argv
[1],c
->argv
[2]);
10383 addReplyLongLong(c
,receivers
);
10386 /* ===================== WATCH (CAS alike for MULTI/EXEC) ===================
10388 * The implementation uses a per-DB hash table mapping keys to list of clients
10389 * WATCHing those keys, so that given a key that is going to be modified
10390 * we can mark all the associated clients as dirty.
10392 * Also every client contains a list of WATCHed keys so that it's possible to
10393 * un-watch such keys when the client is freed or when UNWATCH is called. */
10395 /* In the client->watched_keys list we need to use watchedKey structures
10396 * as in order to identify a key in Redis we need both the key name and the
10398 typedef struct watchedKey
{
10403 /* Watch for the specified key */
10404 static void watchForKey(redisClient
*c
, robj
*key
) {
10405 list
*clients
= NULL
;
10410 /* Check if we are already watching for this key */
10411 listRewind(c
->watched_keys
,&li
);
10412 while((ln
= listNext(&li
))) {
10413 wk
= listNodeValue(ln
);
10414 if (wk
->db
== c
->db
&& equalStringObjects(key
,wk
->key
))
10415 return; /* Key already watched */
10417 /* This key is not already watched in this DB. Let's add it */
10418 clients
= dictFetchValue(c
->db
->watched_keys
,key
);
10420 clients
= listCreate();
10421 dictAdd(c
->db
->watched_keys
,key
,clients
);
10424 listAddNodeTail(clients
,c
);
10425 /* Add the new key to the list of keys watched by this client */
10426 wk
= zmalloc(sizeof(*wk
));
10430 listAddNodeTail(c
->watched_keys
,wk
);
10433 /* Unwatch all the keys watched by this client. To clean the EXEC dirty
10434 * flag is up to the caller. */
10435 static void unwatchAllKeys(redisClient
*c
) {
10439 if (listLength(c
->watched_keys
) == 0) return;
10440 listRewind(c
->watched_keys
,&li
);
10441 while((ln
= listNext(&li
))) {
10445 /* Lookup the watched key -> clients list and remove the client
10447 wk
= listNodeValue(ln
);
10448 clients
= dictFetchValue(wk
->db
->watched_keys
, wk
->key
);
10449 assert(clients
!= NULL
);
10450 listDelNode(clients
,listSearchKey(clients
,c
));
10451 /* Kill the entry at all if this was the only client */
10452 if (listLength(clients
) == 0)
10453 dictDelete(wk
->db
->watched_keys
, wk
->key
);
10454 /* Remove this watched key from the client->watched list */
10455 listDelNode(c
->watched_keys
,ln
);
10456 decrRefCount(wk
->key
);
10461 /* "Touch" a key, so that if this key is being WATCHed by some client the
10462 * next EXEC will fail. */
10463 static void touchWatchedKey(redisDb
*db
, robj
*key
) {
10468 if (dictSize(db
->watched_keys
) == 0) return;
10469 clients
= dictFetchValue(db
->watched_keys
, key
);
10470 if (!clients
) return;
10472 /* Mark all the clients watching this key as REDIS_DIRTY_CAS */
10473 /* Check if we are already watching for this key */
10474 listRewind(clients
,&li
);
10475 while((ln
= listNext(&li
))) {
10476 redisClient
*c
= listNodeValue(ln
);
10478 c
->flags
|= REDIS_DIRTY_CAS
;
10482 /* On FLUSHDB or FLUSHALL all the watched keys that are present before the
10483 * flush but will be deleted as effect of the flushing operation should
10484 * be touched. "dbid" is the DB that's getting the flush. -1 if it is
10485 * a FLUSHALL operation (all the DBs flushed). */
10486 static void touchWatchedKeysOnFlush(int dbid
) {
10490 /* For every client, check all the watched keys */
10491 listRewind(server
.clients
,&li1
);
10492 while((ln
= listNext(&li1
))) {
10493 redisClient
*c
= listNodeValue(ln
);
10494 listRewind(c
->watched_keys
,&li2
);
10495 while((ln
= listNext(&li2
))) {
10496 watchedKey
*wk
= listNodeValue(ln
);
10498 /* For every watched key matching the specified DB, if the
10499 * key exists, mark the client as dirty, as the key will be
10501 if (dbid
== -1 || wk
->db
->id
== dbid
) {
10502 if (dictFind(wk
->db
->dict
, wk
->key
) != NULL
)
10503 c
->flags
|= REDIS_DIRTY_CAS
;
/* Command handler that registers keys to watch for client 'c': every
 * argument after the command name (argv[1] .. argv[argc-1]) is passed to
 * watchForKey(), then shared.ok is sent as the reply. */
10509 static void watchCommand(redisClient
*c
) {
/* Start at 1: argv[0] is the command name itself. */
10512 for (j
= 1; j
< c
->argc
; j
++)
10513 watchForKey(c
,c
->argv
[j
]);
10514 addReply(c
,shared
.ok
);
10517 static void unwatchCommand(redisClient
*c
) {
10519 c
->flags
&= (~REDIS_DIRTY_CAS
);
10520 addReply(c
,shared
.ok
);
10523 /* ================================= Debugging ============================== */
10525 /* Compute the sha1 of string at 's' with 'len' bytes long.
10526 * The SHA1 is then xored against the string pointed by digest.
10527 * Since xor is commutative, this operation is used in order to
10528 * "add" digests relative to unordered elements.
10530 * So digest(a,b,c,d) will be the same as digest(b,a,c,d) */
10531 static void xorDigest(unsigned char *digest
, void *ptr
, size_t len
) {
10533 unsigned char hash
[20], *s
= ptr
;
10537 SHA1Update(&ctx
,s
,len
);
10538 SHA1Final(hash
,&ctx
);
10540 for (j
= 0; j
< 20; j
++)
10541 digest
[j
] ^= hash
[j
];
10544 static void xorObjectDigest(unsigned char *digest
, robj
*o
) {
10545 o
= getDecodedObject(o
);
10546 xorDigest(digest
,o
->ptr
,sdslen(o
->ptr
));
10550 /* This function instead of just computing the SHA1 and xoring it
10551 * against digest, also performs the digest of "digest" itself and
10552 * replace the old value with the new one.
10554 * So the final digest will be:
10556 * digest = SHA1(digest xor SHA1(data))
10558 * This function is used every time we want to preserve the order so
10559 * that digest(a,b,c,d) will be different than digest(b,c,d,a)
10561 * Also note that mixdigest("foo") followed by mixdigest("bar")
10562 * will lead to a different digest compared to "fo", "obar".
10564 static void mixDigest(unsigned char *digest
, void *ptr
, size_t len
) {
10568 xorDigest(digest
,s
,len
);
10570 SHA1Update(&ctx
,digest
,20);
10571 SHA1Final(digest
,&ctx
);
10574 static void mixObjectDigest(unsigned char *digest
, robj
*o
) {
10575 o
= getDecodedObject(o
);
10576 mixDigest(digest
,o
->ptr
,sdslen(o
->ptr
));
10580 /* Compute the dataset digest. Since keys, sets elements, hashes elements
10581 * are not ordered, we use a trick: every aggregate digest is the xor
10582 * of the digests of their elements. This way the order will not change
10583 * the result. For list instead we use a feedback entering the output digest
10584 * as input in order to ensure that a different ordered list will result in
10585 * a different digest. */
10586 static void computeDatasetDigest(unsigned char *final
) {
10587 unsigned char digest
[20];
10589 dictIterator
*di
= NULL
;
10594 memset(final
,0,20); /* Start with a clean result */
10596 for (j
= 0; j
< server
.dbnum
; j
++) {
10597 redisDb
*db
= server
.db
+j
;
10599 if (dictSize(db
->dict
) == 0) continue;
10600 di
= dictGetIterator(db
->dict
);
10602 /* hash the DB id, so the same dataset moved in a different
10603 * DB will lead to a different digest */
10605 mixDigest(final
,&aux
,sizeof(aux
));
10607 /* Iterate this DB writing every entry */
10608 while((de
= dictNext(di
)) != NULL
) {
10609 robj
*key
, *o
, *kcopy
;
10612 memset(digest
,0,20); /* This key-val digest */
10613 key
= dictGetEntryKey(de
);
10615 if (!server
.vm_enabled
) {
10616 mixObjectDigest(digest
,key
);
10617 o
= dictGetEntryVal(de
);
10619 /* Don't work with the key directly as when VM is active
10620 * this is unsafe: TODO: fix decrRefCount to check if the
10621 * count really reached 0 to avoid this mess */
10622 kcopy
= dupStringObject(key
);
10623 mixObjectDigest(digest
,kcopy
);
10624 o
= lookupKeyRead(db
,kcopy
);
10625 decrRefCount(kcopy
);
10627 aux
= htonl(o
->type
);
10628 mixDigest(digest
,&aux
,sizeof(aux
));
10629 expiretime
= getExpire(db
,key
);
10631 /* Save the key and associated value */
10632 if (o
->type
== REDIS_STRING
) {
10633 mixObjectDigest(digest
,o
);
10634 } else if (o
->type
== REDIS_LIST
) {
10635 list
*list
= o
->ptr
;
10639 listRewind(list
,&li
);
10640 while((ln
= listNext(&li
))) {
10641 robj
*eleobj
= listNodeValue(ln
);
10643 mixObjectDigest(digest
,eleobj
);
10645 } else if (o
->type
== REDIS_SET
) {
10646 dict
*set
= o
->ptr
;
10647 dictIterator
*di
= dictGetIterator(set
);
10650 while((de
= dictNext(di
)) != NULL
) {
10651 robj
*eleobj
= dictGetEntryKey(de
);
10653 xorObjectDigest(digest
,eleobj
);
10655 dictReleaseIterator(di
);
10656 } else if (o
->type
== REDIS_ZSET
) {
10658 dictIterator
*di
= dictGetIterator(zs
->dict
);
10661 while((de
= dictNext(di
)) != NULL
) {
10662 robj
*eleobj
= dictGetEntryKey(de
);
10663 double *score
= dictGetEntryVal(de
);
10664 unsigned char eledigest
[20];
10666 snprintf(buf
,sizeof(buf
),"%.17g",*score
);
10667 memset(eledigest
,0,20);
10668 mixObjectDigest(eledigest
,eleobj
);
10669 mixDigest(eledigest
,buf
,strlen(buf
));
10670 xorDigest(digest
,eledigest
,20);
10672 dictReleaseIterator(di
);
10673 } else if (o
->type
== REDIS_HASH
) {
10677 hi
= hashInitIterator(o
);
10678 while (hashNext(hi
) != REDIS_ERR
) {
10679 unsigned char eledigest
[20];
10681 memset(eledigest
,0,20);
10682 obj
= hashCurrent(hi
,REDIS_HASH_KEY
);
10683 mixObjectDigest(eledigest
,obj
);
10685 obj
= hashCurrent(hi
,REDIS_HASH_VALUE
);
10686 mixObjectDigest(eledigest
,obj
);
10688 xorDigest(digest
,eledigest
,20);
10690 hashReleaseIterator(hi
);
10692 redisPanic("Unknown object type");
10694 /* If the key has an expire, add it to the mix */
10695 if (expiretime
!= -1) xorDigest(digest
,"!!expire!!",10);
10696 /* We can finally xor the key-val digest to the final digest */
10697 xorDigest(final
,digest
,20);
10699 dictReleaseIterator(di
);
10703 static void debugCommand(redisClient
*c
) {
10704 if (!strcasecmp(c
->argv
[1]->ptr
,"segfault")) {
10705 *((char*)-1) = 'x';
10706 } else if (!strcasecmp(c
->argv
[1]->ptr
,"reload")) {
10707 if (rdbSave(server
.dbfilename
) != REDIS_OK
) {
10708 addReply(c
,shared
.err
);
10712 if (rdbLoad(server
.dbfilename
) != REDIS_OK
) {
10713 addReply(c
,shared
.err
);
10716 redisLog(REDIS_WARNING
,"DB reloaded by DEBUG RELOAD");
10717 addReply(c
,shared
.ok
);
10718 } else if (!strcasecmp(c
->argv
[1]->ptr
,"loadaof")) {
10720 if (loadAppendOnlyFile(server
.appendfilename
) != REDIS_OK
) {
10721 addReply(c
,shared
.err
);
10724 redisLog(REDIS_WARNING
,"Append Only File loaded by DEBUG LOADAOF");
10725 addReply(c
,shared
.ok
);
10726 } else if (!strcasecmp(c
->argv
[1]->ptr
,"object") && c
->argc
== 3) {
10727 dictEntry
*de
= dictFind(c
->db
->dict
,c
->argv
[2]);
10731 addReply(c
,shared
.nokeyerr
);
10734 key
= dictGetEntryKey(de
);
10735 val
= dictGetEntryVal(de
);
10736 if (!server
.vm_enabled
|| (key
->storage
== REDIS_VM_MEMORY
||
10737 key
->storage
== REDIS_VM_SWAPPING
)) {
10741 if (val
->encoding
< (sizeof(strencoding
)/sizeof(char*))) {
10742 strenc
= strencoding
[val
->encoding
];
10744 snprintf(buf
,64,"unknown encoding %d\n", val
->encoding
);
10747 addReplySds(c
,sdscatprintf(sdsempty(),
10748 "+Key at:%p refcount:%d, value at:%p refcount:%d "
10749 "encoding:%s serializedlength:%lld\r\n",
10750 (void*)key
, key
->refcount
, (void*)val
, val
->refcount
,
10751 strenc
, (long long) rdbSavedObjectLen(val
,NULL
)));
10753 addReplySds(c
,sdscatprintf(sdsempty(),
10754 "+Key at:%p refcount:%d, value swapped at: page %llu "
10755 "using %llu pages\r\n",
10756 (void*)key
, key
->refcount
, (unsigned long long) key
->vm
.page
,
10757 (unsigned long long) key
->vm
.usedpages
));
10759 } else if (!strcasecmp(c
->argv
[1]->ptr
,"swapin") && c
->argc
== 3) {
10760 lookupKeyRead(c
->db
,c
->argv
[2]);
10761 addReply(c
,shared
.ok
);
10762 } else if (!strcasecmp(c
->argv
[1]->ptr
,"swapout") && c
->argc
== 3) {
10763 dictEntry
*de
= dictFind(c
->db
->dict
,c
->argv
[2]);
10766 if (!server
.vm_enabled
) {
10767 addReplySds(c
,sdsnew("-ERR Virtual Memory is disabled\r\n"));
10771 addReply(c
,shared
.nokeyerr
);
10774 key
= dictGetEntryKey(de
);
10775 val
= dictGetEntryVal(de
);
10776 /* If the key is shared we want to create a copy */
10777 if (key
->refcount
> 1) {
10778 robj
*newkey
= dupStringObject(key
);
10780 key
= dictGetEntryKey(de
) = newkey
;
10783 if (key
->storage
!= REDIS_VM_MEMORY
) {
10784 addReplySds(c
,sdsnew("-ERR This key is not in memory\r\n"));
10785 } else if (vmSwapObjectBlocking(key
,val
) == REDIS_OK
) {
10786 dictGetEntryVal(de
) = NULL
;
10787 addReply(c
,shared
.ok
);
10789 addReply(c
,shared
.err
);
10791 } else if (!strcasecmp(c
->argv
[1]->ptr
,"populate") && c
->argc
== 3) {
10796 if (getLongFromObjectOrReply(c
, c
->argv
[2], &keys
, NULL
) != REDIS_OK
)
10798 for (j
= 0; j
< keys
; j
++) {
10799 snprintf(buf
,sizeof(buf
),"key:%lu",j
);
10800 key
= createStringObject(buf
,strlen(buf
));
10801 if (lookupKeyRead(c
->db
,key
) != NULL
) {
10805 snprintf(buf
,sizeof(buf
),"value:%lu",j
);
10806 val
= createStringObject(buf
,strlen(buf
));
10807 dictAdd(c
->db
->dict
,key
,val
);
10809 addReply(c
,shared
.ok
);
10810 } else if (!strcasecmp(c
->argv
[1]->ptr
,"digest") && c
->argc
== 2) {
10811 unsigned char digest
[20];
10812 sds d
= sdsnew("+");
10815 computeDatasetDigest(digest
);
10816 for (j
= 0; j
< 20; j
++)
10817 d
= sdscatprintf(d
, "%02x",digest
[j
]);
10819 d
= sdscatlen(d
,"\r\n",2);
10822 addReplySds(c
,sdsnew(
10823 "-ERR Syntax error, try DEBUG [SEGFAULT|OBJECT <key>|SWAPIN <key>|SWAPOUT <key>|RELOAD]\r\n"));
10827 static void _redisAssert(char *estr
, char *file
, int line
) {
10828 redisLog(REDIS_WARNING
,"=== ASSERTION FAILED ===");
10829 redisLog(REDIS_WARNING
,"==> %s:%d '%s' is not true",file
,line
,estr
);
10830 #ifdef HAVE_BACKTRACE
10831 redisLog(REDIS_WARNING
,"(forcing SIGSEGV in order to print the stack trace)");
10832 *((char*)-1) = 'x';
10836 static void _redisPanic(char *msg
, char *file
, int line
) {
10837 redisLog(REDIS_WARNING
,"!!! Software Failure. Press left mouse button to continue");
10838 redisLog(REDIS_WARNING
,"Guru Meditation: %s #%s:%d",msg
,file
,line
);
10839 #ifdef HAVE_BACKTRACE
10840 redisLog(REDIS_WARNING
,"(forcing SIGSEGV in order to print the stack trace)");
10841 *((char*)-1) = 'x';
10845 /* =================================== Main! ================================ */
10848 int linuxOvercommitMemoryValue(void) {
10849 FILE *fp
= fopen("/proc/sys/vm/overcommit_memory","r");
10852 if (!fp
) return -1;
10853 if (fgets(buf
,64,fp
) == NULL
) {
10862 void linuxOvercommitMemoryWarning(void) {
10863 if (linuxOvercommitMemoryValue() == 0) {
10864 redisLog(REDIS_WARNING
,"WARNING overcommit_memory is set to 0! Background save may fail under low memory condition. To fix this issue add 'vm.overcommit_memory = 1' to /etc/sysctl.conf and then reboot or run the command 'sysctl vm.overcommit_memory=1' for this to take effect.");
10867 #endif /* __linux__ */
/* Detach the process from its starting context.
 *
 * Steps visible here: fork and let the parent exit, start a new session,
 * point stdin/stdout/stderr at /dev/null, then write the process id to
 * the file named by server.pidfile. */
10869 static void daemonize(void) {
/* NOTE(review): fork() != 0 is also true when fork() fails and returns -1,
 * in which case the process exits with status 0 without daemonizing. */
10873 if (fork() != 0) exit(0); /* parent exits */
10874 setsid(); /* create a new session */
10876 /* Every output goes to /dev/null. If Redis is daemonized but
10877 * the 'logfile' is set to 'stdout' in the configuration file
10878 * it will not log at all. */
10879 if ((fd
= open("/dev/null", O_RDWR
, 0)) != -1) {
10880 dup2(fd
, STDIN_FILENO
);
10881 dup2(fd
, STDOUT_FILENO
);
10882 dup2(fd
, STDERR_FILENO
);
/* Close the extra descriptor unless open() returned one of the three
 * standard descriptors itself. */
10883 if (fd
> STDERR_FILENO
) close(fd
);
10885 /* Try to write the pid file */
10886 fp
= fopen(server
.pidfile
,"w");
/* The pid is written as a decimal number followed by a newline. */
10888 fprintf(fp
,"%d\n",getpid());
/* Print the server version string (REDIS_VERSION) on standard output.
 * Called from main() for the "-v" and "--version" arguments. */
10893 static void version() {
10894 printf("Redis server version %s\n", REDIS_VERSION
);
/* Print the command line synopsis on standard error: one form taking the
 * path of a configuration file, one form ("-") reading the configuration
 * from stdin. Called from main() for the "--help" argument. */
10898 static void usage() {
10899 fprintf(stderr
,"Usage: ./redis-server [/path/to/redis.conf]\n");
10900 fprintf(stderr
," ./redis-server - (read config from stdin)\n");
/* Program entry point.
 *
 * Visible sequence: set up the default configuration, interpret the
 * command line (version/help switches or a configuration file path),
 * optionally daemonize, load the dataset from the append only file or
 * from the RDB file, then hand control to the event loop objects in
 * server.el. Some statements of the function (for instance the head of
 * the first branch) are not part of the visible text. */
10904 int main(int argc
, char **argv
) {
/* Start from the built-in defaults; a config file may override them. */
10907 initServerConfig();
/* argv[1] is either a version/help switch or the config file path. */
10909 if (strcmp(argv
[1], "-v") == 0 ||
10910 strcmp(argv
[1], "--version") == 0) version();
10911 if (strcmp(argv
[1], "--help") == 0) usage();
/* Anything else is treated as a configuration file: the save parameters
 * are reset before the file is loaded. */
10912 resetServerSaveParams();
10913 loadServerConfig(argv
[1]);
/* More than one argument: the body of this branch is not visible here. */
10914 } else if ((argc
> 2)) {
10917 redisLog(REDIS_WARNING
,"Warning: no config file specified, using the default config. In order to specify a config file use 'redis-server /path/to/redis.conf'");
/* Go to the background only when the configuration asks for it. */
10919 if (server
.daemonize
) daemonize();
10921 redisLog(REDIS_NOTICE
,"Server started, Redis version " REDIS_VERSION
);
10923 linuxOvercommitMemoryWarning();
/* Remember when loading started so the elapsed seconds can be logged. */
10925 start
= time(NULL
);
/* With append only mode enabled the dataset comes from the AOF. */
10926 if (server
.appendonly
) {
10927 if (loadAppendOnlyFile(server
.appendfilename
) == REDIS_OK
)
10928 redisLog(REDIS_NOTICE
,"DB loaded from append only file: %ld seconds",time(NULL
)-start
);
/* RDB load path; presumably the alternative to the AOF branch above
 * (the joining 'else' is not visible) - TODO confirm. */
10930 if (rdbLoad(server
.dbfilename
) == REDIS_OK
)
10931 redisLog(REDIS_NOTICE
,"DB loaded from disk: %ld seconds",time(NULL
)-start
);
10933 redisLog(REDIS_NOTICE
,"The server is now ready to accept connections on port %d", server
.port
);
/* Register beforeSleep on the event loop, then dispose of the loop. */
10934 aeSetBeforeSleepProc(server
.el
,beforeSleep
);
10936 aeDeleteEventLoop(server
.el
);
10940 /* ============================= Backtrace support ========================= */
10942 #ifdef HAVE_BACKTRACE
10943 static char *findFuncName(void *pointer
, unsigned long *offset
);
/* Return, as a void pointer, the platform specific eip/rip/ip style field
 * stored in the machine context of 'uc'.
 *
 * The field is selected at compile time: FreeBSD, dietlibc, Mac OS X
 * (before and from 10.6), Linux on x86 and Linux on IA64 each have their
 * own branch. segvHandler() treats a NULL result as "no address
 * available". Some preprocessor lines of the nested conditionals are not
 * part of the visible text. */
10945 static void *getMcontextEip(ucontext_t
*uc
) {
10946 #if defined(__FreeBSD__)
10947 return (void*) uc
->uc_mcontext
.mc_eip
;
10948 #elif defined(__dietlibc__)
10949 return (void*) uc
->uc_mcontext
.eip
;
10950 #elif defined(__APPLE__) && !defined(MAC_OS_X_VERSION_10_6)
10952 return (void*) uc
->uc_mcontext
->__ss
.__rip
;
10954 return (void*) uc
->uc_mcontext
->__ss
.__eip
;
10956 #elif defined(__APPLE__) && defined(MAC_OS_X_VERSION_10_6)
10957 #if defined(_STRUCT_X86_THREAD_STATE64) && !defined(__i386__)
10958 return (void*) uc
->uc_mcontext
->__ss
.__rip
;
10960 return (void*) uc
->uc_mcontext
->__ss
.__eip
;
10962 #elif defined(__i386__) || defined(__X86_64__) || defined(__x86_64__)
/* NOTE(review): this branch is also selected on x86_64, yet it indexes
 * gregs with REG_EIP; glibc names the 64 bit register index REG_RIP -
 * confirm this builds and reads the right slot on 64 bit Linux. */
10963 return (void*) uc
->uc_mcontext
.gregs
[REG_EIP
]; /* Linux 32/64 bit */
10964 #elif defined(__ia64__) /* Linux IA64 */
10965 return (void*) uc
->uc_mcontext
.sc_ip
;
/* Three-argument (SA_SIGINFO style) signal handler that logs a crash
 * report: the signal number, the string produced by genRedisInfoString()
 * and a stack trace.
 *
 * sig    - number of the signal being handled.
 * info   - unused.
 * secret - context pointer supplied with the signal, used as ucontext_t*.
 *
 * Memory obtained while reporting is intentionally never released, see
 * the comments in the body. */
10971 static void segvHandler(int sig
, siginfo_t
*info
, void *secret
) {
10973 char **messages
= NULL
;
10974 int i
, trace_size
= 0;
10975 unsigned long offset
=0;
10976 ucontext_t
*uc
= (ucontext_t
*) secret
;
10978 REDIS_NOTUSED(info
);
10980 redisLog(REDIS_WARNING
,
10981 "======= Ooops! Redis %s got signal: -%d- =======", REDIS_VERSION
, sig
);
10982 infostring
= genRedisInfoString();
10983 redisLog(REDIS_WARNING
, "%s",infostring
);
10984 /* It's not safe to sdsfree() the returned string under memory
10985 * corruption conditions. Let it leak as we are going to abort */
/* Collect up to 100 return addresses of the current call chain. */
10987 trace_size
= backtrace(trace
, 100);
10988 /* overwrite sigaction with caller's address */
10989 if (getMcontextEip(uc
) != NULL
) {
10990 trace
[1] = getMcontextEip(uc
);
/* NOTE(review): backtrace_symbols() can return NULL on failure and no
 * check is visible before messages[i] is used below - confirm. */
10992 messages
= backtrace_symbols(trace
, trace_size
);
/* Entry 0 is skipped: reporting starts from index 1. */
10994 for (i
=1; i
<trace_size
; ++i
) {
10995 char *fn
= findFuncName(trace
[i
], &offset
), *p
;
/* The text after '+' in the backtrace_symbols() line is parsed as a
 * decimal offset and compared with the one from findFuncName(). */
10997 p
= strchr(messages
[i
],'+');
/* Print the backtrace_symbols() line when no name was found in the
 * static symbol table, or when its offset is the smaller one. */
10998 if (!fn
|| (p
&& ((unsigned long)strtol(p
+1,NULL
,10)) < offset
)) {
10999 redisLog(REDIS_WARNING
,"%s", messages
[i
]);
/* Otherwise print the name and offset resolved by findFuncName(). */
11001 redisLog(REDIS_WARNING
,"%d redis-server %p %s + %d", i
, trace
[i
], fn
, (unsigned int)offset
);
11004 /* free(messages); Don't call free() with possibly corrupted memory. */
11008 static void sigtermHandler(int sig
) {
11009 REDIS_NOTUSED(sig
);
11011 redisLog(REDIS_WARNING
,"SIGTERM received, scheduling shutting down...");
11012 server
.shutdown_asap
= 1;
11015 static void setupSigSegvAction(void) {
11016 struct sigaction act
;
11018 sigemptyset (&act
.sa_mask
);
11019 /* When the SA_SIGINFO flag is set in sa_flags then sa_sigaction
11020 * is used. Otherwise, sa_handler is used */
11021 act
.sa_flags
= SA_NODEFER
| SA_ONSTACK
| SA_RESETHAND
| SA_SIGINFO
;
11022 act
.sa_sigaction
= segvHandler
;
11023 sigaction (SIGSEGV
, &act
, NULL
);
11024 sigaction (SIGBUS
, &act
, NULL
);
11025 sigaction (SIGFPE
, &act
, NULL
);
11026 sigaction (SIGILL
, &act
, NULL
);
11027 sigaction (SIGBUS
, &act
, NULL
);
11029 act
.sa_flags
= SA_NODEFER
| SA_ONSTACK
| SA_RESETHAND
;
11030 act
.sa_handler
= sigtermHandler
;
11031 sigaction (SIGTERM
, &act
, NULL
);
11035 #include "staticsymbols.h"
11036 /* This function tries to convert a pointer into a function name. It's used in
11037 * order to provide a backtrace under segmentation fault that's able to
11038 * display functions declared as static (otherwise the backtrace is useless). */
/* Look up 'pointer' in symsTable and return the name of the matching
 * entry, or NULL when no entry was selected (ret == -1).
 *
 * pointer - address to resolve.
 * offset  - output parameter; the statement that stores into it is not
 *           part of the visible text (presumably receives the distance
 *           from the selected symbol - TODO confirm).
 *
 * The table is scanned until an entry with a zero 'pointer' field. */
11039 static char *findFuncName(void *pointer
, unsigned long *offset
){
11041 unsigned long off
, minoff
= 0;
11043 /* Try to match against the Symbol with the smallest offset */
11044 for (i
=0; symsTable
[i
].pointer
; i
++) {
11045 unsigned long lp
= (unsigned long) pointer
;
/* Only symbols located at or before the address are candidates; the
 * all-ones address is never matched. */
11047 if (lp
!= (unsigned long)-1 && lp
>= symsTable
[i
].pointer
) {
11048 off
=lp
-symsTable
[i
].pointer
;
/* First candidate, or a candidate closer than the best one so far. */
11049 if (ret
< 0 || off
< minoff
) {
11055 if (ret
== -1) return NULL
;
11057 return symsTable
[ret
].name
;
11059 #else /* HAVE_BACKTRACE */
/* Definition selected by the surrounding #else, that is when
 * HAVE_BACKTRACE is not defined. Its body is not part of the visible
 * text. */
11060 static void setupSigSegvAction(void) {
11062 #endif /* HAVE_BACKTRACE */