2 * Copyright (c) 2009-2010, Salvatore Sanfilippo <antirez at gmail dot com>
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions are met:
8 * * Redistributions of source code must retain the above copyright notice,
9 * this list of conditions and the following disclaimer.
10 * * Redistributions in binary form must reproduce the above copyright
11 * notice, this list of conditions and the following disclaimer in the
12 * documentation and/or other materials provided with the distribution.
13 * * Neither the name of Redis nor the names of its contributors may be used
14 * to endorse or promote products derived from this software without
15 * specific prior written permission.
17 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
18 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
21 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
22 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
23 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
24 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
25 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
26 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
27 * POSSIBILITY OF SUCH DAMAGE.
30 #define REDIS_VERSION "1.3.8"
40 #define __USE_POSIX199309
47 #endif /* HAVE_BACKTRACE */
55 #include <arpa/inet.h>
59 #include <sys/resource.h>
66 #include "solarisfixes.h"
70 #include "ae.h" /* Event driven programming library */
71 #include "sds.h" /* Dynamic safe strings */
72 #include "anet.h" /* Networking the easy way */
73 #include "dict.h" /* Hash tables */
74 #include "adlist.h" /* Linked lists */
75 #include "zmalloc.h" /* total memory usage aware version of malloc/free */
76 #include "lzf.h" /* LZF compression library */
77 #include "pqsort.h" /* Partial qsort for SORT+LIMIT */
84 /* Static server configuration */
85 #define REDIS_SERVERPORT 6379 /* TCP port */
86 #define REDIS_MAXIDLETIME (60*5) /* default client timeout */
87 #define REDIS_IOBUF_LEN 1024
88 #define REDIS_LOADBUF_LEN 1024
89 #define REDIS_STATIC_ARGS 8
90 #define REDIS_DEFAULT_DBNUM 16
91 #define REDIS_CONFIGLINE_MAX 1024
92 #define REDIS_OBJFREELIST_MAX 1000000 /* Max number of objects to cache */
93 #define REDIS_MAX_SYNC_TIME 60 /* Slave can't take more to sync */
94 #define REDIS_EXPIRELOOKUPS_PER_CRON 10 /* try to expire 10 keys/loop */
95 #define REDIS_MAX_WRITE_PER_EVENT (1024*64)
96 #define REDIS_REQUEST_MAX_SIZE (1024*1024*256) /* max bytes in inline command */
98 /* If more than REDIS_WRITEV_THRESHOLD write packets are pending use writev */
99 #define REDIS_WRITEV_THRESHOLD 3
100 /* Max number of iovecs used for each writev call */
101 #define REDIS_WRITEV_IOVEC_COUNT 256
103 /* Hash table parameters */
104 #define REDIS_HT_MINFILL 10 /* Minimal hash table fill 10% */
107 #define REDIS_CMD_BULK 1 /* Bulk write command */
108 #define REDIS_CMD_INLINE 2 /* Inline command */
109 /* REDIS_CMD_DENYOOM deserves a longer comment: all the commands marked with
110 this flag will return an error when the 'maxmemory' option is set in the
111 config file and the server is using more than maxmemory bytes of memory.
112 In short these commands are denied on low memory conditions. */
113 #define REDIS_CMD_DENYOOM 4
114 #define REDIS_CMD_FORCE_REPLICATION 8 /* Force replication even if dirty is 0 */
117 #define REDIS_STRING 0
123 /* Objects encoding. Some kinds of objects like Strings and Hashes can be
124 * internally represented in multiple ways. The 'encoding' field of the object
125 * is set to one of these values for this object. */
126 #define REDIS_ENCODING_RAW 0 /* Raw representation */
127 #define REDIS_ENCODING_INT 1 /* Encoded as integer */
128 #define REDIS_ENCODING_ZIPMAP 2 /* Encoded as zipmap */
129 #define REDIS_ENCODING_HT 3 /* Encoded as an hash table */
131 static char* strencoding
[] = {
132 "raw", "int", "zipmap", "hashtable"
135 /* Object types only used for dumping to disk */
136 #define REDIS_EXPIRETIME 253
137 #define REDIS_SELECTDB 254
138 #define REDIS_EOF 255
140 /* Defines related to the dump file format. To store 32 bits lengths for short
141 * keys requires a lot of space, so we check the most significant 2 bits of
142 * the first byte to interpret the length:
144 * 00|000000 => if the two MSB are 00 the len is the 6 bits of this byte
145 * 01|000000 00000000 => 01, the len is 14 bits, 6 bits + 8 bits of next byte
146 * 10|000000 [32 bit integer] => if it's 10, a full 32 bit len will follow
147 * 11|000000 this means: specially encoded object will follow. The six bits
148 * number specify the kind of object that follows.
149 * See the REDIS_RDB_ENC_* defines.
151 * Lengths up to 63 are stored using a single byte, most DB keys, and many
152 * values, will fit inside. */
153 #define REDIS_RDB_6BITLEN 0
154 #define REDIS_RDB_14BITLEN 1
155 #define REDIS_RDB_32BITLEN 2
156 #define REDIS_RDB_ENCVAL 3
157 #define REDIS_RDB_LENERR UINT_MAX
159 /* When a length of a string object stored on disk has the first two bits
160 * set, the remaining six bits specify a special encoding for the object
161 * according to the following defines: */
162 #define REDIS_RDB_ENC_INT8 0 /* 8 bit signed integer */
163 #define REDIS_RDB_ENC_INT16 1 /* 16 bit signed integer */
164 #define REDIS_RDB_ENC_INT32 2 /* 32 bit signed integer */
165 #define REDIS_RDB_ENC_LZF 3 /* string compressed with FASTLZ */
167 /* Virtual memory object->where field. */
168 #define REDIS_VM_MEMORY 0 /* The object is on memory */
169 #define REDIS_VM_SWAPPED 1 /* The object is on disk */
170 #define REDIS_VM_SWAPPING 2 /* Redis is swapping this object on disk */
171 #define REDIS_VM_LOADING 3 /* Redis is loading this object from disk */
173 /* Virtual memory static configuration stuff.
174 * Check vmFindContiguousPages() to know more about these magic numbers. */
175 #define REDIS_VM_MAX_NEAR_PAGES 65536
176 #define REDIS_VM_MAX_RANDOM_JUMP 4096
177 #define REDIS_VM_MAX_THREADS 32
178 #define REDIS_THREAD_STACK_SIZE (1024*1024*4)
179 /* The following is the *percentage* of completed I/O jobs to process when the
180 * handler is called. While Virtual Memory I/O operations are performed by
181 * threads, these operations must be processed by the main thread when completed
182 * in order to take effect. */
183 #define REDIS_MAX_COMPLETED_JOBS_PROCESSED 1
186 #define REDIS_SLAVE 1 /* This client is a slave server */
187 #define REDIS_MASTER 2 /* This client is a master server */
188 #define REDIS_MONITOR 4 /* This client is a slave monitor, see MONITOR */
189 #define REDIS_MULTI 8 /* This client is in a MULTI context */
190 #define REDIS_BLOCKED 16 /* The client is waiting in a blocking operation */
191 #define REDIS_IO_WAIT 32 /* The client is waiting for Virtual Memory I/O */
193 /* Slave replication state - slave side */
194 #define REDIS_REPL_NONE 0 /* No active replication */
195 #define REDIS_REPL_CONNECT 1 /* Must connect to master */
196 #define REDIS_REPL_CONNECTED 2 /* Connected to master */
198 /* Slave replication state - from the point of view of master
199 * Note that in SEND_BULK and ONLINE state the slave receives new updates
200 * in its output queue. In the WAIT_BGSAVE state instead the server is waiting
201 * to start the next background saving in order to send updates to it. */
202 #define REDIS_REPL_WAIT_BGSAVE_START 3 /* master waits bgsave to start feeding it */
203 #define REDIS_REPL_WAIT_BGSAVE_END 4 /* master waits bgsave to start bulk DB transmission */
204 #define REDIS_REPL_SEND_BULK 5 /* master is sending the bulk DB */
205 #define REDIS_REPL_ONLINE 6 /* bulk DB already transmitted, receive updates */
207 /* List related stuff */
211 /* Sort operations */
212 #define REDIS_SORT_GET 0
213 #define REDIS_SORT_ASC 1
214 #define REDIS_SORT_DESC 2
215 #define REDIS_SORTKEY_MAX 1024
218 #define REDIS_DEBUG 0
219 #define REDIS_VERBOSE 1
220 #define REDIS_NOTICE 2
221 #define REDIS_WARNING 3
223 /* Anti-warning macro... */
224 #define REDIS_NOTUSED(V) ((void) V)
226 #define ZSKIPLIST_MAXLEVEL 32 /* Should be enough for 2^32 elements */
227 #define ZSKIPLIST_P 0.25 /* Skiplist P = 1/4 */
229 /* Append only defines */
230 #define APPENDFSYNC_NO 0
231 #define APPENDFSYNC_ALWAYS 1
232 #define APPENDFSYNC_EVERYSEC 2
234 /* Hashes related defaults */
235 #define REDIS_HASH_MAX_ZIPMAP_ENTRIES 64
236 #define REDIS_HASH_MAX_ZIPMAP_VALUE 512
238 /* We can print the stacktrace, so our assert is defined this way: */
/* redisAssert(_e): evaluates _e once. When it is true the macro yields
 * (void)0. When it is false it calls _redisAssert() with the stringified
 * expression, the current __FILE__ and __LINE__, and then terminates the
 * process with _exit(1). */
239 #define redisAssert(_e) ((_e)?(void)0 : (_redisAssert(#_e,__FILE__,__LINE__),_exit(1)))
/* Failure reporter used by redisAssert(): receives the expression text, the
 * source file name and the line number. NOTE(review): the implementation is
 * not visible in this part of the file - presumably it logs the failure and
 * the stack trace; confirm against the definition. */
240 static void _redisAssert(char *estr
, char *file
, int line
);
242 /*================================= Data types ============================== */
244 /* A redis object, that is a type able to hold a string / list / set */
246 /* The VM object structure */
247 struct redisObjectVM
{
248 off_t page
; /* the page at witch the object is stored on disk */
249 off_t usedpages
; /* number of pages used on disk */
250 time_t atime
; /* Last access time */
253 /* The actual Redis Object */
254 typedef struct redisObject
{
257 unsigned char encoding
;
258 unsigned char storage
; /* If this object is a key, where is the value?
259 * REDIS_VM_MEMORY, REDIS_VM_SWAPPED, ... */
260 unsigned char vtype
; /* If this object is a key, and value is swapped out,
261 * this is the type of the swapped out object. */
263 /* VM fields, these are only allocated if VM is active, otherwise the
264 * object allocation function will just allocate
265 * sizeof(redisObject) minus sizeof(redisObjectVM), so using
266 * Redis without VM active will not have any overhead. */
267 struct redisObjectVM vm
;
270 /* Macro used to initialize a Redis object allocated on the stack.
271 * Note that this macro is kept near the structure definition to make sure
272 * we'll update it when the structure is changed, to avoid bugs like
273 * bug #85 introduced exactly in this way. */
274 #define initStaticStringObject(_var,_ptr) do { \
276 _var.type = REDIS_STRING; \
277 _var.encoding = REDIS_ENCODING_RAW; \
279 if (server.vm_enabled) _var.storage = REDIS_VM_MEMORY; \
282 typedef struct redisDb
{
283 dict
*dict
; /* The keyspace for this DB */
284 dict
*expires
; /* Timeout of keys with a timeout set */
285 dict
*blockingkeys
; /* Keys with clients waiting for data (BLPOP) */
286 dict
*io_keys
; /* Keys with clients waiting for VM I/O */
290 /* Client MULTI/EXEC state */
291 typedef struct multiCmd
{
294 struct redisCommand
*cmd
;
297 typedef struct multiState
{
298 multiCmd
*commands
; /* Array of MULTI commands */
299 int count
; /* Total number of MULTI commands */
302 /* With multiplexing we need to keep per-client state.
303 * Clients are kept in a linked list. */
304 typedef struct redisClient
{
309 robj
**argv
, **mbargv
;
311 int bulklen
; /* bulk read len. -1 if not in bulk read mode */
312 int multibulk
; /* multi bulk command format active */
315 time_t lastinteraction
; /* time of the last interaction, used for timeout */
316 int flags
; /* REDIS_SLAVE | REDIS_MONITOR | REDIS_MULTI ... */
317 int slaveseldb
; /* slave selected db, if this client is a slave */
318 int authenticated
; /* when requirepass is non-NULL */
319 int replstate
; /* replication state if this is a slave */
320 int repldbfd
; /* replication DB file descriptor */
321 long repldboff
; /* replication DB file offset */
322 off_t repldbsize
; /* replication DB file size */
323 multiState mstate
; /* MULTI/EXEC state */
324 robj
**blockingkeys
; /* The key we are waiting to terminate a blocking
325 * operation such as BLPOP. Otherwise NULL. */
326 int blockingkeysnum
; /* Number of blocking keys */
327 time_t blockingto
; /* Blocking operation timeout. If UNIX current time
328 * is >= blockingto then the operation timed out. */
329 list
*io_keys
; /* Keys this client is waiting to be loaded from the
330 * swap file in order to continue. */
331 dict
*pubsub_channels
; /* channels a client is interested in (SUBSCRIBE) */
332 list
*pubsub_patterns
; /* patterns a client is interested in (SUBSCRIBE) */
340 /* Global server state structure */
345 long long dirty
; /* changes to DB from the last save */
347 list
*slaves
, *monitors
;
348 char neterr
[ANET_ERR_LEN
];
350 int cronloops
; /* number of times the cron function run */
351 list
*objfreelist
; /* A list of freed objects to avoid malloc() */
352 time_t lastsave
; /* Unix time of last save succeeede */
353 /* Fields used only for stats */
354 time_t stat_starttime
; /* server start time */
355 long long stat_numcommands
; /* number of processed commands */
356 long long stat_numconnections
; /* number of connections received */
357 long long stat_expiredkeys
; /* number of expired keys */
370 pid_t bgsavechildpid
;
371 pid_t bgrewritechildpid
;
372 sds bgrewritebuf
; /* buffer taken by parent during oppend only rewrite */
373 struct saveparam
*saveparams
;
378 char *appendfilename
;
382 /* Replication related */
387 redisClient
*master
; /* client that is master for this slave */
389 unsigned int maxclients
;
390 unsigned long long maxmemory
;
391 unsigned int blpop_blocked_clients
;
392 unsigned int vm_blocked_clients
;
393 /* Sort parameters - qsort_r() is only available under BSD so we
394 * have to keep this state global, in order to pass it to sortCompare() */
398 /* Virtual memory configuration */
403 unsigned long long vm_max_memory
;
405 size_t hash_max_zipmap_entries
;
406 size_t hash_max_zipmap_value
;
407 /* Virtual memory state */
410 off_t vm_next_page
; /* Next probably empty page */
411 off_t vm_near_pages
; /* Number of pages allocated sequentially */
412 unsigned char *vm_bitmap
; /* Bitmap of free/used pages */
413 time_t unixtime
; /* Unix time sampled every second. */
414 /* Virtual memory I/O threads stuff */
415 /* An I/O thread processes an element taken from the io_newjobs queue and
416 * puts the result of the operation in the io_processed list. While the
417 * job is being processed, it's put on the io_processing queue. */
418 list
*io_newjobs
; /* List of VM I/O jobs yet to be processed */
419 list
*io_processing
; /* List of VM I/O jobs being processed */
420 list
*io_processed
; /* List of VM I/O jobs already processed */
421 list
*io_ready_clients
; /* Clients ready to be unblocked. All keys loaded */
422 pthread_mutex_t io_mutex
; /* lock to access io_jobs/io_done/io_thread_job */
423 pthread_mutex_t obj_freelist_mutex
; /* safe redis objects creation/free */
424 pthread_mutex_t io_swapfile_mutex
; /* So we can lseek + write */
425 pthread_attr_t io_threads_attr
; /* attributes for threads creation */
426 int io_active_threads
; /* Number of running I/O threads */
427 int vm_max_threads
; /* Max number of I/O threads running at the same time */
428 /* Our main thread is blocked on the event loop, looking for sockets ready
429 * to be read or written, so when a threaded I/O operation is ready to be
430 * processed by the main thread, the I/O thread will use a unix pipe to
431 * awake the main thread. The following are the two pipe FDs. */
432 int io_ready_pipe_read
;
433 int io_ready_pipe_write
;
434 /* Virtual memory stats */
435 unsigned long long vm_stats_used_pages
;
436 unsigned long long vm_stats_swapped_objects
;
437 unsigned long long vm_stats_swapouts
;
438 unsigned long long vm_stats_swapins
;
440 dict
*pubsub_channels
; /* Map channels to list of subscribed clients */
441 list
*pubsub_patterns
; /* A list of pubsub_patterns */
446 typedef struct pubsubPattern
{
451 typedef void redisCommandProc(redisClient
*c
);
452 struct redisCommand
{
454 redisCommandProc
*proc
;
457 /* Use a function to determine which keys need to be loaded
458 * in the background prior to executing this command. Takes precedence
459 * over vm_firstkey and others, ignored when NULL */
460 redisCommandProc
*vm_preload_proc
;
461 /* What keys should be loaded in background when calling this command? */
462 int vm_firstkey
; /* The first argument that's a key (0 = no keys) */
463 int vm_lastkey
; /* THe last argument that's a key */
464 int vm_keystep
; /* The step between first and last key */
467 struct redisFunctionSym
{
469 unsigned long pointer
;
472 typedef struct _redisSortObject
{
480 typedef struct _redisSortOperation
{
483 } redisSortOperation
;
485 /* ZSETs use a specialized version of Skiplists */
487 typedef struct zskiplistNode
{
488 struct zskiplistNode
**forward
;
489 struct zskiplistNode
*backward
;
495 typedef struct zskiplist
{
496 struct zskiplistNode
*header
, *tail
;
497 unsigned long length
;
501 typedef struct zset
{
506 /* Our shared "common" objects */
508 struct sharedObjectsStruct
{
509 robj
*crlf
, *ok
, *err
, *emptybulk
, *czero
, *cone
, *pong
, *space
,
510 *colon
, *nullbulk
, *nullmultibulk
, *queued
,
511 *emptymultibulk
, *wrongtypeerr
, *nokeyerr
, *syntaxerr
, *sameobjecterr
,
512 *outofrangeerr
, *plus
,
513 *select0
, *select1
, *select2
, *select3
, *select4
,
514 *select5
, *select6
, *select7
, *select8
, *select9
,
515 *messagebulk
, *subscribebulk
, *unsubscribebulk
, *mbulk3
,
516 *psubscribebulk
, *punsubscribebulk
;
519 /* Global vars that are actually used as constants. The following double
520 * values are used for double on-disk serialization, and are initialized
521 * at runtime to avoid strange compiler optimizations.
 * NOTE(review): the names suggest 0.0, +infinity, -infinity and NaN; the
 * code that assigns them is not visible in this part of the file - confirm
 * against the initialization site. */
523 static double R_Zero
, R_PosInf
, R_NegInf
, R_Nan
;
525 /* VM threaded I/O request message */
526 #define REDIS_IOJOB_LOAD 0 /* Load from disk to memory */
527 #define REDIS_IOJOB_PREPARE_SWAP 1 /* Compute needed pages */
528 #define REDIS_IOJOB_DO_SWAP 2 /* Swap from memory to disk */
529 typedef struct iojob
{
530 int type
; /* Request type, REDIS_IOJOB_* */
531 redisDb
*db
;/* Redis database */
532 robj
*key
; /* This I/O request is about swapping this key */
533 robj
*val
; /* the value to swap for REDIS_IOREQ_*_SWAP, otherwise this
534 * field is populated by the I/O thread for REDIS_IOREQ_LOAD. */
535 off_t page
; /* Swap page where to read/write the object */
536 off_t pages
; /* Swap pages needed to save object. PREPARE_SWAP return val */
537 int canceled
; /* True if this command was canceled by blocking side of VM */
538 pthread_t thread
; /* ID of the thread processing this entry */
541 /*================================ Prototypes =============================== */
543 static void freeStringObject(robj
*o
);
544 static void freeListObject(robj
*o
);
545 static void freeSetObject(robj
*o
);
546 static void decrRefCount(void *o
);
547 static robj
*createObject(int type
, void *ptr
);
548 static void freeClient(redisClient
*c
);
549 static int rdbLoad(char *filename
);
550 static void addReply(redisClient
*c
, robj
*obj
);
551 static void addReplySds(redisClient
*c
, sds s
);
552 static void incrRefCount(robj
*o
);
553 static int rdbSaveBackground(char *filename
);
554 static robj
*createStringObject(char *ptr
, size_t len
);
555 static robj
*dupStringObject(robj
*o
);
556 static void replicationFeedSlaves(list
*slaves
, int dictid
, robj
**argv
, int argc
);
557 static void feedAppendOnlyFile(struct redisCommand
*cmd
, int dictid
, robj
**argv
, int argc
);
558 static int syncWithMaster(void);
559 static int tryObjectEncoding(robj
*o
);
560 static robj
*getDecodedObject(robj
*o
);
561 static int removeExpire(redisDb
*db
, robj
*key
);
562 static int expireIfNeeded(redisDb
*db
, robj
*key
);
563 static int deleteIfVolatile(redisDb
*db
, robj
*key
);
564 static int deleteIfSwapped(redisDb
*db
, robj
*key
);
565 static int deleteKey(redisDb
*db
, robj
*key
);
566 static time_t getExpire(redisDb
*db
, robj
*key
);
567 static int setExpire(redisDb
*db
, robj
*key
, time_t when
);
568 static void updateSlavesWaitingBgsave(int bgsaveerr
);
569 static void freeMemoryIfNeeded(void);
570 static int processCommand(redisClient
*c
);
571 static void setupSigSegvAction(void);
572 static void rdbRemoveTempFile(pid_t childpid
);
573 static void aofRemoveTempFile(pid_t childpid
);
574 static size_t stringObjectLen(robj
*o
);
575 static void processInputBuffer(redisClient
*c
);
576 static zskiplist
*zslCreate(void);
577 static void zslFree(zskiplist
*zsl
);
578 static void zslInsert(zskiplist
*zsl
, double score
, robj
*obj
);
579 static void sendReplyToClientWritev(aeEventLoop
*el
, int fd
, void *privdata
, int mask
);
580 static void initClientMultiState(redisClient
*c
);
581 static void freeClientMultiState(redisClient
*c
);
582 static void queueMultiCommand(redisClient
*c
, struct redisCommand
*cmd
);
583 static void unblockClientWaitingData(redisClient
*c
);
584 static int handleClientsWaitingListPush(redisClient
*c
, robj
*key
, robj
*ele
);
585 static void vmInit(void);
586 static void vmMarkPagesFree(off_t page
, off_t count
);
587 static robj
*vmLoadObject(robj
*key
);
588 static robj
*vmPreviewObject(robj
*key
);
589 static int vmSwapOneObjectBlocking(void);
590 static int vmSwapOneObjectThreaded(void);
591 static int vmCanSwapOut(void);
592 static int tryFreeOneObjectFromFreelist(void);
593 static void acceptHandler(aeEventLoop
*el
, int fd
, void *privdata
, int mask
);
594 static void vmThreadedIOCompletedJob(aeEventLoop
*el
, int fd
, void *privdata
, int mask
);
595 static void vmCancelThreadedIOJob(robj
*o
);
596 static void lockThreadedIO(void);
597 static void unlockThreadedIO(void);
598 static int vmSwapObjectThreaded(robj
*key
, robj
*val
, redisDb
*db
);
599 static void freeIOJob(iojob
*j
);
600 static void queueIOJob(iojob
*j
);
601 static int vmWriteObjectOnSwap(robj
*o
, off_t page
);
602 static robj
*vmReadObjectFromSwap(off_t page
, int type
);
603 static void waitEmptyIOJobsQueue(void);
604 static void vmReopenSwapFile(void);
605 static int vmFreePage(off_t page
);
606 static void zunionInterBlockClientOnSwappedKeys(redisClient
*c
);
607 static int blockClientOnSwappedKeys(struct redisCommand
*cmd
, redisClient
*c
);
608 static int dontWaitForSwappedKey(redisClient
*c
, robj
*key
);
609 static void handleClientsBlockedOnSwappedKey(redisDb
*db
, robj
*key
);
610 static void readQueryFromClient(aeEventLoop
*el
, int fd
, void *privdata
, int mask
);
611 static struct redisCommand
*lookupCommand(char *name
);
612 static void call(redisClient
*c
, struct redisCommand
*cmd
);
613 static void resetClient(redisClient
*c
);
614 static void convertToRealHash(robj
*o
);
615 static int pubsubUnsubscribeAllChannels(redisClient
*c
, int notify
);
616 static int pubsubUnsubscribeAllPatterns(redisClient
*c
, int notify
);
617 static void freePubsubPattern(void *p
);
618 static int listMatchPubsubPattern(void *a
, void *b
);
619 static int compareStringObjects(robj
*a
, robj
*b
);
622 static void authCommand(redisClient
*c
);
623 static void pingCommand(redisClient
*c
);
624 static void echoCommand(redisClient
*c
);
625 static void setCommand(redisClient
*c
);
626 static void setnxCommand(redisClient
*c
);
627 static void getCommand(redisClient
*c
);
628 static void delCommand(redisClient
*c
);
629 static void existsCommand(redisClient
*c
);
630 static void incrCommand(redisClient
*c
);
631 static void decrCommand(redisClient
*c
);
632 static void incrbyCommand(redisClient
*c
);
633 static void decrbyCommand(redisClient
*c
);
634 static void selectCommand(redisClient
*c
);
635 static void randomkeyCommand(redisClient
*c
);
636 static void keysCommand(redisClient
*c
);
637 static void dbsizeCommand(redisClient
*c
);
638 static void lastsaveCommand(redisClient
*c
);
639 static void saveCommand(redisClient
*c
);
640 static void bgsaveCommand(redisClient
*c
);
641 static void bgrewriteaofCommand(redisClient
*c
);
642 static void shutdownCommand(redisClient
*c
);
643 static void moveCommand(redisClient
*c
);
644 static void renameCommand(redisClient
*c
);
645 static void renamenxCommand(redisClient
*c
);
646 static void lpushCommand(redisClient
*c
);
647 static void rpushCommand(redisClient
*c
);
648 static void lpopCommand(redisClient
*c
);
649 static void rpopCommand(redisClient
*c
);
650 static void llenCommand(redisClient
*c
);
651 static void lindexCommand(redisClient
*c
);
652 static void lrangeCommand(redisClient
*c
);
653 static void ltrimCommand(redisClient
*c
);
654 static void typeCommand(redisClient
*c
);
655 static void lsetCommand(redisClient
*c
);
656 static void saddCommand(redisClient
*c
);
657 static void sremCommand(redisClient
*c
);
658 static void smoveCommand(redisClient
*c
);
659 static void sismemberCommand(redisClient
*c
);
660 static void scardCommand(redisClient
*c
);
661 static void spopCommand(redisClient
*c
);
662 static void srandmemberCommand(redisClient
*c
);
663 static void sinterCommand(redisClient
*c
);
664 static void sinterstoreCommand(redisClient
*c
);
665 static void sunionCommand(redisClient
*c
);
666 static void sunionstoreCommand(redisClient
*c
);
667 static void sdiffCommand(redisClient
*c
);
668 static void sdiffstoreCommand(redisClient
*c
);
669 static void syncCommand(redisClient
*c
);
670 static void flushdbCommand(redisClient
*c
);
671 static void flushallCommand(redisClient
*c
);
672 static void sortCommand(redisClient
*c
);
673 static void lremCommand(redisClient
*c
);
674 static void rpoplpushcommand(redisClient
*c
);
675 static void infoCommand(redisClient
*c
);
676 static void mgetCommand(redisClient
*c
);
677 static void monitorCommand(redisClient
*c
);
678 static void expireCommand(redisClient
*c
);
679 static void expireatCommand(redisClient
*c
);
680 static void getsetCommand(redisClient
*c
);
681 static void ttlCommand(redisClient
*c
);
682 static void slaveofCommand(redisClient
*c
);
683 static void debugCommand(redisClient
*c
);
684 static void msetCommand(redisClient
*c
);
685 static void msetnxCommand(redisClient
*c
);
686 static void zaddCommand(redisClient
*c
);
687 static void zincrbyCommand(redisClient
*c
);
688 static void zrangeCommand(redisClient
*c
);
689 static void zrangebyscoreCommand(redisClient
*c
);
690 static void zcountCommand(redisClient
*c
);
691 static void zrevrangeCommand(redisClient
*c
);
692 static void zcardCommand(redisClient
*c
);
693 static void zremCommand(redisClient
*c
);
694 static void zscoreCommand(redisClient
*c
);
695 static void zremrangebyscoreCommand(redisClient
*c
);
696 static void multiCommand(redisClient
*c
);
697 static void execCommand(redisClient
*c
);
698 static void discardCommand(redisClient
*c
);
699 static void blpopCommand(redisClient
*c
);
700 static void brpopCommand(redisClient
*c
);
701 static void appendCommand(redisClient
*c
);
702 static void substrCommand(redisClient
*c
);
703 static void zrankCommand(redisClient
*c
);
704 static void zrevrankCommand(redisClient
*c
);
705 static void hsetCommand(redisClient
*c
);
706 static void hgetCommand(redisClient
*c
);
707 static void hdelCommand(redisClient
*c
);
708 static void hlenCommand(redisClient
*c
);
709 static void zremrangebyrankCommand(redisClient
*c
);
710 static void zunionCommand(redisClient
*c
);
711 static void zinterCommand(redisClient
*c
);
712 static void hkeysCommand(redisClient
*c
);
713 static void hvalsCommand(redisClient
*c
);
714 static void hgetallCommand(redisClient
*c
);
715 static void hexistsCommand(redisClient
*c
);
716 static void configCommand(redisClient
*c
);
717 static void hincrbyCommand(redisClient
*c
);
718 static void subscribeCommand(redisClient
*c
);
719 static void unsubscribeCommand(redisClient
*c
);
720 static void psubscribeCommand(redisClient
*c
);
721 static void punsubscribeCommand(redisClient
*c
);
722 static void publishCommand(redisClient
*c
);
724 /*================================= Globals ================================= */
727 static struct redisServer server
; /* server global state */
728 static struct redisCommand cmdTable
[] = {
729 {"get",getCommand
,2,REDIS_CMD_INLINE
,NULL
,1,1,1},
730 {"set",setCommand
,3,REDIS_CMD_BULK
|REDIS_CMD_DENYOOM
,NULL
,0,0,0},
731 {"setnx",setnxCommand
,3,REDIS_CMD_BULK
|REDIS_CMD_DENYOOM
,NULL
,0,0,0},
732 {"append",appendCommand
,3,REDIS_CMD_BULK
|REDIS_CMD_DENYOOM
,NULL
,1,1,1},
733 {"substr",substrCommand
,4,REDIS_CMD_INLINE
,NULL
,1,1,1},
734 {"del",delCommand
,-2,REDIS_CMD_INLINE
,NULL
,0,0,0},
735 {"exists",existsCommand
,2,REDIS_CMD_INLINE
,NULL
,1,1,1},
736 {"incr",incrCommand
,2,REDIS_CMD_INLINE
|REDIS_CMD_DENYOOM
,NULL
,1,1,1},
737 {"decr",decrCommand
,2,REDIS_CMD_INLINE
|REDIS_CMD_DENYOOM
,NULL
,1,1,1},
738 {"mget",mgetCommand
,-2,REDIS_CMD_INLINE
,NULL
,1,-1,1},
739 {"rpush",rpushCommand
,3,REDIS_CMD_BULK
|REDIS_CMD_DENYOOM
,NULL
,1,1,1},
740 {"lpush",lpushCommand
,3,REDIS_CMD_BULK
|REDIS_CMD_DENYOOM
,NULL
,1,1,1},
741 {"rpop",rpopCommand
,2,REDIS_CMD_INLINE
,NULL
,1,1,1},
742 {"lpop",lpopCommand
,2,REDIS_CMD_INLINE
,NULL
,1,1,1},
743 {"brpop",brpopCommand
,-3,REDIS_CMD_INLINE
,NULL
,1,1,1},
744 {"blpop",blpopCommand
,-3,REDIS_CMD_INLINE
,NULL
,1,1,1},
745 {"llen",llenCommand
,2,REDIS_CMD_INLINE
,NULL
,1,1,1},
746 {"lindex",lindexCommand
,3,REDIS_CMD_INLINE
,NULL
,1,1,1},
747 {"lset",lsetCommand
,4,REDIS_CMD_BULK
|REDIS_CMD_DENYOOM
,NULL
,1,1,1},
748 {"lrange",lrangeCommand
,4,REDIS_CMD_INLINE
,NULL
,1,1,1},
749 {"ltrim",ltrimCommand
,4,REDIS_CMD_INLINE
,NULL
,1,1,1},
750 {"lrem",lremCommand
,4,REDIS_CMD_BULK
,NULL
,1,1,1},
751 {"rpoplpush",rpoplpushcommand
,3,REDIS_CMD_INLINE
|REDIS_CMD_DENYOOM
,NULL
,1,2,1},
752 {"sadd",saddCommand
,3,REDIS_CMD_BULK
|REDIS_CMD_DENYOOM
,NULL
,1,1,1},
753 {"srem",sremCommand
,3,REDIS_CMD_BULK
,NULL
,1,1,1},
754 {"smove",smoveCommand
,4,REDIS_CMD_BULK
,NULL
,1,2,1},
755 {"sismember",sismemberCommand
,3,REDIS_CMD_BULK
,NULL
,1,1,1},
756 {"scard",scardCommand
,2,REDIS_CMD_INLINE
,NULL
,1,1,1},
757 {"spop",spopCommand
,2,REDIS_CMD_INLINE
,NULL
,1,1,1},
758 {"srandmember",srandmemberCommand
,2,REDIS_CMD_INLINE
,NULL
,1,1,1},
759 {"sinter",sinterCommand
,-2,REDIS_CMD_INLINE
|REDIS_CMD_DENYOOM
,NULL
,1,-1,1},
760 {"sinterstore",sinterstoreCommand
,-3,REDIS_CMD_INLINE
|REDIS_CMD_DENYOOM
,NULL
,2,-1,1},
761 {"sunion",sunionCommand
,-2,REDIS_CMD_INLINE
|REDIS_CMD_DENYOOM
,NULL
,1,-1,1},
762 {"sunionstore",sunionstoreCommand
,-3,REDIS_CMD_INLINE
|REDIS_CMD_DENYOOM
,NULL
,2,-1,1},
763 {"sdiff",sdiffCommand
,-2,REDIS_CMD_INLINE
|REDIS_CMD_DENYOOM
,NULL
,1,-1,1},
764 {"sdiffstore",sdiffstoreCommand
,-3,REDIS_CMD_INLINE
|REDIS_CMD_DENYOOM
,NULL
,2,-1,1},
765 {"smembers",sinterCommand
,2,REDIS_CMD_INLINE
,NULL
,1,1,1},
766 {"zadd",zaddCommand
,4,REDIS_CMD_BULK
|REDIS_CMD_DENYOOM
,NULL
,1,1,1},
767 {"zincrby",zincrbyCommand
,4,REDIS_CMD_BULK
|REDIS_CMD_DENYOOM
,NULL
,1,1,1},
768 {"zrem",zremCommand
,3,REDIS_CMD_BULK
,NULL
,1,1,1},
769 {"zremrangebyscore",zremrangebyscoreCommand
,4,REDIS_CMD_INLINE
,NULL
,1,1,1},
770 {"zremrangebyrank",zremrangebyrankCommand
,4,REDIS_CMD_INLINE
,NULL
,1,1,1},
771 {"zunion",zunionCommand
,-4,REDIS_CMD_INLINE
|REDIS_CMD_DENYOOM
,zunionInterBlockClientOnSwappedKeys
,0,0,0},
772 {"zinter",zinterCommand
,-4,REDIS_CMD_INLINE
|REDIS_CMD_DENYOOM
,zunionInterBlockClientOnSwappedKeys
,0,0,0},
773 {"zrange",zrangeCommand
,-4,REDIS_CMD_INLINE
,NULL
,1,1,1},
774 {"zrangebyscore",zrangebyscoreCommand
,-4,REDIS_CMD_INLINE
,NULL
,1,1,1},
775 {"zcount",zcountCommand
,4,REDIS_CMD_INLINE
,NULL
,1,1,1},
776 {"zrevrange",zrevrangeCommand
,-4,REDIS_CMD_INLINE
,NULL
,1,1,1},
777 {"zcard",zcardCommand
,2,REDIS_CMD_INLINE
,NULL
,1,1,1},
778 {"zscore",zscoreCommand
,3,REDIS_CMD_BULK
|REDIS_CMD_DENYOOM
,NULL
,1,1,1},
779 {"zrank",zrankCommand
,3,REDIS_CMD_BULK
,NULL
,1,1,1},
780 {"zrevrank",zrevrankCommand
,3,REDIS_CMD_BULK
,NULL
,1,1,1},
781 {"hset",hsetCommand
,4,REDIS_CMD_BULK
|REDIS_CMD_DENYOOM
,NULL
,1,1,1},
782 {"hincrby",hincrbyCommand
,4,REDIS_CMD_INLINE
|REDIS_CMD_DENYOOM
,NULL
,1,1,1},
783 {"hget",hgetCommand
,3,REDIS_CMD_BULK
,NULL
,1,1,1},
784 {"hdel",hdelCommand
,3,REDIS_CMD_BULK
,NULL
,1,1,1},
785 {"hlen",hlenCommand
,2,REDIS_CMD_INLINE
,NULL
,1,1,1},
786 {"hkeys",hkeysCommand
,2,REDIS_CMD_INLINE
,NULL
,1,1,1},
787 {"hvals",hvalsCommand
,2,REDIS_CMD_INLINE
,NULL
,1,1,1},
788 {"hgetall",hgetallCommand
,2,REDIS_CMD_INLINE
,NULL
,1,1,1},
789 {"hexists",hexistsCommand
,3,REDIS_CMD_BULK
,NULL
,1,1,1},
790 {"incrby",incrbyCommand
,3,REDIS_CMD_INLINE
|REDIS_CMD_DENYOOM
,NULL
,1,1,1},
791 {"decrby",decrbyCommand
,3,REDIS_CMD_INLINE
|REDIS_CMD_DENYOOM
,NULL
,1,1,1},
792 {"getset",getsetCommand
,3,REDIS_CMD_BULK
|REDIS_CMD_DENYOOM
,NULL
,1,1,1},
793 {"mset",msetCommand
,-3,REDIS_CMD_BULK
|REDIS_CMD_DENYOOM
,NULL
,1,-1,2},
794 {"msetnx",msetnxCommand
,-3,REDIS_CMD_BULK
|REDIS_CMD_DENYOOM
,NULL
,1,-1,2},
795 {"randomkey",randomkeyCommand
,1,REDIS_CMD_INLINE
,NULL
,0,0,0},
796 {"select",selectCommand
,2,REDIS_CMD_INLINE
,NULL
,0,0,0},
797 {"move",moveCommand
,3,REDIS_CMD_INLINE
,NULL
,1,1,1},
798 {"rename",renameCommand
,3,REDIS_CMD_INLINE
,NULL
,1,1,1},
799 {"renamenx",renamenxCommand
,3,REDIS_CMD_INLINE
,NULL
,1,1,1},
800 {"expire",expireCommand
,3,REDIS_CMD_INLINE
,NULL
,0,0,0},
801 {"expireat",expireatCommand
,3,REDIS_CMD_INLINE
,NULL
,0,0,0},
802 {"keys",keysCommand
,2,REDIS_CMD_INLINE
,NULL
,0,0,0},
803 {"dbsize",dbsizeCommand
,1,REDIS_CMD_INLINE
,NULL
,0,0,0},
804 {"auth",authCommand
,2,REDIS_CMD_INLINE
,NULL
,0,0,0},
805 {"ping",pingCommand
,1,REDIS_CMD_INLINE
,NULL
,0,0,0},
806 {"echo",echoCommand
,2,REDIS_CMD_BULK
,NULL
,0,0,0},
807 {"save",saveCommand
,1,REDIS_CMD_INLINE
,NULL
,0,0,0},
808 {"bgsave",bgsaveCommand
,1,REDIS_CMD_INLINE
,NULL
,0,0,0},
809 {"bgrewriteaof",bgrewriteaofCommand
,1,REDIS_CMD_INLINE
,NULL
,0,0,0},
810 {"shutdown",shutdownCommand
,1,REDIS_CMD_INLINE
,NULL
,0,0,0},
811 {"lastsave",lastsaveCommand
,1,REDIS_CMD_INLINE
,NULL
,0,0,0},
812 {"type",typeCommand
,2,REDIS_CMD_INLINE
,NULL
,1,1,1},
813 {"multi",multiCommand
,1,REDIS_CMD_INLINE
,NULL
,0,0,0},
814 {"exec",execCommand
,1,REDIS_CMD_INLINE
|REDIS_CMD_DENYOOM
,NULL
,0,0,0},
815 {"discard",discardCommand
,1,REDIS_CMD_INLINE
,NULL
,0,0,0},
816 {"sync",syncCommand
,1,REDIS_CMD_INLINE
,NULL
,0,0,0},
817 {"flushdb",flushdbCommand
,1,REDIS_CMD_INLINE
,NULL
,0,0,0},
818 {"flushall",flushallCommand
,1,REDIS_CMD_INLINE
,NULL
,0,0,0},
819 {"sort",sortCommand
,-2,REDIS_CMD_INLINE
|REDIS_CMD_DENYOOM
,NULL
,1,1,1},
820 {"info",infoCommand
,1,REDIS_CMD_INLINE
,NULL
,0,0,0},
821 {"monitor",monitorCommand
,1,REDIS_CMD_INLINE
,NULL
,0,0,0},
822 {"ttl",ttlCommand
,2,REDIS_CMD_INLINE
,NULL
,1,1,1},
823 {"slaveof",slaveofCommand
,3,REDIS_CMD_INLINE
,NULL
,0,0,0},
824 {"debug",debugCommand
,-2,REDIS_CMD_INLINE
,NULL
,0,0,0},
825 {"config",configCommand
,-2,REDIS_CMD_BULK
,NULL
,0,0,0},
826 {"subscribe",subscribeCommand
,-2,REDIS_CMD_INLINE
,NULL
,0,0,0},
827 {"unsubscribe",unsubscribeCommand
,-1,REDIS_CMD_INLINE
,NULL
,0,0,0},
828 {"psubscribe",psubscribeCommand
,-2,REDIS_CMD_INLINE
,NULL
,0,0,0},
829 {"punsubscribe",punsubscribeCommand
,-1,REDIS_CMD_INLINE
,NULL
,0,0,0},
830 {"publish",publishCommand
,3,REDIS_CMD_BULK
|REDIS_CMD_FORCE_REPLICATION
,NULL
,0,0,0},
831 {NULL
,NULL
,0,0,NULL
,0,0,0}
834 /*============================ Utility functions ============================ */
836 /* Glob-style pattern matching. */
837 static int stringmatchlen(const char *pattern
, int patternLen
,
838 const char *string
, int stringLen
, int nocase
)
843 while (pattern
[1] == '*') {
848 return 1; /* match */
850 if (stringmatchlen(pattern
+1, patternLen
-1,
851 string
, stringLen
, nocase
))
852 return 1; /* match */
856 return 0; /* no match */
860 return 0; /* no match */
870 not = pattern
[0] == '^';
877 if (pattern
[0] == '\\') {
880 if (pattern
[0] == string
[0])
882 } else if (pattern
[0] == ']') {
884 } else if (patternLen
== 0) {
888 } else if (pattern
[1] == '-' && patternLen
>= 3) {
889 int start
= pattern
[0];
890 int end
= pattern
[2];
898 start
= tolower(start
);
904 if (c
>= start
&& c
<= end
)
908 if (pattern
[0] == string
[0])
911 if (tolower((int)pattern
[0]) == tolower((int)string
[0]))
921 return 0; /* no match */
927 if (patternLen
>= 2) {
934 if (pattern
[0] != string
[0])
935 return 0; /* no match */
937 if (tolower((int)pattern
[0]) != tolower((int)string
[0]))
938 return 0; /* no match */
946 if (stringLen
== 0) {
947 while(*pattern
== '*') {
954 if (patternLen
== 0 && stringLen
== 0)
/* Glob-style match of the NUL-terminated 'string' against the NUL-terminated
 * 'pattern'. This is a thin wrapper: both lengths are measured with strlen()
 * and forwarded, together with 'nocase', to stringmatchlen(), whose return
 * value is handed back unchanged. */
static int stringmatch(const char *pattern, const char *string, int nocase) {
    return stringmatchlen(pattern,strlen(pattern),string,strlen(string),nocase);
}
963 static void redisLog(int level
, const char *fmt
, ...) {
967 fp
= (server
.logfile
== NULL
) ? stdout
: fopen(server
.logfile
,"a");
971 if (level
>= server
.verbosity
) {
977 strftime(buf
,64,"%d %b %H:%M:%S",localtime(&now
));
978 fprintf(fp
,"[%d] %s %c ",(int)getpid(),buf
,c
[level
]);
979 vfprintf(fp
, fmt
, ap
);
985 if (server
.logfile
) fclose(fp
);
988 /*====================== Hash table type implementation ==================== */
/* This is a hash table type that uses the SDS dynamic strings library as
 * keys and redis objects as values (objects can hold SDS strings,
994 static void dictVanillaFree(void *privdata
, void *val
)
996 DICT_NOTUSED(privdata
);
1000 static void dictListDestructor(void *privdata
, void *val
)
1002 DICT_NOTUSED(privdata
);
1003 listRelease((list
*)val
);
1006 static int sdsDictKeyCompare(void *privdata
, const void *key1
,
1010 DICT_NOTUSED(privdata
);
1012 l1
= sdslen((sds
)key1
);
1013 l2
= sdslen((sds
)key2
);
1014 if (l1
!= l2
) return 0;
1015 return memcmp(key1
, key2
, l1
) == 0;
1018 static void dictRedisObjectDestructor(void *privdata
, void *val
)
1020 DICT_NOTUSED(privdata
);
1022 if (val
== NULL
) return; /* Values of swapped out keys as set to NULL */
1026 static int dictObjKeyCompare(void *privdata
, const void *key1
,
1029 const robj
*o1
= key1
, *o2
= key2
;
1030 return sdsDictKeyCompare(privdata
,o1
->ptr
,o2
->ptr
);
1033 static unsigned int dictObjHash(const void *key
) {
1034 const robj
*o
= key
;
1035 return dictGenHashFunction(o
->ptr
, sdslen((sds
)o
->ptr
));
1038 static int dictEncObjKeyCompare(void *privdata
, const void *key1
,
1041 robj
*o1
= (robj
*) key1
, *o2
= (robj
*) key2
;
1044 if (o1
->encoding
== REDIS_ENCODING_INT
&&
1045 o2
->encoding
== REDIS_ENCODING_INT
&&
1046 o1
->ptr
== o2
->ptr
) return 1;
1048 o1
= getDecodedObject(o1
);
1049 o2
= getDecodedObject(o2
);
1050 cmp
= sdsDictKeyCompare(privdata
,o1
->ptr
,o2
->ptr
);
1056 static unsigned int dictEncObjHash(const void *key
) {
1057 robj
*o
= (robj
*) key
;
1059 if (o
->encoding
== REDIS_ENCODING_RAW
) {
1060 return dictGenHashFunction(o
->ptr
, sdslen((sds
)o
->ptr
));
1062 if (o
->encoding
== REDIS_ENCODING_INT
) {
1066 len
= snprintf(buf
,32,"%ld",(long)o
->ptr
);
1067 return dictGenHashFunction((unsigned char*)buf
, len
);
1071 o
= getDecodedObject(o
);
1072 hash
= dictGenHashFunction(o
->ptr
, sdslen((sds
)o
->ptr
));
1079 /* Sets type and expires */
1080 static dictType setDictType
= {
1081 dictEncObjHash
, /* hash function */
1084 dictEncObjKeyCompare
, /* key compare */
1085 dictRedisObjectDestructor
, /* key destructor */
1086 NULL
/* val destructor */
1089 /* Sorted sets hash (note: a skiplist is used in addition to the hash table) */
1090 static dictType zsetDictType
= {
1091 dictEncObjHash
, /* hash function */
1094 dictEncObjKeyCompare
, /* key compare */
1095 dictRedisObjectDestructor
, /* key destructor */
1096 dictVanillaFree
/* val destructor of malloc(sizeof(double)) */
1100 static dictType dbDictType
= {
1101 dictObjHash
, /* hash function */
1104 dictObjKeyCompare
, /* key compare */
1105 dictRedisObjectDestructor
, /* key destructor */
1106 dictRedisObjectDestructor
/* val destructor */
1110 static dictType keyptrDictType
= {
1111 dictObjHash
, /* hash function */
1114 dictObjKeyCompare
, /* key compare */
1115 dictRedisObjectDestructor
, /* key destructor */
1116 NULL
/* val destructor */
/* Hash type hash table (note that small hashes are represented with zipmaps) */
1120 static dictType hashDictType
= {
1121 dictEncObjHash
, /* hash function */
1124 dictEncObjKeyCompare
, /* key compare */
1125 dictRedisObjectDestructor
, /* key destructor */
1126 dictRedisObjectDestructor
/* val destructor */
1129 /* Keylist hash table type has unencoded redis objects as keys and
1130 * lists as values. It's used for blocking operations (BLPOP) and to
1131 * map swapped keys to a list of clients waiting for this keys to be loaded. */
1132 static dictType keylistDictType
= {
1133 dictObjHash
, /* hash function */
1136 dictObjKeyCompare
, /* key compare */
1137 dictRedisObjectDestructor
, /* key destructor */
1138 dictListDestructor
/* val destructor */
1141 static void version();
1143 /* ========================= Random utility functions ======================= */
1145 /* Redis generally does not try to recover from out of memory conditions
1146 * when allocating objects or strings, it is not clear if it will be possible
1147 * to report this condition to the client since the networking layer itself
1148 * is based on heap allocation for send buffers, so we simply abort.
1149 * At least the code will be simpler to read... */
1150 static void oom(const char *msg
) {
1151 redisLog(REDIS_WARNING
, "%s: Out of memory\n",msg
);
1156 /* ====================== Redis server networking stuff ===================== */
1157 static void closeTimedoutClients(void) {
1160 time_t now
= time(NULL
);
1163 listRewind(server
.clients
,&li
);
1164 while ((ln
= listNext(&li
)) != NULL
) {
1165 c
= listNodeValue(ln
);
1166 if (server
.maxidletime
&&
1167 !(c
->flags
& REDIS_SLAVE
) && /* no timeout for slaves */
1168 !(c
->flags
& REDIS_MASTER
) && /* no timeout for masters */
1169 dictSize(c
->pubsub_channels
) == 0 && /* no timeout for pubsub */
1170 listLength(c
->pubsub_patterns
) == 0 &&
1171 (now
- c
->lastinteraction
> server
.maxidletime
))
1173 redisLog(REDIS_VERBOSE
,"Closing idle client");
1175 } else if (c
->flags
& REDIS_BLOCKED
) {
1176 if (c
->blockingto
!= 0 && c
->blockingto
< now
) {
1177 addReply(c
,shared
.nullmultibulk
);
1178 unblockClientWaitingData(c
);
1184 static int htNeedsResize(dict
*dict
) {
1185 long long size
, used
;
1187 size
= dictSlots(dict
);
1188 used
= dictSize(dict
);
1189 return (size
&& used
&& size
> DICT_HT_INITIAL_SIZE
&&
1190 (used
*100/size
< REDIS_HT_MINFILL
));
1193 /* If the percentage of used slots in the HT reaches REDIS_HT_MINFILL
1194 * we resize the hash table to save memory */
1195 static void tryResizeHashTables(void) {
1198 for (j
= 0; j
< server
.dbnum
; j
++) {
1199 if (htNeedsResize(server
.db
[j
].dict
)) {
1200 redisLog(REDIS_VERBOSE
,"The hash table %d is too sparse, resize it...",j
);
1201 dictResize(server
.db
[j
].dict
);
1202 redisLog(REDIS_VERBOSE
,"Hash table %d resized.",j
);
1204 if (htNeedsResize(server
.db
[j
].expires
))
1205 dictResize(server
.db
[j
].expires
);
1209 /* A background saving child (BGSAVE) terminated its work. Handle this. */
1210 void backgroundSaveDoneHandler(int statloc
) {
1211 int exitcode
= WEXITSTATUS(statloc
);
1212 int bysignal
= WIFSIGNALED(statloc
);
1214 if (!bysignal
&& exitcode
== 0) {
1215 redisLog(REDIS_NOTICE
,
1216 "Background saving terminated with success");
1218 server
.lastsave
= time(NULL
);
1219 } else if (!bysignal
&& exitcode
!= 0) {
1220 redisLog(REDIS_WARNING
, "Background saving error");
1222 redisLog(REDIS_WARNING
,
1223 "Background saving terminated by signal %d", WTERMSIG(statloc
));
1224 rdbRemoveTempFile(server
.bgsavechildpid
);
1226 server
.bgsavechildpid
= -1;
1227 /* Possibly there are slaves waiting for a BGSAVE in order to be served
1228 * (the first stage of SYNC is a bulk transfer of dump.rdb) */
1229 updateSlavesWaitingBgsave(exitcode
== 0 ? REDIS_OK
: REDIS_ERR
);
1232 /* A background append only file rewriting (BGREWRITEAOF) terminated its work.
1234 void backgroundRewriteDoneHandler(int statloc
) {
1235 int exitcode
= WEXITSTATUS(statloc
);
1236 int bysignal
= WIFSIGNALED(statloc
);
1238 if (!bysignal
&& exitcode
== 0) {
1242 redisLog(REDIS_NOTICE
,
1243 "Background append only file rewriting terminated with success");
1244 /* Now it's time to flush the differences accumulated by the parent */
1245 snprintf(tmpfile
,256,"temp-rewriteaof-bg-%d.aof", (int) server
.bgrewritechildpid
);
1246 fd
= open(tmpfile
,O_WRONLY
|O_APPEND
);
1248 redisLog(REDIS_WARNING
, "Not able to open the temp append only file produced by the child: %s", strerror(errno
));
1251 /* Flush our data... */
1252 if (write(fd
,server
.bgrewritebuf
,sdslen(server
.bgrewritebuf
)) !=
1253 (signed) sdslen(server
.bgrewritebuf
)) {
1254 redisLog(REDIS_WARNING
, "Error or short write trying to flush the parent diff of the append log file in the child temp file: %s", strerror(errno
));
1258 redisLog(REDIS_NOTICE
,"Parent diff flushed into the new append log file with success (%lu bytes)",sdslen(server
.bgrewritebuf
));
1259 /* Now our work is to rename the temp file into the stable file. And
1260 * switch the file descriptor used by the server for append only. */
1261 if (rename(tmpfile
,server
.appendfilename
) == -1) {
1262 redisLog(REDIS_WARNING
,"Can't rename the temp append only file into the stable one: %s", strerror(errno
));
1266 /* Mission completed... almost */
1267 redisLog(REDIS_NOTICE
,"Append only file successfully rewritten.");
1268 if (server
.appendfd
!= -1) {
1269 /* If append only is actually enabled... */
1270 close(server
.appendfd
);
1271 server
.appendfd
= fd
;
1273 server
.appendseldb
= -1; /* Make sure it will issue SELECT */
1274 redisLog(REDIS_NOTICE
,"The new append only file was selected for future appends.");
1276 /* If append only is disabled we just generate a dump in this
1277 * format. Why not? */
1280 } else if (!bysignal
&& exitcode
!= 0) {
1281 redisLog(REDIS_WARNING
, "Background append only file rewriting error");
1283 redisLog(REDIS_WARNING
,
1284 "Background append only file rewriting terminated by signal %d",
1288 sdsfree(server
.bgrewritebuf
);
1289 server
.bgrewritebuf
= sdsempty();
1290 aofRemoveTempFile(server
.bgrewritechildpid
);
1291 server
.bgrewritechildpid
= -1;
/* This function is called once a background process of some kind terminates,
 * as we want to avoid resizing the hash tables when there is a child in order
 * to play well with copy-on-write (otherwise when a resize happens lots of
 * memory pages are copied). The goal of this function is to update the ability
 * for dict.c to resize the hash tables according to whether or not we have
 * running children. */
1300 static void updateDictResizePolicy(void) {
1301 if (server
.bgsavechildpid
== -1 && server
.bgrewritechildpid
== -1)
1304 dictDisableResize();
1307 static int serverCron(struct aeEventLoop
*eventLoop
, long long id
, void *clientData
) {
1308 int j
, loops
= server
.cronloops
++;
1309 REDIS_NOTUSED(eventLoop
);
1311 REDIS_NOTUSED(clientData
);
    /* We take a cached value of the unix time in the global state because
     * with virtual memory and aging there is the need to store the current
     * time in objects at every object access, and accuracy is not needed.
     * To access a global var is faster than calling time(NULL) */
1317 server
.unixtime
= time(NULL
);
1319 /* Show some info about non-empty databases */
1320 for (j
= 0; j
< server
.dbnum
; j
++) {
1321 long long size
, used
, vkeys
;
1323 size
= dictSlots(server
.db
[j
].dict
);
1324 used
= dictSize(server
.db
[j
].dict
);
1325 vkeys
= dictSize(server
.db
[j
].expires
);
1326 if (!(loops
% 50) && (used
|| vkeys
)) {
1327 redisLog(REDIS_VERBOSE
,"DB %d: %lld keys (%lld volatile) in %lld slots HT.",j
,used
,vkeys
,size
);
1328 /* dictPrintStats(server.dict); */
    /* We don't want to resize the hash tables while a background saving
1333 * is in progress: the saving child is created using fork() that is
1334 * implemented with a copy-on-write semantic in most modern systems, so
1335 * if we resize the HT while there is the saving child at work actually
1336 * a lot of memory movements in the parent will cause a lot of pages
1338 if (server
.bgsavechildpid
== -1 && server
.bgrewritechildpid
== -1 &&
1341 tryResizeHashTables();
1344 /* Show information about connected clients */
1345 if (!(loops
% 50)) {
1346 redisLog(REDIS_VERBOSE
,"%d clients connected (%d slaves), %zu bytes in use",
1347 listLength(server
.clients
)-listLength(server
.slaves
),
1348 listLength(server
.slaves
),
1349 zmalloc_used_memory());
1352 /* Close connections of timedout clients */
1353 if ((server
.maxidletime
&& !(loops
% 100)) || server
.blpop_blocked_clients
)
1354 closeTimedoutClients();
1356 /* Check if a background saving or AOF rewrite in progress terminated */
1357 if (server
.bgsavechildpid
!= -1 || server
.bgrewritechildpid
!= -1) {
1361 if ((pid
= wait3(&statloc
,WNOHANG
,NULL
)) != 0) {
1362 if (pid
== server
.bgsavechildpid
) {
1363 backgroundSaveDoneHandler(statloc
);
1365 backgroundRewriteDoneHandler(statloc
);
1367 updateDictResizePolicy();
1370 /* If there is not a background saving in progress check if
1371 * we have to save now */
1372 time_t now
= time(NULL
);
1373 for (j
= 0; j
< server
.saveparamslen
; j
++) {
1374 struct saveparam
*sp
= server
.saveparams
+j
;
1376 if (server
.dirty
>= sp
->changes
&&
1377 now
-server
.lastsave
> sp
->seconds
) {
1378 redisLog(REDIS_NOTICE
,"%d changes in %d seconds. Saving...",
1379 sp
->changes
, sp
->seconds
);
1380 rdbSaveBackground(server
.dbfilename
);
1386 /* Try to expire a few timed out keys. The algorithm used is adaptive and
1387 * will use few CPU cycles if there are few expiring keys, otherwise
1388 * it will get more aggressive to avoid that too much memory is used by
1389 * keys that can be removed from the keyspace. */
1390 for (j
= 0; j
< server
.dbnum
; j
++) {
1392 redisDb
*db
= server
.db
+j
;
1394 /* Continue to expire if at the end of the cycle more than 25%
1395 * of the keys were expired. */
1397 long num
= dictSize(db
->expires
);
1398 time_t now
= time(NULL
);
1401 if (num
> REDIS_EXPIRELOOKUPS_PER_CRON
)
1402 num
= REDIS_EXPIRELOOKUPS_PER_CRON
;
1407 if ((de
= dictGetRandomKey(db
->expires
)) == NULL
) break;
1408 t
= (time_t) dictGetEntryVal(de
);
1410 deleteKey(db
,dictGetEntryKey(de
));
1412 server
.stat_expiredkeys
++;
1415 } while (expired
> REDIS_EXPIRELOOKUPS_PER_CRON
/4);
    /* Swap a few keys on disk if we are over the memory limit and VM
     * is enabled. Try to free objects from the free list first. */
1420 if (vmCanSwapOut()) {
1421 while (server
.vm_enabled
&& zmalloc_used_memory() >
1422 server
.vm_max_memory
)
1426 if (tryFreeOneObjectFromFreelist() == REDIS_OK
) continue;
1427 retval
= (server
.vm_max_threads
== 0) ?
1428 vmSwapOneObjectBlocking() :
1429 vmSwapOneObjectThreaded();
1430 if (retval
== REDIS_ERR
&& !(loops
% 300) &&
1431 zmalloc_used_memory() >
1432 (server
.vm_max_memory
+server
.vm_max_memory
/10))
1434 redisLog(REDIS_WARNING
,"WARNING: vm-max-memory limit exceeded by more than 10%% but unable to swap more objects out!");
            /* Note that when using threaded I/O we free just one object,
             * because anyway when the I/O thread in charge to swap this
             * object out will finish, the handler of completed jobs
             * will try to swap more objects if we are still out of memory. */
1440 if (retval
== REDIS_ERR
|| server
.vm_max_threads
> 0) break;
1444 /* Check if we should connect to a MASTER */
1445 if (server
.replstate
== REDIS_REPL_CONNECT
&& !(loops
% 10)) {
1446 redisLog(REDIS_NOTICE
,"Connecting to MASTER...");
1447 if (syncWithMaster() == REDIS_OK
) {
1448 redisLog(REDIS_NOTICE
,"MASTER <-> SLAVE sync succeeded");
1454 /* This function gets called every time Redis is entering the
1455 * main loop of the event driven library, that is, before to sleep
1456 * for ready file descriptors. */
1457 static void beforeSleep(struct aeEventLoop
*eventLoop
) {
1458 REDIS_NOTUSED(eventLoop
);
1460 if (server
.vm_enabled
&& listLength(server
.io_ready_clients
)) {
1464 listRewind(server
.io_ready_clients
,&li
);
1465 while((ln
= listNext(&li
))) {
1466 redisClient
*c
= ln
->value
;
1467 struct redisCommand
*cmd
;
1469 /* Resume the client. */
1470 listDelNode(server
.io_ready_clients
,ln
);
1471 c
->flags
&= (~REDIS_IO_WAIT
);
1472 server
.vm_blocked_clients
--;
1473 aeCreateFileEvent(server
.el
, c
->fd
, AE_READABLE
,
1474 readQueryFromClient
, c
);
1475 cmd
= lookupCommand(c
->argv
[0]->ptr
);
1476 assert(cmd
!= NULL
);
1479 /* There may be more data to process in the input buffer. */
1480 if (c
->querybuf
&& sdslen(c
->querybuf
) > 0)
1481 processInputBuffer(c
);
1486 static void createSharedObjects(void) {
1487 shared
.crlf
= createObject(REDIS_STRING
,sdsnew("\r\n"));
1488 shared
.ok
= createObject(REDIS_STRING
,sdsnew("+OK\r\n"));
1489 shared
.err
= createObject(REDIS_STRING
,sdsnew("-ERR\r\n"));
1490 shared
.emptybulk
= createObject(REDIS_STRING
,sdsnew("$0\r\n\r\n"));
1491 shared
.czero
= createObject(REDIS_STRING
,sdsnew(":0\r\n"));
1492 shared
.cone
= createObject(REDIS_STRING
,sdsnew(":1\r\n"));
1493 shared
.nullbulk
= createObject(REDIS_STRING
,sdsnew("$-1\r\n"));
1494 shared
.nullmultibulk
= createObject(REDIS_STRING
,sdsnew("*-1\r\n"));
1495 shared
.emptymultibulk
= createObject(REDIS_STRING
,sdsnew("*0\r\n"));
1496 shared
.pong
= createObject(REDIS_STRING
,sdsnew("+PONG\r\n"));
1497 shared
.queued
= createObject(REDIS_STRING
,sdsnew("+QUEUED\r\n"));
1498 shared
.wrongtypeerr
= createObject(REDIS_STRING
,sdsnew(
1499 "-ERR Operation against a key holding the wrong kind of value\r\n"));
1500 shared
.nokeyerr
= createObject(REDIS_STRING
,sdsnew(
1501 "-ERR no such key\r\n"));
1502 shared
.syntaxerr
= createObject(REDIS_STRING
,sdsnew(
1503 "-ERR syntax error\r\n"));
1504 shared
.sameobjecterr
= createObject(REDIS_STRING
,sdsnew(
1505 "-ERR source and destination objects are the same\r\n"));
1506 shared
.outofrangeerr
= createObject(REDIS_STRING
,sdsnew(
1507 "-ERR index out of range\r\n"));
1508 shared
.space
= createObject(REDIS_STRING
,sdsnew(" "));
1509 shared
.colon
= createObject(REDIS_STRING
,sdsnew(":"));
1510 shared
.plus
= createObject(REDIS_STRING
,sdsnew("+"));
1511 shared
.select0
= createStringObject("select 0\r\n",10);
1512 shared
.select1
= createStringObject("select 1\r\n",10);
1513 shared
.select2
= createStringObject("select 2\r\n",10);
1514 shared
.select3
= createStringObject("select 3\r\n",10);
1515 shared
.select4
= createStringObject("select 4\r\n",10);
1516 shared
.select5
= createStringObject("select 5\r\n",10);
1517 shared
.select6
= createStringObject("select 6\r\n",10);
1518 shared
.select7
= createStringObject("select 7\r\n",10);
1519 shared
.select8
= createStringObject("select 8\r\n",10);
1520 shared
.select9
= createStringObject("select 9\r\n",10);
1521 shared
.messagebulk
= createStringObject("$7\r\nmessage\r\n",13);
1522 shared
.subscribebulk
= createStringObject("$9\r\nsubscribe\r\n",15);
1523 shared
.unsubscribebulk
= createStringObject("$11\r\nunsubscribe\r\n",18);
1524 shared
.psubscribebulk
= createStringObject("$10\r\npsubscribe\r\n",17);
1525 shared
.punsubscribebulk
= createStringObject("$12\r\npunsubscribe\r\n",19);
1526 shared
.mbulk3
= createStringObject("*3\r\n",4);
1529 static void appendServerSaveParams(time_t seconds
, int changes
) {
1530 server
.saveparams
= zrealloc(server
.saveparams
,sizeof(struct saveparam
)*(server
.saveparamslen
+1));
1531 server
.saveparams
[server
.saveparamslen
].seconds
= seconds
;
1532 server
.saveparams
[server
.saveparamslen
].changes
= changes
;
1533 server
.saveparamslen
++;
1536 static void resetServerSaveParams() {
1537 zfree(server
.saveparams
);
1538 server
.saveparams
= NULL
;
1539 server
.saveparamslen
= 0;
1542 static void initServerConfig() {
1543 server
.dbnum
= REDIS_DEFAULT_DBNUM
;
1544 server
.port
= REDIS_SERVERPORT
;
1545 server
.verbosity
= REDIS_VERBOSE
;
1546 server
.maxidletime
= REDIS_MAXIDLETIME
;
1547 server
.saveparams
= NULL
;
1548 server
.logfile
= NULL
; /* NULL = log on standard output */
1549 server
.bindaddr
= NULL
;
1550 server
.glueoutputbuf
= 1;
1551 server
.daemonize
= 0;
1552 server
.appendonly
= 0;
1553 server
.appendfsync
= APPENDFSYNC_ALWAYS
;
1554 server
.lastfsync
= time(NULL
);
1555 server
.appendfd
= -1;
1556 server
.appendseldb
= -1; /* Make sure the first time will not match */
1557 server
.pidfile
= zstrdup("/var/run/redis.pid");
1558 server
.dbfilename
= zstrdup("dump.rdb");
1559 server
.appendfilename
= zstrdup("appendonly.aof");
1560 server
.requirepass
= NULL
;
1561 server
.shareobjects
= 0;
1562 server
.rdbcompression
= 1;
1563 server
.maxclients
= 0;
1564 server
.blpop_blocked_clients
= 0;
1565 server
.maxmemory
= 0;
1566 server
.vm_enabled
= 0;
1567 server
.vm_swap_file
= zstrdup("/tmp/redis-%p.vm");
1568 server
.vm_page_size
= 256; /* 256 bytes per page */
1569 server
.vm_pages
= 1024*1024*100; /* 104 millions of pages */
1570 server
.vm_max_memory
= 1024LL*1024*1024*1; /* 1 GB of RAM */
1571 server
.vm_max_threads
= 4;
1572 server
.vm_blocked_clients
= 0;
1573 server
.hash_max_zipmap_entries
= REDIS_HASH_MAX_ZIPMAP_ENTRIES
;
1574 server
.hash_max_zipmap_value
= REDIS_HASH_MAX_ZIPMAP_VALUE
;
1576 resetServerSaveParams();
1578 appendServerSaveParams(60*60,1); /* save after 1 hour and 1 change */
1579 appendServerSaveParams(300,100); /* save after 5 minutes and 100 changes */
1580 appendServerSaveParams(60,10000); /* save after 1 minute and 10000 changes */
1581 /* Replication related */
1583 server
.masterauth
= NULL
;
1584 server
.masterhost
= NULL
;
1585 server
.masterport
= 6379;
1586 server
.master
= NULL
;
1587 server
.replstate
= REDIS_REPL_NONE
;
1589 /* Double constants initialization */
1591 R_PosInf
= 1.0/R_Zero
;
1592 R_NegInf
= -1.0/R_Zero
;
1593 R_Nan
= R_Zero
/R_Zero
;
1596 static void initServer() {
1599 signal(SIGHUP
, SIG_IGN
);
1600 signal(SIGPIPE
, SIG_IGN
);
1601 setupSigSegvAction();
1603 server
.devnull
= fopen("/dev/null","w");
1604 if (server
.devnull
== NULL
) {
1605 redisLog(REDIS_WARNING
, "Can't open /dev/null: %s", server
.neterr
);
1608 server
.clients
= listCreate();
1609 server
.slaves
= listCreate();
1610 server
.monitors
= listCreate();
1611 server
.objfreelist
= listCreate();
1612 createSharedObjects();
1613 server
.el
= aeCreateEventLoop();
1614 server
.db
= zmalloc(sizeof(redisDb
)*server
.dbnum
);
1615 server
.fd
= anetTcpServer(server
.neterr
, server
.port
, server
.bindaddr
);
1616 if (server
.fd
== -1) {
1617 redisLog(REDIS_WARNING
, "Opening TCP port: %s", server
.neterr
);
1620 for (j
= 0; j
< server
.dbnum
; j
++) {
1621 server
.db
[j
].dict
= dictCreate(&dbDictType
,NULL
);
1622 server
.db
[j
].expires
= dictCreate(&keyptrDictType
,NULL
);
1623 server
.db
[j
].blockingkeys
= dictCreate(&keylistDictType
,NULL
);
1624 if (server
.vm_enabled
)
1625 server
.db
[j
].io_keys
= dictCreate(&keylistDictType
,NULL
);
1626 server
.db
[j
].id
= j
;
1628 server
.pubsub_channels
= dictCreate(&keylistDictType
,NULL
);
1629 server
.pubsub_patterns
= listCreate();
1630 listSetFreeMethod(server
.pubsub_patterns
,freePubsubPattern
);
1631 listSetMatchMethod(server
.pubsub_patterns
,listMatchPubsubPattern
);
1632 server
.cronloops
= 0;
1633 server
.bgsavechildpid
= -1;
1634 server
.bgrewritechildpid
= -1;
1635 server
.bgrewritebuf
= sdsempty();
1636 server
.lastsave
= time(NULL
);
1638 server
.stat_numcommands
= 0;
1639 server
.stat_numconnections
= 0;
1640 server
.stat_expiredkeys
= 0;
1641 server
.stat_starttime
= time(NULL
);
1642 server
.unixtime
= time(NULL
);
1643 aeCreateTimeEvent(server
.el
, 1, serverCron
, NULL
, NULL
);
1644 if (aeCreateFileEvent(server
.el
, server
.fd
, AE_READABLE
,
1645 acceptHandler
, NULL
) == AE_ERR
) oom("creating file event");
1647 if (server
.appendonly
) {
1648 server
.appendfd
= open(server
.appendfilename
,O_WRONLY
|O_APPEND
|O_CREAT
,0644);
1649 if (server
.appendfd
== -1) {
1650 redisLog(REDIS_WARNING
, "Can't open the append-only file: %s",
1656 if (server
.vm_enabled
) vmInit();
1659 /* Empty the whole database */
1660 static long long emptyDb() {
1662 long long removed
= 0;
1664 for (j
= 0; j
< server
.dbnum
; j
++) {
1665 removed
+= dictSize(server
.db
[j
].dict
);
1666 dictEmpty(server
.db
[j
].dict
);
1667 dictEmpty(server
.db
[j
].expires
);
/* Convert a "yes"/"no" configuration argument into an integer.
 *
 * Returns 1 for "yes" and 0 for "no", compared case-insensitively.
 * Any other string yields -1, the value the configuration loader tests for
 * in order to report "argument must be 'yes' or 'no'". */
static int yesnotoi(char *s) {
    if (!strcasecmp(s,"yes")) return 1;
    else if (!strcasecmp(s,"no")) return 0;
    else return -1;
}
/* I agree, this is a very rudimentary way to load a configuration...
   will improve later if the config gets more complex */
1680 static void loadServerConfig(char *filename
) {
1682 char buf
[REDIS_CONFIGLINE_MAX
+1], *err
= NULL
;
1685 char *errormsg
= "Fatal error, can't open config file '%s'";
1686 char *errorbuf
= zmalloc(sizeof(char)*(strlen(errormsg
)+strlen(filename
)));
1687 sprintf(errorbuf
, errormsg
, filename
);
1689 if (filename
[0] == '-' && filename
[1] == '\0')
1692 if ((fp
= fopen(filename
,"r")) == NULL
) {
1693 redisLog(REDIS_WARNING
, errorbuf
);
1698 while(fgets(buf
,REDIS_CONFIGLINE_MAX
+1,fp
) != NULL
) {
1704 line
= sdstrim(line
," \t\r\n");
1706 /* Skip comments and blank lines*/
1707 if (line
[0] == '#' || line
[0] == '\0') {
1712 /* Split into arguments */
1713 argv
= sdssplitlen(line
,sdslen(line
)," ",1,&argc
);
1714 sdstolower(argv
[0]);
1716 /* Execute config directives */
1717 if (!strcasecmp(argv
[0],"timeout") && argc
== 2) {
1718 server
.maxidletime
= atoi(argv
[1]);
1719 if (server
.maxidletime
< 0) {
1720 err
= "Invalid timeout value"; goto loaderr
;
1722 } else if (!strcasecmp(argv
[0],"port") && argc
== 2) {
1723 server
.port
= atoi(argv
[1]);
1724 if (server
.port
< 1 || server
.port
> 65535) {
1725 err
= "Invalid port"; goto loaderr
;
1727 } else if (!strcasecmp(argv
[0],"bind") && argc
== 2) {
1728 server
.bindaddr
= zstrdup(argv
[1]);
1729 } else if (!strcasecmp(argv
[0],"save") && argc
== 3) {
1730 int seconds
= atoi(argv
[1]);
1731 int changes
= atoi(argv
[2]);
1732 if (seconds
< 1 || changes
< 0) {
1733 err
= "Invalid save parameters"; goto loaderr
;
1735 appendServerSaveParams(seconds
,changes
);
1736 } else if (!strcasecmp(argv
[0],"dir") && argc
== 2) {
1737 if (chdir(argv
[1]) == -1) {
1738 redisLog(REDIS_WARNING
,"Can't chdir to '%s': %s",
1739 argv
[1], strerror(errno
));
1742 } else if (!strcasecmp(argv
[0],"loglevel") && argc
== 2) {
1743 if (!strcasecmp(argv
[1],"debug")) server
.verbosity
= REDIS_DEBUG
;
1744 else if (!strcasecmp(argv
[1],"verbose")) server
.verbosity
= REDIS_VERBOSE
;
1745 else if (!strcasecmp(argv
[1],"notice")) server
.verbosity
= REDIS_NOTICE
;
1746 else if (!strcasecmp(argv
[1],"warning")) server
.verbosity
= REDIS_WARNING
;
1748 err
= "Invalid log level. Must be one of debug, notice, warning";
1751 } else if (!strcasecmp(argv
[0],"logfile") && argc
== 2) {
1754 server
.logfile
= zstrdup(argv
[1]);
1755 if (!strcasecmp(server
.logfile
,"stdout")) {
1756 zfree(server
.logfile
);
1757 server
.logfile
= NULL
;
1759 if (server
.logfile
) {
1760 /* Test if we are able to open the file. The server will not
1761 * be able to abort just for this problem later... */
1762 logfp
= fopen(server
.logfile
,"a");
1763 if (logfp
== NULL
) {
1764 err
= sdscatprintf(sdsempty(),
1765 "Can't open the log file: %s", strerror(errno
));
1770 } else if (!strcasecmp(argv
[0],"databases") && argc
== 2) {
1771 server
.dbnum
= atoi(argv
[1]);
1772 if (server
.dbnum
< 1) {
1773 err
= "Invalid number of databases"; goto loaderr
;
1775 } else if (!strcasecmp(argv
[0],"include") && argc
== 2) {
1776 loadServerConfig(argv
[1]);
1777 } else if (!strcasecmp(argv
[0],"maxclients") && argc
== 2) {
1778 server
.maxclients
= atoi(argv
[1]);
1779 } else if (!strcasecmp(argv
[0],"maxmemory") && argc
== 2) {
1780 server
.maxmemory
= strtoll(argv
[1], NULL
, 10);
1781 } else if (!strcasecmp(argv
[0],"slaveof") && argc
== 3) {
1782 server
.masterhost
= sdsnew(argv
[1]);
1783 server
.masterport
= atoi(argv
[2]);
1784 server
.replstate
= REDIS_REPL_CONNECT
;
1785 } else if (!strcasecmp(argv
[0],"masterauth") && argc
== 2) {
1786 server
.masterauth
= zstrdup(argv
[1]);
1787 } else if (!strcasecmp(argv
[0],"glueoutputbuf") && argc
== 2) {
1788 if ((server
.glueoutputbuf
= yesnotoi(argv
[1])) == -1) {
1789 err
= "argument must be 'yes' or 'no'"; goto loaderr
;
1791 } else if (!strcasecmp(argv
[0],"shareobjects") && argc
== 2) {
1792 if ((server
.shareobjects
= yesnotoi(argv
[1])) == -1) {
1793 err
= "argument must be 'yes' or 'no'"; goto loaderr
;
1795 } else if (!strcasecmp(argv
[0],"rdbcompression") && argc
== 2) {
1796 if ((server
.rdbcompression
= yesnotoi(argv
[1])) == -1) {
1797 err
= "argument must be 'yes' or 'no'"; goto loaderr
;
1799 } else if (!strcasecmp(argv
[0],"daemonize") && argc
== 2) {
1800 if ((server
.daemonize
= yesnotoi(argv
[1])) == -1) {
1801 err
= "argument must be 'yes' or 'no'"; goto loaderr
;
1803 } else if (!strcasecmp(argv
[0],"appendonly") && argc
== 2) {
1804 if ((server
.appendonly
= yesnotoi(argv
[1])) == -1) {
1805 err
= "argument must be 'yes' or 'no'"; goto loaderr
;
1807 } else if (!strcasecmp(argv
[0],"appendfsync") && argc
== 2) {
1808 if (!strcasecmp(argv
[1],"no")) {
1809 server
.appendfsync
= APPENDFSYNC_NO
;
1810 } else if (!strcasecmp(argv
[1],"always")) {
1811 server
.appendfsync
= APPENDFSYNC_ALWAYS
;
1812 } else if (!strcasecmp(argv
[1],"everysec")) {
1813 server
.appendfsync
= APPENDFSYNC_EVERYSEC
;
1815 err
= "argument must be 'no', 'always' or 'everysec'";
1818 } else if (!strcasecmp(argv
[0],"requirepass") && argc
== 2) {
1819 server
.requirepass
= zstrdup(argv
[1]);
1820 } else if (!strcasecmp(argv
[0],"pidfile") && argc
== 2) {
1821 zfree(server
.pidfile
);
1822 server
.pidfile
= zstrdup(argv
[1]);
1823 } else if (!strcasecmp(argv
[0],"dbfilename") && argc
== 2) {
1824 zfree(server
.dbfilename
);
1825 server
.dbfilename
= zstrdup(argv
[1]);
1826 } else if (!strcasecmp(argv
[0],"vm-enabled") && argc
== 2) {
1827 if ((server
.vm_enabled
= yesnotoi(argv
[1])) == -1) {
1828 err
= "argument must be 'yes' or 'no'"; goto loaderr
;
1830 } else if (!strcasecmp(argv
[0],"vm-swap-file") && argc
== 2) {
1831 zfree(server
.vm_swap_file
);
1832 server
.vm_swap_file
= zstrdup(argv
[1]);
1833 } else if (!strcasecmp(argv
[0],"vm-max-memory") && argc
== 2) {
1834 server
.vm_max_memory
= strtoll(argv
[1], NULL
, 10);
1835 } else if (!strcasecmp(argv
[0],"vm-page-size") && argc
== 2) {
1836 server
.vm_page_size
= strtoll(argv
[1], NULL
, 10);
1837 } else if (!strcasecmp(argv
[0],"vm-pages") && argc
== 2) {
1838 server
.vm_pages
= strtoll(argv
[1], NULL
, 10);
1839 } else if (!strcasecmp(argv
[0],"vm-max-threads") && argc
== 2) {
1840 server
.vm_max_threads
= strtoll(argv
[1], NULL
, 10);
1841 } else if (!strcasecmp(argv
[0],"hash-max-zipmap-entries") && argc
== 2){
1842 server
.hash_max_zipmap_entries
= strtol(argv
[1], NULL
, 10);
1843 } else if (!strcasecmp(argv
[0],"hash-max-zipmap-value") && argc
== 2){
1844 server
.hash_max_zipmap_value
= strtol(argv
[1], NULL
, 10);
1845 } else if (!strcasecmp(argv
[0],"vm-max-threads") && argc
== 2) {
1846 server
.vm_max_threads
= strtoll(argv
[1], NULL
, 10);
1848 err
= "Bad directive or wrong number of arguments"; goto loaderr
;
1850 for (j
= 0; j
< argc
; j
++)
1855 if (fp
!= stdin
) fclose(fp
);
1859 fprintf(stderr
, "\n*** FATAL CONFIG FILE ERROR ***\n");
1860 fprintf(stderr
, "Reading the configuration file, at line %d\n", linenum
);
1861 fprintf(stderr
, ">>> '%s'\n", line
);
1862 fprintf(stderr
, "%s\n", err
);
1866 static void freeClientArgv(redisClient
*c
) {
1869 for (j
= 0; j
< c
->argc
; j
++)
1870 decrRefCount(c
->argv
[j
]);
1871 for (j
= 0; j
< c
->mbargc
; j
++)
1872 decrRefCount(c
->mbargv
[j
]);
1877 static void freeClient(redisClient
*c
) {
1880 /* Note that if the client we are freeing is blocked in a blocking
1881 * call, we have to set querybuf to NULL *before* calling
1882 * unblockClientWaitingData() so that processInputBuffer() does not get
1883 * called. Also it is important to remove the file events after
1884 * this, because this call adds the READABLE event. */
1885 sdsfree(c
->querybuf
);
1887 if (c
->flags
& REDIS_BLOCKED
)
1888 unblockClientWaitingData(c
);
1890 /* Unsubscribe from all the pubsub channels */
1891 pubsubUnsubscribeAllChannels(c
,0);
1892 pubsubUnsubscribeAllPatterns(c
,0);
1893 dictRelease(c
->pubsub_channels
);
1894 listRelease(c
->pubsub_patterns
);
1895 /* Obvious cleanup */
1896 aeDeleteFileEvent(server
.el
,c
->fd
,AE_READABLE
);
1897 aeDeleteFileEvent(server
.el
,c
->fd
,AE_WRITABLE
);
1898 listRelease(c
->reply
);
1901 /* Remove from the list of clients */
1902 ln
= listSearchKey(server
.clients
,c
);
1903 redisAssert(ln
!= NULL
);
1904 listDelNode(server
.clients
,ln
);
1905 /* Remove from the list of clients waiting for swapped keys */
1906 if (c
->flags
& REDIS_IO_WAIT
&& listLength(c
->io_keys
) == 0) {
1907 ln
= listSearchKey(server
.io_ready_clients
,c
);
1909 listDelNode(server
.io_ready_clients
,ln
);
1910 server
.vm_blocked_clients
--;
1913 while (server
.vm_enabled
&& listLength(c
->io_keys
)) {
1914 ln
= listFirst(c
->io_keys
);
1915 dontWaitForSwappedKey(c
,ln
->value
);
1917 listRelease(c
->io_keys
);
1918 /* Master/slave cleanup */
1919 if (c
->flags
& REDIS_SLAVE
) {
1920 if (c
->replstate
== REDIS_REPL_SEND_BULK
&& c
->repldbfd
!= -1)
1922 list
*l
= (c
->flags
& REDIS_MONITOR
) ? server
.monitors
: server
.slaves
;
1923 ln
= listSearchKey(l
,c
);
1924 redisAssert(ln
!= NULL
);
1927 if (c
->flags
& REDIS_MASTER
) {
1928 server
.master
= NULL
;
1929 server
.replstate
= REDIS_REPL_CONNECT
;
1931 /* Release memory */
1934 freeClientMultiState(c
);
1938 #define GLUEREPLY_UP_TO (1024)
1939 static void glueReplyBuffersIfNeeded(redisClient
*c
) {
1941 char buf
[GLUEREPLY_UP_TO
];
1946 listRewind(c
->reply
,&li
);
1947 while((ln
= listNext(&li
))) {
1951 objlen
= sdslen(o
->ptr
);
1952 if (copylen
+ objlen
<= GLUEREPLY_UP_TO
) {
1953 memcpy(buf
+copylen
,o
->ptr
,objlen
);
1955 listDelNode(c
->reply
,ln
);
1957 if (copylen
== 0) return;
1961 /* Now the output buffer is empty, add the new single element */
1962 o
= createObject(REDIS_STRING
,sdsnewlen(buf
,copylen
));
1963 listAddNodeHead(c
->reply
,o
);
1966 static void sendReplyToClient(aeEventLoop
*el
, int fd
, void *privdata
, int mask
) {
1967 redisClient
*c
= privdata
;
1968 int nwritten
= 0, totwritten
= 0, objlen
;
1971 REDIS_NOTUSED(mask
);
1973 /* Use writev() if we have enough buffers to send */
1974 if (!server
.glueoutputbuf
&&
1975 listLength(c
->reply
) > REDIS_WRITEV_THRESHOLD
&&
1976 !(c
->flags
& REDIS_MASTER
))
1978 sendReplyToClientWritev(el
, fd
, privdata
, mask
);
1982 while(listLength(c
->reply
)) {
1983 if (server
.glueoutputbuf
&& listLength(c
->reply
) > 1)
1984 glueReplyBuffersIfNeeded(c
);
1986 o
= listNodeValue(listFirst(c
->reply
));
1987 objlen
= sdslen(o
->ptr
);
1990 listDelNode(c
->reply
,listFirst(c
->reply
));
1994 if (c
->flags
& REDIS_MASTER
) {
1995 /* Don't reply to a master */
1996 nwritten
= objlen
- c
->sentlen
;
1998 nwritten
= write(fd
, ((char*)o
->ptr
)+c
->sentlen
, objlen
- c
->sentlen
);
1999 if (nwritten
<= 0) break;
2001 c
->sentlen
+= nwritten
;
2002 totwritten
+= nwritten
;
2003 /* If we fully sent the object on head go to the next one */
2004 if (c
->sentlen
== objlen
) {
2005 listDelNode(c
->reply
,listFirst(c
->reply
));
2008 /* Note that we avoid sending more than REDIS_MAX_WRITE_PER_EVENT
2009 * bytes, in a single threaded server it's a good idea to serve
2010 * other clients as well, even if a very large request comes from
2011 * a super fast link that is always able to accept data (in a real world
2012 * scenario think about 'KEYS *' against the loopback interface) */
2013 if (totwritten
> REDIS_MAX_WRITE_PER_EVENT
) break;
2015 if (nwritten
== -1) {
2016 if (errno
== EAGAIN
) {
2019 redisLog(REDIS_VERBOSE
,
2020 "Error writing to client: %s", strerror(errno
));
2025 if (totwritten
> 0) c
->lastinteraction
= time(NULL
);
2026 if (listLength(c
->reply
) == 0) {
2028 aeDeleteFileEvent(server
.el
,c
->fd
,AE_WRITABLE
);
2032 static void sendReplyToClientWritev(aeEventLoop
*el
, int fd
, void *privdata
, int mask
)
2034 redisClient
*c
= privdata
;
2035 int nwritten
= 0, totwritten
= 0, objlen
, willwrite
;
2037 struct iovec iov
[REDIS_WRITEV_IOVEC_COUNT
];
2038 int offset
, ion
= 0;
2040 REDIS_NOTUSED(mask
);
2043 while (listLength(c
->reply
)) {
2044 offset
= c
->sentlen
;
2048 /* fill-in the iov[] array */
2049 for(node
= listFirst(c
->reply
); node
; node
= listNextNode(node
)) {
2050 o
= listNodeValue(node
);
2051 objlen
= sdslen(o
->ptr
);
2053 if (totwritten
+ objlen
- offset
> REDIS_MAX_WRITE_PER_EVENT
)
2056 if(ion
== REDIS_WRITEV_IOVEC_COUNT
)
2057 break; /* no more iovecs */
2059 iov
[ion
].iov_base
= ((char*)o
->ptr
) + offset
;
2060 iov
[ion
].iov_len
= objlen
- offset
;
2061 willwrite
+= objlen
- offset
;
2062 offset
= 0; /* just for the first item */
2069 /* write all collected blocks at once */
2070 if((nwritten
= writev(fd
, iov
, ion
)) < 0) {
2071 if (errno
!= EAGAIN
) {
2072 redisLog(REDIS_VERBOSE
,
2073 "Error writing to client: %s", strerror(errno
));
2080 totwritten
+= nwritten
;
2081 offset
= c
->sentlen
;
2083 /* remove written robjs from c->reply */
2084 while (nwritten
&& listLength(c
->reply
)) {
2085 o
= listNodeValue(listFirst(c
->reply
));
2086 objlen
= sdslen(o
->ptr
);
2088 if(nwritten
>= objlen
- offset
) {
2089 listDelNode(c
->reply
, listFirst(c
->reply
));
2090 nwritten
-= objlen
- offset
;
2094 c
->sentlen
+= nwritten
;
2102 c
->lastinteraction
= time(NULL
);
2104 if (listLength(c
->reply
) == 0) {
2106 aeDeleteFileEvent(server
.el
,c
->fd
,AE_WRITABLE
);
2110 static struct redisCommand
*lookupCommand(char *name
) {
2112 while(cmdTable
[j
].name
!= NULL
) {
2113 if (!strcasecmp(name
,cmdTable
[j
].name
)) return &cmdTable
[j
];
2119 /* resetClient prepare the client to process the next command */
2120 static void resetClient(redisClient
*c
) {
2126 /* Call() is the core of Redis execution of a command */
2127 static void call(redisClient
*c
, struct redisCommand
*cmd
) {
2130 dirty
= server
.dirty
;
2132 dirty
= server
.dirty
-dirty
;
2134 if (server
.appendonly
&& dirty
)
2135 feedAppendOnlyFile(cmd
,c
->db
->id
,c
->argv
,c
->argc
);
2136 if ((dirty
|| cmd
->flags
& REDIS_CMD_FORCE_REPLICATION
) &&
2137 listLength(server
.slaves
))
2138 replicationFeedSlaves(server
.slaves
,c
->db
->id
,c
->argv
,c
->argc
);
2139 if (listLength(server
.monitors
))
2140 replicationFeedSlaves(server
.monitors
,c
->db
->id
,c
->argv
,c
->argc
);
2141 server
.stat_numcommands
++;
2144 /* If this function gets called we already read a whole
2145 * command, arguments are in the client argv/argc fields.
2146 * processCommand() executes the command or prepares the
2147 * server for a bulk read from the client.
2149 * If 1 is returned the client is still alive and valid and
2150 * other operations can be performed by the caller. Otherwise
2151 * if 0 is returned the client was destroyed (i.e. after QUIT). */
2152 static int processCommand(redisClient
*c
) {
2153 struct redisCommand
*cmd
;
2155 /* Free some memory if needed (maxmemory setting) */
2156 if (server
.maxmemory
) freeMemoryIfNeeded();
2158 /* Handle the multi bulk command type. This is an alternative protocol
2159 * supported by Redis in order to receive commands that are composed of
2160 * multiple binary-safe "bulk" arguments. The latency of processing is
2161 * a bit higher but this allows things like multi-sets, so if this
2162 * protocol is used only for MSET and similar commands this is a big win. */
2163 if (c
->multibulk
== 0 && c
->argc
== 1 && ((char*)(c
->argv
[0]->ptr
))[0] == '*') {
2164 c
->multibulk
= atoi(((char*)c
->argv
[0]->ptr
)+1);
2165 if (c
->multibulk
<= 0) {
2169 decrRefCount(c
->argv
[c
->argc
-1]);
2173 } else if (c
->multibulk
) {
2174 if (c
->bulklen
== -1) {
2175 if (((char*)c
->argv
[0]->ptr
)[0] != '$') {
2176 addReplySds(c
,sdsnew("-ERR multi bulk protocol error\r\n"));
2180 int bulklen
= atoi(((char*)c
->argv
[0]->ptr
)+1);
2181 decrRefCount(c
->argv
[0]);
2182 if (bulklen
< 0 || bulklen
> 1024*1024*1024) {
2184 addReplySds(c
,sdsnew("-ERR invalid bulk write count\r\n"));
2189 c
->bulklen
= bulklen
+2; /* add two bytes for CR+LF */
2193 c
->mbargv
= zrealloc(c
->mbargv
,(sizeof(robj
*))*(c
->mbargc
+1));
2194 c
->mbargv
[c
->mbargc
] = c
->argv
[0];
2198 if (c
->multibulk
== 0) {
2202 /* Here we need to swap the multi-bulk argc/argv with the
2203 * normal argc/argv of the client structure. */
2205 c
->argv
= c
->mbargv
;
2206 c
->mbargv
= auxargv
;
2209 c
->argc
= c
->mbargc
;
2210 c
->mbargc
= auxargc
;
2212 /* We need to set bulklen to something different than -1
2213 * in order for the code below to process the command without
2214 * trying to read the last argument of a bulk command as
2215 * a special argument. */
2217 /* continue below and process the command */
2224 /* -- end of multi bulk commands processing -- */
2226 /* The QUIT command is handled as a special case. Normal command
2227 * procs are unable to close the client connection safely */
2228 if (!strcasecmp(c
->argv
[0]->ptr
,"quit")) {
2233 /* Now lookup the command and check ASAP about trivial error conditions
2234 * such wrong arity, bad command name and so forth. */
2235 cmd
= lookupCommand(c
->argv
[0]->ptr
);
2238 sdscatprintf(sdsempty(), "-ERR unknown command '%s'\r\n",
2239 (char*)c
->argv
[0]->ptr
));
2242 } else if ((cmd
->arity
> 0 && cmd
->arity
!= c
->argc
) ||
2243 (c
->argc
< -cmd
->arity
)) {
2245 sdscatprintf(sdsempty(),
2246 "-ERR wrong number of arguments for '%s' command\r\n",
2250 } else if (cmd
->flags
& REDIS_CMD_BULK
&& c
->bulklen
== -1) {
2251 /* This is a bulk command, we have to read the last argument yet. */
2252 int bulklen
= atoi(c
->argv
[c
->argc
-1]->ptr
);
2254 decrRefCount(c
->argv
[c
->argc
-1]);
2255 if (bulklen
< 0 || bulklen
> 1024*1024*1024) {
2257 addReplySds(c
,sdsnew("-ERR invalid bulk write count\r\n"));
2262 c
->bulklen
= bulklen
+2; /* add two bytes for CR+LF */
2263 /* It is possible that the bulk read is already in the
2264 * buffer. Check this condition and handle it accordingly.
2265 * This is just a fast path, an alternative to calling processInputBuffer().
2266 * It's a good idea since the code is small and this condition
2267 * happens most of the time. */
2268 if ((signed)sdslen(c
->querybuf
) >= c
->bulklen
) {
2269 c
->argv
[c
->argc
] = createStringObject(c
->querybuf
,c
->bulklen
-2);
2271 c
->querybuf
= sdsrange(c
->querybuf
,c
->bulklen
,-1);
2273 /* Otherwise return... the last argument still has to be
2274 * read from the socket. */
2278 /* Let's try to encode the bulk object to save space. */
2279 if (cmd
->flags
& REDIS_CMD_BULK
)
2280 tryObjectEncoding(c
->argv
[c
->argc
-1]);
2282 /* Check if the user is authenticated */
2283 if (server
.requirepass
&& !c
->authenticated
&& cmd
->proc
!= authCommand
) {
2284 addReplySds(c
,sdsnew("-ERR operation not permitted\r\n"));
2289 /* Handle the maxmemory directive */
2290 if (server
.maxmemory
&& (cmd
->flags
& REDIS_CMD_DENYOOM
) &&
2291 zmalloc_used_memory() > server
.maxmemory
)
2293 addReplySds(c
,sdsnew("-ERR command not allowed when used memory > 'maxmemory'\r\n"));
2298 /* Only allow SUBSCRIBE and UNSUBSCRIBE in the context of Pub/Sub */
2299 if (dictSize(c
->pubsub_channels
) > 0 &&
2300 cmd
->proc
!= subscribeCommand
&& cmd
->proc
!= unsubscribeCommand
&&
2301 cmd
->proc
!= psubscribeCommand
&& cmd
->proc
!= punsubscribeCommand
) {
2302 addReplySds(c
,sdsnew("-ERR only (P)SUBSCRIBE / (P)UNSUBSCRIBE / QUIT allowed in this context\r\n"));
2307 /* Exec the command */
2308 if (c
->flags
& REDIS_MULTI
&& cmd
->proc
!= execCommand
&& cmd
->proc
!= discardCommand
) {
2309 queueMultiCommand(c
,cmd
);
2310 addReply(c
,shared
.queued
);
2312 if (server
.vm_enabled
&& server
.vm_max_threads
> 0 &&
2313 blockClientOnSwappedKeys(cmd
,c
)) return 1;
2317 /* Prepare the client for the next command */
2322 static void replicationFeedSlaves(list
*slaves
, int dictid
, robj
**argv
, int argc
) {
2327 /* We need 1+(ARGS*3) objects since commands are using the new protocol
2328 * and we need 1 object for the first "*<count>\r\n" multibulk count, then
2329 * for every additional object we have "$<count>\r\n" + object + "\r\n". */
2330 robj
*static_outv
[REDIS_STATIC_ARGS
*3+1];
2333 if (argc
<= REDIS_STATIC_ARGS
) {
2336 outv
= zmalloc(sizeof(robj
*)*(argc
*3+1));
2339 lenobj
= createObject(REDIS_STRING
,
2340 sdscatprintf(sdsempty(), "*%d\r\n", argc
));
2341 lenobj
->refcount
= 0;
2342 outv
[outc
++] = lenobj
;
2343 for (j
= 0; j
< argc
; j
++) {
2344 lenobj
= createObject(REDIS_STRING
,
2345 sdscatprintf(sdsempty(),"$%lu\r\n",
2346 (unsigned long) stringObjectLen(argv
[j
])));
2347 lenobj
->refcount
= 0;
2348 outv
[outc
++] = lenobj
;
2349 outv
[outc
++] = argv
[j
];
2350 outv
[outc
++] = shared
.crlf
;
2353 /* Increment all the refcounts at start and decrement at end in order to
2354 * be sure to free objects if there is no slave in a replication state
2355 * able to be fed with commands */
2356 for (j
= 0; j
< outc
; j
++) incrRefCount(outv
[j
]);
2357 listRewind(slaves
,&li
);
2358 while((ln
= listNext(&li
))) {
2359 redisClient
*slave
= ln
->value
;
2361 /* Don't feed slaves that are still waiting for BGSAVE to start */
2362 if (slave
->replstate
== REDIS_REPL_WAIT_BGSAVE_START
) continue;
2364 /* Feed all the other slaves, MONITORs and so on */
2365 if (slave
->slaveseldb
!= dictid
) {
2369 case 0: selectcmd
= shared
.select0
; break;
2370 case 1: selectcmd
= shared
.select1
; break;
2371 case 2: selectcmd
= shared
.select2
; break;
2372 case 3: selectcmd
= shared
.select3
; break;
2373 case 4: selectcmd
= shared
.select4
; break;
2374 case 5: selectcmd
= shared
.select5
; break;
2375 case 6: selectcmd
= shared
.select6
; break;
2376 case 7: selectcmd
= shared
.select7
; break;
2377 case 8: selectcmd
= shared
.select8
; break;
2378 case 9: selectcmd
= shared
.select9
; break;
2380 selectcmd
= createObject(REDIS_STRING
,
2381 sdscatprintf(sdsempty(),"select %d\r\n",dictid
));
2382 selectcmd
->refcount
= 0;
2385 addReply(slave
,selectcmd
);
2386 slave
->slaveseldb
= dictid
;
2388 for (j
= 0; j
< outc
; j
++) addReply(slave
,outv
[j
]);
2390 for (j
= 0; j
< outc
; j
++) decrRefCount(outv
[j
]);
2391 if (outv
!= static_outv
) zfree(outv
);
2394 static void processInputBuffer(redisClient
*c
) {
2396 /* Before processing the input buffer, make sure the client is not
2397 * waiting for a blocking operation such as BLPOP. Note that on the first
2398 * iteration the client is never blocked, otherwise processInputBuffer()
2399 * would not be called at all, but after the execution of the first commands
2400 * in the input buffer the client may be blocked, and the "goto again"
2401 * will try to reiterate. The following line will make it return asap. */
2402 if (c
->flags
& REDIS_BLOCKED
|| c
->flags
& REDIS_IO_WAIT
) return;
2403 if (c
->bulklen
== -1) {
2404 /* Read the first line of the query */
2405 char *p
= strchr(c
->querybuf
,'\n');
2412 query
= c
->querybuf
;
2413 c
->querybuf
= sdsempty();
2414 querylen
= 1+(p
-(query
));
2415 if (sdslen(query
) > querylen
) {
2416 /* leave data after the first line of the query in the buffer */
2417 c
->querybuf
= sdscatlen(c
->querybuf
,query
+querylen
,sdslen(query
)-querylen
);
2419 *p
= '\0'; /* remove "\n" */
2420 if (*(p
-1) == '\r') *(p
-1) = '\0'; /* and "\r" if any */
2421 sdsupdatelen(query
);
2423 /* Now we can split the query in arguments */
2424 argv
= sdssplitlen(query
,sdslen(query
)," ",1,&argc
);
2427 if (c
->argv
) zfree(c
->argv
);
2428 c
->argv
= zmalloc(sizeof(robj
*)*argc
);
2430 for (j
= 0; j
< argc
; j
++) {
2431 if (sdslen(argv
[j
])) {
2432 c
->argv
[c
->argc
] = createObject(REDIS_STRING
,argv
[j
]);
2440 /* Execute the command. If the client is still valid
2441 * after processCommand() return and there is something
2442 * on the query buffer try to process the next command. */
2443 if (processCommand(c
) && sdslen(c
->querybuf
)) goto again
;
2445 /* Nothing to process, argc == 0. Just process the query
2446 * buffer if it's not empty or return to the caller */
2447 if (sdslen(c
->querybuf
)) goto again
;
2450 } else if (sdslen(c
->querybuf
) >= REDIS_REQUEST_MAX_SIZE
) {
2451 redisLog(REDIS_VERBOSE
, "Client protocol error");
2456 /* Bulk read handling. Note that if we are at this point
2457 the client already sent a command terminated with a newline,
2458 we are reading the bulk data that is actually the last
2459 argument of the command. */
2460 int qbl
= sdslen(c
->querybuf
);
2462 if (c
->bulklen
<= qbl
) {
2463 /* Copy everything but the final CRLF as final argument */
2464 c
->argv
[c
->argc
] = createStringObject(c
->querybuf
,c
->bulklen
-2);
2466 c
->querybuf
= sdsrange(c
->querybuf
,c
->bulklen
,-1);
2467 /* Process the command. If the client is still valid after
2468 * the processing and there is more data in the buffer
2469 * try to parse it. */
2470 if (processCommand(c
) && sdslen(c
->querybuf
)) goto again
;
2476 static void readQueryFromClient(aeEventLoop
*el
, int fd
, void *privdata
, int mask
) {
2477 redisClient
*c
= (redisClient
*) privdata
;
2478 char buf
[REDIS_IOBUF_LEN
];
2481 REDIS_NOTUSED(mask
);
2483 nread
= read(fd
, buf
, REDIS_IOBUF_LEN
);
2485 if (errno
== EAGAIN
) {
2488 redisLog(REDIS_VERBOSE
, "Reading from client: %s",strerror(errno
));
2492 } else if (nread
== 0) {
2493 redisLog(REDIS_VERBOSE
, "Client closed connection");
2498 c
->querybuf
= sdscatlen(c
->querybuf
, buf
, nread
);
2499 c
->lastinteraction
= time(NULL
);
2503 processInputBuffer(c
);
2506 static int selectDb(redisClient
*c
, int id
) {
2507 if (id
< 0 || id
>= server
.dbnum
)
2509 c
->db
= &server
.db
[id
];
2513 static void *dupClientReplyValue(void *o
) {
2514 incrRefCount((robj
*)o
);
/* Match callback for lists of string objects: reports a match (1) when
 * compareStringObjects() returns 0 for the two values, and 0 otherwise. */
static int listMatchObjects(void *a, void *b) {
    if (compareStringObjects(a,b) == 0)
        return 1;
    return 0;
}
2522 static redisClient
*createClient(int fd
) {
2523 redisClient
*c
= zmalloc(sizeof(*c
));
2525 anetNonBlock(NULL
,fd
);
2526 anetTcpNoDelay(NULL
,fd
);
2527 if (!c
) return NULL
;
2530 c
->querybuf
= sdsempty();
2539 c
->lastinteraction
= time(NULL
);
2540 c
->authenticated
= 0;
2541 c
->replstate
= REDIS_REPL_NONE
;
2542 c
->reply
= listCreate();
2543 listSetFreeMethod(c
->reply
,decrRefCount
);
2544 listSetDupMethod(c
->reply
,dupClientReplyValue
);
2545 c
->blockingkeys
= NULL
;
2546 c
->blockingkeysnum
= 0;
2547 c
->io_keys
= listCreate();
2548 listSetFreeMethod(c
->io_keys
,decrRefCount
);
2549 c
->pubsub_channels
= dictCreate(&setDictType
,NULL
);
2550 c
->pubsub_patterns
= listCreate();
2551 listSetFreeMethod(c
->pubsub_patterns
,decrRefCount
);
2552 listSetMatchMethod(c
->pubsub_patterns
,listMatchObjects
);
2553 if (aeCreateFileEvent(server
.el
, c
->fd
, AE_READABLE
,
2554 readQueryFromClient
, c
) == AE_ERR
) {
2558 listAddNodeTail(server
.clients
,c
);
2559 initClientMultiState(c
);
2563 static void addReply(redisClient
*c
, robj
*obj
) {
2564 if (listLength(c
->reply
) == 0 &&
2565 (c
->replstate
== REDIS_REPL_NONE
||
2566 c
->replstate
== REDIS_REPL_ONLINE
) &&
2567 aeCreateFileEvent(server
.el
, c
->fd
, AE_WRITABLE
,
2568 sendReplyToClient
, c
) == AE_ERR
) return;
2570 if (server
.vm_enabled
&& obj
->storage
!= REDIS_VM_MEMORY
) {
2571 obj
= dupStringObject(obj
);
2572 obj
->refcount
= 0; /* getDecodedObject() will increment the refcount */
2574 listAddNodeTail(c
->reply
,getDecodedObject(obj
));
2577 static void addReplySds(redisClient
*c
, sds s
) {
2578 robj
*o
= createObject(REDIS_STRING
,s
);
2583 static void addReplyDouble(redisClient
*c
, double d
) {
2586 snprintf(buf
,sizeof(buf
),"%.17g",d
);
2587 addReplySds(c
,sdscatprintf(sdsempty(),"$%lu\r\n%s\r\n",
2588 (unsigned long) strlen(buf
),buf
));
2591 static void addReplyLong(redisClient
*c
, long l
) {
2596 addReply(c
,shared
.czero
);
2598 } else if (l
== 1) {
2599 addReply(c
,shared
.cone
);
2602 len
= snprintf(buf
,sizeof(buf
),":%ld\r\n",l
);
2603 addReplySds(c
,sdsnewlen(buf
,len
));
2606 static void addReplyLongLong(redisClient
*c
, long long ll
) {
2611 addReply(c
,shared
.czero
);
2613 } else if (ll
== 1) {
2614 addReply(c
,shared
.cone
);
2617 len
= snprintf(buf
,sizeof(buf
),":%lld\r\n",ll
);
2618 addReplySds(c
,sdsnewlen(buf
,len
));
2621 static void addReplyUlong(redisClient
*c
, unsigned long ul
) {
2626 addReply(c
,shared
.czero
);
2628 } else if (ul
== 1) {
2629 addReply(c
,shared
.cone
);
2632 len
= snprintf(buf
,sizeof(buf
),":%lu\r\n",ul
);
2633 addReplySds(c
,sdsnewlen(buf
,len
));
2636 static void addReplyBulkLen(redisClient
*c
, robj
*obj
) {
2639 if (obj
->encoding
== REDIS_ENCODING_RAW
) {
2640 len
= sdslen(obj
->ptr
);
2642 long n
= (long)obj
->ptr
;
2644 /* Compute how many bytes will take this integer as a radix 10 string */
2650 while((n
= n
/10) != 0) {
2654 addReplySds(c
,sdscatprintf(sdsempty(),"$%lu\r\n",(unsigned long)len
));
2657 static void addReplyBulk(redisClient
*c
, robj
*obj
) {
2658 addReplyBulkLen(c
,obj
);
2660 addReply(c
,shared
.crlf
);
2663 /* In the CONFIG command we need to add vanilla C string as bulk replies */
2664 static void addReplyBulkCString(redisClient
*c
, char *s
) {
2666 addReply(c
,shared
.nullbulk
);
2668 robj
*o
= createStringObject(s
,strlen(s
));
2674 static void acceptHandler(aeEventLoop
*el
, int fd
, void *privdata
, int mask
) {
2679 REDIS_NOTUSED(mask
);
2680 REDIS_NOTUSED(privdata
);
2682 cfd
= anetAccept(server
.neterr
, fd
, cip
, &cport
);
2683 if (cfd
== AE_ERR
) {
2684 redisLog(REDIS_VERBOSE
,"Accepting client connection: %s", server
.neterr
);
2687 redisLog(REDIS_VERBOSE
,"Accepted %s:%d", cip
, cport
);
2688 if ((c
= createClient(cfd
)) == NULL
) {
2689 redisLog(REDIS_WARNING
,"Error allocating resoures for the client");
2690 close(cfd
); /* May be already closed, just ingore errors */
2693 /* If maxclient directive is set and this is one client more... close the
2694 * connection. Note that we create the client instead of checking for
2695 * this condition beforehand, since now the socket is already set in
2696 * nonblocking mode and we can send an error for free using the Kernel I/O */
2697 if (server
.maxclients
&& listLength(server
.clients
) > server
.maxclients
) {
2698 char *err
= "-ERR max number of clients reached\r\n";
2700 /* That's a best effort error message, don't check write errors */
2701 if (write(c
->fd
,err
,strlen(err
)) == -1) {
2702 /* Nothing to do, Just to avoid the warning... */
2707 server
.stat_numconnections
++;
2710 /* ======================= Redis objects implementation ===================== */
2712 static robj
*createObject(int type
, void *ptr
) {
2715 if (server
.vm_enabled
) pthread_mutex_lock(&server
.obj_freelist_mutex
);
2716 if (listLength(server
.objfreelist
)) {
2717 listNode
*head
= listFirst(server
.objfreelist
);
2718 o
= listNodeValue(head
);
2719 listDelNode(server
.objfreelist
,head
);
2720 if (server
.vm_enabled
) pthread_mutex_unlock(&server
.obj_freelist_mutex
);
2722 if (server
.vm_enabled
) {
2723 pthread_mutex_unlock(&server
.obj_freelist_mutex
);
2724 o
= zmalloc(sizeof(*o
));
2726 o
= zmalloc(sizeof(*o
)-sizeof(struct redisObjectVM
));
2730 o
->encoding
= REDIS_ENCODING_RAW
;
2733 if (server
.vm_enabled
) {
2734 /* Note that this code may run in the context of an I/O thread
2735 * and accessing server.unixtime in theory is an error
2736 * (no locks). But in practice this is safe, and even if we read
2737 * garbage Redis will not fail, as it's just statistical info */
2738 o
->vm
.atime
= server
.unixtime
;
2739 o
->storage
= REDIS_VM_MEMORY
;
2744 static robj
*createStringObject(char *ptr
, size_t len
) {
2745 return createObject(REDIS_STRING
,sdsnewlen(ptr
,len
));
2748 static robj
*dupStringObject(robj
*o
) {
2749 assert(o
->encoding
== REDIS_ENCODING_RAW
);
2750 return createStringObject(o
->ptr
,sdslen(o
->ptr
));
2753 static robj
*createListObject(void) {
2754 list
*l
= listCreate();
2756 listSetFreeMethod(l
,decrRefCount
);
2757 return createObject(REDIS_LIST
,l
);
2760 static robj
*createSetObject(void) {
2761 dict
*d
= dictCreate(&setDictType
,NULL
);
2762 return createObject(REDIS_SET
,d
);
2765 static robj
*createHashObject(void) {
2766 /* All the Hashes start as zipmaps. Will be automatically converted
2767 * into hash tables if there are enough elements or big elements
2769 unsigned char *zm
= zipmapNew();
2770 robj
*o
= createObject(REDIS_HASH
,zm
);
2771 o
->encoding
= REDIS_ENCODING_ZIPMAP
;
2775 static robj
*createZsetObject(void) {
2776 zset
*zs
= zmalloc(sizeof(*zs
));
2778 zs
->dict
= dictCreate(&zsetDictType
,NULL
);
2779 zs
->zsl
= zslCreate();
2780 return createObject(REDIS_ZSET
,zs
);
2783 static void freeStringObject(robj
*o
) {
2784 if (o
->encoding
== REDIS_ENCODING_RAW
) {
2789 static void freeListObject(robj
*o
) {
2790 listRelease((list
*) o
->ptr
);
2793 static void freeSetObject(robj
*o
) {
2794 dictRelease((dict
*) o
->ptr
);
2797 static void freeZsetObject(robj
*o
) {
2800 dictRelease(zs
->dict
);
2805 static void freeHashObject(robj
*o
) {
2806 switch (o
->encoding
) {
2807 case REDIS_ENCODING_HT
:
2808 dictRelease((dict
*) o
->ptr
);
2810 case REDIS_ENCODING_ZIPMAP
:
2819 static void incrRefCount(robj
*o
) {
2823 static void decrRefCount(void *obj
) {
2826 /* Object is a key of a swapped out value, or in the process of being
2828 if (server
.vm_enabled
&&
2829 (o
->storage
== REDIS_VM_SWAPPED
|| o
->storage
== REDIS_VM_LOADING
))
2831 if (o
->storage
== REDIS_VM_SWAPPED
|| o
->storage
== REDIS_VM_LOADING
) {
2832 redisAssert(o
->refcount
== 1);
2834 if (o
->storage
== REDIS_VM_LOADING
) vmCancelThreadedIOJob(obj
);
2835 redisAssert(o
->type
== REDIS_STRING
);
2836 freeStringObject(o
);
2837 vmMarkPagesFree(o
->vm
.page
,o
->vm
.usedpages
);
2838 pthread_mutex_lock(&server
.obj_freelist_mutex
);
2839 if (listLength(server
.objfreelist
) > REDIS_OBJFREELIST_MAX
||
2840 !listAddNodeHead(server
.objfreelist
,o
))
2842 pthread_mutex_unlock(&server
.obj_freelist_mutex
);
2843 server
.vm_stats_swapped_objects
--;
2846 /* Object is in memory, or in the process of being swapped out. */
2847 if (--(o
->refcount
) == 0) {
2848 if (server
.vm_enabled
&& o
->storage
== REDIS_VM_SWAPPING
)
2849 vmCancelThreadedIOJob(obj
);
2851 case REDIS_STRING
: freeStringObject(o
); break;
2852 case REDIS_LIST
: freeListObject(o
); break;
2853 case REDIS_SET
: freeSetObject(o
); break;
2854 case REDIS_ZSET
: freeZsetObject(o
); break;
2855 case REDIS_HASH
: freeHashObject(o
); break;
2856 default: redisAssert(0); break;
2858 if (server
.vm_enabled
) pthread_mutex_lock(&server
.obj_freelist_mutex
);
2859 if (listLength(server
.objfreelist
) > REDIS_OBJFREELIST_MAX
||
2860 !listAddNodeHead(server
.objfreelist
,o
))
2862 if (server
.vm_enabled
) pthread_mutex_unlock(&server
.obj_freelist_mutex
);
2866 static robj
*lookupKey(redisDb
*db
, robj
*key
) {
2867 dictEntry
*de
= dictFind(db
->dict
,key
);
2869 robj
*key
= dictGetEntryKey(de
);
2870 robj
*val
= dictGetEntryVal(de
);
2872 if (server
.vm_enabled
) {
2873 if (key
->storage
== REDIS_VM_MEMORY
||
2874 key
->storage
== REDIS_VM_SWAPPING
)
2876 /* If we were swapping the object out, stop it, this key
2878 if (key
->storage
== REDIS_VM_SWAPPING
)
2879 vmCancelThreadedIOJob(key
);
2880 /* Update the access time of the key for the aging algorithm. */
2881 key
->vm
.atime
= server
.unixtime
;
2883 int notify
= (key
->storage
== REDIS_VM_LOADING
);
2885 /* Our value was swapped on disk. Bring it at home. */
2886 redisAssert(val
== NULL
);
2887 val
= vmLoadObject(key
);
2888 dictGetEntryVal(de
) = val
;
2890 /* Clients blocked by the VM subsystem may be waiting for
2892 if (notify
) handleClientsBlockedOnSwappedKey(db
,key
);
2901 static robj
*lookupKeyRead(redisDb
*db
, robj
*key
) {
2902 expireIfNeeded(db
,key
);
2903 return lookupKey(db
,key
);
2906 static robj
*lookupKeyWrite(redisDb
*db
, robj
*key
) {
2907 deleteIfVolatile(db
,key
);
2908 return lookupKey(db
,key
);
2911 static robj
*lookupKeyReadOrReply(redisClient
*c
, robj
*key
, robj
*reply
) {
2912 robj
*o
= lookupKeyRead(c
->db
, key
);
2913 if (!o
) addReply(c
,reply
);
2917 static robj
*lookupKeyWriteOrReply(redisClient
*c
, robj
*key
, robj
*reply
) {
2918 robj
*o
= lookupKeyWrite(c
->db
, key
);
2919 if (!o
) addReply(c
,reply
);
2923 static int checkType(redisClient
*c
, robj
*o
, int type
) {
2924 if (o
->type
!= type
) {
2925 addReply(c
,shared
.wrongtypeerr
);
2931 static int deleteKey(redisDb
*db
, robj
*key
) {
2934 /* We need to protect key from destruction: after the first dictDelete()
2935 * it may happen that 'key' is no longer valid if we don't increment
2936 * its count. This may happen when we get the object reference directly
2937 * from the hash table with dictRandomKey() or dict iterators */
2939 if (dictSize(db
->expires
)) dictDelete(db
->expires
,key
);
2940 retval
= dictDelete(db
->dict
,key
);
2943 return retval
== DICT_OK
;
2946 /* Check if the nul-terminated string 's' can be represented by a long
2947 * (that is, is a number that fits into long without any other space or
2948 * character before or after the digits).
2950 * If so, the function returns REDIS_OK and *longval is set to the value
2951 * of the number. Otherwise REDIS_ERR is returned */
2952 static int isStringRepresentableAsLong(sds s
, long *longval
) {
2953 char buf
[32], *endptr
;
2957 value
= strtol(s
, &endptr
, 10);
2958 if (endptr
[0] != '\0') return REDIS_ERR
;
2959 slen
= snprintf(buf
,32,"%ld",value
);
2961 /* If the number converted back into a string is not identical
2962 * then it's not possible to encode the string as integer */
2963 if (sdslen(s
) != (unsigned)slen
|| memcmp(buf
,s
,slen
)) return REDIS_ERR
;
2964 if (longval
) *longval
= value
;
2968 /* Try to encode a string object in order to save space */
2969 static int tryObjectEncoding(robj
*o
) {
2973 if (o
->encoding
!= REDIS_ENCODING_RAW
)
2974 return REDIS_ERR
; /* Already encoded */
2976 /* It's not safe to encode shared objects: shared objects can be shared
2977 * everywhere in the "object space" of Redis. Encoded objects can only
2978 * appear as "values" (and not, for instance, as keys) */
2979 if (o
->refcount
> 1) return REDIS_ERR
;
2981 /* Currently we try to encode only strings */
2982 redisAssert(o
->type
== REDIS_STRING
);
2984 /* Check if we can represent this string as a long integer */
2985 if (isStringRepresentableAsLong(s
,&value
) == REDIS_ERR
) return REDIS_ERR
;
2987 /* Ok, this object can be encoded */
2988 o
->encoding
= REDIS_ENCODING_INT
;
2990 o
->ptr
= (void*) value
;
2994 /* Get a decoded version of an encoded object (returned as a new object).
2995 * If the object is already raw-encoded just increment the ref count. */
2996 static robj
*getDecodedObject(robj
*o
) {
2999 if (o
->encoding
== REDIS_ENCODING_RAW
) {
3003 if (o
->type
== REDIS_STRING
&& o
->encoding
== REDIS_ENCODING_INT
) {
3006 snprintf(buf
,32,"%ld",(long)o
->ptr
);
3007 dec
= createStringObject(buf
,strlen(buf
));
3010 redisAssert(1 != 1);
3014 /* Compare two string objects via strcmp() or alike.
3015 * Note that the objects may be integer-encoded. In such a case we
3016 * use snprintf() to get a string representation of the numbers on the stack
3017 * and compare the strings, it's much faster than calling getDecodedObject().
3019 * Important note: if objects are not integer encoded, but binary-safe strings,
3020 * sdscmp() from sds.c will apply memcmp() so this function can be considered
3022 static int compareStringObjects(robj
*a
, robj
*b
) {
3023 redisAssert(a
->type
== REDIS_STRING
&& b
->type
== REDIS_STRING
);
3024 char bufa
[128], bufb
[128], *astr
, *bstr
;
3027 if (a
== b
) return 0;
3028 if (a
->encoding
!= REDIS_ENCODING_RAW
) {
3029 snprintf(bufa
,sizeof(bufa
),"%ld",(long) a
->ptr
);
3035 if (b
->encoding
!= REDIS_ENCODING_RAW
) {
3036 snprintf(bufb
,sizeof(bufb
),"%ld",(long) b
->ptr
);
3042 return bothsds
? sdscmp(astr
,bstr
) : strcmp(astr
,bstr
);
3045 static size_t stringObjectLen(robj
*o
) {
3046 redisAssert(o
->type
== REDIS_STRING
);
3047 if (o
->encoding
== REDIS_ENCODING_RAW
) {
3048 return sdslen(o
->ptr
);
3052 return snprintf(buf
,32,"%ld",(long)o
->ptr
);
3056 /*============================ RDB saving/loading =========================== */
/* Write the one-byte 'type' opcode to 'fp'.
 * Returns 0 on success, -1 if the write failed. */
static int rdbSaveType(FILE *fp, unsigned char type) {
    if (fwrite(&type,1,1,fp) == 0) return -1;
    return 0;
}
/* Write the time 't' to 'fp' as a 4 byte signed integer in host byte
 * order (the value is truncated to int32_t).
 * Returns 0 on success, -1 if the write failed. */
static int rdbSaveTime(FILE *fp, time_t t) {
    int32_t t32 = (int32_t) t;

    if (fwrite(&t32,4,1,fp) == 0) return -1;
    return 0;
}
3069 /* check rdbLoadLen() comments for more info */
3070 static int rdbSaveLen(FILE *fp
, uint32_t len
) {
3071 unsigned char buf
[2];
3074 /* Save a 6 bit len */
3075 buf
[0] = (len
&0xFF)|(REDIS_RDB_6BITLEN
<<6);
3076 if (fwrite(buf
,1,1,fp
) == 0) return -1;
3077 } else if (len
< (1<<14)) {
3078 /* Save a 14 bit len */
3079 buf
[0] = ((len
>>8)&0xFF)|(REDIS_RDB_14BITLEN
<<6);
3081 if (fwrite(buf
,2,1,fp
) == 0) return -1;
3083 /* Save a 32 bit len */
3084 buf
[0] = (REDIS_RDB_32BITLEN
<<6);
3085 if (fwrite(buf
,1,1,fp
) == 0) return -1;
3087 if (fwrite(&len
,4,1,fp
) == 0) return -1;
3092 /* String objects in the form "2391" "-100" without any space and with a
3093 * range of values that can fit in an 8, 16 or 32 bit signed value can be
3094 * encoded as integers to save space */
3095 static int rdbTryIntegerEncoding(char *s
, size_t len
, unsigned char *enc
) {
3097 char *endptr
, buf
[32];
3099 /* Check if it's possible to encode this value as a number */
3100 value
= strtoll(s
, &endptr
, 10);
3101 if (endptr
[0] != '\0') return 0;
3102 snprintf(buf
,32,"%lld",value
);
3104 /* If the number converted back into a string is not identical
3105 * then it's not possible to encode the string as integer */
3106 if (strlen(buf
) != len
|| memcmp(buf
,s
,len
)) return 0;
3108 /* Finally check if it fits in our ranges */
3109 if (value
>= -(1<<7) && value
<= (1<<7)-1) {
3110 enc
[0] = (REDIS_RDB_ENCVAL
<<6)|REDIS_RDB_ENC_INT8
;
3111 enc
[1] = value
&0xFF;
3113 } else if (value
>= -(1<<15) && value
<= (1<<15)-1) {
3114 enc
[0] = (REDIS_RDB_ENCVAL
<<6)|REDIS_RDB_ENC_INT16
;
3115 enc
[1] = value
&0xFF;
3116 enc
[2] = (value
>>8)&0xFF;
3118 } else if (value
>= -((long long)1<<31) && value
<= ((long long)1<<31)-1) {
3119 enc
[0] = (REDIS_RDB_ENCVAL
<<6)|REDIS_RDB_ENC_INT32
;
3120 enc
[1] = value
&0xFF;
3121 enc
[2] = (value
>>8)&0xFF;
3122 enc
[3] = (value
>>16)&0xFF;
3123 enc
[4] = (value
>>24)&0xFF;
3130 static int rdbSaveLzfStringObject(FILE *fp
, unsigned char *s
, size_t len
) {
3131 size_t comprlen
, outlen
;
3135 /* We require at least four bytes compression for this to be worth it */
3136 if (len
<= 4) return 0;
3138 if ((out
= zmalloc(outlen
+1)) == NULL
) return 0;
3139 comprlen
= lzf_compress(s
, len
, out
, outlen
);
3140 if (comprlen
== 0) {
3144 /* Data compressed! Let's save it on disk */
3145 byte
= (REDIS_RDB_ENCVAL
<<6)|REDIS_RDB_ENC_LZF
;
3146 if (fwrite(&byte
,1,1,fp
) == 0) goto writeerr
;
3147 if (rdbSaveLen(fp
,comprlen
) == -1) goto writeerr
;
3148 if (rdbSaveLen(fp
,len
) == -1) goto writeerr
;
3149 if (fwrite(out
,comprlen
,1,fp
) == 0) goto writeerr
;
3158 /* Save a string object as [len][data] on disk. If the object is a string
3159 * representation of an integer value we try to save it in a special form */
3160 static int rdbSaveRawString(FILE *fp
, unsigned char *s
, size_t len
) {
3163 /* Try integer encoding */
3165 unsigned char buf
[5];
3166 if ((enclen
= rdbTryIntegerEncoding((char*)s
,len
,buf
)) > 0) {
3167 if (fwrite(buf
,enclen
,1,fp
) == 0) return -1;
3172 /* Try LZF compression - under 20 bytes it's unable to compress even
3173 * aaaaaaaaaaaaaaaaaa so skip it */
3174 if (server
.rdbcompression
&& len
> 20) {
3177 retval
= rdbSaveLzfStringObject(fp
,s
,len
);
3178 if (retval
== -1) return -1;
3179 if (retval
> 0) return 0;
3180 /* retval == 0 means data can't be compressed, save the old way */
3183 /* Store verbatim */
3184 if (rdbSaveLen(fp
,len
) == -1) return -1;
3185 if (len
&& fwrite(s
,len
,1,fp
) == 0) return -1;
3189 /* Like rdbSaveRawString() but handle encoded objects */
3190 static int rdbSaveStringObject(FILE *fp
, robj
*obj
) {
3193 /* Avoid incr/decr ref count business when possible.
3194 * This plays well with copy-on-write given that we are probably
3195 * in a child process (BGSAVE). Also this makes sure key objects
3196 * of swapped objects are not incRefCount-ed (an assert does not allow
3197 * this in order to avoid bugs) */
3198 if (obj
->encoding
!= REDIS_ENCODING_RAW
) {
3199 obj
= getDecodedObject(obj
);
3200 retval
= rdbSaveRawString(fp
,obj
->ptr
,sdslen(obj
->ptr
));
3203 retval
= rdbSaveRawString(fp
,obj
->ptr
,sdslen(obj
->ptr
));
3208 /* Save a double value. Doubles are saved as strings prefixed by an unsigned
3209 * 8 bit integer specifying the length of the representation.
3210 * This 8 bit integer has special values in order to specify the following
3216 static int rdbSaveDoubleValue(FILE *fp
, double val
) {
3217 unsigned char buf
[128];
3223 } else if (!isfinite(val
)) {
3225 buf
[0] = (val
< 0) ? 255 : 254;
3227 snprintf((char*)buf
+1,sizeof(buf
)-1,"%.17g",val
);
3228 buf
[0] = strlen((char*)buf
+1);
3231 if (fwrite(buf
,len
,1,fp
) == 0) return -1;
3235 /* Save a Redis object. */
3236 static int rdbSaveObject(FILE *fp
, robj
*o
) {
3237 if (o
->type
== REDIS_STRING
) {
3238 /* Save a string value */
3239 if (rdbSaveStringObject(fp
,o
) == -1) return -1;
3240 } else if (o
->type
== REDIS_LIST
) {
3241 /* Save a list value */
3242 list
*list
= o
->ptr
;
3246 if (rdbSaveLen(fp
,listLength(list
)) == -1) return -1;
3247 listRewind(list
,&li
);
3248 while((ln
= listNext(&li
))) {
3249 robj
*eleobj
= listNodeValue(ln
);
3251 if (rdbSaveStringObject(fp
,eleobj
) == -1) return -1;
3253 } else if (o
->type
== REDIS_SET
) {
3254 /* Save a set value */
3256 dictIterator
*di
= dictGetIterator(set
);
3259 if (rdbSaveLen(fp
,dictSize(set
)) == -1) return -1;
3260 while((de
= dictNext(di
)) != NULL
) {
3261 robj
*eleobj
= dictGetEntryKey(de
);
3263 if (rdbSaveStringObject(fp
,eleobj
) == -1) return -1;
3265 dictReleaseIterator(di
);
3266 } else if (o
->type
== REDIS_ZSET
) {
3267 /* Save a sorted set value */
3269 dictIterator
*di
= dictGetIterator(zs
->dict
);
3272 if (rdbSaveLen(fp
,dictSize(zs
->dict
)) == -1) return -1;
3273 while((de
= dictNext(di
)) != NULL
) {
3274 robj
*eleobj
= dictGetEntryKey(de
);
3275 double *score
= dictGetEntryVal(de
);
3277 if (rdbSaveStringObject(fp
,eleobj
) == -1) return -1;
3278 if (rdbSaveDoubleValue(fp
,*score
) == -1) return -1;
3280 dictReleaseIterator(di
);
3281 } else if (o
->type
== REDIS_HASH
) {
3282 /* Save a hash value */
3283 if (o
->encoding
== REDIS_ENCODING_ZIPMAP
) {
3284 unsigned char *p
= zipmapRewind(o
->ptr
);
3285 unsigned int count
= zipmapLen(o
->ptr
);
3286 unsigned char *key
, *val
;
3287 unsigned int klen
, vlen
;
3289 if (rdbSaveLen(fp
,count
) == -1) return -1;
3290 while((p
= zipmapNext(p
,&key
,&klen
,&val
,&vlen
)) != NULL
) {
3291 if (rdbSaveRawString(fp
,key
,klen
) == -1) return -1;
3292 if (rdbSaveRawString(fp
,val
,vlen
) == -1) return -1;
3295 dictIterator
*di
= dictGetIterator(o
->ptr
);
3298 if (rdbSaveLen(fp
,dictSize((dict
*)o
->ptr
)) == -1) return -1;
3299 while((de
= dictNext(di
)) != NULL
) {
3300 robj
*key
= dictGetEntryKey(de
);
3301 robj
*val
= dictGetEntryVal(de
);
3303 if (rdbSaveStringObject(fp
,key
) == -1) return -1;
3304 if (rdbSaveStringObject(fp
,val
) == -1) return -1;
3306 dictReleaseIterator(di
);
3314 /* Return the length the object will have on disk if saved with
3315 * the rdbSaveObject() function. Currently we use a trick to get
3316 * this length with very little changes to the code. In the future
3317 * we could switch to a faster solution. */
3318 static off_t
rdbSavedObjectLen(robj
*o
, FILE *fp
) {
3319 if (fp
== NULL
) fp
= server
.devnull
;
3321 assert(rdbSaveObject(fp
,o
) != 1);
3325 /* Return the number of pages required to save this object in the swap file */
3326 static off_t
rdbSavedObjectPages(robj
*o
, FILE *fp
) {
3327 off_t bytes
= rdbSavedObjectLen(o
,fp
);
3329 return (bytes
+(server
.vm_page_size
-1))/server
.vm_page_size
;
3332 /* Save the DB on disk. Return REDIS_ERR on error, REDIS_OK on success */
3333 static int rdbSave(char *filename
) {
3334 dictIterator
*di
= NULL
;
3339 time_t now
= time(NULL
);
3341 /* Wait for I/O threads to terminate, just in case this is a
3342 * foreground-saving, to avoid seeking the swap file descriptor at the
3344 if (server
.vm_enabled
)
3345 waitEmptyIOJobsQueue();
3347 snprintf(tmpfile
,256,"temp-%d.rdb", (int) getpid());
3348 fp
= fopen(tmpfile
,"w");
3350 redisLog(REDIS_WARNING
, "Failed saving the DB: %s", strerror(errno
));
3353 if (fwrite("REDIS0001",9,1,fp
) == 0) goto werr
;
3354 for (j
= 0; j
< server
.dbnum
; j
++) {
3355 redisDb
*db
= server
.db
+j
;
3357 if (dictSize(d
) == 0) continue;
3358 di
= dictGetIterator(d
);
3364 /* Write the SELECT DB opcode */
3365 if (rdbSaveType(fp
,REDIS_SELECTDB
) == -1) goto werr
;
3366 if (rdbSaveLen(fp
,j
) == -1) goto werr
;
3368 /* Iterate this DB writing every entry */
3369 while((de
= dictNext(di
)) != NULL
) {
3370 robj
*key
= dictGetEntryKey(de
);
3371 robj
*o
= dictGetEntryVal(de
);
3372 time_t expiretime
= getExpire(db
,key
);
3374 /* Save the expire time */
3375 if (expiretime
!= -1) {
3376 /* If this key is already expired skip it */
3377 if (expiretime
< now
) continue;
3378 if (rdbSaveType(fp
,REDIS_EXPIRETIME
) == -1) goto werr
;
3379 if (rdbSaveTime(fp
,expiretime
) == -1) goto werr
;
3381 /* Save the key and associated value. This requires special
3382 * handling if the value is swapped out. */
3383 if (!server
.vm_enabled
|| key
->storage
== REDIS_VM_MEMORY
||
3384 key
->storage
== REDIS_VM_SWAPPING
) {
3385 /* Save type, key, value */
3386 if (rdbSaveType(fp
,o
->type
) == -1) goto werr
;
3387 if (rdbSaveStringObject(fp
,key
) == -1) goto werr
;
3388 if (rdbSaveObject(fp
,o
) == -1) goto werr
;
3390 /* REDIS_VM_SWAPPED or REDIS_VM_LOADING */
3392 /* Get a preview of the object in memory */
3393 po
= vmPreviewObject(key
);
3394 /* Save type, key, value */
3395 if (rdbSaveType(fp
,key
->vtype
) == -1) goto werr
;
3396 if (rdbSaveStringObject(fp
,key
) == -1) goto werr
;
3397 if (rdbSaveObject(fp
,po
) == -1) goto werr
;
3398 /* Remove the loaded object from memory */
3402 dictReleaseIterator(di
);
3405 if (rdbSaveType(fp
,REDIS_EOF
) == -1) goto werr
;
3407 /* Make sure data will not remain on the OS's output buffers */
3412 /* Use RENAME to make sure the DB file is changed atomically only
3413 * if the generated DB file is ok. */
3414 if (rename(tmpfile
,filename
) == -1) {
3415 redisLog(REDIS_WARNING
,"Error moving temp DB file on the final destination: %s", strerror(errno
));
3419 redisLog(REDIS_NOTICE
,"DB saved on disk");
3421 server
.lastsave
= time(NULL
);
3427 redisLog(REDIS_WARNING
,"Write error saving DB on disk: %s", strerror(errno
));
3428 if (di
) dictReleaseIterator(di
);
3432 static int rdbSaveBackground(char *filename
) {
3435 if (server
.bgsavechildpid
!= -1) return REDIS_ERR
;
3436 if (server
.vm_enabled
) waitEmptyIOJobsQueue();
3437 if ((childpid
= fork()) == 0) {
3439 if (server
.vm_enabled
) vmReopenSwapFile();
3441 if (rdbSave(filename
) == REDIS_OK
) {
3448 if (childpid
== -1) {
3449 redisLog(REDIS_WARNING
,"Can't save in background: fork: %s",
3453 redisLog(REDIS_NOTICE
,"Background saving started by pid %d",childpid
);
3454 server
.bgsavechildpid
= childpid
;
3455 updateDictResizePolicy();
3458 return REDIS_OK
; /* unreached */
3461 static void rdbRemoveTempFile(pid_t childpid
) {
3464 snprintf(tmpfile
,256,"temp-%d.rdb", (int) childpid
);
/* Read a one-byte type opcode from 'fp'.
 * Returns the byte value (0-255) on success, -1 on short read. */
static int rdbLoadType(FILE *fp) {
    unsigned char type;

    if (fread(&type,1,1,fp) == 0) return -1;
    return type;
}
/* Read a time value stored as a 4 byte signed integer in host byte order
 * (the counterpart of rdbSaveTime()).
 * Returns the value as time_t, or -1 on short read. */
static time_t rdbLoadTime(FILE *fp) {
    int32_t t32;

    if (fread(&t32,4,1,fp) == 0) return -1;
    return (time_t) t32;
}
3480 /* Load an encoded length from the DB, see the REDIS_RDB_* defines on the top
3481 * of this file for a description of how these are stored on disk.
3483 * isencoded is set to 1 if the value read is not actually a length but
3484 * an "encoding type", check the above comments for more info */
3485 static uint32_t rdbLoadLen(FILE *fp
, int *isencoded
) {
3486 unsigned char buf
[2];
3490 if (isencoded
) *isencoded
= 0;
3491 if (fread(buf
,1,1,fp
) == 0) return REDIS_RDB_LENERR
;
3492 type
= (buf
[0]&0xC0)>>6;
3493 if (type
== REDIS_RDB_6BITLEN
) {
3494 /* Read a 6 bit len */
3496 } else if (type
== REDIS_RDB_ENCVAL
) {
3497 /* Read a 6 bit len encoding type */
3498 if (isencoded
) *isencoded
= 1;
3500 } else if (type
== REDIS_RDB_14BITLEN
) {
3501 /* Read a 14 bit len */
3502 if (fread(buf
+1,1,1,fp
) == 0) return REDIS_RDB_LENERR
;
3503 return ((buf
[0]&0x3F)<<8)|buf
[1];
3505 /* Read a 32 bit len */
3506 if (fread(&len
,4,1,fp
) == 0) return REDIS_RDB_LENERR
;
3511 static robj
*rdbLoadIntegerObject(FILE *fp
, int enctype
) {
3512 unsigned char enc
[4];
3515 if (enctype
== REDIS_RDB_ENC_INT8
) {
3516 if (fread(enc
,1,1,fp
) == 0) return NULL
;
3517 val
= (signed char)enc
[0];
3518 } else if (enctype
== REDIS_RDB_ENC_INT16
) {
3520 if (fread(enc
,2,1,fp
) == 0) return NULL
;
3521 v
= enc
[0]|(enc
[1]<<8);
3523 } else if (enctype
== REDIS_RDB_ENC_INT32
) {
3525 if (fread(enc
,4,1,fp
) == 0) return NULL
;
3526 v
= enc
[0]|(enc
[1]<<8)|(enc
[2]<<16)|(enc
[3]<<24);
3529 val
= 0; /* anti-warning */
3532 return createObject(REDIS_STRING
,sdscatprintf(sdsempty(),"%lld",val
));
3535 static robj
*rdbLoadLzfStringObject(FILE*fp
) {
3536 unsigned int len
, clen
;
3537 unsigned char *c
= NULL
;
3540 if ((clen
= rdbLoadLen(fp
,NULL
)) == REDIS_RDB_LENERR
) return NULL
;
3541 if ((len
= rdbLoadLen(fp
,NULL
)) == REDIS_RDB_LENERR
) return NULL
;
3542 if ((c
= zmalloc(clen
)) == NULL
) goto err
;
3543 if ((val
= sdsnewlen(NULL
,len
)) == NULL
) goto err
;
3544 if (fread(c
,clen
,1,fp
) == 0) goto err
;
3545 if (lzf_decompress(c
,clen
,val
,len
) == 0) goto err
;
3547 return createObject(REDIS_STRING
,val
);
3554 static robj
*rdbLoadStringObject(FILE*fp
) {
3559 len
= rdbLoadLen(fp
,&isencoded
);
3562 case REDIS_RDB_ENC_INT8
:
3563 case REDIS_RDB_ENC_INT16
:
3564 case REDIS_RDB_ENC_INT32
:
3565 return rdbLoadIntegerObject(fp
,len
);
3566 case REDIS_RDB_ENC_LZF
:
3567 return rdbLoadLzfStringObject(fp
);
3573 if (len
== REDIS_RDB_LENERR
) return NULL
;
3574 val
= sdsnewlen(NULL
,len
);
3575 if (len
&& fread(val
,len
,1,fp
) == 0) {
3579 return createObject(REDIS_STRING
,val
);
3582 /* For information about double serialization check rdbSaveDoubleValue() */
3583 static int rdbLoadDoubleValue(FILE *fp
, double *val
) {
3587 if (fread(&len
,1,1,fp
) == 0) return -1;
3589 case 255: *val
= R_NegInf
; return 0;
3590 case 254: *val
= R_PosInf
; return 0;
3591 case 253: *val
= R_Nan
; return 0;
3593 if (fread(buf
,len
,1,fp
) == 0) return -1;
3595 sscanf(buf
, "%lg", val
);
3600 /* Load a Redis object of the specified type from the specified file.
3601 * On success a newly allocated object is returned, otherwise NULL. */
3602 static robj
*rdbLoadObject(int type
, FILE *fp
) {
3605 redisLog(REDIS_DEBUG
,"LOADING OBJECT %d (at %d)\n",type
,ftell(fp
));
3606 if (type
== REDIS_STRING
) {
3607 /* Read string value */
3608 if ((o
= rdbLoadStringObject(fp
)) == NULL
) return NULL
;
3609 tryObjectEncoding(o
);
3610 } else if (type
== REDIS_LIST
|| type
== REDIS_SET
) {
3611 /* Read list/set value */
3614 if ((listlen
= rdbLoadLen(fp
,NULL
)) == REDIS_RDB_LENERR
) return NULL
;
3615 o
= (type
== REDIS_LIST
) ? createListObject() : createSetObject();
3616 /* It's faster to expand the dict to the right size asap in order
3617 * to avoid rehashing */
3618 if (type
== REDIS_SET
&& listlen
> DICT_HT_INITIAL_SIZE
)
3619 dictExpand(o
->ptr
,listlen
);
3620 /* Load every single element of the list/set */
3624 if ((ele
= rdbLoadStringObject(fp
)) == NULL
) return NULL
;
3625 tryObjectEncoding(ele
);
3626 if (type
== REDIS_LIST
) {
3627 listAddNodeTail((list
*)o
->ptr
,ele
);
3629 dictAdd((dict
*)o
->ptr
,ele
,NULL
);
3632 } else if (type
== REDIS_ZSET
) {
3633 /* Read sorted set value */
3637 if ((zsetlen
= rdbLoadLen(fp
,NULL
)) == REDIS_RDB_LENERR
) return NULL
;
3638 o
= createZsetObject();
3640 /* Load every single element of the sorted set */
3643 double *score
= zmalloc(sizeof(double));
3645 if ((ele
= rdbLoadStringObject(fp
)) == NULL
) return NULL
;
3646 tryObjectEncoding(ele
);
3647 if (rdbLoadDoubleValue(fp
,score
) == -1) return NULL
;
3648 dictAdd(zs
->dict
,ele
,score
);
3649 zslInsert(zs
->zsl
,*score
,ele
);
3650 incrRefCount(ele
); /* added to skiplist */
3652 } else if (type
== REDIS_HASH
) {
3655 if ((hashlen
= rdbLoadLen(fp
,NULL
)) == REDIS_RDB_LENERR
) return NULL
;
3656 o
= createHashObject();
3657 /* Too many entries? Use a hash table. */
3658 if (hashlen
> server
.hash_max_zipmap_entries
)
3659 convertToRealHash(o
);
3660 /* Load every key/value, then set it into the zipmap or hash
3661 * table, as needed. */
3665 if ((key
= rdbLoadStringObject(fp
)) == NULL
) return NULL
;
3666 if ((val
= rdbLoadStringObject(fp
)) == NULL
) return NULL
;
3667 /* If we are using a zipmap and there are too big values
3668 * the object is converted to real hash table encoding. */
3669 if (o
->encoding
!= REDIS_ENCODING_HT
&&
3670 (sdslen(key
->ptr
) > server
.hash_max_zipmap_value
||
3671 sdslen(val
->ptr
) > server
.hash_max_zipmap_value
))
3673 convertToRealHash(o
);
3676 if (o
->encoding
== REDIS_ENCODING_ZIPMAP
) {
3677 unsigned char *zm
= o
->ptr
;
3679 zm
= zipmapSet(zm
,key
->ptr
,sdslen(key
->ptr
),
3680 val
->ptr
,sdslen(val
->ptr
),NULL
);
3685 tryObjectEncoding(key
);
3686 tryObjectEncoding(val
);
3687 dictAdd((dict
*)o
->ptr
,key
,val
);
3696 static int rdbLoad(char *filename
) {
3698 robj
*keyobj
= NULL
;
3700 int type
, retval
, rdbver
;
3701 dict
*d
= server
.db
[0].dict
;
3702 redisDb
*db
= server
.db
+0;
3704 time_t expiretime
= -1, now
= time(NULL
);
3705 long long loadedkeys
= 0;
3707 fp
= fopen(filename
,"r");
3708 if (!fp
) return REDIS_ERR
;
3709 if (fread(buf
,9,1,fp
) == 0) goto eoferr
;
3711 if (memcmp(buf
,"REDIS",5) != 0) {
3713 redisLog(REDIS_WARNING
,"Wrong signature trying to load DB from file");
3716 rdbver
= atoi(buf
+5);
3719 redisLog(REDIS_WARNING
,"Can't handle RDB format version %d",rdbver
);
3726 if ((type
= rdbLoadType(fp
)) == -1) goto eoferr
;
3727 if (type
== REDIS_EXPIRETIME
) {
3728 if ((expiretime
= rdbLoadTime(fp
)) == -1) goto eoferr
;
3729 /* We read the time so we need to read the object type again */
3730 if ((type
= rdbLoadType(fp
)) == -1) goto eoferr
;
3732 if (type
== REDIS_EOF
) break;
3733 /* Handle SELECT DB opcode as a special case */
3734 if (type
== REDIS_SELECTDB
) {
3735 if ((dbid
= rdbLoadLen(fp
,NULL
)) == REDIS_RDB_LENERR
)
3737 if (dbid
>= (unsigned)server
.dbnum
) {
3738 redisLog(REDIS_WARNING
,"FATAL: Data file was created with a Redis server configured to handle more than %d databases. Exiting\n", server
.dbnum
);
3741 db
= server
.db
+dbid
;
3746 if ((keyobj
= rdbLoadStringObject(fp
)) == NULL
) goto eoferr
;
3748 if ((o
= rdbLoadObject(type
,fp
)) == NULL
) goto eoferr
;
3749 /* Add the new object in the hash table */
3750 retval
= dictAdd(d
,keyobj
,o
);
3751 if (retval
== DICT_ERR
) {
3752 redisLog(REDIS_WARNING
,"Loading DB, duplicated key (%s) found! Unrecoverable error, exiting now.", keyobj
->ptr
);
3755 /* Set the expire time if needed */
3756 if (expiretime
!= -1) {
3757 setExpire(db
,keyobj
,expiretime
);
3758 /* Delete this key if already expired */
3759 if (expiretime
< now
) deleteKey(db
,keyobj
);
3763 /* Handle swapping while loading big datasets when VM is on */
3765 if (server
.vm_enabled
&& (loadedkeys
% 5000) == 0) {
3766 while (zmalloc_used_memory() > server
.vm_max_memory
) {
3767 if (vmSwapOneObjectBlocking() == REDIS_ERR
) break;
3774 eoferr
: /* unexpected end of file is handled here with a fatal exit */
3775 if (keyobj
) decrRefCount(keyobj
);
3776 redisLog(REDIS_WARNING
,"Short read or OOM loading DB. Unrecoverable error, aborting now.");
3778 return REDIS_ERR
; /* Just to avoid warning */
3781 /*================================== Commands =============================== */
3783 static void authCommand(redisClient
*c
) {
3784 if (!server
.requirepass
|| !strcmp(c
->argv
[1]->ptr
, server
.requirepass
)) {
3785 c
->authenticated
= 1;
3786 addReply(c
,shared
.ok
);
3788 c
->authenticated
= 0;
3789 addReplySds(c
,sdscatprintf(sdsempty(),"-ERR invalid password\r\n"));
3793 static void pingCommand(redisClient
*c
) {
3794 addReply(c
,shared
.pong
);
3797 static void echoCommand(redisClient
*c
) {
3798 addReplyBulk(c
,c
->argv
[1]);
3801 /*=================================== Strings =============================== */
3803 static void setGenericCommand(redisClient
*c
, int nx
) {
3806 if (nx
) deleteIfVolatile(c
->db
,c
->argv
[1]);
3807 retval
= dictAdd(c
->db
->dict
,c
->argv
[1],c
->argv
[2]);
3808 if (retval
== DICT_ERR
) {
3810 /* If the key is about a swapped value, we want a new key object
3811 * to overwrite the old. So we delete the old key in the database.
3812 * This will also make sure that swap pages about the old object
3813 * will be marked as free. */
3814 if (server
.vm_enabled
&& deleteIfSwapped(c
->db
,c
->argv
[1]))
3815 incrRefCount(c
->argv
[1]);
3816 dictReplace(c
->db
->dict
,c
->argv
[1],c
->argv
[2]);
3817 incrRefCount(c
->argv
[2]);
3819 addReply(c
,shared
.czero
);
3823 incrRefCount(c
->argv
[1]);
3824 incrRefCount(c
->argv
[2]);
3827 removeExpire(c
->db
,c
->argv
[1]);
3828 addReply(c
, nx
? shared
.cone
: shared
.ok
);
3831 static void setCommand(redisClient
*c
) {
3832 setGenericCommand(c
,0);
3835 static void setnxCommand(redisClient
*c
) {
3836 setGenericCommand(c
,1);
3839 static int getGenericCommand(redisClient
*c
) {
3842 if ((o
= lookupKeyReadOrReply(c
,c
->argv
[1],shared
.nullbulk
)) == NULL
)
3845 if (o
->type
!= REDIS_STRING
) {
3846 addReply(c
,shared
.wrongtypeerr
);
3854 static void getCommand(redisClient
*c
) {
3855 getGenericCommand(c
);
3858 static void getsetCommand(redisClient
*c
) {
3859 if (getGenericCommand(c
) == REDIS_ERR
) return;
3860 if (dictAdd(c
->db
->dict
,c
->argv
[1],c
->argv
[2]) == DICT_ERR
) {
3861 dictReplace(c
->db
->dict
,c
->argv
[1],c
->argv
[2]);
3863 incrRefCount(c
->argv
[1]);
3865 incrRefCount(c
->argv
[2]);
3867 removeExpire(c
->db
,c
->argv
[1]);
3870 static void mgetCommand(redisClient
*c
) {
3873 addReplySds(c
,sdscatprintf(sdsempty(),"*%d\r\n",c
->argc
-1));
3874 for (j
= 1; j
< c
->argc
; j
++) {
3875 robj
*o
= lookupKeyRead(c
->db
,c
->argv
[j
]);
3877 addReply(c
,shared
.nullbulk
);
3879 if (o
->type
!= REDIS_STRING
) {
3880 addReply(c
,shared
.nullbulk
);
3888 static void msetGenericCommand(redisClient
*c
, int nx
) {
3889 int j
, busykeys
= 0;
3891 if ((c
->argc
% 2) == 0) {
3892 addReplySds(c
,sdsnew("-ERR wrong number of arguments for MSET\r\n"));
3895 /* Handle the NX flag. The MSETNX semantic is to return zero and set
3896 * nothing at all if at least one key already exists. */
3898 for (j
= 1; j
< c
->argc
; j
+= 2) {
3899 if (lookupKeyWrite(c
->db
,c
->argv
[j
]) != NULL
) {
3905 addReply(c
, shared
.czero
);
3909 for (j
= 1; j
< c
->argc
; j
+= 2) {
3912 tryObjectEncoding(c
->argv
[j
+1]);
3913 retval
= dictAdd(c
->db
->dict
,c
->argv
[j
],c
->argv
[j
+1]);
3914 if (retval
== DICT_ERR
) {
3915 dictReplace(c
->db
->dict
,c
->argv
[j
],c
->argv
[j
+1]);
3916 incrRefCount(c
->argv
[j
+1]);
3918 incrRefCount(c
->argv
[j
]);
3919 incrRefCount(c
->argv
[j
+1]);
3921 removeExpire(c
->db
,c
->argv
[j
]);
3923 server
.dirty
+= (c
->argc
-1)/2;
3924 addReply(c
, nx
? shared
.cone
: shared
.ok
);
3927 static void msetCommand(redisClient
*c
) {
3928 msetGenericCommand(c
,0);
3931 static void msetnxCommand(redisClient
*c
) {
3932 msetGenericCommand(c
,1);
3935 static void incrDecrCommand(redisClient
*c
, long long incr
) {
3940 o
= lookupKeyWrite(c
->db
,c
->argv
[1]);
3944 if (o
->type
!= REDIS_STRING
) {
3949 if (o
->encoding
== REDIS_ENCODING_RAW
)
3950 value
= strtoll(o
->ptr
, &eptr
, 10);
3951 else if (o
->encoding
== REDIS_ENCODING_INT
)
3952 value
= (long)o
->ptr
;
3954 redisAssert(1 != 1);
3959 o
= createObject(REDIS_STRING
,sdscatprintf(sdsempty(),"%lld",value
));
3960 tryObjectEncoding(o
);
3961 retval
= dictAdd(c
->db
->dict
,c
->argv
[1],o
);
3962 if (retval
== DICT_ERR
) {
3963 dictReplace(c
->db
->dict
,c
->argv
[1],o
);
3964 removeExpire(c
->db
,c
->argv
[1]);
3966 incrRefCount(c
->argv
[1]);
3969 addReply(c
,shared
.colon
);
3971 addReply(c
,shared
.crlf
);
3974 static void incrCommand(redisClient
*c
) {
3975 incrDecrCommand(c
,1);
3978 static void decrCommand(redisClient
*c
) {
3979 incrDecrCommand(c
,-1);
3982 static void incrbyCommand(redisClient
*c
) {
3983 long long incr
= strtoll(c
->argv
[2]->ptr
, NULL
, 10);
3984 incrDecrCommand(c
,incr
);
3987 static void decrbyCommand(redisClient
*c
) {
3988 long long incr
= strtoll(c
->argv
[2]->ptr
, NULL
, 10);
3989 incrDecrCommand(c
,-incr
);
3992 static void appendCommand(redisClient
*c
) {
3997 o
= lookupKeyWrite(c
->db
,c
->argv
[1]);
3999 /* Create the key */
4000 retval
= dictAdd(c
->db
->dict
,c
->argv
[1],c
->argv
[2]);
4001 incrRefCount(c
->argv
[1]);
4002 incrRefCount(c
->argv
[2]);
4003 totlen
= stringObjectLen(c
->argv
[2]);
4007 de
= dictFind(c
->db
->dict
,c
->argv
[1]);
4010 o
= dictGetEntryVal(de
);
4011 if (o
->type
!= REDIS_STRING
) {
4012 addReply(c
,shared
.wrongtypeerr
);
4015 /* If the object is specially encoded or shared we have to make
4017 if (o
->refcount
!= 1 || o
->encoding
!= REDIS_ENCODING_RAW
) {
4018 robj
*decoded
= getDecodedObject(o
);
4020 o
= createStringObject(decoded
->ptr
, sdslen(decoded
->ptr
));
4021 decrRefCount(decoded
);
4022 dictReplace(c
->db
->dict
,c
->argv
[1],o
);
4025 if (c
->argv
[2]->encoding
== REDIS_ENCODING_RAW
) {
4026 o
->ptr
= sdscatlen(o
->ptr
,
4027 c
->argv
[2]->ptr
, sdslen(c
->argv
[2]->ptr
));
4029 o
->ptr
= sdscatprintf(o
->ptr
, "%ld",
4030 (unsigned long) c
->argv
[2]->ptr
);
4032 totlen
= sdslen(o
->ptr
);
4035 addReplySds(c
,sdscatprintf(sdsempty(),":%lu\r\n",(unsigned long)totlen
));
4038 static void substrCommand(redisClient
*c
) {
4040 long start
= atoi(c
->argv
[2]->ptr
);
4041 long end
= atoi(c
->argv
[3]->ptr
);
4042 size_t rangelen
, strlen
;
4045 if ((o
= lookupKeyReadOrReply(c
,c
->argv
[1],shared
.nullbulk
)) == NULL
||
4046 checkType(c
,o
,REDIS_STRING
)) return;
4048 o
= getDecodedObject(o
);
4049 strlen
= sdslen(o
->ptr
);
4051 /* convert negative indexes */
4052 if (start
< 0) start
= strlen
+start
;
4053 if (end
< 0) end
= strlen
+end
;
4054 if (start
< 0) start
= 0;
4055 if (end
< 0) end
= 0;
4057 /* indexes sanity checks */
4058 if (start
> end
|| (size_t)start
>= strlen
) {
4059 /* Out of range start or start > end result in null reply */
4060 addReply(c
,shared
.nullbulk
);
4064 if ((size_t)end
>= strlen
) end
= strlen
-1;
4065 rangelen
= (end
-start
)+1;
4067 /* Return the result */
4068 addReplySds(c
,sdscatprintf(sdsempty(),"$%zu\r\n",rangelen
));
4069 range
= sdsnewlen((char*)o
->ptr
+start
,rangelen
);
4070 addReplySds(c
,range
);
4071 addReply(c
,shared
.crlf
);
4075 /* ========================= Type agnostic commands ========================= */
4077 static void delCommand(redisClient
*c
) {
4080 for (j
= 1; j
< c
->argc
; j
++) {
4081 if (deleteKey(c
->db
,c
->argv
[j
])) {
4086 addReplyLong(c
,deleted
);
4089 static void existsCommand(redisClient
*c
) {
4090 addReply(c
,lookupKeyRead(c
->db
,c
->argv
[1]) ? shared
.cone
: shared
.czero
);
4093 static void selectCommand(redisClient
*c
) {
4094 int id
= atoi(c
->argv
[1]->ptr
);
4096 if (selectDb(c
,id
) == REDIS_ERR
) {
4097 addReplySds(c
,sdsnew("-ERR invalid DB index\r\n"));
4099 addReply(c
,shared
.ok
);
4103 static void randomkeyCommand(redisClient
*c
) {
4107 de
= dictGetRandomKey(c
->db
->dict
);
4108 if (!de
|| expireIfNeeded(c
->db
,dictGetEntryKey(de
)) == 0) break;
4111 addReply(c
,shared
.plus
);
4112 addReply(c
,shared
.crlf
);
4114 addReply(c
,shared
.plus
);
4115 addReply(c
,dictGetEntryKey(de
));
4116 addReply(c
,shared
.crlf
);
4120 static void keysCommand(redisClient
*c
) {
4123 sds pattern
= c
->argv
[1]->ptr
;
4124 int plen
= sdslen(pattern
);
4125 unsigned long numkeys
= 0;
4126 robj
*lenobj
= createObject(REDIS_STRING
,NULL
);
4128 di
= dictGetIterator(c
->db
->dict
);
4130 decrRefCount(lenobj
);
4131 while((de
= dictNext(di
)) != NULL
) {
4132 robj
*keyobj
= dictGetEntryKey(de
);
4134 sds key
= keyobj
->ptr
;
4135 if ((pattern
[0] == '*' && pattern
[1] == '\0') ||
4136 stringmatchlen(pattern
,plen
,key
,sdslen(key
),0)) {
4137 if (expireIfNeeded(c
->db
,keyobj
) == 0) {
4138 addReplyBulk(c
,keyobj
);
4143 dictReleaseIterator(di
);
4144 lenobj
->ptr
= sdscatprintf(sdsempty(),"*%lu\r\n",numkeys
);
4147 static void dbsizeCommand(redisClient
*c
) {
4149 sdscatprintf(sdsempty(),":%lu\r\n",dictSize(c
->db
->dict
)));
4152 static void lastsaveCommand(redisClient
*c
) {
4154 sdscatprintf(sdsempty(),":%lu\r\n",server
.lastsave
));
4157 static void typeCommand(redisClient
*c
) {
4161 o
= lookupKeyRead(c
->db
,c
->argv
[1]);
4166 case REDIS_STRING
: type
= "+string"; break;
4167 case REDIS_LIST
: type
= "+list"; break;
4168 case REDIS_SET
: type
= "+set"; break;
4169 case REDIS_ZSET
: type
= "+zset"; break;
4170 case REDIS_HASH
: type
= "+hash"; break;
4171 default: type
= "+unknown"; break;
4174 addReplySds(c
,sdsnew(type
));
4175 addReply(c
,shared
.crlf
);
4178 static void saveCommand(redisClient
*c
) {
4179 if (server
.bgsavechildpid
!= -1) {
4180 addReplySds(c
,sdsnew("-ERR background save in progress\r\n"));
4183 if (rdbSave(server
.dbfilename
) == REDIS_OK
) {
4184 addReply(c
,shared
.ok
);
4186 addReply(c
,shared
.err
);
4190 static void bgsaveCommand(redisClient
*c
) {
4191 if (server
.bgsavechildpid
!= -1) {
4192 addReplySds(c
,sdsnew("-ERR background save already in progress\r\n"));
4195 if (rdbSaveBackground(server
.dbfilename
) == REDIS_OK
) {
4196 char *status
= "+Background saving started\r\n";
4197 addReplySds(c
,sdsnew(status
));
4199 addReply(c
,shared
.err
);
4203 static void shutdownCommand(redisClient
*c
) {
4204 redisLog(REDIS_WARNING
,"User requested shutdown, saving DB...");
4205 /* Kill the saving child if there is a background saving in progress.
4206 We want to avoid race conditions, for instance our saving child may
4207 overwrite the synchronous save done by SHUTDOWN. */
4208 if (server
.bgsavechildpid
!= -1) {
4209 redisLog(REDIS_WARNING
,"There is a live saving child. Killing it!");
4210 kill(server
.bgsavechildpid
,SIGKILL
);
4211 rdbRemoveTempFile(server
.bgsavechildpid
);
4213 if (server
.appendonly
) {
4214 /* Append only file: fsync() the AOF and exit */
4215 fsync(server
.appendfd
);
4216 if (server
.vm_enabled
) unlink(server
.vm_swap_file
);
4219 /* Snapshotting. Perform a SYNC SAVE and exit */
4220 if (rdbSave(server
.dbfilename
) == REDIS_OK
) {
4221 if (server
.daemonize
)
4222 unlink(server
.pidfile
);
4223 redisLog(REDIS_WARNING
,"%zu bytes used at exit",zmalloc_used_memory());
4224 redisLog(REDIS_WARNING
,"Server exit now, bye bye...");
4225 if (server
.vm_enabled
) unlink(server
.vm_swap_file
);
4228 /* Ooops.. error saving! The best we can do is to continue
4229 * operating. Note that if there was a background saving process,
4230 * in the next cron() Redis will be notified that the background
4231 * saving aborted, handling special stuff like slaves pending for
4232 * synchronization... */
4233 redisLog(REDIS_WARNING
,"Error trying to save the DB, can't exit");
4235 sdsnew("-ERR can't quit, problems saving the DB\r\n"));
4240 static void renameGenericCommand(redisClient
*c
, int nx
) {
4243 /* To use the same key as src and dst is probably an error */
4244 if (sdscmp(c
->argv
[1]->ptr
,c
->argv
[2]->ptr
) == 0) {
4245 addReply(c
,shared
.sameobjecterr
);
4249 if ((o
= lookupKeyWriteOrReply(c
,c
->argv
[1],shared
.nokeyerr
)) == NULL
)
4253 deleteIfVolatile(c
->db
,c
->argv
[2]);
4254 if (dictAdd(c
->db
->dict
,c
->argv
[2],o
) == DICT_ERR
) {
4257 addReply(c
,shared
.czero
);
4260 dictReplace(c
->db
->dict
,c
->argv
[2],o
);
4262 incrRefCount(c
->argv
[2]);
4264 deleteKey(c
->db
,c
->argv
[1]);
4266 addReply(c
,nx
? shared
.cone
: shared
.ok
);
4269 static void renameCommand(redisClient
*c
) {
4270 renameGenericCommand(c
,0);
4273 static void renamenxCommand(redisClient
*c
) {
4274 renameGenericCommand(c
,1);
4277 static void moveCommand(redisClient
*c
) {
4282 /* Obtain source and target DB pointers */
4285 if (selectDb(c
,atoi(c
->argv
[2]->ptr
)) == REDIS_ERR
) {
4286 addReply(c
,shared
.outofrangeerr
);
4290 selectDb(c
,srcid
); /* Back to the source DB */
4292 /* If the user is moving using as target the same
4293 * DB as the source DB it is probably an error. */
4295 addReply(c
,shared
.sameobjecterr
);
4299 /* Check if the element exists and get a reference */
4300 o
= lookupKeyWrite(c
->db
,c
->argv
[1]);
4302 addReply(c
,shared
.czero
);
4306 /* Try to add the element to the target DB */
4307 deleteIfVolatile(dst
,c
->argv
[1]);
4308 if (dictAdd(dst
->dict
,c
->argv
[1],o
) == DICT_ERR
) {
4309 addReply(c
,shared
.czero
);
4312 incrRefCount(c
->argv
[1]);
4315 /* OK! key moved, free the entry in the source DB */
4316 deleteKey(src
,c
->argv
[1]);
4318 addReply(c
,shared
.cone
);
4321 /* =================================== Lists ================================ */
4322 static void pushGenericCommand(redisClient
*c
, int where
) {
4326 lobj
= lookupKeyWrite(c
->db
,c
->argv
[1]);
4328 if (handleClientsWaitingListPush(c
,c
->argv
[1],c
->argv
[2])) {
4329 addReply(c
,shared
.cone
);
4332 lobj
= createListObject();
4334 if (where
== REDIS_HEAD
) {
4335 listAddNodeHead(list
,c
->argv
[2]);
4337 listAddNodeTail(list
,c
->argv
[2]);
4339 dictAdd(c
->db
->dict
,c
->argv
[1],lobj
);
4340 incrRefCount(c
->argv
[1]);
4341 incrRefCount(c
->argv
[2]);
4343 if (lobj
->type
!= REDIS_LIST
) {
4344 addReply(c
,shared
.wrongtypeerr
);
4347 if (handleClientsWaitingListPush(c
,c
->argv
[1],c
->argv
[2])) {
4348 addReply(c
,shared
.cone
);
4352 if (where
== REDIS_HEAD
) {
4353 listAddNodeHead(list
,c
->argv
[2]);
4355 listAddNodeTail(list
,c
->argv
[2]);
4357 incrRefCount(c
->argv
[2]);
4360 addReplySds(c
,sdscatprintf(sdsempty(),":%d\r\n",listLength(list
)));
4363 static void lpushCommand(redisClient
*c
) {
4364 pushGenericCommand(c
,REDIS_HEAD
);
4367 static void rpushCommand(redisClient
*c
) {
4368 pushGenericCommand(c
,REDIS_TAIL
);
4371 static void llenCommand(redisClient
*c
) {
4375 if ((o
= lookupKeyReadOrReply(c
,c
->argv
[1],shared
.czero
)) == NULL
||
4376 checkType(c
,o
,REDIS_LIST
)) return;
4379 addReplyUlong(c
,listLength(l
));
4382 static void lindexCommand(redisClient
*c
) {
4384 int index
= atoi(c
->argv
[2]->ptr
);
4388 if ((o
= lookupKeyReadOrReply(c
,c
->argv
[1],shared
.nullbulk
)) == NULL
||
4389 checkType(c
,o
,REDIS_LIST
)) return;
4392 ln
= listIndex(list
, index
);
4394 addReply(c
,shared
.nullbulk
);
4396 robj
*ele
= listNodeValue(ln
);
4397 addReplyBulk(c
,ele
);
4401 static void lsetCommand(redisClient
*c
) {
4403 int index
= atoi(c
->argv
[2]->ptr
);
4407 if ((o
= lookupKeyWriteOrReply(c
,c
->argv
[1],shared
.nokeyerr
)) == NULL
||
4408 checkType(c
,o
,REDIS_LIST
)) return;
4411 ln
= listIndex(list
, index
);
4413 addReply(c
,shared
.outofrangeerr
);
4415 robj
*ele
= listNodeValue(ln
);
4418 listNodeValue(ln
) = c
->argv
[3];
4419 incrRefCount(c
->argv
[3]);
4420 addReply(c
,shared
.ok
);
4425 static void popGenericCommand(redisClient
*c
, int where
) {
4430 if ((o
= lookupKeyWriteOrReply(c
,c
->argv
[1],shared
.nullbulk
)) == NULL
||
4431 checkType(c
,o
,REDIS_LIST
)) return;
4434 if (where
== REDIS_HEAD
)
4435 ln
= listFirst(list
);
4437 ln
= listLast(list
);
4440 addReply(c
,shared
.nullbulk
);
4442 robj
*ele
= listNodeValue(ln
);
4443 addReplyBulk(c
,ele
);
4444 listDelNode(list
,ln
);
4445 if (listLength(list
) == 0) deleteKey(c
->db
,c
->argv
[1]);
4450 static void lpopCommand(redisClient
*c
) {
4451 popGenericCommand(c
,REDIS_HEAD
);
4454 static void rpopCommand(redisClient
*c
) {
4455 popGenericCommand(c
,REDIS_TAIL
);
4458 static void lrangeCommand(redisClient
*c
) {
4460 int start
= atoi(c
->argv
[2]->ptr
);
4461 int end
= atoi(c
->argv
[3]->ptr
);
4468 if ((o
= lookupKeyReadOrReply(c
,c
->argv
[1],shared
.nullmultibulk
)) == NULL
||
4469 checkType(c
,o
,REDIS_LIST
)) return;
4471 llen
= listLength(list
);
4473 /* convert negative indexes */
4474 if (start
< 0) start
= llen
+start
;
4475 if (end
< 0) end
= llen
+end
;
4476 if (start
< 0) start
= 0;
4477 if (end
< 0) end
= 0;
4479 /* indexes sanity checks */
4480 if (start
> end
|| start
>= llen
) {
4481 /* Out of range start or start > end result in empty list */
4482 addReply(c
,shared
.emptymultibulk
);
4485 if (end
>= llen
) end
= llen
-1;
4486 rangelen
= (end
-start
)+1;
4488 /* Return the result in form of a multi-bulk reply */
4489 ln
= listIndex(list
, start
);
4490 addReplySds(c
,sdscatprintf(sdsempty(),"*%d\r\n",rangelen
));
4491 for (j
= 0; j
< rangelen
; j
++) {
4492 ele
= listNodeValue(ln
);
4493 addReplyBulk(c
,ele
);
4498 static void ltrimCommand(redisClient
*c
) {
4500 int start
= atoi(c
->argv
[2]->ptr
);
4501 int end
= atoi(c
->argv
[3]->ptr
);
4503 int j
, ltrim
, rtrim
;
4507 if ((o
= lookupKeyWriteOrReply(c
,c
->argv
[1],shared
.ok
)) == NULL
||
4508 checkType(c
,o
,REDIS_LIST
)) return;
4510 llen
= listLength(list
);
4512 /* convert negative indexes */
4513 if (start
< 0) start
= llen
+start
;
4514 if (end
< 0) end
= llen
+end
;
4515 if (start
< 0) start
= 0;
4516 if (end
< 0) end
= 0;
4518 /* indexes sanity checks */
4519 if (start
> end
|| start
>= llen
) {
4520 /* Out of range start or start > end result in empty list */
4524 if (end
>= llen
) end
= llen
-1;
4529 /* Remove list elements to perform the trim */
4530 for (j
= 0; j
< ltrim
; j
++) {
4531 ln
= listFirst(list
);
4532 listDelNode(list
,ln
);
4534 for (j
= 0; j
< rtrim
; j
++) {
4535 ln
= listLast(list
);
4536 listDelNode(list
,ln
);
4538 if (listLength(list
) == 0) deleteKey(c
->db
,c
->argv
[1]);
4540 addReply(c
,shared
.ok
);
4543 static void lremCommand(redisClient
*c
) {
4546 listNode
*ln
, *next
;
4547 int toremove
= atoi(c
->argv
[2]->ptr
);
4551 if ((o
= lookupKeyWriteOrReply(c
,c
->argv
[1],shared
.czero
)) == NULL
||
4552 checkType(c
,o
,REDIS_LIST
)) return;
4556 toremove
= -toremove
;
4559 ln
= fromtail
? list
->tail
: list
->head
;
4561 robj
*ele
= listNodeValue(ln
);
4563 next
= fromtail
? ln
->prev
: ln
->next
;
4564 if (compareStringObjects(ele
,c
->argv
[3]) == 0) {
4565 listDelNode(list
,ln
);
4568 if (toremove
&& removed
== toremove
) break;
4572 if (listLength(list
) == 0) deleteKey(c
->db
,c
->argv
[1]);
4573 addReplySds(c
,sdscatprintf(sdsempty(),":%d\r\n",removed
));
4576 /* This is the semantic of this command:
4577 * RPOPLPUSH srclist dstlist:
4578 * IF LLEN(srclist) > 0
4579 * element = RPOP srclist
4580 * LPUSH dstlist element
4587 * The idea is to be able to get an element from a list in a reliable way
4588 * since the element is not just returned but pushed against another list
4589 * as well. This command was originally proposed by Ezra Zygmuntowicz.
4591 static void rpoplpushcommand(redisClient
*c
) {
4596 if ((sobj
= lookupKeyWriteOrReply(c
,c
->argv
[1],shared
.nullbulk
)) == NULL
||
4597 checkType(c
,sobj
,REDIS_LIST
)) return;
4598 srclist
= sobj
->ptr
;
4599 ln
= listLast(srclist
);
4602 addReply(c
,shared
.nullbulk
);
4604 robj
*dobj
= lookupKeyWrite(c
->db
,c
->argv
[2]);
4605 robj
*ele
= listNodeValue(ln
);
4608 if (dobj
&& dobj
->type
!= REDIS_LIST
) {
4609 addReply(c
,shared
.wrongtypeerr
);
4613 /* Add the element to the target list (unless it's directly
4614 * passed to some BLPOP-ing client) */
4615 if (!handleClientsWaitingListPush(c
,c
->argv
[2],ele
)) {
4617 /* Create the list if the key does not exist */
4618 dobj
= createListObject();
4619 dictAdd(c
->db
->dict
,c
->argv
[2],dobj
);
4620 incrRefCount(c
->argv
[2]);
4622 dstlist
= dobj
->ptr
;
4623 listAddNodeHead(dstlist
,ele
);
4627 /* Send the element to the client as reply as well */
4628 addReplyBulk(c
,ele
);
4630 /* Finally remove the element from the source list */
4631 listDelNode(srclist
,ln
);
4632 if (listLength(srclist
) == 0) deleteKey(c
->db
,c
->argv
[1]);
4637 /* ==================================== Sets ================================ */
4639 static void saddCommand(redisClient
*c
) {
4642 set
= lookupKeyWrite(c
->db
,c
->argv
[1]);
4644 set
= createSetObject();
4645 dictAdd(c
->db
->dict
,c
->argv
[1],set
);
4646 incrRefCount(c
->argv
[1]);
4648 if (set
->type
!= REDIS_SET
) {
4649 addReply(c
,shared
.wrongtypeerr
);
4653 if (dictAdd(set
->ptr
,c
->argv
[2],NULL
) == DICT_OK
) {
4654 incrRefCount(c
->argv
[2]);
4656 addReply(c
,shared
.cone
);
4658 addReply(c
,shared
.czero
);
4662 static void sremCommand(redisClient
*c
) {
4665 if ((set
= lookupKeyWriteOrReply(c
,c
->argv
[1],shared
.czero
)) == NULL
||
4666 checkType(c
,set
,REDIS_SET
)) return;
4668 if (dictDelete(set
->ptr
,c
->argv
[2]) == DICT_OK
) {
4670 if (htNeedsResize(set
->ptr
)) dictResize(set
->ptr
);
4671 if (dictSize((dict
*)set
->ptr
) == 0) deleteKey(c
->db
,c
->argv
[1]);
4672 addReply(c
,shared
.cone
);
4674 addReply(c
,shared
.czero
);
4678 static void smoveCommand(redisClient
*c
) {
4679 robj
*srcset
, *dstset
;
4681 srcset
= lookupKeyWrite(c
->db
,c
->argv
[1]);
4682 dstset
= lookupKeyWrite(c
->db
,c
->argv
[2]);
4684 /* If the source key does not exist return 0, if it's of the wrong type
4686 if (srcset
== NULL
|| srcset
->type
!= REDIS_SET
) {
4687 addReply(c
, srcset
? shared
.wrongtypeerr
: shared
.czero
);
4690 /* Error if the destination key is not a set as well */
4691 if (dstset
&& dstset
->type
!= REDIS_SET
) {
4692 addReply(c
,shared
.wrongtypeerr
);
4695 /* Remove the element from the source set */
4696 if (dictDelete(srcset
->ptr
,c
->argv
[3]) == DICT_ERR
) {
4697 /* Key not found in the src set! return zero */
4698 addReply(c
,shared
.czero
);
4701 if (dictSize((dict
*)srcset
->ptr
) == 0 && srcset
!= dstset
)
4702 deleteKey(c
->db
,c
->argv
[1]);
4704 /* Add the element to the destination set */
4706 dstset
= createSetObject();
4707 dictAdd(c
->db
->dict
,c
->argv
[2],dstset
);
4708 incrRefCount(c
->argv
[2]);
4710 if (dictAdd(dstset
->ptr
,c
->argv
[3],NULL
) == DICT_OK
)
4711 incrRefCount(c
->argv
[3]);
4712 addReply(c
,shared
.cone
);
4715 static void sismemberCommand(redisClient
*c
) {
4718 if ((set
= lookupKeyReadOrReply(c
,c
->argv
[1],shared
.czero
)) == NULL
||
4719 checkType(c
,set
,REDIS_SET
)) return;
4721 if (dictFind(set
->ptr
,c
->argv
[2]))
4722 addReply(c
,shared
.cone
);
4724 addReply(c
,shared
.czero
);
4727 static void scardCommand(redisClient
*c
) {
4731 if ((o
= lookupKeyReadOrReply(c
,c
->argv
[1],shared
.czero
)) == NULL
||
4732 checkType(c
,o
,REDIS_SET
)) return;
4735 addReplyUlong(c
,dictSize(s
));
4738 static void spopCommand(redisClient
*c
) {
4742 if ((set
= lookupKeyWriteOrReply(c
,c
->argv
[1],shared
.nullbulk
)) == NULL
||
4743 checkType(c
,set
,REDIS_SET
)) return;
4745 de
= dictGetRandomKey(set
->ptr
);
4747 addReply(c
,shared
.nullbulk
);
4749 robj
*ele
= dictGetEntryKey(de
);
4751 addReplyBulk(c
,ele
);
4752 dictDelete(set
->ptr
,ele
);
4753 if (htNeedsResize(set
->ptr
)) dictResize(set
->ptr
);
4754 if (dictSize((dict
*)set
->ptr
) == 0) deleteKey(c
->db
,c
->argv
[1]);
4759 static void srandmemberCommand(redisClient
*c
) {
4763 if ((set
= lookupKeyReadOrReply(c
,c
->argv
[1],shared
.nullbulk
)) == NULL
||
4764 checkType(c
,set
,REDIS_SET
)) return;
4766 de
= dictGetRandomKey(set
->ptr
);
4768 addReply(c
,shared
.nullbulk
);
4770 robj
*ele
= dictGetEntryKey(de
);
4772 addReplyBulk(c
,ele
);
4776 static int qsortCompareSetsByCardinality(const void *s1
, const void *s2
) {
4777 dict
**d1
= (void*) s1
, **d2
= (void*) s2
;
4779 return dictSize(*d1
)-dictSize(*d2
);
4782 static void sinterGenericCommand(redisClient
*c
, robj
**setskeys
, unsigned long setsnum
, robj
*dstkey
) {
4783 dict
**dv
= zmalloc(sizeof(dict
*)*setsnum
);
4786 robj
*lenobj
= NULL
, *dstset
= NULL
;
4787 unsigned long j
, cardinality
= 0;
4789 for (j
= 0; j
< setsnum
; j
++) {
4793 lookupKeyWrite(c
->db
,setskeys
[j
]) :
4794 lookupKeyRead(c
->db
,setskeys
[j
]);
4798 if (deleteKey(c
->db
,dstkey
))
4800 addReply(c
,shared
.czero
);
4802 addReply(c
,shared
.nullmultibulk
);
4806 if (setobj
->type
!= REDIS_SET
) {
4808 addReply(c
,shared
.wrongtypeerr
);
4811 dv
[j
] = setobj
->ptr
;
4813 /* Sort sets from the smallest to largest, this will improve our
4814 * algorithm's performance */
4815 qsort(dv
,setsnum
,sizeof(dict
*),qsortCompareSetsByCardinality
);
4817 /* The first thing we should output is the total number of elements...
4818 * since this is a multi-bulk write, but at this stage we don't know
4819 * the intersection set size, so we use a trick, append an empty object
4820 * to the output list and save the pointer to later modify it with the
4823 lenobj
= createObject(REDIS_STRING
,NULL
);
4825 decrRefCount(lenobj
);
4827 /* If we have a target key where to store the resulting set
4828 * create this key with an empty set inside */
4829 dstset
= createSetObject();
4832 /* Iterate all the elements of the first (smallest) set, and test
4833 * the element against all the other sets, if at least one set does
4834 * not include the element it is discarded */
4835 di
= dictGetIterator(dv
[0]);
4837 while((de
= dictNext(di
)) != NULL
) {
4840 for (j
= 1; j
< setsnum
; j
++)
4841 if (dictFind(dv
[j
],dictGetEntryKey(de
)) == NULL
) break;
4843 continue; /* at least one set does not contain the member */
4844 ele
= dictGetEntryKey(de
);
4846 addReplyBulk(c
,ele
);
4849 dictAdd(dstset
->ptr
,ele
,NULL
);
4853 dictReleaseIterator(di
);
4856 /* Store the resulting set into the target, if the intersection
4857 * is not an empty set. */
4858 deleteKey(c
->db
,dstkey
);
4859 if (dictSize((dict
*)dstset
->ptr
) > 0) {
4860 dictAdd(c
->db
->dict
,dstkey
,dstset
);
4861 incrRefCount(dstkey
);
4862 addReplyLong(c
,dictSize((dict
*)dstset
->ptr
));
4864 decrRefCount(dstset
);
4865 addReply(c
,shared
.czero
);
4869 lenobj
->ptr
= sdscatprintf(sdsempty(),"*%lu\r\n",cardinality
);
4874 static void sinterCommand(redisClient
*c
) {
4875 sinterGenericCommand(c
,c
->argv
+1,c
->argc
-1,NULL
);
4878 static void sinterstoreCommand(redisClient
*c
) {
4879 sinterGenericCommand(c
,c
->argv
+2,c
->argc
-2,c
->argv
[1]);
4882 #define REDIS_OP_UNION 0
4883 #define REDIS_OP_DIFF 1
4884 #define REDIS_OP_INTER 2
4886 static void sunionDiffGenericCommand(redisClient
*c
, robj
**setskeys
, int setsnum
, robj
*dstkey
, int op
) {
4887 dict
**dv
= zmalloc(sizeof(dict
*)*setsnum
);
4890 robj
*dstset
= NULL
;
4891 int j
, cardinality
= 0;
4893 for (j
= 0; j
< setsnum
; j
++) {
4897 lookupKeyWrite(c
->db
,setskeys
[j
]) :
4898 lookupKeyRead(c
->db
,setskeys
[j
]);
4903 if (setobj
->type
!= REDIS_SET
) {
4905 addReply(c
,shared
.wrongtypeerr
);
4908 dv
[j
] = setobj
->ptr
;
4911 /* We need a temp set object to store our union. If the dstkey
4912 * is not NULL (that is, we are inside an SUNIONSTORE operation) then
4913 * this set object will be the resulting object to set into the target key*/
4914 dstset
= createSetObject();
4916 /* Iterate all the elements of all the sets, add every element a single
4917 * time to the result set */
4918 for (j
= 0; j
< setsnum
; j
++) {
4919 if (op
== REDIS_OP_DIFF
&& j
== 0 && !dv
[j
]) break; /* result set is empty */
4920 if (!dv
[j
]) continue; /* non existing keys are like empty sets */
4922 di
= dictGetIterator(dv
[j
]);
4924 while((de
= dictNext(di
)) != NULL
) {
4927 /* dictAdd will not add the same element multiple times */
4928 ele
= dictGetEntryKey(de
);
4929 if (op
== REDIS_OP_UNION
|| j
== 0) {
4930 if (dictAdd(dstset
->ptr
,ele
,NULL
) == DICT_OK
) {
4934 } else if (op
== REDIS_OP_DIFF
) {
4935 if (dictDelete(dstset
->ptr
,ele
) == DICT_OK
) {
4940 dictReleaseIterator(di
);
4942 /* result set is empty? Exit asap. */
4943 if (op
== REDIS_OP_DIFF
&& cardinality
== 0) break;
4946 /* Output the content of the resulting set, if not in STORE mode */
4948 addReplySds(c
,sdscatprintf(sdsempty(),"*%d\r\n",cardinality
));
4949 di
= dictGetIterator(dstset
->ptr
);
4950 while((de
= dictNext(di
)) != NULL
) {
4953 ele
= dictGetEntryKey(de
);
4954 addReplyBulk(c
,ele
);
4956 dictReleaseIterator(di
);
4957 decrRefCount(dstset
);
4959 /* If we have a target key where to store the resulting set
4960 * create this key with the result set inside */
4961 deleteKey(c
->db
,dstkey
);
4962 if (dictSize((dict
*)dstset
->ptr
) > 0) {
4963 dictAdd(c
->db
->dict
,dstkey
,dstset
);
4964 incrRefCount(dstkey
);
4965 addReplyLong(c
,dictSize((dict
*)dstset
->ptr
));
4967 decrRefCount(dstset
);
4968 addReply(c
,shared
.czero
);
4975 static void sunionCommand(redisClient
*c
) {
4976 sunionDiffGenericCommand(c
,c
->argv
+1,c
->argc
-1,NULL
,REDIS_OP_UNION
);
4979 static void sunionstoreCommand(redisClient
*c
) {
4980 sunionDiffGenericCommand(c
,c
->argv
+2,c
->argc
-2,c
->argv
[1],REDIS_OP_UNION
);
4983 static void sdiffCommand(redisClient
*c
) {
4984 sunionDiffGenericCommand(c
,c
->argv
+1,c
->argc
-1,NULL
,REDIS_OP_DIFF
);
4987 static void sdiffstoreCommand(redisClient
*c
) {
4988 sunionDiffGenericCommand(c
,c
->argv
+2,c
->argc
-2,c
->argv
[1],REDIS_OP_DIFF
);
4991 /* ==================================== ZSets =============================== */
4993 /* ZSETs are ordered sets using two data structures to hold the same elements
4994 * in order to get O(log(N)) INSERT and REMOVE operations into a sorted
4997 * The elements are added to a hash table mapping Redis objects to scores.
4998 * At the same time the elements are added to a skip list mapping scores
4999 * to Redis objects (so objects are sorted by scores in this "view"). */
5001 /* This skiplist implementation is almost a C translation of the original
5002 * algorithm described by William Pugh in "Skip Lists: A Probabilistic
5003 * Alternative to Balanced Trees", modified in three ways:
5004 * a) this implementation allows for repeated values.
5005 * b) the comparison is not just by key (our 'score') but by satellite data.
5006 * c) there is a back pointer, so it's a doubly linked list with the back
5007 * pointers being only at "level 1". This allows to traverse the list
5008 * from tail to head, useful for ZREVRANGE. */
5010 static zskiplistNode
*zslCreateNode(int level
, double score
, robj
*obj
) {
5011 zskiplistNode
*zn
= zmalloc(sizeof(*zn
));
5013 zn
->forward
= zmalloc(sizeof(zskiplistNode
*) * level
);
5015 zn
->span
= zmalloc(sizeof(unsigned int) * (level
- 1));
5021 static zskiplist
*zslCreate(void) {
5025 zsl
= zmalloc(sizeof(*zsl
));
5028 zsl
->header
= zslCreateNode(ZSKIPLIST_MAXLEVEL
,0,NULL
);
5029 for (j
= 0; j
< ZSKIPLIST_MAXLEVEL
; j
++) {
5030 zsl
->header
->forward
[j
] = NULL
;
5032 /* span has space for ZSKIPLIST_MAXLEVEL-1 elements */
5033 if (j
< ZSKIPLIST_MAXLEVEL
-1)
5034 zsl
->header
->span
[j
] = 0;
5036 zsl
->header
->backward
= NULL
;
5041 static void zslFreeNode(zskiplistNode
*node
) {
5042 decrRefCount(node
->obj
);
5043 zfree(node
->forward
);
5048 static void zslFree(zskiplist
*zsl
) {
5049 zskiplistNode
*node
= zsl
->header
->forward
[0], *next
;
5051 zfree(zsl
->header
->forward
);
5052 zfree(zsl
->header
->span
);
5055 next
= node
->forward
[0];
5062 static int zslRandomLevel(void) {
5064 while ((random()&0xFFFF) < (ZSKIPLIST_P
* 0xFFFF))
5066 return (level
<ZSKIPLIST_MAXLEVEL
) ? level
: ZSKIPLIST_MAXLEVEL
;
5069 static void zslInsert(zskiplist
*zsl
, double score
, robj
*obj
) {
5070 zskiplistNode
*update
[ZSKIPLIST_MAXLEVEL
], *x
;
5071 unsigned int rank
[ZSKIPLIST_MAXLEVEL
];
5075 for (i
= zsl
->level
-1; i
>= 0; i
--) {
5076 /* store rank that is crossed to reach the insert position */
5077 rank
[i
] = i
== (zsl
->level
-1) ? 0 : rank
[i
+1];
5079 while (x
->forward
[i
] &&
5080 (x
->forward
[i
]->score
< score
||
5081 (x
->forward
[i
]->score
== score
&&
5082 compareStringObjects(x
->forward
[i
]->obj
,obj
) < 0))) {
5083 rank
[i
] += i
> 0 ? x
->span
[i
-1] : 1;
5088 /* we assume the key is not already inside, since we allow duplicated
5089 * scores, and the re-insertion of score and redis object should never
5090 * happen since the caller of zslInsert() should test in the hash table
5091 * if the element is already inside or not. */
5092 level
= zslRandomLevel();
5093 if (level
> zsl
->level
) {
5094 for (i
= zsl
->level
; i
< level
; i
++) {
5096 update
[i
] = zsl
->header
;
5097 update
[i
]->span
[i
-1] = zsl
->length
;
5101 x
= zslCreateNode(level
,score
,obj
);
5102 for (i
= 0; i
< level
; i
++) {
5103 x
->forward
[i
] = update
[i
]->forward
[i
];
5104 update
[i
]->forward
[i
] = x
;
5106 /* update span covered by update[i] as x is inserted here */
5108 x
->span
[i
-1] = update
[i
]->span
[i
-1] - (rank
[0] - rank
[i
]);
5109 update
[i
]->span
[i
-1] = (rank
[0] - rank
[i
]) + 1;
5113 /* increment span for untouched levels */
5114 for (i
= level
; i
< zsl
->level
; i
++) {
5115 update
[i
]->span
[i
-1]++;
5118 x
->backward
= (update
[0] == zsl
->header
) ? NULL
: update
[0];
5120 x
->forward
[0]->backward
= x
;
5126 /* Internal function used by zslDelete, zslDeleteByScore and zslDeleteByRank */
5127 void zslDeleteNode(zskiplist
*zsl
, zskiplistNode
*x
, zskiplistNode
**update
) {
5129 for (i
= 0; i
< zsl
->level
; i
++) {
5130 if (update
[i
]->forward
[i
] == x
) {
5132 update
[i
]->span
[i
-1] += x
->span
[i
-1] - 1;
5134 update
[i
]->forward
[i
] = x
->forward
[i
];
5136 /* invariant: i > 0, because update[0]->forward[0]
5137 * is always equal to x */
5138 update
[i
]->span
[i
-1] -= 1;
5141 if (x
->forward
[0]) {
5142 x
->forward
[0]->backward
= x
->backward
;
5144 zsl
->tail
= x
->backward
;
5146 while(zsl
->level
> 1 && zsl
->header
->forward
[zsl
->level
-1] == NULL
)
5151 /* Delete an element with matching score/object from the skiplist. */
5152 static int zslDelete(zskiplist
*zsl
, double score
, robj
*obj
) {
5153 zskiplistNode
*update
[ZSKIPLIST_MAXLEVEL
], *x
;
5157 for (i
= zsl
->level
-1; i
>= 0; i
--) {
5158 while (x
->forward
[i
] &&
5159 (x
->forward
[i
]->score
< score
||
5160 (x
->forward
[i
]->score
== score
&&
5161 compareStringObjects(x
->forward
[i
]->obj
,obj
) < 0)))
5165 /* We may have multiple elements with the same score, what we need
5166 * is to find the element with both the right score and object. */
5168 if (x
&& score
== x
->score
&& compareStringObjects(x
->obj
,obj
) == 0) {
5169 zslDeleteNode(zsl
, x
, update
);
5173 return 0; /* not found */
5175 return 0; /* not found */
5178 /* Delete all the elements with score between min and max from the skiplist.
5179 * Min and mx are inclusive, so a score >= min || score <= max is deleted.
5180 * Note that this function takes the reference to the hash table view of the
5181 * sorted set, in order to remove the elements from the hash table too. */
5182 static unsigned long zslDeleteRangeByScore(zskiplist
*zsl
, double min
, double max
, dict
*dict
) {
5183 zskiplistNode
*update
[ZSKIPLIST_MAXLEVEL
], *x
;
5184 unsigned long removed
= 0;
5188 for (i
= zsl
->level
-1; i
>= 0; i
--) {
5189 while (x
->forward
[i
] && x
->forward
[i
]->score
< min
)
5193 /* We may have multiple elements with the same score, what we need
5194 * is to find the element with both the right score and object. */
5196 while (x
&& x
->score
<= max
) {
5197 zskiplistNode
*next
= x
->forward
[0];
5198 zslDeleteNode(zsl
, x
, update
);
5199 dictDelete(dict
,x
->obj
);
5204 return removed
; /* not found */
5207 /* Delete all the elements with rank between start and end from the skiplist.
5208 * Start and end are inclusive. Note that start and end need to be 1-based */
5209 static unsigned long zslDeleteRangeByRank(zskiplist
*zsl
, unsigned int start
, unsigned int end
, dict
*dict
) {
5210 zskiplistNode
*update
[ZSKIPLIST_MAXLEVEL
], *x
;
5211 unsigned long traversed
= 0, removed
= 0;
5215 for (i
= zsl
->level
-1; i
>= 0; i
--) {
5216 while (x
->forward
[i
] && (traversed
+ (i
> 0 ? x
->span
[i
-1] : 1)) < start
) {
5217 traversed
+= i
> 0 ? x
->span
[i
-1] : 1;
5225 while (x
&& traversed
<= end
) {
5226 zskiplistNode
*next
= x
->forward
[0];
5227 zslDeleteNode(zsl
, x
, update
);
5228 dictDelete(dict
,x
->obj
);
5237 /* Find the first node having a score equal or greater than the specified one.
5238 * Returns NULL if there is no match. */
5239 static zskiplistNode
*zslFirstWithScore(zskiplist
*zsl
, double score
) {
5244 for (i
= zsl
->level
-1; i
>= 0; i
--) {
5245 while (x
->forward
[i
] && x
->forward
[i
]->score
< score
)
5248 /* We may have multiple elements with the same score, what we need
5249 * is to find the element with both the right score and object. */
5250 return x
->forward
[0];
5253 /* Find the rank for an element by both score and key.
5254 * Returns 0 when the element cannot be found, rank otherwise.
5255 * Note that the rank is 1-based due to the span of zsl->header to the
5257 static unsigned long zslGetRank(zskiplist
*zsl
, double score
, robj
*o
) {
5259 unsigned long rank
= 0;
5263 for (i
= zsl
->level
-1; i
>= 0; i
--) {
5264 while (x
->forward
[i
] &&
5265 (x
->forward
[i
]->score
< score
||
5266 (x
->forward
[i
]->score
== score
&&
5267 compareStringObjects(x
->forward
[i
]->obj
,o
) <= 0))) {
5268 rank
+= i
> 0 ? x
->span
[i
-1] : 1;
5272 /* x might be equal to zsl->header, so test if obj is non-NULL */
5273 if (x
->obj
&& compareStringObjects(x
->obj
,o
) == 0) {
5280 /* Finds an element by its rank. The rank argument needs to be 1-based. */
5281 zskiplistNode
* zslGetElementByRank(zskiplist
*zsl
, unsigned long rank
) {
5283 unsigned long traversed
= 0;
5287 for (i
= zsl
->level
-1; i
>= 0; i
--) {
5288 while (x
->forward
[i
] && (traversed
+ (i
>0 ? x
->span
[i
-1] : 1)) <= rank
)
5290 traversed
+= i
> 0 ? x
->span
[i
-1] : 1;
5293 if (traversed
== rank
) {
5300 /* The actual Z-commands implementations */
5302 /* This generic command implements both ZADD and ZINCRBY.
5303 * scoreval is the score if the operation is a ZADD (doincrement == 0) or
5304 * the increment if the operation is a ZINCRBY (doincrement == 1). */
5305 static void zaddGenericCommand(redisClient
*c
, robj
*key
, robj
*ele
, double scoreval
, int doincrement
) {
5310 zsetobj
= lookupKeyWrite(c
->db
,key
);
5311 if (zsetobj
== NULL
) {
5312 zsetobj
= createZsetObject();
5313 dictAdd(c
->db
->dict
,key
,zsetobj
);
5316 if (zsetobj
->type
!= REDIS_ZSET
) {
5317 addReply(c
,shared
.wrongtypeerr
);
5323 /* Ok now since we implement both ZADD and ZINCRBY here the code
5324 * needs to handle the two different conditions. It's all about setting
5325 * '*score', that is, the new score to set, to the right value. */
5326 score
= zmalloc(sizeof(double));
5330 /* Read the old score. If the element was not present starts from 0 */
5331 de
= dictFind(zs
->dict
,ele
);
5333 double *oldscore
= dictGetEntryVal(de
);
5334 *score
= *oldscore
+ scoreval
;
5342 /* What follows is a simple remove and re-insert operation that is common
5343 * to both ZADD and ZINCRBY... */
5344 if (dictAdd(zs
->dict
,ele
,score
) == DICT_OK
) {
5345 /* case 1: New element */
5346 incrRefCount(ele
); /* added to hash */
5347 zslInsert(zs
->zsl
,*score
,ele
);
5348 incrRefCount(ele
); /* added to skiplist */
5351 addReplyDouble(c
,*score
);
5353 addReply(c
,shared
.cone
);
5358 /* case 2: Score update operation */
5359 de
= dictFind(zs
->dict
,ele
);
5360 redisAssert(de
!= NULL
);
5361 oldscore
= dictGetEntryVal(de
);
5362 if (*score
!= *oldscore
) {
5365 /* Remove and insert the element in the skip list with new score */
5366 deleted
= zslDelete(zs
->zsl
,*oldscore
,ele
);
5367 redisAssert(deleted
!= 0);
5368 zslInsert(zs
->zsl
,*score
,ele
);
5370 /* Update the score in the hash table */
5371 dictReplace(zs
->dict
,ele
,score
);
5377 addReplyDouble(c
,*score
);
5379 addReply(c
,shared
.czero
);
5383 static void zaddCommand(redisClient
*c
) {
5386 scoreval
= strtod(c
->argv
[2]->ptr
,NULL
);
5387 zaddGenericCommand(c
,c
->argv
[1],c
->argv
[3],scoreval
,0);
5390 static void zincrbyCommand(redisClient
*c
) {
5393 scoreval
= strtod(c
->argv
[2]->ptr
,NULL
);
5394 zaddGenericCommand(c
,c
->argv
[1],c
->argv
[3],scoreval
,1);
5397 static void zremCommand(redisClient
*c
) {
5404 if ((zsetobj
= lookupKeyWriteOrReply(c
,c
->argv
[1],shared
.czero
)) == NULL
||
5405 checkType(c
,zsetobj
,REDIS_ZSET
)) return;
5408 de
= dictFind(zs
->dict
,c
->argv
[2]);
5410 addReply(c
,shared
.czero
);
5413 /* Delete from the skiplist */
5414 oldscore
= dictGetEntryVal(de
);
5415 deleted
= zslDelete(zs
->zsl
,*oldscore
,c
->argv
[2]);
5416 redisAssert(deleted
!= 0);
5418 /* Delete from the hash table */
5419 dictDelete(zs
->dict
,c
->argv
[2]);
5420 if (htNeedsResize(zs
->dict
)) dictResize(zs
->dict
);
5421 if (dictSize(zs
->dict
) == 0) deleteKey(c
->db
,c
->argv
[1]);
5423 addReply(c
,shared
.cone
);
5426 static void zremrangebyscoreCommand(redisClient
*c
) {
5427 double min
= strtod(c
->argv
[2]->ptr
,NULL
);
5428 double max
= strtod(c
->argv
[3]->ptr
,NULL
);
5433 if ((zsetobj
= lookupKeyWriteOrReply(c
,c
->argv
[1],shared
.czero
)) == NULL
||
5434 checkType(c
,zsetobj
,REDIS_ZSET
)) return;
5437 deleted
= zslDeleteRangeByScore(zs
->zsl
,min
,max
,zs
->dict
);
5438 if (htNeedsResize(zs
->dict
)) dictResize(zs
->dict
);
5439 if (dictSize(zs
->dict
) == 0) deleteKey(c
->db
,c
->argv
[1]);
5440 server
.dirty
+= deleted
;
5441 addReplyLong(c
,deleted
);
5444 static void zremrangebyrankCommand(redisClient
*c
) {
5445 int start
= atoi(c
->argv
[2]->ptr
);
5446 int end
= atoi(c
->argv
[3]->ptr
);
5452 if ((zsetobj
= lookupKeyWriteOrReply(c
,c
->argv
[1],shared
.czero
)) == NULL
||
5453 checkType(c
,zsetobj
,REDIS_ZSET
)) return;
5455 llen
= zs
->zsl
->length
;
5457 /* convert negative indexes */
5458 if (start
< 0) start
= llen
+start
;
5459 if (end
< 0) end
= llen
+end
;
5460 if (start
< 0) start
= 0;
5461 if (end
< 0) end
= 0;
5463 /* indexes sanity checks */
5464 if (start
> end
|| start
>= llen
) {
5465 addReply(c
,shared
.czero
);
5468 if (end
>= llen
) end
= llen
-1;
5470 /* increment start and end because zsl*Rank functions
5471 * use 1-based rank */
5472 deleted
= zslDeleteRangeByRank(zs
->zsl
,start
+1,end
+1,zs
->dict
);
5473 if (htNeedsResize(zs
->dict
)) dictResize(zs
->dict
);
5474 if (dictSize(zs
->dict
) == 0) deleteKey(c
->db
,c
->argv
[1]);
5475 server
.dirty
+= deleted
;
5476 addReplyLong(c
, deleted
);
5484 static int qsortCompareZsetopsrcByCardinality(const void *s1
, const void *s2
) {
5485 zsetopsrc
*d1
= (void*) s1
, *d2
= (void*) s2
;
5486 unsigned long size1
, size2
;
5487 size1
= d1
->dict
? dictSize(d1
->dict
) : 0;
5488 size2
= d2
->dict
? dictSize(d2
->dict
) : 0;
5489 return size1
- size2
;
/* Aggregation modes accepted by zunionInterAggregate(). */
#define REDIS_AGGR_SUM 1
#define REDIS_AGGR_MIN 2
#define REDIS_AGGR_MAX 3

/* Fold 'val' into '*target' according to 'aggregate':
 *   REDIS_AGGR_SUM  *target becomes *target + val
 *   REDIS_AGGR_MIN  *target becomes the smaller of the two
 *   REDIS_AGGR_MAX  *target becomes the larger of the two
 * Any other mode is a programming error and trips redisAssert(). */
inline static void zunionInterAggregate(double *target, double val, int aggregate) {
    if (aggregate == REDIS_AGGR_SUM) {
        *target = *target + val;
    } else if (aggregate == REDIS_AGGR_MIN) {
        *target = val < *target ? val : *target;
    } else if (aggregate == REDIS_AGGR_MAX) {
        *target = val > *target ? val : *target;
    } else {
        /* safety net */
        redisAssert(0 != 0);
    }
}
5509 static void zunionInterGenericCommand(redisClient
*c
, robj
*dstkey
, int op
) {
5511 int aggregate
= REDIS_AGGR_SUM
;
5518 /* expect zsetnum input keys to be given */
5519 zsetnum
= atoi(c
->argv
[2]->ptr
);
5521 addReplySds(c
,sdsnew("-ERR at least 1 input key is needed for ZUNION/ZINTER\r\n"));
5525 /* test if the expected number of keys would overflow */
5526 if (3+zsetnum
> c
->argc
) {
5527 addReply(c
,shared
.syntaxerr
);
5531 /* read keys to be used for input */
5532 src
= zmalloc(sizeof(zsetopsrc
) * zsetnum
);
5533 for (i
= 0, j
= 3; i
< zsetnum
; i
++, j
++) {
5534 robj
*zsetobj
= lookupKeyWrite(c
->db
,c
->argv
[j
]);
5538 if (zsetobj
->type
!= REDIS_ZSET
) {
5540 addReply(c
,shared
.wrongtypeerr
);
5543 src
[i
].dict
= ((zset
*)zsetobj
->ptr
)->dict
;
5546 /* default all weights to 1 */
5547 src
[i
].weight
= 1.0;
5550 /* parse optional extra arguments */
5552 int remaining
= c
->argc
- j
;
5555 if (remaining
>= (zsetnum
+ 1) && !strcasecmp(c
->argv
[j
]->ptr
,"weights")) {
5557 for (i
= 0; i
< zsetnum
; i
++, j
++, remaining
--) {
5558 src
[i
].weight
= strtod(c
->argv
[j
]->ptr
, NULL
);
5560 } else if (remaining
>= 2 && !strcasecmp(c
->argv
[j
]->ptr
,"aggregate")) {
5562 if (!strcasecmp(c
->argv
[j
]->ptr
,"sum")) {
5563 aggregate
= REDIS_AGGR_SUM
;
5564 } else if (!strcasecmp(c
->argv
[j
]->ptr
,"min")) {
5565 aggregate
= REDIS_AGGR_MIN
;
5566 } else if (!strcasecmp(c
->argv
[j
]->ptr
,"max")) {
5567 aggregate
= REDIS_AGGR_MAX
;
5570 addReply(c
,shared
.syntaxerr
);
5576 addReply(c
,shared
.syntaxerr
);
5582 /* sort sets from the smallest to largest, this will improve our
5583 * algorithm's performance */
5584 qsort(src
,zsetnum
,sizeof(zsetopsrc
), qsortCompareZsetopsrcByCardinality
);
5586 dstobj
= createZsetObject();
5587 dstzset
= dstobj
->ptr
;
5589 if (op
== REDIS_OP_INTER
) {
5590 /* skip going over all entries if the smallest zset is NULL or empty */
5591 if (src
[0].dict
&& dictSize(src
[0].dict
) > 0) {
5592 /* precondition: as src[0].dict is non-empty and the zsets are ordered
5593 * from small to large, all src[i > 0].dict are non-empty too */
5594 di
= dictGetIterator(src
[0].dict
);
5595 while((de
= dictNext(di
)) != NULL
) {
5596 double *score
= zmalloc(sizeof(double)), value
;
5597 *score
= src
[0].weight
* (*(double*)dictGetEntryVal(de
));
5599 for (j
= 1; j
< zsetnum
; j
++) {
5600 dictEntry
*other
= dictFind(src
[j
].dict
,dictGetEntryKey(de
));
5602 value
= src
[j
].weight
* (*(double*)dictGetEntryVal(other
));
5603 zunionInterAggregate(score
, value
, aggregate
);
5609 /* skip entry when not present in every source dict */
5613 robj
*o
= dictGetEntryKey(de
);
5614 dictAdd(dstzset
->dict
,o
,score
);
5615 incrRefCount(o
); /* added to dictionary */
5616 zslInsert(dstzset
->zsl
,*score
,o
);
5617 incrRefCount(o
); /* added to skiplist */
5620 dictReleaseIterator(di
);
5622 } else if (op
== REDIS_OP_UNION
) {
5623 for (i
= 0; i
< zsetnum
; i
++) {
5624 if (!src
[i
].dict
) continue;
5626 di
= dictGetIterator(src
[i
].dict
);
5627 while((de
= dictNext(di
)) != NULL
) {
5628 /* skip key when already processed */
5629 if (dictFind(dstzset
->dict
,dictGetEntryKey(de
)) != NULL
) continue;
5631 double *score
= zmalloc(sizeof(double)), value
;
5632 *score
= src
[i
].weight
* (*(double*)dictGetEntryVal(de
));
5634 /* because the zsets are sorted by size, its only possible
5635 * for sets at larger indices to hold this entry */
5636 for (j
= (i
+1); j
< zsetnum
; j
++) {
5637 dictEntry
*other
= dictFind(src
[j
].dict
,dictGetEntryKey(de
));
5639 value
= src
[j
].weight
* (*(double*)dictGetEntryVal(other
));
5640 zunionInterAggregate(score
, value
, aggregate
);
5644 robj
*o
= dictGetEntryKey(de
);
5645 dictAdd(dstzset
->dict
,o
,score
);
5646 incrRefCount(o
); /* added to dictionary */
5647 zslInsert(dstzset
->zsl
,*score
,o
);
5648 incrRefCount(o
); /* added to skiplist */
5650 dictReleaseIterator(di
);
5653 /* unknown operator */
5654 redisAssert(op
== REDIS_OP_INTER
|| op
== REDIS_OP_UNION
);
5657 deleteKey(c
->db
,dstkey
);
5658 if (dstzset
->zsl
->length
) {
5659 dictAdd(c
->db
->dict
,dstkey
,dstobj
);
5660 incrRefCount(dstkey
);
5661 addReplyLong(c
, dstzset
->zsl
->length
);
5664 decrRefCount(dstobj
);
5665 addReply(c
, shared
.czero
);
5670 static void zunionCommand(redisClient
*c
) {
5671 zunionInterGenericCommand(c
,c
->argv
[1], REDIS_OP_UNION
);
5674 static void zinterCommand(redisClient
*c
) {
5675 zunionInterGenericCommand(c
,c
->argv
[1], REDIS_OP_INTER
);
5678 static void zrangeGenericCommand(redisClient
*c
, int reverse
) {
5680 int start
= atoi(c
->argv
[2]->ptr
);
5681 int end
= atoi(c
->argv
[3]->ptr
);
5690 if (c
->argc
== 5 && !strcasecmp(c
->argv
[4]->ptr
,"withscores")) {
5692 } else if (c
->argc
>= 5) {
5693 addReply(c
,shared
.syntaxerr
);
5697 if ((o
= lookupKeyReadOrReply(c
,c
->argv
[1],shared
.nullmultibulk
)) == NULL
||
5698 checkType(c
,o
,REDIS_ZSET
)) return;
5703 /* convert negative indexes */
5704 if (start
< 0) start
= llen
+start
;
5705 if (end
< 0) end
= llen
+end
;
5706 if (start
< 0) start
= 0;
5707 if (end
< 0) end
= 0;
5709 /* indexes sanity checks */
5710 if (start
> end
|| start
>= llen
) {
5711 /* Out of range start or start > end result in empty list */
5712 addReply(c
,shared
.emptymultibulk
);
5715 if (end
>= llen
) end
= llen
-1;
5716 rangelen
= (end
-start
)+1;
5718 /* check if starting point is trivial, before searching
5719 * the element in log(N) time */
5721 ln
= start
== 0 ? zsl
->tail
: zslGetElementByRank(zsl
, llen
-start
);
5724 zsl
->header
->forward
[0] : zslGetElementByRank(zsl
, start
+1);
5727 /* Return the result in form of a multi-bulk reply */
5728 addReplySds(c
,sdscatprintf(sdsempty(),"*%d\r\n",
5729 withscores
? (rangelen
*2) : rangelen
));
5730 for (j
= 0; j
< rangelen
; j
++) {
5732 addReplyBulk(c
,ele
);
5734 addReplyDouble(c
,ln
->score
);
5735 ln
= reverse
? ln
->backward
: ln
->forward
[0];
5739 static void zrangeCommand(redisClient
*c
) {
5740 zrangeGenericCommand(c
,0);
5743 static void zrevrangeCommand(redisClient
*c
) {
5744 zrangeGenericCommand(c
,1);
5747 /* This command implements both ZRANGEBYSCORE and ZCOUNT.
5748 * If justcount is non-zero, just the count is returned. */
5749 static void genericZrangebyscoreCommand(redisClient
*c
, int justcount
) {
5752 int minex
= 0, maxex
= 0; /* are min or max exclusive? */
5753 int offset
= 0, limit
= -1;
5757 /* Parse the min-max interval. If one of the values is prefixed
5758 * by the "(" character, it's considered "open". For instance
5759 * ZRANGEBYSCORE zset (1.5 (2.5 will match min < x < max
5760 * ZRANGEBYSCORE zset 1.5 2.5 will instead match min <= x <= max */
5761 if (((char*)c
->argv
[2]->ptr
)[0] == '(') {
5762 min
= strtod((char*)c
->argv
[2]->ptr
+1,NULL
);
5765 min
= strtod(c
->argv
[2]->ptr
,NULL
);
5767 if (((char*)c
->argv
[3]->ptr
)[0] == '(') {
5768 max
= strtod((char*)c
->argv
[3]->ptr
+1,NULL
);
5771 max
= strtod(c
->argv
[3]->ptr
,NULL
);
5774 /* Parse "WITHSCORES": note that if the command was called with
5775 * the name ZCOUNT then we are sure that c->argc == 4, so we'll never
5776 * enter the following paths to parse WITHSCORES and LIMIT. */
5777 if (c
->argc
== 5 || c
->argc
== 8) {
5778 if (strcasecmp(c
->argv
[c
->argc
-1]->ptr
,"withscores") == 0)
5783 if (c
->argc
!= (4 + withscores
) && c
->argc
!= (7 + withscores
))
5787 sdsnew("-ERR wrong number of arguments for ZRANGEBYSCORE\r\n"));
5792 if (c
->argc
== (7 + withscores
) && strcasecmp(c
->argv
[4]->ptr
,"limit")) {
5793 addReply(c
,shared
.syntaxerr
);
5795 } else if (c
->argc
== (7 + withscores
)) {
5796 offset
= atoi(c
->argv
[5]->ptr
);
5797 limit
= atoi(c
->argv
[6]->ptr
);
5798 if (offset
< 0) offset
= 0;
5801 /* Ok, lookup the key and get the range */
5802 o
= lookupKeyRead(c
->db
,c
->argv
[1]);
5804 addReply(c
,justcount
? shared
.czero
: shared
.nullmultibulk
);
5806 if (o
->type
!= REDIS_ZSET
) {
5807 addReply(c
,shared
.wrongtypeerr
);
5809 zset
*zsetobj
= o
->ptr
;
5810 zskiplist
*zsl
= zsetobj
->zsl
;
5812 robj
*ele
, *lenobj
= NULL
;
5813 unsigned long rangelen
= 0;
5815 /* Get the first node with the score >= min, or with
5816 * score > min if 'minex' is true. */
5817 ln
= zslFirstWithScore(zsl
,min
);
5818 while (minex
&& ln
&& ln
->score
== min
) ln
= ln
->forward
[0];
5821 /* No element matching the speciifed interval */
5822 addReply(c
,justcount
? shared
.czero
: shared
.emptymultibulk
);
5826 /* We don't know in advance how many matching elements there
5827 * are in the list, so we push this object that will represent
5828 * the multi-bulk length in the output buffer, and will "fix"
5831 lenobj
= createObject(REDIS_STRING
,NULL
);
5833 decrRefCount(lenobj
);
5836 while(ln
&& (maxex
? (ln
->score
< max
) : (ln
->score
<= max
))) {
5839 ln
= ln
->forward
[0];
5842 if (limit
== 0) break;
5845 addReplyBulk(c
,ele
);
5847 addReplyDouble(c
,ln
->score
);
5849 ln
= ln
->forward
[0];
5851 if (limit
> 0) limit
--;
5854 addReplyLong(c
,(long)rangelen
);
5856 lenobj
->ptr
= sdscatprintf(sdsempty(),"*%lu\r\n",
5857 withscores
? (rangelen
*2) : rangelen
);
5863 static void zrangebyscoreCommand(redisClient
*c
) {
5864 genericZrangebyscoreCommand(c
,0);
5867 static void zcountCommand(redisClient
*c
) {
5868 genericZrangebyscoreCommand(c
,1);
5871 static void zcardCommand(redisClient
*c
) {
5875 if ((o
= lookupKeyReadOrReply(c
,c
->argv
[1],shared
.czero
)) == NULL
||
5876 checkType(c
,o
,REDIS_ZSET
)) return;
5879 addReplyUlong(c
,zs
->zsl
->length
);
5882 static void zscoreCommand(redisClient
*c
) {
5887 if ((o
= lookupKeyReadOrReply(c
,c
->argv
[1],shared
.nullbulk
)) == NULL
||
5888 checkType(c
,o
,REDIS_ZSET
)) return;
5891 de
= dictFind(zs
->dict
,c
->argv
[2]);
5893 addReply(c
,shared
.nullbulk
);
5895 double *score
= dictGetEntryVal(de
);
5897 addReplyDouble(c
,*score
);
5901 static void zrankGenericCommand(redisClient
*c
, int reverse
) {
5909 if ((o
= lookupKeyReadOrReply(c
,c
->argv
[1],shared
.nullbulk
)) == NULL
||
5910 checkType(c
,o
,REDIS_ZSET
)) return;
5914 de
= dictFind(zs
->dict
,c
->argv
[2]);
5916 addReply(c
,shared
.nullbulk
);
5920 score
= dictGetEntryVal(de
);
5921 rank
= zslGetRank(zsl
, *score
, c
->argv
[2]);
5924 addReplyLong(c
, zsl
->length
- rank
);
5926 addReplyLong(c
, rank
-1);
5929 addReply(c
,shared
.nullbulk
);
5933 static void zrankCommand(redisClient
*c
) {
5934 zrankGenericCommand(c
, 0);
5937 static void zrevrankCommand(redisClient
*c
) {
5938 zrankGenericCommand(c
, 1);
5941 /* =================================== Hashes =============================== */
5942 static void hsetCommand(redisClient
*c
) {
5944 robj
*o
= lookupKeyWrite(c
->db
,c
->argv
[1]);
5947 o
= createHashObject();
5948 dictAdd(c
->db
->dict
,c
->argv
[1],o
);
5949 incrRefCount(c
->argv
[1]);
5951 if (o
->type
!= REDIS_HASH
) {
5952 addReply(c
,shared
.wrongtypeerr
);
5956 /* We want to convert the zipmap into an hash table right now if the
5957 * entry to be added is too big. Note that we check if the object
5958 * is integer encoded before to try fetching the length in the test below.
5959 * This is because integers are small, but currently stringObjectLen()
5960 * performs a slow conversion: not worth it. */
5961 if (o
->encoding
== REDIS_ENCODING_ZIPMAP
&&
5962 ((c
->argv
[2]->encoding
== REDIS_ENCODING_RAW
&&
5963 sdslen(c
->argv
[2]->ptr
) > server
.hash_max_zipmap_value
) ||
5964 (c
->argv
[3]->encoding
== REDIS_ENCODING_RAW
&&
5965 sdslen(c
->argv
[3]->ptr
) > server
.hash_max_zipmap_value
)))
5967 convertToRealHash(o
);
5970 if (o
->encoding
== REDIS_ENCODING_ZIPMAP
) {
5971 unsigned char *zm
= o
->ptr
;
5972 robj
*valobj
= getDecodedObject(c
->argv
[3]);
5974 zm
= zipmapSet(zm
,c
->argv
[2]->ptr
,sdslen(c
->argv
[2]->ptr
),
5975 valobj
->ptr
,sdslen(valobj
->ptr
),&update
);
5976 decrRefCount(valobj
);
5979 /* And here there is the second check for hash conversion. */
5980 if (zipmapLen(zm
) > server
.hash_max_zipmap_entries
)
5981 convertToRealHash(o
);
5983 tryObjectEncoding(c
->argv
[2]);
5984 /* note that c->argv[3] is already encoded, as the latest arg
5985 * of a bulk command is always integer encoded if possible. */
5986 if (dictReplace(o
->ptr
,c
->argv
[2],c
->argv
[3])) {
5987 incrRefCount(c
->argv
[2]);
5991 incrRefCount(c
->argv
[3]);
5994 addReplySds(c
,sdscatprintf(sdsempty(),":%d\r\n",update
== 0));
5997 static void hincrbyCommand(redisClient
*c
) {
5998 long long value
= 0, incr
= 0;
5999 robj
*o
= lookupKeyWrite(c
->db
,c
->argv
[1]);
6002 o
= createHashObject();
6003 dictAdd(c
->db
->dict
,c
->argv
[1],o
);
6004 incrRefCount(c
->argv
[1]);
6006 if (o
->type
!= REDIS_HASH
) {
6007 addReply(c
,shared
.wrongtypeerr
);
6012 incr
= strtoll(c
->argv
[3]->ptr
, NULL
, 10);
6013 if (o
->encoding
== REDIS_ENCODING_ZIPMAP
) {
6014 unsigned char *zm
= o
->ptr
;
6015 unsigned char *zval
;
6018 /* Find value if already present in hash */
6019 if (zipmapGet(zm
,c
->argv
[2]->ptr
,sdslen(c
->argv
[2]->ptr
),
6021 /* strtoll needs the char* to have a trailing \0, but
6022 * the zipmap doesn't include them. */
6023 sds szval
= sdsnewlen(zval
, zvlen
);
6024 value
= strtoll(szval
,NULL
,10);
6029 sds svalue
= sdscatprintf(sdsempty(),"%lld",value
);
6030 zm
= zipmapSet(zm
,c
->argv
[2]->ptr
,sdslen(c
->argv
[2]->ptr
),
6031 (unsigned char*)svalue
,sdslen(svalue
),NULL
);
6035 /* Check if the zipmap needs to be converted. */
6036 if (zipmapLen(zm
) > server
.hash_max_zipmap_entries
)
6037 convertToRealHash(o
);
6042 /* Find value if already present in hash */
6043 de
= dictFind(o
->ptr
,c
->argv
[2]);
6045 hval
= dictGetEntryVal(de
);
6046 if (hval
->encoding
== REDIS_ENCODING_RAW
)
6047 value
= strtoll(hval
->ptr
,NULL
,10);
6048 else if (hval
->encoding
== REDIS_ENCODING_INT
)
6049 value
= (long)hval
->ptr
;
6051 redisAssert(1 != 1);
6055 hval
= createObject(REDIS_STRING
,sdscatprintf(sdsempty(),"%lld",value
));
6056 tryObjectEncoding(hval
);
6057 if (dictReplace(o
->ptr
,c
->argv
[2],hval
)) {
6058 incrRefCount(c
->argv
[2]);
6063 addReplyLongLong(c
, value
);
6066 static void hgetCommand(redisClient
*c
) {
6069 if ((o
= lookupKeyReadOrReply(c
,c
->argv
[1],shared
.nullbulk
)) == NULL
||
6070 checkType(c
,o
,REDIS_HASH
)) return;
6072 if (o
->encoding
== REDIS_ENCODING_ZIPMAP
) {
6073 unsigned char *zm
= o
->ptr
;
6078 field
= getDecodedObject(c
->argv
[2]);
6079 if (zipmapGet(zm
,field
->ptr
,sdslen(field
->ptr
), &val
,&vlen
)) {
6080 addReplySds(c
,sdscatprintf(sdsempty(),"$%u\r\n", vlen
));
6081 addReplySds(c
,sdsnewlen(val
,vlen
));
6082 addReply(c
,shared
.crlf
);
6083 decrRefCount(field
);
6086 addReply(c
,shared
.nullbulk
);
6087 decrRefCount(field
);
6091 struct dictEntry
*de
;
6093 de
= dictFind(o
->ptr
,c
->argv
[2]);
6095 addReply(c
,shared
.nullbulk
);
6097 robj
*e
= dictGetEntryVal(de
);
6104 static void hdelCommand(redisClient
*c
) {
6108 if ((o
= lookupKeyWriteOrReply(c
,c
->argv
[1],shared
.czero
)) == NULL
||
6109 checkType(c
,o
,REDIS_HASH
)) return;
6111 if (o
->encoding
== REDIS_ENCODING_ZIPMAP
) {
6112 robj
*field
= getDecodedObject(c
->argv
[2]);
6114 o
->ptr
= zipmapDel((unsigned char*) o
->ptr
,
6115 (unsigned char*) field
->ptr
,
6116 sdslen(field
->ptr
), &deleted
);
6117 decrRefCount(field
);
6118 if (zipmapLen((unsigned char*) o
->ptr
) == 0)
6119 deleteKey(c
->db
,c
->argv
[1]);
6121 deleted
= dictDelete((dict
*)o
->ptr
,c
->argv
[2]) == DICT_OK
;
6122 if (htNeedsResize(o
->ptr
)) dictResize(o
->ptr
);
6123 if (dictSize((dict
*)o
->ptr
) == 0) deleteKey(c
->db
,c
->argv
[1]);
6125 if (deleted
) server
.dirty
++;
6126 addReply(c
,deleted
? shared
.cone
: shared
.czero
);
6129 static void hlenCommand(redisClient
*c
) {
6133 if ((o
= lookupKeyReadOrReply(c
,c
->argv
[1],shared
.czero
)) == NULL
||
6134 checkType(c
,o
,REDIS_HASH
)) return;
6136 len
= (o
->encoding
== REDIS_ENCODING_ZIPMAP
) ?
6137 zipmapLen((unsigned char*)o
->ptr
) : dictSize((dict
*)o
->ptr
);
6138 addReplyUlong(c
,len
);
6141 #define REDIS_GETALL_KEYS 1
6142 #define REDIS_GETALL_VALS 2
6143 static void genericHgetallCommand(redisClient
*c
, int flags
) {
6145 unsigned long count
= 0;
6147 if ((o
= lookupKeyReadOrReply(c
,c
->argv
[1],shared
.nullmultibulk
)) == NULL
6148 || checkType(c
,o
,REDIS_HASH
)) return;
6150 lenobj
= createObject(REDIS_STRING
,NULL
);
6152 decrRefCount(lenobj
);
6154 if (o
->encoding
== REDIS_ENCODING_ZIPMAP
) {
6155 unsigned char *p
= zipmapRewind(o
->ptr
);
6156 unsigned char *field
, *val
;
6157 unsigned int flen
, vlen
;
6159 while((p
= zipmapNext(p
,&field
,&flen
,&val
,&vlen
)) != NULL
) {
6162 if (flags
& REDIS_GETALL_KEYS
) {
6163 aux
= createStringObject((char*)field
,flen
);
6164 addReplyBulk(c
,aux
);
6168 if (flags
& REDIS_GETALL_VALS
) {
6169 aux
= createStringObject((char*)val
,vlen
);
6170 addReplyBulk(c
,aux
);
6176 dictIterator
*di
= dictGetIterator(o
->ptr
);
6179 while((de
= dictNext(di
)) != NULL
) {
6180 robj
*fieldobj
= dictGetEntryKey(de
);
6181 robj
*valobj
= dictGetEntryVal(de
);
6183 if (flags
& REDIS_GETALL_KEYS
) {
6184 addReplyBulk(c
,fieldobj
);
6187 if (flags
& REDIS_GETALL_VALS
) {
6188 addReplyBulk(c
,valobj
);
6192 dictReleaseIterator(di
);
6194 lenobj
->ptr
= sdscatprintf(sdsempty(),"*%lu\r\n",count
);
6197 static void hkeysCommand(redisClient
*c
) {
6198 genericHgetallCommand(c
,REDIS_GETALL_KEYS
);
6201 static void hvalsCommand(redisClient
*c
) {
6202 genericHgetallCommand(c
,REDIS_GETALL_VALS
);
6205 static void hgetallCommand(redisClient
*c
) {
6206 genericHgetallCommand(c
,REDIS_GETALL_KEYS
|REDIS_GETALL_VALS
);
6209 static void hexistsCommand(redisClient
*c
) {
6213 if ((o
= lookupKeyReadOrReply(c
,c
->argv
[1],shared
.czero
)) == NULL
||
6214 checkType(c
,o
,REDIS_HASH
)) return;
6216 if (o
->encoding
== REDIS_ENCODING_ZIPMAP
) {
6218 unsigned char *zm
= o
->ptr
;
6220 field
= getDecodedObject(c
->argv
[2]);
6221 exists
= zipmapExists(zm
,field
->ptr
,sdslen(field
->ptr
));
6222 decrRefCount(field
);
6224 exists
= dictFind(o
->ptr
,c
->argv
[2]) != NULL
;
6226 addReply(c
,exists
? shared
.cone
: shared
.czero
);
6229 static void convertToRealHash(robj
*o
) {
6230 unsigned char *key
, *val
, *p
, *zm
= o
->ptr
;
6231 unsigned int klen
, vlen
;
6232 dict
*dict
= dictCreate(&hashDictType
,NULL
);
6234 assert(o
->type
== REDIS_HASH
&& o
->encoding
!= REDIS_ENCODING_HT
);
6235 p
= zipmapRewind(zm
);
6236 while((p
= zipmapNext(p
,&key
,&klen
,&val
,&vlen
)) != NULL
) {
6237 robj
*keyobj
, *valobj
;
6239 keyobj
= createStringObject((char*)key
,klen
);
6240 valobj
= createStringObject((char*)val
,vlen
);
6241 tryObjectEncoding(keyobj
);
6242 tryObjectEncoding(valobj
);
6243 dictAdd(dict
,keyobj
,valobj
);
6245 o
->encoding
= REDIS_ENCODING_HT
;
6250 /* ========================= Non type-specific commands ==================== */
6252 static void flushdbCommand(redisClient
*c
) {
6253 server
.dirty
+= dictSize(c
->db
->dict
);
6254 dictEmpty(c
->db
->dict
);
6255 dictEmpty(c
->db
->expires
);
6256 addReply(c
,shared
.ok
);
6259 static void flushallCommand(redisClient
*c
) {
6260 server
.dirty
+= emptyDb();
6261 addReply(c
,shared
.ok
);
6262 if (server
.bgsavechildpid
!= -1) {
6263 kill(server
.bgsavechildpid
,SIGKILL
);
6264 rdbRemoveTempFile(server
.bgsavechildpid
);
6266 rdbSave(server
.dbfilename
);
6270 static redisSortOperation
*createSortOperation(int type
, robj
*pattern
) {
6271 redisSortOperation
*so
= zmalloc(sizeof(*so
));
6273 so
->pattern
= pattern
;
6277 /* Return the value associated to the key with a name obtained
6278 * substituting the first occurence of '*' in 'pattern' with 'subst' */
6279 static robj
*lookupKeyByPattern(redisDb
*db
, robj
*pattern
, robj
*subst
) {
6283 int prefixlen
, sublen
, postfixlen
;
6284 /* Expoit the internal sds representation to create a sds string allocated on the stack in order to make this function faster */
6288 char buf
[REDIS_SORTKEY_MAX
+1];
6291 /* If the pattern is "#" return the substitution object itself in order
6292 * to implement the "SORT ... GET #" feature. */
6293 spat
= pattern
->ptr
;
6294 if (spat
[0] == '#' && spat
[1] == '\0') {
6298 /* The substitution object may be specially encoded. If so we create
6299 * a decoded object on the fly. Otherwise getDecodedObject will just
6300 * increment the ref count, that we'll decrement later. */
6301 subst
= getDecodedObject(subst
);
6304 if (sdslen(spat
)+sdslen(ssub
)-1 > REDIS_SORTKEY_MAX
) return NULL
;
6305 p
= strchr(spat
,'*');
6307 decrRefCount(subst
);
6312 sublen
= sdslen(ssub
);
6313 postfixlen
= sdslen(spat
)-(prefixlen
+1);
6314 memcpy(keyname
.buf
,spat
,prefixlen
);
6315 memcpy(keyname
.buf
+prefixlen
,ssub
,sublen
);
6316 memcpy(keyname
.buf
+prefixlen
+sublen
,p
+1,postfixlen
);
6317 keyname
.buf
[prefixlen
+sublen
+postfixlen
] = '\0';
6318 keyname
.len
= prefixlen
+sublen
+postfixlen
;
6320 initStaticStringObject(keyobj
,((char*)&keyname
)+(sizeof(long)*2))
6321 decrRefCount(subst
);
6323 /* printf("lookup '%s' => %p\n", keyname.buf,de); */
6324 return lookupKeyRead(db
,&keyobj
);
6327 /* sortCompare() is used by qsort in sortCommand(). Given that qsort_r with
6328 * the additional parameter is not standard but a BSD-specific we have to
6329 * pass sorting parameters via the global 'server' structure */
6330 static int sortCompare(const void *s1
, const void *s2
) {
6331 const redisSortObject
*so1
= s1
, *so2
= s2
;
6334 if (!server
.sort_alpha
) {
6335 /* Numeric sorting. Here it's trivial as we precomputed scores */
6336 if (so1
->u
.score
> so2
->u
.score
) {
6338 } else if (so1
->u
.score
< so2
->u
.score
) {
6344 /* Alphanumeric sorting */
6345 if (server
.sort_bypattern
) {
6346 if (!so1
->u
.cmpobj
|| !so2
->u
.cmpobj
) {
6347 /* At least one compare object is NULL */
6348 if (so1
->u
.cmpobj
== so2
->u
.cmpobj
)
6350 else if (so1
->u
.cmpobj
== NULL
)
6355 /* We have both the objects, use strcoll */
6356 cmp
= strcoll(so1
->u
.cmpobj
->ptr
,so2
->u
.cmpobj
->ptr
);
6359 /* Compare elements directly */
6362 dec1
= getDecodedObject(so1
->obj
);
6363 dec2
= getDecodedObject(so2
->obj
);
6364 cmp
= strcoll(dec1
->ptr
,dec2
->ptr
);
6369 return server
.sort_desc
? -cmp
: cmp
;
6372 /* The SORT command is the most complex command in Redis. Warning: this code
6373 * is optimized for speed and a bit less for readability */
6374 static void sortCommand(redisClient
*c
) {
6377 int desc
= 0, alpha
= 0;
6378 int limit_start
= 0, limit_count
= -1, start
, end
;
6379 int j
, dontsort
= 0, vectorlen
;
6380 int getop
= 0; /* GET operation counter */
6381 robj
*sortval
, *sortby
= NULL
, *storekey
= NULL
;
6382 redisSortObject
*vector
; /* Resulting vector to sort */
6384 /* Lookup the key to sort. It must be of the right types */
6385 sortval
= lookupKeyRead(c
->db
,c
->argv
[1]);
6386 if (sortval
== NULL
) {
6387 addReply(c
,shared
.nullmultibulk
);
6390 if (sortval
->type
!= REDIS_SET
&& sortval
->type
!= REDIS_LIST
&&
6391 sortval
->type
!= REDIS_ZSET
)
6393 addReply(c
,shared
.wrongtypeerr
);
6397 /* Create a list of operations to perform for every sorted element.
6398 * Operations can be GET/DEL/INCR/DECR */
6399 operations
= listCreate();
6400 listSetFreeMethod(operations
,zfree
);
6403 /* Now we need to protect sortval incrementing its count, in the future
6404 * SORT may have options able to overwrite/delete keys during the sorting
6405 * and the sorted key itself may get destroied */
6406 incrRefCount(sortval
);
6408 /* The SORT command has an SQL-alike syntax, parse it */
6409 while(j
< c
->argc
) {
6410 int leftargs
= c
->argc
-j
-1;
6411 if (!strcasecmp(c
->argv
[j
]->ptr
,"asc")) {
6413 } else if (!strcasecmp(c
->argv
[j
]->ptr
,"desc")) {
6415 } else if (!strcasecmp(c
->argv
[j
]->ptr
,"alpha")) {
6417 } else if (!strcasecmp(c
->argv
[j
]->ptr
,"limit") && leftargs
>= 2) {
6418 limit_start
= atoi(c
->argv
[j
+1]->ptr
);
6419 limit_count
= atoi(c
->argv
[j
+2]->ptr
);
6421 } else if (!strcasecmp(c
->argv
[j
]->ptr
,"store") && leftargs
>= 1) {
6422 storekey
= c
->argv
[j
+1];
6424 } else if (!strcasecmp(c
->argv
[j
]->ptr
,"by") && leftargs
>= 1) {
6425 sortby
= c
->argv
[j
+1];
6426 /* If the BY pattern does not contain '*', i.e. it is constant,
6427 * we don't need to sort nor to lookup the weight keys. */
6428 if (strchr(c
->argv
[j
+1]->ptr
,'*') == NULL
) dontsort
= 1;
6430 } else if (!strcasecmp(c
->argv
[j
]->ptr
,"get") && leftargs
>= 1) {
6431 listAddNodeTail(operations
,createSortOperation(
6432 REDIS_SORT_GET
,c
->argv
[j
+1]));
6436 decrRefCount(sortval
);
6437 listRelease(operations
);
6438 addReply(c
,shared
.syntaxerr
);
6444 /* Load the sorting vector with all the objects to sort */
6445 switch(sortval
->type
) {
6446 case REDIS_LIST
: vectorlen
= listLength((list
*)sortval
->ptr
); break;
6447 case REDIS_SET
: vectorlen
= dictSize((dict
*)sortval
->ptr
); break;
6448 case REDIS_ZSET
: vectorlen
= dictSize(((zset
*)sortval
->ptr
)->dict
); break;
6449 default: vectorlen
= 0; redisAssert(0); /* Avoid GCC warning */
6451 vector
= zmalloc(sizeof(redisSortObject
)*vectorlen
);
6454 if (sortval
->type
== REDIS_LIST
) {
6455 list
*list
= sortval
->ptr
;
6459 listRewind(list
,&li
);
6460 while((ln
= listNext(&li
))) {
6461 robj
*ele
= ln
->value
;
6462 vector
[j
].obj
= ele
;
6463 vector
[j
].u
.score
= 0;
6464 vector
[j
].u
.cmpobj
= NULL
;
6472 if (sortval
->type
== REDIS_SET
) {
6475 zset
*zs
= sortval
->ptr
;
6479 di
= dictGetIterator(set
);
6480 while((setele
= dictNext(di
)) != NULL
) {
6481 vector
[j
].obj
= dictGetEntryKey(setele
);
6482 vector
[j
].u
.score
= 0;
6483 vector
[j
].u
.cmpobj
= NULL
;
6486 dictReleaseIterator(di
);
6488 redisAssert(j
== vectorlen
);
6490 /* Now it's time to load the right scores in the sorting vector */
6491 if (dontsort
== 0) {
6492 for (j
= 0; j
< vectorlen
; j
++) {
6496 byval
= lookupKeyByPattern(c
->db
,sortby
,vector
[j
].obj
);
6497 if (!byval
|| byval
->type
!= REDIS_STRING
) continue;
6499 vector
[j
].u
.cmpobj
= getDecodedObject(byval
);
6501 if (byval
->encoding
== REDIS_ENCODING_RAW
) {
6502 vector
[j
].u
.score
= strtod(byval
->ptr
,NULL
);
6504 /* Don't need to decode the object if it's
6505 * integer-encoded (the only encoding supported) so
6506 * far. We can just cast it */
6507 if (byval
->encoding
== REDIS_ENCODING_INT
) {
6508 vector
[j
].u
.score
= (long)byval
->ptr
;
6510 redisAssert(1 != 1);
6515 if (vector
[j
].obj
->encoding
== REDIS_ENCODING_RAW
)
6516 vector
[j
].u
.score
= strtod(vector
[j
].obj
->ptr
,NULL
);
6518 if (vector
[j
].obj
->encoding
== REDIS_ENCODING_INT
)
6519 vector
[j
].u
.score
= (long) vector
[j
].obj
->ptr
;
6521 redisAssert(1 != 1);
6528 /* We are ready to sort the vector... perform a bit of sanity check
6529 * on the LIMIT option too. We'll use a partial version of quicksort. */
6530 start
= (limit_start
< 0) ? 0 : limit_start
;
6531 end
= (limit_count
< 0) ? vectorlen
-1 : start
+limit_count
-1;
6532 if (start
>= vectorlen
) {
6533 start
= vectorlen
-1;
6536 if (end
>= vectorlen
) end
= vectorlen
-1;
6538 if (dontsort
== 0) {
6539 server
.sort_desc
= desc
;
6540 server
.sort_alpha
= alpha
;
6541 server
.sort_bypattern
= sortby
? 1 : 0;
6542 if (sortby
&& (start
!= 0 || end
!= vectorlen
-1))
6543 pqsort(vector
,vectorlen
,sizeof(redisSortObject
),sortCompare
, start
,end
);
6545 qsort(vector
,vectorlen
,sizeof(redisSortObject
),sortCompare
);
6548 /* Send command output to the output buffer, performing the specified
6549 * GET/DEL/INCR/DECR operations if any. */
6550 outputlen
= getop
? getop
*(end
-start
+1) : end
-start
+1;
6551 if (storekey
== NULL
) {
6552 /* STORE option not specified, sent the sorting result to client */
6553 addReplySds(c
,sdscatprintf(sdsempty(),"*%d\r\n",outputlen
));
6554 for (j
= start
; j
<= end
; j
++) {
6558 if (!getop
) addReplyBulk(c
,vector
[j
].obj
);
6559 listRewind(operations
,&li
);
6560 while((ln
= listNext(&li
))) {
6561 redisSortOperation
*sop
= ln
->value
;
6562 robj
*val
= lookupKeyByPattern(c
->db
,sop
->pattern
,
6565 if (sop
->type
== REDIS_SORT_GET
) {
6566 if (!val
|| val
->type
!= REDIS_STRING
) {
6567 addReply(c
,shared
.nullbulk
);
6569 addReplyBulk(c
,val
);
6572 redisAssert(sop
->type
== REDIS_SORT_GET
); /* always fails */
6577 robj
*listObject
= createListObject();
6578 list
*listPtr
= (list
*) listObject
->ptr
;
6580 /* STORE option specified, set the sorting result as a List object */
6581 for (j
= start
; j
<= end
; j
++) {
6586 listAddNodeTail(listPtr
,vector
[j
].obj
);
6587 incrRefCount(vector
[j
].obj
);
6589 listRewind(operations
,&li
);
6590 while((ln
= listNext(&li
))) {
6591 redisSortOperation
*sop
= ln
->value
;
6592 robj
*val
= lookupKeyByPattern(c
->db
,sop
->pattern
,
6595 if (sop
->type
== REDIS_SORT_GET
) {
6596 if (!val
|| val
->type
!= REDIS_STRING
) {
6597 listAddNodeTail(listPtr
,createStringObject("",0));
6599 listAddNodeTail(listPtr
,val
);
6603 redisAssert(sop
->type
== REDIS_SORT_GET
); /* always fails */
6607 if (dictReplace(c
->db
->dict
,storekey
,listObject
)) {
6608 incrRefCount(storekey
);
6610 /* Note: we add 1 because the DB is dirty anyway since even if the
6611 * SORT result is empty a new key is set and maybe the old content
6613 server
.dirty
+= 1+outputlen
;
6614 addReplySds(c
,sdscatprintf(sdsempty(),":%d\r\n",outputlen
));
6618 decrRefCount(sortval
);
6619 listRelease(operations
);
6620 for (j
= 0; j
< vectorlen
; j
++) {
6621 if (sortby
&& alpha
&& vector
[j
].u
.cmpobj
)
6622 decrRefCount(vector
[j
].u
.cmpobj
);
/* Convert an amount of bytes into a human readable string in the form
 * of 100B, 2G, 100M, 4K, and so forth.
 *
 * 's' is the destination buffer and must be large enough for the result;
 * the longest possible output ("1024.00P" style values) is well below 16
 * bytes including the terminator. 'n' is the byte count to render.
 *
 * Values below 1024 are printed as an integer followed by 'B'. Larger
 * values are divided by the matching power of 1024 and printed with two
 * decimals followed by K, M, G, T, P or E. The chain ends with an
 * unconditional branch so that 's' is written for every possible 'n'
 * (previously counts of 1 TiB or more left the buffer untouched). */
static void bytesToHuman(char *s, unsigned long long n) {
    double d;

    if (n < 1024) {
        /* Bytes */
        sprintf(s,"%lluB",n);
    } else if (n < (1024ULL*1024)) {
        d = (double)n/(1024);
        sprintf(s,"%.2fK",d);
    } else if (n < (1024ULL*1024*1024)) {
        d = (double)n/(1024*1024);
        sprintf(s,"%.2fM",d);
    } else if (n < (1024ULL*1024*1024*1024)) {
        d = (double)n/(1024ULL*1024*1024);
        sprintf(s,"%.2fG",d);
    } else if (n < (1024ULL*1024*1024*1024*1024)) {
        d = (double)n/(1024ULL*1024*1024*1024);
        sprintf(s,"%.2fT",d);
    } else if (n < (1024ULL*1024*1024*1024*1024*1024)) {
        d = (double)n/(1024ULL*1024*1024*1024*1024);
        sprintf(s,"%.2fP",d);
    } else {
        /* Everything up to the 64 bit limit fits in exbibytes. */
        d = (double)n/(1024ULL*1024*1024*1024*1024*1024);
        sprintf(s,"%.2fE",d);
    }
}
6648 /* Create the string returned by the INFO command. This is decoupled
6649 * from the INFO command itself as we need to report the same information
6650 * on memory corruption problems.
 *
 * The report is assembled with sdscatprintf() as a sequence of
 * "name:value\r\n" lines: a general section, a replication section when
 * server.masterhost is set, a VM section when server.vm_enabled is set,
 * and one line for every database that holds keys or expires.
 * NOTE(review): several argument lines of the first sdscatprintf() call
 * are not present in this listing, so format/argument pairing cannot be
 * fully checked from here. */
6651 static sds
genRedisInfoString(void) {
/* Seconds elapsed since server.stat_starttime. */
6653 time_t uptime
= time(NULL
)-server
.stat_starttime
;
/* Human readable rendering of the memory usage reported by the allocator. */
6657 bytesToHuman(hmem
,zmalloc_used_memory());
/* General section: the format string comes first, its arguments follow. */
6658 info
= sdscatprintf(sdsempty(),
6659 "redis_version:%s\r\n"
6661 "multiplexing_api:%s\r\n"
6662 "process_id:%ld\r\n"
6663 "uptime_in_seconds:%ld\r\n"
6664 "uptime_in_days:%ld\r\n"
6665 "connected_clients:%d\r\n"
6666 "connected_slaves:%d\r\n"
6667 "blocked_clients:%d\r\n"
6668 "used_memory:%zu\r\n"
6669 "used_memory_human:%s\r\n"
6670 "changes_since_last_save:%lld\r\n"
6671 "bgsave_in_progress:%d\r\n"
6672 "last_save_time:%ld\r\n"
6673 "bgrewriteaof_in_progress:%d\r\n"
6674 "total_connections_received:%lld\r\n"
6675 "total_commands_processed:%lld\r\n"
6676 "expired_keys:%lld\r\n"
6677 "hash_max_zipmap_entries:%ld\r\n"
6678 "hash_max_zipmap_value:%ld\r\n"
6679 "pubsub_channels:%ld\r\n"
6680 "pubsub_patterns:%u\r\n"
6684 (sizeof(long) == 8) ? "64" : "32",
/* Connected clients are counted as all clients minus the slaves. */
6689 listLength(server
.clients
)-listLength(server
.slaves
),
6690 listLength(server
.slaves
),
6691 server
.blpop_blocked_clients
,
6692 zmalloc_used_memory(),
/* A child pid different from -1 means the background job is running. */
6695 server
.bgsavechildpid
!= -1,
6697 server
.bgrewritechildpid
!= -1,
6698 server
.stat_numconnections
,
6699 server
.stat_numcommands
,
6700 server
.stat_expiredkeys
,
6701 server
.hash_max_zipmap_entries
,
6702 server
.hash_max_zipmap_value
,
6703 dictSize(server
.pubsub_channels
),
6704 listLength(server
.pubsub_patterns
),
6705 server
.vm_enabled
!= 0,
/* The role is "master" when no master host is configured. */
6706 server
.masterhost
== NULL
? "master" : "slave"
/* Replication section, only when this instance has a master configured. */
6708 if (server
.masterhost
) {
6709 info
= sdscatprintf(info
,
6710 "master_host:%s\r\n"
6711 "master_port:%d\r\n"
6712 "master_link_status:%s\r\n"
6713 "master_last_io_seconds_ago:%d\r\n"
6716 (server
.replstate
== REDIS_REPL_CONNECTED
) ?
/* -1 is reported when there is no master client object. */
6718 server
.master
? ((int)(time(NULL
)-server
.master
->lastinteraction
)) : -1
/* Virtual memory section, only when VM is enabled. Every value is cast to
 * the exact type named by its conversion specifier. */
6721 if (server
.vm_enabled
) {
6723 info
= sdscatprintf(info
,
6724 "vm_conf_max_memory:%llu\r\n"
6725 "vm_conf_page_size:%llu\r\n"
6726 "vm_conf_pages:%llu\r\n"
6727 "vm_stats_used_pages:%llu\r\n"
6728 "vm_stats_swapped_objects:%llu\r\n"
6729 "vm_stats_swappin_count:%llu\r\n"
6730 "vm_stats_swappout_count:%llu\r\n"
6731 "vm_stats_io_newjobs_len:%lu\r\n"
6732 "vm_stats_io_processing_len:%lu\r\n"
6733 "vm_stats_io_processed_len:%lu\r\n"
6734 "vm_stats_io_active_threads:%lu\r\n"
6735 "vm_stats_blocked_clients:%lu\r\n"
6736 ,(unsigned long long) server
.vm_max_memory
,
6737 (unsigned long long) server
.vm_page_size
,
6738 (unsigned long long) server
.vm_pages
,
6739 (unsigned long long) server
.vm_stats_used_pages
,
6740 (unsigned long long) server
.vm_stats_swapped_objects
,
6741 (unsigned long long) server
.vm_stats_swapins
,
6742 (unsigned long long) server
.vm_stats_swapouts
,
6743 (unsigned long) listLength(server
.io_newjobs
),
6744 (unsigned long) listLength(server
.io_processing
),
6745 (unsigned long) listLength(server
.io_processed
),
6746 (unsigned long) server
.io_active_threads
,
6747 (unsigned long) server
.vm_blocked_clients
/* Per database statistics: a line is emitted only for databases whose key
 * dictionary or expires dictionary is not empty. */
6751 for (j
= 0; j
< server
.dbnum
; j
++) {
6752 long long keys
, vkeys
;
6754 keys
= dictSize(server
.db
[j
].dict
);
6755 vkeys
= dictSize(server
.db
[j
].expires
);
6756 if (keys
|| vkeys
) {
6757 info
= sdscatprintf(info
, "db%d:keys=%lld,expires=%lld\r\n",
6764 static void infoCommand(redisClient
*c
) {
6765 sds info
= genRedisInfoString();
6766 addReplySds(c
,sdscatprintf(sdsempty(),"$%lu\r\n",
6767 (unsigned long)sdslen(info
)));
6768 addReplySds(c
,info
);
6769 addReply(c
,shared
.crlf
);
/* MONITOR command implementation: flag the client as both REDIS_SLAVE and
 * REDIS_MONITOR, append it to server.monitors and reply with shared.ok.
 * NOTE(review): the listing skips a line between the flag update and the
 * list insertion; confirm nothing else is initialized there. */
6772 static void monitorCommand(redisClient
*c
) {
6773 /* ignore MONITOR if already slave or in monitor mode */
6774 if (c
->flags
& REDIS_SLAVE
) return;
6776 c
->flags
|= (REDIS_SLAVE
|REDIS_MONITOR
);
6778 listAddNodeTail(server
.monitors
,c
);
6779 addReply(c
,shared
.ok
);
6782 /* ================================= Expire ================================= */
/* Remove the entry of 'key' from db->expires.
 * The branch opened below is taken when dictDelete() returns DICT_OK.
 * NOTE(review): the return statements are not present in this listing, so
 * the exact return values must be confirmed against the full source. */
6783 static int removeExpire(redisDb
*db
, robj
*key
) {
6784 if (dictDelete(db
->expires
,key
) == DICT_OK
) {
/* Associate the expire time 'when' with 'key' by adding an entry to
 * db->expires. The time is stored directly in the entry value through a
 * cast to (void*), it is not a pointer to allocated memory.
 * The branch opened below handles dictAdd() returning DICT_ERR.
 * NOTE(review): the return statements are not present in this listing;
 * callers in this file treat a non-zero return as "expire set". */
6791 static int setExpire(redisDb
*db
, robj
*key
, time_t when
) {
6792 if (dictAdd(db
->expires
,key
,(void*)when
) == DICT_ERR
) {
6800 /* Return the expire time of the specified key, or -1 if no expire
6801 * is associated with this key (i.e. the key is non volatile) */
6802 static time_t getExpire(redisDb
*db
, robj
*key
) {
6805 /* No expire? return ASAP */
6806 if (dictSize(db
->expires
) == 0 ||
6807 (de
= dictFind(db
->expires
,key
)) == NULL
) return -1;
6809 return (time_t) dictGetEntryVal(de
);
6812 static int expireIfNeeded(redisDb
*db
, robj
*key
) {
6816 /* No expire? return ASAP */
6817 if (dictSize(db
->expires
) == 0 ||
6818 (de
= dictFind(db
->expires
,key
)) == NULL
) return 0;
6820 /* Lookup the expire */
6821 when
= (time_t) dictGetEntryVal(de
);
6822 if (time(NULL
) <= when
) return 0;
6824 /* Delete the key */
6825 dictDelete(db
->expires
,key
);
6826 server
.stat_expiredkeys
++;
6827 return dictDelete(db
->dict
,key
) == DICT_OK
;
/* Delete 'key' from 'db' if it has an entry in db->expires. Unlike
 * expireIfNeeded() no comparison with the current time is performed in the
 * visible code: having an expire set is enough for the key to be removed.
 *
 * Returns 0 when the key has no expire, otherwise whether the deletion
 * from db->dict returned DICT_OK.
 * NOTE(review): one line right after the "Delete the key" comment is not
 * present in this listing. */
6830 static int deleteIfVolatile(redisDb
*db
, robj
*key
) {
6833 /* No expire? return ASAP */
6834 if (dictSize(db
->expires
) == 0 ||
6835 (de
= dictFind(db
->expires
,key
)) == NULL
) return 0;
6837 /* Delete the key */
6839 server
.stat_expiredkeys
++;
6840 dictDelete(db
->expires
,key
);
6841 return dictDelete(db
->dict
,key
) == DICT_OK
;
/* Shared implementation behind expireCommand() and expireatCommand().
 * 'seconds' is relative to the current time: the absolute expire time is
 * computed below as time(NULL)+seconds.
 * NOTE(review): the conditions selecting between the reply paths below are
 * not present in this listing; the comments describe only what each
 * visible statement does. */
6844 static void expireGenericCommand(redisClient
*c
, robj
*key
, time_t seconds
) {
/* Look the key up in the main dictionary of the client's database. */
6847 de
= dictFind(c
->db
->dict
,key
);
6849 addReply(c
,shared
.czero
);
/* Deletion path: the dirty counter is bumped only if deleteKey() reports
 * that something was deleted. */
6853 if (deleteKey(c
->db
,key
)) server
.dirty
++;
6854 addReply(c
, shared
.cone
);
/* Turn the relative timeout into an absolute time. */
6857 time_t when
= time(NULL
)+seconds
;
/* shared.cone is sent when setExpire() returns non-zero. */
6858 if (setExpire(c
->db
,key
,when
)) {
6859 addReply(c
,shared
.cone
);
6862 addReply(c
,shared
.czero
);
6868 static void expireCommand(redisClient
*c
) {
6869 expireGenericCommand(c
,c
->argv
[1],strtol(c
->argv
[2]->ptr
,NULL
,10));
6872 static void expireatCommand(redisClient
*c
) {
6873 expireGenericCommand(c
,c
->argv
[1],strtol(c
->argv
[2]->ptr
,NULL
,10)-time(NULL
));
/* TTL command implementation: reply with an integer derived from the
 * expire time of the key in argv[1].
 * NOTE(review): declarations and the condition guarding the subtraction
 * are not present in this listing. */
6876 static void ttlCommand(redisClient
*c
) {
6880 expire
= getExpire(c
->db
,c
->argv
[1]);
/* Remaining time = stored expire time minus the current time. */
6882 ttl
= (int) (expire
-time(NULL
));
/* Negative results are all reported as -1. */
6883 if (ttl
< 0) ttl
= -1;
6885 addReplySds(c
,sdscatprintf(sdsempty(),":%d\r\n",ttl
));
6888 /* ================================ MULTI/EXEC ============================== */
6890 /* Client state initialization for MULTI/EXEC */
6891 static void initClientMultiState(redisClient
*c
) {
6892 c
->mstate
.commands
= NULL
;
6893 c
->mstate
.count
= 0;
6896 /* Release all the resources associated with MULTI/EXEC state.
 * For every queued command the reference held on each argument is
 * dropped, then the commands array itself is freed. The state fields are
 * not reset here; callers in this file call initClientMultiState() right
 * after.
 * NOTE(review): a line following the inner loop is not present in this
 * listing. */
6897 static void freeClientMultiState(redisClient
*c
) {
6900 for (j
= 0; j
< c
->mstate
.count
; j
++) {
/* mc is the j-th queued command. */
6902 multiCmd
*mc
= c
->mstate
.commands
+j
;
6904 for (i
= 0; i
< mc
->argc
; i
++)
6905 decrRefCount(mc
->argv
[i
]);
6908 zfree(c
->mstate
.commands
);
6911 /* Add a new command into the MULTI commands queue.
 * The client's current argument vector is copied into the new queue slot
 * and a reference is taken on every argument.
 * NOTE(review): the lines storing 'cmd' and the argument count in the
 * slot, and the one updating the queue length, are not present in this
 * listing. */
6912 static void queueMultiCommand(redisClient
*c
, struct redisCommand
*cmd
) {
/* Grow the commands array by one element. */
6916 c
->mstate
.commands
= zrealloc(c
->mstate
.commands
,
6917 sizeof(multiCmd
)*(c
->mstate
.count
+1));
/* mc points to the slot just added at the end of the array. */
6918 mc
= c
->mstate
.commands
+c
->mstate
.count
;
/* Private copy of the pointer array; the objects themselves are shared. */
6921 mc
->argv
= zmalloc(sizeof(robj
*)*c
->argc
);
6922 memcpy(mc
->argv
,c
->argv
,sizeof(robj
*)*c
->argc
);
6923 for (j
= 0; j
< c
->argc
; j
++)
6924 incrRefCount(mc
->argv
[j
]);
6928 static void multiCommand(redisClient
*c
) {
6929 c
->flags
|= REDIS_MULTI
;
6930 addReply(c
,shared
.ok
);
6933 static void discardCommand(redisClient
*c
) {
6934 if (!(c
->flags
& REDIS_MULTI
)) {
6935 addReplySds(c
,sdsnew("-ERR DISCARD without MULTI\r\n"));
6939 freeClientMultiState(c
);
6940 initClientMultiState(c
);
6941 c
->flags
&= (~REDIS_MULTI
);
6942 addReply(c
,shared
.ok
);
6945 static void execCommand(redisClient
*c
) {
6950 if (!(c
->flags
& REDIS_MULTI
)) {
6951 addReplySds(c
,sdsnew("-ERR EXEC without MULTI\r\n"));
6955 orig_argv
= c
->argv
;
6956 orig_argc
= c
->argc
;
6957 addReplySds(c
,sdscatprintf(sdsempty(),"*%d\r\n",c
->mstate
.count
));
6958 for (j
= 0; j
< c
->mstate
.count
; j
++) {
6959 c
->argc
= c
->mstate
.commands
[j
].argc
;
6960 c
->argv
= c
->mstate
.commands
[j
].argv
;
6961 call(c
,c
->mstate
.commands
[j
].cmd
);
6963 c
->argv
= orig_argv
;
6964 c
->argc
= orig_argc
;
6965 freeClientMultiState(c
);
6966 initClientMultiState(c
);
6967 c
->flags
&= (~REDIS_MULTI
);
6970 /* =========================== Blocking Operations ========================= */
6972 /* Currently Redis blocking operations support is limited to list POP ops,
6973 * so the current implementation is not fully generic, but it is also not
6974 * completely specific so it will not require a rewrite to support new
6975 * kind of blocking operations in the future.
6977 * Still it's important to note that list blocking operations can be already
6978 * used as a notification mechanism in order to implement other blocking
6979 * operations at application level, so there must be a very strong evidence
6980 * of usefulness and generality before new blocking operations are implemented.
6982 * This is how the current blocking POP works, we use BLPOP as example:
6983 * - If the user calls BLPOP and the key exists and contains a non empty list
6984 * then LPOP is called instead. So BLPOP is semantically the same as LPOP
6985 * if there is no need to block.
6986 * - If instead BLPOP is called and the key does not exist or the list is
6987 * empty we need to block. In order to do so we remove the notification for
6988 * new data to read in the client socket (so that we'll not serve new
6989 * requests if the blocking request is not served). Also we put the client
6990 * in a dictionary (db->blockingkeys) mapping keys to a list of clients
6991 * blocking for these keys.
6992 * - If a PUSH operation against a key with blocked clients waiting is
6993 * performed, we serve the first in the list: basically instead of pushing
6994 * the new element inside the list we return it to the (first / oldest)
6995 * blocking client, unblock the client, and remove it from the list.
6997 * The above comment and the source code should be enough in order to understand
6998 * the implementation and modify / fix it later. */
7001 /* Set a client in blocking mode for the specified keys, with the specified timeout. */
/* Put client 'c' in blocked state waiting on 'numkeys' keys.
 * The keys and 'timeout' are recorded in the client structure, the client
 * is appended to the per-key list of waiting clients stored in
 * c->db->blockingkeys, and finally the client is flagged REDIS_BLOCKED and
 * the global counter of blocked clients is incremented.
 * NOTE(review): local declarations and the statements creating the list
 * for a key seen for the first time are not present in this listing. */
7003 static void blockForKeys(redisClient
*c
, robj
**keys
, int numkeys
, time_t timeout
) {
7008 c
->blockingkeys
= zmalloc(sizeof(robj
*)*numkeys
);
7009 c
->blockingkeysnum
= numkeys
;
7010 c
->blockingto
= timeout
;
7011 for (j
= 0; j
< numkeys
; j
++) {
7012 /* Add the key in the client structure, to map clients -> keys */
7013 c
->blockingkeys
[j
] = keys
[j
];
7014 incrRefCount(keys
[j
]);
7016 /* And in the other "side", to map keys -> clients */
7017 de
= dictFind(c
->db
->blockingkeys
,keys
[j
]);
7021 /* For every key we take a list of clients blocked for it */
/* The dictionary keeps its own reference to the key object. */
7023 retval
= dictAdd(c
->db
->blockingkeys
,keys
[j
],l
);
7024 incrRefCount(keys
[j
]);
7025 assert(retval
== DICT_OK
);
/* A list already exists for this key: reuse it. */
7027 l
= dictGetEntryVal(de
);
7029 listAddNodeTail(l
,c
);
7031 /* Mark the client as a blocked client */
7032 c
->flags
|= REDIS_BLOCKED
;
7033 server
.blpop_blocked_clients
++;
7036 /* Unblock a client that's waiting in a blocking operation such as BLPOP.
 * The client is removed from the waiting list of every key it was blocked
 * on, its array of blocking keys is released, the REDIS_BLOCKED flag is
 * cleared and the global counter of blocked clients is decremented.
 * Pending data in the query buffer, if any, is then processed. */
7037 static void unblockClientWaitingData(redisClient
*c
) {
7042 assert(c
->blockingkeys
!= NULL
);
7043 /* The client may wait for multiple keys, so unblock it for every key. */
7044 for (j
= 0; j
< c
->blockingkeysnum
; j
++) {
7045 /* Remove this client from the list of clients waiting for this key. */
7046 de
= dictFind(c
->db
->blockingkeys
,c
->blockingkeys
[j
]);
7048 l
= dictGetEntryVal(de
);
7049 listDelNode(l
,listSearchKey(l
,c
));
7050 /* If the list is empty we need to remove it to avoid wasting memory */
7051 if (listLength(l
) == 0)
7052 dictDelete(c
->db
->blockingkeys
,c
->blockingkeys
[j
]);
/* Drop the reference blockForKeys() took for the client side mapping. */
7053 decrRefCount(c
->blockingkeys
[j
]);
7055 /* Cleanup the client structure */
7056 zfree(c
->blockingkeys
);
7057 c
->blockingkeys
= NULL
;
7058 c
->flags
&= (~REDIS_BLOCKED
);
7059 server
.blpop_blocked_clients
--;
7060 /* We want to process data if there is some command waiting
7061 * in the input buffer. Note that this is safe even if
7062 * unblockClientWaitingData() gets called from freeClient() because
7063 * freeClient() will be smart enough to call this function
7064 * *after* c->querybuf was set to NULL. */
7065 if (c
->querybuf
&& sdslen(c
->querybuf
) > 0) processInputBuffer(c
);
7068 /* This should be called from any function PUSHing into lists.
7069 * 'c' is the "pushing client", 'key' is the key it is pushing data against,
7070 * 'ele' is the element pushed.
7072 * If the function returns 0 there was no client waiting for a list push
7075 * If the function returns 1 there was a client waiting for a list push
7076 * against this key, the element was passed to this client thus it's not
7077 * needed to actually add it to the list and the caller should return asap. */
/* NOTE(review): the lines selecting the list node 'ln' and the final
 * return statement are not present in this listing. */
7078 static int handleClientsWaitingListPush(redisClient
*c
, robj
*key
, robj
*ele
) {
7079 struct dictEntry
*de
;
7080 redisClient
*receiver
;
/* No entry in the blocking keys dictionary: nobody is waiting. */
7084 de
= dictFind(c
->db
->blockingkeys
,key
);
7085 if (de
== NULL
) return 0;
7086 l
= dictGetEntryVal(de
);
7089 receiver
= ln
->value
;
/* The receiver gets a two elements multi bulk reply: key name, element. */
7091 addReplySds(receiver
,sdsnew("*2\r\n"));
7092 addReplyBulk(receiver
,key
);
7093 addReplyBulk(receiver
,ele
);
7094 unblockClientWaitingData(receiver
);
7098 /* Blocking RPOP/LPOP.
 * 'where' is passed through to popGenericCommand() (the callers in this
 * file use REDIS_HEAD and REDIS_TAIL). All the arguments but the last are
 * keys, the last one is the timeout. The keys are scanned in order: the
 * first one holding a non empty list is served with a normal pop,
 * otherwise the client is blocked on all the keys.
 * NOTE(review): several lines (declarations, NULL checks, returns and the
 * setup of the temporary argument vector) are not present in this
 * listing. */
7099 static void blockingPopGenericCommand(redisClient
*c
, int where
) {
7104 for (j
= 1; j
< c
->argc
-1; j
++) {
7105 o
= lookupKeyWrite(c
->db
,c
->argv
[j
]);
7107 if (o
->type
!= REDIS_LIST
) {
7108 addReply(c
,shared
.wrongtypeerr
);
7111 list
*list
= o
->ptr
;
7112 if (listLength(list
) != 0) {
7113 /* If the list contains elements fall back to the usual
7114 * non-blocking POP operation */
7115 robj
*argv
[2], **orig_argv
;
7118 /* We need to alter the command arguments before to call
7119 * popGenericCommand() as the command takes a single key. */
7120 orig_argv
= c
->argv
;
7121 orig_argc
= c
->argc
;
7122 argv
[1] = c
->argv
[j
];
7126 /* Also the return value is different, we need to output
7127 * the multi bulk reply header and the key name. The
7128 * "real" command will add the last element (the value)
7129 * for us. If this sounds like a hack to you it's just
7130 * because it is... */
7131 addReplySds(c
,sdsnew("*2\r\n"));
7132 addReplyBulk(c
,argv
[1]);
7133 popGenericCommand(c
,where
);
7135 /* Fix the client structure with the original stuff */
7136 c
->argv
= orig_argv
;
7137 c
->argc
= orig_argc
;
7143 /* If the list is empty or the key does not exist we must block */
7144 timeout
= strtol(c
->argv
[c
->argc
-1]->ptr
,NULL
,10);
/* A positive timeout is relative: make it an absolute time. Other values
 * are passed to blockForKeys() unchanged. */
7145 if (timeout
> 0) timeout
+= time(NULL
);
/* Keys are argv[1] .. argv[argc-2]. */
7146 blockForKeys(c
,c
->argv
+1,c
->argc
-2,timeout
);
7149 static void blpopCommand(redisClient
*c
) {
7150 blockingPopGenericCommand(c
,REDIS_HEAD
);
7153 static void brpopCommand(redisClient
*c
) {
7154 blockingPopGenericCommand(c
,REDIS_TAIL
);
7157 /* =============================== Replication ============================= */
/* Write 'size' bytes from 'ptr' to 'fd', waiting for the descriptor to
 * become writable between attempts. -1 is returned when write() fails.
 * 'timeout' is compared against a difference of time() values, so it is
 * expressed in seconds.
 * NOTE(review): the loop header, the bookkeeping after a partial write and
 * the remaining return statements are not present in this listing; 'ret'
 * is initialized to 'size', presumably the value returned on success. */
7159 static int syncWrite(int fd
, char *ptr
, ssize_t size
, int timeout
) {
7160 ssize_t nwritten
, ret
= size
;
7161 time_t start
= time(NULL
);
/* Wait for the descriptor to be writable (1000 is presumably a timeout in
 * milliseconds -- confirm against aeWait()). */
7165 if (aeWait(fd
,AE_WRITABLE
,1000) & AE_WRITABLE
) {
7166 nwritten
= write(fd
,ptr
,size
);
7167 if (nwritten
== -1) return -1;
/* More than 'timeout' seconds elapsed since the call started. */
7171 if ((time(NULL
)-start
) > timeout
) {
/* Read from 'fd' into 'ptr', waiting for the descriptor to become readable
 * between attempts. -1 is returned when read() fails. 'timeout' is
 * compared against a difference of time() values, so it is expressed in
 * seconds.
 * NOTE(review): the loop header, the handling of short reads / EOF and the
 * remaining return statements are not present in this listing. */
7179 static int syncRead(int fd
, char *ptr
, ssize_t size
, int timeout
) {
7180 ssize_t nread
, totread
= 0;
7181 time_t start
= time(NULL
);
/* Wait for the descriptor to be readable (1000 is presumably a timeout in
 * milliseconds -- confirm against aeWait()). */
7185 if (aeWait(fd
,AE_READABLE
,1000) & AE_READABLE
) {
7186 nread
= read(fd
,ptr
,size
);
7187 if (nread
== -1) return -1;
/* More than 'timeout' seconds elapsed since the call started. */
7192 if ((time(NULL
)-start
) > timeout
) {
/* Read a line from 'fd' into 'ptr' using syncRead() one byte at a time.
 * -1 is returned as soon as a single byte read fails.
 * NOTE(review): the loop, the end-of-line test and the return value on
 * success are not present in this listing. */
7200 static int syncReadLine(int fd
, char *ptr
, ssize_t size
, int timeout
) {
7207 if (syncRead(fd
,&c
,1,timeout
) == -1) return -1;
/* If at least one byte was stored and the previous one is a carriage
 * return, overwrite it with the string terminator. */
7210 if (nread
&& *(ptr
-1) == '\r') *(ptr
-1) = '\0';
/* SYNC command implementation, issued by a slave that wants to
 * synchronize. Depending on whether a background save is already running
 * the client is put in REDIS_REPL_WAIT_BGSAVE_END or
 * REDIS_REPL_WAIT_BGSAVE_START state, then it is flagged as REDIS_SLAVE
 * and appended to server.slaves.
 * NOTE(review): returns, else branches and a few assignments are not
 * present in this listing. The "-ERR Unalbe ..." reply below contains a
 * typo ("Unable"); it is a runtime string and is left untouched here. */
7221 static void syncCommand(redisClient
*c
) {
7222 /* ignore SYNC if already slave or in monitor mode */
7223 if (c
->flags
& REDIS_SLAVE
) return;
7225 /* SYNC can't be issued when the server has pending data to send to
7226 * the client about already issued commands. We need a fresh reply
7227 * buffer registering the differences between the BGSAVE and the current
7228 * dataset, so that we can copy to other slaves if needed. */
7229 if (listLength(c
->reply
) != 0) {
7230 addReplySds(c
,sdsnew("-ERR SYNC is invalid with pending input\r\n"));
7234 redisLog(REDIS_NOTICE
,"Slave ask for synchronization");
7235 /* Here we need to check if there is a background saving operation
7236 * in progress, or if it is required to start one */
7237 if (server
.bgsavechildpid
!= -1) {
7238 /* Ok a background save is in progress. Let's check if it is a good
7239 * one for replication, i.e. if there is another slave that is
7240 * registering differences since the server forked to save */
7245 listRewind(server
.slaves
,&li
);
7246 while((ln
= listNext(&li
))) {
7248 if (slave
->replstate
== REDIS_REPL_WAIT_BGSAVE_END
) break;
7251 /* Perfect, the server is already registering differences for
7252 * another slave. Set the right state, and copy the buffer. */
7253 listRelease(c
->reply
);
7254 c
->reply
= listDup(slave
->reply
);
7255 c
->replstate
= REDIS_REPL_WAIT_BGSAVE_END
;
7256 redisLog(REDIS_NOTICE
,"Waiting for end of BGSAVE for SYNC");
7258 /* No way, we need to wait for the next BGSAVE in order to
7259 * register differences */
7260 c
->replstate
= REDIS_REPL_WAIT_BGSAVE_START
;
7261 redisLog(REDIS_NOTICE
,"Waiting for next BGSAVE for SYNC");
7264 /* Ok we don't have a BGSAVE in progress, let's start one */
7265 redisLog(REDIS_NOTICE
,"Starting BGSAVE for SYNC");
7266 if (rdbSaveBackground(server
.dbfilename
) != REDIS_OK
) {
7267 redisLog(REDIS_NOTICE
,"Replication failed, can't BGSAVE");
7268 addReplySds(c
,sdsnew("-ERR Unalbe to perform background save\r\n"));
7271 c
->replstate
= REDIS_REPL_WAIT_BGSAVE_END
;
/* From now on the client is handled as a slave. */
7274 c
->flags
|= REDIS_SLAVE
;
7276 listAddNodeTail(server
.slaves
,c
);
/* Writable event handler used to transfer the DB file to a slave.
 * 'privdata' is the slave client. On the first call (repldboff == 0) the
 * bulk length header is written, then at every call up to REDIS_IOBUF_LEN
 * bytes are read from the DB file at the current offset and written to
 * the socket. When the offset reaches repldbsize the file is closed, the
 * slave state becomes REDIS_REPL_ONLINE and the writable handler is
 * replaced with sendReplyToClient.
 * NOTE(review): error paths and some statements are not present in this
 * listing. The header is formatted with "%lld" while its argument is cast
 * to unsigned long long -- confirm the intended specifier. */
7280 static void sendBulkToSlave(aeEventLoop
*el
, int fd
, void *privdata
, int mask
) {
7281 redisClient
*slave
= privdata
;
7283 REDIS_NOTUSED(mask
);
7284 char buf
[REDIS_IOBUF_LEN
];
7285 ssize_t nwritten
, buflen
;
7287 if (slave
->repldboff
== 0) {
7288 /* Write the bulk write count before to transfer the DB. In theory here
7289 * we don't know how much room there is in the output buffer of the
7290 * socket, but in practice SO_SNDLOWAT (the minimum count for output
7291 * operations) will never be smaller than the few bytes we need. */
7294 bulkcount
= sdscatprintf(sdsempty(),"$%lld\r\n",(unsigned long long)
/* A short write of the header is treated like a failure. */
7296 if (write(fd
,bulkcount
,sdslen(bulkcount
)) != (signed)sdslen(bulkcount
))
/* Position the DB file at the first byte not yet sent. */
7304 lseek(slave
->repldbfd
,slave
->repldboff
,SEEK_SET
);
7305 buflen
= read(slave
->repldbfd
,buf
,REDIS_IOBUF_LEN
);
7307 redisLog(REDIS_WARNING
,"Read error sending DB to slave: %s",
7308 (buflen
== 0) ? "premature EOF" : strerror(errno
));
7312 if ((nwritten
= write(fd
,buf
,buflen
)) == -1) {
7313 redisLog(REDIS_VERBOSE
,"Write error sending DB to slave: %s",
/* Advance by what was actually written, which may be less than buflen. */
7318 slave
->repldboff
+= nwritten
;
7319 if (slave
->repldboff
== slave
->repldbsize
) {
7320 close(slave
->repldbfd
);
7321 slave
->repldbfd
= -1;
7322 aeDeleteFileEvent(server
.el
,slave
->fd
,AE_WRITABLE
);
7323 slave
->replstate
= REDIS_REPL_ONLINE
;
7324 if (aeCreateFileEvent(server
.el
, slave
->fd
, AE_WRITABLE
,
7325 sendReplyToClient
, slave
) == AE_ERR
) {
7329 addReplySds(slave
,sdsempty());
7330 redisLog(REDIS_NOTICE
,"Synchronization with slave succeeded");
7334 /* This function is called at the end of every background saving.
7335 * The argument bgsaveerr is REDIS_OK if the background saving succeeded
7336 * otherwise REDIS_ERR is passed to the function.
7338 * The goal of this function is to handle slaves waiting for a successful
7339 * background saving in order to perform non-blocking synchronization. */
/* NOTE(review): several lines (error handling bodies, the update of
 * 'startbgsave' and the condition guarding the new background save) are
 * not present in this listing. */
7340 static void updateSlavesWaitingBgsave(int bgsaveerr
) {
7342 int startbgsave
= 0;
7345 listRewind(server
.slaves
,&li
);
7346 while((ln
= listNext(&li
))) {
7347 redisClient
*slave
= ln
->value
;
/* Slaves that were waiting for a save to start are moved to the
 * "waiting for the end" state. */
7349 if (slave
->replstate
== REDIS_REPL_WAIT_BGSAVE_START
) {
7351 slave
->replstate
= REDIS_REPL_WAIT_BGSAVE_END
;
7352 } else if (slave
->replstate
== REDIS_REPL_WAIT_BGSAVE_END
) {
7353 struct redis_stat buf
;
7355 if (bgsaveerr
!= REDIS_OK
) {
7357 redisLog(REDIS_WARNING
,"SYNC failed. BGSAVE child returned an error");
/* Open the DB file just saved and get its size. */
7360 if ((slave
->repldbfd
= open(server
.dbfilename
,O_RDONLY
)) == -1 ||
7361 redis_fstat(slave
->repldbfd
,&buf
) == -1) {
7363 redisLog(REDIS_WARNING
,"SYNC failed. Can't open/stat DB after BGSAVE: %s", strerror(errno
));
/* Start the transfer from offset zero: the writable handler of the slave
 * socket is replaced with sendBulkToSlave. */
7366 slave
->repldboff
= 0;
7367 slave
->repldbsize
= buf
.st_size
;
7368 slave
->replstate
= REDIS_REPL_SEND_BULK
;
7369 aeDeleteFileEvent(server
.el
,slave
->fd
,AE_WRITABLE
);
7370 if (aeCreateFileEvent(server
.el
, slave
->fd
, AE_WRITABLE
, sendBulkToSlave
, slave
) == AE_ERR
) {
7377 if (rdbSaveBackground(server
.dbfilename
) != REDIS_OK
) {
7380 listRewind(server
.slaves
,&li
);
7381 redisLog(REDIS_WARNING
,"SYNC failed. BGSAVE failed");
7382 while((ln
= listNext(&li
))) {
7383 redisClient
*slave
= ln
->value
;
7385 if (slave
->replstate
== REDIS_REPL_WAIT_BGSAVE_START
)
/* Synchronize this instance with its master: connect to
 * server.masterhost:server.masterport, authenticate if server.masterauth
 * is set, send SYNC, read the "$<count>" bulk header, store the payload in
 * a temporary file, rename it to server.dbfilename and load it. On success
 * a client object flagged REDIS_MASTER is created for the connection and
 * server.replstate is set to REDIS_REPL_CONNECTED.
 * NOTE(review): cleanup code, loop headers and return statements are not
 * present in this listing. */
7392 static int syncWithMaster(void) {
7393 char buf
[1024], tmpfile
[256], authcmd
[1024];
7395 int fd
= anetTcpConnect(NULL
,server
.masterhost
,server
.masterport
);
7396 int dfd
, maxtries
= 5;
7399 redisLog(REDIS_WARNING
,"Unable to connect to MASTER: %s",
7404 /* AUTH with the master if required. */
7405 if(server
.masterauth
) {
7406 snprintf(authcmd
, 1024, "AUTH %s\r\n", server
.masterauth
);
/* strlen(masterauth)+7 is the length of "AUTH " plus the password plus
 * CRLF. NOTE(review): if snprintf() truncated the command this length
 * exceeds what was actually formatted. */
7407 if (syncWrite(fd
, authcmd
, strlen(server
.masterauth
)+7, 5) == -1) {
7409 redisLog(REDIS_WARNING
,"Unable to AUTH to MASTER: %s",
7413 /* Read the AUTH result. */
7414 if (syncReadLine(fd
,buf
,1024,3600) == -1) {
7416 redisLog(REDIS_WARNING
,"I/O error reading auth result from MASTER: %s",
7420 if (buf
[0] != '+') {
7422 redisLog(REDIS_WARNING
,"Cannot AUTH to MASTER, is the masterauth password correct?");
7427 /* Issue the SYNC command */
7428 if (syncWrite(fd
,"SYNC \r\n",7,5) == -1) {
7430 redisLog(REDIS_WARNING
,"I/O error writing to MASTER: %s",
7434 /* Read the bulk write count */
7435 if (syncReadLine(fd
,buf
,1024,3600) == -1) {
7437 redisLog(REDIS_WARNING
,"I/O error reading bulk count from MASTER: %s",
7441 if (buf
[0] != '$') {
7443 redisLog(REDIS_WARNING
,"Bad protocol from MASTER, the first byte is not '$', are you sure the host and port are right?");
/* The payload size follows the '$' prefix. */
7446 dumpsize
= strtol(buf
+1,NULL
,10);
7447 redisLog(REDIS_NOTICE
,"Receiving %ld bytes data dump from MASTER",dumpsize
);
7448 /* Read the bulk write data on a temp file */
/* The name is built from the current time and the process id; O_EXCL makes
 * open() fail if the file already exists. */
7450 snprintf(tmpfile
,256,
7451 "temp-%d.%ld.rdb",(int)time(NULL
),(long int)getpid());
7452 dfd
= open(tmpfile
,O_CREAT
|O_WRONLY
|O_EXCL
,0644);
7453 if (dfd
!= -1) break;
7458 redisLog(REDIS_WARNING
,"Opening the temp file needed for MASTER <-> SLAVE synchronization: %s",strerror(errno
));
7462 int nread
, nwritten
;
/* Never read more than what is left of the payload, 1024 bytes at most. */
7464 nread
= read(fd
,buf
,(dumpsize
< 1024)?dumpsize
:1024);
7466 redisLog(REDIS_WARNING
,"I/O error trying to sync with MASTER: %s",
7472 nwritten
= write(dfd
,buf
,nread
);
7473 if (nwritten
== -1) {
7474 redisLog(REDIS_WARNING
,"Write error writing to the DB dump file needed for MASTER <-> SLAVE synchrnonization: %s", strerror(errno
));
7482 if (rename(tmpfile
,server
.dbfilename
) == -1) {
7483 redisLog(REDIS_WARNING
,"Failed trying to rename the temp DB into dump.rdb in MASTER <-> SLAVE synchronization: %s", strerror(errno
));
7489 if (rdbLoad(server
.dbfilename
) != REDIS_OK
) {
7490 redisLog(REDIS_WARNING
,"Failed trying to load the MASTER synchronization DB from disk");
/* The connection to the master becomes a client, already authenticated. */
7494 server
.master
= createClient(fd
);
7495 server
.master
->flags
|= REDIS_MASTER
;
7496 server
.master
->authenticated
= 1;
7497 server
.replstate
= REDIS_REPL_CONNECTED
;
7501 static void slaveofCommand(redisClient
*c
) {
7502 if (!strcasecmp(c
->argv
[1]->ptr
,"no") &&
7503 !strcasecmp(c
->argv
[2]->ptr
,"one")) {
7504 if (server
.masterhost
) {
7505 sdsfree(server
.masterhost
);
7506 server
.masterhost
= NULL
;
7507 if (server
.master
) freeClient(server
.master
);
7508 server
.replstate
= REDIS_REPL_NONE
;
7509 redisLog(REDIS_NOTICE
,"MASTER MODE enabled (user request)");
7512 sdsfree(server
.masterhost
);
7513 server
.masterhost
= sdsdup(c
->argv
[1]->ptr
);
7514 server
.masterport
= atoi(c
->argv
[2]->ptr
);
7515 if (server
.master
) freeClient(server
.master
);
7516 server
.replstate
= REDIS_REPL_CONNECT
;
7517 redisLog(REDIS_NOTICE
,"SLAVE OF %s:%d enabled (user request)",
7518 server
.masterhost
, server
.masterport
);
7520 addReply(c
,shared
.ok
);
7523 /* ============================ Maxmemory directive ======================== */
7525 /* Try to free one object from the pre-allocated objects free list.
7526 * This is useful under low mem conditions as by default we take 1 million
7527 * free objects allocated. On success REDIS_OK is returned, otherwise a value other than REDIS_OK. */
/* Remove the first object from server.objfreelist, if the list is not
 * empty. The free list mutex is taken only when VM is enabled, and it is
 * released on both paths.
 * NOTE(review): the lines releasing the object and the return statements
 * are not present in this listing. */
7529 static int tryFreeOneObjectFromFreelist(void) {
7532 if (server
.vm_enabled
) pthread_mutex_lock(&server
.obj_freelist_mutex
);
7533 if (listLength(server
.objfreelist
)) {
/* Detach the head node, keeping the object it references in 'o'. */
7534 listNode
*head
= listFirst(server
.objfreelist
);
7535 o
= listNodeValue(head
);
7536 listDelNode(server
.objfreelist
,head
);
7537 if (server
.vm_enabled
) pthread_mutex_unlock(&server
.obj_freelist_mutex
);
/* Empty list: just release the mutex. */
7541 if (server
.vm_enabled
) pthread_mutex_unlock(&server
.obj_freelist_mutex
);
7546 /* This function gets called when 'maxmemory' is set on the config file to limit
7547 * the max memory used by the server, and we are out of memory.
7548 * This function will try to, in order:
7550 * - Free objects from the free list
7551 * - Try to remove keys with an EXPIRE set
7553 * If it is not possible to free enough memory to reach used-memory < maxmemory
7554 * the server will start refusing commands that would further increase memory usage. */
/* Free memory while a limit is configured (server.maxmemory != 0) and the
 * memory in use is above it. At every iteration an object from the free
 * list is released if possible; otherwise, for every database with
 * volatile keys, three random entries of the expires dictionary are
 * sampled and the sampled key with the smallest expire time is deleted.
 * The function returns when an iteration could not free anything.
 * NOTE(review): the declarations of 'minttl' and 't' and the lines
 * updating 'minttl' and 'freed' are not present in this listing. */
7557 static void freeMemoryIfNeeded(void) {
7558 while (server
.maxmemory
&& zmalloc_used_memory() > server
.maxmemory
) {
7559 int j
, k
, freed
= 0;
/* Cheapest option first: re-check the limit after every freed object. */
7561 if (tryFreeOneObjectFromFreelist() == REDIS_OK
) continue;
7562 for (j
= 0; j
< server
.dbnum
; j
++) {
7564 robj
*minkey
= NULL
;
7565 struct dictEntry
*de
;
7567 if (dictSize(server
.db
[j
].expires
)) {
7569 /* From a sample of three keys drop the one nearest to
7570 * the natural expire */
7571 for (k
= 0; k
< 3; k
++) {
7574 de
= dictGetRandomKey(server
.db
[j
].expires
);
7575 t
= (time_t) dictGetEntryVal(de
);
/* minttl == -1 means no candidate was selected yet. */
7576 if (minttl
== -1 || t
< minttl
) {
7577 minkey
= dictGetEntryKey(de
);
7581 deleteKey(server
.db
+j
,minkey
);
7584 if (!freed
) return; /* nothing to free... */
7588 /* ============================== Append Only file ========================== */
/* Append a command to the append only file.
 * 'cmd' is the command executed, 'dictid' the database it targeted, and
 * 'argv'/'argc' its arguments. The command is serialized as a multi bulk
 * request, preceded by a SELECT when 'dictid' differs from the database of
 * the last appended command. EXPIRE is rewritten as EXPIREAT with an
 * absolute time. The buffer is written with a single write(), copied to
 * the rewrite buffer when a background rewrite is in progress, and
 * fsync()ed according to server.appendfsync.
 * NOTE(review): several lines (declarations, the switch to the temporary
 * argument vector, exit calls, buffer release) are not present in this
 * listing. */
7590 static void feedAppendOnlyFile(struct redisCommand
*cmd
, int dictid
, robj
**argv
, int argc
) {
7591 sds buf
= sdsempty();
7597 /* The DB this command was targeting is not the same as the last command
7598 * we appended. To issue a SELECT command is needed. */
7599 if (dictid
!= server
.appendseldb
) {
7602 snprintf(seldb
,sizeof(seldb
),"%d",dictid
);
7603 buf
= sdscatprintf(buf
,"*2\r\n$6\r\nSELECT\r\n$%lu\r\n%s\r\n",
7604 (unsigned long)strlen(seldb
),seldb
);
7605 server
.appendseldb
= dictid
;
7608 /* "Fix" the argv vector if the command is EXPIRE. We want to translate
7609 * EXPIREs into EXPIREATs calls */
7610 if (cmd
->proc
== expireCommand
) {
/* The key object is shared with the original vector, so a reference is
 * taken; the other two objects are created here. */
7613 tmpargv
[0] = createStringObject("EXPIREAT",8);
7614 tmpargv
[1] = argv
[1];
7615 incrRefCount(argv
[1]);
7616 when
= time(NULL
)+strtol(argv
[2]->ptr
,NULL
,10);
7617 tmpargv
[2] = createObject(REDIS_STRING
,
7618 sdscatprintf(sdsempty(),"%ld",when
));
7622 /* Append the actual command */
7623 buf
= sdscatprintf(buf
,"*%d\r\n",argc
);
7624 for (j
= 0; j
< argc
; j
++) {
7627 o
= getDecodedObject(o
);
7628 buf
= sdscatprintf(buf
,"$%lu\r\n",(unsigned long)sdslen(o
->ptr
));
7629 buf
= sdscatlen(buf
,o
->ptr
,sdslen(o
->ptr
));
7630 buf
= sdscatlen(buf
,"\r\n",2);
7634 /* Free the objects from the modified argv for EXPIREAT */
7635 if (cmd
->proc
== expireCommand
) {
7636 for (j
= 0; j
< 3; j
++)
7637 decrRefCount(argv
[j
]);
7640 /* We want to perform a single write. This should be guaranteed atomic
7641 * at least if the filesystem we are writing is a real physical one.
7642 * While this will save us against the server being killed I don't think
7643 * there is much to do about the whole server stopping for power problems
7645 nwritten
= write(server
.appendfd
,buf
,sdslen(buf
));
7646 if (nwritten
!= (signed)sdslen(buf
)) {
7647 /* Ooops, we are in troubles. The best thing to do for now is
7648 * to simply exit instead to give the illusion that everything is
7649 * working as expected. */
7650 if (nwritten
== -1) {
7651 redisLog(REDIS_WARNING
,"Exiting on error writing to the append-only file: %s",strerror(errno
));
/* NOTE(review): on a short write errno is not necessarily set by write(),
 * so the strerror() text logged below may be unrelated. */
7653 redisLog(REDIS_WARNING
,"Exiting on short write while writing to the append-only file: %s",strerror(errno
));
7657 /* If a background append only file rewriting is in progress we want to
7658 * accumulate the differences between the child DB and the current one
7659 * in a buffer, so that when the child process will do its work we
7660 * can append the differences to the new append only file. */
7661 if (server
.bgrewritechildpid
!= -1)
7662 server
.bgrewritebuf
= sdscatlen(server
.bgrewritebuf
,buf
,sdslen(buf
));
/* fsync at every write with APPENDFSYNC_ALWAYS, or when more than one
 * second passed since the last fsync with APPENDFSYNC_EVERYSEC. */
7666 if (server
.appendfsync
== APPENDFSYNC_ALWAYS
||
7667 (server
.appendfsync
== APPENDFSYNC_EVERYSEC
&&
7668 now
-server
.lastfsync
> 1))
7670 fsync(server
.appendfd
); /* Let's try to get this data on the disk */
7671 server
.lastfsync
= now
;
7675 /* In Redis commands are always executed in the context of a client, so in
7676 * order to load the append only file we need to create a fake client.
 *
 * The structure is allocated with zmalloc() and gets an empty query
 * buffer and an empty reply list.
 * NOTE(review): other field initializations and the return statement are
 * not present in this listing. */
7677 static struct redisClient
*createFakeClient(void) {
7678 struct redisClient
*c
= zmalloc(sizeof(*c
));
7682 c
->querybuf
= sdsempty();
7686 /* We set the fake client as a slave waiting for the synchronization
7687 * so that Redis will not try to send replies to this client. */
7688 c
->replstate
= REDIS_REPL_WAIT_BGSAVE_START
;
/* Reply objects are released with decrRefCount() when nodes are deleted. */
7689 c
->reply
= listCreate();
7690 listSetFreeMethod(c
->reply
,decrRefCount
);
7691 listSetDupMethod(c
->reply
,dupClientReplyValue
);
/* Release the query buffer and the reply list of a client created with
 * createFakeClient().
 * NOTE(review): a line after listRelease() is not present in this
 * listing; presumably it releases the structure itself -- confirm. */
7695 static void freeFakeClient(struct redisClient
*c
) {
7696 sdsfree(c
->querybuf
);
7697 listRelease(c
->reply
);
7701 /* Replay the append log file. On success REDIS_OK is returned. On non fatal
7702 * error (the append only file is zero-length) REDIS_ERR is returned. On
7703 * fatal error an error message is logged and the program exits.
 *
 * NOTE(review): the original comment read "On error REDIS_OK is
 * returned", which contradicts the two cases listed after it; presumably
 * "On success" was meant -- confirm.
 *
 * The file is a sequence of multi bulk requests: a "*<argc>" line followed
 * by, for every argument, a "$<len>" line, the payload and CRLF. Every
 * request is executed in the context of a fake client.
 * NOTE(review): several lines (declarations, loop header, exit calls,
 * return statements, labels) are not present in this listing. */
7704 int loadAppendOnlyFile(char *filename
) {
7705 struct redisClient
*fakeClient
;
7706 FILE *fp
= fopen(filename
,"r");
7707 struct redis_stat sb
;
7708 unsigned long long loadedkeys
= 0;
/* NOTE(review): fileno(fp) is evaluated here, before the "can't open"
 * error path below. If 'fp' is only checked for NULL after this line, a
 * failed fopen() makes fileno() operate on a NULL stream -- confirm the
 * order of the two checks in the full source. */
7710 if (redis_fstat(fileno(fp
),&sb
) != -1 && sb
.st_size
== 0)
7714 redisLog(REDIS_WARNING
,"Fatal error: can't open the append log file for reading: %s",strerror(errno
));
7718 fakeClient
= createFakeClient();
7725 struct redisCommand
*cmd
;
/* Request header line. */
7727 if (fgets(buf
,sizeof(buf
),fp
) == NULL
) {
7733 if (buf
[0] != '*') goto fmterr
;
7735 argv
= zmalloc(sizeof(robj
*)*argc
);
7736 for (j
= 0; j
< argc
; j
++) {
7737 if (fgets(buf
,sizeof(buf
),fp
) == NULL
) goto readerr
;
7738 if (buf
[0] != '$') goto fmterr
;
/* Argument length, then the payload is read straight into a new sds. */
7739 len
= strtol(buf
+1,NULL
,10);
7740 argsds
= sdsnewlen(NULL
,len
);
7741 if (len
&& fread(argsds
,len
,1,fp
) == 0) goto fmterr
;
7742 argv
[j
] = createObject(REDIS_STRING
,argsds
);
7743 if (fread(buf
,2,1,fp
) == 0) goto fmterr
; /* discard CRLF */
7746 /* Command lookup */
7747 cmd
= lookupCommand(argv
[0]->ptr
);
7749 redisLog(REDIS_WARNING
,"Unknown command '%s' reading the append only file", argv
[0]->ptr
);
7752 /* Try object encoding */
7753 if (cmd
->flags
& REDIS_CMD_BULK
)
7754 tryObjectEncoding(argv
[argc
-1]);
7755 /* Run the command in the context of a fake client */
7756 fakeClient
->argc
= argc
;
7757 fakeClient
->argv
= argv
;
7758 cmd
->proc(fakeClient
);
7759 /* Discard the reply objects list from the fake client */
7760 while(listLength(fakeClient
->reply
))
7761 listDelNode(fakeClient
->reply
,listFirst(fakeClient
->reply
));
7762 /* Clean up, ready for the next command */
7763 for (j
= 0; j
< argc
; j
++) decrRefCount(argv
[j
]);
7765 /* Handle swapping while loading big datasets when VM is on */
/* Checked once every 5000 units of 'loadedkeys': swap objects out until
 * memory usage is within server.vm_max_memory or swapping fails. */
7767 if (server
.vm_enabled
&& (loadedkeys
% 5000) == 0) {
7768 while (zmalloc_used_memory() > server
.vm_max_memory
) {
7769 if (vmSwapOneObjectBlocking() == REDIS_ERR
) break;
7774 freeFakeClient(fakeClient
);
7779 redisLog(REDIS_WARNING
,"Unexpected end of file reading the append only file");
7781 redisLog(REDIS_WARNING
,"Unrecoverable error reading the append only file: %s", strerror(errno
));
7785 redisLog(REDIS_WARNING
,"Bad file format reading the append only file");
7789 /* Write an object into a file in the bulk format $<count>\r\n<payload>\r\n
 *
 * An object whose encoding is not REDIS_ENCODING_RAW is first converted
 * with getDecodedObject(). The "$<len>\r\n" header, the payload (only when
 * it is not empty) and the trailing CRLF are written with fwrite(); a
 * failed header or trailer write jumps to the err label. When 'decrrc' is
 * set the object is released with decrRefCount() on both exit paths. */
7790 static int fwriteBulkObject(FILE *fp
, robj
*obj
) {
7794 /* Avoid the incr/decr ref count business if possible to help
7795 * copy-on-write (we are often in a child process when this function
7797 * Also makes sure that key objects don't get incrRefCount-ed when VM
7799 if (obj
->encoding
!= REDIS_ENCODING_RAW
) {
7800 obj
= getDecodedObject(obj
);
7803 snprintf(buf
,sizeof(buf
),"$%ld\r\n",(long)sdslen(obj
->ptr
));
7804 if (fwrite(buf
,strlen(buf
),1,fp
) == 0) goto err
;
7805 if (sdslen(obj
->ptr
) && fwrite(obj
->ptr
,sdslen(obj
->ptr
),1,fp
) == 0)
7807 if (fwrite("\r\n",2,1,fp
) == 0) goto err
;
7808 if (decrrc
) decrRefCount(obj
);
7811 if (decrrc
) decrRefCount(obj
);
/* Write a binary-safe string into a file in the bulk format
 * $<count>\r\n<payload>\r\n
 *
 * 'fp' is the destination stream and 's' points to 'len' bytes of payload,
 * which may contain any byte value. An empty payload is emitted as
 * "$0\r\n\r\n" without calling fwrite() on 's'.
 *
 * Returns 0 as soon as one of the fwrite() calls fails, 1 otherwise. */
static int fwriteBulkString(FILE *fp, char *s, unsigned long len) {
    char buf[128];

    /* 'len' is unsigned: format it with %lu so that the conversion matches
     * the argument type and very large lengths are not printed as negative
     * numbers. */
    snprintf(buf,sizeof(buf),"$%lu\r\n",len);
    if (fwrite(buf,strlen(buf),1,fp) == 0) return 0;
    if (len && fwrite(s,len,1,fp) == 0) return 0;
    if (fwrite("\r\n",2,1,fp) == 0) return 0;
    return 1;
}
/* Write a double value in bulk format $<count>\r\n<payload>\r\n
 *
 * The payload is the "%.17g" rendering of 'd'.
 *
 * Returns 0 if writing the header or the payload fails, 1 otherwise. */
static int fwriteBulkDouble(FILE *fp, double d) {
    char buf[128], dbuf[128];

    /* dbuf already carries the trailing CRLF, so the payload length
     * advertised in the header is its string length minus those two bytes. */
    snprintf(dbuf,sizeof(dbuf),"%.17g\r\n",d);
    snprintf(buf,sizeof(buf),"$%lu\r\n",(unsigned long)strlen(dbuf)-2);
    if (fwrite(buf,strlen(buf),1,fp) == 0) return 0;
    if (fwrite(dbuf,strlen(dbuf),1,fp) == 0) return 0;
    return 1;
}
/* Write a long value in bulk format $<count>\r\n<payload>\r\n
 *
 * The payload is the decimal ("%ld") rendering of 'l'.
 *
 * Returns 0 if writing the header or the payload fails, 1 otherwise. */
static int fwriteBulkLong(FILE *fp, long l) {
    char buf[128], lbuf[128];

    /* lbuf already carries the trailing CRLF, so the payload length
     * advertised in the header is its string length minus those two bytes. */
    snprintf(lbuf,sizeof(lbuf),"%ld\r\n",l);
    snprintf(buf,sizeof(buf),"$%lu\r\n",(unsigned long)strlen(lbuf)-2);
    if (fwrite(buf,strlen(buf),1,fp) == 0) return 0;
    if (fwrite(lbuf,strlen(lbuf),1,fp) == 0) return 0;
    return 1;
}
7849 /* Write a sequence of commands able to fully rebuild the dataset into
7850 * "filename". Used both by REWRITEAOF and BGREWRITEAOF.
 *
 * The commands are written to a temporary file named after the process
 * pid, which is moved over "filename" with rename(2) at the end. Every
 * non-empty DB is emitted as a SELECT followed, per key, by SET, RPUSH,
 * SADD, ZADD or HSET commands depending on the value type, plus an
 * EXPIREAT for keys that have an expire set. */
7851 static int rewriteAppendOnlyFile(char *filename
) {
7852 dictIterator
*di
= NULL
;
7857 time_t now
= time(NULL
);
7859 /* Note that we have to use a different temp name here compared to the
7860 * one used by rewriteAppendOnlyFileBackground() function. */
7861 snprintf(tmpfile
,256,"temp-rewriteaof-%d.aof", (int) getpid());
7862 fp
= fopen(tmpfile
,"w");
7864 redisLog(REDIS_WARNING
, "Failed rewriting the append only file: %s", strerror(errno
));
7867 for (j
= 0; j
< server
.dbnum
; j
++) {
7868 char selectcmd
[] = "*2\r\n$6\r\nSELECT\r\n";
7869 redisDb
*db
= server
.db
+j
;
7871 if (dictSize(d
) == 0) continue;
7872 di
= dictGetIterator(d
);
7878 /* SELECT the new DB */
7879 if (fwrite(selectcmd
,sizeof(selectcmd
)-1,1,fp
) == 0) goto werr
;
7880 if (fwriteBulkLong(fp
,j
) == 0) goto werr
;
7882 /* Iterate this DB writing every entry */
7883 while((de
= dictNext(di
)) != NULL
) {
7888 key
= dictGetEntryKey(de
);
7889 /* If the value for this key is swapped, load a preview in memory.
7890 * We use a "swapped" flag to remember if we need to free the
7891 * value object instead to just increment the ref count anyway
7892 * in order to avoid copy-on-write of pages if we are forked() */
7893 if (!server
.vm_enabled
|| key
->storage
== REDIS_VM_MEMORY
||
7894 key
->storage
== REDIS_VM_SWAPPING
) {
7895 o
= dictGetEntryVal(de
);
7898 o
= vmPreviewObject(key
);
7901 expiretime
= getExpire(db
,key
);
7903 /* Save the key and associated value */
7904 if (o
->type
== REDIS_STRING
) {
7905 /* Emit a SET command */
7906 char cmd
[]="*3\r\n$3\r\nSET\r\n";
7907 if (fwrite(cmd
,sizeof(cmd
)-1,1,fp
) == 0) goto werr
;
7909 if (fwriteBulkObject(fp
,key
) == 0) goto werr
;
7910 if (fwriteBulkObject(fp
,o
) == 0) goto werr
;
7911 } else if (o
->type
== REDIS_LIST
) {
7912 /* Emit the RPUSHes needed to rebuild the list */
7913 list
*list
= o
->ptr
;
7917 listRewind(list
,&li
);
7918 while((ln
= listNext(&li
))) {
7919 char cmd
[]="*3\r\n$5\r\nRPUSH\r\n";
7920 robj
*eleobj
= listNodeValue(ln
);
7922 if (fwrite(cmd
,sizeof(cmd
)-1,1,fp
) == 0) goto werr
;
7923 if (fwriteBulkObject(fp
,key
) == 0) goto werr
;
7924 if (fwriteBulkObject(fp
,eleobj
) == 0) goto werr
;
7926 } else if (o
->type
== REDIS_SET
) {
7927 /* Emit the SADDs needed to rebuild the set */
7929 dictIterator
*di
= dictGetIterator(set
);
7932 while((de
= dictNext(di
)) != NULL
) {
7933 char cmd
[]="*3\r\n$4\r\nSADD\r\n";
7934 robj
*eleobj
= dictGetEntryKey(de
);
7936 if (fwrite(cmd
,sizeof(cmd
)-1,1,fp
) == 0) goto werr
;
7937 if (fwriteBulkObject(fp
,key
) == 0) goto werr
;
7938 if (fwriteBulkObject(fp
,eleobj
) == 0) goto werr
;
7940 dictReleaseIterator(di
);
7941 } else if (o
->type
== REDIS_ZSET
) {
7942 /* Emit the ZADDs needed to rebuild the sorted set */
7944 dictIterator
*di
= dictGetIterator(zs
->dict
);
7947 while((de
= dictNext(di
)) != NULL
) {
7948 char cmd
[]="*4\r\n$4\r\nZADD\r\n";
7949 robj
*eleobj
= dictGetEntryKey(de
);
7950 double *score
= dictGetEntryVal(de
);
7952 if (fwrite(cmd
,sizeof(cmd
)-1,1,fp
) == 0) goto werr
;
7953 if (fwriteBulkObject(fp
,key
) == 0) goto werr
;
7954 if (fwriteBulkDouble(fp
,*score
) == 0) goto werr
;
7955 if (fwriteBulkObject(fp
,eleobj
) == 0) goto werr
;
7957 dictReleaseIterator(di
);
7958 } else if (o
->type
== REDIS_HASH
) {
7959 char cmd
[]="*4\r\n$4\r\nHSET\r\n";
7961 /* Emit the HSETs needed to rebuild the hash */
7962 if (o
->encoding
== REDIS_ENCODING_ZIPMAP
) {
7963 unsigned char *p
= zipmapRewind(o
->ptr
);
7964 unsigned char *field
, *val
;
7965 unsigned int flen
, vlen
;
7967 while((p
= zipmapNext(p
,&field
,&flen
,&val
,&vlen
)) != NULL
) {
7968 if (fwrite(cmd
,sizeof(cmd
)-1,1,fp
) == 0) goto werr
;
7969 if (fwriteBulkObject(fp
,key
) == 0) goto werr
;
/* NOTE(review): fwriteBulkString() reports failure by returning 0, so the
 * two comparisons against -1 below can never detect a write error; every
 * other write in this function tests == 0 and jumps to werr. */
7970 if (fwriteBulkString(fp
,(char*)field
,flen
) == -1)
7972 if (fwriteBulkString(fp
,(char*)val
,vlen
) == -1)
7976 dictIterator
*di
= dictGetIterator(o
->ptr
);
7979 while((de
= dictNext(di
)) != NULL
) {
7980 robj
*field
= dictGetEntryKey(de
);
7981 robj
*val
= dictGetEntryVal(de
);
7983 if (fwrite(cmd
,sizeof(cmd
)-1,1,fp
) == 0) goto werr
;
7984 if (fwriteBulkObject(fp
,key
) == 0) goto werr
;
/* NOTE(review): these two checks compare against -1 and return -1, unlike
 * the == 0 / goto werr handling used for every other fwriteBulkObject()
 * call in this function - it looks like write errors on hash fields and
 * values go unnoticed; confirm and align. */
7985 if (fwriteBulkObject(fp
,field
) == -1) return -1;
7986 if (fwriteBulkObject(fp
,val
) == -1) return -1;
7988 dictReleaseIterator(di
);
7993 /* Save the expire time */
7994 if (expiretime
!= -1) {
7995 char cmd
[]="*3\r\n$8\r\nEXPIREAT\r\n";
7996 /* If this key is already expired skip it */
/* NOTE(review): this continue is reached before the
 * 'if (swapped) decrRefCount(o)' cleanup further down - presumably the
 * preview object of a swapped, already expired key is leaked; confirm. */
7997 if (expiretime
< now
) continue;
7998 if (fwrite(cmd
,sizeof(cmd
)-1,1,fp
) == 0) goto werr
;
7999 if (fwriteBulkObject(fp
,key
) == 0) goto werr
;
8000 if (fwriteBulkLong(fp
,expiretime
) == 0) goto werr
;
8002 if (swapped
) decrRefCount(o
);
8004 dictReleaseIterator(di
);
8007 /* Make sure data will not remain on the OS's output buffers */
8012 /* Use RENAME to make sure the DB file is changed atomically only
8013 * if the generate DB file is ok. */
8014 if (rename(tmpfile
,filename
) == -1) {
8015 redisLog(REDIS_WARNING
,"Error moving temp append only file on the final destination: %s", strerror(errno
));
8019 redisLog(REDIS_NOTICE
,"SYNC append only file rewrite performed");
8025 redisLog(REDIS_WARNING
,"Write error writing append only file on disk: %s", strerror(errno
));
8026 if (di
) dictReleaseIterator(di
);
8030 /* This is how rewriting of the append only file in background works:
8032 * 1) The user calls BGREWRITEAOF
8033 * 2) Redis calls this function, that forks():
8034 * 2a) the child rewrites the append only file in a temp file.
8035 * 2b) the parent accumulates differences in server.bgrewritebuf.
8036 * 3) When the child has finished '2a' it exits.
8037 * 4) The parent will trap the exit code, if it's OK, will append the
8038 * data accumulated into server.bgrewritebuf into the temp file, and
8039 * finally will rename(2) the temp file in the actual file name.
8040 * Then the new file is reopened as the new append only file. Profit!
8042 static int rewriteAppendOnlyFileBackground(void) {
8045 if (server
.bgrewritechildpid
!= -1) return REDIS_ERR
;
8046 if (server
.vm_enabled
) waitEmptyIOJobsQueue();
8047 if ((childpid
= fork()) == 0) {
8051 if (server
.vm_enabled
) vmReopenSwapFile();
8053 snprintf(tmpfile
,256,"temp-rewriteaof-bg-%d.aof", (int) getpid());
8054 if (rewriteAppendOnlyFile(tmpfile
) == REDIS_OK
) {
8061 if (childpid
== -1) {
8062 redisLog(REDIS_WARNING
,
8063 "Can't rewrite append only file in background: fork: %s",
8067 redisLog(REDIS_NOTICE
,
8068 "Background append only file rewriting started by pid %d",childpid
);
8069 server
.bgrewritechildpid
= childpid
;
8070 updateDictResizePolicy();
8071 /* We set appendseldb to -1 in order to force the next call to the
8072 * feedAppendOnlyFile() to issue a SELECT command, so the differences
8073 * accumulated by the parent into server.bgrewritebuf will start
8074 * with a SELECT statement and it will be safe to merge. */
8075 server
.appendseldb
= -1;
8078 return REDIS_OK
; /* unreached */
/* BGREWRITEAOF command: replies with an error when a background rewrite
 * is already in progress (server.bgrewritechildpid != -1). Otherwise
 * rewriteAppendOnlyFileBackground() is called and a status line is sent
 * when it returns REDIS_OK; shared.err is the reply used on the remaining
 * path. */
8081 static void bgrewriteaofCommand(redisClient
*c
) {
8082 if (server
.bgrewritechildpid
!= -1) {
8083 addReplySds(c
,sdsnew("-ERR background append only file rewriting already in progress\r\n"));
8086 if (rewriteAppendOnlyFileBackground() == REDIS_OK
) {
8087 char *status
= "+Background append only file rewriting started\r\n";
8088 addReplySds(c
,sdsnew(status
));
8090 addReply(c
,shared
.err
);
/* Format into tmpfile the name of the temp file belonging to the
 * background rewrite child 'childpid' ("temp-rewriteaof-bg-<pid>.aof",
 * the same pattern rewriteAppendOnlyFileBackground() uses). */
8094 static void aofRemoveTempFile(pid_t childpid
) {
8097 snprintf(tmpfile
,256,"temp-rewriteaof-bg-%d.aof", (int) childpid
);
8101 /* Virtual Memory is composed mainly of two subsystems:
8102 * - Blocking Virtual Memory
8103 * - Threaded Virtual Memory I/O
8104 * The two parts are not fully decoupled, but functions are split among two
8105 * different sections of the source code (delimited by comments) in order to
8106 * make it clearer which functionality belongs to the blocking VM and which
8107 * to the threaded (not blocking) VM.
8111 * Redis VM is a blocking VM (one that blocks reading swapped values from
8112 * disk into memory when a value swapped out is needed in memory) that is made
8113 * unblocking by trying to examine the command argument vector in order to
8114 * load in background values that will likely be needed in order to execute
8115 * the command. The command is executed only once all the relevant keys
8116 * are loaded into memory.
8118 * This basically is almost as simple as a blocking VM, but almost as parallel
8119 * as a fully non-blocking VM.
8122 /* =================== Virtual Memory - Blocking Side ====================== */
8124 /* Substitute the first occurrence of '%p' with the process pid in the
8125 * swap file name.
 *
 * The marker is located with strstr(). The new name is built as an sds
 * string by appending server.vm_swap_file, the pid formatted with "%ld"
 * and the text that follows the marker (p+2); the old name is then
 * released with zfree() and server.vm_swap_file points to the new one. */
8126 static void expandVmSwapFilename(void) {
8127 char *p
= strstr(server
.vm_swap_file
,"%p");
8133 new = sdscat(new,server
.vm_swap_file
);
8134 new = sdscatprintf(new,"%ld",(long) getpid());
8135 new = sdscat(new,p
+2);
8136 zfree(server
.vm_swap_file
);
8137 server
.vm_swap_file
= new;
/* Initialize the Virtual Memory subsystem.
 *
 * In order: zmalloc thread safeness is enabled when vm_max_threads is not
 * zero; the swap file name is expanded and the file is opened ("r+b",
 * falling back to "w+b"); the VM counters are reset; the swap file is
 * resized with ftruncate() to vm_pages*vm_page_size bytes; the page bitmap
 * ((vm_pages+7)/8 bytes, one bit per page) is allocated and zeroed; the
 * threaded I/O job lists, mutexes and wake-up pipe are created, with the
 * read side of the pipe made non blocking; the I/O threads stack size is
 * raised to at least REDIS_THREAD_STACK_SIZE; finally
 * vmThreadedIOCompletedJob() is registered as the AE_READABLE handler of
 * the pipe read side. */
8140 static void vmInit(void) {
8145 if (server
.vm_max_threads
!= 0)
8146 zmalloc_enable_thread_safeness(); /* we need thread safe zmalloc() */
8148 expandVmSwapFilename();
8149 redisLog(REDIS_NOTICE
,"Using '%s' as swap file",server
.vm_swap_file
);
8150 if ((server
.vm_fp
= fopen(server
.vm_swap_file
,"r+b")) == NULL
) {
8151 server
.vm_fp
= fopen(server
.vm_swap_file
,"w+b");
8153 if (server
.vm_fp
== NULL
) {
8154 redisLog(REDIS_WARNING
,
8155 "Impossible to open the swap file: %s. Exiting.",
8159 server
.vm_fd
= fileno(server
.vm_fp
);
8160 server
.vm_next_page
= 0;
8161 server
.vm_near_pages
= 0;
8162 server
.vm_stats_used_pages
= 0;
8163 server
.vm_stats_swapped_objects
= 0;
8164 server
.vm_stats_swapouts
= 0;
8165 server
.vm_stats_swapins
= 0;
8166 totsize
= server
.vm_pages
*server
.vm_page_size
;
8167 redisLog(REDIS_NOTICE
,"Allocating %lld bytes of swap file",totsize
);
8168 if (ftruncate(server
.vm_fd
,totsize
) == -1) {
8169 redisLog(REDIS_WARNING
,"Can't ftruncate swap file: %s. Exiting.",
8173 redisLog(REDIS_NOTICE
,"Swap file allocated with success");
8175 server
.vm_bitmap
= zmalloc((server
.vm_pages
+7)/8);
8176 redisLog(REDIS_VERBOSE
,"Allocated %lld bytes page table for %lld pages",
8177 (long long) (server
.vm_pages
+7)/8, server
.vm_pages
);
8178 memset(server
.vm_bitmap
,0,(server
.vm_pages
+7)/8);
8180 /* Initialize threaded I/O (used by Virtual Memory) */
8181 server
.io_newjobs
= listCreate();
8182 server
.io_processing
= listCreate();
8183 server
.io_processed
= listCreate();
8184 server
.io_ready_clients
= listCreate();
8185 pthread_mutex_init(&server
.io_mutex
,NULL
);
8186 pthread_mutex_init(&server
.obj_freelist_mutex
,NULL
);
8187 pthread_mutex_init(&server
.io_swapfile_mutex
,NULL
);
8188 server
.io_active_threads
= 0;
8189 if (pipe(pipefds
) == -1) {
8190 redisLog(REDIS_WARNING
,"Unable to intialized VM: pipe(2): %s. Exiting."
8194 server
.io_ready_pipe_read
= pipefds
[0];
8195 server
.io_ready_pipe_write
= pipefds
[1];
8196 redisAssert(anetNonBlock(NULL
,server
.io_ready_pipe_read
) != ANET_ERR
);
8197 /* LZF requires a lot of stack */
8198 pthread_attr_init(&server
.io_threads_attr
);
8199 pthread_attr_getstacksize(&server
.io_threads_attr
, &stacksize
);
8200 while (stacksize
< REDIS_THREAD_STACK_SIZE
) stacksize
*= 2;
8201 pthread_attr_setstacksize(&server
.io_threads_attr
, stacksize
);
8202 /* Listen for events in the threaded I/O pipe */
8203 if (aeCreateFileEvent(server
.el
, server
.io_ready_pipe_read
, AE_READABLE
,
8204 vmThreadedIOCompletedJob
, NULL
) == AE_ERR
)
8205 oom("creating file event");
8208 /* Mark the page as used */
8209 static void vmMarkPageUsed(off_t page
) {
8210 off_t byte
= page
/8;
8212 redisAssert(vmFreePage(page
) == 1);
8213 server
.vm_bitmap
[byte
] |= 1<<bit
;
8216 /* Mark N contiguous pages as used, with 'page' being the first. */
8217 static void vmMarkPagesUsed(off_t page
, off_t count
) {
8220 for (j
= 0; j
< count
; j
++)
8221 vmMarkPageUsed(page
+j
);
8222 server
.vm_stats_used_pages
+= count
;
8223 redisLog(REDIS_DEBUG
,"Mark USED pages: %lld pages at %lld\n",
8224 (long long)count
, (long long)page
);
8227 /* Mark the page as free */
8228 static void vmMarkPageFree(off_t page
) {
8229 off_t byte
= page
/8;
8231 redisAssert(vmFreePage(page
) == 0);
8232 server
.vm_bitmap
[byte
] &= ~(1<<bit
);
8235 /* Mark N contiguous pages as free, with 'page' being the first. */
8236 static void vmMarkPagesFree(off_t page
, off_t count
) {
8239 for (j
= 0; j
< count
; j
++)
8240 vmMarkPageFree(page
+j
);
8241 server
.vm_stats_used_pages
-= count
;
8242 redisLog(REDIS_DEBUG
,"Mark FREE pages: %lld pages at %lld\n",
8243 (long long)count
, (long long)page
);
8246 /* Test if the page is free */
8247 static int vmFreePage(off_t page
) {
8248 off_t byte
= page
/8;
8250 return (server
.vm_bitmap
[byte
] & (1<<bit
)) == 0;
8253 /* Find N contiguous free pages storing the first page of the cluster in *first.
8254 * Returns REDIS_OK if it was able to find N contiguous pages, otherwise
8255 * REDIS_ERR is returned.
8257 * This function uses a simple algorithm: we try to allocate
8258 * REDIS_VM_MAX_NEAR_PAGES sequentially, when we reach this limit we start
8259 * again from the start of the swap file searching for free spaces.
8261 * If it looks pretty clear that there are no free pages near our offset
8262 * we try to find less populated places doing a forward jump of
8263 * REDIS_VM_MAX_RANDOM_JUMP, then we start scanning again a few pages
8264 * without hurry, and then we jump again and so forth...
8266 * This function can be improved using a free list to avoid guessing
8267 * too much, since we could collect data about freed pages.
8269 * note: I implemented this function just after watching an episode of
8270 * Battlestar Galactica, where the hybrid was continuing to say "JUMP!"
8272 static int vmFindContiguousPages(off_t
*first
, off_t n
) {
8273 off_t base
, offset
= 0, since_jump
= 0, numfree
= 0;
8275 if (server
.vm_near_pages
== REDIS_VM_MAX_NEAR_PAGES
) {
8276 server
.vm_near_pages
= 0;
8277 server
.vm_next_page
= 0;
8279 server
.vm_near_pages
++; /* Yet another try for pages near to the old ones */
8280 base
= server
.vm_next_page
;
8282 while(offset
< server
.vm_pages
) {
8283 off_t
this = base
+offset
;
8285 /* If we overflow, restart from page zero */
8286 if (this >= server
.vm_pages
) {
8287 this -= server
.vm_pages
;
8289 /* Just overflowed, what we found on tail is no longer
8290 * interesting, as it's no longer contiguous. */
8294 if (vmFreePage(this)) {
8295 /* This is a free page */
8297 /* Already got N free pages? Return to the caller, with success */
8299 *first
= this-(n
-1);
8300 server
.vm_next_page
= this+1;
8301 redisLog(REDIS_DEBUG
, "FOUND CONTIGUOUS PAGES: %lld pages at %lld\n", (long long) n
, (long long) *first
);
8305 /* The current one is not a free page */
8309 /* Fast-forward if the current page is not free and we already
8310 * searched enough near this place. */
8312 if (!numfree
&& since_jump
>= REDIS_VM_MAX_RANDOM_JUMP
/4) {
8313 offset
+= random() % REDIS_VM_MAX_RANDOM_JUMP
;
8315 /* Note that even if we rewind after the jump, we don't need
8316 * to make sure numfree is set to zero as we only jump *if* it
8317 * is set to zero. */
8319 /* Otherwise just check the next page */
8326 /* Write the specified object at the specified page of the swap file.
 *
 * When VM is enabled the swap file mutex is held around the operation.
 * The stream is positioned with fseeko() at page*vm_page_size, the object
 * is serialized with rdbSaveObject() and the stream is flushed. A failed
 * seek releases the mutex and is logged as a critical VM problem. */
8327 static int vmWriteObjectOnSwap(robj
*o
, off_t page
) {
8328 if (server
.vm_enabled
) pthread_mutex_lock(&server
.io_swapfile_mutex
);
8329 if (fseeko(server
.vm_fp
,page
*server
.vm_page_size
,SEEK_SET
) == -1) {
8330 if (server
.vm_enabled
) pthread_mutex_unlock(&server
.io_swapfile_mutex
);
8331 redisLog(REDIS_WARNING
,
8332 "Critical VM problem in vmWriteObjectOnSwap(): can't seek: %s",
8336 rdbSaveObject(server
.vm_fp
,o
);
8337 fflush(server
.vm_fp
);
8338 if (server
.vm_enabled
) pthread_mutex_unlock(&server
.io_swapfile_mutex
);
8342 /* Swap the 'val' object relative to 'key' into disk. Store all the information
8343 * needed to later retrieve the object into the key object.
8344 * If we can't find enough contiguous empty pages to swap the object on disk
8345 * REDIS_ERR is returned. REDIS_ERR is also returned when writing the
 * object on the swap file fails.
 *
 * On success the key records the first page, the number of pages and the
 * value type, its storage becomes REDIS_VM_SWAPPED, the value is released
 * with decrRefCount(), the pages are marked as used and the swapped
 * objects and swap-outs counters are incremented. */
8346 static int vmSwapObjectBlocking(robj
*key
, robj
*val
) {
8347 off_t pages
= rdbSavedObjectPages(val
,NULL
);
/* NOTE(review): these two checks use plain assert() while other VM
 * functions in this file use redisAssert() - confirm this is intended. */
8350 assert(key
->storage
== REDIS_VM_MEMORY
);
8351 assert(key
->refcount
== 1);
8352 if (vmFindContiguousPages(&page
,pages
) == REDIS_ERR
) return REDIS_ERR
;
8353 if (vmWriteObjectOnSwap(val
,page
) == REDIS_ERR
) return REDIS_ERR
;
8354 key
->vm
.page
= page
;
8355 key
->vm
.usedpages
= pages
;
8356 key
->storage
= REDIS_VM_SWAPPED
;
8357 key
->vtype
= val
->type
;
8358 decrRefCount(val
); /* Deallocate the object from memory. */
8359 vmMarkPagesUsed(page
,pages
);
8360 redisLog(REDIS_DEBUG
,"VM: object %s swapped out at %lld (%lld pages)",
8361 (unsigned char*) key
->ptr
,
8362 (unsigned long long) page
, (unsigned long long) pages
);
8363 server
.vm_stats_swapped_objects
++;
8364 server
.vm_stats_swapouts
++;
/* Read an object of the given 'type' stored at 'page' of the swap file.
 *
 * When VM is enabled the swap file mutex is held around the operation.
 * The stream is positioned with fseeko() at page*vm_page_size and the
 * object is unserialized with rdbLoadObject(). Seek and load failures are
 * logged as unrecoverable VM problems. */
8368 static robj
*vmReadObjectFromSwap(off_t page
, int type
) {
8371 if (server
.vm_enabled
) pthread_mutex_lock(&server
.io_swapfile_mutex
);
8372 if (fseeko(server
.vm_fp
,page
*server
.vm_page_size
,SEEK_SET
) == -1) {
8373 redisLog(REDIS_WARNING
,
8374 "Unrecoverable VM problem in vmReadObjectFromSwap(): can't seek: %s",
8378 o
= rdbLoadObject(type
,server
.vm_fp
);
8380 redisLog(REDIS_WARNING
, "Unrecoverable VM problem in vmReadObjectFromSwap(): can't load object from swap file: %s", strerror(errno
));
8383 if (server
.vm_enabled
) pthread_mutex_unlock(&server
.io_swapfile_mutex
);
8387 /* Load the value object relative to the 'key' object from swap to memory.
8388 * The newly allocated object is returned.
8390 * If preview is true the unserialized object is returned to the caller but
8391 * no changes are made to the key object, nor the pages are marked as freed.
 *
 * The key must be in the REDIS_VM_SWAPPED or REDIS_VM_LOADING state
 * (asserted). The value is read with vmReadObjectFromSwap() from
 * key->vm.page, using key->vtype as the object type. */
8392 static robj
*vmGenericLoadObject(robj
*key
, int preview
) {
8395 redisAssert(key
->storage
== REDIS_VM_SWAPPED
|| key
->storage
== REDIS_VM_LOADING
);
8396 val
= vmReadObjectFromSwap(key
->vm
.page
,key
->vtype
);
8398 key
->storage
= REDIS_VM_MEMORY
;
8399 key
->vm
.atime
= server
.unixtime
;
8400 vmMarkPagesFree(key
->vm
.page
,key
->vm
.usedpages
);
8401 redisLog(REDIS_DEBUG
, "VM: object %s loaded from disk",
8402 (unsigned char*) key
->ptr
);
8403 server
.vm_stats_swapped_objects
--;
8405 redisLog(REDIS_DEBUG
, "VM: object %s previewed from disk",
8406 (unsigned char*) key
->ptr
);
8408 server
.vm_stats_swapins
++;
8412 /* Plain object loading, from swap to memory */
8413 static robj
*vmLoadObject(robj
*key
) {
8414 /* If we are loading the object in background, stop it, we
8415 * need to load this object synchronously ASAP. */
8416 if (key
->storage
== REDIS_VM_LOADING
)
8417 vmCancelThreadedIOJob(key
);
8418 return vmGenericLoadObject(key
,0);
8421 /* Just load the value on disk, without to modify the key.
8422 * This is useful when we want to perform some operation on the value
8423 * without to really bring it from swap to memory, like while saving the
8424 * dataset or rewriting the append only log. */
8425 static robj
*vmPreviewObject(robj
*key
) {
8426 return vmGenericLoadObject(key
,1);
8429 /* How good a candidate is this object for swapping?
8430 * The better candidate it is, the greater the returned value.
8432 * Currently we try to perform a fast estimation of the object size in
8433 * memory, and combine it with aging information.
8435 * Basically swappability = idle-time * log(estimated size)
8437 * Bigger objects are preferred over smaller objects, but not
8438 * proportionally, this is why we use the logarithm. This algorithm is
8439 * just a first try and will probably be tuned later.
 *
 * The idle time is server.unixtime minus the object access time; when it
 * is not positive 0 is returned. The value actually returned is
 * age*log(1+asize), where asize is the estimated size. */
8440 static double computeObjectSwappability(robj
*o
) {
8441 time_t age
= server
.unixtime
- o
->vm
.atime
;
8445 struct dictEntry
*de
;
8448 if (age
<= 0) return 0;
8451 if (o
->encoding
!= REDIS_ENCODING_RAW
) {
8454 asize
= sdslen(o
->ptr
)+sizeof(*o
)+sizeof(long)*2;
8459 listNode
*ln
= listFirst(l
);
8461 asize
= sizeof(list
);
8463 robj
*ele
= ln
->value
;
8466 elesize
= (ele
->encoding
== REDIS_ENCODING_RAW
) ?
8467 (sizeof(*o
)+sdslen(ele
->ptr
)) :
8469 asize
+= (sizeof(listNode
)+elesize
)*listLength(l
);
8474 z
= (o
->type
== REDIS_ZSET
);
8475 d
= z
? ((zset
*)o
->ptr
)->dict
: o
->ptr
;
8477 asize
= sizeof(dict
)+(sizeof(struct dictEntry
*)*dictSlots(d
));
8478 if (z
) asize
+= sizeof(zset
)-sizeof(dict
);
8483 de
= dictGetRandomKey(d
);
8484 ele
= dictGetEntryKey(de
);
8485 elesize
= (ele
->encoding
== REDIS_ENCODING_RAW
) ?
8486 (sizeof(*o
)+sdslen(ele
->ptr
)) :
8488 asize
+= (sizeof(struct dictEntry
)+elesize
)*dictSize(d
);
8489 if (z
) asize
+= sizeof(zskiplistNode
)*dictSize(d
);
8493 if (o
->encoding
== REDIS_ENCODING_ZIPMAP
) {
8494 unsigned char *p
= zipmapRewind((unsigned char*)o
->ptr
);
8495 unsigned int len
= zipmapLen((unsigned char*)o
->ptr
);
8496 unsigned int klen
, vlen
;
8497 unsigned char *key
, *val
;
8499 if ((p
= zipmapNext(p
,&key
,&klen
,&val
,&vlen
)) == NULL
) {
8503 asize
= len
*(klen
+vlen
+3);
8504 } else if (o
->encoding
== REDIS_ENCODING_HT
) {
8506 asize
= sizeof(dict
)+(sizeof(struct dictEntry
*)*dictSlots(d
));
8511 de
= dictGetRandomKey(d
);
8512 ele
= dictGetEntryKey(de
);
8513 elesize
= (ele
->encoding
== REDIS_ENCODING_RAW
) ?
8514 (sizeof(*o
)+sdslen(ele
->ptr
)) :
8516 ele
= dictGetEntryVal(de
);
8517 elesize
= (ele
->encoding
== REDIS_ENCODING_RAW
) ?
8518 (sizeof(*o
)+sdslen(ele
->ptr
)) :
8520 asize
+= (sizeof(struct dictEntry
)+elesize
)*dictSize(d
);
8525 return (double)age
*log(1+asize
);
8528 /* Try to swap an object that's a good candidate for swapping.
8529 * Returns REDIS_OK if the object was swapped, REDIS_ERR if it's not possible
8530 * to swap any object at all.
8532 * If 'usethreads' is true, Redis will try to swap the object in background
8533 * using I/O threads.
 *
 * Candidates are sampled with dictGetRandomKey() from every non-empty DB
 * and ranked with computeObjectSwappability(); the entry with the highest
 * score is the one swapped. A key with a refcount greater than one is
 * replaced in the dict entry by a private copy before swapping. */
8534 static int vmSwapOneObject(int usethreads
) {
8536 struct dictEntry
*best
= NULL
;
8537 double best_swappability
= 0;
8538 redisDb
*best_db
= NULL
;
8541 for (j
= 0; j
< server
.dbnum
; j
++) {
8542 redisDb
*db
= server
.db
+j
;
8543 /* Why maxtries is set to 100?
8544 * Because this way (usually) we'll find 1 object even if just 1% - 2%
8545 * are swappable objects */
8548 if (dictSize(db
->dict
) == 0) continue;
8549 for (i
= 0; i
< 5; i
++) {
8551 double swappability
;
8553 if (maxtries
) maxtries
--;
8554 de
= dictGetRandomKey(db
->dict
);
8555 key
= dictGetEntryKey(de
);
8556 val
= dictGetEntryVal(de
);
8557 /* Only swap objects that are currently in memory.
8559 * Also don't swap shared objects if threaded VM is on, as we
8560 * try to ensure that the main thread does not touch the
8561 * object while the I/O thread is using it, but we can't
8562 * control other keys without adding additional mutex. */
8563 if (key
->storage
!= REDIS_VM_MEMORY
||
8564 (server
.vm_max_threads
!= 0 && val
->refcount
!= 1)) {
8565 if (maxtries
) i
--; /* don't count this try */
8568 swappability
= computeObjectSwappability(val
);
8569 if (!best
|| swappability
> best_swappability
) {
8571 best_swappability
= swappability
;
8576 if (best
== NULL
) return REDIS_ERR
;
8577 key
= dictGetEntryKey(best
);
8578 val
= dictGetEntryVal(best
);
8580 redisLog(REDIS_DEBUG
,"Key with best swappability: %s, %f",
8581 key
->ptr
, best_swappability
);
8583 /* Unshare the key if needed */
8584 if (key
->refcount
> 1) {
8585 robj
*newkey
= dupStringObject(key
);
8587 key
= dictGetEntryKey(best
) = newkey
;
8591 vmSwapObjectThreaded(key
,val
,best_db
);
8594 if (vmSwapObjectBlocking(key
,val
) == REDIS_OK
) {
8595 dictGetEntryVal(best
) = NULL
;
/* Swap one object out without using the I/O threads: a wrapper around
 * vmSwapOneObject() called with a zero 'usethreads' argument. Returns the
 * value returned by vmSwapOneObject(). */
static int vmSwapOneObjectBlocking(void) {
    return vmSwapOneObject(0);
}
/* Swap one object out using the I/O threads: a wrapper around
 * vmSwapOneObject() called with a non-zero 'usethreads' argument. Returns
 * the value returned by vmSwapOneObject(). */
static int vmSwapOneObjectThreaded(void) {
    return vmSwapOneObject(1);
}
8611 /* Return true if it's safe to swap out objects in a given moment.
8612 * Basically we don't want to swap objects out while there is a BGSAVE
8613 * or a BGAEOREWRITE running in backgroud. */
8614 static int vmCanSwapOut(void) {
8615 return (server
.bgsavechildpid
== -1 && server
.bgrewritechildpid
== -1);
8618 /* Delete a key if swapped. Returns 1 if the key was found, was swapped
8619 * and was deleted. Otherwise 0 is returned.
 *
 * 0 is returned both when the key is not in db->dict and when the key
 * found there has REDIS_VM_MEMORY storage. */
8620 static int deleteIfSwapped(redisDb
*db
, robj
*key
) {
8624 if ((de
= dictFind(db
->dict
,key
)) == NULL
) return 0;
8625 foundkey
= dictGetEntryKey(de
);
8626 if (foundkey
->storage
== REDIS_VM_MEMORY
) return 0;
8631 /* =================== Virtual Memory - Threaded I/O ======================= */
/* Release an I/O job. For PREPARE_SWAP, DO_SWAP and LOAD jobs carrying a
 * non NULL value, the reference to the value is dropped with
 * decrRefCount(). */
8633 static void freeIOJob(iojob
*j
) {
8634 if ((j
->type
== REDIS_IOJOB_PREPARE_SWAP
||
8635 j
->type
== REDIS_IOJOB_DO_SWAP
||
8636 j
->type
== REDIS_IOJOB_LOAD
) && j
->val
!= NULL
)
8637 decrRefCount(j
->val
);
8638 /* We don't decrRefCount the j->key field as we didn't increment
8639 * the count creating IO Jobs. This is because the key field here is
8640 * just used as an identifier and if a key is removed the Job should
8641 * never be touched again. */
/* Handler of completed threaded I/O jobs. One byte is read from 'fd' per
 * completed job; the oldest entry of server.io_processed is then removed
 * from that list and post-processed according to its type (LOAD,
 * PREPARE_SWAP or DO_SWAP). The 'mask' and 'privdata' arguments are not
 * used. */
8645 /* Every time a thread finished a Job, it writes a byte into the write side
8646 * of an unix pipe in order to "awake" the main thread, and this function
8648 static void vmThreadedIOCompletedJob(aeEventLoop
*el
, int fd
, void *privdata
,
8652 int retval
, processed
= 0, toprocess
= -1, trytoswap
= 1;
8654 REDIS_NOTUSED(mask
);
8655 REDIS_NOTUSED(privdata
);
8657 /* For every byte we read in the read side of the pipe, there is one
8658 * I/O job completed to process. */
8659 while((retval
= read(fd
,buf
,1)) == 1) {
8663 struct dictEntry
*de
;
8665 redisLog(REDIS_DEBUG
,"Processing I/O completed job");
8667 /* Get the processed element (the oldest one) */
8669 assert(listLength(server
.io_processed
) != 0);
8670 if (toprocess
== -1) {
8671 toprocess
= (listLength(server
.io_processed
)*REDIS_MAX_COMPLETED_JOBS_PROCESSED
)/100;
8672 if (toprocess
<= 0) toprocess
= 1;
8674 ln
= listFirst(server
.io_processed
);
8676 listDelNode(server
.io_processed
,ln
);
8678 /* If this job is marked as canceled, just ignore it */
8683 /* Post process it in the main thread, as there are things we
8684 * can do just here to avoid race conditions and/or invasive locks */
8685 redisLog(REDIS_DEBUG
,"Job %p type: %d, key at %p (%s) refcount: %d\n", (void*) j
, j
->type
, (void*)j
->key
, (char*)j
->key
->ptr
, j
->key
->refcount
);
8686 de
= dictFind(j
->db
->dict
,j
->key
);
8688 key
= dictGetEntryKey(de
);
8689 if (j
->type
== REDIS_IOJOB_LOAD
) {
8692 /* Key loaded, bring it at home */
8693 key
->storage
= REDIS_VM_MEMORY
;
8694 key
->vm
.atime
= server
.unixtime
;
8695 vmMarkPagesFree(key
->vm
.page
,key
->vm
.usedpages
);
8696 redisLog(REDIS_DEBUG
, "VM: object %s loaded from disk (threaded)",
8697 (unsigned char*) key
->ptr
);
8698 server
.vm_stats_swapped_objects
--;
8699 server
.vm_stats_swapins
++;
8700 dictGetEntryVal(de
) = j
->val
;
8701 incrRefCount(j
->val
);
8704 /* Handle clients waiting for this key to be loaded. */
8705 handleClientsBlockedOnSwappedKey(db
,key
);
8706 } else if (j
->type
== REDIS_IOJOB_PREPARE_SWAP
) {
8707 /* Now we know the amount of pages required to swap this object.
8708 * Let's find some space for it, and queue this task again
8709 * rebranded as REDIS_IOJOB_DO_SWAP. */
8710 if (!vmCanSwapOut() ||
8711 vmFindContiguousPages(&j
->page
,j
->pages
) == REDIS_ERR
)
8713 /* Ooops... no space or we can't swap as there is
8714 * a fork()ed Redis trying to save stuff on disk. */
8716 key
->storage
= REDIS_VM_MEMORY
; /* undo operation */
8718 /* Note that we need to mark these pages as used now,
8719 * if the job will be canceled, we'll mark them as freed
8721 vmMarkPagesUsed(j
->page
,j
->pages
);
8722 j
->type
= REDIS_IOJOB_DO_SWAP
;
8727 } else if (j
->type
== REDIS_IOJOB_DO_SWAP
) {
8730 /* Key swapped. We can finally free some memory. */
/* Debugging aid: when the key is not in the SWAPPING state the details of
 * the key and of the job value are printed to stdout before the
 * redisAssert() on the same condition below. */
8731 if (key
->storage
!= REDIS_VM_SWAPPING
) {
8732 printf("key->storage: %d\n",key
->storage
);
8733 printf("key->name: %s\n",(char*)key
->ptr
);
8734 printf("key->refcount: %d\n",key
->refcount
);
8735 printf("val: %p\n",(void*)j
->val
);
8736 printf("val->type: %d\n",j
->val
->type
);
8737 printf("val->ptr: %s\n",(char*)j
->val
->ptr
);
8739 redisAssert(key
->storage
== REDIS_VM_SWAPPING
);
8740 val
= dictGetEntryVal(de
);
8741 key
->vm
.page
= j
->page
;
8742 key
->vm
.usedpages
= j
->pages
;
8743 key
->storage
= REDIS_VM_SWAPPED
;
8744 key
->vtype
= j
->val
->type
;
8745 decrRefCount(val
); /* Deallocate the object from memory. */
8746 dictGetEntryVal(de
) = NULL
;
8747 redisLog(REDIS_DEBUG
,
8748 "VM: object %s swapped out at %lld (%lld pages) (threaded)",
8749 (unsigned char*) key
->ptr
,
8750 (unsigned long long) j
->page
, (unsigned long long) j
->pages
);
8751 server
.vm_stats_swapped_objects
++;
8752 server
.vm_stats_swapouts
++;
8754 /* Put a few more swap requests in queue if we are still
8756 if (trytoswap
&& vmCanSwapOut() &&
8757 zmalloc_used_memory() > server
.vm_max_memory
)
8762 more
= listLength(server
.io_newjobs
) <
8763 (unsigned) server
.vm_max_threads
;
8765 /* Don't waste CPU time if swappable objects are rare. */
8766 if (vmSwapOneObjectThreaded() == REDIS_ERR
) {
8774 if (processed
== toprocess
) return;
8776 if (retval
< 0 && errno
!= EAGAIN
) {
8777 redisLog(REDIS_WARNING
,
8778 "WARNING: read(2) error in vmThreadedIOCompletedJob() %s",
8783 static void lockThreadedIO(void) {
8784 pthread_mutex_lock(&server
.io_mutex
);
8787 static void unlockThreadedIO(void) {
8788 pthread_mutex_unlock(&server
.io_mutex
);
8791 /* Remove the specified object from the threaded I/O queue if still not
8792 * processed, otherwise make sure to flag it as canceled. */
8793 static void vmCancelThreadedIOJob(robj
*o
) {
8795 server
.io_newjobs
, /* 0 */
8796 server
.io_processing
, /* 1 */
8797 server
.io_processed
/* 2 */
8801 assert(o
->storage
== REDIS_VM_LOADING
|| o
->storage
== REDIS_VM_SWAPPING
);
8804 /* Search for a matching key in one of the queues */
8805 for (i
= 0; i
< 3; i
++) {
8809 listRewind(lists
[i
],&li
);
8810 while ((ln
= listNext(&li
)) != NULL
) {
8811 iojob
*job
= ln
->value
;
8813 if (job
->canceled
) continue; /* Skip this, already canceled. */
8814 if (job
->key
== o
) {
8815 redisLog(REDIS_DEBUG
,"*** CANCELED %p (%s) (type %d) (LIST ID %d)\n",
8816 (void*)job
, (char*)o
->ptr
, job
->type
, i
);
8817 /* Mark the pages as free since the swap didn't happened
8818 * or happened but is now discarded. */
8819 if (i
!= 1 && job
->type
== REDIS_IOJOB_DO_SWAP
)
8820 vmMarkPagesFree(job
->page
,job
->pages
);
8821 /* Cancel the job. It depends on the list the job is
8824 case 0: /* io_newjobs */
8825 /* If the job was yet not processed the best thing to do
8826 * is to remove it from the queue at all */
8828 listDelNode(lists
[i
],ln
);
8830 case 1: /* io_processing */
8831 /* Oh Shi- the thread is messing with the Job:
8833 * Probably it's accessing the object if this is a
8834 * PREPARE_SWAP or DO_SWAP job.
8835 * If it's a LOAD job it may be reading from disk and
8836 * if we don't wait for the job to terminate before to
8837 * cancel it, maybe in a few microseconds data can be
8838 * corrupted in this pages. So the short story is:
8840 * Better to wait for the job to move into the
8841 * next queue (processed)... */
8843 /* We try again and again until the job is completed. */
8845 /* But let's wait some time for the I/O thread
8846 * to finish with this job. After all this condition
8847 * should be very rare. */
8850 case 2: /* io_processed */
8851 /* The job was already processed, that's easy...
8852 * just mark it as canceled so that we'll ignore it
8853 * when processing completed jobs. */
8857 /* Finally we have to adjust the storage type of the object
8858 * in order to "UNDO" the operation. */
8859 if (o
->storage
== REDIS_VM_LOADING
)
8860 o
->storage
= REDIS_VM_SWAPPED
;
8861 else if (o
->storage
== REDIS_VM_SWAPPING
)
8862 o
->storage
= REDIS_VM_MEMORY
;
8869 assert(1 != 1); /* We should never reach this */
8872 static void *IOThreadEntryPoint(void *arg
) {
8877 pthread_detach(pthread_self());
8879 /* Get a new job to process */
8881 if (listLength(server
.io_newjobs
) == 0) {
8882 /* No new jobs in queue, exit. */
8883 redisLog(REDIS_DEBUG
,"Thread %ld exiting, nothing to do",
8884 (long) pthread_self());
8885 server
.io_active_threads
--;
8889 ln
= listFirst(server
.io_newjobs
);
8891 listDelNode(server
.io_newjobs
,ln
);
8892 /* Add the job in the processing queue */
8893 j
->thread
= pthread_self();
8894 listAddNodeTail(server
.io_processing
,j
);
8895 ln
= listLast(server
.io_processing
); /* We use ln later to remove it */
8897 redisLog(REDIS_DEBUG
,"Thread %ld got a new job (type %d): %p about key '%s'",
8898 (long) pthread_self(), j
->type
, (void*)j
, (char*)j
->key
->ptr
);
8900 /* Process the Job */
8901 if (j
->type
== REDIS_IOJOB_LOAD
) {
8902 j
->val
= vmReadObjectFromSwap(j
->page
,j
->key
->vtype
);
8903 } else if (j
->type
== REDIS_IOJOB_PREPARE_SWAP
) {
8904 FILE *fp
= fopen("/dev/null","w+");
8905 j
->pages
= rdbSavedObjectPages(j
->val
,fp
);
8907 } else if (j
->type
== REDIS_IOJOB_DO_SWAP
) {
8908 if (vmWriteObjectOnSwap(j
->val
,j
->page
) == REDIS_ERR
)
8912 /* Done: insert the job into the processed queue */
8913 redisLog(REDIS_DEBUG
,"Thread %ld completed the job: %p (key %s)",
8914 (long) pthread_self(), (void*)j
, (char*)j
->key
->ptr
);
8916 listDelNode(server
.io_processing
,ln
);
8917 listAddNodeTail(server
.io_processed
,j
);
8920 /* Signal the main thread there is new stuff to process */
8921 assert(write(server
.io_ready_pipe_write
,"x",1) == 1);
8923 return NULL
; /* never reached */
8926 static void spawnIOThread(void) {
8928 sigset_t mask
, omask
;
8932 sigaddset(&mask
,SIGCHLD
);
8933 sigaddset(&mask
,SIGHUP
);
8934 sigaddset(&mask
,SIGPIPE
);
8935 pthread_sigmask(SIG_SETMASK
, &mask
, &omask
);
8936 while ((err
= pthread_create(&thread
,&server
.io_threads_attr
,IOThreadEntryPoint
,NULL
)) != 0) {
8937 redisLog(REDIS_WARNING
,"Unable to spawn an I/O thread: %s",
8941 pthread_sigmask(SIG_SETMASK
, &omask
, NULL
);
8942 server
.io_active_threads
++;
8945 /* We need to wait for the last thread to exit before we are able to
8946 * fork() in order to BGSAVE or BGREWRITEAOF. */
8947 static void waitEmptyIOJobsQueue(void) {
8949 int io_processed_len
;
8952 if (listLength(server
.io_newjobs
) == 0 &&
8953 listLength(server
.io_processing
) == 0 &&
8954 server
.io_active_threads
== 0)
8959 /* While waiting for the empty jobs queue condition we post-process some
8960 * finished jobs, as I/O threads may be hanging trying to write against
8961 * the io_ready_pipe_write FD but there are so much pending jobs that
8963 io_processed_len
= listLength(server
.io_processed
);
8965 if (io_processed_len
) {
8966 vmThreadedIOCompletedJob(NULL
,server
.io_ready_pipe_read
,NULL
,0);
8967 usleep(1000); /* 1 millisecond */
8969 usleep(10000); /* 10 milliseconds */
8974 static void vmReopenSwapFile(void) {
8975 /* Note: we don't close the old one as we are in the child process
8976 * and don't want to mess at all with the original file object. */
8977 server
.vm_fp
= fopen(server
.vm_swap_file
,"r+b");
8978 if (server
.vm_fp
== NULL
) {
8979 redisLog(REDIS_WARNING
,"Can't re-open the VM swap file: %s. Exiting.",
8980 server
.vm_swap_file
);
8983 server
.vm_fd
= fileno(server
.vm_fp
);
8986 /* This function must be called with threaded I/O locked. */
8987 static void queueIOJob(iojob
*j
) {
8988 redisLog(REDIS_DEBUG
,"Queued IO Job %p type %d about key '%s'\n",
8989 (void*)j
, j
->type
, (char*)j
->key
->ptr
);
8990 listAddNodeTail(server
.io_newjobs
,j
);
8991 if (server
.io_active_threads
< server
.vm_max_threads
)
8995 static int vmSwapObjectThreaded(robj
*key
, robj
*val
, redisDb
*db
) {
8998 assert(key
->storage
== REDIS_VM_MEMORY
);
8999 assert(key
->refcount
== 1);
9001 j
= zmalloc(sizeof(*j
));
9002 j
->type
= REDIS_IOJOB_PREPARE_SWAP
;
9008 j
->thread
= (pthread_t
) -1;
9009 key
->storage
= REDIS_VM_SWAPPING
;
9017 /* ============ Virtual Memory - Blocking clients on missing keys =========== */
9019 /* This function makes the client 'c' wait for the key 'key' to be loaded.
9020 * If there is not already a job loading the key, it is created.
9021 * The key is added to the io_keys list in the client structure, and also
9022 * in the hash table mapping swapped keys to waiting clients, that is,
9023 * server.io_waited_keys. */
9024 static int waitForSwappedKey(redisClient
*c
, robj
*key
) {
9025 struct dictEntry
*de
;
9029 /* If the key does not exist or is already in RAM we don't need to
9030 * block the client at all. */
9031 de
= dictFind(c
->db
->dict
,key
);
9032 if (de
== NULL
) return 0;
9033 o
= dictGetEntryKey(de
);
9034 if (o
->storage
== REDIS_VM_MEMORY
) {
9036 } else if (o
->storage
== REDIS_VM_SWAPPING
) {
9037 /* We were swapping the key, undo it! */
9038 vmCancelThreadedIOJob(o
);
9042 /* OK: the key is either swapped, or being loaded just now. */
9044 /* Add the key to the list of keys this client is waiting for.
9045 * This maps clients to keys they are waiting for. */
9046 listAddNodeTail(c
->io_keys
,key
);
9049 /* Add the client to the swapped keys => clients waiting map. */
9050 de
= dictFind(c
->db
->io_keys
,key
);
9054 /* For every key we take a list of clients blocked for it */
9056 retval
= dictAdd(c
->db
->io_keys
,key
,l
);
9058 assert(retval
== DICT_OK
);
9060 l
= dictGetEntryVal(de
);
9062 listAddNodeTail(l
,c
);
9064 /* Are we already loading the key from disk? If not create a job */
9065 if (o
->storage
== REDIS_VM_SWAPPED
) {
9068 o
->storage
= REDIS_VM_LOADING
;
9069 j
= zmalloc(sizeof(*j
));
9070 j
->type
= REDIS_IOJOB_LOAD
;
9073 j
->key
->vtype
= o
->vtype
;
9074 j
->page
= o
->vm
.page
;
9077 j
->thread
= (pthread_t
) -1;
9085 /* Preload keys needed for the ZUNION and ZINTER commands. */
9086 static void zunionInterBlockClientOnSwappedKeys(redisClient
*c
) {
9088 num
= atoi(c
->argv
[2]->ptr
);
9089 for (i
= 0; i
< num
; i
++) {
9090 waitForSwappedKey(c
,c
->argv
[3+i
]);
9094 /* Is this client attempting to run a command against swapped keys?
9095 * If so, block it ASAP, load the keys in background, then resume it.
9097 * The important idea about this function is that it can fail! If keys will
9098 * still be swapped when the client is resumed, this key lookups will
9099 * just block loading keys from disk. In practical terms this should only
9100 * happen with SORT BY command or if there is a bug in this function.
9102 * Return 1 if the client is marked as blocked, 0 if the client can
9103 * continue as the keys it is going to access appear to be in memory. */
9104 static int blockClientOnSwappedKeys(struct redisCommand
*cmd
, redisClient
*c
) {
9107 if (cmd
->vm_preload_proc
!= NULL
) {
9108 cmd
->vm_preload_proc(c
);
9110 if (cmd
->vm_firstkey
== 0) return 0;
9111 last
= cmd
->vm_lastkey
;
9112 if (last
< 0) last
= c
->argc
+last
;
9113 for (j
= cmd
->vm_firstkey
; j
<= last
; j
+= cmd
->vm_keystep
)
9114 waitForSwappedKey(c
,c
->argv
[j
]);
9117 /* If the client was blocked for at least one key, mark it as blocked. */
9118 if (listLength(c
->io_keys
)) {
9119 c
->flags
|= REDIS_IO_WAIT
;
9120 aeDeleteFileEvent(server
.el
,c
->fd
,AE_READABLE
);
9121 server
.vm_blocked_clients
++;
9128 /* Remove the 'key' from the list of blocked keys for a given client.
9130 * The function returns 1 when there are no longer blocking keys after
9131 * the current one was removed (and the client can be unblocked). */
9132 static int dontWaitForSwappedKey(redisClient
*c
, robj
*key
) {
9136 struct dictEntry
*de
;
9138 /* Remove the key from the list of keys this client is waiting for. */
9139 listRewind(c
->io_keys
,&li
);
9140 while ((ln
= listNext(&li
)) != NULL
) {
9141 if (compareStringObjects(ln
->value
,key
) == 0) {
9142 listDelNode(c
->io_keys
,ln
);
9148 /* Remove the client form the key => waiting clients map. */
9149 de
= dictFind(c
->db
->io_keys
,key
);
9151 l
= dictGetEntryVal(de
);
9152 ln
= listSearchKey(l
,c
);
9155 if (listLength(l
) == 0)
9156 dictDelete(c
->db
->io_keys
,key
);
9158 return listLength(c
->io_keys
) == 0;
9161 static void handleClientsBlockedOnSwappedKey(redisDb
*db
, robj
*key
) {
9162 struct dictEntry
*de
;
9167 de
= dictFind(db
->io_keys
,key
);
9170 l
= dictGetEntryVal(de
);
9171 len
= listLength(l
);
9172 /* Note: we can't use something like while(listLength(l)) as the list
9173 * can be freed by the calling function when we remove the last element. */
9176 redisClient
*c
= ln
->value
;
9178 if (dontWaitForSwappedKey(c
,key
)) {
9179 /* Put the client in the list of clients ready to go as we
9180 * loaded all the keys about it. */
9181 listAddNodeTail(server
.io_ready_clients
,c
);
9186 /* =========================== Remote Configuration ========================= */
9188 static void configSetCommand(redisClient
*c
) {
9189 robj
*o
= getDecodedObject(c
->argv
[3]);
9190 if (!strcasecmp(c
->argv
[2]->ptr
,"dbfilename")) {
9191 zfree(server
.dbfilename
);
9192 server
.dbfilename
= zstrdup(o
->ptr
);
9193 } else if (!strcasecmp(c
->argv
[2]->ptr
,"requirepass")) {
9194 zfree(server
.requirepass
);
9195 server
.requirepass
= zstrdup(o
->ptr
);
9196 } else if (!strcasecmp(c
->argv
[2]->ptr
,"masterauth")) {
9197 zfree(server
.masterauth
);
9198 server
.masterauth
= zstrdup(o
->ptr
);
9199 } else if (!strcasecmp(c
->argv
[2]->ptr
,"maxmemory")) {
9200 server
.maxmemory
= strtoll(o
->ptr
, NULL
, 10);
9202 addReplySds(c
,sdscatprintf(sdsempty(),
9203 "-ERR not supported CONFIG parameter %s\r\n",
9204 (char*)c
->argv
[2]->ptr
));
9209 addReply(c
,shared
.ok
);
9212 static void configGetCommand(redisClient
*c
) {
9213 robj
*o
= getDecodedObject(c
->argv
[2]);
9214 robj
*lenobj
= createObject(REDIS_STRING
,NULL
);
9215 char *pattern
= o
->ptr
;
9219 decrRefCount(lenobj
);
9221 if (stringmatch(pattern
,"dbfilename",0)) {
9222 addReplyBulkCString(c
,"dbfilename");
9223 addReplyBulkCString(c
,server
.dbfilename
);
9226 if (stringmatch(pattern
,"requirepass",0)) {
9227 addReplyBulkCString(c
,"requirepass");
9228 addReplyBulkCString(c
,server
.requirepass
);
9231 if (stringmatch(pattern
,"masterauth",0)) {
9232 addReplyBulkCString(c
,"masterauth");
9233 addReplyBulkCString(c
,server
.masterauth
);
9236 if (stringmatch(pattern
,"maxmemory",0)) {
9239 snprintf(buf
,128,"%llu\n",server
.maxmemory
);
9240 addReplyBulkCString(c
,"maxmemory");
9241 addReplyBulkCString(c
,buf
);
9245 lenobj
->ptr
= sdscatprintf(sdsempty(),"*%d\r\n",matches
*2);
9248 static void configCommand(redisClient
*c
) {
9249 if (!strcasecmp(c
->argv
[1]->ptr
,"set")) {
9250 if (c
->argc
!= 4) goto badarity
;
9251 configSetCommand(c
);
9252 } else if (!strcasecmp(c
->argv
[1]->ptr
,"get")) {
9253 if (c
->argc
!= 3) goto badarity
;
9254 configGetCommand(c
);
9255 } else if (!strcasecmp(c
->argv
[1]->ptr
,"resetstat")) {
9256 if (c
->argc
!= 2) goto badarity
;
9257 server
.stat_numcommands
= 0;
9258 server
.stat_numconnections
= 0;
9259 server
.stat_expiredkeys
= 0;
9260 server
.stat_starttime
= time(NULL
);
9261 addReply(c
,shared
.ok
);
9263 addReplySds(c
,sdscatprintf(sdsempty(),
9264 "-ERR CONFIG subcommand must be one of GET, SET, RESETSTAT\r\n"));
9269 addReplySds(c
,sdscatprintf(sdsempty(),
9270 "-ERR Wrong number of arguments for CONFIG %s\r\n",
9271 (char*) c
->argv
[1]->ptr
));
9274 /* =========================== Pubsub implementation ======================== */
9276 static void freePubsubPattern(void *p
) {
9277 pubsubPattern
*pat
= p
;
9279 decrRefCount(pat
->pattern
);
9283 static int listMatchPubsubPattern(void *a
, void *b
) {
9284 pubsubPattern
*pa
= a
, *pb
= b
;
9286 return (pa
->client
== pb
->client
) &&
9287 (compareStringObjects(pa
->pattern
,pb
->pattern
) == 0);
9290 /* Subscribe a client to a channel. Returns 1 if the operation succeeded, or
9291 * 0 if the client was already subscribed to that channel. */
9292 static int pubsubSubscribeChannel(redisClient
*c
, robj
*channel
) {
9293 struct dictEntry
*de
;
9294 list
*clients
= NULL
;
9297 /* Add the channel to the client -> channels hash table */
9298 if (dictAdd(c
->pubsub_channels
,channel
,NULL
) == DICT_OK
) {
9300 incrRefCount(channel
);
9301 /* Add the client to the channel -> list of clients hash table */
9302 de
= dictFind(server
.pubsub_channels
,channel
);
9304 clients
= listCreate();
9305 dictAdd(server
.pubsub_channels
,channel
,clients
);
9306 incrRefCount(channel
);
9308 clients
= dictGetEntryVal(de
);
9310 listAddNodeTail(clients
,c
);
9312 /* Notify the client */
9313 addReply(c
,shared
.mbulk3
);
9314 addReply(c
,shared
.subscribebulk
);
9315 addReplyBulk(c
,channel
);
9316 addReplyLong(c
,dictSize(c
->pubsub_channels
)+listLength(c
->pubsub_patterns
));
9320 /* Unsubscribe a client from a channel. Returns 1 if the operation succeeded, or
9321 * 0 if the client was not subscribed to the specified channel. */
9322 static int pubsubUnsubscribeChannel(redisClient
*c
, robj
*channel
, int notify
) {
9323 struct dictEntry
*de
;
9328 /* Remove the channel from the client -> channels hash table */
9329 incrRefCount(channel
); /* channel may be just a pointer to the same object
9330 we have in the hash tables. Protect it... */
9331 if (dictDelete(c
->pubsub_channels
,channel
) == DICT_OK
) {
9333 /* Remove the client from the channel -> clients list hash table */
9334 de
= dictFind(server
.pubsub_channels
,channel
);
9336 clients
= dictGetEntryVal(de
);
9337 ln
= listSearchKey(clients
,c
);
9339 listDelNode(clients
,ln
);
9340 if (listLength(clients
) == 0) {
9341 /* Free the list and the associated hash entry altogether if this
9342 * was the last client, so that it will not be possible to abuse
9343 * Redis PUBSUB by creating millions of channels. */
9344 dictDelete(server
.pubsub_channels
,channel
);
9347 /* Notify the client */
9349 addReply(c
,shared
.mbulk3
);
9350 addReply(c
,shared
.unsubscribebulk
);
9351 addReplyBulk(c
,channel
);
9352 addReplyLong(c
,dictSize(c
->pubsub_channels
)+
9353 listLength(c
->pubsub_patterns
));
9356 decrRefCount(channel
); /* it is finally safe to release it */
9360 /* Subscribe a client to a pattern. Returns 1 if the operation succeeded, or 0 if the client was already subscribed to that pattern. */
9361 static int pubsubSubscribePattern(redisClient
*c
, robj
*pattern
) {
9364 if (listSearchKey(c
->pubsub_patterns
,pattern
) == NULL
) {
9367 listAddNodeTail(c
->pubsub_patterns
,pattern
);
9368 incrRefCount(pattern
);
9369 pat
= zmalloc(sizeof(*pat
));
9370 pat
->pattern
= getDecodedObject(pattern
);
9372 listAddNodeTail(server
.pubsub_patterns
,pat
);
9374 /* Notify the client */
9375 addReply(c
,shared
.mbulk3
);
9376 addReply(c
,shared
.psubscribebulk
);
9377 addReplyBulk(c
,pattern
);
9378 addReplyLong(c
,dictSize(c
->pubsub_channels
)+listLength(c
->pubsub_patterns
));
9382 /* Unsubscribe a client from a pattern. Returns 1 if the operation succeeded, or
9383 * 0 if the client was not subscribed to the specified pattern. */
9384 static int pubsubUnsubscribePattern(redisClient
*c
, robj
*pattern
, int notify
) {
9389 incrRefCount(pattern
); /* Protect the object. May be the same we remove */
9390 if ((ln
= listSearchKey(c
->pubsub_patterns
,pattern
)) != NULL
) {
9392 listDelNode(c
->pubsub_patterns
,ln
);
9394 pat
.pattern
= pattern
;
9395 ln
= listSearchKey(server
.pubsub_patterns
,&pat
);
9396 listDelNode(server
.pubsub_patterns
,ln
);
9398 /* Notify the client */
9400 addReply(c
,shared
.mbulk3
);
9401 addReply(c
,shared
.punsubscribebulk
);
9402 addReplyBulk(c
,pattern
);
9403 addReplyLong(c
,dictSize(c
->pubsub_channels
)+
9404 listLength(c
->pubsub_patterns
));
9406 decrRefCount(pattern
);
9410 /* Unsubscribe from all the channels. Return the number of channels the
9411 * client was subscribed to. */
9412 static int pubsubUnsubscribeAllChannels(redisClient
*c
, int notify
) {
9413 dictIterator
*di
= dictGetIterator(c
->pubsub_channels
);
9417 while((de
= dictNext(di
)) != NULL
) {
9418 robj
*channel
= dictGetEntryKey(de
);
9420 count
+= pubsubUnsubscribeChannel(c
,channel
,notify
);
9422 dictReleaseIterator(di
);
9426 /* Unsubscribe from all the patterns. Return the number of patterns the
9427 * client was subscribed to. */
9428 static int pubsubUnsubscribeAllPatterns(redisClient
*c
, int notify
) {
9433 listRewind(c
->pubsub_patterns
,&li
);
9434 while ((ln
= listNext(&li
)) != NULL
) {
9435 robj
*pattern
= ln
->value
;
9437 count
+= pubsubUnsubscribePattern(c
,pattern
,notify
);
9442 /* Publish a message */
9443 static int pubsubPublishMessage(robj
*channel
, robj
*message
) {
9445 struct dictEntry
*de
;
9449 /* Send to clients listening for that channel */
9450 de
= dictFind(server
.pubsub_channels
,channel
);
9452 list
*list
= dictGetEntryVal(de
);
9456 listRewind(list
,&li
);
9457 while ((ln
= listNext(&li
)) != NULL
) {
9458 redisClient
*c
= ln
->value
;
9460 addReply(c
,shared
.mbulk3
);
9461 addReply(c
,shared
.messagebulk
);
9462 addReplyBulk(c
,channel
);
9463 addReplyBulk(c
,message
);
9467 /* Send to clients listening to matching channels */
9468 if (listLength(server
.pubsub_patterns
)) {
9469 listRewind(server
.pubsub_patterns
,&li
);
9470 channel
= getDecodedObject(channel
);
9471 while ((ln
= listNext(&li
)) != NULL
) {
9472 pubsubPattern
*pat
= ln
->value
;
9474 if (stringmatchlen((char*)pat
->pattern
->ptr
,
9475 sdslen(pat
->pattern
->ptr
),
9476 (char*)channel
->ptr
,
9477 sdslen(channel
->ptr
),0)) {
9478 addReply(pat
->client
,shared
.mbulk3
);
9479 addReply(pat
->client
,shared
.messagebulk
);
9480 addReplyBulk(pat
->client
,channel
);
9481 addReplyBulk(pat
->client
,message
);
9485 decrRefCount(channel
);
9490 static void subscribeCommand(redisClient
*c
) {
9493 for (j
= 1; j
< c
->argc
; j
++)
9494 pubsubSubscribeChannel(c
,c
->argv
[j
]);
9497 static void unsubscribeCommand(redisClient
*c
) {
9499 pubsubUnsubscribeAllChannels(c
,1);
9504 for (j
= 1; j
< c
->argc
; j
++)
9505 pubsubUnsubscribeChannel(c
,c
->argv
[j
],1);
9509 static void psubscribeCommand(redisClient
*c
) {
9512 for (j
= 1; j
< c
->argc
; j
++)
9513 pubsubSubscribePattern(c
,c
->argv
[j
]);
9516 static void punsubscribeCommand(redisClient
*c
) {
9518 pubsubUnsubscribeAllPatterns(c
,1);
9523 for (j
= 1; j
< c
->argc
; j
++)
9524 pubsubUnsubscribePattern(c
,c
->argv
[j
],1);
9528 static void publishCommand(redisClient
*c
) {
9529 int receivers
= pubsubPublishMessage(c
->argv
[1],c
->argv
[2]);
9530 addReplyLong(c
,receivers
);
9533 /* ================================= Debugging ============================== */
9535 static void debugCommand(redisClient
*c
) {
9536 if (!strcasecmp(c
->argv
[1]->ptr
,"segfault")) {
9538 } else if (!strcasecmp(c
->argv
[1]->ptr
,"reload")) {
9539 if (rdbSave(server
.dbfilename
) != REDIS_OK
) {
9540 addReply(c
,shared
.err
);
9544 if (rdbLoad(server
.dbfilename
) != REDIS_OK
) {
9545 addReply(c
,shared
.err
);
9548 redisLog(REDIS_WARNING
,"DB reloaded by DEBUG RELOAD");
9549 addReply(c
,shared
.ok
);
9550 } else if (!strcasecmp(c
->argv
[1]->ptr
,"loadaof")) {
9552 if (loadAppendOnlyFile(server
.appendfilename
) != REDIS_OK
) {
9553 addReply(c
,shared
.err
);
9556 redisLog(REDIS_WARNING
,"Append Only File loaded by DEBUG LOADAOF");
9557 addReply(c
,shared
.ok
);
9558 } else if (!strcasecmp(c
->argv
[1]->ptr
,"object") && c
->argc
== 3) {
9559 dictEntry
*de
= dictFind(c
->db
->dict
,c
->argv
[2]);
9563 addReply(c
,shared
.nokeyerr
);
9566 key
= dictGetEntryKey(de
);
9567 val
= dictGetEntryVal(de
);
9568 if (!server
.vm_enabled
|| (key
->storage
== REDIS_VM_MEMORY
||
9569 key
->storage
== REDIS_VM_SWAPPING
)) {
9573 if (val
->encoding
< (sizeof(strencoding
)/sizeof(char*))) {
9574 strenc
= strencoding
[val
->encoding
];
9576 snprintf(buf
,64,"unknown encoding %d\n", val
->encoding
);
9579 addReplySds(c
,sdscatprintf(sdsempty(),
9580 "+Key at:%p refcount:%d, value at:%p refcount:%d "
9581 "encoding:%s serializedlength:%lld\r\n",
9582 (void*)key
, key
->refcount
, (void*)val
, val
->refcount
,
9583 strenc
, (long long) rdbSavedObjectLen(val
,NULL
)));
9585 addReplySds(c
,sdscatprintf(sdsempty(),
9586 "+Key at:%p refcount:%d, value swapped at: page %llu "
9587 "using %llu pages\r\n",
9588 (void*)key
, key
->refcount
, (unsigned long long) key
->vm
.page
,
9589 (unsigned long long) key
->vm
.usedpages
));
9591 } else if (!strcasecmp(c
->argv
[1]->ptr
,"swapin") && c
->argc
== 3) {
9592 lookupKeyRead(c
->db
,c
->argv
[2]);
9593 addReply(c
,shared
.ok
);
9594 } else if (!strcasecmp(c
->argv
[1]->ptr
,"swapout") && c
->argc
== 3) {
9595 dictEntry
*de
= dictFind(c
->db
->dict
,c
->argv
[2]);
9598 if (!server
.vm_enabled
) {
9599 addReplySds(c
,sdsnew("-ERR Virtual Memory is disabled\r\n"));
9603 addReply(c
,shared
.nokeyerr
);
9606 key
= dictGetEntryKey(de
);
9607 val
= dictGetEntryVal(de
);
9608 /* If the key is shared we want to create a copy */
9609 if (key
->refcount
> 1) {
9610 robj
*newkey
= dupStringObject(key
);
9612 key
= dictGetEntryKey(de
) = newkey
;
9615 if (key
->storage
!= REDIS_VM_MEMORY
) {
9616 addReplySds(c
,sdsnew("-ERR This key is not in memory\r\n"));
9617 } else if (vmSwapObjectBlocking(key
,val
) == REDIS_OK
) {
9618 dictGetEntryVal(de
) = NULL
;
9619 addReply(c
,shared
.ok
);
9621 addReply(c
,shared
.err
);
9624 addReplySds(c
,sdsnew(
9625 "-ERR Syntax error, try DEBUG [SEGFAULT|OBJECT <key>|SWAPIN <key>|SWAPOUT <key>|RELOAD]\r\n"));
9629 static void _redisAssert(char *estr
, char *file
, int line
) {
9630 redisLog(REDIS_WARNING
,"=== ASSERTION FAILED ===");
9631 redisLog(REDIS_WARNING
,"==> %s:%d '%s' is not true\n",file
,line
,estr
);
9632 #ifdef HAVE_BACKTRACE
9633 redisLog(REDIS_WARNING
,"(forcing SIGSEGV in order to print the stack trace)");
9638 /* =================================== Main! ================================ */
9641 int linuxOvercommitMemoryValue(void) {
9642 FILE *fp
= fopen("/proc/sys/vm/overcommit_memory","r");
9646 if (fgets(buf
,64,fp
) == NULL
) {
9655 void linuxOvercommitMemoryWarning(void) {
9656 if (linuxOvercommitMemoryValue() == 0) {
9657 redisLog(REDIS_WARNING
,"WARNING overcommit_memory is set to 0! Background save may fail under low condition memory. To fix this issue add 'vm.overcommit_memory = 1' to /etc/sysctl.conf and then reboot or run the command 'sysctl vm.overcommit_memory=1' for this to take effect.");
9660 #endif /* __linux__ */
9662 static void daemonize(void) {
9666 if (fork() != 0) exit(0); /* parent exits */
9667 setsid(); /* create a new session */
9669 /* Every output goes to /dev/null. If Redis is daemonized but
9670 * the 'logfile' is set to 'stdout' in the configuration file
9671 * it will not log at all. */
9672 if ((fd
= open("/dev/null", O_RDWR
, 0)) != -1) {
9673 dup2(fd
, STDIN_FILENO
);
9674 dup2(fd
, STDOUT_FILENO
);
9675 dup2(fd
, STDERR_FILENO
);
9676 if (fd
> STDERR_FILENO
) close(fd
);
9678 /* Try to write the pid file */
9679 fp
= fopen(server
.pidfile
,"w");
9681 fprintf(fp
,"%d\n",getpid());
9686 static void version() {
9687 printf("Redis server version %s\n", REDIS_VERSION
);
9691 static void usage() {
9692 fprintf(stderr
,"Usage: ./redis-server [/path/to/redis.conf]\n");
9693 fprintf(stderr
," ./redis-server - (read config from stdin)\n");
9697 int main(int argc
, char **argv
) {
9702 if (strcmp(argv
[1], "-v") == 0 ||
9703 strcmp(argv
[1], "--version") == 0) version();
9704 if (strcmp(argv
[1], "--help") == 0) usage();
9705 resetServerSaveParams();
9706 loadServerConfig(argv
[1]);
9707 } else if ((argc
> 2)) {
9710 redisLog(REDIS_WARNING
,"Warning: no config file specified, using the default config. In order to specify a config file use 'redis-server /path/to/redis.conf'");
9712 if (server
.daemonize
) daemonize();
9714 redisLog(REDIS_NOTICE
,"Server started, Redis version " REDIS_VERSION
);
9716 linuxOvercommitMemoryWarning();
9719 if (server
.appendonly
) {
9720 if (loadAppendOnlyFile(server
.appendfilename
) == REDIS_OK
)
9721 redisLog(REDIS_NOTICE
,"DB loaded from append only file: %ld seconds",time(NULL
)-start
);
9723 if (rdbLoad(server
.dbfilename
) == REDIS_OK
)
9724 redisLog(REDIS_NOTICE
,"DB loaded from disk: %ld seconds",time(NULL
)-start
);
9726 redisLog(REDIS_NOTICE
,"The server is now ready to accept connections on port %d", server
.port
);
9727 aeSetBeforeSleepProc(server
.el
,beforeSleep
);
9729 aeDeleteEventLoop(server
.el
);
9733 /* ============================= Backtrace support ========================= */
9735 #ifdef HAVE_BACKTRACE
9736 static char *findFuncName(void *pointer
, unsigned long *offset
);
9738 static void *getMcontextEip(ucontext_t
*uc
) {
9739 #if defined(__FreeBSD__)
9740 return (void*) uc
->uc_mcontext
.mc_eip
;
9741 #elif defined(__dietlibc__)
9742 return (void*) uc
->uc_mcontext
.eip
;
9743 #elif defined(__APPLE__) && !defined(MAC_OS_X_VERSION_10_6)
9745 return (void*) uc
->uc_mcontext
->__ss
.__rip
;
9747 return (void*) uc
->uc_mcontext
->__ss
.__eip
;
9749 #elif defined(__APPLE__) && defined(MAC_OS_X_VERSION_10_6)
9750 #if defined(_STRUCT_X86_THREAD_STATE64) && !defined(__i386__)
9751 return (void*) uc
->uc_mcontext
->__ss
.__rip
;
9753 return (void*) uc
->uc_mcontext
->__ss
.__eip
;
9755 #elif defined(__i386__) || defined(__X86_64__) || defined(__x86_64__)
9756 return (void*) uc
->uc_mcontext
.gregs
[REG_EIP
]; /* Linux 32/64 bit */
9757 #elif defined(__ia64__) /* Linux IA64 */
9758 return (void*) uc
->uc_mcontext
.sc_ip
;
9764 static void segvHandler(int sig
, siginfo_t
*info
, void *secret
) {
9766 char **messages
= NULL
;
9767 int i
, trace_size
= 0;
9768 unsigned long offset
=0;
9769 ucontext_t
*uc
= (ucontext_t
*) secret
;
9771 REDIS_NOTUSED(info
);
9773 redisLog(REDIS_WARNING
,
9774 "======= Ooops! Redis %s got signal: -%d- =======", REDIS_VERSION
, sig
);
9775 infostring
= genRedisInfoString();
9776 redisLog(REDIS_WARNING
, "%s",infostring
);
9777 /* It's not safe to sdsfree() the returned string under memory
9778 * corruption conditions. Let it leak as we are going to abort */
9780 trace_size
= backtrace(trace
, 100);
9781 /* overwrite sigaction with caller's address */
9782 if (getMcontextEip(uc
) != NULL
) {
9783 trace
[1] = getMcontextEip(uc
);
9785 messages
= backtrace_symbols(trace
, trace_size
);
9787 for (i
=1; i
<trace_size
; ++i
) {
9788 char *fn
= findFuncName(trace
[i
], &offset
), *p
;
9790 p
= strchr(messages
[i
],'+');
9791 if (!fn
|| (p
&& ((unsigned long)strtol(p
+1,NULL
,10)) < offset
)) {
9792 redisLog(REDIS_WARNING
,"%s", messages
[i
]);
9794 redisLog(REDIS_WARNING
,"%d redis-server %p %s + %d", i
, trace
[i
], fn
, (unsigned int)offset
);
9797 /* free(messages); Don't call free() with possibly corrupted memory. */
9801 static void setupSigSegvAction(void) {
9802 struct sigaction act
;
9804 sigemptyset (&act
.sa_mask
);
9805 /* When the SA_SIGINFO flag is set in sa_flags then sa_sigaction
9806 * is used. Otherwise, sa_handler is used */
9807 act
.sa_flags
= SA_NODEFER
| SA_ONSTACK
| SA_RESETHAND
| SA_SIGINFO
;
9808 act
.sa_sigaction
= segvHandler
;
9809 sigaction (SIGSEGV
, &act
, NULL
);
9810 sigaction (SIGBUS
, &act
, NULL
);
9811 sigaction (SIGFPE
, &act
, NULL
);
9812 sigaction (SIGILL
, &act
, NULL
);
9813 sigaction (SIGBUS
, &act
, NULL
);
9817 #include "staticsymbols.h"
9818 /* This function tries to convert a pointer into a function name. It's used in
9819 * order to provide a backtrace under segmentation fault that's able to
9820 * display functions declared as static (otherwise the backtrace is useless). */
9821 static char *findFuncName(void *pointer
, unsigned long *offset
){
9823 unsigned long off
, minoff
= 0;
9825 /* Try to match against the Symbol with the smallest offset */
9826 for (i
=0; symsTable
[i
].pointer
; i
++) {
9827 unsigned long lp
= (unsigned long) pointer
;
9829 if (lp
!= (unsigned long)-1 && lp
>= symsTable
[i
].pointer
) {
9830 off
=lp
-symsTable
[i
].pointer
;
9831 if (ret
< 0 || off
< minoff
) {
9837 if (ret
== -1) return NULL
;
9839 return symsTable
[ret
].name
;
9841 #else /* HAVE_BACKTRACE */
9842 static void setupSigSegvAction(void) {
9844 #endif /* HAVE_BACKTRACE */