2 * Copyright (c) 2009-2010, Salvatore Sanfilippo <antirez at gmail dot com>
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions are met:
8 * * Redistributions of source code must retain the above copyright notice,
9 * this list of conditions and the following disclaimer.
10 * * Redistributions in binary form must reproduce the above copyright
11 * notice, this list of conditions and the following disclaimer in the
12 * documentation and/or other materials provided with the distribution.
13 * * Neither the name of Redis nor the names of its contributors may be used
14 * to endorse or promote products derived from this software without
15 * specific prior written permission.
17 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
18 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
21 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
22 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
23 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
24 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
25 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
26 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
27 * POSSIBILITY OF SUCH DAMAGE.
30 #define REDIS_VERSION "1.3.8"
40 #define __USE_POSIX199309
47 #endif /* HAVE_BACKTRACE */
55 #include <arpa/inet.h>
59 #include <sys/resource.h>
66 #include "solarisfixes.h"
70 #include "ae.h" /* Event driven programming library */
71 #include "sds.h" /* Dynamic safe strings */
72 #include "anet.h" /* Networking the easy way */
73 #include "dict.h" /* Hash tables */
74 #include "adlist.h" /* Linked lists */
75 #include "zmalloc.h" /* total memory usage aware version of malloc/free */
76 #include "lzf.h" /* LZF compression library */
77 #include "pqsort.h" /* Partial qsort for SORT+LIMIT */
84 /* Static server configuration */
85 #define REDIS_SERVERPORT 6379 /* TCP port */
86 #define REDIS_MAXIDLETIME (60*5) /* default client timeout */
87 #define REDIS_IOBUF_LEN 1024
88 #define REDIS_LOADBUF_LEN 1024
89 #define REDIS_STATIC_ARGS 8
90 #define REDIS_DEFAULT_DBNUM 16
91 #define REDIS_CONFIGLINE_MAX 1024
92 #define REDIS_OBJFREELIST_MAX 1000000 /* Max number of objects to cache */
93 #define REDIS_MAX_SYNC_TIME 60 /* Slave can't take more to sync */
94 #define REDIS_EXPIRELOOKUPS_PER_CRON 10 /* try to expire 10 keys/loop */
95 #define REDIS_MAX_WRITE_PER_EVENT (1024*64)
96 #define REDIS_REQUEST_MAX_SIZE (1024*1024*256) /* max bytes in inline command */
98 /* If more than REDIS_WRITEV_THRESHOLD write packets are pending use writev */
99 #define REDIS_WRITEV_THRESHOLD 3
100 /* Max number of iovecs used for each writev call */
101 #define REDIS_WRITEV_IOVEC_COUNT 256
103 /* Hash table parameters */
104 #define REDIS_HT_MINFILL 10 /* Minimal hash table fill 10% */
107 #define REDIS_CMD_BULK 1 /* Bulk write command */
108 #define REDIS_CMD_INLINE 2 /* Inline command */
109 /* REDIS_CMD_DENYOOM deserves a longer comment: all the commands marked with
110 this flag will return an error when the 'maxmemory' option is set in the
111 config file and the server is using more than maxmemory bytes of memory.
112 In short these commands are denied on low memory conditions. */
113 #define REDIS_CMD_DENYOOM 4
114 #define REDIS_CMD_FORCE_REPLICATION 8 /* Force replication even if dirty is 0 */
117 #define REDIS_STRING 0
123 /* Objects encoding. Some kinds of objects like Strings and Hashes can be
124 * internally represented in multiple ways. The 'encoding' field of the object
125 * is set to one of these values for this object. */
126 #define REDIS_ENCODING_RAW 0 /* Raw representation */
127 #define REDIS_ENCODING_INT 1 /* Encoded as integer */
128 #define REDIS_ENCODING_ZIPMAP 2 /* Encoded as zipmap */
129 #define REDIS_ENCODING_HT 3 /* Encoded as an hash table */
131 static char* strencoding
[] = {
132 "raw", "int", "zipmap", "hashtable"
135 /* Object types only used for dumping to disk */
136 #define REDIS_EXPIRETIME 253
137 #define REDIS_SELECTDB 254
138 #define REDIS_EOF 255
140 /* Defines related to the dump file format. To store 32 bits lengths for short
141 * keys requires a lot of space, so we check the most significant 2 bits of
142 * the first byte to interpret the length:
144 * 00|000000 => if the two MSB are 00 the len is the 6 bits of this byte
145 * 01|000000 00000000 => 01, the len is 14 bits, 6 bits + 8 bits of next byte
146 * 10|000000 [32 bit integer] => if it's 10, a full 32 bit len will follow
147 * 11|000000 this means: specially encoded object will follow. The six bits
148 * number specifies the kind of object that follows.
149 * See the REDIS_RDB_ENC_* defines.
151 * Lengths up to 63 are stored using a single byte, most DB keys, and many
152 * values, will fit inside. */
153 #define REDIS_RDB_6BITLEN 0
154 #define REDIS_RDB_14BITLEN 1
155 #define REDIS_RDB_32BITLEN 2
156 #define REDIS_RDB_ENCVAL 3
157 #define REDIS_RDB_LENERR UINT_MAX
159 /* When a length of a string object stored on disk has the first two bits
160 * set, the remaining six bits specify a special encoding for the object
161 * according to the following defines: */
162 #define REDIS_RDB_ENC_INT8 0 /* 8 bit signed integer */
163 #define REDIS_RDB_ENC_INT16 1 /* 16 bit signed integer */
164 #define REDIS_RDB_ENC_INT32 2 /* 32 bit signed integer */
165 #define REDIS_RDB_ENC_LZF 3 /* string compressed with FASTLZ */
167 /* Virtual memory object->where field. */
168 #define REDIS_VM_MEMORY 0 /* The object is on memory */
169 #define REDIS_VM_SWAPPED 1 /* The object is on disk */
170 #define REDIS_VM_SWAPPING 2 /* Redis is swapping this object on disk */
171 #define REDIS_VM_LOADING 3 /* Redis is loading this object from disk */
173 /* Virtual memory static configuration stuff.
174 * Check vmFindContiguousPages() to know more about these magic numbers. */
175 #define REDIS_VM_MAX_NEAR_PAGES 65536
176 #define REDIS_VM_MAX_RANDOM_JUMP 4096
177 #define REDIS_VM_MAX_THREADS 32
178 #define REDIS_THREAD_STACK_SIZE (1024*1024*4)
179 /* The following is the *percentage* of completed I/O jobs to process when the
180 * handler is called. While Virtual Memory I/O operations are performed by
181 * threads, these operations must be processed by the main thread when completed
182 * in order to take effect. */
183 #define REDIS_MAX_COMPLETED_JOBS_PROCESSED 1
186 #define REDIS_SLAVE 1 /* This client is a slave server */
187 #define REDIS_MASTER 2 /* This client is a master server */
188 #define REDIS_MONITOR 4 /* This client is a slave monitor, see MONITOR */
189 #define REDIS_MULTI 8 /* This client is in a MULTI context */
190 #define REDIS_BLOCKED 16 /* The client is waiting in a blocking operation */
191 #define REDIS_IO_WAIT 32 /* The client is waiting for Virtual Memory I/O */
193 /* Slave replication state - slave side */
194 #define REDIS_REPL_NONE 0 /* No active replication */
195 #define REDIS_REPL_CONNECT 1 /* Must connect to master */
196 #define REDIS_REPL_CONNECTED 2 /* Connected to master */
198 /* Slave replication state - from the point of view of master
199 * Note that in SEND_BULK and ONLINE state the slave receives new updates
200 * in its output queue. In the WAIT_BGSAVE state instead the server is waiting
201 * to start the next background saving in order to send updates to it. */
202 #define REDIS_REPL_WAIT_BGSAVE_START 3 /* master waits bgsave to start feeding it */
203 #define REDIS_REPL_WAIT_BGSAVE_END 4 /* master waits bgsave to start bulk DB transmission */
204 #define REDIS_REPL_SEND_BULK 5 /* master is sending the bulk DB */
205 #define REDIS_REPL_ONLINE 6 /* bulk DB already transmitted, receive updates */
207 /* List related stuff */
211 /* Sort operations */
212 #define REDIS_SORT_GET 0
213 #define REDIS_SORT_ASC 1
214 #define REDIS_SORT_DESC 2
215 #define REDIS_SORTKEY_MAX 1024
218 #define REDIS_DEBUG 0
219 #define REDIS_VERBOSE 1
220 #define REDIS_NOTICE 2
221 #define REDIS_WARNING 3
/* Anti-warning macro: evaluates V and discards the result, so that an
 * otherwise unused parameter or variable does not trigger a compiler warning.
 * The argument is parenthesized so that an expression argument such as
 * REDIS_NOTUSED(a + b) is cast to void as a whole instead of expanding to
 * the invalid "(void) a + b". */
#define REDIS_NOTUSED(V) ((void) (V))
226 #define ZSKIPLIST_MAXLEVEL 32 /* Should be enough for 2^32 elements */
227 #define ZSKIPLIST_P 0.25 /* Skiplist P = 1/4 */
229 /* Append only defines */
230 #define APPENDFSYNC_NO 0
231 #define APPENDFSYNC_ALWAYS 1
232 #define APPENDFSYNC_EVERYSEC 2
234 /* Hashes related defaults */
235 #define REDIS_HASH_MAX_ZIPMAP_ENTRIES 64
236 #define REDIS_HASH_MAX_ZIPMAP_VALUE 512
238 /* We can print the stacktrace, so our assert is defined this way: */
/* redisAssert(_e) evaluates _e. When it is true the macro is a no-op
 * ((void)0); when it is false _redisAssert() is called with the stringified
 * expression, the current file name and the current line number, and then
 * the process is terminated through _exit(1). */
239 #define redisAssert(_e) ((_e)?(void)0 : (_redisAssert(#_e,__FILE__,__LINE__),_exit(1)))
/* Helper invoked by redisAssert() on failure. Parameters: the text of the
 * failed expression, the source file name and the source line number. */
240 static void _redisAssert(char *estr
, char *file
, int line
);
242 /*================================= Data types ============================== */
244 /* A redis object, that is a type able to hold a string / list / set */
246 /* The VM object structure: where an object is stored on disk, how many
 * pages it uses there, and when it was last accessed. */
247 struct redisObjectVM
{
248 off_t page
; /* the page at which the object is stored on disk */
249 off_t usedpages
; /* number of pages used on disk */
250 time_t atime
; /* Last access time */
253 /* The actual Redis Object */
254 typedef struct redisObject
{
257 unsigned char encoding
; /* one of the REDIS_ENCODING_* values */
258 unsigned char storage
; /* If this object is a key, where is the value?
259 * REDIS_VM_MEMORY, REDIS_VM_SWAPPED, ... */
260 unsigned char vtype
; /* If this object is a key, and value is swapped out,
261 * this is the type of the swapped out object. */
263 /* VM fields, these are only allocated if VM is active, otherwise the
264 * object allocation function will just allocate
265 * sizeof(redisObject) minus sizeof(redisObjectVM), so using
266 * Redis without VM active will not have any overhead. */
267 struct redisObjectVM vm
;
270 /* Macro used to initialize a Redis object allocated on the stack.
271 * Note that this macro is kept near the structure definition to make sure
272 * we'll update it when the structure is changed, to avoid bugs like
273 * bug #85 introduced exactly in this way. */
274 #define initStaticStringObject(_var,_ptr) do { \
276 _var.type = REDIS_STRING; \
277 _var.encoding = REDIS_ENCODING_RAW; \
279 if (server.vm_enabled) _var.storage = REDIS_VM_MEMORY; \
/* State of a single database: the keyspace dictionary plus the auxiliary
 * dictionaries tracking key timeouts, keys that clients are blocked on, and
 * keys that clients are waiting for the VM to load. */
282 typedef struct redisDb
{
283 dict
*dict
; /* The keyspace for this DB */
284 dict
*expires
; /* Timeout of keys with a timeout set */
285 dict
*blockingkeys
; /* Keys with clients waiting for data (BLPOP) */
286 dict
*io_keys
; /* Keys with clients waiting for VM I/O */
290 /* Client MULTI/EXEC state */
/* multiCmd describes one element of the multiState.commands array; the
 * field visible here is the pointer to the command's redisCommand entry. */
291 typedef struct multiCmd
{
294 struct redisCommand
*cmd
;
/* multiState holds the array of multiCmd entries and its element count. */
297 typedef struct multiState
{
298 multiCmd
*commands
; /* Array of MULTI commands */
299 int count
; /* Total number of MULTI commands */
302 /* With multiplexing we need to keep per-client state.
303 * Clients are kept in a linked list. */
304 typedef struct redisClient
{
309 robj
**argv
, **mbargv
;
311 int bulklen
; /* bulk read len. -1 if not in bulk read mode */
312 int multibulk
; /* multi bulk command format active */
315 time_t lastinteraction
; /* time of the last interaction, used for timeout */
316 int flags
; /* REDIS_SLAVE | REDIS_MONITOR | REDIS_MULTI ... */
317 int slaveseldb
; /* slave selected db, if this client is a slave */
318 int authenticated
; /* when requirepass is non-NULL */
319 int replstate
; /* replication state if this is a slave */
320 int repldbfd
; /* replication DB file descriptor */
321 long repldboff
; /* replication DB file offset.
 * NOTE(review): declared long while repldbsize below is off_t;
 * confirm the two types agree on every supported target. */
322 off_t repldbsize
; /* replication DB file size */
323 multiState mstate
; /* MULTI/EXEC state */
324 robj
**blockingkeys
; /* The keys we are waiting for in order to terminate a blocking
325 * operation such as BLPOP. Otherwise NULL. */
326 int blockingkeysnum
; /* Number of blocking keys */
327 time_t blockingto
; /* Blocking operation timeout. If UNIX current time
328 * is >= blockingto then the operation timed out. */
329 list
*io_keys
; /* Keys this client is waiting to be loaded from the
330 * swap file in order to continue. */
331 dict
*pubsub_channels
; /* channels a client is interested in (SUBSCRIBE) */
332 list
*pubsub_patterns
; /* patterns a client is interested in (PSUBSCRIBE) */
340 /* Global server state structure */
345 long long dirty
; /* changes to DB from the last save */
347 list
*slaves
, *monitors
;
348 char neterr
[ANET_ERR_LEN
];
350 int cronloops
; /* number of times the cron function has run */
351 list
*objfreelist
; /* A list of freed objects to avoid malloc() */
352 time_t lastsave
; /* Unix time of the last successful save */
353 /* Fields used only for stats */
354 time_t stat_starttime
; /* server start time */
355 long long stat_numcommands
; /* number of processed commands */
356 long long stat_numconnections
; /* number of connections received */
357 long long stat_expiredkeys
; /* number of expired keys */
370 pid_t bgsavechildpid
;
371 pid_t bgrewritechildpid
;
372 sds bgrewritebuf
; /* buffer kept by the parent during append only rewrite */
373 struct saveparam
*saveparams
;
378 char *appendfilename
;
382 /* Replication related */
387 redisClient
*master
; /* client that is master for this slave */
389 unsigned int maxclients
;
390 unsigned long long maxmemory
;
391 unsigned int blpop_blocked_clients
;
392 unsigned int vm_blocked_clients
;
393 /* Sort parameters - qsort_r() is only available under BSD so we
394 * have to keep this state global, in order to pass it to sortCompare() */
398 /* Virtual memory configuration */
403 unsigned long long vm_max_memory
;
405 size_t hash_max_zipmap_entries
;
406 size_t hash_max_zipmap_value
;
407 /* Virtual memory state */
410 off_t vm_next_page
; /* Next probably empty page */
411 off_t vm_near_pages
; /* Number of pages allocated sequentially */
412 unsigned char *vm_bitmap
; /* Bitmap of free/used pages */
413 time_t unixtime
; /* Unix time sampled every second. */
414 /* Virtual memory I/O threads stuff */
415 /* An I/O thread processes an element taken from the io_jobs queue and
416 * puts the result of the operation in the io_done list. While the
417 * job is being processed, it's put on the io_processing queue.
 * NOTE(review): no fields named io_jobs or io_done are declared here; they
 * presumably correspond to io_newjobs and io_processed below - confirm. */
418 list
*io_newjobs
; /* List of VM I/O jobs yet to be processed */
419 list
*io_processing
; /* List of VM I/O jobs being processed */
420 list
*io_processed
; /* List of VM I/O jobs already processed */
421 list
*io_ready_clients
; /* Clients ready to be unblocked. All keys loaded */
422 pthread_mutex_t io_mutex
; /* lock to access io_jobs/io_done/io_thread_job */
423 pthread_mutex_t obj_freelist_mutex
; /* safe redis objects creation/free */
424 pthread_mutex_t io_swapfile_mutex
; /* So we can lseek + write */
425 pthread_attr_t io_threads_attr
; /* attributes for threads creation */
426 int io_active_threads
; /* Number of running I/O threads */
427 int vm_max_threads
; /* Max number of I/O threads running at the same time */
428 /* Our main thread is blocked on the event loop, looking for sockets ready
429 * to be read or written, so when a threaded I/O operation is ready to be
430 * processed by the main thread, the I/O thread will use a unix pipe to
431 * awake the main thread. The following are the two pipe FDs. */
432 int io_ready_pipe_read
;
433 int io_ready_pipe_write
;
434 /* Virtual memory stats */
435 unsigned long long vm_stats_used_pages
;
436 unsigned long long vm_stats_swapped_objects
;
437 unsigned long long vm_stats_swapouts
;
438 unsigned long long vm_stats_swapins
;
440 dict
*pubsub_channels
; /* Map channels to list of subscribed clients */
441 list
*pubsub_patterns
; /* A list of pubsub_patterns */
446 typedef struct pubsubPattern
{
/* Function type used for both the proc and vm_preload_proc members of
 * struct redisCommand: takes the client and returns nothing. */
451 typedef void redisCommandProc(redisClient
*c
);
452 struct redisCommand
{
454 redisCommandProc
*proc
;
457 /* Use a function to determine which keys need to be loaded
458 * in the background prior to executing this command. Takes precedence
459 * over vm_firstkey and others, ignored when NULL */
460 redisCommandProc
*vm_preload_proc
;
461 /* What keys should be loaded in background when calling this command? */
462 int vm_firstkey
; /* The first argument that's a key (0 = no keys) */
463 int vm_lastkey
; /* The last argument that's a key */
464 int vm_keystep
; /* The step between first and last key */
467 struct redisFunctionSym
{
469 unsigned long pointer
;
472 typedef struct _redisSortObject
{
480 typedef struct _redisSortOperation
{
483 } redisSortOperation
;
485 /* ZSETs use a specialized version of Skiplists */
487 typedef struct zskiplistNode
{
488 struct zskiplistNode
**forward
;
489 struct zskiplistNode
*backward
;
495 typedef struct zskiplist
{
496 struct zskiplistNode
*header
, *tail
;
497 unsigned long length
;
501 typedef struct zset
{
506 /* Our shared "common" objects */
508 #define REDIS_SHARED_INTEGERS 10000
509 struct sharedObjectsStruct
{
510 robj
*crlf
, *ok
, *err
, *emptybulk
, *czero
, *cone
, *pong
, *space
,
511 *colon
, *nullbulk
, *nullmultibulk
, *queued
,
512 *emptymultibulk
, *wrongtypeerr
, *nokeyerr
, *syntaxerr
, *sameobjecterr
,
513 *outofrangeerr
, *plus
,
514 *select0
, *select1
, *select2
, *select3
, *select4
,
515 *select5
, *select6
, *select7
, *select8
, *select9
,
516 *messagebulk
, *subscribebulk
, *unsubscribebulk
, *mbulk3
,
517 *psubscribebulk
, *punsubscribebulk
, *integers
[REDIS_SHARED_INTEGERS
];
520 /* Global vars that are actually used as constants. The following double
521 * values are used for double on-disk serialization, and are initialized
522 * at runtime to avoid strange compiler optimizations. */
524 static double R_Zero
, R_PosInf
, R_NegInf
, R_Nan
;
526 /* VM threaded I/O request message */
527 #define REDIS_IOJOB_LOAD 0 /* Load from disk to memory */
528 #define REDIS_IOJOB_PREPARE_SWAP 1 /* Compute needed pages */
529 #define REDIS_IOJOB_DO_SWAP 2 /* Swap from memory to disk */
/* One VM I/O job: the request type, the database and key it refers to, the
 * value involved, the swap page range, a cancellation flag and the ID of
 * the thread working on it. */
530 typedef struct iojob
{
531 int type
; /* Request type, REDIS_IOJOB_* */
532 redisDb
*db
;/* Redis database */
533 robj
*key
; /* This I/O request is about swapping this key */
534 robj
*val
; /* the value to swap for REDIS_IOJOB_*_SWAP, otherwise this
535 * field is populated by the I/O thread for REDIS_IOJOB_LOAD. */
536 off_t page
; /* Swap page where to read/write the object */
537 off_t pages
; /* Swap pages needed to save object. PREPARE_SWAP return val */
538 int canceled
; /* True if this command was canceled by blocking side of VM */
539 pthread_t thread
; /* ID of the thread processing this entry */
542 /*================================ Prototypes =============================== */
/* Forward declarations of file-local (static) functions, so that they can be
 * referenced before the point where they are defined. */

/* Object creation, release and reference counting */
544 static void freeStringObject(robj
*o
);
545 static void freeListObject(robj
*o
);
546 static void freeSetObject(robj
*o
);
547 static void decrRefCount(void *o
);
548 static robj
*createObject(int type
, void *ptr
);
549 static void freeClient(redisClient
*c
);
550 static int rdbLoad(char *filename
);
551 static void addReply(redisClient
*c
, robj
*obj
);
552 static void addReplySds(redisClient
*c
, sds s
);
553 static void incrRefCount(robj
*o
);
554 static int rdbSaveBackground(char *filename
);
555 static robj
*createStringObject(char *ptr
, size_t len
);
556 static robj
*dupStringObject(robj
*o
);
557 static void replicationFeedSlaves(list
*slaves
, int dictid
, robj
**argv
, int argc
);
558 static void feedAppendOnlyFile(struct redisCommand
*cmd
, int dictid
, robj
**argv
, int argc
);
559 static int syncWithMaster(void);
560 static robj
*tryObjectEncoding(robj
*o
);
561 static robj
*getDecodedObject(robj
*o
);
/* Key expiration and deletion; all of these take the database and the key */
562 static int removeExpire(redisDb
*db
, robj
*key
);
563 static int expireIfNeeded(redisDb
*db
, robj
*key
);
564 static int deleteIfVolatile(redisDb
*db
, robj
*key
);
565 static int deleteIfSwapped(redisDb
*db
, robj
*key
);
566 static int deleteKey(redisDb
*db
, robj
*key
);
567 static time_t getExpire(redisDb
*db
, robj
*key
);
568 static int setExpire(redisDb
*db
, robj
*key
, time_t when
);
569 static void updateSlavesWaitingBgsave(int bgsaveerr
);
570 static void freeMemoryIfNeeded(void);
571 static int processCommand(redisClient
*c
);
572 static void setupSigSegvAction(void);
573 static void rdbRemoveTempFile(pid_t childpid
);
574 static void aofRemoveTempFile(pid_t childpid
);
575 static size_t stringObjectLen(robj
*o
);
576 static void processInputBuffer(redisClient
*c
);
/* Skiplist (zskiplist) helpers */
577 static zskiplist
*zslCreate(void);
578 static void zslFree(zskiplist
*zsl
);
579 static void zslInsert(zskiplist
*zsl
, double score
, robj
*obj
);
580 static void sendReplyToClientWritev(aeEventLoop
*el
, int fd
, void *privdata
, int mask
);
/* Per-client MULTI state helpers */
581 static void initClientMultiState(redisClient
*c
);
582 static void freeClientMultiState(redisClient
*c
);
583 static void queueMultiCommand(redisClient
*c
, struct redisCommand
*cmd
);
584 static void unblockClientWaitingData(redisClient
*c
);
585 static int handleClientsWaitingListPush(redisClient
*c
, robj
*key
, robj
*ele
);
/* Forward declarations (continued): the vm* functions, event-loop handlers,
 * command dispatch helpers and pubsub helpers. All are file-local (static). */
586 static void vmInit(void);
587 static void vmMarkPagesFree(off_t page
, off_t count
);
588 static robj
*vmLoadObject(robj
*key
);
589 static robj
*vmPreviewObject(robj
*key
);
590 static int vmSwapOneObjectBlocking(void);
591 static int vmSwapOneObjectThreaded(void);
592 static int vmCanSwapOut(void);
593 static int tryFreeOneObjectFromFreelist(void);
/* acceptHandler and vmThreadedIOCompletedJob take the same parameter list
 * (event loop, fd, private data pointer, mask) as sendReplyToClientWritev
 * and readQueryFromClient. */
594 static void acceptHandler(aeEventLoop
*el
, int fd
, void *privdata
, int mask
);
595 static void vmThreadedIOCompletedJob(aeEventLoop
*el
, int fd
, void *privdata
, int mask
);
596 static void vmCancelThreadedIOJob(robj
*o
);
597 static void lockThreadedIO(void);
598 static void unlockThreadedIO(void);
599 static int vmSwapObjectThreaded(robj
*key
, robj
*val
, redisDb
*db
);
600 static void freeIOJob(iojob
*j
);
601 static void queueIOJob(iojob
*j
);
602 static int vmWriteObjectOnSwap(robj
*o
, off_t page
);
603 static robj
*vmReadObjectFromSwap(off_t page
, int type
);
604 static void waitEmptyIOJobsQueue(void);
605 static void vmReopenSwapFile(void);
606 static int vmFreePage(off_t page
);
/* zunionInterBlockClientOnSwappedKeys is the vm_preload_proc entry of the
 * "zunion" and "zinter" rows of cmdTable. */
607 static void zunionInterBlockClientOnSwappedKeys(redisClient
*c
);
608 static int blockClientOnSwappedKeys(struct redisCommand
*cmd
, redisClient
*c
);
609 static int dontWaitForSwappedKey(redisClient
*c
, robj
*key
);
610 static void handleClientsBlockedOnSwappedKey(redisDb
*db
, robj
*key
);
611 static void readQueryFromClient(aeEventLoop
*el
, int fd
, void *privdata
, int mask
);
612 static struct redisCommand
*lookupCommand(char *name
);
613 static void call(redisClient
*c
, struct redisCommand
*cmd
);
614 static void resetClient(redisClient
*c
);
615 static void convertToRealHash(robj
*o
);
/* Pubsub helpers */
616 static int pubsubUnsubscribeAllChannels(redisClient
*c
, int notify
);
617 static int pubsubUnsubscribeAllPatterns(redisClient
*c
, int notify
);
618 static void freePubsubPattern(void *p
);
619 static int listMatchPubsubPattern(void *a
, void *b
);
620 static int compareStringObjects(robj
*a
, robj
*b
);
/* Command handler prototypes. Every handler has the redisCommandProc
 * signature (takes the redisClient, returns nothing) and is referenced from
 * the proc column of cmdTable. */
623 static void authCommand(redisClient
*c
);
624 static void pingCommand(redisClient
*c
);
625 static void echoCommand(redisClient
*c
);
626 static void setCommand(redisClient
*c
);
627 static void setnxCommand(redisClient
*c
);
628 static void getCommand(redisClient
*c
);
629 static void delCommand(redisClient
*c
);
630 static void existsCommand(redisClient
*c
);
631 static void incrCommand(redisClient
*c
);
632 static void decrCommand(redisClient
*c
);
633 static void incrbyCommand(redisClient
*c
);
634 static void decrbyCommand(redisClient
*c
);
635 static void selectCommand(redisClient
*c
);
636 static void randomkeyCommand(redisClient
*c
);
637 static void keysCommand(redisClient
*c
);
638 static void dbsizeCommand(redisClient
*c
);
639 static void lastsaveCommand(redisClient
*c
);
640 static void saveCommand(redisClient
*c
);
641 static void bgsaveCommand(redisClient
*c
);
642 static void bgrewriteaofCommand(redisClient
*c
);
643 static void shutdownCommand(redisClient
*c
);
644 static void moveCommand(redisClient
*c
);
645 static void renameCommand(redisClient
*c
);
646 static void renamenxCommand(redisClient
*c
);
647 static void lpushCommand(redisClient
*c
);
648 static void rpushCommand(redisClient
*c
);
649 static void lpopCommand(redisClient
*c
);
650 static void rpopCommand(redisClient
*c
);
651 static void llenCommand(redisClient
*c
);
652 static void lindexCommand(redisClient
*c
);
653 static void lrangeCommand(redisClient
*c
);
654 static void ltrimCommand(redisClient
*c
);
655 static void typeCommand(redisClient
*c
);
656 static void lsetCommand(redisClient
*c
);
657 static void saddCommand(redisClient
*c
);
658 static void sremCommand(redisClient
*c
);
659 static void smoveCommand(redisClient
*c
);
660 static void sismemberCommand(redisClient
*c
);
661 static void scardCommand(redisClient
*c
);
662 static void spopCommand(redisClient
*c
);
663 static void srandmemberCommand(redisClient
*c
);
664 static void sinterCommand(redisClient
*c
);
665 static void sinterstoreCommand(redisClient
*c
);
666 static void sunionCommand(redisClient
*c
);
667 static void sunionstoreCommand(redisClient
*c
);
668 static void sdiffCommand(redisClient
*c
);
669 static void sdiffstoreCommand(redisClient
*c
);
670 static void syncCommand(redisClient
*c
);
671 static void flushdbCommand(redisClient
*c
);
672 static void flushallCommand(redisClient
*c
);
673 static void sortCommand(redisClient
*c
);
674 static void lremCommand(redisClient
*c
);
/* NOTE(review): unlike every other handler in this list, the next name ends
 * in a lowercase "command"; the "rpoplpush" row of cmdTable uses the same
 * spelling, so any rename has to change both places and the definition. */
675 static void rpoplpushcommand(redisClient
*c
);
676 static void infoCommand(redisClient
*c
);
677 static void mgetCommand(redisClient
*c
);
678 static void monitorCommand(redisClient
*c
);
679 static void expireCommand(redisClient
*c
);
680 static void expireatCommand(redisClient
*c
);
681 static void getsetCommand(redisClient
*c
);
682 static void ttlCommand(redisClient
*c
);
683 static void slaveofCommand(redisClient
*c
);
684 static void debugCommand(redisClient
*c
);
685 static void msetCommand(redisClient
*c
);
686 static void msetnxCommand(redisClient
*c
);
687 static void zaddCommand(redisClient
*c
);
688 static void zincrbyCommand(redisClient
*c
);
689 static void zrangeCommand(redisClient
*c
);
690 static void zrangebyscoreCommand(redisClient
*c
);
691 static void zcountCommand(redisClient
*c
);
692 static void zrevrangeCommand(redisClient
*c
);
693 static void zcardCommand(redisClient
*c
);
694 static void zremCommand(redisClient
*c
);
695 static void zscoreCommand(redisClient
*c
);
696 static void zremrangebyscoreCommand(redisClient
*c
);
697 static void multiCommand(redisClient
*c
);
698 static void execCommand(redisClient
*c
);
699 static void discardCommand(redisClient
*c
);
700 static void blpopCommand(redisClient
*c
);
701 static void brpopCommand(redisClient
*c
);
702 static void appendCommand(redisClient
*c
);
703 static void substrCommand(redisClient
*c
);
704 static void zrankCommand(redisClient
*c
);
705 static void zrevrankCommand(redisClient
*c
);
706 static void hsetCommand(redisClient
*c
);
707 static void hmsetCommand(redisClient
*c
);
708 static void hgetCommand(redisClient
*c
);
709 static void hdelCommand(redisClient
*c
);
710 static void hlenCommand(redisClient
*c
);
711 static void zremrangebyrankCommand(redisClient
*c
);
712 static void zunionCommand(redisClient
*c
);
713 static void zinterCommand(redisClient
*c
);
714 static void hkeysCommand(redisClient
*c
);
715 static void hvalsCommand(redisClient
*c
);
716 static void hgetallCommand(redisClient
*c
);
717 static void hexistsCommand(redisClient
*c
);
718 static void configCommand(redisClient
*c
);
719 static void hincrbyCommand(redisClient
*c
);
720 static void subscribeCommand(redisClient
*c
);
721 static void unsubscribeCommand(redisClient
*c
);
722 static void psubscribeCommand(redisClient
*c
);
723 static void punsubscribeCommand(redisClient
*c
);
724 static void publishCommand(redisClient
*c
);
726 /*================================= Globals ================================= */
729 static struct redisServer server
; /* server global state */
730 static struct redisCommand cmdTable
[] = {
731 {"get",getCommand
,2,REDIS_CMD_INLINE
,NULL
,1,1,1},
732 {"set",setCommand
,3,REDIS_CMD_BULK
|REDIS_CMD_DENYOOM
,NULL
,0,0,0},
733 {"setnx",setnxCommand
,3,REDIS_CMD_BULK
|REDIS_CMD_DENYOOM
,NULL
,0,0,0},
734 {"append",appendCommand
,3,REDIS_CMD_BULK
|REDIS_CMD_DENYOOM
,NULL
,1,1,1},
735 {"substr",substrCommand
,4,REDIS_CMD_INLINE
,NULL
,1,1,1},
736 {"del",delCommand
,-2,REDIS_CMD_INLINE
,NULL
,0,0,0},
737 {"exists",existsCommand
,2,REDIS_CMD_INLINE
,NULL
,1,1,1},
738 {"incr",incrCommand
,2,REDIS_CMD_INLINE
|REDIS_CMD_DENYOOM
,NULL
,1,1,1},
739 {"decr",decrCommand
,2,REDIS_CMD_INLINE
|REDIS_CMD_DENYOOM
,NULL
,1,1,1},
740 {"mget",mgetCommand
,-2,REDIS_CMD_INLINE
,NULL
,1,-1,1},
741 {"rpush",rpushCommand
,3,REDIS_CMD_BULK
|REDIS_CMD_DENYOOM
,NULL
,1,1,1},
742 {"lpush",lpushCommand
,3,REDIS_CMD_BULK
|REDIS_CMD_DENYOOM
,NULL
,1,1,1},
743 {"rpop",rpopCommand
,2,REDIS_CMD_INLINE
,NULL
,1,1,1},
744 {"lpop",lpopCommand
,2,REDIS_CMD_INLINE
,NULL
,1,1,1},
745 {"brpop",brpopCommand
,-3,REDIS_CMD_INLINE
,NULL
,1,1,1},
746 {"blpop",blpopCommand
,-3,REDIS_CMD_INLINE
,NULL
,1,1,1},
747 {"llen",llenCommand
,2,REDIS_CMD_INLINE
,NULL
,1,1,1},
748 {"lindex",lindexCommand
,3,REDIS_CMD_INLINE
,NULL
,1,1,1},
749 {"lset",lsetCommand
,4,REDIS_CMD_BULK
|REDIS_CMD_DENYOOM
,NULL
,1,1,1},
750 {"lrange",lrangeCommand
,4,REDIS_CMD_INLINE
,NULL
,1,1,1},
751 {"ltrim",ltrimCommand
,4,REDIS_CMD_INLINE
,NULL
,1,1,1},
752 {"lrem",lremCommand
,4,REDIS_CMD_BULK
,NULL
,1,1,1},
753 {"rpoplpush",rpoplpushcommand
,3,REDIS_CMD_INLINE
|REDIS_CMD_DENYOOM
,NULL
,1,2,1},
754 {"sadd",saddCommand
,3,REDIS_CMD_BULK
|REDIS_CMD_DENYOOM
,NULL
,1,1,1},
755 {"srem",sremCommand
,3,REDIS_CMD_BULK
,NULL
,1,1,1},
756 {"smove",smoveCommand
,4,REDIS_CMD_BULK
,NULL
,1,2,1},
757 {"sismember",sismemberCommand
,3,REDIS_CMD_BULK
,NULL
,1,1,1},
758 {"scard",scardCommand
,2,REDIS_CMD_INLINE
,NULL
,1,1,1},
759 {"spop",spopCommand
,2,REDIS_CMD_INLINE
,NULL
,1,1,1},
760 {"srandmember",srandmemberCommand
,2,REDIS_CMD_INLINE
,NULL
,1,1,1},
761 {"sinter",sinterCommand
,-2,REDIS_CMD_INLINE
|REDIS_CMD_DENYOOM
,NULL
,1,-1,1},
762 {"sinterstore",sinterstoreCommand
,-3,REDIS_CMD_INLINE
|REDIS_CMD_DENYOOM
,NULL
,2,-1,1},
763 {"sunion",sunionCommand
,-2,REDIS_CMD_INLINE
|REDIS_CMD_DENYOOM
,NULL
,1,-1,1},
764 {"sunionstore",sunionstoreCommand
,-3,REDIS_CMD_INLINE
|REDIS_CMD_DENYOOM
,NULL
,2,-1,1},
765 {"sdiff",sdiffCommand
,-2,REDIS_CMD_INLINE
|REDIS_CMD_DENYOOM
,NULL
,1,-1,1},
766 {"sdiffstore",sdiffstoreCommand
,-3,REDIS_CMD_INLINE
|REDIS_CMD_DENYOOM
,NULL
,2,-1,1},
767 {"smembers",sinterCommand
,2,REDIS_CMD_INLINE
,NULL
,1,1,1},
768 {"zadd",zaddCommand
,4,REDIS_CMD_BULK
|REDIS_CMD_DENYOOM
,NULL
,1,1,1},
769 {"zincrby",zincrbyCommand
,4,REDIS_CMD_BULK
|REDIS_CMD_DENYOOM
,NULL
,1,1,1},
770 {"zrem",zremCommand
,3,REDIS_CMD_BULK
,NULL
,1,1,1},
771 {"zremrangebyscore",zremrangebyscoreCommand
,4,REDIS_CMD_INLINE
,NULL
,1,1,1},
772 {"zremrangebyrank",zremrangebyrankCommand
,4,REDIS_CMD_INLINE
,NULL
,1,1,1},
773 {"zunion",zunionCommand
,-4,REDIS_CMD_INLINE
|REDIS_CMD_DENYOOM
,zunionInterBlockClientOnSwappedKeys
,0,0,0},
774 {"zinter",zinterCommand
,-4,REDIS_CMD_INLINE
|REDIS_CMD_DENYOOM
,zunionInterBlockClientOnSwappedKeys
,0,0,0},
775 {"zrange",zrangeCommand
,-4,REDIS_CMD_INLINE
,NULL
,1,1,1},
776 {"zrangebyscore",zrangebyscoreCommand
,-4,REDIS_CMD_INLINE
,NULL
,1,1,1},
777 {"zcount",zcountCommand
,4,REDIS_CMD_INLINE
,NULL
,1,1,1},
778 {"zrevrange",zrevrangeCommand
,-4,REDIS_CMD_INLINE
,NULL
,1,1,1},
779 {"zcard",zcardCommand
,2,REDIS_CMD_INLINE
,NULL
,1,1,1},
780 {"zscore",zscoreCommand
,3,REDIS_CMD_BULK
|REDIS_CMD_DENYOOM
,NULL
,1,1,1},
781 {"zrank",zrankCommand
,3,REDIS_CMD_BULK
,NULL
,1,1,1},
782 {"zrevrank",zrevrankCommand
,3,REDIS_CMD_BULK
,NULL
,1,1,1},
783 {"hset",hsetCommand
,4,REDIS_CMD_BULK
|REDIS_CMD_DENYOOM
,NULL
,1,1,1},
784 {"hmset",hmsetCommand
,-4,REDIS_CMD_BULK
|REDIS_CMD_DENYOOM
,NULL
,1,1,1},
785 {"hincrby",hincrbyCommand
,4,REDIS_CMD_INLINE
|REDIS_CMD_DENYOOM
,NULL
,1,1,1},
786 {"hget",hgetCommand
,3,REDIS_CMD_BULK
,NULL
,1,1,1},
787 {"hdel",hdelCommand
,3,REDIS_CMD_BULK
,NULL
,1,1,1},
788 {"hlen",hlenCommand
,2,REDIS_CMD_INLINE
,NULL
,1,1,1},
789 {"hkeys",hkeysCommand
,2,REDIS_CMD_INLINE
,NULL
,1,1,1},
790 {"hvals",hvalsCommand
,2,REDIS_CMD_INLINE
,NULL
,1,1,1},
791 {"hgetall",hgetallCommand
,2,REDIS_CMD_INLINE
,NULL
,1,1,1},
792 {"hexists",hexistsCommand
,3,REDIS_CMD_BULK
,NULL
,1,1,1},
793 {"incrby",incrbyCommand
,3,REDIS_CMD_INLINE
|REDIS_CMD_DENYOOM
,NULL
,1,1,1},
794 {"decrby",decrbyCommand
,3,REDIS_CMD_INLINE
|REDIS_CMD_DENYOOM
,NULL
,1,1,1},
795 {"getset",getsetCommand
,3,REDIS_CMD_BULK
|REDIS_CMD_DENYOOM
,NULL
,1,1,1},
796 {"mset",msetCommand
,-3,REDIS_CMD_BULK
|REDIS_CMD_DENYOOM
,NULL
,1,-1,2},
797 {"msetnx",msetnxCommand
,-3,REDIS_CMD_BULK
|REDIS_CMD_DENYOOM
,NULL
,1,-1,2},
798 {"randomkey",randomkeyCommand
,1,REDIS_CMD_INLINE
,NULL
,0,0,0},
799 {"select",selectCommand
,2,REDIS_CMD_INLINE
,NULL
,0,0,0},
800 {"move",moveCommand
,3,REDIS_CMD_INLINE
,NULL
,1,1,1},
801 {"rename",renameCommand
,3,REDIS_CMD_INLINE
,NULL
,1,1,1},
802 {"renamenx",renamenxCommand
,3,REDIS_CMD_INLINE
,NULL
,1,1,1},
803 {"expire",expireCommand
,3,REDIS_CMD_INLINE
,NULL
,0,0,0},
804 {"expireat",expireatCommand
,3,REDIS_CMD_INLINE
,NULL
,0,0,0},
805 {"keys",keysCommand
,2,REDIS_CMD_INLINE
,NULL
,0,0,0},
806 {"dbsize",dbsizeCommand
,1,REDIS_CMD_INLINE
,NULL
,0,0,0},
807 {"auth",authCommand
,2,REDIS_CMD_INLINE
,NULL
,0,0,0},
808 {"ping",pingCommand
,1,REDIS_CMD_INLINE
,NULL
,0,0,0},
809 {"echo",echoCommand
,2,REDIS_CMD_BULK
,NULL
,0,0,0},
810 {"save",saveCommand
,1,REDIS_CMD_INLINE
,NULL
,0,0,0},
811 {"bgsave",bgsaveCommand
,1,REDIS_CMD_INLINE
,NULL
,0,0,0},
812 {"bgrewriteaof",bgrewriteaofCommand
,1,REDIS_CMD_INLINE
,NULL
,0,0,0},
813 {"shutdown",shutdownCommand
,1,REDIS_CMD_INLINE
,NULL
,0,0,0},
814 {"lastsave",lastsaveCommand
,1,REDIS_CMD_INLINE
,NULL
,0,0,0},
815 {"type",typeCommand
,2,REDIS_CMD_INLINE
,NULL
,1,1,1},
816 {"multi",multiCommand
,1,REDIS_CMD_INLINE
,NULL
,0,0,0},
817 {"exec",execCommand
,1,REDIS_CMD_INLINE
|REDIS_CMD_DENYOOM
,NULL
,0,0,0},
818 {"discard",discardCommand
,1,REDIS_CMD_INLINE
,NULL
,0,0,0},
819 {"sync",syncCommand
,1,REDIS_CMD_INLINE
,NULL
,0,0,0},
820 {"flushdb",flushdbCommand
,1,REDIS_CMD_INLINE
,NULL
,0,0,0},
821 {"flushall",flushallCommand
,1,REDIS_CMD_INLINE
,NULL
,0,0,0},
822 {"sort",sortCommand
,-2,REDIS_CMD_INLINE
|REDIS_CMD_DENYOOM
,NULL
,1,1,1},
823 {"info",infoCommand
,1,REDIS_CMD_INLINE
,NULL
,0,0,0},
824 {"monitor",monitorCommand
,1,REDIS_CMD_INLINE
,NULL
,0,0,0},
825 {"ttl",ttlCommand
,2,REDIS_CMD_INLINE
,NULL
,1,1,1},
826 {"slaveof",slaveofCommand
,3,REDIS_CMD_INLINE
,NULL
,0,0,0},
827 {"debug",debugCommand
,-2,REDIS_CMD_INLINE
,NULL
,0,0,0},
828 {"config",configCommand
,-2,REDIS_CMD_BULK
,NULL
,0,0,0},
829 {"subscribe",subscribeCommand
,-2,REDIS_CMD_INLINE
,NULL
,0,0,0},
830 {"unsubscribe",unsubscribeCommand
,-1,REDIS_CMD_INLINE
,NULL
,0,0,0},
831 {"psubscribe",psubscribeCommand
,-2,REDIS_CMD_INLINE
,NULL
,0,0,0},
832 {"punsubscribe",punsubscribeCommand
,-1,REDIS_CMD_INLINE
,NULL
,0,0,0},
833 {"publish",publishCommand
,3,REDIS_CMD_BULK
|REDIS_CMD_FORCE_REPLICATION
,NULL
,0,0,0},
834 {NULL
,NULL
,0,0,NULL
,0,0,0}
837 /*============================ Utility functions ============================ */
839 /* Glob-style pattern matching. */
840 static int stringmatchlen(const char *pattern
, int patternLen
,
841 const char *string
, int stringLen
, int nocase
)
846 while (pattern
[1] == '*') {
851 return 1; /* match */
853 if (stringmatchlen(pattern
+1, patternLen
-1,
854 string
, stringLen
, nocase
))
855 return 1; /* match */
859 return 0; /* no match */
863 return 0; /* no match */
873 not = pattern
[0] == '^';
880 if (pattern
[0] == '\\') {
883 if (pattern
[0] == string
[0])
885 } else if (pattern
[0] == ']') {
887 } else if (patternLen
== 0) {
891 } else if (pattern
[1] == '-' && patternLen
>= 3) {
892 int start
= pattern
[0];
893 int end
= pattern
[2];
901 start
= tolower(start
);
907 if (c
>= start
&& c
<= end
)
911 if (pattern
[0] == string
[0])
914 if (tolower((int)pattern
[0]) == tolower((int)string
[0]))
924 return 0; /* no match */
930 if (patternLen
>= 2) {
937 if (pattern
[0] != string
[0])
938 return 0; /* no match */
940 if (tolower((int)pattern
[0]) != tolower((int)string
[0]))
941 return 0; /* no match */
949 if (stringLen
== 0) {
950 while(*pattern
== '*') {
957 if (patternLen
== 0 && stringLen
== 0)
/* Glob-style match for NUL-terminated strings: forwards pattern and string
 * to stringmatchlen() together with their strlen() lengths, passes nocase
 * through unchanged, and returns that call's result. */
962 static int stringmatch(const char *pattern
, const char *string
, int nocase
) {
963 return stringmatchlen(pattern
,strlen(pattern
),string
,strlen(string
),nocase
);
966 static void redisLog(int level
, const char *fmt
, ...) {
970 fp
= (server
.logfile
== NULL
) ? stdout
: fopen(server
.logfile
,"a");
974 if (level
>= server
.verbosity
) {
980 strftime(buf
,64,"%d %b %H:%M:%S",localtime(&now
));
981 fprintf(fp
,"[%d] %s %c ",(int)getpid(),buf
,c
[level
]);
982 vfprintf(fp
, fmt
, ap
);
988 if (server
.logfile
) fclose(fp
);
991 /*====================== Hash table type implementation ==================== */
993 /* This is a hash table type that uses the SDS dynamic strings library as
994 * keys and redis objects as values (objects can hold SDS strings,
997 static void dictVanillaFree(void *privdata
, void *val
)
999 DICT_NOTUSED(privdata
);
1003 static void dictListDestructor(void *privdata
, void *val
)
1005 DICT_NOTUSED(privdata
);
1006 listRelease((list
*)val
);
/* Key comparison callback for keys that are sds strings.
 * privdata is unused. Returns 1 when both keys have the same sdslen() and
 * identical bytes, 0 otherwise. */
1009 static int sdsDictKeyCompare(void *privdata
, const void *key1
,
1013 DICT_NOTUSED(privdata
);
1015 l1
= sdslen((sds
)key1
);
1016 l2
= sdslen((sds
)key2
);
/* Keys of different length can never match: skip the memcmp(). */
1017 if (l1
!= l2
) return 0;
1018 return memcmp(key1
, key2
, l1
) == 0;
/* Destructor callback installed as key and/or value destructor in the dict
 * types defined in this file. privdata is unused, and a NULL val is accepted
 * and ignored. */
1021 static void dictRedisObjectDestructor(void *privdata
, void *val
)
1023 DICT_NOTUSED(privdata
);
1025 if (val
== NULL
) return; /* Values of swapped out keys are set to NULL */
/* Key comparison callback for keys that are redis objects (robj): both keys
 * are viewed as robj and the comparison is delegated to sdsDictKeyCompare()
 * on their ptr fields, whose result is returned. */
1029 static int dictObjKeyCompare(void *privdata
, const void *key1
,
1032 const robj
*o1
= key1
, *o2
= key2
;
1033 return sdsDictKeyCompare(privdata
,o1
->ptr
,o2
->ptr
);
/* Hash callback for keys that are redis objects (robj): the object's ptr is
 * treated as an sds string and hashed over its full sdslen() with
 * dictGenHashFunction(). */
1036 static unsigned int dictObjHash(const void *key
) {
1037 const robj
*o
= key
;
1038 return dictGenHashFunction(o
->ptr
, sdslen((sds
)o
->ptr
));
1041 static int dictEncObjKeyCompare(void *privdata
, const void *key1
,
1044 robj
*o1
= (robj
*) key1
, *o2
= (robj
*) key2
;
1047 if (o1
->encoding
== REDIS_ENCODING_INT
&&
1048 o2
->encoding
== REDIS_ENCODING_INT
&&
1049 o1
->ptr
== o2
->ptr
) return 1;
1051 o1
= getDecodedObject(o1
);
1052 o2
= getDecodedObject(o2
);
1053 cmp
= sdsDictKeyCompare(privdata
,o1
->ptr
,o2
->ptr
);
1059 static unsigned int dictEncObjHash(const void *key
) {
1060 robj
*o
= (robj
*) key
;
1062 if (o
->encoding
== REDIS_ENCODING_RAW
) {
1063 return dictGenHashFunction(o
->ptr
, sdslen((sds
)o
->ptr
));
1065 if (o
->encoding
== REDIS_ENCODING_INT
) {
1069 len
= snprintf(buf
,32,"%ld",(long)o
->ptr
);
1070 return dictGenHashFunction((unsigned char*)buf
, len
);
1074 o
= getDecodedObject(o
);
1075 hash
= dictGenHashFunction(o
->ptr
, sdslen((sds
)o
->ptr
));
1082 /* Sets type and expires */
1083 static dictType setDictType
= {
1084 dictEncObjHash
, /* hash function */
1087 dictEncObjKeyCompare
, /* key compare */
1088 dictRedisObjectDestructor
, /* key destructor */
1089 NULL
/* val destructor */
1092 /* Sorted sets hash (note: a skiplist is used in addition to the hash table) */
1093 static dictType zsetDictType
= {
1094 dictEncObjHash
, /* hash function */
1097 dictEncObjKeyCompare
, /* key compare */
1098 dictRedisObjectDestructor
, /* key destructor */
1099 dictVanillaFree
/* val destructor of malloc(sizeof(double)) */
1103 static dictType dbDictType
= {
1104 dictObjHash
, /* hash function */
1107 dictObjKeyCompare
, /* key compare */
1108 dictRedisObjectDestructor
, /* key destructor */
1109 dictRedisObjectDestructor
/* val destructor */
1113 static dictType keyptrDictType
= {
1114 dictObjHash
, /* hash function */
1117 dictObjKeyCompare
, /* key compare */
1118 dictRedisObjectDestructor
, /* key destructor */
1119 NULL
/* val destructor */
1122 /* Hash type hash table (note that small hashes are represented with zipmaps) */
1123 static dictType hashDictType
= {
1124 dictEncObjHash
, /* hash function */
1127 dictEncObjKeyCompare
, /* key compare */
1128 dictRedisObjectDestructor
, /* key destructor */
1129 dictRedisObjectDestructor
/* val destructor */
1132 /* Keylist hash table type has unencoded redis objects as keys and
1133 * lists as values. It's used for blocking operations (BLPOP) and to
1134 * map swapped keys to a list of clients waiting for these keys to be loaded. */
1135 static dictType keylistDictType
= {
1136 dictObjHash
, /* hash function */
1139 dictObjKeyCompare
, /* key compare */
1140 dictRedisObjectDestructor
, /* key destructor */
1141 dictListDestructor
/* val destructor */
1144 static void version();
1146 /* ========================= Random utility functions ======================= */
1148 /* Redis generally does not try to recover from out of memory conditions
1149 * when allocating objects or strings, it is not clear if it will be possible
1150 * to report this condition to the client since the networking layer itself
1151 * is based on heap allocation for send buffers, so we simply abort.
1152 * At least the code will be simpler to read... */
1153 static void oom(const char *msg
) {
1154 redisLog(REDIS_WARNING
, "%s: Out of memory\n",msg
);
1159 /* ====================== Redis server networking stuff ===================== */
1160 static void closeTimedoutClients(void) {
1163 time_t now
= time(NULL
);
1166 listRewind(server
.clients
,&li
);
1167 while ((ln
= listNext(&li
)) != NULL
) {
1168 c
= listNodeValue(ln
);
1169 if (server
.maxidletime
&&
1170 !(c
->flags
& REDIS_SLAVE
) && /* no timeout for slaves */
1171 !(c
->flags
& REDIS_MASTER
) && /* no timeout for masters */
1172 dictSize(c
->pubsub_channels
) == 0 && /* no timeout for pubsub */
1173 listLength(c
->pubsub_patterns
) == 0 &&
1174 (now
- c
->lastinteraction
> server
.maxidletime
))
1176 redisLog(REDIS_VERBOSE
,"Closing idle client");
1178 } else if (c
->flags
& REDIS_BLOCKED
) {
1179 if (c
->blockingto
!= 0 && c
->blockingto
< now
) {
1180 addReply(c
,shared
.nullmultibulk
);
1181 unblockClientWaitingData(c
);
/* Return non-zero when the given hash table is worth shrinking.
 * All of the following must hold: the table has slots, it holds at least one
 * entry, it has more than DICT_HT_INITIAL_SIZE slots, and its fill
 * percentage (used*100/size, integer division) is below REDIS_HT_MINFILL. */
1187 static int htNeedsResize(dict
*dict
) {
1188 long long size
, used
;
1190 size
= dictSlots(dict
);
1191 used
= dictSize(dict
);
/* size is tested for non-zero first, so the division below is safe. */
1192 return (size
&& used
&& size
> DICT_HT_INITIAL_SIZE
&&
1193 (used
*100/size
< REDIS_HT_MINFILL
));
1196 /* Walk every database and shrink its hash tables when they are too sparse:
1197 * the main dict and the expires dict are each passed to dictResize() when
 * htNeedsResize() returns true for them (fill percentage below
 * REDIS_HT_MINFILL), in order to save memory. Only the main dict resize is
 * logged, at REDIS_VERBOSE level. */
1198 static void tryResizeHashTables(void) {
1201 for (j
= 0; j
< server
.dbnum
; j
++) {
1202 if (htNeedsResize(server
.db
[j
].dict
)) {
1203 redisLog(REDIS_VERBOSE
,"The hash table %d is too sparse, resize it...",j
);
1204 dictResize(server
.db
[j
].dict
);
1205 redisLog(REDIS_VERBOSE
,"Hash table %d resized.",j
);
/* The expires dict is checked independently of the main dict. */
1207 if (htNeedsResize(server
.db
[j
].expires
))
1208 dictResize(server
.db
[j
].expires
);
1212 /* A background saving child (BGSAVE) terminated its work. Handle this. */
1213 void backgroundSaveDoneHandler(int statloc
) {
1214 int exitcode
= WEXITSTATUS(statloc
);
1215 int bysignal
= WIFSIGNALED(statloc
);
1217 if (!bysignal
&& exitcode
== 0) {
1218 redisLog(REDIS_NOTICE
,
1219 "Background saving terminated with success");
1221 server
.lastsave
= time(NULL
);
1222 } else if (!bysignal
&& exitcode
!= 0) {
1223 redisLog(REDIS_WARNING
, "Background saving error");
1225 redisLog(REDIS_WARNING
,
1226 "Background saving terminated by signal %d", WTERMSIG(statloc
));
1227 rdbRemoveTempFile(server
.bgsavechildpid
);
1229 server
.bgsavechildpid
= -1;
1230 /* Possibly there are slaves waiting for a BGSAVE in order to be served
1231 * (the first stage of SYNC is a bulk transfer of dump.rdb) */
1232 updateSlavesWaitingBgsave(exitcode
== 0 ? REDIS_OK
: REDIS_ERR
);
1235 /* A background append only file rewriting (BGREWRITEAOF) terminated its work.
1237 void backgroundRewriteDoneHandler(int statloc
) {
1238 int exitcode
= WEXITSTATUS(statloc
);
1239 int bysignal
= WIFSIGNALED(statloc
);
1241 if (!bysignal
&& exitcode
== 0) {
1245 redisLog(REDIS_NOTICE
,
1246 "Background append only file rewriting terminated with success");
1247 /* Now it's time to flush the differences accumulated by the parent */
1248 snprintf(tmpfile
,256,"temp-rewriteaof-bg-%d.aof", (int) server
.bgrewritechildpid
);
1249 fd
= open(tmpfile
,O_WRONLY
|O_APPEND
);
1251 redisLog(REDIS_WARNING
, "Not able to open the temp append only file produced by the child: %s", strerror(errno
));
1254 /* Flush our data... */
1255 if (write(fd
,server
.bgrewritebuf
,sdslen(server
.bgrewritebuf
)) !=
1256 (signed) sdslen(server
.bgrewritebuf
)) {
1257 redisLog(REDIS_WARNING
, "Error or short write trying to flush the parent diff of the append log file in the child temp file: %s", strerror(errno
));
1261 redisLog(REDIS_NOTICE
,"Parent diff flushed into the new append log file with success (%lu bytes)",sdslen(server
.bgrewritebuf
));
1262 /* Now our work is to rename the temp file into the stable file. And
1263 * switch the file descriptor used by the server for append only. */
1264 if (rename(tmpfile
,server
.appendfilename
) == -1) {
1265 redisLog(REDIS_WARNING
,"Can't rename the temp append only file into the stable one: %s", strerror(errno
));
1269 /* Mission completed... almost */
1270 redisLog(REDIS_NOTICE
,"Append only file successfully rewritten.");
1271 if (server
.appendfd
!= -1) {
1272 /* If append only is actually enabled... */
1273 close(server
.appendfd
);
1274 server
.appendfd
= fd
;
1276 server
.appendseldb
= -1; /* Make sure it will issue SELECT */
1277 redisLog(REDIS_NOTICE
,"The new append only file was selected for future appends.");
1279 /* If append only is disabled we just generate a dump in this
1280 * format. Why not? */
1283 } else if (!bysignal
&& exitcode
!= 0) {
1284 redisLog(REDIS_WARNING
, "Background append only file rewriting error");
1286 redisLog(REDIS_WARNING
,
1287 "Background append only file rewriting terminated by signal %d",
1291 sdsfree(server
.bgrewritebuf
);
1292 server
.bgrewritebuf
= sdsempty();
1293 aofRemoveTempFile(server
.bgrewritechildpid
);
1294 server
.bgrewritechildpid
= -1;
1297 /* This function is called once a background process of some kind terminates,
1298 * as we want to avoid resizing the hash tables when there is a child in order
1299 * to play well with copy-on-write (otherwise when a resize happens lots of
1300 * memory pages are copied). The goal of this function is to update the ability
1301 * for dict.c to resize the hash tables according to whether or not we have
1302 * running children. */
1303 static void updateDictResizePolicy(void) {
1304 if (server
.bgsavechildpid
== -1 && server
.bgrewritechildpid
== -1)
1307 dictDisableResize();
1310 static int serverCron(struct aeEventLoop
*eventLoop
, long long id
, void *clientData
) {
1311 int j
, loops
= server
.cronloops
++;
1312 REDIS_NOTUSED(eventLoop
);
1314 REDIS_NOTUSED(clientData
);
1316 /* We take a cached value of the unix time in the global state because
1317 * with virtual memory and aging there is the need to store the current time
1318 * in objects at every object access, and accuracy is not needed.
1319 * To access a global var is faster than calling time(NULL) */
1320 server
.unixtime
= time(NULL
);
1322 /* Show some info about non-empty databases */
1323 for (j
= 0; j
< server
.dbnum
; j
++) {
1324 long long size
, used
, vkeys
;
1326 size
= dictSlots(server
.db
[j
].dict
);
1327 used
= dictSize(server
.db
[j
].dict
);
1328 vkeys
= dictSize(server
.db
[j
].expires
);
1329 if (!(loops
% 50) && (used
|| vkeys
)) {
1330 redisLog(REDIS_VERBOSE
,"DB %d: %lld keys (%lld volatile) in %lld slots HT.",j
,used
,vkeys
,size
);
1331 /* dictPrintStats(server.dict); */
1335 /* We don't want to resize the hash tables while a background saving
1336 * is in progress: the saving child is created using fork() that is
1337 * implemented with a copy-on-write semantic in most modern systems, so
1338 * if we resize the HT while there is the saving child at work actually
1339 * a lot of memory movements in the parent will cause a lot of pages
1341 if (server
.bgsavechildpid
== -1 && server
.bgrewritechildpid
== -1 &&
1344 tryResizeHashTables();
1347 /* Show information about connected clients */
1348 if (!(loops
% 50)) {
1349 redisLog(REDIS_VERBOSE
,"%d clients connected (%d slaves), %zu bytes in use",
1350 listLength(server
.clients
)-listLength(server
.slaves
),
1351 listLength(server
.slaves
),
1352 zmalloc_used_memory());
1355 /* Close connections of timedout clients */
1356 if ((server
.maxidletime
&& !(loops
% 100)) || server
.blpop_blocked_clients
)
1357 closeTimedoutClients();
1359 /* Check if a background saving or AOF rewrite in progress terminated */
1360 if (server
.bgsavechildpid
!= -1 || server
.bgrewritechildpid
!= -1) {
1364 if ((pid
= wait3(&statloc
,WNOHANG
,NULL
)) != 0) {
1365 if (pid
== server
.bgsavechildpid
) {
1366 backgroundSaveDoneHandler(statloc
);
1368 backgroundRewriteDoneHandler(statloc
);
1370 updateDictResizePolicy();
1373 /* If there is not a background saving in progress check if
1374 * we have to save now */
1375 time_t now
= time(NULL
);
1376 for (j
= 0; j
< server
.saveparamslen
; j
++) {
1377 struct saveparam
*sp
= server
.saveparams
+j
;
1379 if (server
.dirty
>= sp
->changes
&&
1380 now
-server
.lastsave
> sp
->seconds
) {
1381 redisLog(REDIS_NOTICE
,"%d changes in %d seconds. Saving...",
1382 sp
->changes
, sp
->seconds
);
1383 rdbSaveBackground(server
.dbfilename
);
1389 /* Try to expire a few timed out keys. The algorithm used is adaptive and
1390 * will use few CPU cycles if there are few expiring keys, otherwise
1391 * it will get more aggressive to avoid that too much memory is used by
1392 * keys that can be removed from the keyspace. */
1393 for (j
= 0; j
< server
.dbnum
; j
++) {
1395 redisDb
*db
= server
.db
+j
;
1397 /* Continue to expire if at the end of the cycle more than 25%
1398 * of the keys were expired. */
1400 long num
= dictSize(db
->expires
);
1401 time_t now
= time(NULL
);
1404 if (num
> REDIS_EXPIRELOOKUPS_PER_CRON
)
1405 num
= REDIS_EXPIRELOOKUPS_PER_CRON
;
1410 if ((de
= dictGetRandomKey(db
->expires
)) == NULL
) break;
1411 t
= (time_t) dictGetEntryVal(de
);
1413 deleteKey(db
,dictGetEntryKey(de
));
1415 server
.stat_expiredkeys
++;
1418 } while (expired
> REDIS_EXPIRELOOKUPS_PER_CRON
/4);
1421 /* Swap a few keys on disk if we are over the memory limit and VM
1422 * is enabled. Try to free objects from the free list first. */
1423 if (vmCanSwapOut()) {
1424 while (server
.vm_enabled
&& zmalloc_used_memory() >
1425 server
.vm_max_memory
)
1429 if (tryFreeOneObjectFromFreelist() == REDIS_OK
) continue;
1430 retval
= (server
.vm_max_threads
== 0) ?
1431 vmSwapOneObjectBlocking() :
1432 vmSwapOneObjectThreaded();
1433 if (retval
== REDIS_ERR
&& !(loops
% 300) &&
1434 zmalloc_used_memory() >
1435 (server
.vm_max_memory
+server
.vm_max_memory
/10))
1437 redisLog(REDIS_WARNING
,"WARNING: vm-max-memory limit exceeded by more than 10%% but unable to swap more objects out!");
1439 /* Note that when using threaded I/O we free just one object,
1440 * because anyway when the I/O thread in charge to swap this
1441 * object out will finish, the handler of completed jobs
1442 * will try to swap more objects if we are still out of memory. */
1443 if (retval
== REDIS_ERR
|| server
.vm_max_threads
> 0) break;
1447 /* Check if we should connect to a MASTER */
1448 if (server
.replstate
== REDIS_REPL_CONNECT
&& !(loops
% 10)) {
1449 redisLog(REDIS_NOTICE
,"Connecting to MASTER...");
1450 if (syncWithMaster() == REDIS_OK
) {
1451 redisLog(REDIS_NOTICE
,"MASTER <-> SLAVE sync succeeded");
1457 /* This function gets called every time Redis is entering the
1458 * main loop of the event driven library, that is, before sleeping
1459 * while waiting for ready file descriptors. */
1460 static void beforeSleep(struct aeEventLoop
*eventLoop
) {
1461 REDIS_NOTUSED(eventLoop
);
1463 if (server
.vm_enabled
&& listLength(server
.io_ready_clients
)) {
1467 listRewind(server
.io_ready_clients
,&li
);
1468 while((ln
= listNext(&li
))) {
1469 redisClient
*c
= ln
->value
;
1470 struct redisCommand
*cmd
;
1472 /* Resume the client. */
1473 listDelNode(server
.io_ready_clients
,ln
);
1474 c
->flags
&= (~REDIS_IO_WAIT
);
1475 server
.vm_blocked_clients
--;
1476 aeCreateFileEvent(server
.el
, c
->fd
, AE_READABLE
,
1477 readQueryFromClient
, c
);
1478 cmd
= lookupCommand(c
->argv
[0]->ptr
);
1479 assert(cmd
!= NULL
);
1482 /* There may be more data to process in the input buffer. */
1483 if (c
->querybuf
&& sdslen(c
->querybuf
) > 0)
1484 processInputBuffer(c
);
1489 static void createSharedObjects(void) {
1492 shared
.crlf
= createObject(REDIS_STRING
,sdsnew("\r\n"));
1493 shared
.ok
= createObject(REDIS_STRING
,sdsnew("+OK\r\n"));
1494 shared
.err
= createObject(REDIS_STRING
,sdsnew("-ERR\r\n"));
1495 shared
.emptybulk
= createObject(REDIS_STRING
,sdsnew("$0\r\n\r\n"));
1496 shared
.czero
= createObject(REDIS_STRING
,sdsnew(":0\r\n"));
1497 shared
.cone
= createObject(REDIS_STRING
,sdsnew(":1\r\n"));
1498 shared
.nullbulk
= createObject(REDIS_STRING
,sdsnew("$-1\r\n"));
1499 shared
.nullmultibulk
= createObject(REDIS_STRING
,sdsnew("*-1\r\n"));
1500 shared
.emptymultibulk
= createObject(REDIS_STRING
,sdsnew("*0\r\n"));
1501 shared
.pong
= createObject(REDIS_STRING
,sdsnew("+PONG\r\n"));
1502 shared
.queued
= createObject(REDIS_STRING
,sdsnew("+QUEUED\r\n"));
1503 shared
.wrongtypeerr
= createObject(REDIS_STRING
,sdsnew(
1504 "-ERR Operation against a key holding the wrong kind of value\r\n"));
1505 shared
.nokeyerr
= createObject(REDIS_STRING
,sdsnew(
1506 "-ERR no such key\r\n"));
1507 shared
.syntaxerr
= createObject(REDIS_STRING
,sdsnew(
1508 "-ERR syntax error\r\n"));
1509 shared
.sameobjecterr
= createObject(REDIS_STRING
,sdsnew(
1510 "-ERR source and destination objects are the same\r\n"));
1511 shared
.outofrangeerr
= createObject(REDIS_STRING
,sdsnew(
1512 "-ERR index out of range\r\n"));
1513 shared
.space
= createObject(REDIS_STRING
,sdsnew(" "));
1514 shared
.colon
= createObject(REDIS_STRING
,sdsnew(":"));
1515 shared
.plus
= createObject(REDIS_STRING
,sdsnew("+"));
1516 shared
.select0
= createStringObject("select 0\r\n",10);
1517 shared
.select1
= createStringObject("select 1\r\n",10);
1518 shared
.select2
= createStringObject("select 2\r\n",10);
1519 shared
.select3
= createStringObject("select 3\r\n",10);
1520 shared
.select4
= createStringObject("select 4\r\n",10);
1521 shared
.select5
= createStringObject("select 5\r\n",10);
1522 shared
.select6
= createStringObject("select 6\r\n",10);
1523 shared
.select7
= createStringObject("select 7\r\n",10);
1524 shared
.select8
= createStringObject("select 8\r\n",10);
1525 shared
.select9
= createStringObject("select 9\r\n",10);
1526 shared
.messagebulk
= createStringObject("$7\r\nmessage\r\n",13);
1527 shared
.subscribebulk
= createStringObject("$9\r\nsubscribe\r\n",15);
1528 shared
.unsubscribebulk
= createStringObject("$11\r\nunsubscribe\r\n",18);
1529 shared
.psubscribebulk
= createStringObject("$10\r\npsubscribe\r\n",17);
1530 shared
.punsubscribebulk
= createStringObject("$12\r\npunsubscribe\r\n",19);
1531 shared
.mbulk3
= createStringObject("*3\r\n",4);
1532 for (j
= 0; j
< REDIS_SHARED_INTEGERS
; j
++) {
1533 shared
.integers
[j
] = createObject(REDIS_STRING
,(void*)(long)j
);
1534 shared
.integers
[j
]->encoding
= REDIS_ENCODING_INT
;
/* Append one save point to the server configuration.
 * server.saveparams is grown by one struct saveparam with zrealloc(), the
 * new last slot receives the given seconds and changes values, and
 * server.saveparamslen is incremented. */
1538 static void appendServerSaveParams(time_t seconds
, int changes
) {
1539 server
.saveparams
= zrealloc(server
.saveparams
,sizeof(struct saveparam
)*(server
.saveparamslen
+1));
1540 server
.saveparams
[server
.saveparamslen
].seconds
= seconds
;
1541 server
.saveparams
[server
.saveparamslen
].changes
= changes
;
1542 server
.saveparamslen
++;
/* Discard every configured save point: the saveparams array is released
 * with zfree(), the pointer is reset to NULL and the count to zero, so that
 * appendServerSaveParams() can start again from an empty list. */
1545 static void resetServerSaveParams() {
1546 zfree(server
.saveparams
);
1547 server
.saveparams
= NULL
;
1548 server
.saveparamslen
= 0;
1551 static void initServerConfig() {
1552 server
.dbnum
= REDIS_DEFAULT_DBNUM
;
1553 server
.port
= REDIS_SERVERPORT
;
1554 server
.verbosity
= REDIS_VERBOSE
;
1555 server
.maxidletime
= REDIS_MAXIDLETIME
;
1556 server
.saveparams
= NULL
;
1557 server
.logfile
= NULL
; /* NULL = log on standard output */
1558 server
.bindaddr
= NULL
;
1559 server
.glueoutputbuf
= 1;
1560 server
.daemonize
= 0;
1561 server
.appendonly
= 0;
1562 server
.appendfsync
= APPENDFSYNC_ALWAYS
;
1563 server
.lastfsync
= time(NULL
);
1564 server
.appendfd
= -1;
1565 server
.appendseldb
= -1; /* Make sure the first time will not match */
1566 server
.pidfile
= zstrdup("/var/run/redis.pid");
1567 server
.dbfilename
= zstrdup("dump.rdb");
1568 server
.appendfilename
= zstrdup("appendonly.aof");
1569 server
.requirepass
= NULL
;
1570 server
.shareobjects
= 0;
1571 server
.rdbcompression
= 1;
1572 server
.maxclients
= 0;
1573 server
.blpop_blocked_clients
= 0;
1574 server
.maxmemory
= 0;
1575 server
.vm_enabled
= 0;
1576 server
.vm_swap_file
= zstrdup("/tmp/redis-%p.vm");
1577 server
.vm_page_size
= 256; /* 256 bytes per page */
1578 server
.vm_pages
= 1024*1024*100; /* 104 millions of pages */
1579 server
.vm_max_memory
= 1024LL*1024*1024*1; /* 1 GB of RAM */
1580 server
.vm_max_threads
= 4;
1581 server
.vm_blocked_clients
= 0;
1582 server
.hash_max_zipmap_entries
= REDIS_HASH_MAX_ZIPMAP_ENTRIES
;
1583 server
.hash_max_zipmap_value
= REDIS_HASH_MAX_ZIPMAP_VALUE
;
1585 resetServerSaveParams();
1587 appendServerSaveParams(60*60,1); /* save after 1 hour and 1 change */
1588 appendServerSaveParams(300,100); /* save after 5 minutes and 100 changes */
1589 appendServerSaveParams(60,10000); /* save after 1 minute and 10000 changes */
1590 /* Replication related */
1592 server
.masterauth
= NULL
;
1593 server
.masterhost
= NULL
;
1594 server
.masterport
= 6379;
1595 server
.master
= NULL
;
1596 server
.replstate
= REDIS_REPL_NONE
;
1598 /* Double constants initialization */
1600 R_PosInf
= 1.0/R_Zero
;
1601 R_NegInf
= -1.0/R_Zero
;
1602 R_Nan
= R_Zero
/R_Zero
;
1605 static void initServer() {
1608 signal(SIGHUP
, SIG_IGN
);
1609 signal(SIGPIPE
, SIG_IGN
);
1610 setupSigSegvAction();
1612 server
.devnull
= fopen("/dev/null","w");
1613 if (server
.devnull
== NULL
) {
1614 redisLog(REDIS_WARNING
, "Can't open /dev/null: %s", server
.neterr
);
1617 server
.clients
= listCreate();
1618 server
.slaves
= listCreate();
1619 server
.monitors
= listCreate();
1620 server
.objfreelist
= listCreate();
1621 createSharedObjects();
1622 server
.el
= aeCreateEventLoop();
1623 server
.db
= zmalloc(sizeof(redisDb
)*server
.dbnum
);
1624 server
.fd
= anetTcpServer(server
.neterr
, server
.port
, server
.bindaddr
);
1625 if (server
.fd
== -1) {
1626 redisLog(REDIS_WARNING
, "Opening TCP port: %s", server
.neterr
);
1629 for (j
= 0; j
< server
.dbnum
; j
++) {
1630 server
.db
[j
].dict
= dictCreate(&dbDictType
,NULL
);
1631 server
.db
[j
].expires
= dictCreate(&keyptrDictType
,NULL
);
1632 server
.db
[j
].blockingkeys
= dictCreate(&keylistDictType
,NULL
);
1633 if (server
.vm_enabled
)
1634 server
.db
[j
].io_keys
= dictCreate(&keylistDictType
,NULL
);
1635 server
.db
[j
].id
= j
;
1637 server
.pubsub_channels
= dictCreate(&keylistDictType
,NULL
);
1638 server
.pubsub_patterns
= listCreate();
1639 listSetFreeMethod(server
.pubsub_patterns
,freePubsubPattern
);
1640 listSetMatchMethod(server
.pubsub_patterns
,listMatchPubsubPattern
);
1641 server
.cronloops
= 0;
1642 server
.bgsavechildpid
= -1;
1643 server
.bgrewritechildpid
= -1;
1644 server
.bgrewritebuf
= sdsempty();
1645 server
.lastsave
= time(NULL
);
1647 server
.stat_numcommands
= 0;
1648 server
.stat_numconnections
= 0;
1649 server
.stat_expiredkeys
= 0;
1650 server
.stat_starttime
= time(NULL
);
1651 server
.unixtime
= time(NULL
);
1652 aeCreateTimeEvent(server
.el
, 1, serverCron
, NULL
, NULL
);
1653 if (aeCreateFileEvent(server
.el
, server
.fd
, AE_READABLE
,
1654 acceptHandler
, NULL
) == AE_ERR
) oom("creating file event");
1656 if (server
.appendonly
) {
1657 server
.appendfd
= open(server
.appendfilename
,O_WRONLY
|O_APPEND
|O_CREAT
,0644);
1658 if (server
.appendfd
== -1) {
1659 redisLog(REDIS_WARNING
, "Can't open the append-only file: %s",
1665 if (server
.vm_enabled
) vmInit();
1668 /* Empty the whole database */
1669 static long long emptyDb() {
1671 long long removed
= 0;
1673 for (j
= 0; j
< server
.dbnum
; j
++) {
1674 removed
+= dictSize(server
.db
[j
].dict
);
1675 dictEmpty(server
.db
[j
].dict
);
1676 dictEmpty(server
.db
[j
].expires
);
1681 static int yesnotoi(char *s
) {
1682 if (!strcasecmp(s
,"yes")) return 1;
1683 else if (!strcasecmp(s
,"no")) return 0;
1687 /* I agree, this is a very rudimentary way to load a configuration...
1688 will improve later if the config gets more complex */
1689 static void loadServerConfig(char *filename
) {
1691 char buf
[REDIS_CONFIGLINE_MAX
+1], *err
= NULL
;
1694 char *errormsg
= "Fatal error, can't open config file '%s'";
1695 char *errorbuf
= zmalloc(sizeof(char)*(strlen(errormsg
)+strlen(filename
)));
1696 sprintf(errorbuf
, errormsg
, filename
);
1698 if (filename
[0] == '-' && filename
[1] == '\0')
1701 if ((fp
= fopen(filename
,"r")) == NULL
) {
1702 redisLog(REDIS_WARNING
, errorbuf
);
1707 while(fgets(buf
,REDIS_CONFIGLINE_MAX
+1,fp
) != NULL
) {
1713 line
= sdstrim(line
," \t\r\n");
1715 /* Skip comments and blank lines*/
1716 if (line
[0] == '#' || line
[0] == '\0') {
1721 /* Split into arguments */
1722 argv
= sdssplitlen(line
,sdslen(line
)," ",1,&argc
);
1723 sdstolower(argv
[0]);
1725 /* Execute config directives */
1726 if (!strcasecmp(argv
[0],"timeout") && argc
== 2) {
1727 server
.maxidletime
= atoi(argv
[1]);
1728 if (server
.maxidletime
< 0) {
1729 err
= "Invalid timeout value"; goto loaderr
;
1731 } else if (!strcasecmp(argv
[0],"port") && argc
== 2) {
1732 server
.port
= atoi(argv
[1]);
1733 if (server
.port
< 1 || server
.port
> 65535) {
1734 err
= "Invalid port"; goto loaderr
;
1736 } else if (!strcasecmp(argv
[0],"bind") && argc
== 2) {
1737 server
.bindaddr
= zstrdup(argv
[1]);
1738 } else if (!strcasecmp(argv
[0],"save") && argc
== 3) {
1739 int seconds
= atoi(argv
[1]);
1740 int changes
= atoi(argv
[2]);
1741 if (seconds
< 1 || changes
< 0) {
1742 err
= "Invalid save parameters"; goto loaderr
;
1744 appendServerSaveParams(seconds
,changes
);
1745 } else if (!strcasecmp(argv
[0],"dir") && argc
== 2) {
1746 if (chdir(argv
[1]) == -1) {
1747 redisLog(REDIS_WARNING
,"Can't chdir to '%s': %s",
1748 argv
[1], strerror(errno
));
1751 } else if (!strcasecmp(argv
[0],"loglevel") && argc
== 2) {
1752 if (!strcasecmp(argv
[1],"debug")) server
.verbosity
= REDIS_DEBUG
;
1753 else if (!strcasecmp(argv
[1],"verbose")) server
.verbosity
= REDIS_VERBOSE
;
1754 else if (!strcasecmp(argv
[1],"notice")) server
.verbosity
= REDIS_NOTICE
;
1755 else if (!strcasecmp(argv
[1],"warning")) server
.verbosity
= REDIS_WARNING
;
1757 err
= "Invalid log level. Must be one of debug, notice, warning";
1760 } else if (!strcasecmp(argv
[0],"logfile") && argc
== 2) {
1763 server
.logfile
= zstrdup(argv
[1]);
1764 if (!strcasecmp(server
.logfile
,"stdout")) {
1765 zfree(server
.logfile
);
1766 server
.logfile
= NULL
;
1768 if (server
.logfile
) {
1769 /* Test if we are able to open the file. The server will not
1770 * be able to abort just for this problem later... */
1771 logfp
= fopen(server
.logfile
,"a");
1772 if (logfp
== NULL
) {
1773 err
= sdscatprintf(sdsempty(),
1774 "Can't open the log file: %s", strerror(errno
));
1779 } else if (!strcasecmp(argv
[0],"databases") && argc
== 2) {
1780 server
.dbnum
= atoi(argv
[1]);
1781 if (server
.dbnum
< 1) {
1782 err
= "Invalid number of databases"; goto loaderr
;
1784 } else if (!strcasecmp(argv
[0],"include") && argc
== 2) {
1785 loadServerConfig(argv
[1]);
1786 } else if (!strcasecmp(argv
[0],"maxclients") && argc
== 2) {
1787 server
.maxclients
= atoi(argv
[1]);
1788 } else if (!strcasecmp(argv
[0],"maxmemory") && argc
== 2) {
1789 server
.maxmemory
= strtoll(argv
[1], NULL
, 10);
1790 } else if (!strcasecmp(argv
[0],"slaveof") && argc
== 3) {
1791 server
.masterhost
= sdsnew(argv
[1]);
1792 server
.masterport
= atoi(argv
[2]);
1793 server
.replstate
= REDIS_REPL_CONNECT
;
1794 } else if (!strcasecmp(argv
[0],"masterauth") && argc
== 2) {
1795 server
.masterauth
= zstrdup(argv
[1]);
1796 } else if (!strcasecmp(argv
[0],"glueoutputbuf") && argc
== 2) {
1797 if ((server
.glueoutputbuf
= yesnotoi(argv
[1])) == -1) {
1798 err
= "argument must be 'yes' or 'no'"; goto loaderr
;
1800 } else if (!strcasecmp(argv
[0],"shareobjects") && argc
== 2) {
1801 if ((server
.shareobjects
= yesnotoi(argv
[1])) == -1) {
1802 err
= "argument must be 'yes' or 'no'"; goto loaderr
;
1804 } else if (!strcasecmp(argv
[0],"rdbcompression") && argc
== 2) {
1805 if ((server
.rdbcompression
= yesnotoi(argv
[1])) == -1) {
1806 err
= "argument must be 'yes' or 'no'"; goto loaderr
;
1808 } else if (!strcasecmp(argv
[0],"daemonize") && argc
== 2) {
1809 if ((server
.daemonize
= yesnotoi(argv
[1])) == -1) {
1810 err
= "argument must be 'yes' or 'no'"; goto loaderr
;
1812 } else if (!strcasecmp(argv
[0],"appendonly") && argc
== 2) {
1813 if ((server
.appendonly
= yesnotoi(argv
[1])) == -1) {
1814 err
= "argument must be 'yes' or 'no'"; goto loaderr
;
1816 } else if (!strcasecmp(argv
[0],"appendfsync") && argc
== 2) {
1817 if (!strcasecmp(argv
[1],"no")) {
1818 server
.appendfsync
= APPENDFSYNC_NO
;
1819 } else if (!strcasecmp(argv
[1],"always")) {
1820 server
.appendfsync
= APPENDFSYNC_ALWAYS
;
1821 } else if (!strcasecmp(argv
[1],"everysec")) {
1822 server
.appendfsync
= APPENDFSYNC_EVERYSEC
;
1824 err
= "argument must be 'no', 'always' or 'everysec'";
1827 } else if (!strcasecmp(argv
[0],"requirepass") && argc
== 2) {
1828 server
.requirepass
= zstrdup(argv
[1]);
1829 } else if (!strcasecmp(argv
[0],"pidfile") && argc
== 2) {
1830 zfree(server
.pidfile
);
1831 server
.pidfile
= zstrdup(argv
[1]);
1832 } else if (!strcasecmp(argv
[0],"dbfilename") && argc
== 2) {
1833 zfree(server
.dbfilename
);
1834 server
.dbfilename
= zstrdup(argv
[1]);
1835 } else if (!strcasecmp(argv
[0],"vm-enabled") && argc
== 2) {
1836 if ((server
.vm_enabled
= yesnotoi(argv
[1])) == -1) {
1837 err
= "argument must be 'yes' or 'no'"; goto loaderr
;
1839 } else if (!strcasecmp(argv
[0],"vm-swap-file") && argc
== 2) {
1840 zfree(server
.vm_swap_file
);
1841 server
.vm_swap_file
= zstrdup(argv
[1]);
1842 } else if (!strcasecmp(argv
[0],"vm-max-memory") && argc
== 2) {
1843 server
.vm_max_memory
= strtoll(argv
[1], NULL
, 10);
1844 } else if (!strcasecmp(argv
[0],"vm-page-size") && argc
== 2) {
1845 server
.vm_page_size
= strtoll(argv
[1], NULL
, 10);
1846 } else if (!strcasecmp(argv
[0],"vm-pages") && argc
== 2) {
1847 server
.vm_pages
= strtoll(argv
[1], NULL
, 10);
1848 } else if (!strcasecmp(argv
[0],"vm-max-threads") && argc
== 2) {
1849 server
.vm_max_threads
= strtoll(argv
[1], NULL
, 10);
1850 } else if (!strcasecmp(argv
[0],"hash-max-zipmap-entries") && argc
== 2){
1851 server
.hash_max_zipmap_entries
= strtol(argv
[1], NULL
, 10);
1852 } else if (!strcasecmp(argv
[0],"hash-max-zipmap-value") && argc
== 2){
1853 server
.hash_max_zipmap_value
= strtol(argv
[1], NULL
, 10);
1854 } else if (!strcasecmp(argv
[0],"vm-max-threads") && argc
== 2) {
1855 server
.vm_max_threads
= strtoll(argv
[1], NULL
, 10);
1857 err
= "Bad directive or wrong number of arguments"; goto loaderr
;
1859 for (j
= 0; j
< argc
; j
++)
1864 if (fp
!= stdin
) fclose(fp
);
1868 fprintf(stderr
, "\n*** FATAL CONFIG FILE ERROR ***\n");
1869 fprintf(stderr
, "Reading the configuration file, at line %d\n", linenum
);
1870 fprintf(stderr
, ">>> '%s'\n", line
);
1871 fprintf(stderr
, "%s\n", err
);
1875 static void freeClientArgv(redisClient
*c
) {
1878 for (j
= 0; j
< c
->argc
; j
++)
1879 decrRefCount(c
->argv
[j
]);
1880 for (j
= 0; j
< c
->mbargc
; j
++)
1881 decrRefCount(c
->mbargv
[j
]);
1886 static void freeClient(redisClient
*c
) {
1889 /* Note that if the client we are freeing is blocked in a blocking
1890 * call, we have to set querybuf to NULL *before* calling
1891 * unblockClientWaitingData() so that processInputBuffer() does not get
1892 * called. Also it is important to remove the file events after
1893 * this, because this call adds the READABLE event. */
1894 sdsfree(c
->querybuf
);
1896 if (c
->flags
& REDIS_BLOCKED
)
1897 unblockClientWaitingData(c
);
1899 /* Unsubscribe from all the pubsub channels */
1900 pubsubUnsubscribeAllChannels(c
,0);
1901 pubsubUnsubscribeAllPatterns(c
,0);
1902 dictRelease(c
->pubsub_channels
);
1903 listRelease(c
->pubsub_patterns
);
1904 /* Obvious cleanup */
1905 aeDeleteFileEvent(server
.el
,c
->fd
,AE_READABLE
);
1906 aeDeleteFileEvent(server
.el
,c
->fd
,AE_WRITABLE
);
1907 listRelease(c
->reply
);
1910 /* Remove from the list of clients */
1911 ln
= listSearchKey(server
.clients
,c
);
1912 redisAssert(ln
!= NULL
);
1913 listDelNode(server
.clients
,ln
);
1914 /* Remove from the list of clients waiting for swapped keys */
1915 if (c
->flags
& REDIS_IO_WAIT
&& listLength(c
->io_keys
) == 0) {
1916 ln
= listSearchKey(server
.io_ready_clients
,c
);
1918 listDelNode(server
.io_ready_clients
,ln
);
1919 server
.vm_blocked_clients
--;
1922 while (server
.vm_enabled
&& listLength(c
->io_keys
)) {
1923 ln
= listFirst(c
->io_keys
);
1924 dontWaitForSwappedKey(c
,ln
->value
);
1926 listRelease(c
->io_keys
);
1927 /* Master/slave cleanup */
1928 if (c
->flags
& REDIS_SLAVE
) {
1929 if (c
->replstate
== REDIS_REPL_SEND_BULK
&& c
->repldbfd
!= -1)
1931 list
*l
= (c
->flags
& REDIS_MONITOR
) ? server
.monitors
: server
.slaves
;
1932 ln
= listSearchKey(l
,c
);
1933 redisAssert(ln
!= NULL
);
1936 if (c
->flags
& REDIS_MASTER
) {
1937 server
.master
= NULL
;
1938 server
.replstate
= REDIS_REPL_CONNECT
;
1940 /* Release memory */
1943 freeClientMultiState(c
);
1947 #define GLUEREPLY_UP_TO (1024)
1948 static void glueReplyBuffersIfNeeded(redisClient
*c
) {
1950 char buf
[GLUEREPLY_UP_TO
];
1955 listRewind(c
->reply
,&li
);
1956 while((ln
= listNext(&li
))) {
1960 objlen
= sdslen(o
->ptr
);
1961 if (copylen
+ objlen
<= GLUEREPLY_UP_TO
) {
1962 memcpy(buf
+copylen
,o
->ptr
,objlen
);
1964 listDelNode(c
->reply
,ln
);
1966 if (copylen
== 0) return;
1970 /* Now the output buffer is empty, add the new single element */
1971 o
= createObject(REDIS_STRING
,sdsnewlen(buf
,copylen
));
1972 listAddNodeHead(c
->reply
,o
);
1975 static void sendReplyToClient(aeEventLoop
*el
, int fd
, void *privdata
, int mask
) {
1976 redisClient
*c
= privdata
;
1977 int nwritten
= 0, totwritten
= 0, objlen
;
1980 REDIS_NOTUSED(mask
);
1982 /* Use writev() if we have enough buffers to send */
1983 if (!server
.glueoutputbuf
&&
1984 listLength(c
->reply
) > REDIS_WRITEV_THRESHOLD
&&
1985 !(c
->flags
& REDIS_MASTER
))
1987 sendReplyToClientWritev(el
, fd
, privdata
, mask
);
1991 while(listLength(c
->reply
)) {
1992 if (server
.glueoutputbuf
&& listLength(c
->reply
) > 1)
1993 glueReplyBuffersIfNeeded(c
);
1995 o
= listNodeValue(listFirst(c
->reply
));
1996 objlen
= sdslen(o
->ptr
);
1999 listDelNode(c
->reply
,listFirst(c
->reply
));
2003 if (c
->flags
& REDIS_MASTER
) {
2004 /* Don't reply to a master */
2005 nwritten
= objlen
- c
->sentlen
;
2007 nwritten
= write(fd
, ((char*)o
->ptr
)+c
->sentlen
, objlen
- c
->sentlen
);
2008 if (nwritten
<= 0) break;
2010 c
->sentlen
+= nwritten
;
2011 totwritten
+= nwritten
;
2012 /* If we fully sent the object on head go to the next one */
2013 if (c
->sentlen
== objlen
) {
2014 listDelNode(c
->reply
,listFirst(c
->reply
));
2017 /* Note that we avoid sending more than REDIS_MAX_WRITE_PER_EVENT
2018 * bytes: in a single threaded server it's a good idea to serve
2019 * other clients as well, even if a very large request comes from
2020 * a super fast link that is always able to accept data (in a real world
2021 * scenario think about 'KEYS *' against the loopback interface) */
2022 if (totwritten
> REDIS_MAX_WRITE_PER_EVENT
) break;
2024 if (nwritten
== -1) {
2025 if (errno
== EAGAIN
) {
2028 redisLog(REDIS_VERBOSE
,
2029 "Error writing to client: %s", strerror(errno
));
2034 if (totwritten
> 0) c
->lastinteraction
= time(NULL
);
2035 if (listLength(c
->reply
) == 0) {
2037 aeDeleteFileEvent(server
.el
,c
->fd
,AE_WRITABLE
);
2041 static void sendReplyToClientWritev(aeEventLoop
*el
, int fd
, void *privdata
, int mask
)
2043 redisClient
*c
= privdata
;
2044 int nwritten
= 0, totwritten
= 0, objlen
, willwrite
;
2046 struct iovec iov
[REDIS_WRITEV_IOVEC_COUNT
];
2047 int offset
, ion
= 0;
2049 REDIS_NOTUSED(mask
);
2052 while (listLength(c
->reply
)) {
2053 offset
= c
->sentlen
;
2057 /* fill-in the iov[] array */
2058 for(node
= listFirst(c
->reply
); node
; node
= listNextNode(node
)) {
2059 o
= listNodeValue(node
);
2060 objlen
= sdslen(o
->ptr
);
2062 if (totwritten
+ objlen
- offset
> REDIS_MAX_WRITE_PER_EVENT
)
2065 if(ion
== REDIS_WRITEV_IOVEC_COUNT
)
2066 break; /* no more iovecs */
2068 iov
[ion
].iov_base
= ((char*)o
->ptr
) + offset
;
2069 iov
[ion
].iov_len
= objlen
- offset
;
2070 willwrite
+= objlen
- offset
;
2071 offset
= 0; /* just for the first item */
2078 /* write all collected blocks at once */
2079 if((nwritten
= writev(fd
, iov
, ion
)) < 0) {
2080 if (errno
!= EAGAIN
) {
2081 redisLog(REDIS_VERBOSE
,
2082 "Error writing to client: %s", strerror(errno
));
2089 totwritten
+= nwritten
;
2090 offset
= c
->sentlen
;
2092 /* remove written robjs from c->reply */
2093 while (nwritten
&& listLength(c
->reply
)) {
2094 o
= listNodeValue(listFirst(c
->reply
));
2095 objlen
= sdslen(o
->ptr
);
2097 if(nwritten
>= objlen
- offset
) {
2098 listDelNode(c
->reply
, listFirst(c
->reply
));
2099 nwritten
-= objlen
- offset
;
2103 c
->sentlen
+= nwritten
;
2111 c
->lastinteraction
= time(NULL
);
2113 if (listLength(c
->reply
) == 0) {
2115 aeDeleteFileEvent(server
.el
,c
->fd
,AE_WRITABLE
);
2119 static struct redisCommand
*lookupCommand(char *name
) {
2121 while(cmdTable
[j
].name
!= NULL
) {
2122 if (!strcasecmp(name
,cmdTable
[j
].name
)) return &cmdTable
[j
];
2128 /* resetClient prepare the client to process the next command */
2129 static void resetClient(redisClient
*c
) {
2135 /* Call() is the core of Redis execution of a command */
2136 static void call(redisClient
*c
, struct redisCommand
*cmd
) {
2139 dirty
= server
.dirty
;
2141 dirty
= server
.dirty
-dirty
;
2143 if (server
.appendonly
&& dirty
)
2144 feedAppendOnlyFile(cmd
,c
->db
->id
,c
->argv
,c
->argc
);
2145 if ((dirty
|| cmd
->flags
& REDIS_CMD_FORCE_REPLICATION
) &&
2146 listLength(server
.slaves
))
2147 replicationFeedSlaves(server
.slaves
,c
->db
->id
,c
->argv
,c
->argc
);
2148 if (listLength(server
.monitors
))
2149 replicationFeedSlaves(server
.monitors
,c
->db
->id
,c
->argv
,c
->argc
);
2150 server
.stat_numcommands
++;
2153 /* If this function gets called we already read a whole
2154 * command, arguments are in the client argv/argc fields.
2155 * processCommand() executes the command or prepares the
2156 * server for a bulk read from the client.
2158 * If 1 is returned the client is still alive and valid
2159 * and other operations can be performed by the caller. Otherwise
2160 * if 0 is returned the client was destroyed (i.e. after QUIT). */
2161 static int processCommand(redisClient
*c
) {
2162 struct redisCommand
*cmd
;
2164 /* Free some memory if needed (maxmemory setting) */
2165 if (server
.maxmemory
) freeMemoryIfNeeded();
2167 /* Handle the multi bulk command type. This is an alternative protocol
2168 * supported by Redis in order to receive commands that are composed of
2169 * multiple binary-safe "bulk" arguments. The latency of processing is
2170 * a bit higher but this allows things like multi-sets, so if this
2171 * protocol is used only for MSET and similar commands this is a big win. */
2172 if (c
->multibulk
== 0 && c
->argc
== 1 && ((char*)(c
->argv
[0]->ptr
))[0] == '*') {
2173 c
->multibulk
= atoi(((char*)c
->argv
[0]->ptr
)+1);
2174 if (c
->multibulk
<= 0) {
2178 decrRefCount(c
->argv
[c
->argc
-1]);
2182 } else if (c
->multibulk
) {
2183 if (c
->bulklen
== -1) {
2184 if (((char*)c
->argv
[0]->ptr
)[0] != '$') {
2185 addReplySds(c
,sdsnew("-ERR multi bulk protocol error\r\n"));
2189 int bulklen
= atoi(((char*)c
->argv
[0]->ptr
)+1);
2190 decrRefCount(c
->argv
[0]);
2191 if (bulklen
< 0 || bulklen
> 1024*1024*1024) {
2193 addReplySds(c
,sdsnew("-ERR invalid bulk write count\r\n"));
2198 c
->bulklen
= bulklen
+2; /* add two bytes for CR+LF */
2202 c
->mbargv
= zrealloc(c
->mbargv
,(sizeof(robj
*))*(c
->mbargc
+1));
2203 c
->mbargv
[c
->mbargc
] = c
->argv
[0];
2207 if (c
->multibulk
== 0) {
2211 /* Here we need to swap the multi-bulk argc/argv with the
2212 * normal argc/argv of the client structure. */
2214 c
->argv
= c
->mbargv
;
2215 c
->mbargv
= auxargv
;
2218 c
->argc
= c
->mbargc
;
2219 c
->mbargc
= auxargc
;
2221 /* We need to set bulklen to something different than -1
2222 * in order for the code below to process the command without
2223 * trying to read the last argument of a bulk command as
2224 * a special argument. */
2226 /* continue below and process the command */
2233 /* -- end of multi bulk commands processing -- */
2235 /* The QUIT command is handled as a special case. Normal command
2236 * procs are unable to close the client connection safely */
2237 if (!strcasecmp(c
->argv
[0]->ptr
,"quit")) {
2242 /* Now lookup the command and check ASAP about trivial error conditions
2243 * such wrong arity, bad command name and so forth. */
2244 cmd
= lookupCommand(c
->argv
[0]->ptr
);
2247 sdscatprintf(sdsempty(), "-ERR unknown command '%s'\r\n",
2248 (char*)c
->argv
[0]->ptr
));
2251 } else if ((cmd
->arity
> 0 && cmd
->arity
!= c
->argc
) ||
2252 (c
->argc
< -cmd
->arity
)) {
2254 sdscatprintf(sdsempty(),
2255 "-ERR wrong number of arguments for '%s' command\r\n",
2259 } else if (cmd
->flags
& REDIS_CMD_BULK
&& c
->bulklen
== -1) {
2260 /* This is a bulk command, we have to read the last argument yet. */
2261 int bulklen
= atoi(c
->argv
[c
->argc
-1]->ptr
);
2263 decrRefCount(c
->argv
[c
->argc
-1]);
2264 if (bulklen
< 0 || bulklen
> 1024*1024*1024) {
2266 addReplySds(c
,sdsnew("-ERR invalid bulk write count\r\n"));
2271 c
->bulklen
= bulklen
+2; /* add two bytes for CR+LF */
2272 /* It is possible that the bulk read is already in the
2273 * buffer. Check this condition and handle it accordingly.
2274 * This is just a fast path, an alternative to calling processInputBuffer().
2275 * It's a good idea since the code is small and this condition
2276 * happens most of the time. */
2277 if ((signed)sdslen(c
->querybuf
) >= c
->bulklen
) {
2278 c
->argv
[c
->argc
] = createStringObject(c
->querybuf
,c
->bulklen
-2);
2280 c
->querybuf
= sdsrange(c
->querybuf
,c
->bulklen
,-1);
2282 /* Otherwise return... there is to read the last argument
2283 * from the socket. */
2287 /* Let's try to encode the bulk object to save space. */
2288 if (cmd
->flags
& REDIS_CMD_BULK
)
2289 c
->argv
[c
->argc
-1] = tryObjectEncoding(c
->argv
[c
->argc
-1]);
2291 /* Check if the user is authenticated */
2292 if (server
.requirepass
&& !c
->authenticated
&& cmd
->proc
!= authCommand
) {
2293 addReplySds(c
,sdsnew("-ERR operation not permitted\r\n"));
2298 /* Handle the maxmemory directive */
2299 if (server
.maxmemory
&& (cmd
->flags
& REDIS_CMD_DENYOOM
) &&
2300 zmalloc_used_memory() > server
.maxmemory
)
2302 addReplySds(c
,sdsnew("-ERR command not allowed when used memory > 'maxmemory'\r\n"));
2307 /* Only allow SUBSCRIBE and UNSUBSCRIBE in the context of Pub/Sub */
2308 if (dictSize(c
->pubsub_channels
) > 0 &&
2309 cmd
->proc
!= subscribeCommand
&& cmd
->proc
!= unsubscribeCommand
&&
2310 cmd
->proc
!= psubscribeCommand
&& cmd
->proc
!= punsubscribeCommand
) {
2311 addReplySds(c
,sdsnew("-ERR only (P)SUBSCRIBE / (P)UNSUBSCRIBE / QUIT allowed in this context\r\n"));
2316 /* Exec the command */
2317 if (c
->flags
& REDIS_MULTI
&& cmd
->proc
!= execCommand
&& cmd
->proc
!= discardCommand
) {
2318 queueMultiCommand(c
,cmd
);
2319 addReply(c
,shared
.queued
);
2321 if (server
.vm_enabled
&& server
.vm_max_threads
> 0 &&
2322 blockClientOnSwappedKeys(cmd
,c
)) return 1;
2326 /* Prepare the client for the next command */
2331 static void replicationFeedSlaves(list
*slaves
, int dictid
, robj
**argv
, int argc
) {
2336 /* We need 1+(ARGS*3) objects since commands are using the new protocol
2337 * and we need 1 object for the first "*<count>\r\n" multibulk count, then
2338 * for every additional object we have "$<count>\r\n" + object + "\r\n". */
2339 robj
*static_outv
[REDIS_STATIC_ARGS
*3+1];
2342 if (argc
<= REDIS_STATIC_ARGS
) {
2345 outv
= zmalloc(sizeof(robj
*)*(argc
*3+1));
2348 lenobj
= createObject(REDIS_STRING
,
2349 sdscatprintf(sdsempty(), "*%d\r\n", argc
));
2350 lenobj
->refcount
= 0;
2351 outv
[outc
++] = lenobj
;
2352 for (j
= 0; j
< argc
; j
++) {
2353 lenobj
= createObject(REDIS_STRING
,
2354 sdscatprintf(sdsempty(),"$%lu\r\n",
2355 (unsigned long) stringObjectLen(argv
[j
])));
2356 lenobj
->refcount
= 0;
2357 outv
[outc
++] = lenobj
;
2358 outv
[outc
++] = argv
[j
];
2359 outv
[outc
++] = shared
.crlf
;
2362 /* Increment all the refcounts at start and decrement at end in order to
2363 * be sure to free objects if there is no slave in a replication state
2364 * able to be fed with commands */
2365 for (j
= 0; j
< outc
; j
++) incrRefCount(outv
[j
]);
2366 listRewind(slaves
,&li
);
2367 while((ln
= listNext(&li
))) {
2368 redisClient
*slave
= ln
->value
;
2370 /* Don't feed slaves that are still waiting for BGSAVE to start */
2371 if (slave
->replstate
== REDIS_REPL_WAIT_BGSAVE_START
) continue;
2373 /* Feed all the other slaves, MONITORs and so on */
2374 if (slave
->slaveseldb
!= dictid
) {
2378 case 0: selectcmd
= shared
.select0
; break;
2379 case 1: selectcmd
= shared
.select1
; break;
2380 case 2: selectcmd
= shared
.select2
; break;
2381 case 3: selectcmd
= shared
.select3
; break;
2382 case 4: selectcmd
= shared
.select4
; break;
2383 case 5: selectcmd
= shared
.select5
; break;
2384 case 6: selectcmd
= shared
.select6
; break;
2385 case 7: selectcmd
= shared
.select7
; break;
2386 case 8: selectcmd
= shared
.select8
; break;
2387 case 9: selectcmd
= shared
.select9
; break;
2389 selectcmd
= createObject(REDIS_STRING
,
2390 sdscatprintf(sdsempty(),"select %d\r\n",dictid
));
2391 selectcmd
->refcount
= 0;
2394 addReply(slave
,selectcmd
);
2395 slave
->slaveseldb
= dictid
;
2397 for (j
= 0; j
< outc
; j
++) addReply(slave
,outv
[j
]);
2399 for (j
= 0; j
< outc
; j
++) decrRefCount(outv
[j
]);
2400 if (outv
!= static_outv
) zfree(outv
);
2403 static void processInputBuffer(redisClient
*c
) {
2405 /* Before processing the input buffer, make sure the client is not
2406 * waiting for a blocking operation such as BLPOP. Note that on the first
2407 * iteration the client is never blocked, otherwise processInputBuffer()
2408 * would not be called at all, but after the execution of the first commands
2409 * in the input buffer the client may be blocked, and the "goto again"
2410 * will try to reiterate. The following line will make it return asap. */
2411 if (c
->flags
& REDIS_BLOCKED
|| c
->flags
& REDIS_IO_WAIT
) return;
2412 if (c
->bulklen
== -1) {
2413 /* Read the first line of the query */
2414 char *p
= strchr(c
->querybuf
,'\n');
2421 query
= c
->querybuf
;
2422 c
->querybuf
= sdsempty();
2423 querylen
= 1+(p
-(query
));
2424 if (sdslen(query
) > querylen
) {
2425 /* leave data after the first line of the query in the buffer */
2426 c
->querybuf
= sdscatlen(c
->querybuf
,query
+querylen
,sdslen(query
)-querylen
);
2428 *p
= '\0'; /* remove "\n" */
2429 if (*(p
-1) == '\r') *(p
-1) = '\0'; /* and "\r" if any */
2430 sdsupdatelen(query
);
2432 /* Now we can split the query in arguments */
2433 argv
= sdssplitlen(query
,sdslen(query
)," ",1,&argc
);
2436 if (c
->argv
) zfree(c
->argv
);
2437 c
->argv
= zmalloc(sizeof(robj
*)*argc
);
2439 for (j
= 0; j
< argc
; j
++) {
2440 if (sdslen(argv
[j
])) {
2441 c
->argv
[c
->argc
] = createObject(REDIS_STRING
,argv
[j
]);
2449 /* Execute the command. If the client is still valid
2450 * after processCommand() return and there is something
2451 * on the query buffer try to process the next command. */
2452 if (processCommand(c
) && sdslen(c
->querybuf
)) goto again
;
2454 /* Nothing to process, argc == 0. Just process the query
2455 * buffer if it's not empty or return to the caller */
2456 if (sdslen(c
->querybuf
)) goto again
;
2459 } else if (sdslen(c
->querybuf
) >= REDIS_REQUEST_MAX_SIZE
) {
2460 redisLog(REDIS_VERBOSE
, "Client protocol error");
2465 /* Bulk read handling. Note that if we are at this point
2466 the client already sent a command terminated with a newline,
2467 we are reading the bulk data that is actually the last
2468 argument of the command. */
2469 int qbl
= sdslen(c
->querybuf
);
2471 if (c
->bulklen
<= qbl
) {
2472 /* Copy everything but the final CRLF as final argument */
2473 c
->argv
[c
->argc
] = createStringObject(c
->querybuf
,c
->bulklen
-2);
2475 c
->querybuf
= sdsrange(c
->querybuf
,c
->bulklen
,-1);
2476 /* Process the command. If the client is still valid after
2477 * the processing and there is more data in the buffer
2478 * try to parse it. */
2479 if (processCommand(c
) && sdslen(c
->querybuf
)) goto again
;
2485 static void readQueryFromClient(aeEventLoop
*el
, int fd
, void *privdata
, int mask
) {
2486 redisClient
*c
= (redisClient
*) privdata
;
2487 char buf
[REDIS_IOBUF_LEN
];
2490 REDIS_NOTUSED(mask
);
2492 nread
= read(fd
, buf
, REDIS_IOBUF_LEN
);
2494 if (errno
== EAGAIN
) {
2497 redisLog(REDIS_VERBOSE
, "Reading from client: %s",strerror(errno
));
2501 } else if (nread
== 0) {
2502 redisLog(REDIS_VERBOSE
, "Client closed connection");
2507 c
->querybuf
= sdscatlen(c
->querybuf
, buf
, nread
);
2508 c
->lastinteraction
= time(NULL
);
2512 processInputBuffer(c
);
2515 static int selectDb(redisClient
*c
, int id
) {
2516 if (id
< 0 || id
>= server
.dbnum
)
2518 c
->db
= &server
.db
[id
];
2522 static void *dupClientReplyValue(void *o
) {
2523 incrRefCount((robj
*)o
);
/* List match callback: yields 1 when compareStringObjects() reports no
 * difference between 'a' and 'b' (i.e. returns 0), and 0 otherwise.
 * createClient() installs it as the match method of pubsub_patterns. */
static int listMatchObjects(void *a, void *b) {
    return !compareStringObjects(a,b);
}
2531 static redisClient
*createClient(int fd
) {
2532 redisClient
*c
= zmalloc(sizeof(*c
));
2534 anetNonBlock(NULL
,fd
);
2535 anetTcpNoDelay(NULL
,fd
);
2536 if (!c
) return NULL
;
2539 c
->querybuf
= sdsempty();
2548 c
->lastinteraction
= time(NULL
);
2549 c
->authenticated
= 0;
2550 c
->replstate
= REDIS_REPL_NONE
;
2551 c
->reply
= listCreate();
2552 listSetFreeMethod(c
->reply
,decrRefCount
);
2553 listSetDupMethod(c
->reply
,dupClientReplyValue
);
2554 c
->blockingkeys
= NULL
;
2555 c
->blockingkeysnum
= 0;
2556 c
->io_keys
= listCreate();
2557 listSetFreeMethod(c
->io_keys
,decrRefCount
);
2558 c
->pubsub_channels
= dictCreate(&setDictType
,NULL
);
2559 c
->pubsub_patterns
= listCreate();
2560 listSetFreeMethod(c
->pubsub_patterns
,decrRefCount
);
2561 listSetMatchMethod(c
->pubsub_patterns
,listMatchObjects
);
2562 if (aeCreateFileEvent(server
.el
, c
->fd
, AE_READABLE
,
2563 readQueryFromClient
, c
) == AE_ERR
) {
2567 listAddNodeTail(server
.clients
,c
);
2568 initClientMultiState(c
);
2572 static void addReply(redisClient
*c
, robj
*obj
) {
2573 if (listLength(c
->reply
) == 0 &&
2574 (c
->replstate
== REDIS_REPL_NONE
||
2575 c
->replstate
== REDIS_REPL_ONLINE
) &&
2576 aeCreateFileEvent(server
.el
, c
->fd
, AE_WRITABLE
,
2577 sendReplyToClient
, c
) == AE_ERR
) return;
2579 if (server
.vm_enabled
&& obj
->storage
!= REDIS_VM_MEMORY
) {
2580 obj
= dupStringObject(obj
);
2581 obj
->refcount
= 0; /* getDecodedObject() will increment the refcount */
2583 listAddNodeTail(c
->reply
,getDecodedObject(obj
));
2586 static void addReplySds(redisClient
*c
, sds s
) {
2587 robj
*o
= createObject(REDIS_STRING
,s
);
2592 static void addReplyDouble(redisClient
*c
, double d
) {
2595 snprintf(buf
,sizeof(buf
),"%.17g",d
);
2596 addReplySds(c
,sdscatprintf(sdsempty(),"$%lu\r\n%s\r\n",
2597 (unsigned long) strlen(buf
),buf
));
2600 static void addReplyLong(redisClient
*c
, long l
) {
2605 addReply(c
,shared
.czero
);
2607 } else if (l
== 1) {
2608 addReply(c
,shared
.cone
);
2611 len
= snprintf(buf
,sizeof(buf
),":%ld\r\n",l
);
2612 addReplySds(c
,sdsnewlen(buf
,len
));
2615 static void addReplyLongLong(redisClient
*c
, long long ll
) {
2620 addReply(c
,shared
.czero
);
2622 } else if (ll
== 1) {
2623 addReply(c
,shared
.cone
);
2626 len
= snprintf(buf
,sizeof(buf
),":%lld\r\n",ll
);
2627 addReplySds(c
,sdsnewlen(buf
,len
));
2630 static void addReplyUlong(redisClient
*c
, unsigned long ul
) {
2635 addReply(c
,shared
.czero
);
2637 } else if (ul
== 1) {
2638 addReply(c
,shared
.cone
);
2641 len
= snprintf(buf
,sizeof(buf
),":%lu\r\n",ul
);
2642 addReplySds(c
,sdsnewlen(buf
,len
));
2645 static void addReplyBulkLen(redisClient
*c
, robj
*obj
) {
2648 if (obj
->encoding
== REDIS_ENCODING_RAW
) {
2649 len
= sdslen(obj
->ptr
);
2651 long n
= (long)obj
->ptr
;
2653 /* Compute how many bytes will take this integer as a radix 10 string */
2659 while((n
= n
/10) != 0) {
2663 addReplySds(c
,sdscatprintf(sdsempty(),"$%lu\r\n",(unsigned long)len
));
2666 static void addReplyBulk(redisClient
*c
, robj
*obj
) {
2667 addReplyBulkLen(c
,obj
);
2669 addReply(c
,shared
.crlf
);
2672 /* In the CONFIG command we need to add vanilla C string as bulk replies */
2673 static void addReplyBulkCString(redisClient
*c
, char *s
) {
2675 addReply(c
,shared
.nullbulk
);
2677 robj
*o
= createStringObject(s
,strlen(s
));
2683 static void acceptHandler(aeEventLoop
*el
, int fd
, void *privdata
, int mask
) {
2688 REDIS_NOTUSED(mask
);
2689 REDIS_NOTUSED(privdata
);
2691 cfd
= anetAccept(server
.neterr
, fd
, cip
, &cport
);
2692 if (cfd
== AE_ERR
) {
2693 redisLog(REDIS_VERBOSE
,"Accepting client connection: %s", server
.neterr
);
2696 redisLog(REDIS_VERBOSE
,"Accepted %s:%d", cip
, cport
);
2697 if ((c
= createClient(cfd
)) == NULL
) {
2698 redisLog(REDIS_WARNING
,"Error allocating resoures for the client");
2699 close(cfd
); /* May be already closed, just ingore errors */
2702 /* If the maxclients directive is set and this is one client more... close
2703 * the connection. Note that we create the client instead of checking
2704 * for this condition beforehand, since now the socket is already set in
2705 * nonblocking mode and we can send an error for free using the Kernel I/O */
2706 if (server
.maxclients
&& listLength(server
.clients
) > server
.maxclients
) {
2707 char *err
= "-ERR max number of clients reached\r\n";
2709 /* That's a best effort error message, don't check write errors */
2710 if (write(c
->fd
,err
,strlen(err
)) == -1) {
2711 /* Nothing to do, Just to avoid the warning... */
2716 server
.stat_numconnections
++;
2719 /* ======================= Redis objects implementation ===================== */
2721 static robj
*createObject(int type
, void *ptr
) {
2724 if (server
.vm_enabled
) pthread_mutex_lock(&server
.obj_freelist_mutex
);
2725 if (listLength(server
.objfreelist
)) {
2726 listNode
*head
= listFirst(server
.objfreelist
);
2727 o
= listNodeValue(head
);
2728 listDelNode(server
.objfreelist
,head
);
2729 if (server
.vm_enabled
) pthread_mutex_unlock(&server
.obj_freelist_mutex
);
2731 if (server
.vm_enabled
) {
2732 pthread_mutex_unlock(&server
.obj_freelist_mutex
);
2733 o
= zmalloc(sizeof(*o
));
2735 o
= zmalloc(sizeof(*o
)-sizeof(struct redisObjectVM
));
2739 o
->encoding
= REDIS_ENCODING_RAW
;
2742 if (server
.vm_enabled
) {
2743 /* Note that this code may run in the context of an I/O thread
2744 * and accessing to server.unixtime in theory is an error
2745 * (no locks). But in practice this is safe, and even if we read
2746 * garbage Redis will not fail, as it's just a statistical info */
2747 o
->vm
.atime
= server
.unixtime
;
2748 o
->storage
= REDIS_VM_MEMORY
;
2753 static robj
*createStringObject(char *ptr
, size_t len
) {
2754 return createObject(REDIS_STRING
,sdsnewlen(ptr
,len
));
2757 static robj
*dupStringObject(robj
*o
) {
2758 assert(o
->encoding
== REDIS_ENCODING_RAW
);
2759 return createStringObject(o
->ptr
,sdslen(o
->ptr
));
2762 static robj
*createListObject(void) {
2763 list
*l
= listCreate();
2765 listSetFreeMethod(l
,decrRefCount
);
2766 return createObject(REDIS_LIST
,l
);
2769 static robj
*createSetObject(void) {
2770 dict
*d
= dictCreate(&setDictType
,NULL
);
2771 return createObject(REDIS_SET
,d
);
2774 static robj
*createHashObject(void) {
2775 /* All the Hashes start as zipmaps. Will be automatically converted
2776 * into hash tables if there are enough elements or big elements
2778 unsigned char *zm
= zipmapNew();
2779 robj
*o
= createObject(REDIS_HASH
,zm
);
2780 o
->encoding
= REDIS_ENCODING_ZIPMAP
;
2784 static robj
*createZsetObject(void) {
2785 zset
*zs
= zmalloc(sizeof(*zs
));
2787 zs
->dict
= dictCreate(&zsetDictType
,NULL
);
2788 zs
->zsl
= zslCreate();
2789 return createObject(REDIS_ZSET
,zs
);
2792 static void freeStringObject(robj
*o
) {
2793 if (o
->encoding
== REDIS_ENCODING_RAW
) {
2798 static void freeListObject(robj
*o
) {
2799 listRelease((list
*) o
->ptr
);
2802 static void freeSetObject(robj
*o
) {
2803 dictRelease((dict
*) o
->ptr
);
2806 static void freeZsetObject(robj
*o
) {
2809 dictRelease(zs
->dict
);
2814 static void freeHashObject(robj
*o
) {
2815 switch (o
->encoding
) {
2816 case REDIS_ENCODING_HT
:
2817 dictRelease((dict
*) o
->ptr
);
2819 case REDIS_ENCODING_ZIPMAP
:
2828 static void incrRefCount(robj
*o
) {
2832 static void decrRefCount(void *obj
) {
2835 /* Object is a key of a swapped out value, or in the process of being
2837 if (server
.vm_enabled
&&
2838 (o
->storage
== REDIS_VM_SWAPPED
|| o
->storage
== REDIS_VM_LOADING
))
2840 if (o
->storage
== REDIS_VM_LOADING
) vmCancelThreadedIOJob(obj
);
2841 redisAssert(o
->type
== REDIS_STRING
);
2842 freeStringObject(o
);
2843 vmMarkPagesFree(o
->vm
.page
,o
->vm
.usedpages
);
2844 pthread_mutex_lock(&server
.obj_freelist_mutex
);
2845 if (listLength(server
.objfreelist
) > REDIS_OBJFREELIST_MAX
||
2846 !listAddNodeHead(server
.objfreelist
,o
))
2848 pthread_mutex_unlock(&server
.obj_freelist_mutex
);
2849 server
.vm_stats_swapped_objects
--;
2852 /* Object is in memory, or in the process of being swapped out. */
2853 if (--(o
->refcount
) == 0) {
2854 if (server
.vm_enabled
&& o
->storage
== REDIS_VM_SWAPPING
)
2855 vmCancelThreadedIOJob(obj
);
2857 case REDIS_STRING
: freeStringObject(o
); break;
2858 case REDIS_LIST
: freeListObject(o
); break;
2859 case REDIS_SET
: freeSetObject(o
); break;
2860 case REDIS_ZSET
: freeZsetObject(o
); break;
2861 case REDIS_HASH
: freeHashObject(o
); break;
2862 default: redisAssert(0); break;
2864 if (server
.vm_enabled
) pthread_mutex_lock(&server
.obj_freelist_mutex
);
2865 if (listLength(server
.objfreelist
) > REDIS_OBJFREELIST_MAX
||
2866 !listAddNodeHead(server
.objfreelist
,o
))
2868 if (server
.vm_enabled
) pthread_mutex_unlock(&server
.obj_freelist_mutex
);
2872 static robj
*lookupKey(redisDb
*db
, robj
*key
) {
2873 dictEntry
*de
= dictFind(db
->dict
,key
);
2875 robj
*key
= dictGetEntryKey(de
);
2876 robj
*val
= dictGetEntryVal(de
);
2878 if (server
.vm_enabled
) {
2879 if (key
->storage
== REDIS_VM_MEMORY
||
2880 key
->storage
== REDIS_VM_SWAPPING
)
2882 /* If we were swapping the object out, stop it, this key
2884 if (key
->storage
== REDIS_VM_SWAPPING
)
2885 vmCancelThreadedIOJob(key
);
2886 /* Update the access time of the key for the aging algorithm. */
2887 key
->vm
.atime
= server
.unixtime
;
2889 int notify
= (key
->storage
== REDIS_VM_LOADING
);
2891 /* Our value was swapped on disk. Bring it at home. */
2892 redisAssert(val
== NULL
);
2893 val
= vmLoadObject(key
);
2894 dictGetEntryVal(de
) = val
;
2896 /* Clients blocked by the VM subsystem may be waiting for
2898 if (notify
) handleClientsBlockedOnSwappedKey(db
,key
);
2907 static robj
*lookupKeyRead(redisDb
*db
, robj
*key
) {
2908 expireIfNeeded(db
,key
);
2909 return lookupKey(db
,key
);
2912 static robj
*lookupKeyWrite(redisDb
*db
, robj
*key
) {
2913 deleteIfVolatile(db
,key
);
2914 return lookupKey(db
,key
);
2917 static robj
*lookupKeyReadOrReply(redisClient
*c
, robj
*key
, robj
*reply
) {
2918 robj
*o
= lookupKeyRead(c
->db
, key
);
2919 if (!o
) addReply(c
,reply
);
2923 static robj
*lookupKeyWriteOrReply(redisClient
*c
, robj
*key
, robj
*reply
) {
2924 robj
*o
= lookupKeyWrite(c
->db
, key
);
2925 if (!o
) addReply(c
,reply
);
2929 static int checkType(redisClient
*c
, robj
*o
, int type
) {
2930 if (o
->type
!= type
) {
2931 addReply(c
,shared
.wrongtypeerr
);
2937 static int deleteKey(redisDb
*db
, robj
*key
) {
2940 /* We need to protect key from destruction: after the first dictDelete()
2941 * it may happen that 'key' is no longer valid if we don't increment
2942 * it's count. This may happen when we get the object reference directly
2943 * from the hash table with dictRandomKey() or dict iterators */
2945 if (dictSize(db
->expires
)) dictDelete(db
->expires
,key
);
2946 retval
= dictDelete(db
->dict
,key
);
2949 return retval
== DICT_OK
;
2952 /* Check if the nul-terminated string 's' can be represented by a long
2953 * (that is, is a number that fits into long without any other space or
2954 * character before or after the digits).
2956 * If so, the function returns REDIS_OK and *longval is set to the value
2957 * of the number. Otherwise REDIS_ERR is returned */
2958 static int isStringRepresentableAsLong(sds s
, long *longval
) {
2959 char buf
[32], *endptr
;
2963 value
= strtol(s
, &endptr
, 10);
2964 if (endptr
[0] != '\0') return REDIS_ERR
;
2965 slen
= snprintf(buf
,32,"%ld",value
);
2967 /* If the number converted back into a string is not identical
2968 * then it's not possible to encode the string as integer */
2969 if (sdslen(s
) != (unsigned)slen
|| memcmp(buf
,s
,slen
)) return REDIS_ERR
;
2970 if (longval
) *longval
= value
;
2974 /* Try to encode a string object in order to save space */
2975 static robj
*tryObjectEncoding(robj
*o
) {
2979 if (o
->encoding
!= REDIS_ENCODING_RAW
)
2980 return o
; /* Already encoded */
2982 /* It's not safe to encode shared objects: shared objects can be shared
2983 * everywhere in the "object space" of Redis. Encoded objects can only
2984 * appear as "values" (and not, for instance, as keys) */
2985 if (o
->refcount
> 1) return o
;
2987 /* Currently we try to encode only strings */
2988 redisAssert(o
->type
== REDIS_STRING
);
2990 /* Check if we can represent this string as a long integer */
2991 if (isStringRepresentableAsLong(s
,&value
) == REDIS_ERR
) return o
;
2993 /* Ok, this object can be encoded */
2994 if (value
>= 0 && value
< REDIS_SHARED_INTEGERS
) {
2996 incrRefCount(shared
.integers
[value
]);
2997 return shared
.integers
[value
];
2999 o
->encoding
= REDIS_ENCODING_INT
;
3001 o
->ptr
= (void*) value
;
3006 /* Get a decoded version of an encoded object (returned as a new object).
3007 * If the object is already raw-encoded just increment the ref count. */
3008 static robj
*getDecodedObject(robj
*o
) {
3011 if (o
->encoding
== REDIS_ENCODING_RAW
) {
3015 if (o
->type
== REDIS_STRING
&& o
->encoding
== REDIS_ENCODING_INT
) {
3018 snprintf(buf
,32,"%ld",(long)o
->ptr
);
3019 dec
= createStringObject(buf
,strlen(buf
));
3022 redisAssert(1 != 1);
3026 /* Compare two string objects via strcmp() or alike.
3027 * Note that the objects may be integer-encoded. In such a case we
3028 * use snprintf() to get a string representation of the numbers on the stack
3029 * and compare the strings, it's much faster than calling getDecodedObject().
3031 * Important note: if objects are not integer encoded, but binary-safe strings,
3032 * sdscmp() from sds.c will apply memcmp() so this function ca be considered
3034 static int compareStringObjects(robj
*a
, robj
*b
) {
3035 redisAssert(a
->type
== REDIS_STRING
&& b
->type
== REDIS_STRING
);
3036 char bufa
[128], bufb
[128], *astr
, *bstr
;
3039 if (a
== b
) return 0;
3040 if (a
->encoding
!= REDIS_ENCODING_RAW
) {
3041 snprintf(bufa
,sizeof(bufa
),"%ld",(long) a
->ptr
);
3047 if (b
->encoding
!= REDIS_ENCODING_RAW
) {
3048 snprintf(bufb
,sizeof(bufb
),"%ld",(long) b
->ptr
);
3054 return bothsds
? sdscmp(astr
,bstr
) : strcmp(astr
,bstr
);
3057 static size_t stringObjectLen(robj
*o
) {
3058 redisAssert(o
->type
== REDIS_STRING
);
3059 if (o
->encoding
== REDIS_ENCODING_RAW
) {
3060 return sdslen(o
->ptr
);
3064 return snprintf(buf
,32,"%ld",(long)o
->ptr
);
3068 static int getDoubleFromObject(redisClient
*c
, robj
*o
, double *value
) {
3072 if (o
&& o
->type
!= REDIS_STRING
) {
3073 addReplySds(c
,sdsnew("-ERR value is not a double\r\n"));
3079 else if (o
->encoding
== REDIS_ENCODING_RAW
)
3080 parsedValue
= strtod(o
->ptr
, &eptr
);
3081 else if (o
->encoding
== REDIS_ENCODING_INT
)
3082 parsedValue
= (long)o
->ptr
;
3084 redisAssert(1 != 1);
3086 if (eptr
!= NULL
&& *eptr
!= '\0') {
3087 addReplySds(c
,sdsnew("-ERR value is not a double\r\n"));
3091 *value
= parsedValue
;
3096 static int getLongLongFromObject(redisClient
*c
, robj
*o
, long long *value
) {
3097 long long parsedValue
;
3100 if (o
&& o
->type
!= REDIS_STRING
) {
3101 addReplySds(c
,sdsnew("-ERR value is not an integer\r\n"));
3107 else if (o
->encoding
== REDIS_ENCODING_RAW
)
3108 parsedValue
= strtoll(o
->ptr
, &eptr
, 10);
3109 else if (o
->encoding
== REDIS_ENCODING_INT
)
3110 parsedValue
= (long)o
->ptr
;
3112 redisAssert(1 != 1);
3114 if (eptr
!= NULL
&& *eptr
!= '\0') {
3115 addReplySds(c
,sdsnew("-ERR value is not an integer\r\n"));
3119 *value
= parsedValue
;
3124 static int getLongFromObject(redisClient
*c
, robj
*o
, long *value
) {
3125 long long actualValue
;
3127 if (getLongLongFromObject(c
, o
, &actualValue
) != REDIS_OK
) return REDIS_ERR
;
3129 if (actualValue
< LONG_MIN
|| actualValue
> LONG_MAX
) {
3130 addReplySds(c
,sdsnew("-ERR value is out of range\r\n"));
3134 *value
= actualValue
;
3139 /*============================ RDB saving/loading =========================== */
/* Write the one byte 'type' to 'fp'. Returns 0 on success, -1 if the
 * write failed. */
static int rdbSaveType(FILE *fp, unsigned char type) {
    return (fwrite(&type,1,1,fp) == 0) ? -1 : 0;
}
/* Write the time 't' to 'fp' as a 4 byte signed integer in host byte
 * order. Returns 0 on success, -1 if the write failed. */
static int rdbSaveTime(FILE *fp, time_t t) {
    int32_t secs = (int32_t) t;

    return (fwrite(&secs,4,1,fp) == 0) ? -1 : 0;
}
3152 /* check rdbLoadLen() comments for more info */
3153 static int rdbSaveLen(FILE *fp
, uint32_t len
) {
3154 unsigned char buf
[2];
3157 /* Save a 6 bit len */
3158 buf
[0] = (len
&0xFF)|(REDIS_RDB_6BITLEN
<<6);
3159 if (fwrite(buf
,1,1,fp
) == 0) return -1;
3160 } else if (len
< (1<<14)) {
3161 /* Save a 14 bit len */
3162 buf
[0] = ((len
>>8)&0xFF)|(REDIS_RDB_14BITLEN
<<6);
3164 if (fwrite(buf
,2,1,fp
) == 0) return -1;
3166 /* Save a 32 bit len */
3167 buf
[0] = (REDIS_RDB_32BITLEN
<<6);
3168 if (fwrite(buf
,1,1,fp
) == 0) return -1;
3170 if (fwrite(&len
,4,1,fp
) == 0) return -1;
3175 /* String objects in the form "2391" "-100" without any space and with a
3176 * range of values that can fit in an 8, 16 or 32 bit signed value can be
3177 * encoded as integers to save space */
3178 static int rdbTryIntegerEncoding(char *s
, size_t len
, unsigned char *enc
) {
3180 char *endptr
, buf
[32];
3182 /* Check if it's possible to encode this value as a number */
3183 value
= strtoll(s
, &endptr
, 10);
3184 if (endptr
[0] != '\0') return 0;
3185 snprintf(buf
,32,"%lld",value
);
3187 /* If the number converted back into a string is not identical
3188 * then it's not possible to encode the string as integer */
3189 if (strlen(buf
) != len
|| memcmp(buf
,s
,len
)) return 0;
3191 /* Finally check if it fits in our ranges */
3192 if (value
>= -(1<<7) && value
<= (1<<7)-1) {
3193 enc
[0] = (REDIS_RDB_ENCVAL
<<6)|REDIS_RDB_ENC_INT8
;
3194 enc
[1] = value
&0xFF;
3196 } else if (value
>= -(1<<15) && value
<= (1<<15)-1) {
3197 enc
[0] = (REDIS_RDB_ENCVAL
<<6)|REDIS_RDB_ENC_INT16
;
3198 enc
[1] = value
&0xFF;
3199 enc
[2] = (value
>>8)&0xFF;
3201 } else if (value
>= -((long long)1<<31) && value
<= ((long long)1<<31)-1) {
3202 enc
[0] = (REDIS_RDB_ENCVAL
<<6)|REDIS_RDB_ENC_INT32
;
3203 enc
[1] = value
&0xFF;
3204 enc
[2] = (value
>>8)&0xFF;
3205 enc
[3] = (value
>>16)&0xFF;
3206 enc
[4] = (value
>>24)&0xFF;
3213 static int rdbSaveLzfStringObject(FILE *fp
, unsigned char *s
, size_t len
) {
3214 size_t comprlen
, outlen
;
3218 /* We require at least four bytes compression for this to be worth it */
3219 if (len
<= 4) return 0;
3221 if ((out
= zmalloc(outlen
+1)) == NULL
) return 0;
3222 comprlen
= lzf_compress(s
, len
, out
, outlen
);
3223 if (comprlen
== 0) {
3227 /* Data compressed! Let's save it on disk */
3228 byte
= (REDIS_RDB_ENCVAL
<<6)|REDIS_RDB_ENC_LZF
;
3229 if (fwrite(&byte
,1,1,fp
) == 0) goto writeerr
;
3230 if (rdbSaveLen(fp
,comprlen
) == -1) goto writeerr
;
3231 if (rdbSaveLen(fp
,len
) == -1) goto writeerr
;
3232 if (fwrite(out
,comprlen
,1,fp
) == 0) goto writeerr
;
3241 /* Save a string objet as [len][data] on disk. If the object is a string
3242 * representation of an integer value we try to safe it in a special form */
3243 static int rdbSaveRawString(FILE *fp
, unsigned char *s
, size_t len
) {
3246 /* Try integer encoding */
3248 unsigned char buf
[5];
3249 if ((enclen
= rdbTryIntegerEncoding((char*)s
,len
,buf
)) > 0) {
3250 if (fwrite(buf
,enclen
,1,fp
) == 0) return -1;
3255 /* Try LZF compression - under 20 bytes it's unable to compress even
3256 * aaaaaaaaaaaaaaaaaa so skip it */
3257 if (server
.rdbcompression
&& len
> 20) {
3260 retval
= rdbSaveLzfStringObject(fp
,s
,len
);
3261 if (retval
== -1) return -1;
3262 if (retval
> 0) return 0;
3263 /* retval == 0 means data can't be compressed, save the old way */
3266 /* Store verbatim */
3267 if (rdbSaveLen(fp
,len
) == -1) return -1;
3268 if (len
&& fwrite(s
,len
,1,fp
) == 0) return -1;
3272 /* Like rdbSaveStringObjectRaw() but handle encoded objects */
3273 static int rdbSaveStringObject(FILE *fp
, robj
*obj
) {
3276 /* Avoid incr/decr ref count business when possible.
3277 * This plays well with copy-on-write given that we are probably
3278 * in a child process (BGSAVE). Also this makes sure key objects
3279 * of swapped objects are not incRefCount-ed (an assert does not allow
3280 * this in order to avoid bugs) */
3281 if (obj
->encoding
!= REDIS_ENCODING_RAW
) {
3282 obj
= getDecodedObject(obj
);
3283 retval
= rdbSaveRawString(fp
,obj
->ptr
,sdslen(obj
->ptr
));
3286 retval
= rdbSaveRawString(fp
,obj
->ptr
,sdslen(obj
->ptr
));
/* Save a double value. Doubles are saved as strings prefixed by an unsigned
 * 8 bit integer specifying the length of the representation.
 * This 8 bit integer has special values in order to specify the following
 * conditions, in which case no string follows:
 * 253: not a number
 * 254: + inf
 * 255: - inf
 *
 * Returns 0 on success, -1 on write error. */
static int rdbSaveDoubleValue(FILE *fp, double val) {
    unsigned char out[128];
    size_t outlen = 1;

    if (isnan(val)) {
        out[0] = 253;
    } else if (!isfinite(val)) {
        out[0] = (val < 0) ? 255 : 254;
    } else {
        /* out[0] is the length byte, the digits start at out[1] */
        snprintf((char*)out+1,sizeof(out)-1,"%.17g",val);
        out[0] = strlen((char*)out+1);
        outlen = out[0]+1;
    }
    return (fwrite(out,outlen,1,fp) == 0) ? -1 : 0;
}
3318 /* Save a Redis object. */
3319 static int rdbSaveObject(FILE *fp
, robj
*o
) {
3320 if (o
->type
== REDIS_STRING
) {
3321 /* Save a string value */
3322 if (rdbSaveStringObject(fp
,o
) == -1) return -1;
3323 } else if (o
->type
== REDIS_LIST
) {
3324 /* Save a list value */
3325 list
*list
= o
->ptr
;
3329 if (rdbSaveLen(fp
,listLength(list
)) == -1) return -1;
3330 listRewind(list
,&li
);
3331 while((ln
= listNext(&li
))) {
3332 robj
*eleobj
= listNodeValue(ln
);
3334 if (rdbSaveStringObject(fp
,eleobj
) == -1) return -1;
3336 } else if (o
->type
== REDIS_SET
) {
3337 /* Save a set value */
3339 dictIterator
*di
= dictGetIterator(set
);
3342 if (rdbSaveLen(fp
,dictSize(set
)) == -1) return -1;
3343 while((de
= dictNext(di
)) != NULL
) {
3344 robj
*eleobj
= dictGetEntryKey(de
);
3346 if (rdbSaveStringObject(fp
,eleobj
) == -1) return -1;
3348 dictReleaseIterator(di
);
3349 } else if (o
->type
== REDIS_ZSET
) {
3350 /* Save a set value */
3352 dictIterator
*di
= dictGetIterator(zs
->dict
);
3355 if (rdbSaveLen(fp
,dictSize(zs
->dict
)) == -1) return -1;
3356 while((de
= dictNext(di
)) != NULL
) {
3357 robj
*eleobj
= dictGetEntryKey(de
);
3358 double *score
= dictGetEntryVal(de
);
3360 if (rdbSaveStringObject(fp
,eleobj
) == -1) return -1;
3361 if (rdbSaveDoubleValue(fp
,*score
) == -1) return -1;
3363 dictReleaseIterator(di
);
3364 } else if (o
->type
== REDIS_HASH
) {
3365 /* Save a hash value */
3366 if (o
->encoding
== REDIS_ENCODING_ZIPMAP
) {
3367 unsigned char *p
= zipmapRewind(o
->ptr
);
3368 unsigned int count
= zipmapLen(o
->ptr
);
3369 unsigned char *key
, *val
;
3370 unsigned int klen
, vlen
;
3372 if (rdbSaveLen(fp
,count
) == -1) return -1;
3373 while((p
= zipmapNext(p
,&key
,&klen
,&val
,&vlen
)) != NULL
) {
3374 if (rdbSaveRawString(fp
,key
,klen
) == -1) return -1;
3375 if (rdbSaveRawString(fp
,val
,vlen
) == -1) return -1;
3378 dictIterator
*di
= dictGetIterator(o
->ptr
);
3381 if (rdbSaveLen(fp
,dictSize((dict
*)o
->ptr
)) == -1) return -1;
3382 while((de
= dictNext(di
)) != NULL
) {
3383 robj
*key
= dictGetEntryKey(de
);
3384 robj
*val
= dictGetEntryVal(de
);
3386 if (rdbSaveStringObject(fp
,key
) == -1) return -1;
3387 if (rdbSaveStringObject(fp
,val
) == -1) return -1;
3389 dictReleaseIterator(di
);
3397 /* Return the length the object will have on disk if saved with
3398 * the rdbSaveObject() function. Currently we use a trick to get
3399 * this length with very little changes to the code. In the future
3400 * we could switch to a faster solution. */
3401 static off_t
rdbSavedObjectLen(robj
*o
, FILE *fp
) {
3402 if (fp
== NULL
) fp
= server
.devnull
;
3404 assert(rdbSaveObject(fp
,o
) != 1);
3408 /* Return the number of pages required to save this object in the swap file */
3409 static off_t
rdbSavedObjectPages(robj
*o
, FILE *fp
) {
3410 off_t bytes
= rdbSavedObjectLen(o
,fp
);
3412 return (bytes
+(server
.vm_page_size
-1))/server
.vm_page_size
;
3415 /* Save the DB on disk. Return REDIS_ERR on error, REDIS_OK on success */
3416 static int rdbSave(char *filename
) {
3417 dictIterator
*di
= NULL
;
3422 time_t now
= time(NULL
);
3424 /* Wait for I/O threads to terminate, just in case this is a
3425 * foreground-saving, to avoid seeking the swap file descriptor at the
3427 if (server
.vm_enabled
)
3428 waitEmptyIOJobsQueue();
3430 snprintf(tmpfile
,256,"temp-%d.rdb", (int) getpid());
3431 fp
= fopen(tmpfile
,"w");
3433 redisLog(REDIS_WARNING
, "Failed saving the DB: %s", strerror(errno
));
3436 if (fwrite("REDIS0001",9,1,fp
) == 0) goto werr
;
3437 for (j
= 0; j
< server
.dbnum
; j
++) {
3438 redisDb
*db
= server
.db
+j
;
3440 if (dictSize(d
) == 0) continue;
3441 di
= dictGetIterator(d
);
3447 /* Write the SELECT DB opcode */
3448 if (rdbSaveType(fp
,REDIS_SELECTDB
) == -1) goto werr
;
3449 if (rdbSaveLen(fp
,j
) == -1) goto werr
;
3451 /* Iterate this DB writing every entry */
3452 while((de
= dictNext(di
)) != NULL
) {
3453 robj
*key
= dictGetEntryKey(de
);
3454 robj
*o
= dictGetEntryVal(de
);
3455 time_t expiretime
= getExpire(db
,key
);
3457 /* Save the expire time */
3458 if (expiretime
!= -1) {
3459 /* If this key is already expired skip it */
3460 if (expiretime
< now
) continue;
3461 if (rdbSaveType(fp
,REDIS_EXPIRETIME
) == -1) goto werr
;
3462 if (rdbSaveTime(fp
,expiretime
) == -1) goto werr
;
3464 /* Save the key and associated value. This requires special
3465 * handling if the value is swapped out. */
3466 if (!server
.vm_enabled
|| key
->storage
== REDIS_VM_MEMORY
||
3467 key
->storage
== REDIS_VM_SWAPPING
) {
3468 /* Save type, key, value */
3469 if (rdbSaveType(fp
,o
->type
) == -1) goto werr
;
3470 if (rdbSaveStringObject(fp
,key
) == -1) goto werr
;
3471 if (rdbSaveObject(fp
,o
) == -1) goto werr
;
3473 /* REDIS_VM_SWAPPED or REDIS_VM_LOADING */
3475 /* Get a preview of the object in memory */
3476 po
= vmPreviewObject(key
);
3477 /* Save type, key, value */
3478 if (rdbSaveType(fp
,key
->vtype
) == -1) goto werr
;
3479 if (rdbSaveStringObject(fp
,key
) == -1) goto werr
;
3480 if (rdbSaveObject(fp
,po
) == -1) goto werr
;
3481 /* Remove the loaded object from memory */
3485 dictReleaseIterator(di
);
3488 if (rdbSaveType(fp
,REDIS_EOF
) == -1) goto werr
;
3490 /* Make sure data will not remain on the OS's output buffers */
3495 /* Use RENAME to make sure the DB file is changed atomically only
3496 * if the generated DB file is ok. */
3497 if (rename(tmpfile
,filename
) == -1) {
3498 redisLog(REDIS_WARNING
,"Error moving temp DB file on the final destination: %s", strerror(errno
));
3502 redisLog(REDIS_NOTICE
,"DB saved on disk");
3504 server
.lastsave
= time(NULL
);
3510 redisLog(REDIS_WARNING
,"Write error saving DB on disk: %s", strerror(errno
));
3511 if (di
) dictReleaseIterator(di
);
3515 static int rdbSaveBackground(char *filename
) {
3518 if (server
.bgsavechildpid
!= -1) return REDIS_ERR
;
3519 if (server
.vm_enabled
) waitEmptyIOJobsQueue();
3520 if ((childpid
= fork()) == 0) {
3522 if (server
.vm_enabled
) vmReopenSwapFile();
3524 if (rdbSave(filename
) == REDIS_OK
) {
3531 if (childpid
== -1) {
3532 redisLog(REDIS_WARNING
,"Can't save in background: fork: %s",
3536 redisLog(REDIS_NOTICE
,"Background saving started by pid %d",childpid
);
3537 server
.bgsavechildpid
= childpid
;
3538 updateDictResizePolicy();
3541 return REDIS_OK
; /* unreached */
/* Remove the file "temp-<childpid>.rdb" from the current directory. The
 * name has the same format used by rdbSave() for its temporary file. */
static void rdbRemoveTempFile(pid_t childpid) {
    char path[256];

    snprintf(path,256,"temp-%d.rdb", (int) childpid);
    unlink(path);
}
/* Read a one byte type from 'fp'. Returns the byte as a non negative
 * integer, or -1 if nothing could be read. */
static int rdbLoadType(FILE *fp) {
    unsigned char type;

    return (fread(&type,1,1,fp) == 0) ? -1 : type;
}
/* Read a time saved by rdbSaveTime(): a 4 byte signed integer in host byte
 * order. Returns the time, or -1 if the bytes could not be read. */
static time_t rdbLoadTime(FILE *fp) {
    int32_t secs;

    return (fread(&secs,4,1,fp) == 0) ? (time_t) -1 : (time_t) secs;
}
3563 /* Load an encoded length from the DB, see the REDIS_RDB_* defines on the top
3564 * of this file for a description of how this are stored on disk.
3566 * isencoded is set to 1 if the readed length is not actually a length but
3567 * an "encoding type", check the above comments for more info */
3568 static uint32_t rdbLoadLen(FILE *fp
, int *isencoded
) {
3569 unsigned char buf
[2];
3573 if (isencoded
) *isencoded
= 0;
3574 if (fread(buf
,1,1,fp
) == 0) return REDIS_RDB_LENERR
;
3575 type
= (buf
[0]&0xC0)>>6;
3576 if (type
== REDIS_RDB_6BITLEN
) {
3577 /* Read a 6 bit len */
3579 } else if (type
== REDIS_RDB_ENCVAL
) {
3580 /* Read a 6 bit len encoding type */
3581 if (isencoded
) *isencoded
= 1;
3583 } else if (type
== REDIS_RDB_14BITLEN
) {
3584 /* Read a 14 bit len */
3585 if (fread(buf
+1,1,1,fp
) == 0) return REDIS_RDB_LENERR
;
3586 return ((buf
[0]&0x3F)<<8)|buf
[1];
3588 /* Read a 32 bit len */
3589 if (fread(&len
,4,1,fp
) == 0) return REDIS_RDB_LENERR
;
3594 static robj
*rdbLoadIntegerObject(FILE *fp
, int enctype
) {
3595 unsigned char enc
[4];
3598 if (enctype
== REDIS_RDB_ENC_INT8
) {
3599 if (fread(enc
,1,1,fp
) == 0) return NULL
;
3600 val
= (signed char)enc
[0];
3601 } else if (enctype
== REDIS_RDB_ENC_INT16
) {
3603 if (fread(enc
,2,1,fp
) == 0) return NULL
;
3604 v
= enc
[0]|(enc
[1]<<8);
3606 } else if (enctype
== REDIS_RDB_ENC_INT32
) {
3608 if (fread(enc
,4,1,fp
) == 0) return NULL
;
3609 v
= enc
[0]|(enc
[1]<<8)|(enc
[2]<<16)|(enc
[3]<<24);
3612 val
= 0; /* anti-warning */
3615 return createObject(REDIS_STRING
,sdscatprintf(sdsempty(),"%lld",val
));
3618 static robj
*rdbLoadLzfStringObject(FILE*fp
) {
3619 unsigned int len
, clen
;
3620 unsigned char *c
= NULL
;
3623 if ((clen
= rdbLoadLen(fp
,NULL
)) == REDIS_RDB_LENERR
) return NULL
;
3624 if ((len
= rdbLoadLen(fp
,NULL
)) == REDIS_RDB_LENERR
) return NULL
;
3625 if ((c
= zmalloc(clen
)) == NULL
) goto err
;
3626 if ((val
= sdsnewlen(NULL
,len
)) == NULL
) goto err
;
3627 if (fread(c
,clen
,1,fp
) == 0) goto err
;
3628 if (lzf_decompress(c
,clen
,val
,len
) == 0) goto err
;
3630 return createObject(REDIS_STRING
,val
);
3637 static robj
*rdbLoadStringObject(FILE*fp
) {
3642 len
= rdbLoadLen(fp
,&isencoded
);
3645 case REDIS_RDB_ENC_INT8
:
3646 case REDIS_RDB_ENC_INT16
:
3647 case REDIS_RDB_ENC_INT32
:
3648 return rdbLoadIntegerObject(fp
,len
);
3649 case REDIS_RDB_ENC_LZF
:
3650 return rdbLoadLzfStringObject(fp
);
3656 if (len
== REDIS_RDB_LENERR
) return NULL
;
3657 val
= sdsnewlen(NULL
,len
);
3658 if (len
&& fread(val
,len
,1,fp
) == 0) {
3662 return createObject(REDIS_STRING
,val
);
3665 /* For information about double serialization check rdbSaveDoubleValue() */
3666 static int rdbLoadDoubleValue(FILE *fp
, double *val
) {
3670 if (fread(&len
,1,1,fp
) == 0) return -1;
3672 case 255: *val
= R_NegInf
; return 0;
3673 case 254: *val
= R_PosInf
; return 0;
3674 case 253: *val
= R_Nan
; return 0;
3676 if (fread(buf
,len
,1,fp
) == 0) return -1;
3678 sscanf(buf
, "%lg", val
);
3683 /* Load a Redis object of the specified type from the specified file.
3684 * On success a newly allocated object is returned, otherwise NULL. */
3685 static robj
*rdbLoadObject(int type
, FILE *fp
) {
3688 redisLog(REDIS_DEBUG
,"LOADING OBJECT %d (at %d)\n",type
,ftell(fp
));
3689 if (type
== REDIS_STRING
) {
3690 /* Read string value */
3691 if ((o
= rdbLoadStringObject(fp
)) == NULL
) return NULL
;
3692 o
= tryObjectEncoding(o
);
3693 } else if (type
== REDIS_LIST
|| type
== REDIS_SET
) {
3694 /* Read list/set value */
3697 if ((listlen
= rdbLoadLen(fp
,NULL
)) == REDIS_RDB_LENERR
) return NULL
;
3698 o
= (type
== REDIS_LIST
) ? createListObject() : createSetObject();
3699 /* It's faster to expand the dict to the right size asap in order
3700 * to avoid rehashing */
3701 if (type
== REDIS_SET
&& listlen
> DICT_HT_INITIAL_SIZE
)
3702 dictExpand(o
->ptr
,listlen
);
3703 /* Load every single element of the list/set */
3707 if ((ele
= rdbLoadStringObject(fp
)) == NULL
) return NULL
;
3708 ele
= tryObjectEncoding(ele
);
3709 if (type
== REDIS_LIST
) {
3710 listAddNodeTail((list
*)o
->ptr
,ele
);
3712 dictAdd((dict
*)o
->ptr
,ele
,NULL
);
3715 } else if (type
== REDIS_ZSET
) {
3716 /* Read list/set value */
3720 if ((zsetlen
= rdbLoadLen(fp
,NULL
)) == REDIS_RDB_LENERR
) return NULL
;
3721 o
= createZsetObject();
3723 /* Load every single element of the list/set */
3726 double *score
= zmalloc(sizeof(double));
3728 if ((ele
= rdbLoadStringObject(fp
)) == NULL
) return NULL
;
3729 ele
= tryObjectEncoding(ele
);
3730 if (rdbLoadDoubleValue(fp
,score
) == -1) return NULL
;
3731 dictAdd(zs
->dict
,ele
,score
);
3732 zslInsert(zs
->zsl
,*score
,ele
);
3733 incrRefCount(ele
); /* added to skiplist */
3735 } else if (type
== REDIS_HASH
) {
3738 if ((hashlen
= rdbLoadLen(fp
,NULL
)) == REDIS_RDB_LENERR
) return NULL
;
3739 o
= createHashObject();
3740 /* Too many entries? Use an hash table. */
3741 if (hashlen
> server
.hash_max_zipmap_entries
)
3742 convertToRealHash(o
);
3743 /* Load every key/value, then set it into the zipmap or hash
3744 * table, as needed. */
3748 if ((key
= rdbLoadStringObject(fp
)) == NULL
) return NULL
;
3749 if ((val
= rdbLoadStringObject(fp
)) == NULL
) return NULL
;
3750 /* If we are using a zipmap and there are too big values
3751 * the object is converted to real hash table encoding. */
3752 if (o
->encoding
!= REDIS_ENCODING_HT
&&
3753 (sdslen(key
->ptr
) > server
.hash_max_zipmap_value
||
3754 sdslen(val
->ptr
) > server
.hash_max_zipmap_value
))
3756 convertToRealHash(o
);
3759 if (o
->encoding
== REDIS_ENCODING_ZIPMAP
) {
3760 unsigned char *zm
= o
->ptr
;
3762 zm
= zipmapSet(zm
,key
->ptr
,sdslen(key
->ptr
),
3763 val
->ptr
,sdslen(val
->ptr
),NULL
);
3768 key
= tryObjectEncoding(key
);
3769 val
= tryObjectEncoding(val
);
3770 dictAdd((dict
*)o
->ptr
,key
,val
);
3779 static int rdbLoad(char *filename
) {
3781 robj
*keyobj
= NULL
;
3783 int type
, retval
, rdbver
;
3784 dict
*d
= server
.db
[0].dict
;
3785 redisDb
*db
= server
.db
+0;
3787 time_t expiretime
= -1, now
= time(NULL
);
3788 long long loadedkeys
= 0;
3790 fp
= fopen(filename
,"r");
3791 if (!fp
) return REDIS_ERR
;
3792 if (fread(buf
,9,1,fp
) == 0) goto eoferr
;
3794 if (memcmp(buf
,"REDIS",5) != 0) {
3796 redisLog(REDIS_WARNING
,"Wrong signature trying to load DB from file");
3799 rdbver
= atoi(buf
+5);
3802 redisLog(REDIS_WARNING
,"Can't handle RDB format version %d",rdbver
);
3809 if ((type
= rdbLoadType(fp
)) == -1) goto eoferr
;
3810 if (type
== REDIS_EXPIRETIME
) {
3811 if ((expiretime
= rdbLoadTime(fp
)) == -1) goto eoferr
;
3812 /* We read the time so we need to read the object type again */
3813 if ((type
= rdbLoadType(fp
)) == -1) goto eoferr
;
3815 if (type
== REDIS_EOF
) break;
3816 /* Handle SELECT DB opcode as a special case */
3817 if (type
== REDIS_SELECTDB
) {
3818 if ((dbid
= rdbLoadLen(fp
,NULL
)) == REDIS_RDB_LENERR
)
3820 if (dbid
>= (unsigned)server
.dbnum
) {
3821 redisLog(REDIS_WARNING
,"FATAL: Data file was created with a Redis server configured to handle more than %d databases. Exiting\n", server
.dbnum
);
3824 db
= server
.db
+dbid
;
3829 if ((keyobj
= rdbLoadStringObject(fp
)) == NULL
) goto eoferr
;
3831 if ((o
= rdbLoadObject(type
,fp
)) == NULL
) goto eoferr
;
3832 /* Add the new object in the hash table */
3833 retval
= dictAdd(d
,keyobj
,o
);
3834 if (retval
== DICT_ERR
) {
3835 redisLog(REDIS_WARNING
,"Loading DB, duplicated key (%s) found! Unrecoverable error, exiting now.", keyobj
->ptr
);
3838 /* Set the expire time if needed */
3839 if (expiretime
!= -1) {
3840 setExpire(db
,keyobj
,expiretime
);
3841 /* Delete this key if already expired */
3842 if (expiretime
< now
) deleteKey(db
,keyobj
);
3846 /* Handle swapping while loading big datasets when VM is on */
3848 if (server
.vm_enabled
&& (loadedkeys
% 5000) == 0) {
3849 while (zmalloc_used_memory() > server
.vm_max_memory
) {
3850 if (vmSwapOneObjectBlocking() == REDIS_ERR
) break;
3857 eoferr
: /* unexpected end of file is handled here with a fatal exit */
3858 if (keyobj
) decrRefCount(keyobj
);
3859 redisLog(REDIS_WARNING
,"Short read or OOM loading DB. Unrecoverable error, aborting now.");
3861 return REDIS_ERR
; /* Just to avoid warning */
3864 /*================================== Commands =============================== */
3866 static void authCommand(redisClient
*c
) {
3867 if (!server
.requirepass
|| !strcmp(c
->argv
[1]->ptr
, server
.requirepass
)) {
3868 c
->authenticated
= 1;
3869 addReply(c
,shared
.ok
);
3871 c
->authenticated
= 0;
3872 addReplySds(c
,sdscatprintf(sdsempty(),"-ERR invalid password\r\n"));
3876 static void pingCommand(redisClient
*c
) {
3877 addReply(c
,shared
.pong
);
3880 static void echoCommand(redisClient
*c
) {
3881 addReplyBulk(c
,c
->argv
[1]);
3884 /*=================================== Strings =============================== */
3886 static void setGenericCommand(redisClient
*c
, int nx
) {
3889 if (nx
) deleteIfVolatile(c
->db
,c
->argv
[1]);
3890 retval
= dictAdd(c
->db
->dict
,c
->argv
[1],c
->argv
[2]);
3891 if (retval
== DICT_ERR
) {
3893 /* If the key is about a swapped value, we want a new key object
3894 * to overwrite the old. So we delete the old key in the database.
3895 * This will also make sure that swap pages about the old object
3896 * will be marked as free. */
3897 if (server
.vm_enabled
&& deleteIfSwapped(c
->db
,c
->argv
[1]))
3898 incrRefCount(c
->argv
[1]);
3899 dictReplace(c
->db
->dict
,c
->argv
[1],c
->argv
[2]);
3900 incrRefCount(c
->argv
[2]);
3902 addReply(c
,shared
.czero
);
3906 incrRefCount(c
->argv
[1]);
3907 incrRefCount(c
->argv
[2]);
3910 removeExpire(c
->db
,c
->argv
[1]);
3911 addReply(c
, nx
? shared
.cone
: shared
.ok
);
3914 static void setCommand(redisClient
*c
) {
3915 setGenericCommand(c
,0);
3918 static void setnxCommand(redisClient
*c
) {
3919 setGenericCommand(c
,1);
3922 static int getGenericCommand(redisClient
*c
) {
3925 if ((o
= lookupKeyReadOrReply(c
,c
->argv
[1],shared
.nullbulk
)) == NULL
)
3928 if (o
->type
!= REDIS_STRING
) {
3929 addReply(c
,shared
.wrongtypeerr
);
3937 static void getCommand(redisClient
*c
) {
3938 getGenericCommand(c
);
3941 static void getsetCommand(redisClient
*c
) {
3942 if (getGenericCommand(c
) == REDIS_ERR
) return;
3943 if (dictAdd(c
->db
->dict
,c
->argv
[1],c
->argv
[2]) == DICT_ERR
) {
3944 dictReplace(c
->db
->dict
,c
->argv
[1],c
->argv
[2]);
3946 incrRefCount(c
->argv
[1]);
3948 incrRefCount(c
->argv
[2]);
3950 removeExpire(c
->db
,c
->argv
[1]);
3953 static void mgetCommand(redisClient
*c
) {
3956 addReplySds(c
,sdscatprintf(sdsempty(),"*%d\r\n",c
->argc
-1));
3957 for (j
= 1; j
< c
->argc
; j
++) {
3958 robj
*o
= lookupKeyRead(c
->db
,c
->argv
[j
]);
3960 addReply(c
,shared
.nullbulk
);
3962 if (o
->type
!= REDIS_STRING
) {
3963 addReply(c
,shared
.nullbulk
);
3971 static void msetGenericCommand(redisClient
*c
, int nx
) {
3972 int j
, busykeys
= 0;
3974 if ((c
->argc
% 2) == 0) {
3975 addReplySds(c
,sdsnew("-ERR wrong number of arguments for MSET\r\n"));
3978 /* Handle the NX flag. The MSETNX semantic is to return zero and don't
3979 * set nothing at all if at least one already key exists. */
3981 for (j
= 1; j
< c
->argc
; j
+= 2) {
3982 if (lookupKeyWrite(c
->db
,c
->argv
[j
]) != NULL
) {
3988 addReply(c
, shared
.czero
);
3992 for (j
= 1; j
< c
->argc
; j
+= 2) {
3995 c
->argv
[j
+1] = tryObjectEncoding(c
->argv
[j
+1]);
3996 retval
= dictAdd(c
->db
->dict
,c
->argv
[j
],c
->argv
[j
+1]);
3997 if (retval
== DICT_ERR
) {
3998 dictReplace(c
->db
->dict
,c
->argv
[j
],c
->argv
[j
+1]);
3999 incrRefCount(c
->argv
[j
+1]);
4001 incrRefCount(c
->argv
[j
]);
4002 incrRefCount(c
->argv
[j
+1]);
4004 removeExpire(c
->db
,c
->argv
[j
]);
4006 server
.dirty
+= (c
->argc
-1)/2;
4007 addReply(c
, nx
? shared
.cone
: shared
.ok
);
4010 static void msetCommand(redisClient
*c
) {
4011 msetGenericCommand(c
,0);
4014 static void msetnxCommand(redisClient
*c
) {
4015 msetGenericCommand(c
,1);
4018 static void incrDecrCommand(redisClient
*c
, long long incr
) {
4023 o
= lookupKeyWrite(c
->db
,c
->argv
[1]);
4025 if (getLongLongFromObject(c
, o
, &value
) != REDIS_OK
) return;
4028 o
= createObject(REDIS_STRING
,sdscatprintf(sdsempty(),"%lld",value
));
4029 o
= tryObjectEncoding(o
);
4030 retval
= dictAdd(c
->db
->dict
,c
->argv
[1],o
);
4031 if (retval
== DICT_ERR
) {
4032 dictReplace(c
->db
->dict
,c
->argv
[1],o
);
4033 removeExpire(c
->db
,c
->argv
[1]);
4035 incrRefCount(c
->argv
[1]);
4038 addReply(c
,shared
.colon
);
4040 addReply(c
,shared
.crlf
);
4043 static void incrCommand(redisClient
*c
) {
4044 incrDecrCommand(c
,1);
4047 static void decrCommand(redisClient
*c
) {
4048 incrDecrCommand(c
,-1);
4051 static void incrbyCommand(redisClient
*c
) {
4054 if (getLongLongFromObject(c
, c
->argv
[2], &incr
) != REDIS_OK
) return;
4056 incrDecrCommand(c
,incr
);
4059 static void decrbyCommand(redisClient
*c
) {
4062 if (getLongLongFromObject(c
, c
->argv
[2], &incr
) != REDIS_OK
) return;
4064 incrDecrCommand(c
,-incr
);
4067 static void appendCommand(redisClient
*c
) {
4072 o
= lookupKeyWrite(c
->db
,c
->argv
[1]);
4074 /* Create the key */
4075 retval
= dictAdd(c
->db
->dict
,c
->argv
[1],c
->argv
[2]);
4076 incrRefCount(c
->argv
[1]);
4077 incrRefCount(c
->argv
[2]);
4078 totlen
= stringObjectLen(c
->argv
[2]);
4082 de
= dictFind(c
->db
->dict
,c
->argv
[1]);
4085 o
= dictGetEntryVal(de
);
4086 if (o
->type
!= REDIS_STRING
) {
4087 addReply(c
,shared
.wrongtypeerr
);
4090 /* If the object is specially encoded or shared we have to make
4092 if (o
->refcount
!= 1 || o
->encoding
!= REDIS_ENCODING_RAW
) {
4093 robj
*decoded
= getDecodedObject(o
);
4095 o
= createStringObject(decoded
->ptr
, sdslen(decoded
->ptr
));
4096 decrRefCount(decoded
);
4097 dictReplace(c
->db
->dict
,c
->argv
[1],o
);
4100 if (c
->argv
[2]->encoding
== REDIS_ENCODING_RAW
) {
4101 o
->ptr
= sdscatlen(o
->ptr
,
4102 c
->argv
[2]->ptr
, sdslen(c
->argv
[2]->ptr
));
4104 o
->ptr
= sdscatprintf(o
->ptr
, "%ld",
4105 (unsigned long) c
->argv
[2]->ptr
);
4107 totlen
= sdslen(o
->ptr
);
4110 addReplySds(c
,sdscatprintf(sdsempty(),":%lu\r\n",(unsigned long)totlen
));
4113 static void substrCommand(redisClient
*c
) {
4115 long start
= atoi(c
->argv
[2]->ptr
);
4116 long end
= atoi(c
->argv
[3]->ptr
);
4117 size_t rangelen
, strlen
;
4120 if ((o
= lookupKeyReadOrReply(c
,c
->argv
[1],shared
.nullbulk
)) == NULL
||
4121 checkType(c
,o
,REDIS_STRING
)) return;
4123 o
= getDecodedObject(o
);
4124 strlen
= sdslen(o
->ptr
);
4126 /* convert negative indexes */
4127 if (start
< 0) start
= strlen
+start
;
4128 if (end
< 0) end
= strlen
+end
;
4129 if (start
< 0) start
= 0;
4130 if (end
< 0) end
= 0;
4132 /* indexes sanity checks */
4133 if (start
> end
|| (size_t)start
>= strlen
) {
4134 /* Out of range start or start > end result in null reply */
4135 addReply(c
,shared
.nullbulk
);
4139 if ((size_t)end
>= strlen
) end
= strlen
-1;
4140 rangelen
= (end
-start
)+1;
4142 /* Return the result */
4143 addReplySds(c
,sdscatprintf(sdsempty(),"$%zu\r\n",rangelen
));
4144 range
= sdsnewlen((char*)o
->ptr
+start
,rangelen
);
4145 addReplySds(c
,range
);
4146 addReply(c
,shared
.crlf
);
4150 /* ========================= Type agnostic commands ========================= */
4152 static void delCommand(redisClient
*c
) {
4155 for (j
= 1; j
< c
->argc
; j
++) {
4156 if (deleteKey(c
->db
,c
->argv
[j
])) {
4161 addReplyLong(c
,deleted
);
4164 static void existsCommand(redisClient
*c
) {
4165 addReply(c
,lookupKeyRead(c
->db
,c
->argv
[1]) ? shared
.cone
: shared
.czero
);
4168 static void selectCommand(redisClient
*c
) {
4169 int id
= atoi(c
->argv
[1]->ptr
);
4171 if (selectDb(c
,id
) == REDIS_ERR
) {
4172 addReplySds(c
,sdsnew("-ERR invalid DB index\r\n"));
4174 addReply(c
,shared
.ok
);
4178 static void randomkeyCommand(redisClient
*c
) {
4182 de
= dictGetRandomKey(c
->db
->dict
);
4183 if (!de
|| expireIfNeeded(c
->db
,dictGetEntryKey(de
)) == 0) break;
4186 addReply(c
,shared
.plus
);
4187 addReply(c
,shared
.crlf
);
4189 addReply(c
,shared
.plus
);
4190 addReply(c
,dictGetEntryKey(de
));
4191 addReply(c
,shared
.crlf
);
4195 static void keysCommand(redisClient
*c
) {
4198 sds pattern
= c
->argv
[1]->ptr
;
4199 int plen
= sdslen(pattern
);
4200 unsigned long numkeys
= 0;
4201 robj
*lenobj
= createObject(REDIS_STRING
,NULL
);
4203 di
= dictGetIterator(c
->db
->dict
);
4205 decrRefCount(lenobj
);
4206 while((de
= dictNext(di
)) != NULL
) {
4207 robj
*keyobj
= dictGetEntryKey(de
);
4209 sds key
= keyobj
->ptr
;
4210 if ((pattern
[0] == '*' && pattern
[1] == '\0') ||
4211 stringmatchlen(pattern
,plen
,key
,sdslen(key
),0)) {
4212 if (expireIfNeeded(c
->db
,keyobj
) == 0) {
4213 addReplyBulk(c
,keyobj
);
4218 dictReleaseIterator(di
);
4219 lenobj
->ptr
= sdscatprintf(sdsempty(),"*%lu\r\n",numkeys
);
4222 static void dbsizeCommand(redisClient
*c
) {
4224 sdscatprintf(sdsempty(),":%lu\r\n",dictSize(c
->db
->dict
)));
4227 static void lastsaveCommand(redisClient
*c
) {
4229 sdscatprintf(sdsempty(),":%lu\r\n",server
.lastsave
));
4232 static void typeCommand(redisClient
*c
) {
4236 o
= lookupKeyRead(c
->db
,c
->argv
[1]);
4241 case REDIS_STRING
: type
= "+string"; break;
4242 case REDIS_LIST
: type
= "+list"; break;
4243 case REDIS_SET
: type
= "+set"; break;
4244 case REDIS_ZSET
: type
= "+zset"; break;
4245 case REDIS_HASH
: type
= "+hash"; break;
4246 default: type
= "+unknown"; break;
4249 addReplySds(c
,sdsnew(type
));
4250 addReply(c
,shared
.crlf
);
4253 static void saveCommand(redisClient
*c
) {
4254 if (server
.bgsavechildpid
!= -1) {
4255 addReplySds(c
,sdsnew("-ERR background save in progress\r\n"));
4258 if (rdbSave(server
.dbfilename
) == REDIS_OK
) {
4259 addReply(c
,shared
.ok
);
4261 addReply(c
,shared
.err
);
4265 static void bgsaveCommand(redisClient
*c
) {
4266 if (server
.bgsavechildpid
!= -1) {
4267 addReplySds(c
,sdsnew("-ERR background save already in progress\r\n"));
4270 if (rdbSaveBackground(server
.dbfilename
) == REDIS_OK
) {
4271 char *status
= "+Background saving started\r\n";
4272 addReplySds(c
,sdsnew(status
));
4274 addReply(c
,shared
.err
);
4278 static void shutdownCommand(redisClient
*c
) {
4279 redisLog(REDIS_WARNING
,"User requested shutdown, saving DB...");
4280 /* Kill the saving child if there is a background saving in progress.
4281 We want to avoid race conditions, for instance our saving child may
4282 overwrite the synchronous saving did by SHUTDOWN. */
4283 if (server
.bgsavechildpid
!= -1) {
4284 redisLog(REDIS_WARNING
,"There is a live saving child. Killing it!");
4285 kill(server
.bgsavechildpid
,SIGKILL
);
4286 rdbRemoveTempFile(server
.bgsavechildpid
);
4288 if (server
.appendonly
) {
4289 /* Append only file: fsync() the AOF and exit */
4290 fsync(server
.appendfd
);
4291 if (server
.vm_enabled
) unlink(server
.vm_swap_file
);
4294 /* Snapshotting. Perform a SYNC SAVE and exit */
4295 if (rdbSave(server
.dbfilename
) == REDIS_OK
) {
4296 if (server
.daemonize
)
4297 unlink(server
.pidfile
);
4298 redisLog(REDIS_WARNING
,"%zu bytes used at exit",zmalloc_used_memory());
4299 redisLog(REDIS_WARNING
,"Server exit now, bye bye...");
4300 if (server
.vm_enabled
) unlink(server
.vm_swap_file
);
4303 /* Ooops.. error saving! The best we can do is to continue
4304 * operating. Note that if there was a background saving process,
4305 * in the next cron() Redis will be notified that the background
4306 * saving aborted, handling special stuff like slaves pending for
4307 * synchronization... */
4308 redisLog(REDIS_WARNING
,"Error trying to save the DB, can't exit");
4310 sdsnew("-ERR can't quit, problems saving the DB\r\n"));
4315 static void renameGenericCommand(redisClient
*c
, int nx
) {
4318 /* To use the same key as src and dst is probably an error */
4319 if (sdscmp(c
->argv
[1]->ptr
,c
->argv
[2]->ptr
) == 0) {
4320 addReply(c
,shared
.sameobjecterr
);
4324 if ((o
= lookupKeyWriteOrReply(c
,c
->argv
[1],shared
.nokeyerr
)) == NULL
)
4328 deleteIfVolatile(c
->db
,c
->argv
[2]);
4329 if (dictAdd(c
->db
->dict
,c
->argv
[2],o
) == DICT_ERR
) {
4332 addReply(c
,shared
.czero
);
4335 dictReplace(c
->db
->dict
,c
->argv
[2],o
);
4337 incrRefCount(c
->argv
[2]);
4339 deleteKey(c
->db
,c
->argv
[1]);
4341 addReply(c
,nx
? shared
.cone
: shared
.ok
);
4344 static void renameCommand(redisClient
*c
) {
4345 renameGenericCommand(c
,0);
4348 static void renamenxCommand(redisClient
*c
) {
4349 renameGenericCommand(c
,1);
4352 static void moveCommand(redisClient
*c
) {
4357 /* Obtain source and target DB pointers */
4360 if (selectDb(c
,atoi(c
->argv
[2]->ptr
)) == REDIS_ERR
) {
4361 addReply(c
,shared
.outofrangeerr
);
4365 selectDb(c
,srcid
); /* Back to the source DB */
4367 /* If the user is moving using as target the same
4368 * DB as the source DB it is probably an error. */
4370 addReply(c
,shared
.sameobjecterr
);
4374 /* Check if the element exists and get a reference */
4375 o
= lookupKeyWrite(c
->db
,c
->argv
[1]);
4377 addReply(c
,shared
.czero
);
4381 /* Try to add the element to the target DB */
4382 deleteIfVolatile(dst
,c
->argv
[1]);
4383 if (dictAdd(dst
->dict
,c
->argv
[1],o
) == DICT_ERR
) {
4384 addReply(c
,shared
.czero
);
4387 incrRefCount(c
->argv
[1]);
4390 /* OK! key moved, free the entry in the source DB */
4391 deleteKey(src
,c
->argv
[1]);
4393 addReply(c
,shared
.cone
);
4396 /* =================================== Lists ================================ */
4397 static void pushGenericCommand(redisClient
*c
, int where
) {
4401 lobj
= lookupKeyWrite(c
->db
,c
->argv
[1]);
4403 if (handleClientsWaitingListPush(c
,c
->argv
[1],c
->argv
[2])) {
4404 addReply(c
,shared
.cone
);
4407 lobj
= createListObject();
4409 if (where
== REDIS_HEAD
) {
4410 listAddNodeHead(list
,c
->argv
[2]);
4412 listAddNodeTail(list
,c
->argv
[2]);
4414 dictAdd(c
->db
->dict
,c
->argv
[1],lobj
);
4415 incrRefCount(c
->argv
[1]);
4416 incrRefCount(c
->argv
[2]);
4418 if (lobj
->type
!= REDIS_LIST
) {
4419 addReply(c
,shared
.wrongtypeerr
);
4422 if (handleClientsWaitingListPush(c
,c
->argv
[1],c
->argv
[2])) {
4423 addReply(c
,shared
.cone
);
4427 if (where
== REDIS_HEAD
) {
4428 listAddNodeHead(list
,c
->argv
[2]);
4430 listAddNodeTail(list
,c
->argv
[2]);
4432 incrRefCount(c
->argv
[2]);
4435 addReplySds(c
,sdscatprintf(sdsempty(),":%d\r\n",listLength(list
)));
4438 static void lpushCommand(redisClient
*c
) {
4439 pushGenericCommand(c
,REDIS_HEAD
);
4442 static void rpushCommand(redisClient
*c
) {
4443 pushGenericCommand(c
,REDIS_TAIL
);
4446 static void llenCommand(redisClient
*c
) {
4450 if ((o
= lookupKeyReadOrReply(c
,c
->argv
[1],shared
.czero
)) == NULL
||
4451 checkType(c
,o
,REDIS_LIST
)) return;
4454 addReplyUlong(c
,listLength(l
));
4457 static void lindexCommand(redisClient
*c
) {
4459 int index
= atoi(c
->argv
[2]->ptr
);
4463 if ((o
= lookupKeyReadOrReply(c
,c
->argv
[1],shared
.nullbulk
)) == NULL
||
4464 checkType(c
,o
,REDIS_LIST
)) return;
4467 ln
= listIndex(list
, index
);
4469 addReply(c
,shared
.nullbulk
);
4471 robj
*ele
= listNodeValue(ln
);
4472 addReplyBulk(c
,ele
);
4476 static void lsetCommand(redisClient
*c
) {
4478 int index
= atoi(c
->argv
[2]->ptr
);
4482 if ((o
= lookupKeyWriteOrReply(c
,c
->argv
[1],shared
.nokeyerr
)) == NULL
||
4483 checkType(c
,o
,REDIS_LIST
)) return;
4486 ln
= listIndex(list
, index
);
4488 addReply(c
,shared
.outofrangeerr
);
4490 robj
*ele
= listNodeValue(ln
);
4493 listNodeValue(ln
) = c
->argv
[3];
4494 incrRefCount(c
->argv
[3]);
4495 addReply(c
,shared
.ok
);
4500 static void popGenericCommand(redisClient
*c
, int where
) {
4505 if ((o
= lookupKeyWriteOrReply(c
,c
->argv
[1],shared
.nullbulk
)) == NULL
||
4506 checkType(c
,o
,REDIS_LIST
)) return;
4509 if (where
== REDIS_HEAD
)
4510 ln
= listFirst(list
);
4512 ln
= listLast(list
);
4515 addReply(c
,shared
.nullbulk
);
4517 robj
*ele
= listNodeValue(ln
);
4518 addReplyBulk(c
,ele
);
4519 listDelNode(list
,ln
);
4520 if (listLength(list
) == 0) deleteKey(c
->db
,c
->argv
[1]);
4525 static void lpopCommand(redisClient
*c
) {
4526 popGenericCommand(c
,REDIS_HEAD
);
4529 static void rpopCommand(redisClient
*c
) {
4530 popGenericCommand(c
,REDIS_TAIL
);
4533 static void lrangeCommand(redisClient
*c
) {
4535 int start
= atoi(c
->argv
[2]->ptr
);
4536 int end
= atoi(c
->argv
[3]->ptr
);
4543 if ((o
= lookupKeyReadOrReply(c
,c
->argv
[1],shared
.emptymultibulk
)) == NULL
4544 || checkType(c
,o
,REDIS_LIST
)) return;
4546 llen
= listLength(list
);
4548 /* convert negative indexes */
4549 if (start
< 0) start
= llen
+start
;
4550 if (end
< 0) end
= llen
+end
;
4551 if (start
< 0) start
= 0;
4552 if (end
< 0) end
= 0;
4554 /* indexes sanity checks */
4555 if (start
> end
|| start
>= llen
) {
4556 /* Out of range start or start > end result in empty list */
4557 addReply(c
,shared
.emptymultibulk
);
4560 if (end
>= llen
) end
= llen
-1;
4561 rangelen
= (end
-start
)+1;
4563 /* Return the result in form of a multi-bulk reply */
4564 ln
= listIndex(list
, start
);
4565 addReplySds(c
,sdscatprintf(sdsempty(),"*%d\r\n",rangelen
));
4566 for (j
= 0; j
< rangelen
; j
++) {
4567 ele
= listNodeValue(ln
);
4568 addReplyBulk(c
,ele
);
4573 static void ltrimCommand(redisClient
*c
) {
4575 int start
= atoi(c
->argv
[2]->ptr
);
4576 int end
= atoi(c
->argv
[3]->ptr
);
4578 int j
, ltrim
, rtrim
;
4582 if ((o
= lookupKeyWriteOrReply(c
,c
->argv
[1],shared
.ok
)) == NULL
||
4583 checkType(c
,o
,REDIS_LIST
)) return;
4585 llen
= listLength(list
);
4587 /* convert negative indexes */
4588 if (start
< 0) start
= llen
+start
;
4589 if (end
< 0) end
= llen
+end
;
4590 if (start
< 0) start
= 0;
4591 if (end
< 0) end
= 0;
4593 /* indexes sanity checks */
4594 if (start
> end
|| start
>= llen
) {
4595 /* Out of range start or start > end result in empty list */
4599 if (end
>= llen
) end
= llen
-1;
4604 /* Remove list elements to perform the trim */
4605 for (j
= 0; j
< ltrim
; j
++) {
4606 ln
= listFirst(list
);
4607 listDelNode(list
,ln
);
4609 for (j
= 0; j
< rtrim
; j
++) {
4610 ln
= listLast(list
);
4611 listDelNode(list
,ln
);
4613 if (listLength(list
) == 0) deleteKey(c
->db
,c
->argv
[1]);
4615 addReply(c
,shared
.ok
);
4618 static void lremCommand(redisClient
*c
) {
4621 listNode
*ln
, *next
;
4622 int toremove
= atoi(c
->argv
[2]->ptr
);
4626 if ((o
= lookupKeyWriteOrReply(c
,c
->argv
[1],shared
.czero
)) == NULL
||
4627 checkType(c
,o
,REDIS_LIST
)) return;
4631 toremove
= -toremove
;
4634 ln
= fromtail
? list
->tail
: list
->head
;
4636 robj
*ele
= listNodeValue(ln
);
4638 next
= fromtail
? ln
->prev
: ln
->next
;
4639 if (compareStringObjects(ele
,c
->argv
[3]) == 0) {
4640 listDelNode(list
,ln
);
4643 if (toremove
&& removed
== toremove
) break;
4647 if (listLength(list
) == 0) deleteKey(c
->db
,c
->argv
[1]);
4648 addReplySds(c
,sdscatprintf(sdsempty(),":%d\r\n",removed
));
4651 /* This is the semantic of this command:
4652 * RPOPLPUSH srclist dstlist:
4653 * IF LLEN(srclist) > 0
4654 * element = RPOP srclist
4655 * LPUSH dstlist element
4662 * The idea is to be able to get an element from a list in a reliable way
4663 * since the element is not just returned but pushed against another list
4664 * as well. This command was originally proposed by Ezra Zygmuntowicz.
4666 static void rpoplpushcommand(redisClient
*c
) {
4671 if ((sobj
= lookupKeyWriteOrReply(c
,c
->argv
[1],shared
.nullbulk
)) == NULL
||
4672 checkType(c
,sobj
,REDIS_LIST
)) return;
4673 srclist
= sobj
->ptr
;
4674 ln
= listLast(srclist
);
4677 addReply(c
,shared
.nullbulk
);
4679 robj
*dobj
= lookupKeyWrite(c
->db
,c
->argv
[2]);
4680 robj
*ele
= listNodeValue(ln
);
4683 if (dobj
&& dobj
->type
!= REDIS_LIST
) {
4684 addReply(c
,shared
.wrongtypeerr
);
4688 /* Add the element to the target list (unless it's directly
4689 * passed to some BLPOP-ing client */
4690 if (!handleClientsWaitingListPush(c
,c
->argv
[2],ele
)) {
4692 /* Create the list if the key does not exist */
4693 dobj
= createListObject();
4694 dictAdd(c
->db
->dict
,c
->argv
[2],dobj
);
4695 incrRefCount(c
->argv
[2]);
4697 dstlist
= dobj
->ptr
;
4698 listAddNodeHead(dstlist
,ele
);
4702 /* Send the element to the client as reply as well */
4703 addReplyBulk(c
,ele
);
4705 /* Finally remove the element from the source list */
4706 listDelNode(srclist
,ln
);
4707 if (listLength(srclist
) == 0) deleteKey(c
->db
,c
->argv
[1]);
4712 /* ==================================== Sets ================================ */
4714 static void saddCommand(redisClient
*c
) {
4717 set
= lookupKeyWrite(c
->db
,c
->argv
[1]);
4719 set
= createSetObject();
4720 dictAdd(c
->db
->dict
,c
->argv
[1],set
);
4721 incrRefCount(c
->argv
[1]);
4723 if (set
->type
!= REDIS_SET
) {
4724 addReply(c
,shared
.wrongtypeerr
);
4728 if (dictAdd(set
->ptr
,c
->argv
[2],NULL
) == DICT_OK
) {
4729 incrRefCount(c
->argv
[2]);
4731 addReply(c
,shared
.cone
);
4733 addReply(c
,shared
.czero
);
4737 static void sremCommand(redisClient
*c
) {
4740 if ((set
= lookupKeyWriteOrReply(c
,c
->argv
[1],shared
.czero
)) == NULL
||
4741 checkType(c
,set
,REDIS_SET
)) return;
4743 if (dictDelete(set
->ptr
,c
->argv
[2]) == DICT_OK
) {
4745 if (htNeedsResize(set
->ptr
)) dictResize(set
->ptr
);
4746 if (dictSize((dict
*)set
->ptr
) == 0) deleteKey(c
->db
,c
->argv
[1]);
4747 addReply(c
,shared
.cone
);
4749 addReply(c
,shared
.czero
);
4753 static void smoveCommand(redisClient
*c
) {
4754 robj
*srcset
, *dstset
;
4756 srcset
= lookupKeyWrite(c
->db
,c
->argv
[1]);
4757 dstset
= lookupKeyWrite(c
->db
,c
->argv
[2]);
4759 /* If the source key does not exist return 0, if it's of the wrong type
4761 if (srcset
== NULL
|| srcset
->type
!= REDIS_SET
) {
4762 addReply(c
, srcset
? shared
.wrongtypeerr
: shared
.czero
);
4765 /* Error if the destination key is not a set as well */
4766 if (dstset
&& dstset
->type
!= REDIS_SET
) {
4767 addReply(c
,shared
.wrongtypeerr
);
4770 /* Remove the element from the source set */
4771 if (dictDelete(srcset
->ptr
,c
->argv
[3]) == DICT_ERR
) {
4772 /* Key not found in the src set! return zero */
4773 addReply(c
,shared
.czero
);
4776 if (dictSize((dict
*)srcset
->ptr
) == 0 && srcset
!= dstset
)
4777 deleteKey(c
->db
,c
->argv
[1]);
4779 /* Add the element to the destination set */
4781 dstset
= createSetObject();
4782 dictAdd(c
->db
->dict
,c
->argv
[2],dstset
);
4783 incrRefCount(c
->argv
[2]);
4785 if (dictAdd(dstset
->ptr
,c
->argv
[3],NULL
) == DICT_OK
)
4786 incrRefCount(c
->argv
[3]);
4787 addReply(c
,shared
.cone
);
4790 static void sismemberCommand(redisClient
*c
) {
4793 if ((set
= lookupKeyReadOrReply(c
,c
->argv
[1],shared
.czero
)) == NULL
||
4794 checkType(c
,set
,REDIS_SET
)) return;
4796 if (dictFind(set
->ptr
,c
->argv
[2]))
4797 addReply(c
,shared
.cone
);
4799 addReply(c
,shared
.czero
);
4802 static void scardCommand(redisClient
*c
) {
4806 if ((o
= lookupKeyReadOrReply(c
,c
->argv
[1],shared
.czero
)) == NULL
||
4807 checkType(c
,o
,REDIS_SET
)) return;
4810 addReplyUlong(c
,dictSize(s
));
4813 static void spopCommand(redisClient
*c
) {
4817 if ((set
= lookupKeyWriteOrReply(c
,c
->argv
[1],shared
.nullbulk
)) == NULL
||
4818 checkType(c
,set
,REDIS_SET
)) return;
4820 de
= dictGetRandomKey(set
->ptr
);
4822 addReply(c
,shared
.nullbulk
);
4824 robj
*ele
= dictGetEntryKey(de
);
4826 addReplyBulk(c
,ele
);
4827 dictDelete(set
->ptr
,ele
);
4828 if (htNeedsResize(set
->ptr
)) dictResize(set
->ptr
);
4829 if (dictSize((dict
*)set
->ptr
) == 0) deleteKey(c
->db
,c
->argv
[1]);
4834 static void srandmemberCommand(redisClient
*c
) {
4838 if ((set
= lookupKeyReadOrReply(c
,c
->argv
[1],shared
.nullbulk
)) == NULL
||
4839 checkType(c
,set
,REDIS_SET
)) return;
4841 de
= dictGetRandomKey(set
->ptr
);
4843 addReply(c
,shared
.nullbulk
);
4845 robj
*ele
= dictGetEntryKey(de
);
4847 addReplyBulk(c
,ele
);
4851 static int qsortCompareSetsByCardinality(const void *s1
, const void *s2
) {
4852 dict
**d1
= (void*) s1
, **d2
= (void*) s2
;
4854 return dictSize(*d1
)-dictSize(*d2
);
4857 static void sinterGenericCommand(redisClient
*c
, robj
**setskeys
, unsigned long setsnum
, robj
*dstkey
) {
4858 dict
**dv
= zmalloc(sizeof(dict
*)*setsnum
);
4861 robj
*lenobj
= NULL
, *dstset
= NULL
;
4862 unsigned long j
, cardinality
= 0;
4864 for (j
= 0; j
< setsnum
; j
++) {
4868 lookupKeyWrite(c
->db
,setskeys
[j
]) :
4869 lookupKeyRead(c
->db
,setskeys
[j
]);
4873 if (deleteKey(c
->db
,dstkey
))
4875 addReply(c
,shared
.czero
);
4877 addReply(c
,shared
.emptymultibulk
);
4881 if (setobj
->type
!= REDIS_SET
) {
4883 addReply(c
,shared
.wrongtypeerr
);
4886 dv
[j
] = setobj
->ptr
;
4888 /* Sort sets from the smallest to largest, this will improve our
4889 * algorithm's performance */
4890 qsort(dv
,setsnum
,sizeof(dict
*),qsortCompareSetsByCardinality
);
4892 /* The first thing we should output is the total number of elements...
4893 * since this is a multi-bulk write, but at this stage we don't know
4894 * the intersection set size, so we use a trick, append an empty object
4895 * to the output list and save the pointer to later modify it with the
4898 lenobj
= createObject(REDIS_STRING
,NULL
);
4900 decrRefCount(lenobj
);
4902 /* If we have a target key where to store the resulting set
4903 * create this key with an empty set inside */
4904 dstset
= createSetObject();
4907 /* Iterate all the elements of the first (smallest) set, and test
4908 * the element against all the other sets, if at least one set does
4909 * not include the element it is discarded */
4910 di
= dictGetIterator(dv
[0]);
4912 while((de
= dictNext(di
)) != NULL
) {
4915 for (j
= 1; j
< setsnum
; j
++)
4916 if (dictFind(dv
[j
],dictGetEntryKey(de
)) == NULL
) break;
4918 continue; /* at least one set does not contain the member */
4919 ele
= dictGetEntryKey(de
);
4921 addReplyBulk(c
,ele
);
4924 dictAdd(dstset
->ptr
,ele
,NULL
);
4928 dictReleaseIterator(di
);
4931 /* Store the resulting set into the target, if the intersection
4932 * is not an empty set. */
4933 deleteKey(c
->db
,dstkey
);
4934 if (dictSize((dict
*)dstset
->ptr
) > 0) {
4935 dictAdd(c
->db
->dict
,dstkey
,dstset
);
4936 incrRefCount(dstkey
);
4937 addReplyLong(c
,dictSize((dict
*)dstset
->ptr
));
4939 decrRefCount(dstset
);
4940 addReply(c
,shared
.czero
);
4944 lenobj
->ptr
= sdscatprintf(sdsempty(),"*%lu\r\n",cardinality
);
4949 static void sinterCommand(redisClient
*c
) {
4950 sinterGenericCommand(c
,c
->argv
+1,c
->argc
-1,NULL
);
4953 static void sinterstoreCommand(redisClient
*c
) {
4954 sinterGenericCommand(c
,c
->argv
+2,c
->argc
-2,c
->argv
[1]);
4957 #define REDIS_OP_UNION 0
4958 #define REDIS_OP_DIFF 1
4959 #define REDIS_OP_INTER 2
4961 static void sunionDiffGenericCommand(redisClient
*c
, robj
**setskeys
, int setsnum
, robj
*dstkey
, int op
) {
4962 dict
**dv
= zmalloc(sizeof(dict
*)*setsnum
);
4965 robj
*dstset
= NULL
;
4966 int j
, cardinality
= 0;
4968 for (j
= 0; j
< setsnum
; j
++) {
4972 lookupKeyWrite(c
->db
,setskeys
[j
]) :
4973 lookupKeyRead(c
->db
,setskeys
[j
]);
4978 if (setobj
->type
!= REDIS_SET
) {
4980 addReply(c
,shared
.wrongtypeerr
);
4983 dv
[j
] = setobj
->ptr
;
4986 /* We need a temp set object to store our union. If the dstkey
4987 * is not NULL (that is, we are inside an SUNIONSTORE operation) then
4988 * this set object will be the resulting object to set into the target key*/
4989 dstset
= createSetObject();
4991 /* Iterate all the elements of all the sets, add every element a single
4992 * time to the result set */
4993 for (j
= 0; j
< setsnum
; j
++) {
4994 if (op
== REDIS_OP_DIFF
&& j
== 0 && !dv
[j
]) break; /* result set is empty */
4995 if (!dv
[j
]) continue; /* non existing keys are like empty sets */
4997 di
= dictGetIterator(dv
[j
]);
4999 while((de
= dictNext(di
)) != NULL
) {
5002 /* dictAdd will not add the same element multiple times */
5003 ele
= dictGetEntryKey(de
);
5004 if (op
== REDIS_OP_UNION
|| j
== 0) {
5005 if (dictAdd(dstset
->ptr
,ele
,NULL
) == DICT_OK
) {
5009 } else if (op
== REDIS_OP_DIFF
) {
5010 if (dictDelete(dstset
->ptr
,ele
) == DICT_OK
) {
5015 dictReleaseIterator(di
);
5017 /* result set is empty? Exit asap. */
5018 if (op
== REDIS_OP_DIFF
&& cardinality
== 0) break;
5021 /* Output the content of the resulting set, if not in STORE mode */
5023 addReplySds(c
,sdscatprintf(sdsempty(),"*%d\r\n",cardinality
));
5024 di
= dictGetIterator(dstset
->ptr
);
5025 while((de
= dictNext(di
)) != NULL
) {
5028 ele
= dictGetEntryKey(de
);
5029 addReplyBulk(c
,ele
);
5031 dictReleaseIterator(di
);
5032 decrRefCount(dstset
);
5034 /* If we have a target key where to store the resulting set
5035 * create this key with the result set inside */
5036 deleteKey(c
->db
,dstkey
);
5037 if (dictSize((dict
*)dstset
->ptr
) > 0) {
5038 dictAdd(c
->db
->dict
,dstkey
,dstset
);
5039 incrRefCount(dstkey
);
5040 addReplyLong(c
,dictSize((dict
*)dstset
->ptr
));
5042 decrRefCount(dstset
);
5043 addReply(c
,shared
.czero
);
5050 static void sunionCommand(redisClient
*c
) {
5051 sunionDiffGenericCommand(c
,c
->argv
+1,c
->argc
-1,NULL
,REDIS_OP_UNION
);
5054 static void sunionstoreCommand(redisClient
*c
) {
5055 sunionDiffGenericCommand(c
,c
->argv
+2,c
->argc
-2,c
->argv
[1],REDIS_OP_UNION
);
5058 static void sdiffCommand(redisClient
*c
) {
5059 sunionDiffGenericCommand(c
,c
->argv
+1,c
->argc
-1,NULL
,REDIS_OP_DIFF
);
5062 static void sdiffstoreCommand(redisClient
*c
) {
5063 sunionDiffGenericCommand(c
,c
->argv
+2,c
->argc
-2,c
->argv
[1],REDIS_OP_DIFF
);
5066 /* ==================================== ZSets =============================== */
5068 /* ZSETs are ordered sets using two data structures to hold the same elements
5069 * in order to get O(log(N)) INSERT and REMOVE operations into a sorted
5072 * The elements are added to an hash table mapping Redis objects to scores.
5073 * At the same time the elements are added to a skip list mapping scores
5074 * to Redis objects (so objects are sorted by scores in this "view"). */
5076 /* This skiplist implementation is almost a C translation of the original
5077 * algorithm described by William Pugh in "Skip Lists: A Probabilistic
5078 * Alternative to Balanced Trees", modified in three ways:
5079 * a) this implementation allows for repeated values.
5080 * b) the comparison is not just by key (our 'score') but by satellite data.
5081 * c) there is a back pointer, so it's a doubly linked list with the back
5082 * pointers being only at "level 1". This allows to traverse the list
5083 * from tail to head, useful for ZREVRANGE. */
5085 static zskiplistNode
*zslCreateNode(int level
, double score
, robj
*obj
) {
5086 zskiplistNode
*zn
= zmalloc(sizeof(*zn
));
5088 zn
->forward
= zmalloc(sizeof(zskiplistNode
*) * level
);
5090 zn
->span
= zmalloc(sizeof(unsigned int) * (level
- 1));
5096 static zskiplist
*zslCreate(void) {
5100 zsl
= zmalloc(sizeof(*zsl
));
5103 zsl
->header
= zslCreateNode(ZSKIPLIST_MAXLEVEL
,0,NULL
);
5104 for (j
= 0; j
< ZSKIPLIST_MAXLEVEL
; j
++) {
5105 zsl
->header
->forward
[j
] = NULL
;
5107 /* span has space for ZSKIPLIST_MAXLEVEL-1 elements */
5108 if (j
< ZSKIPLIST_MAXLEVEL
-1)
5109 zsl
->header
->span
[j
] = 0;
5111 zsl
->header
->backward
= NULL
;
5116 static void zslFreeNode(zskiplistNode
*node
) {
5117 decrRefCount(node
->obj
);
5118 zfree(node
->forward
);
5123 static void zslFree(zskiplist
*zsl
) {
5124 zskiplistNode
*node
= zsl
->header
->forward
[0], *next
;
5126 zfree(zsl
->header
->forward
);
5127 zfree(zsl
->header
->span
);
5130 next
= node
->forward
[0];
5137 static int zslRandomLevel(void) {
5139 while ((random()&0xFFFF) < (ZSKIPLIST_P
* 0xFFFF))
5141 return (level
<ZSKIPLIST_MAXLEVEL
) ? level
: ZSKIPLIST_MAXLEVEL
;
5144 static void zslInsert(zskiplist
*zsl
, double score
, robj
*obj
) {
5145 zskiplistNode
*update
[ZSKIPLIST_MAXLEVEL
], *x
;
5146 unsigned int rank
[ZSKIPLIST_MAXLEVEL
];
5150 for (i
= zsl
->level
-1; i
>= 0; i
--) {
5151 /* store rank that is crossed to reach the insert position */
5152 rank
[i
] = i
== (zsl
->level
-1) ? 0 : rank
[i
+1];
5154 while (x
->forward
[i
] &&
5155 (x
->forward
[i
]->score
< score
||
5156 (x
->forward
[i
]->score
== score
&&
5157 compareStringObjects(x
->forward
[i
]->obj
,obj
) < 0))) {
5158 rank
[i
] += i
> 0 ? x
->span
[i
-1] : 1;
5163 /* we assume the key is not already inside, since we allow duplicated
5164 * scores, and the re-insertion of score and redis object should never
5165 * happpen since the caller of zslInsert() should test in the hash table
5166 * if the element is already inside or not. */
5167 level
= zslRandomLevel();
5168 if (level
> zsl
->level
) {
5169 for (i
= zsl
->level
; i
< level
; i
++) {
5171 update
[i
] = zsl
->header
;
5172 update
[i
]->span
[i
-1] = zsl
->length
;
5176 x
= zslCreateNode(level
,score
,obj
);
5177 for (i
= 0; i
< level
; i
++) {
5178 x
->forward
[i
] = update
[i
]->forward
[i
];
5179 update
[i
]->forward
[i
] = x
;
5181 /* update span covered by update[i] as x is inserted here */
5183 x
->span
[i
-1] = update
[i
]->span
[i
-1] - (rank
[0] - rank
[i
]);
5184 update
[i
]->span
[i
-1] = (rank
[0] - rank
[i
]) + 1;
5188 /* increment span for untouched levels */
5189 for (i
= level
; i
< zsl
->level
; i
++) {
5190 update
[i
]->span
[i
-1]++;
5193 x
->backward
= (update
[0] == zsl
->header
) ? NULL
: update
[0];
5195 x
->forward
[0]->backward
= x
;
5201 /* Internal function used by zslDelete, zslDeleteByScore and zslDeleteByRank */
5202 void zslDeleteNode(zskiplist
*zsl
, zskiplistNode
*x
, zskiplistNode
**update
) {
5204 for (i
= 0; i
< zsl
->level
; i
++) {
5205 if (update
[i
]->forward
[i
] == x
) {
5207 update
[i
]->span
[i
-1] += x
->span
[i
-1] - 1;
5209 update
[i
]->forward
[i
] = x
->forward
[i
];
5211 /* invariant: i > 0, because update[0]->forward[0]
5212 * is always equal to x */
5213 update
[i
]->span
[i
-1] -= 1;
5216 if (x
->forward
[0]) {
5217 x
->forward
[0]->backward
= x
->backward
;
5219 zsl
->tail
= x
->backward
;
5221 while(zsl
->level
> 1 && zsl
->header
->forward
[zsl
->level
-1] == NULL
)
5226 /* Delete an element with matching score/object from the skiplist. */
5227 static int zslDelete(zskiplist
*zsl
, double score
, robj
*obj
) {
5228 zskiplistNode
*update
[ZSKIPLIST_MAXLEVEL
], *x
;
5232 for (i
= zsl
->level
-1; i
>= 0; i
--) {
5233 while (x
->forward
[i
] &&
5234 (x
->forward
[i
]->score
< score
||
5235 (x
->forward
[i
]->score
== score
&&
5236 compareStringObjects(x
->forward
[i
]->obj
,obj
) < 0)))
5240 /* We may have multiple elements with the same score, what we need
5241 * is to find the element with both the right score and object. */
5243 if (x
&& score
== x
->score
&& compareStringObjects(x
->obj
,obj
) == 0) {
5244 zslDeleteNode(zsl
, x
, update
);
5248 return 0; /* not found */
5250 return 0; /* not found */
5253 /* Delete all the elements with score between min and max from the skiplist.
5254 * Min and mx are inclusive, so a score >= min || score <= max is deleted.
5255 * Note that this function takes the reference to the hash table view of the
5256 * sorted set, in order to remove the elements from the hash table too. */
5257 static unsigned long zslDeleteRangeByScore(zskiplist
*zsl
, double min
, double max
, dict
*dict
) {
5258 zskiplistNode
*update
[ZSKIPLIST_MAXLEVEL
], *x
;
5259 unsigned long removed
= 0;
5263 for (i
= zsl
->level
-1; i
>= 0; i
--) {
5264 while (x
->forward
[i
] && x
->forward
[i
]->score
< min
)
5268 /* We may have multiple elements with the same score, what we need
5269 * is to find the element with both the right score and object. */
5271 while (x
&& x
->score
<= max
) {
5272 zskiplistNode
*next
= x
->forward
[0];
5273 zslDeleteNode(zsl
, x
, update
);
5274 dictDelete(dict
,x
->obj
);
5279 return removed
; /* not found */
5282 /* Delete all the elements with rank between start and end from the skiplist.
5283 * Start and end are inclusive. Note that start and end need to be 1-based */
5284 static unsigned long zslDeleteRangeByRank(zskiplist
*zsl
, unsigned int start
, unsigned int end
, dict
*dict
) {
5285 zskiplistNode
*update
[ZSKIPLIST_MAXLEVEL
], *x
;
5286 unsigned long traversed
= 0, removed
= 0;
5290 for (i
= zsl
->level
-1; i
>= 0; i
--) {
5291 while (x
->forward
[i
] && (traversed
+ (i
> 0 ? x
->span
[i
-1] : 1)) < start
) {
5292 traversed
+= i
> 0 ? x
->span
[i
-1] : 1;
5300 while (x
&& traversed
<= end
) {
5301 zskiplistNode
*next
= x
->forward
[0];
5302 zslDeleteNode(zsl
, x
, update
);
5303 dictDelete(dict
,x
->obj
);
5312 /* Find the first node having a score equal or greater than the specified one.
5313 * Returns NULL if there is no match. */
5314 static zskiplistNode
*zslFirstWithScore(zskiplist
*zsl
, double score
) {
5319 for (i
= zsl
->level
-1; i
>= 0; i
--) {
5320 while (x
->forward
[i
] && x
->forward
[i
]->score
< score
)
5323 /* We may have multiple elements with the same score, what we need
5324 * is to find the element with both the right score and object. */
5325 return x
->forward
[0];
5328 /* Find the rank for an element by both score and key.
5329 * Returns 0 when the element cannot be found, rank otherwise.
5330 * Note that the rank is 1-based due to the span of zsl->header to the
5332 static unsigned long zslGetRank(zskiplist
*zsl
, double score
, robj
*o
) {
5334 unsigned long rank
= 0;
5338 for (i
= zsl
->level
-1; i
>= 0; i
--) {
5339 while (x
->forward
[i
] &&
5340 (x
->forward
[i
]->score
< score
||
5341 (x
->forward
[i
]->score
== score
&&
5342 compareStringObjects(x
->forward
[i
]->obj
,o
) <= 0))) {
5343 rank
+= i
> 0 ? x
->span
[i
-1] : 1;
5347 /* x might be equal to zsl->header, so test if obj is non-NULL */
5348 if (x
->obj
&& compareStringObjects(x
->obj
,o
) == 0) {
5355 /* Finds an element by its rank. The rank argument needs to be 1-based. */
5356 zskiplistNode
* zslGetElementByRank(zskiplist
*zsl
, unsigned long rank
) {
5358 unsigned long traversed
= 0;
5362 for (i
= zsl
->level
-1; i
>= 0; i
--) {
5363 while (x
->forward
[i
] && (traversed
+ (i
>0 ? x
->span
[i
-1] : 1)) <= rank
)
5365 traversed
+= i
> 0 ? x
->span
[i
-1] : 1;
5368 if (traversed
== rank
) {
5375 /* The actual Z-commands implementations */
5377 /* This generic command implements both ZADD and ZINCRBY.
5378 * scoreval is the score if the operation is a ZADD (doincrement == 0) or
5379 * the increment if the operation is a ZINCRBY (doincrement == 1). */
5380 static void zaddGenericCommand(redisClient
*c
, robj
*key
, robj
*ele
, double scoreval
, int doincrement
) {
5385 zsetobj
= lookupKeyWrite(c
->db
,key
);
5386 if (zsetobj
== NULL
) {
5387 zsetobj
= createZsetObject();
5388 dictAdd(c
->db
->dict
,key
,zsetobj
);
5391 if (zsetobj
->type
!= REDIS_ZSET
) {
5392 addReply(c
,shared
.wrongtypeerr
);
5398 /* Ok now since we implement both ZADD and ZINCRBY here the code
5399 * needs to handle the two different conditions. It's all about setting
5400 * '*score', that is, the new score to set, to the right value. */
5401 score
= zmalloc(sizeof(double));
5405 /* Read the old score. If the element was not present starts from 0 */
5406 de
= dictFind(zs
->dict
,ele
);
5408 double *oldscore
= dictGetEntryVal(de
);
5409 *score
= *oldscore
+ scoreval
;
5417 /* What follows is a simple remove and re-insert operation that is common
5418 * to both ZADD and ZINCRBY... */
5419 if (dictAdd(zs
->dict
,ele
,score
) == DICT_OK
) {
5420 /* case 1: New element */
5421 incrRefCount(ele
); /* added to hash */
5422 zslInsert(zs
->zsl
,*score
,ele
);
5423 incrRefCount(ele
); /* added to skiplist */
5426 addReplyDouble(c
,*score
);
5428 addReply(c
,shared
.cone
);
5433 /* case 2: Score update operation */
5434 de
= dictFind(zs
->dict
,ele
);
5435 redisAssert(de
!= NULL
);
5436 oldscore
= dictGetEntryVal(de
);
5437 if (*score
!= *oldscore
) {
5440 /* Remove and insert the element in the skip list with new score */
5441 deleted
= zslDelete(zs
->zsl
,*oldscore
,ele
);
5442 redisAssert(deleted
!= 0);
5443 zslInsert(zs
->zsl
,*score
,ele
);
5445 /* Update the score in the hash table */
5446 dictReplace(zs
->dict
,ele
,score
);
5452 addReplyDouble(c
,*score
);
5454 addReply(c
,shared
.czero
);
5458 static void zaddCommand(redisClient
*c
) {
5461 if (getDoubleFromObject(c
, c
->argv
[2], &scoreval
) != REDIS_OK
) return;
5463 zaddGenericCommand(c
,c
->argv
[1],c
->argv
[3],scoreval
,0);
5466 static void zincrbyCommand(redisClient
*c
) {
5469 if (getDoubleFromObject(c
, c
->argv
[2], &scoreval
) != REDIS_OK
) return;
5471 zaddGenericCommand(c
,c
->argv
[1],c
->argv
[3],scoreval
,1);
5474 static void zremCommand(redisClient
*c
) {
5481 if ((zsetobj
= lookupKeyWriteOrReply(c
,c
->argv
[1],shared
.czero
)) == NULL
||
5482 checkType(c
,zsetobj
,REDIS_ZSET
)) return;
5485 de
= dictFind(zs
->dict
,c
->argv
[2]);
5487 addReply(c
,shared
.czero
);
5490 /* Delete from the skiplist */
5491 oldscore
= dictGetEntryVal(de
);
5492 deleted
= zslDelete(zs
->zsl
,*oldscore
,c
->argv
[2]);
5493 redisAssert(deleted
!= 0);
5495 /* Delete from the hash table */
5496 dictDelete(zs
->dict
,c
->argv
[2]);
5497 if (htNeedsResize(zs
->dict
)) dictResize(zs
->dict
);
5498 if (dictSize(zs
->dict
) == 0) deleteKey(c
->db
,c
->argv
[1]);
5500 addReply(c
,shared
.cone
);
5503 static void zremrangebyscoreCommand(redisClient
*c
) {
5510 if ((getDoubleFromObject(c
, c
->argv
[2], &min
) != REDIS_OK
) ||
5511 (getDoubleFromObject(c
, c
->argv
[3], &max
) != REDIS_OK
)) return;
5513 if ((zsetobj
= lookupKeyWriteOrReply(c
,c
->argv
[1],shared
.czero
)) == NULL
||
5514 checkType(c
,zsetobj
,REDIS_ZSET
)) return;
5517 deleted
= zslDeleteRangeByScore(zs
->zsl
,min
,max
,zs
->dict
);
5518 if (htNeedsResize(zs
->dict
)) dictResize(zs
->dict
);
5519 if (dictSize(zs
->dict
) == 0) deleteKey(c
->db
,c
->argv
[1]);
5520 server
.dirty
+= deleted
;
5521 addReplyLong(c
,deleted
);
5524 static void zremrangebyrankCommand(redisClient
*c
) {
5532 if ((getLongFromObject(c
, c
->argv
[2], &start
) != REDIS_OK
) ||
5533 (getLongFromObject(c
, c
->argv
[3], &end
) != REDIS_OK
)) return;
5535 if ((zsetobj
= lookupKeyWriteOrReply(c
,c
->argv
[1],shared
.czero
)) == NULL
||
5536 checkType(c
,zsetobj
,REDIS_ZSET
)) return;
5538 llen
= zs
->zsl
->length
;
5540 /* convert negative indexes */
5541 if (start
< 0) start
= llen
+start
;
5542 if (end
< 0) end
= llen
+end
;
5543 if (start
< 0) start
= 0;
5544 if (end
< 0) end
= 0;
5546 /* indexes sanity checks */
5547 if (start
> end
|| start
>= llen
) {
5548 addReply(c
,shared
.czero
);
5551 if (end
>= llen
) end
= llen
-1;
5553 /* increment start and end because zsl*Rank functions
5554 * use 1-based rank */
5555 deleted
= zslDeleteRangeByRank(zs
->zsl
,start
+1,end
+1,zs
->dict
);
5556 if (htNeedsResize(zs
->dict
)) dictResize(zs
->dict
);
5557 if (dictSize(zs
->dict
) == 0) deleteKey(c
->db
,c
->argv
[1]);
5558 server
.dirty
+= deleted
;
5559 addReplyLong(c
, deleted
);
5567 static int qsortCompareZsetopsrcByCardinality(const void *s1
, const void *s2
) {
5568 zsetopsrc
*d1
= (void*) s1
, *d2
= (void*) s2
;
5569 unsigned long size1
, size2
;
5570 size1
= d1
->dict
? dictSize(d1
->dict
) : 0;
5571 size2
= d2
->dict
? dictSize(d2
->dict
) : 0;
5572 return size1
- size2
;
5575 #define REDIS_AGGR_SUM 1
5576 #define REDIS_AGGR_MIN 2
5577 #define REDIS_AGGR_MAX 3
5579 inline static void zunionInterAggregate(double *target
, double val
, int aggregate
) {
5580 if (aggregate
== REDIS_AGGR_SUM
) {
5581 *target
= *target
+ val
;
5582 } else if (aggregate
== REDIS_AGGR_MIN
) {
5583 *target
= val
< *target
? val
: *target
;
5584 } else if (aggregate
== REDIS_AGGR_MAX
) {
5585 *target
= val
> *target
? val
: *target
;
5588 redisAssert(0 != 0);
5592 static void zunionInterGenericCommand(redisClient
*c
, robj
*dstkey
, int op
) {
5594 int aggregate
= REDIS_AGGR_SUM
;
5601 /* expect zsetnum input keys to be given */
5602 zsetnum
= atoi(c
->argv
[2]->ptr
);
5604 addReplySds(c
,sdsnew("-ERR at least 1 input key is needed for ZUNION/ZINTER\r\n"));
5608 /* test if the expected number of keys would overflow */
5609 if (3+zsetnum
> c
->argc
) {
5610 addReply(c
,shared
.syntaxerr
);
5614 /* read keys to be used for input */
5615 src
= zmalloc(sizeof(zsetopsrc
) * zsetnum
);
5616 for (i
= 0, j
= 3; i
< zsetnum
; i
++, j
++) {
5617 robj
*zsetobj
= lookupKeyWrite(c
->db
,c
->argv
[j
]);
5621 if (zsetobj
->type
!= REDIS_ZSET
) {
5623 addReply(c
,shared
.wrongtypeerr
);
5626 src
[i
].dict
= ((zset
*)zsetobj
->ptr
)->dict
;
5629 /* default all weights to 1 */
5630 src
[i
].weight
= 1.0;
5633 /* parse optional extra arguments */
5635 int remaining
= c
->argc
- j
;
5638 if (remaining
>= (zsetnum
+ 1) && !strcasecmp(c
->argv
[j
]->ptr
,"weights")) {
5640 for (i
= 0; i
< zsetnum
; i
++, j
++, remaining
--) {
5641 if (getDoubleFromObject(c
, c
->argv
[j
], &src
[i
].weight
) != REDIS_OK
)
5644 } else if (remaining
>= 2 && !strcasecmp(c
->argv
[j
]->ptr
,"aggregate")) {
5646 if (!strcasecmp(c
->argv
[j
]->ptr
,"sum")) {
5647 aggregate
= REDIS_AGGR_SUM
;
5648 } else if (!strcasecmp(c
->argv
[j
]->ptr
,"min")) {
5649 aggregate
= REDIS_AGGR_MIN
;
5650 } else if (!strcasecmp(c
->argv
[j
]->ptr
,"max")) {
5651 aggregate
= REDIS_AGGR_MAX
;
5654 addReply(c
,shared
.syntaxerr
);
5660 addReply(c
,shared
.syntaxerr
);
5666 /* sort sets from the smallest to largest, this will improve our
5667 * algorithm's performance */
5668 qsort(src
,zsetnum
,sizeof(zsetopsrc
), qsortCompareZsetopsrcByCardinality
);
5670 dstobj
= createZsetObject();
5671 dstzset
= dstobj
->ptr
;
5673 if (op
== REDIS_OP_INTER
) {
5674 /* skip going over all entries if the smallest zset is NULL or empty */
5675 if (src
[0].dict
&& dictSize(src
[0].dict
) > 0) {
5676 /* precondition: as src[0].dict is non-empty and the zsets are ordered
5677 * from small to large, all src[i > 0].dict are non-empty too */
5678 di
= dictGetIterator(src
[0].dict
);
5679 while((de
= dictNext(di
)) != NULL
) {
5680 double *score
= zmalloc(sizeof(double)), value
;
5681 *score
= src
[0].weight
* (*(double*)dictGetEntryVal(de
));
5683 for (j
= 1; j
< zsetnum
; j
++) {
5684 dictEntry
*other
= dictFind(src
[j
].dict
,dictGetEntryKey(de
));
5686 value
= src
[j
].weight
* (*(double*)dictGetEntryVal(other
));
5687 zunionInterAggregate(score
, value
, aggregate
);
5693 /* skip entry when not present in every source dict */
5697 robj
*o
= dictGetEntryKey(de
);
5698 dictAdd(dstzset
->dict
,o
,score
);
5699 incrRefCount(o
); /* added to dictionary */
5700 zslInsert(dstzset
->zsl
,*score
,o
);
5701 incrRefCount(o
); /* added to skiplist */
5704 dictReleaseIterator(di
);
5706 } else if (op
== REDIS_OP_UNION
) {
5707 for (i
= 0; i
< zsetnum
; i
++) {
5708 if (!src
[i
].dict
) continue;
5710 di
= dictGetIterator(src
[i
].dict
);
5711 while((de
= dictNext(di
)) != NULL
) {
5712 /* skip key when already processed */
5713 if (dictFind(dstzset
->dict
,dictGetEntryKey(de
)) != NULL
) continue;
5715 double *score
= zmalloc(sizeof(double)), value
;
5716 *score
= src
[i
].weight
* (*(double*)dictGetEntryVal(de
));
5718 /* because the zsets are sorted by size, its only possible
5719 * for sets at larger indices to hold this entry */
5720 for (j
= (i
+1); j
< zsetnum
; j
++) {
5721 dictEntry
*other
= dictFind(src
[j
].dict
,dictGetEntryKey(de
));
5723 value
= src
[j
].weight
* (*(double*)dictGetEntryVal(other
));
5724 zunionInterAggregate(score
, value
, aggregate
);
5728 robj
*o
= dictGetEntryKey(de
);
5729 dictAdd(dstzset
->dict
,o
,score
);
5730 incrRefCount(o
); /* added to dictionary */
5731 zslInsert(dstzset
->zsl
,*score
,o
);
5732 incrRefCount(o
); /* added to skiplist */
5734 dictReleaseIterator(di
);
5737 /* unknown operator */
5738 redisAssert(op
== REDIS_OP_INTER
|| op
== REDIS_OP_UNION
);
5741 deleteKey(c
->db
,dstkey
);
5742 if (dstzset
->zsl
->length
) {
5743 dictAdd(c
->db
->dict
,dstkey
,dstobj
);
5744 incrRefCount(dstkey
);
5745 addReplyLong(c
, dstzset
->zsl
->length
);
5748 decrRefCount(dstobj
);
5749 addReply(c
, shared
.czero
);
5754 static void zunionCommand(redisClient
*c
) {
5755 zunionInterGenericCommand(c
,c
->argv
[1], REDIS_OP_UNION
);
5758 static void zinterCommand(redisClient
*c
) {
5759 zunionInterGenericCommand(c
,c
->argv
[1], REDIS_OP_INTER
);
5762 static void zrangeGenericCommand(redisClient
*c
, int reverse
) {
5774 if ((getLongFromObject(c
, c
->argv
[2], &start
) != REDIS_OK
) ||
5775 (getLongFromObject(c
, c
->argv
[3], &end
) != REDIS_OK
)) return;
5777 if (c
->argc
== 5 && !strcasecmp(c
->argv
[4]->ptr
,"withscores")) {
5779 } else if (c
->argc
>= 5) {
5780 addReply(c
,shared
.syntaxerr
);
5784 if ((o
= lookupKeyReadOrReply(c
,c
->argv
[1],shared
.emptymultibulk
)) == NULL
5785 || checkType(c
,o
,REDIS_ZSET
)) return;
5790 /* convert negative indexes */
5791 if (start
< 0) start
= llen
+start
;
5792 if (end
< 0) end
= llen
+end
;
5793 if (start
< 0) start
= 0;
5794 if (end
< 0) end
= 0;
5796 /* indexes sanity checks */
5797 if (start
> end
|| start
>= llen
) {
5798 /* Out of range start or start > end result in empty list */
5799 addReply(c
,shared
.emptymultibulk
);
5802 if (end
>= llen
) end
= llen
-1;
5803 rangelen
= (end
-start
)+1;
5805 /* check if starting point is trivial, before searching
5806 * the element in log(N) time */
5808 ln
= start
== 0 ? zsl
->tail
: zslGetElementByRank(zsl
, llen
-start
);
5811 zsl
->header
->forward
[0] : zslGetElementByRank(zsl
, start
+1);
5814 /* Return the result in form of a multi-bulk reply */
5815 addReplySds(c
,sdscatprintf(sdsempty(),"*%d\r\n",
5816 withscores
? (rangelen
*2) : rangelen
));
5817 for (j
= 0; j
< rangelen
; j
++) {
5819 addReplyBulk(c
,ele
);
5821 addReplyDouble(c
,ln
->score
);
5822 ln
= reverse
? ln
->backward
: ln
->forward
[0];
5826 static void zrangeCommand(redisClient
*c
) {
5827 zrangeGenericCommand(c
,0);
5830 static void zrevrangeCommand(redisClient
*c
) {
5831 zrangeGenericCommand(c
,1);
5834 /* This command implements both ZRANGEBYSCORE and ZCOUNT.
5835 * If justcount is non-zero, just the count is returned. */
5836 static void genericZrangebyscoreCommand(redisClient
*c
, int justcount
) {
5839 int minex
= 0, maxex
= 0; /* are min or max exclusive? */
5840 int offset
= 0, limit
= -1;
5844 /* Parse the min-max interval. If one of the values is prefixed
5845 * by the "(" character, it's considered "open". For instance
5846 * ZRANGEBYSCORE zset (1.5 (2.5 will match min < x < max
5847 * ZRANGEBYSCORE zset 1.5 2.5 will instead match min <= x <= max */
5848 if (((char*)c
->argv
[2]->ptr
)[0] == '(') {
5849 min
= strtod((char*)c
->argv
[2]->ptr
+1,NULL
);
5852 min
= strtod(c
->argv
[2]->ptr
,NULL
);
5854 if (((char*)c
->argv
[3]->ptr
)[0] == '(') {
5855 max
= strtod((char*)c
->argv
[3]->ptr
+1,NULL
);
5858 max
= strtod(c
->argv
[3]->ptr
,NULL
);
5861 /* Parse "WITHSCORES": note that if the command was called with
5862 * the name ZCOUNT then we are sure that c->argc == 4, so we'll never
5863 * enter the following paths to parse WITHSCORES and LIMIT. */
5864 if (c
->argc
== 5 || c
->argc
== 8) {
5865 if (strcasecmp(c
->argv
[c
->argc
-1]->ptr
,"withscores") == 0)
5870 if (c
->argc
!= (4 + withscores
) && c
->argc
!= (7 + withscores
))
5874 sdsnew("-ERR wrong number of arguments for ZRANGEBYSCORE\r\n"));
5879 if (c
->argc
== (7 + withscores
) && strcasecmp(c
->argv
[4]->ptr
,"limit")) {
5880 addReply(c
,shared
.syntaxerr
);
5882 } else if (c
->argc
== (7 + withscores
)) {
5883 offset
= atoi(c
->argv
[5]->ptr
);
5884 limit
= atoi(c
->argv
[6]->ptr
);
5885 if (offset
< 0) offset
= 0;
5888 /* Ok, lookup the key and get the range */
5889 o
= lookupKeyRead(c
->db
,c
->argv
[1]);
5891 addReply(c
,justcount
? shared
.czero
: shared
.emptymultibulk
);
5893 if (o
->type
!= REDIS_ZSET
) {
5894 addReply(c
,shared
.wrongtypeerr
);
5896 zset
*zsetobj
= o
->ptr
;
5897 zskiplist
*zsl
= zsetobj
->zsl
;
5899 robj
*ele
, *lenobj
= NULL
;
5900 unsigned long rangelen
= 0;
5902 /* Get the first node with the score >= min, or with
5903 * score > min if 'minex' is true. */
5904 ln
= zslFirstWithScore(zsl
,min
);
5905 while (minex
&& ln
&& ln
->score
== min
) ln
= ln
->forward
[0];
5908 /* No element matching the specified interval */
5909 addReply(c
,justcount
? shared
.czero
: shared
.emptymultibulk
);
5913 /* We don't know in advance how many matching elements there
5914 * are in the list, so we push this object that will represent
5915 * the multi-bulk length in the output buffer, and will "fix"
5918 lenobj
= createObject(REDIS_STRING
,NULL
);
5920 decrRefCount(lenobj
);
5923 while(ln
&& (maxex
? (ln
->score
< max
) : (ln
->score
<= max
))) {
5926 ln
= ln
->forward
[0];
5929 if (limit
== 0) break;
5932 addReplyBulk(c
,ele
);
5934 addReplyDouble(c
,ln
->score
);
5936 ln
= ln
->forward
[0];
5938 if (limit
> 0) limit
--;
5941 addReplyLong(c
,(long)rangelen
);
5943 lenobj
->ptr
= sdscatprintf(sdsempty(),"*%lu\r\n",
5944 withscores
? (rangelen
*2) : rangelen
);
5950 static void zrangebyscoreCommand(redisClient
*c
) {
5951 genericZrangebyscoreCommand(c
,0);
5954 static void zcountCommand(redisClient
*c
) {
5955 genericZrangebyscoreCommand(c
,1);
5958 static void zcardCommand(redisClient
*c
) {
5962 if ((o
= lookupKeyReadOrReply(c
,c
->argv
[1],shared
.czero
)) == NULL
||
5963 checkType(c
,o
,REDIS_ZSET
)) return;
5966 addReplyUlong(c
,zs
->zsl
->length
);
5969 static void zscoreCommand(redisClient
*c
) {
5974 if ((o
= lookupKeyReadOrReply(c
,c
->argv
[1],shared
.nullbulk
)) == NULL
||
5975 checkType(c
,o
,REDIS_ZSET
)) return;
5978 de
= dictFind(zs
->dict
,c
->argv
[2]);
5980 addReply(c
,shared
.nullbulk
);
5982 double *score
= dictGetEntryVal(de
);
5984 addReplyDouble(c
,*score
);
5988 static void zrankGenericCommand(redisClient
*c
, int reverse
) {
5996 if ((o
= lookupKeyReadOrReply(c
,c
->argv
[1],shared
.nullbulk
)) == NULL
||
5997 checkType(c
,o
,REDIS_ZSET
)) return;
6001 de
= dictFind(zs
->dict
,c
->argv
[2]);
6003 addReply(c
,shared
.nullbulk
);
6007 score
= dictGetEntryVal(de
);
6008 rank
= zslGetRank(zsl
, *score
, c
->argv
[2]);
6011 addReplyLong(c
, zsl
->length
- rank
);
6013 addReplyLong(c
, rank
-1);
6016 addReply(c
,shared
.nullbulk
);
6020 static void zrankCommand(redisClient
*c
) {
6021 zrankGenericCommand(c
, 0);
6024 static void zrevrankCommand(redisClient
*c
) {
6025 zrankGenericCommand(c
, 1);
6028 /* =================================== Hashes =============================== */
6029 static void hsetCommand(redisClient
*c
) {
6031 robj
*o
= lookupKeyWrite(c
->db
,c
->argv
[1]);
6034 o
= createHashObject();
6035 dictAdd(c
->db
->dict
,c
->argv
[1],o
);
6036 incrRefCount(c
->argv
[1]);
6038 if (o
->type
!= REDIS_HASH
) {
6039 addReply(c
,shared
.wrongtypeerr
);
6043 /* We want to convert the zipmap into a hash table right now if the
6044 * entry to be added is too big. Note that we check if the object
6045 * is integer encoded before trying to fetch the length in the test below.
6046 * This is because integers are small, but currently stringObjectLen()
6047 * performs a slow conversion: not worth it. */
6048 if (o
->encoding
== REDIS_ENCODING_ZIPMAP
&&
6049 ((c
->argv
[2]->encoding
== REDIS_ENCODING_RAW
&&
6050 sdslen(c
->argv
[2]->ptr
) > server
.hash_max_zipmap_value
) ||
6051 (c
->argv
[3]->encoding
== REDIS_ENCODING_RAW
&&
6052 sdslen(c
->argv
[3]->ptr
) > server
.hash_max_zipmap_value
)))
6054 convertToRealHash(o
);
6057 if (o
->encoding
== REDIS_ENCODING_ZIPMAP
) {
6058 unsigned char *zm
= o
->ptr
;
6059 robj
*valobj
= getDecodedObject(c
->argv
[3]);
6061 zm
= zipmapSet(zm
,c
->argv
[2]->ptr
,sdslen(c
->argv
[2]->ptr
),
6062 valobj
->ptr
,sdslen(valobj
->ptr
),&update
);
6063 decrRefCount(valobj
);
6066 /* And here there is the second check for hash conversion. */
6067 if (zipmapLen(zm
) > server
.hash_max_zipmap_entries
)
6068 convertToRealHash(o
);
6070 c
->argv
[2] = tryObjectEncoding(c
->argv
[2]);
6071 /* note that c->argv[3] is already encoded, as the latest arg
6072 * of a bulk command is always integer encoded if possible. */
6073 if (dictReplace(o
->ptr
,c
->argv
[2],c
->argv
[3])) {
6074 incrRefCount(c
->argv
[2]);
6078 incrRefCount(c
->argv
[3]);
6081 addReplySds(c
,sdscatprintf(sdsempty(),":%d\r\n",update
== 0));
6084 static void hmsetCommand(redisClient
*c
) {
6086 robj
*o
, *key
, *val
;
6088 if ((c
->argc
% 2) == 1) {
6089 addReplySds(c
,sdsnew("-ERR wrong number of arguments for HMSET\r\n"));
6093 if ((o
= lookupKeyWrite(c
->db
,c
->argv
[1])) == NULL
) {
6094 o
= createHashObject();
6095 dictAdd(c
->db
->dict
,c
->argv
[1],o
);
6096 incrRefCount(c
->argv
[1]);
6098 if (o
->type
!= REDIS_HASH
) {
6099 addReply(c
,shared
.wrongtypeerr
);
6104 /* We want to convert the zipmap into a hash table right now if the
6105 * entry to be added is too big. */
6106 if (o
->encoding
== REDIS_ENCODING_ZIPMAP
) {
6107 for (i
= 2; i
< c
->argc
; i
+=2) {
6108 if ((c
->argv
[i
]->encoding
== REDIS_ENCODING_RAW
&&
6109 sdslen(c
->argv
[i
]->ptr
) > server
.hash_max_zipmap_value
) ||
6110 (c
->argv
[i
+1]->encoding
== REDIS_ENCODING_RAW
&&
6111 sdslen(c
->argv
[i
+1]->ptr
) > server
.hash_max_zipmap_value
)) {
6112 convertToRealHash(o
);
6118 if (o
->encoding
== REDIS_ENCODING_ZIPMAP
) {
6119 unsigned char *zm
= o
->ptr
;
6121 for (i
= 2; i
< c
->argc
; i
+=2) {
6122 key
= getDecodedObject(c
->argv
[i
]);
6123 val
= getDecodedObject(c
->argv
[i
+1]);
6124 zm
= zipmapSet(zm
,key
->ptr
,sdslen(key
->ptr
),
6125 val
->ptr
,sdslen(val
->ptr
),NULL
);
6131 /* And here there is the second check for hash conversion. */
6132 if (zipmapLen(zm
) > server
.hash_max_zipmap_entries
)
6133 convertToRealHash(o
);
6135 for (i
= 2; i
< c
->argc
; i
+=2) {
6136 key
= tryObjectEncoding(c
->argv
[i
]);
6137 val
= tryObjectEncoding(c
->argv
[i
+1]);
6138 if (dictReplace(o
->ptr
,key
,val
)) {
6145 addReply(c
, shared
.ok
);
6148 static void hincrbyCommand(redisClient
*c
) {
6149 long long value
= 0, incr
= 0;
6150 robj
*o
= lookupKeyWrite(c
->db
,c
->argv
[1]);
6153 o
= createHashObject();
6154 dictAdd(c
->db
->dict
,c
->argv
[1],o
);
6155 incrRefCount(c
->argv
[1]);
6157 if (o
->type
!= REDIS_HASH
) {
6158 addReply(c
,shared
.wrongtypeerr
);
6163 if (getLongLongFromObject(c
, c
->argv
[3], &incr
) != REDIS_OK
) return;
6165 if (o
->encoding
== REDIS_ENCODING_ZIPMAP
) {
6166 unsigned char *zm
= o
->ptr
;
6167 unsigned char *zval
;
6170 /* Find value if already present in hash */
6171 if (zipmapGet(zm
,c
->argv
[2]->ptr
,sdslen(c
->argv
[2]->ptr
),
6173 /* strtoll needs the char* to have a trailing \0, but
6174 * the zipmap doesn't include them. */
6175 sds szval
= sdsnewlen(zval
, zvlen
);
6176 value
= strtoll(szval
,NULL
,10);
6181 sds svalue
= sdscatprintf(sdsempty(),"%lld",value
);
6182 zm
= zipmapSet(zm
,c
->argv
[2]->ptr
,sdslen(c
->argv
[2]->ptr
),
6183 (unsigned char*)svalue
,sdslen(svalue
),NULL
);
6187 /* Check if the zipmap needs to be converted. */
6188 if (zipmapLen(zm
) > server
.hash_max_zipmap_entries
)
6189 convertToRealHash(o
);
6194 /* Find value if already present in hash */
6195 de
= dictFind(o
->ptr
,c
->argv
[2]);
6197 hval
= dictGetEntryVal(de
);
6198 if (hval
->encoding
== REDIS_ENCODING_RAW
)
6199 value
= strtoll(hval
->ptr
,NULL
,10);
6200 else if (hval
->encoding
== REDIS_ENCODING_INT
)
6201 value
= (long)hval
->ptr
;
6203 redisAssert(1 != 1);
6207 hval
= createObject(REDIS_STRING
,sdscatprintf(sdsempty(),"%lld",value
));
6208 hval
= tryObjectEncoding(hval
);
6209 if (dictReplace(o
->ptr
,c
->argv
[2],hval
)) {
6210 incrRefCount(c
->argv
[2]);
6215 addReplyLongLong(c
, value
);
6218 static void hgetCommand(redisClient
*c
) {
6221 if ((o
= lookupKeyReadOrReply(c
,c
->argv
[1],shared
.nullbulk
)) == NULL
||
6222 checkType(c
,o
,REDIS_HASH
)) return;
6224 if (o
->encoding
== REDIS_ENCODING_ZIPMAP
) {
6225 unsigned char *zm
= o
->ptr
;
6230 field
= getDecodedObject(c
->argv
[2]);
6231 if (zipmapGet(zm
,field
->ptr
,sdslen(field
->ptr
), &val
,&vlen
)) {
6232 addReplySds(c
,sdscatprintf(sdsempty(),"$%u\r\n", vlen
));
6233 addReplySds(c
,sdsnewlen(val
,vlen
));
6234 addReply(c
,shared
.crlf
);
6235 decrRefCount(field
);
6238 addReply(c
,shared
.nullbulk
);
6239 decrRefCount(field
);
6243 struct dictEntry
*de
;
6245 de
= dictFind(o
->ptr
,c
->argv
[2]);
6247 addReply(c
,shared
.nullbulk
);
6249 robj
*e
= dictGetEntryVal(de
);
6256 static void hdelCommand(redisClient
*c
) {
6260 if ((o
= lookupKeyWriteOrReply(c
,c
->argv
[1],shared
.czero
)) == NULL
||
6261 checkType(c
,o
,REDIS_HASH
)) return;
6263 if (o
->encoding
== REDIS_ENCODING_ZIPMAP
) {
6264 robj
*field
= getDecodedObject(c
->argv
[2]);
6266 o
->ptr
= zipmapDel((unsigned char*) o
->ptr
,
6267 (unsigned char*) field
->ptr
,
6268 sdslen(field
->ptr
), &deleted
);
6269 decrRefCount(field
);
6270 if (zipmapLen((unsigned char*) o
->ptr
) == 0)
6271 deleteKey(c
->db
,c
->argv
[1]);
6273 deleted
= dictDelete((dict
*)o
->ptr
,c
->argv
[2]) == DICT_OK
;
6274 if (htNeedsResize(o
->ptr
)) dictResize(o
->ptr
);
6275 if (dictSize((dict
*)o
->ptr
) == 0) deleteKey(c
->db
,c
->argv
[1]);
6277 if (deleted
) server
.dirty
++;
6278 addReply(c
,deleted
? shared
.cone
: shared
.czero
);
6281 static void hlenCommand(redisClient
*c
) {
6285 if ((o
= lookupKeyReadOrReply(c
,c
->argv
[1],shared
.czero
)) == NULL
||
6286 checkType(c
,o
,REDIS_HASH
)) return;
6288 len
= (o
->encoding
== REDIS_ENCODING_ZIPMAP
) ?
6289 zipmapLen((unsigned char*)o
->ptr
) : dictSize((dict
*)o
->ptr
);
6290 addReplyUlong(c
,len
);
6293 #define REDIS_GETALL_KEYS 1
6294 #define REDIS_GETALL_VALS 2
6295 static void genericHgetallCommand(redisClient
*c
, int flags
) {
6297 unsigned long count
= 0;
6299 if ((o
= lookupKeyReadOrReply(c
,c
->argv
[1],shared
.emptymultibulk
)) == NULL
6300 || checkType(c
,o
,REDIS_HASH
)) return;
6302 lenobj
= createObject(REDIS_STRING
,NULL
);
6304 decrRefCount(lenobj
);
6306 if (o
->encoding
== REDIS_ENCODING_ZIPMAP
) {
6307 unsigned char *p
= zipmapRewind(o
->ptr
);
6308 unsigned char *field
, *val
;
6309 unsigned int flen
, vlen
;
6311 while((p
= zipmapNext(p
,&field
,&flen
,&val
,&vlen
)) != NULL
) {
6314 if (flags
& REDIS_GETALL_KEYS
) {
6315 aux
= createStringObject((char*)field
,flen
);
6316 addReplyBulk(c
,aux
);
6320 if (flags
& REDIS_GETALL_VALS
) {
6321 aux
= createStringObject((char*)val
,vlen
);
6322 addReplyBulk(c
,aux
);
6328 dictIterator
*di
= dictGetIterator(o
->ptr
);
6331 while((de
= dictNext(di
)) != NULL
) {
6332 robj
*fieldobj
= dictGetEntryKey(de
);
6333 robj
*valobj
= dictGetEntryVal(de
);
6335 if (flags
& REDIS_GETALL_KEYS
) {
6336 addReplyBulk(c
,fieldobj
);
6339 if (flags
& REDIS_GETALL_VALS
) {
6340 addReplyBulk(c
,valobj
);
6344 dictReleaseIterator(di
);
6346 lenobj
->ptr
= sdscatprintf(sdsempty(),"*%lu\r\n",count
);
6349 static void hkeysCommand(redisClient
*c
) {
6350 genericHgetallCommand(c
,REDIS_GETALL_KEYS
);
6353 static void hvalsCommand(redisClient
*c
) {
6354 genericHgetallCommand(c
,REDIS_GETALL_VALS
);
6357 static void hgetallCommand(redisClient
*c
) {
6358 genericHgetallCommand(c
,REDIS_GETALL_KEYS
|REDIS_GETALL_VALS
);
6361 static void hexistsCommand(redisClient
*c
) {
6365 if ((o
= lookupKeyReadOrReply(c
,c
->argv
[1],shared
.czero
)) == NULL
||
6366 checkType(c
,o
,REDIS_HASH
)) return;
6368 if (o
->encoding
== REDIS_ENCODING_ZIPMAP
) {
6370 unsigned char *zm
= o
->ptr
;
6372 field
= getDecodedObject(c
->argv
[2]);
6373 exists
= zipmapExists(zm
,field
->ptr
,sdslen(field
->ptr
));
6374 decrRefCount(field
);
6376 exists
= dictFind(o
->ptr
,c
->argv
[2]) != NULL
;
6378 addReply(c
,exists
? shared
.cone
: shared
.czero
);
6381 static void convertToRealHash(robj
*o
) {
6382 unsigned char *key
, *val
, *p
, *zm
= o
->ptr
;
6383 unsigned int klen
, vlen
;
6384 dict
*dict
= dictCreate(&hashDictType
,NULL
);
6386 assert(o
->type
== REDIS_HASH
&& o
->encoding
!= REDIS_ENCODING_HT
);
6387 p
= zipmapRewind(zm
);
6388 while((p
= zipmapNext(p
,&key
,&klen
,&val
,&vlen
)) != NULL
) {
6389 robj
*keyobj
, *valobj
;
6391 keyobj
= createStringObject((char*)key
,klen
);
6392 valobj
= createStringObject((char*)val
,vlen
);
6393 keyobj
= tryObjectEncoding(keyobj
);
6394 valobj
= tryObjectEncoding(valobj
);
6395 dictAdd(dict
,keyobj
,valobj
);
6397 o
->encoding
= REDIS_ENCODING_HT
;
6402 /* ========================= Non type-specific commands ==================== */
6404 static void flushdbCommand(redisClient
*c
) {
6405 server
.dirty
+= dictSize(c
->db
->dict
);
6406 dictEmpty(c
->db
->dict
);
6407 dictEmpty(c
->db
->expires
);
6408 addReply(c
,shared
.ok
);
6411 static void flushallCommand(redisClient
*c
) {
6412 server
.dirty
+= emptyDb();
6413 addReply(c
,shared
.ok
);
6414 if (server
.bgsavechildpid
!= -1) {
6415 kill(server
.bgsavechildpid
,SIGKILL
);
6416 rdbRemoveTempFile(server
.bgsavechildpid
);
6418 rdbSave(server
.dbfilename
);
6422 static redisSortOperation
*createSortOperation(int type
, robj
*pattern
) {
6423 redisSortOperation
*so
= zmalloc(sizeof(*so
));
6425 so
->pattern
= pattern
;
6429 /* Return the value associated to the key with a name obtained
6430 * substituting the first occurrence of '*' in 'pattern' with 'subst' */
6431 static robj
*lookupKeyByPattern(redisDb
*db
, robj
*pattern
, robj
*subst
) {
6435 int prefixlen
, sublen
, postfixlen
;
6436 /* Exploit the internal sds representation to create a sds string allocated on the stack in order to make this function faster */
6440 char buf
[REDIS_SORTKEY_MAX
+1];
6443 /* If the pattern is "#" return the substitution object itself in order
6444 * to implement the "SORT ... GET #" feature. */
6445 spat
= pattern
->ptr
;
6446 if (spat
[0] == '#' && spat
[1] == '\0') {
6450 /* The substitution object may be specially encoded. If so we create
6451 * a decoded object on the fly. Otherwise getDecodedObject will just
6452 * increment the ref count, that we'll decrement later. */
6453 subst
= getDecodedObject(subst
);
6456 if (sdslen(spat
)+sdslen(ssub
)-1 > REDIS_SORTKEY_MAX
) return NULL
;
6457 p
= strchr(spat
,'*');
6459 decrRefCount(subst
);
6464 sublen
= sdslen(ssub
);
6465 postfixlen
= sdslen(spat
)-(prefixlen
+1);
6466 memcpy(keyname
.buf
,spat
,prefixlen
);
6467 memcpy(keyname
.buf
+prefixlen
,ssub
,sublen
);
6468 memcpy(keyname
.buf
+prefixlen
+sublen
,p
+1,postfixlen
);
6469 keyname
.buf
[prefixlen
+sublen
+postfixlen
] = '\0';
6470 keyname
.len
= prefixlen
+sublen
+postfixlen
;
6472 initStaticStringObject(keyobj
,((char*)&keyname
)+(sizeof(long)*2))
6473 decrRefCount(subst
);
6475 /* printf("lookup '%s' => %p\n", keyname.buf,de); */
6476 return lookupKeyRead(db
,&keyobj
);
6479 /* sortCompare() is used by qsort in sortCommand(). Given that qsort_r with
6480 * the additional parameter is not standard but BSD-specific, we have to
6481 * pass sorting parameters via the global 'server' structure */
6482 static int sortCompare(const void *s1
, const void *s2
) {
6483 const redisSortObject
*so1
= s1
, *so2
= s2
;
6486 if (!server
.sort_alpha
) {
6487 /* Numeric sorting. Here it's trivial as we precomputed scores */
6488 if (so1
->u
.score
> so2
->u
.score
) {
6490 } else if (so1
->u
.score
< so2
->u
.score
) {
6496 /* Alphanumeric sorting */
6497 if (server
.sort_bypattern
) {
6498 if (!so1
->u
.cmpobj
|| !so2
->u
.cmpobj
) {
6499 /* At least one compare object is NULL */
6500 if (so1
->u
.cmpobj
== so2
->u
.cmpobj
)
6502 else if (so1
->u
.cmpobj
== NULL
)
6507 /* We have both the objects, use strcoll */
6508 cmp
= strcoll(so1
->u
.cmpobj
->ptr
,so2
->u
.cmpobj
->ptr
);
6511 /* Compare elements directly */
6514 dec1
= getDecodedObject(so1
->obj
);
6515 dec2
= getDecodedObject(so2
->obj
);
6516 cmp
= strcoll(dec1
->ptr
,dec2
->ptr
);
6521 return server
.sort_desc
? -cmp
: cmp
;
6524 /* The SORT command is the most complex command in Redis. Warning: this code
6525 * is optimized for speed and a bit less for readability */
6526 static void sortCommand(redisClient
*c
) {
6529 int desc
= 0, alpha
= 0;
6530 int limit_start
= 0, limit_count
= -1, start
, end
;
6531 int j
, dontsort
= 0, vectorlen
;
6532 int getop
= 0; /* GET operation counter */
6533 robj
*sortval
, *sortby
= NULL
, *storekey
= NULL
;
6534 redisSortObject
*vector
; /* Resulting vector to sort */
6536 /* Lookup the key to sort. It must be of the right types */
6537 sortval
= lookupKeyRead(c
->db
,c
->argv
[1]);
6538 if (sortval
== NULL
) {
6539 addReply(c
,shared
.emptymultibulk
);
6542 if (sortval
->type
!= REDIS_SET
&& sortval
->type
!= REDIS_LIST
&&
6543 sortval
->type
!= REDIS_ZSET
)
6545 addReply(c
,shared
.wrongtypeerr
);
6549 /* Create a list of operations to perform for every sorted element.
6550 * Operations can be GET/DEL/INCR/DECR */
6551 operations
= listCreate();
6552 listSetFreeMethod(operations
,zfree
);
6555 /* Now we need to protect sortval incrementing its count, in the future
6556 * SORT may have options able to overwrite/delete keys during the sorting
6557 * and the sorted key itself may get destroyed */
6558 incrRefCount(sortval
);
6560 /* The SORT command has an SQL-alike syntax, parse it */
6561 while(j
< c
->argc
) {
6562 int leftargs
= c
->argc
-j
-1;
6563 if (!strcasecmp(c
->argv
[j
]->ptr
,"asc")) {
6565 } else if (!strcasecmp(c
->argv
[j
]->ptr
,"desc")) {
6567 } else if (!strcasecmp(c
->argv
[j
]->ptr
,"alpha")) {
6569 } else if (!strcasecmp(c
->argv
[j
]->ptr
,"limit") && leftargs
>= 2) {
6570 limit_start
= atoi(c
->argv
[j
+1]->ptr
);
6571 limit_count
= atoi(c
->argv
[j
+2]->ptr
);
6573 } else if (!strcasecmp(c
->argv
[j
]->ptr
,"store") && leftargs
>= 1) {
6574 storekey
= c
->argv
[j
+1];
6576 } else if (!strcasecmp(c
->argv
[j
]->ptr
,"by") && leftargs
>= 1) {
6577 sortby
= c
->argv
[j
+1];
6578 /* If the BY pattern does not contain '*', i.e. it is constant,
6579 * we don't need to sort nor to lookup the weight keys. */
6580 if (strchr(c
->argv
[j
+1]->ptr
,'*') == NULL
) dontsort
= 1;
6582 } else if (!strcasecmp(c
->argv
[j
]->ptr
,"get") && leftargs
>= 1) {
6583 listAddNodeTail(operations
,createSortOperation(
6584 REDIS_SORT_GET
,c
->argv
[j
+1]));
6588 decrRefCount(sortval
);
6589 listRelease(operations
);
6590 addReply(c
,shared
.syntaxerr
);
6596 /* Load the sorting vector with all the objects to sort */
6597 switch(sortval
->type
) {
6598 case REDIS_LIST
: vectorlen
= listLength((list
*)sortval
->ptr
); break;
6599 case REDIS_SET
: vectorlen
= dictSize((dict
*)sortval
->ptr
); break;
6600 case REDIS_ZSET
: vectorlen
= dictSize(((zset
*)sortval
->ptr
)->dict
); break;
6601 default: vectorlen
= 0; redisAssert(0); /* Avoid GCC warning */
6603 vector
= zmalloc(sizeof(redisSortObject
)*vectorlen
);
6606 if (sortval
->type
== REDIS_LIST
) {
6607 list
*list
= sortval
->ptr
;
6611 listRewind(list
,&li
);
6612 while((ln
= listNext(&li
))) {
6613 robj
*ele
= ln
->value
;
6614 vector
[j
].obj
= ele
;
6615 vector
[j
].u
.score
= 0;
6616 vector
[j
].u
.cmpobj
= NULL
;
6624 if (sortval
->type
== REDIS_SET
) {
6627 zset
*zs
= sortval
->ptr
;
6631 di
= dictGetIterator(set
);
6632 while((setele
= dictNext(di
)) != NULL
) {
6633 vector
[j
].obj
= dictGetEntryKey(setele
);
6634 vector
[j
].u
.score
= 0;
6635 vector
[j
].u
.cmpobj
= NULL
;
6638 dictReleaseIterator(di
);
6640 redisAssert(j
== vectorlen
);
6642 /* Now it's time to load the right scores in the sorting vector */
6643 if (dontsort
== 0) {
6644 for (j
= 0; j
< vectorlen
; j
++) {
6648 byval
= lookupKeyByPattern(c
->db
,sortby
,vector
[j
].obj
);
6649 if (!byval
|| byval
->type
!= REDIS_STRING
) continue;
6651 vector
[j
].u
.cmpobj
= getDecodedObject(byval
);
6653 if (byval
->encoding
== REDIS_ENCODING_RAW
) {
6654 vector
[j
].u
.score
= strtod(byval
->ptr
,NULL
);
6656 /* Don't need to decode the object if it's
6657 * integer-encoded (the only encoding supported) so
6658 * far. We can just cast it */
6659 if (byval
->encoding
== REDIS_ENCODING_INT
) {
6660 vector
[j
].u
.score
= (long)byval
->ptr
;
6662 redisAssert(1 != 1);
6667 if (vector
[j
].obj
->encoding
== REDIS_ENCODING_RAW
)
6668 vector
[j
].u
.score
= strtod(vector
[j
].obj
->ptr
,NULL
);
6670 if (vector
[j
].obj
->encoding
== REDIS_ENCODING_INT
)
6671 vector
[j
].u
.score
= (long) vector
[j
].obj
->ptr
;
6673 redisAssert(1 != 1);
6680 /* We are ready to sort the vector... perform a bit of sanity check
6681 * on the LIMIT option too. We'll use a partial version of quicksort. */
6682 start
= (limit_start
< 0) ? 0 : limit_start
;
6683 end
= (limit_count
< 0) ? vectorlen
-1 : start
+limit_count
-1;
6684 if (start
>= vectorlen
) {
6685 start
= vectorlen
-1;
6688 if (end
>= vectorlen
) end
= vectorlen
-1;
6690 if (dontsort
== 0) {
6691 server
.sort_desc
= desc
;
6692 server
.sort_alpha
= alpha
;
6693 server
.sort_bypattern
= sortby
? 1 : 0;
6694 if (sortby
&& (start
!= 0 || end
!= vectorlen
-1))
6695 pqsort(vector
,vectorlen
,sizeof(redisSortObject
),sortCompare
, start
,end
);
6697 qsort(vector
,vectorlen
,sizeof(redisSortObject
),sortCompare
);
6700 /* Send command output to the output buffer, performing the specified
6701 * GET/DEL/INCR/DECR operations if any. */
6702 outputlen
= getop
? getop
*(end
-start
+1) : end
-start
+1;
6703 if (storekey
== NULL
) {
6704 /* STORE option not specified, send the sorting result to the client */
6705 addReplySds(c
,sdscatprintf(sdsempty(),"*%d\r\n",outputlen
));
6706 for (j
= start
; j
<= end
; j
++) {
6710 if (!getop
) addReplyBulk(c
,vector
[j
].obj
);
6711 listRewind(operations
,&li
);
6712 while((ln
= listNext(&li
))) {
6713 redisSortOperation
*sop
= ln
->value
;
6714 robj
*val
= lookupKeyByPattern(c
->db
,sop
->pattern
,
6717 if (sop
->type
== REDIS_SORT_GET
) {
6718 if (!val
|| val
->type
!= REDIS_STRING
) {
6719 addReply(c
,shared
.nullbulk
);
6721 addReplyBulk(c
,val
);
6724 redisAssert(sop
->type
== REDIS_SORT_GET
); /* always fails */
6729 robj
*listObject
= createListObject();
6730 list
*listPtr
= (list
*) listObject
->ptr
;
6732 /* STORE option specified, set the sorting result as a List object */
6733 for (j
= start
; j
<= end
; j
++) {
6738 listAddNodeTail(listPtr
,vector
[j
].obj
);
6739 incrRefCount(vector
[j
].obj
);
6741 listRewind(operations
,&li
);
6742 while((ln
= listNext(&li
))) {
6743 redisSortOperation
*sop
= ln
->value
;
6744 robj
*val
= lookupKeyByPattern(c
->db
,sop
->pattern
,
6747 if (sop
->type
== REDIS_SORT_GET
) {
6748 if (!val
|| val
->type
!= REDIS_STRING
) {
6749 listAddNodeTail(listPtr
,createStringObject("",0));
6751 listAddNodeTail(listPtr
,val
);
6755 redisAssert(sop
->type
== REDIS_SORT_GET
); /* always fails */
6759 if (dictReplace(c
->db
->dict
,storekey
,listObject
)) {
6760 incrRefCount(storekey
);
6762 /* Note: we add 1 because the DB is dirty anyway since even if the
6763 * SORT result is empty a new key is set and maybe the old content
6765 server
.dirty
+= 1+outputlen
;
6766 addReplySds(c
,sdscatprintf(sdsempty(),":%d\r\n",outputlen
));
6770 decrRefCount(sortval
);
6771 listRelease(operations
);
6772 for (j
= 0; j
< vectorlen
; j
++) {
6773 if (sortby
&& alpha
&& vector
[j
].u
.cmpobj
)
6774 decrRefCount(vector
[j
].u
.cmpobj
);
/* Convert an amount of bytes into a human readable string in the form
 * of 100B, 2G, 100M, 4K, and so forth.
 *
 * 's' is the destination buffer: it must be large enough for the longest
 * output plus the terminating NUL (at most 21 characters for the plain
 * "<n>B" form, fewer for the scaled forms).
 * 'n' is the amount of bytes to format.
 *
 * Values below 1024 are printed as an integer followed by 'B'. Larger
 * values are scaled by the biggest power of 1024 not greater than 'n'
 * and printed with two decimals followed by K, M, G, T, P or E. The
 * buffer is written for every possible value of 'n'. */
static void bytesToHuman(char *s, unsigned long long n) {
    static const char suffix[] = "KMGTPE";
    unsigned long long unit = 1024;
    int idx = 0;

    if (n < 1024) {
        /* Bytes */
        sprintf(s,"%lluB",n);
        return;
    }
    /* Pick the largest unit such that n/unit < 1024. The idx bound stops
     * at 'E' (1024^6 == 2^60) so that unit*1024 can never overflow. */
    while (idx < 5 && n >= unit*1024) {
        unit *= 1024;
        idx++;
    }
    sprintf(s,"%.2f%c",(double)n/unit,suffix[idx]);
}
6800 /* Create the string returned by the INFO command. This is decoupled
6801 * from the INFO command itself as we need to report the same information
6802 * on memory corruption problems.
 *
 * The result is an sds string built with sdscatprintf(): a fixed set of
 * "name:value\r\n" fields, followed by the master_* fields when
 * server.masterhost is set, by the vm_* fields when server.vm_enabled is
 * set, and by one "dbN:keys=...,expires=..." line for every database
 * whose main or expires dictionary is not empty. */
6803 static sds
genRedisInfoString(void) {
/* Seconds elapsed since server.stat_starttime */
6805 time_t uptime
= time(NULL
)-server
.stat_starttime
;
/* Human readable form of the memory currently in use */
6809 bytesToHuman(hmem
,zmalloc_used_memory());
6810 info
= sdscatprintf(sdsempty(),
6811 "redis_version:%s\r\n"
6813 "multiplexing_api:%s\r\n"
6814 "process_id:%ld\r\n"
6815 "uptime_in_seconds:%ld\r\n"
6816 "uptime_in_days:%ld\r\n"
6817 "connected_clients:%d\r\n"
6818 "connected_slaves:%d\r\n"
6819 "blocked_clients:%d\r\n"
6820 "used_memory:%zu\r\n"
6821 "used_memory_human:%s\r\n"
6822 "changes_since_last_save:%lld\r\n"
6823 "bgsave_in_progress:%d\r\n"
6824 "last_save_time:%ld\r\n"
6825 "bgrewriteaof_in_progress:%d\r\n"
6826 "total_connections_received:%lld\r\n"
6827 "total_commands_processed:%lld\r\n"
6828 "expired_keys:%lld\r\n"
6829 "hash_max_zipmap_entries:%ld\r\n"
6830 "hash_max_zipmap_value:%ld\r\n"
6831 "pubsub_channels:%ld\r\n"
6832 "pubsub_patterns:%u\r\n"
6836 (sizeof(long) == 8) ? "64" : "32",
6841 listLength(server
.clients
)-listLength(server
.slaves
),
6842 listLength(server
.slaves
),
6843 server
.blpop_blocked_clients
,
6844 zmalloc_used_memory(),
6847 server
.bgsavechildpid
!= -1,
6849 server
.bgrewritechildpid
!= -1,
6850 server
.stat_numconnections
,
6851 server
.stat_numcommands
,
6852 server
.stat_expiredkeys
,
6853 server
.hash_max_zipmap_entries
,
6854 server
.hash_max_zipmap_value
,
6855 dictSize(server
.pubsub_channels
),
6856 listLength(server
.pubsub_patterns
),
6857 server
.vm_enabled
!= 0,
6858 server
.masterhost
== NULL
? "master" : "slave"
/* Replication details, only when a master host is configured */
6860 if (server
.masterhost
) {
6861 info
= sdscatprintf(info
,
6862 "master_host:%s\r\n"
6863 "master_port:%d\r\n"
6864 "master_link_status:%s\r\n"
6865 "master_last_io_seconds_ago:%d\r\n"
6868 (server
.replstate
== REDIS_REPL_CONNECTED
) ?
6870 server
.master
? ((int)(time(NULL
)-server
.master
->lastinteraction
)) : -1
/* Virtual memory configuration and statistics, only when VM is enabled */
6873 if (server
.vm_enabled
) {
6875 info
= sdscatprintf(info
,
6876 "vm_conf_max_memory:%llu\r\n"
6877 "vm_conf_page_size:%llu\r\n"
6878 "vm_conf_pages:%llu\r\n"
6879 "vm_stats_used_pages:%llu\r\n"
6880 "vm_stats_swapped_objects:%llu\r\n"
6881 "vm_stats_swappin_count:%llu\r\n"
6882 "vm_stats_swappout_count:%llu\r\n"
6883 "vm_stats_io_newjobs_len:%lu\r\n"
6884 "vm_stats_io_processing_len:%lu\r\n"
6885 "vm_stats_io_processed_len:%lu\r\n"
6886 "vm_stats_io_active_threads:%lu\r\n"
6887 "vm_stats_blocked_clients:%lu\r\n"
6888 ,(unsigned long long) server
.vm_max_memory
,
6889 (unsigned long long) server
.vm_page_size
,
6890 (unsigned long long) server
.vm_pages
,
6891 (unsigned long long) server
.vm_stats_used_pages
,
6892 (unsigned long long) server
.vm_stats_swapped_objects
,
6893 (unsigned long long) server
.vm_stats_swapins
,
6894 (unsigned long long) server
.vm_stats_swapouts
,
6895 (unsigned long) listLength(server
.io_newjobs
),
6896 (unsigned long) listLength(server
.io_processing
),
6897 (unsigned long) listLength(server
.io_processed
),
6898 (unsigned long) server
.io_active_threads
,
6899 (unsigned long) server
.vm_blocked_clients
/* Per-database counters: a database is listed only if it holds at least
 * one key or one expire entry */
6903 for (j
= 0; j
< server
.dbnum
; j
++) {
6904 long long keys
, vkeys
;
6906 keys
= dictSize(server
.db
[j
].dict
);
6907 vkeys
= dictSize(server
.db
[j
].expires
);
6908 if (keys
|| vkeys
) {
6909 info
= sdscatprintf(info
, "db%d:keys=%lld,expires=%lld\r\n",
6916 static void infoCommand(redisClient
*c
) {
6917 sds info
= genRedisInfoString();
6918 addReplySds(c
,sdscatprintf(sdsempty(),"$%lu\r\n",
6919 (unsigned long)sdslen(info
)));
6920 addReplySds(c
,info
);
6921 addReply(c
,shared
.crlf
);
/* MONITOR command: flag the client as REDIS_SLAVE|REDIS_MONITOR, append it
 * to server.monitors and reply with shared.ok. A client that already has
 * the REDIS_SLAVE flag set is left untouched and receives no reply. */
6924 static void monitorCommand(redisClient
*c
) {
6925 /* ignore MONITOR if already slave or in monitor mode */
6926 if (c
->flags
& REDIS_SLAVE
) return;
6928 c
->flags
|= (REDIS_SLAVE
|REDIS_MONITOR
);
6930 listAddNodeTail(server
.monitors
,c
);
6931 addReply(c
,shared
.ok
);
6934 /* ================================= Expire ================================= */
/* Remove the expire associated with 'key', if any, by deleting the key
 * from the db->expires dictionary. The main dictionary is not touched.
 * NOTE(review): presumably returns 1 when dictDelete() reports DICT_OK
 * and 0 otherwise -- TODO confirm against the return statements. */
6935 static int removeExpire(redisDb
*db
, robj
*key
) {
6936 if (dictDelete(db
->expires
,key
) == DICT_OK
) {
/* Associate the expire time 'when' with 'key' by adding an entry to the
 * db->expires dictionary. The time_t value is not allocated: it is stored
 * directly in the entry value slot through a cast to void*.
 * dictAdd() failing with DICT_ERR is handled as a separate branch;
 * expireGenericCommand() replies shared.cone when this function returns
 * non-zero and shared.czero otherwise.
 * NOTE(review): presumably DICT_ERR means the key already has an expire
 * set -- TODO confirm. */
6943 static int setExpire(redisDb
*db
, robj
*key
, time_t when
) {
6944 if (dictAdd(db
->expires
,key
,(void*)when
) == DICT_ERR
) {
6952 /* Return the expire time of the specified key, or -1 if no expire
6953 * is associated with this key (i.e. the key is non volatile) */
6954 static time_t getExpire(redisDb
*db
, robj
*key
) {
6957 /* No expire? return ASAP */
6958 if (dictSize(db
->expires
) == 0 ||
6959 (de
= dictFind(db
->expires
,key
)) == NULL
) return -1;
6961 return (time_t) dictGetEntryVal(de
);
6964 static int expireIfNeeded(redisDb
*db
, robj
*key
) {
6968 /* No expire? return ASAP */
6969 if (dictSize(db
->expires
) == 0 ||
6970 (de
= dictFind(db
->expires
,key
)) == NULL
) return 0;
6972 /* Lookup the expire */
6973 when
= (time_t) dictGetEntryVal(de
);
6974 if (time(NULL
) <= when
) return 0;
6976 /* Delete the key */
6977 dictDelete(db
->expires
,key
);
6978 server
.stat_expiredkeys
++;
6979 return dictDelete(db
->dict
,key
) == DICT_OK
;
/* Delete 'key' if it has an associated expire, without comparing the
 * expire time against the current time (unlike expireIfNeeded()).
 * Returns 0 when the key has no entry in db->expires; otherwise the entry
 * is removed from db->expires, server.stat_expiredkeys is incremented and
 * the return value tells whether the removal from db->dict succeeded. */
6982 static int deleteIfVolatile(redisDb
*db
, robj
*key
) {
6985 /* No expire? return ASAP */
6986 if (dictSize(db
->expires
) == 0 ||
6987 (de
= dictFind(db
->expires
,key
)) == NULL
) return 0;
6989 /* Delete the key */
6991 server
.stat_expiredkeys
++;
6992 dictDelete(db
->expires
,key
);
6993 return dictDelete(db
->dict
,key
) == DICT_OK
;
/* Shared implementation of the commands setting an expire on a key (it is
 * called by expireCommand() with offset 0 and by expireatCommand() with
 * the current time as offset).
 *
 * 'param' is converted to a long with getLongFromObject(); when the
 * conversion does not return REDIS_OK this function returns without
 * adding anything else to the reply. The key is then looked up in the
 * main dictionary. Replies are shared.czero or shared.cone; one path
 * deletes the key instead of setting an expire, the other computes the
 * expire time as time(NULL)+seconds and passes it to setExpire().
 * NOTE(review): presumably 'offset' is subtracted from the parsed value,
 * czero is the reply for a missing key, and the delete path is taken for
 * a negative number of seconds -- TODO confirm. */
6996 static void expireGenericCommand(redisClient
*c
, robj
*key
, robj
*param
, long offset
) {
7000 if (getLongFromObject(c
, param
, &seconds
) != REDIS_OK
) return;
7004 de
= dictFind(c
->db
->dict
,key
);
7006 addReply(c
,shared
.czero
);
7010 if (deleteKey(c
->db
,key
)) server
.dirty
++;
7011 addReply(c
, shared
.cone
);
7014 time_t when
= time(NULL
)+seconds
;
7015 if (setExpire(c
->db
,key
,when
)) {
7016 addReply(c
,shared
.cone
);
7019 addReply(c
,shared
.czero
);
7025 static void expireCommand(redisClient
*c
) {
7026 expireGenericCommand(c
,c
->argv
[1],c
->argv
[2],0);
7029 static void expireatCommand(redisClient
*c
) {
7030 expireGenericCommand(c
,c
->argv
[1],c
->argv
[2],time(NULL
));
/* TTL command: reply with an integer (":<n>\r\n") derived from the expire
 * time getExpire() reports for argv[1]. The value is the difference
 * between the expire time and time(NULL), truncated to int; a negative
 * difference is replied as -1.
 * NOTE(review): presumably -1 is also the reply when getExpire() returns
 * -1 (key without an expire) -- TODO confirm the guard and initializer. */
7033 static void ttlCommand(redisClient
*c
) {
7037 expire
= getExpire(c
->db
,c
->argv
[1]);
7039 ttl
= (int) (expire
-time(NULL
));
7040 if (ttl
< 0) ttl
= -1;
7042 addReplySds(c
,sdscatprintf(sdsempty(),":%d\r\n",ttl
));
7045 /* ================================ MULTI/EXEC ============================== */
7047 /* Client state initialization for MULTI/EXEC */
7048 static void initClientMultiState(redisClient
*c
) {
7049 c
->mstate
.commands
= NULL
;
7050 c
->mstate
.count
= 0;
7053 /* Release all the resources associated with MULTI/EXEC state:
 * every argument of every queued command is released with decrRefCount()
 * and finally the commands array itself is freed. The fields of
 * c->mstate are not reset here: in this file the callers invoke
 * initClientMultiState() right after. */
7054 static void freeClientMultiState(redisClient
*c
) {
7057 for (j
= 0; j
< c
->mstate
.count
; j
++) {
7059 multiCmd
*mc
= c
->mstate
.commands
+j
;
/* Drop the reference queueMultiCommand() took on each argument */
7061 for (i
= 0; i
< mc
->argc
; i
++)
7062 decrRefCount(mc
->argv
[i
]);
7065 zfree(c
->mstate
.commands
);
7068 /* Add a new command into the MULTI commands queue.
 * The queue is an array of multiCmd grown by one element on every call.
 * The argument vector of the client is copied (the pointers, not the
 * objects) and every argument object is retained with incrRefCount(). */
7069 static void queueMultiCommand(redisClient
*c
, struct redisCommand
*cmd
) {
/* Make room for one more entry; 'mc' is the new last slot */
7073 c
->mstate
.commands
= zrealloc(c
->mstate
.commands
,
7074 sizeof(multiCmd
)*(c
->mstate
.count
+1));
7075 mc
= c
->mstate
.commands
+c
->mstate
.count
;
/* Private copy of argv, sharing the argument objects with the client */
7078 mc
->argv
= zmalloc(sizeof(robj
*)*c
->argc
);
7079 memcpy(mc
->argv
,c
->argv
,sizeof(robj
*)*c
->argc
);
7080 for (j
= 0; j
< c
->argc
; j
++)
7081 incrRefCount(mc
->argv
[j
]);
7085 static void multiCommand(redisClient
*c
) {
7086 c
->flags
|= REDIS_MULTI
;
7087 addReply(c
,shared
.ok
);
7090 static void discardCommand(redisClient
*c
) {
7091 if (!(c
->flags
& REDIS_MULTI
)) {
7092 addReplySds(c
,sdsnew("-ERR DISCARD without MULTI\r\n"));
7096 freeClientMultiState(c
);
7097 initClientMultiState(c
);
7098 c
->flags
&= (~REDIS_MULTI
);
7099 addReply(c
,shared
.ok
);
7102 static void execCommand(redisClient
*c
) {
7107 if (!(c
->flags
& REDIS_MULTI
)) {
7108 addReplySds(c
,sdsnew("-ERR EXEC without MULTI\r\n"));
7112 orig_argv
= c
->argv
;
7113 orig_argc
= c
->argc
;
7114 addReplySds(c
,sdscatprintf(sdsempty(),"*%d\r\n",c
->mstate
.count
));
7115 for (j
= 0; j
< c
->mstate
.count
; j
++) {
7116 c
->argc
= c
->mstate
.commands
[j
].argc
;
7117 c
->argv
= c
->mstate
.commands
[j
].argv
;
7118 call(c
,c
->mstate
.commands
[j
].cmd
);
7120 c
->argv
= orig_argv
;
7121 c
->argc
= orig_argc
;
7122 freeClientMultiState(c
);
7123 initClientMultiState(c
);
7124 c
->flags
&= (~REDIS_MULTI
);
7127 /* =========================== Blocking Operations ========================= */
7129 /* Currently Redis blocking operations support is limited to list POP ops,
7130 * so the current implementation is not fully generic, but it is also not
7131 * completely specific so it will not require a rewrite to support new
7132 * kind of blocking operations in the future.
7134 * Still it's important to note that list blocking operations can be already
7135 * used as a notification mechanism in order to implement other blocking
7136 * operations at application level, so there must be a very strong evidence
7137 * of usefulness and generality before new blocking operations are implemented.
7139 * This is how the current blocking POP works, using BLPOP as an example:
7140 * - If the user calls BLPOP and the key exists and contains a non-empty list
7141 * then LPOP is called instead. So BLPOP is semantically the same as LPOP
7142 * when there is no need to block.
7143 * - If instead BLPOP is called and the key does not exist or the list is
7144 * empty we need to block. In order to do so we remove the notification for
7145 * new data to read in the client socket (so that we'll not serve new
7146 * requests if the blocking request is not served). Also we put the client
7147 * in a dictionary (db->blockingkeys) mapping keys to a list of clients
7148 * blocking for these keys.
7149 * - If a PUSH operation against a key with blocked clients waiting is
7150 * performed, we serve the first in the list: basically instead of pushing
7151 * the new element inside the list we return it to the (first / oldest)
7152 * blocking client, unblock the client, and remove it from the list.
7154 * The above comment and the source code should be enough in order to understand
7155 * the implementation and modify / fix it later.
7158 /* Set a client in blocking mode for the specified key, with the specified
/* Register the client 'c' as blocked on each of the 'numkeys' keys found
 * in the 'keys' array.
 *
 * The keys are stored (and retained) in c->blockingkeys, while 'timeout'
 * is stored as it is in c->blockingto. For every key the client is also
 * appended to the list of clients associated with that key in the
 * c->db->blockingkeys dictionary. Finally the client gets the
 * REDIS_BLOCKED flag and server.blpop_blocked_clients is incremented. */
7160 static void blockForKeys(redisClient
*c
, robj
**keys
, int numkeys
, time_t timeout
) {
7165 c
->blockingkeys
= zmalloc(sizeof(robj
*)*numkeys
);
7166 c
->blockingkeysnum
= numkeys
;
7167 c
->blockingto
= timeout
;
7168 for (j
= 0; j
< numkeys
; j
++) {
7169 /* Add the key in the client structure, to map clients -> keys */
7170 c
->blockingkeys
[j
] = keys
[j
];
7171 incrRefCount(keys
[j
]);
7173 /* And in the other "side", to map keys -> clients */
7174 de
= dictFind(c
->db
->blockingkeys
,keys
[j
]);
7178 /* For every key we take a list of clients blocked for it */
7180 retval
= dictAdd(c
->db
->blockingkeys
,keys
[j
],l
);
/* The key object is retained once more after being used as dict key */
7181 incrRefCount(keys
[j
]);
7182 assert(retval
== DICT_OK
);
7184 l
= dictGetEntryVal(de
);
7186 listAddNodeTail(l
,c
);
7188 /* Mark the client as a blocked client */
7189 c
->flags
|= REDIS_BLOCKED
;
7190 server
.blpop_blocked_clients
++;
7193 /* Unblock a client that's waiting in a blocking operation such as BLPOP.
 * For every key in c->blockingkeys the client is removed from the list of
 * waiting clients kept in c->db->blockingkeys, and the key reference held
 * by the client is released. Then c->blockingkeys is freed and set to
 * NULL, the REDIS_BLOCKED flag is cleared and
 * server.blpop_blocked_clients is decremented. */
7194 static void unblockClientWaitingData(redisClient
*c
) {
7199 assert(c
->blockingkeys
!= NULL
);
7200 /* The client may wait for multiple keys, so unblock it for every key. */
7201 for (j
= 0; j
< c
->blockingkeysnum
; j
++) {
7202 /* Remove this client from the list of clients waiting for this key. */
7203 de
= dictFind(c
->db
->blockingkeys
,c
->blockingkeys
[j
]);
7205 l
= dictGetEntryVal(de
);
7206 listDelNode(l
,listSearchKey(l
,c
));
7207 /* If the list is empty we need to remove it to avoid wasting memory */
7208 if (listLength(l
) == 0)
7209 dictDelete(c
->db
->blockingkeys
,c
->blockingkeys
[j
]);
7210 decrRefCount(c
->blockingkeys
[j
]);
7212 /* Cleanup the client structure */
7213 zfree(c
->blockingkeys
);
7214 c
->blockingkeys
= NULL
;
7215 c
->flags
&= (~REDIS_BLOCKED
);
7216 server
.blpop_blocked_clients
--;
7217 /* We want to process data if there is some command waiting
7218 * in the input buffer. Note that this is safe even if
7219 * unblockClientWaitingData() gets called from freeClient() because
7220 * freeClient() will be smart enough to call this function
7221 * *after* c->querybuf was set to NULL. */
7222 if (c
->querybuf
&& sdslen(c
->querybuf
) > 0) processInputBuffer(c
);
7225 /* This should be called from any function PUSHing into lists.
7226 * 'c' is the "pushing client", 'key' is the key it is pushing data against,
7227 * 'ele' is the element pushed.
 *
7229 * If the function returns 0 there was no client waiting for a list push
 * against this key (no entry for 'key' in c->db->blockingkeys).
 * NOTE(review): the caller presumably performs the normal push in that
 * case -- TODO confirm.
 *
7232 * If the function returns 1 there was a client waiting for a list push
7233 * against this key, the element was passed to this client thus it's not
7234 * needed to actually add it to the list and the caller should return asap. */
7235 static int handleClientsWaitingListPush(redisClient
*c
, robj
*key
, robj
*ele
) {
7236 struct dictEntry
*de
;
7237 redisClient
*receiver
;
7241 de
= dictFind(c
->db
->blockingkeys
,key
);
7242 if (de
== NULL
) return 0;
7243 l
= dictGetEntryVal(de
);
7246 receiver
= ln
->value
;
/* The receiver gets a two elements multi bulk reply (key name and
 * element) and is then unblocked */
7248 addReplySds(receiver
,sdsnew("*2\r\n"));
7249 addReplyBulk(receiver
,key
);
7250 addReplyBulk(receiver
,ele
);
7251 unblockClientWaitingData(receiver
);
7255 /* Blocking RPOP/LPOP.
 * 'where' is REDIS_HEAD when called by blpopCommand() and REDIS_TAIL when
 * called by brpopCommand(). The arguments from argv[1] to argv[argc-2]
 * are the keys, the last argument is the timeout.
 *
 * The keys are scanned in order: a key holding a value that is not a list
 * produces a wrong type error; the first key holding a non empty list is
 * served at once through popGenericCommand(). When the scan ends the
 * client is blocked on all the keys with blockForKeys(). */
7256 static void blockingPopGenericCommand(redisClient
*c
, int where
) {
7261 for (j
= 1; j
< c
->argc
-1; j
++) {
7262 o
= lookupKeyWrite(c
->db
,c
->argv
[j
]);
7264 if (o
->type
!= REDIS_LIST
) {
7265 addReply(c
,shared
.wrongtypeerr
);
7268 list
*list
= o
->ptr
;
7269 if (listLength(list
) != 0) {
7270 /* If the list contains elements fall back to the usual
7271 * non-blocking POP operation */
7272 robj
*argv
[2], **orig_argv
;
7275 /* We need to alter the command arguments before calling
7276 * popGenericCommand() as the command takes a single key. */
7277 orig_argv
= c
->argv
;
7278 orig_argc
= c
->argc
;
7279 argv
[1] = c
->argv
[j
];
7283 /* Also the return value is different, we need to output
7284 * the multi bulk reply header and the key name. The
7285 * "real" command will add the last element (the value)
7286 * for us. If this sounds like a hack to you it's just
7287 * because it is... */
7288 addReplySds(c
,sdsnew("*2\r\n"));
7289 addReplyBulk(c
,argv
[1]);
7290 popGenericCommand(c
,where
);
7292 /* Fix the client structure with the original stuff */
7293 c
->argv
= orig_argv
;
7294 c
->argc
= orig_argc
;
7300 /* If the list is empty or the key does not exist we must block.
 * The timeout argument is parsed by strtol() with no error checking: a
 * value greater than zero is turned into an absolute unix time, any
 * other value is passed to blockForKeys() unchanged. */
7301 timeout
= strtol(c
->argv
[c
->argc
-1]->ptr
,NULL
,10);
7302 if (timeout
> 0) timeout
+= time(NULL
);
7303 blockForKeys(c
,c
->argv
+1,c
->argc
-2,timeout
);
7306 static void blpopCommand(redisClient
*c
) {
7307 blockingPopGenericCommand(c
,REDIS_HEAD
);
7310 static void brpopCommand(redisClient
*c
) {
7311 blockingPopGenericCommand(c
,REDIS_TAIL
);
7314 /* =============================== Replication ============================= */
/* Write 'size' bytes from 'ptr' to 'fd', waiting for the descriptor to
 * become writable with aeWait() (called with a wait value of 1000) before
 * every write(). 'timeout' is compared against the seconds elapsed since
 * the function was entered. Returns -1 if write() fails.
 * NOTE(review): presumably -1 is also returned on timeout and the
 * original 'size' (saved in 'ret') on success -- TODO confirm. */
7316 static int syncWrite(int fd
, char *ptr
, ssize_t size
, int timeout
) {
7317 ssize_t nwritten
, ret
= size
;
7318 time_t start
= time(NULL
);
7322 if (aeWait(fd
,AE_WRITABLE
,1000) & AE_WRITABLE
) {
7323 nwritten
= write(fd
,ptr
,size
);
7324 if (nwritten
== -1) return -1;
7328 if ((time(NULL
)-start
) > timeout
) {
/* Read from 'fd' into 'ptr', waiting for the descriptor to become
 * readable with aeWait() (called with a wait value of 1000) before every
 * read(). 'timeout' is compared against the seconds elapsed since the
 * function was entered. Returns -1 if read() fails.
 * NOTE(review): presumably it loops until 'size' bytes are read, returning
 * the total ('totread') on success and -1 on timeout -- TODO confirm. */
7336 static int syncRead(int fd
, char *ptr
, ssize_t size
, int timeout
) {
7337 ssize_t nread
, totread
= 0;
7338 time_t start
= time(NULL
);
7342 if (aeWait(fd
,AE_READABLE
,1000) & AE_READABLE
) {
7343 nread
= read(fd
,ptr
,size
);
7344 if (nread
== -1) return -1;
7349 if ((time(NULL
)-start
) > timeout
) {
/* Read a line from 'fd' into 'ptr', one byte at a time through
 * syncRead(), using 'timeout' for every single byte. Returns -1 as soon
 * as syncRead() fails. A '\r' found right before the current write
 * position is replaced with a string terminator.
 * NOTE(review): presumably the line ends at '\n' and at most 'size' bytes
 * are stored -- TODO confirm. */
7357 static int syncReadLine(int fd
, char *ptr
, ssize_t size
, int timeout
) {
7364 if (syncRead(fd
,&c
,1,timeout
) == -1) return -1;
7367 if (nread
&& *(ptr
-1) == '\r') *(ptr
-1) = '\0';
/* SYNC command: register the client as a slave waiting for a dump.
 *
 * The request is ignored for clients already flagged REDIS_SLAVE and
 * refused with an error when the client still has pending replies. Then:
 * - if a BGSAVE is in progress and another slave is already in
 *   REDIS_REPL_WAIT_BGSAVE_END state, the reply list of that slave is
 *   duplicated into this client, which enters the same state;
 * - if a BGSAVE is in progress but no such slave exists, the client
 *   enters REDIS_REPL_WAIT_BGSAVE_START;
 * - otherwise a new background save is started with rdbSaveBackground().
 * Finally the client is flagged REDIS_SLAVE and appended to
 * server.slaves. */
7378 static void syncCommand(redisClient
*c
) {
7379 /* ignore SYNC if already slave or in monitor mode */
7380 if (c
->flags
& REDIS_SLAVE
) return;
7382 /* SYNC can't be issued when the server has pending data to send to
7383 * the client about already issued commands. We need a fresh reply
7384 * buffer registering the differences between the BGSAVE and the current
7385 * dataset, so that we can copy to other slaves if needed. */
7386 if (listLength(c
->reply
) != 0) {
7387 addReplySds(c
,sdsnew("-ERR SYNC is invalid with pending input\r\n"));
7391 redisLog(REDIS_NOTICE
,"Slave ask for synchronization");
7392 /* Here we need to check if there is a background saving operation
7393 * in progress, or if it is required to start one */
7394 if (server
.bgsavechildpid
!= -1) {
7395 /* Ok a background save is in progress. Let's check if it is a good
7396 * one for replication, i.e. if there is another slave that is
7397 * registering differences since the server forked to save */
7402 listRewind(server
.slaves
,&li
);
7403 while((ln
= listNext(&li
))) {
7405 if (slave
->replstate
== REDIS_REPL_WAIT_BGSAVE_END
) break;
7408 /* Perfect, the server is already registering differences for
7409 * another slave. Set the right state, and copy the buffer. */
7410 listRelease(c
->reply
);
7411 c
->reply
= listDup(slave
->reply
);
7412 c
->replstate
= REDIS_REPL_WAIT_BGSAVE_END
;
7413 redisLog(REDIS_NOTICE
,"Waiting for end of BGSAVE for SYNC");
7415 /* No way, we need to wait for the next BGSAVE in order to
7416 * register differences */
7417 c
->replstate
= REDIS_REPL_WAIT_BGSAVE_START
;
7418 redisLog(REDIS_NOTICE
,"Waiting for next BGSAVE for SYNC");
7421 /* Ok we don't have a BGSAVE in progress, let's start one */
7422 redisLog(REDIS_NOTICE
,"Starting BGSAVE for SYNC");
7423 if (rdbSaveBackground(server
.dbfilename
) != REDIS_OK
) {
7424 redisLog(REDIS_NOTICE
,"Replication failed, can't BGSAVE");
/* NOTE(review): the error text below is misspelled ("Unalbe"); it is part
 * of the reply sent to the client */
7425 addReplySds(c
,sdsnew("-ERR Unalbe to perform background save\r\n"));
7428 c
->replstate
= REDIS_REPL_WAIT_BGSAVE_END
;
7431 c
->flags
|= REDIS_SLAVE
;
7433 listAddNodeTail(server
.slaves
,c
);
/* Event handler (signature of an ae file event callback, registered for
 * AE_WRITABLE by updateSlavesWaitingBgsave()) that transfers the dump
 * file to a slave. 'privdata' is the slave client.
 *
 * On the first call (slave->repldboff == 0) the bulk length header is
 * written. Every call then reads up to REDIS_IOBUF_LEN bytes from the
 * dump file at offset slave->repldboff and writes them to 'fd', advancing
 * the offset by the number of bytes actually written. Once the offset
 * reaches slave->repldbsize the dump descriptor is closed, the slave
 * state becomes REDIS_REPL_ONLINE and the writable handler is replaced
 * with sendReplyToClient. */
7437 static void sendBulkToSlave(aeEventLoop
*el
, int fd
, void *privdata
, int mask
) {
7438 redisClient
*slave
= privdata
;
7440 REDIS_NOTUSED(mask
);
7441 char buf
[REDIS_IOBUF_LEN
];
7442 ssize_t nwritten
, buflen
;
7444 if (slave
->repldboff
== 0) {
7445 /* Write the bulk write count before transferring the DB. In theory here
7446 * we don't know how much room there is in the output buffer of the
7447 * socket, but in practice SO_SNDLOWAT (the minimum count for output
7448 * operations) will never be smaller than the few bytes we need. */
/* NOTE(review): "%lld" is paired with an unsigned long long argument */
7451 bulkcount
= sdscatprintf(sdsempty(),"$%lld\r\n",(unsigned long long)
7453 if (write(fd
,bulkcount
,sdslen(bulkcount
)) != (signed)sdslen(bulkcount
))
/* Read the next chunk of the dump starting from the current offset */
7461 lseek(slave
->repldbfd
,slave
->repldboff
,SEEK_SET
);
7462 buflen
= read(slave
->repldbfd
,buf
,REDIS_IOBUF_LEN
);
7464 redisLog(REDIS_WARNING
,"Read error sending DB to slave: %s",
7465 (buflen
== 0) ? "premature EOF" : strerror(errno
));
7469 if ((nwritten
= write(fd
,buf
,buflen
)) == -1) {
7470 redisLog(REDIS_VERBOSE
,"Write error sending DB to slave: %s",
/* A short write is fine: the next call restarts from the new offset */
7475 slave
->repldboff
+= nwritten
;
7476 if (slave
->repldboff
== slave
->repldbsize
) {
7477 close(slave
->repldbfd
);
7478 slave
->repldbfd
= -1;
7479 aeDeleteFileEvent(server
.el
,slave
->fd
,AE_WRITABLE
);
7480 slave
->replstate
= REDIS_REPL_ONLINE
;
7481 if (aeCreateFileEvent(server
.el
, slave
->fd
, AE_WRITABLE
,
7482 sendReplyToClient
, slave
) == AE_ERR
) {
7486 addReplySds(slave
,sdsempty());
7487 redisLog(REDIS_NOTICE
,"Synchronization with slave succeeded");
7491 /* This function is called at the end of every background saving.
7492 * The argument bgsaveerr is REDIS_OK if the background saving succeeded
7493 * otherwise REDIS_ERR is passed to the function.
 *
7495 * The goal of this function is to handle slaves waiting for a successful
7496 * background saving in order to perform non-blocking synchronization.
 *
 * Slaves in REDIS_REPL_WAIT_BGSAVE_START state are moved to
 * REDIS_REPL_WAIT_BGSAVE_END. For slaves that were already in
 * REDIS_REPL_WAIT_BGSAVE_END state the dump file is opened, its size is
 * recorded and sendBulkToSlave() is installed as writable handler, the
 * slave entering the REDIS_REPL_SEND_BULK state. */
7497 static void updateSlavesWaitingBgsave(int bgsaveerr
) {
7499 int startbgsave
= 0;
7502 listRewind(server
.slaves
,&li
);
7503 while((ln
= listNext(&li
))) {
7504 redisClient
*slave
= ln
->value
;
7506 if (slave
->replstate
== REDIS_REPL_WAIT_BGSAVE_START
) {
7508 slave
->replstate
= REDIS_REPL_WAIT_BGSAVE_END
;
7509 } else if (slave
->replstate
== REDIS_REPL_WAIT_BGSAVE_END
) {
7510 struct redis_stat buf
;
7512 if (bgsaveerr
!= REDIS_OK
) {
7514 redisLog(REDIS_WARNING
,"SYNC failed. BGSAVE child returned an error");
/* Open the dump just produced and get its size */
7517 if ((slave
->repldbfd
= open(server
.dbfilename
,O_RDONLY
)) == -1 ||
7518 redis_fstat(slave
->repldbfd
,&buf
) == -1) {
7520 redisLog(REDIS_WARNING
,"SYNC failed. Can't open/stat DB after BGSAVE: %s", strerror(errno
));
/* Start the bulk transfer from the beginning of the file */
7523 slave
->repldboff
= 0;
7524 slave
->repldbsize
= buf
.st_size
;
7525 slave
->replstate
= REDIS_REPL_SEND_BULK
;
7526 aeDeleteFileEvent(server
.el
,slave
->fd
,AE_WRITABLE
);
7527 if (aeCreateFileEvent(server
.el
, slave
->fd
, AE_WRITABLE
, sendBulkToSlave
, slave
) == AE_ERR
) {
/* NOTE(review): presumably executed only when 'startbgsave' was set in the
 * loop above -- TODO confirm */
7534 if (rdbSaveBackground(server
.dbfilename
) != REDIS_OK
) {
7537 listRewind(server
.slaves
,&li
);
7538 redisLog(REDIS_WARNING
,"SYNC failed. BGSAVE failed");
7539 while((ln
= listNext(&li
))) {
7540 redisClient
*slave
= ln
->value
;
7542 if (slave
->replstate
== REDIS_REPL_WAIT_BGSAVE_START
)
/* Perform the initial synchronization with the master configured in
 * server.masterhost / server.masterport, using blocking I/O.
 *
 * Steps: connect; when server.masterauth is set send AUTH and check that
 * the reply starts with '+'; send SYNC; read the bulk length line (it must
 * start with '$'); copy the payload from the socket into a temp file
 * created in the current directory; rename the temp file to
 * server.dbfilename; load it with rdbLoad(). On success a client for the
 * master link is created, flagged REDIS_MASTER and marked as
 * authenticated, and server.replstate is set to REDIS_REPL_CONNECTED. */
7549 static int syncWithMaster(void) {
7550 char buf
[1024], tmpfile
[256], authcmd
[1024];
7552 int fd
= anetTcpConnect(NULL
,server
.masterhost
,server
.masterport
);
7553 int dfd
, maxtries
= 5;
7556 redisLog(REDIS_WARNING
,"Unable to connect to MASTER: %s",
7561 /* AUTH with the master if required. */
7562 if(server
.masterauth
) {
/* NOTE(review): the length passed to syncWrite() is computed from
 * strlen(server.masterauth) ("AUTH " + password + CRLF), not from what
 * snprintf() stored: a password long enough to be truncated by the 1024
 * bytes limit would make the length exceed the buffer -- confirm that
 * masterauth is bounded elsewhere */
7563 snprintf(authcmd
, 1024, "AUTH %s\r\n", server
.masterauth
);
7564 if (syncWrite(fd
, authcmd
, strlen(server
.masterauth
)+7, 5) == -1) {
7566 redisLog(REDIS_WARNING
,"Unable to AUTH to MASTER: %s",
7570 /* Read the AUTH result. */
7571 if (syncReadLine(fd
,buf
,1024,3600) == -1) {
7573 redisLog(REDIS_WARNING
,"I/O error reading auth result from MASTER: %s",
7577 if (buf
[0] != '+') {
7579 redisLog(REDIS_WARNING
,"Cannot AUTH to MASTER, is the masterauth password correct?");
7584 /* Issue the SYNC command */
7585 if (syncWrite(fd
,"SYNC \r\n",7,5) == -1) {
7587 redisLog(REDIS_WARNING
,"I/O error writing to MASTER: %s",
7591 /* Read the bulk write count */
7592 if (syncReadLine(fd
,buf
,1024,3600) == -1) {
7594 redisLog(REDIS_WARNING
,"I/O error reading bulk count from MASTER: %s",
7598 if (buf
[0] != '$') {
7600 redisLog(REDIS_WARNING
,"Bad protocol from MASTER, the first byte is not '$', are you sure the host and port are right?");
/* The payload size follows the '$'; strtol() is used with no validation */
7603 dumpsize
= strtol(buf
+1,NULL
,10);
7604 redisLog(REDIS_NOTICE
,"Receiving %ld bytes data dump from MASTER",dumpsize
);
7605 /* Read the bulk write data on a temp file */
/* The temp file name is built from the current time and the process id;
 * O_EXCL makes open() fail if the file already exists */
7607 snprintf(tmpfile
,256,
7608 "temp-%d.%ld.rdb",(int)time(NULL
),(long int)getpid());
7609 dfd
= open(tmpfile
,O_CREAT
|O_WRONLY
|O_EXCL
,0644);
7610 if (dfd
!= -1) break;
7615 redisLog(REDIS_WARNING
,"Opening the temp file needed for MASTER <-> SLAVE synchronization: %s",strerror(errno
));
7619 int nread
, nwritten
;
/* Never ask for more than the bytes still expected or the buffer size */
7621 nread
= read(fd
,buf
,(dumpsize
< 1024)?dumpsize
:1024);
7623 redisLog(REDIS_WARNING
,"I/O error trying to sync with MASTER: %s",
7629 nwritten
= write(dfd
,buf
,nread
);
7630 if (nwritten
== -1) {
7631 redisLog(REDIS_WARNING
,"Write error writing to the DB dump file needed for MASTER <-> SLAVE synchrnonization: %s", strerror(errno
));
7639 if (rename(tmpfile
,server
.dbfilename
) == -1) {
7640 redisLog(REDIS_WARNING
,"Failed trying to rename the temp DB into dump.rdb in MASTER <-> SLAVE synchronization: %s", strerror(errno
));
7646 if (rdbLoad(server
.dbfilename
) != REDIS_OK
) {
7647 redisLog(REDIS_WARNING
,"Failed trying to load the MASTER synchronization DB from disk");
/* The socket used for the synchronization becomes the master link */
7651 server
.master
= createClient(fd
);
7652 server
.master
->flags
|= REDIS_MASTER
;
7653 server
.master
->authenticated
= 1;
7654 server
.replstate
= REDIS_REPL_CONNECTED
;
/* SLAVEOF command.
 *
 * "SLAVEOF NO ONE" (case insensitive): when a master host is configured
 * it is forgotten, the master link client (if any) is freed and the
 * replication state becomes REDIS_REPL_NONE.
 *
 * Any other pair of arguments is taken as host and port: the host is
 * duplicated into server.masterhost, the port is parsed with atoi()
 * (no validation), the current master link client (if any) is freed and
 * the replication state becomes REDIS_REPL_CONNECT.
 *
 * The reply is shared.ok. */
7658 static void slaveofCommand(redisClient
*c
) {
7659 if (!strcasecmp(c
->argv
[1]->ptr
,"no") &&
7660 !strcasecmp(c
->argv
[2]->ptr
,"one")) {
7661 if (server
.masterhost
) {
7662 sdsfree(server
.masterhost
);
7663 server
.masterhost
= NULL
;
7664 if (server
.master
) freeClient(server
.master
);
7665 server
.replstate
= REDIS_REPL_NONE
;
7666 redisLog(REDIS_NOTICE
,"MASTER MODE enabled (user request)");
7669 sdsfree(server
.masterhost
);
7670 server
.masterhost
= sdsdup(c
->argv
[1]->ptr
);
7671 server
.masterport
= atoi(c
->argv
[2]->ptr
);
7672 if (server
.master
) freeClient(server
.master
);
7673 server
.replstate
= REDIS_REPL_CONNECT
;
7674 redisLog(REDIS_NOTICE
,"SLAVE OF %s:%d enabled (user request)",
7675 server
.masterhost
, server
.masterport
);
7677 addReply(c
,shared
.ok
);
7680 /* ============================ Maxmemory directive ======================== */
7682 /* Try to free one object from the pre-allocated objects free list.
7683 * This is useful under low memory conditions as by default we keep 1 million
7684 * free objects allocated. On success REDIS_OK is returned, otherwise
/* Remove the first node, if any, from server.objfreelist.
 * When server.vm_enabled is set the list is accessed while holding
 * server.obj_freelist_mutex, which is released on both paths (list empty
 * or not).
 * NOTE(review): presumably the removed object is freed and REDIS_OK is
 * returned, with REDIS_ERR returned when the list is empty -- TODO
 * confirm. */
7686 static int tryFreeOneObjectFromFreelist(void) {
7689 if (server
.vm_enabled
) pthread_mutex_lock(&server
.obj_freelist_mutex
);
7690 if (listLength(server
.objfreelist
)) {
7691 listNode
*head
= listFirst(server
.objfreelist
);
7692 o
= listNodeValue(head
);
7693 listDelNode(server
.objfreelist
,head
);
7694 if (server
.vm_enabled
) pthread_mutex_unlock(&server
.obj_freelist_mutex
);
7698 if (server
.vm_enabled
) pthread_mutex_unlock(&server
.obj_freelist_mutex
);
7703 /* This function gets called when 'maxmemory' is set on the config file to limit
7704 * the max memory used by the server, and we are out of memory.
7705 * This function will try to, in order:
7707 * - Free objects from the free list
7708 * - Try to remove keys with an EXPIRE set
7710 * If it is not possible to free enough memory to reach used-memory < maxmemory
7711 * the server will start refusing commands that would enlarge even more the
/* Loop while server.maxmemory is set and zmalloc_used_memory() is above
 * it. Every iteration first tries to release one object from the free
 * list; if that is not possible every database with a non empty expires
 * dictionary is scanned, and among three entries picked at random from
 * the expires dictionary the key with the smallest expire time is
 * deleted. The function returns as soon as an iteration was not able to
 * free anything. */
7714 static void freeMemoryIfNeeded(void) {
7715 while (server
.maxmemory
&& zmalloc_used_memory() > server
.maxmemory
) {
7716 int j
, k
, freed
= 0;
7718 if (tryFreeOneObjectFromFreelist() == REDIS_OK
) continue;
7719 for (j
= 0; j
< server
.dbnum
; j
++) {
7721 robj
*minkey
= NULL
;
7722 struct dictEntry
*de
;
7724 if (dictSize(server
.db
[j
].expires
)) {
7726 /* From a sample of three keys drop the one nearest to
7727 * the natural expire */
7728 for (k
= 0; k
< 3; k
++) {
7731 de
= dictGetRandomKey(server
.db
[j
].expires
);
7732 t
= (time_t) dictGetEntryVal(de
);
7733 if (minttl
== -1 || t
< minttl
) {
7734 minkey
= dictGetEntryKey(de
);
7738 deleteKey(server
.db
+j
,minkey
);
7741 if (!freed
) return; /* nothing to free... */
7745 /* ============================== Append Only file ========================== */
7747 static void feedAppendOnlyFile(struct redisCommand
*cmd
, int dictid
, robj
**argv
, int argc
) {
7748 sds buf
= sdsempty();
7754 /* The DB this command was targeting is not the same as the last command
7755 * we appended. We need to issue a SELECT command first. */
7756 if (dictid
!= server
.appendseldb
) {
7759 snprintf(seldb
,sizeof(seldb
),"%d",dictid
);
7760 buf
= sdscatprintf(buf
,"*2\r\n$6\r\nSELECT\r\n$%lu\r\n%s\r\n",
7761 (unsigned long)strlen(seldb
),seldb
);
7762 server
.appendseldb
= dictid
;
7765 /* "Fix" the argv vector if the command is EXPIRE. We want to translate
7766 * EXPIREs into EXPIREATs calls */
7767 if (cmd
->proc
== expireCommand
) {
7770 tmpargv
[0] = createStringObject("EXPIREAT",8);
7771 tmpargv
[1] = argv
[1];
7772 incrRefCount(argv
[1]);
7773 when
= time(NULL
)+strtol(argv
[2]->ptr
,NULL
,10);
7774 tmpargv
[2] = createObject(REDIS_STRING
,
7775 sdscatprintf(sdsempty(),"%ld",when
));
7779 /* Append the actual command */
7780 buf
= sdscatprintf(buf
,"*%d\r\n",argc
);
7781 for (j
= 0; j
< argc
; j
++) {
7784 o
= getDecodedObject(o
);
7785 buf
= sdscatprintf(buf
,"$%lu\r\n",(unsigned long)sdslen(o
->ptr
));
7786 buf
= sdscatlen(buf
,o
->ptr
,sdslen(o
->ptr
));
7787 buf
= sdscatlen(buf
,"\r\n",2);
7791 /* Free the objects from the modified argv for EXPIREAT */
7792 if (cmd
->proc
== expireCommand
) {
7793 for (j
= 0; j
< 3; j
++)
7794 decrRefCount(argv
[j
]);
7797 /* We want to perform a single write. This should be guaranteed atomic
7798 * at least if the filesystem we are writing is a real physical one.
7799 * While this will save us against the server being killed I don't think
7800 * there is much to do about the whole server stopping for power problems
7802 nwritten
= write(server
.appendfd
,buf
,sdslen(buf
));
7803 if (nwritten
!= (signed)sdslen(buf
)) {
7804 /* Ooops, we are in troubles. The best thing to do for now is
7805 * to simply exit instead to give the illusion that everything is
7806 * working as expected. */
7807 if (nwritten
== -1) {
7808 redisLog(REDIS_WARNING
,"Exiting on error writing to the append-only file: %s",strerror(errno
));
7810 redisLog(REDIS_WARNING
,"Exiting on short write while writing to the append-only file: %s",strerror(errno
));
7814 /* If a background append only file rewriting is in progress we want to
7815 * accumulate the differences between the child DB and the current one
7816 * in a buffer, so that when the child process will do its work we
7817 * can append the differences to the new append only file. */
7818 if (server
.bgrewritechildpid
!= -1)
7819 server
.bgrewritebuf
= sdscatlen(server
.bgrewritebuf
,buf
,sdslen(buf
));
7823 if (server
.appendfsync
== APPENDFSYNC_ALWAYS
||
7824 (server
.appendfsync
== APPENDFSYNC_EVERYSEC
&&
7825 now
-server
.lastfsync
> 1))
7827 fsync(server
.appendfd
); /* Let's try to get this data on the disk */
7828 server
.lastfsync
= now
;
7832 /* In Redis commands are always executed in the context of a client, so in
7833 * order to load the append only file we need to create a fake client. */
7834 static struct redisClient
*createFakeClient(void) {
7835 struct redisClient
*c
= zmalloc(sizeof(*c
));
7839 c
->querybuf
= sdsempty();
7843 /* We set the fake client as a slave waiting for the synchronization
7844 * so that Redis will not try to send replies to this client. */
7845 c
->replstate
= REDIS_REPL_WAIT_BGSAVE_START
;
7846 c
->reply
= listCreate();
7847 listSetFreeMethod(c
->reply
,decrRefCount
);
7848 listSetDupMethod(c
->reply
,dupClientReplyValue
);
7852 static void freeFakeClient(struct redisClient
*c
) {
7853 sdsfree(c
->querybuf
);
7854 listRelease(c
->reply
);
7858 /* Replay the append log file. On success REDIS_OK is returned. On non fatal
7859 * error (the append only file is zero-length) REDIS_ERR is returned. On
7860 * fatal error an error message is logged and the program exits. */
7861 int loadAppendOnlyFile(char *filename
) {
7862 struct redisClient
*fakeClient
;
7863 FILE *fp
= fopen(filename
,"r");
7864 struct redis_stat sb
;
7865 unsigned long long loadedkeys
= 0;
7867 if (redis_fstat(fileno(fp
),&sb
) != -1 && sb
.st_size
== 0)
7871 redisLog(REDIS_WARNING
,"Fatal error: can't open the append log file for reading: %s",strerror(errno
));
7875 fakeClient
= createFakeClient();
7882 struct redisCommand
*cmd
;
7884 if (fgets(buf
,sizeof(buf
),fp
) == NULL
) {
7890 if (buf
[0] != '*') goto fmterr
;
7892 argv
= zmalloc(sizeof(robj
*)*argc
);
7893 for (j
= 0; j
< argc
; j
++) {
7894 if (fgets(buf
,sizeof(buf
),fp
) == NULL
) goto readerr
;
7895 if (buf
[0] != '$') goto fmterr
;
7896 len
= strtol(buf
+1,NULL
,10);
7897 argsds
= sdsnewlen(NULL
,len
);
7898 if (len
&& fread(argsds
,len
,1,fp
) == 0) goto fmterr
;
7899 argv
[j
] = createObject(REDIS_STRING
,argsds
);
7900 if (fread(buf
,2,1,fp
) == 0) goto fmterr
; /* discard CRLF */
7903 /* Command lookup */
7904 cmd
= lookupCommand(argv
[0]->ptr
);
7906 redisLog(REDIS_WARNING
,"Unknown command '%s' reading the append only file", argv
[0]->ptr
);
7909 /* Try object encoding */
7910 if (cmd
->flags
& REDIS_CMD_BULK
)
7911 argv
[argc
-1] = tryObjectEncoding(argv
[argc
-1]);
7912 /* Run the command in the context of a fake client */
7913 fakeClient
->argc
= argc
;
7914 fakeClient
->argv
= argv
;
7915 cmd
->proc(fakeClient
);
7916 /* Discard the reply objects list from the fake client */
7917 while(listLength(fakeClient
->reply
))
7918 listDelNode(fakeClient
->reply
,listFirst(fakeClient
->reply
));
7919 /* Clean up, ready for the next command */
7920 for (j
= 0; j
< argc
; j
++) decrRefCount(argv
[j
]);
7922 /* Handle swapping while loading big datasets when VM is on */
7924 if (server
.vm_enabled
&& (loadedkeys
% 5000) == 0) {
7925 while (zmalloc_used_memory() > server
.vm_max_memory
) {
7926 if (vmSwapOneObjectBlocking() == REDIS_ERR
) break;
7931 freeFakeClient(fakeClient
);
7936 redisLog(REDIS_WARNING
,"Unexpected end of file reading the append only file");
7938 redisLog(REDIS_WARNING
,"Unrecoverable error reading the append only file: %s", strerror(errno
));
7942 redisLog(REDIS_WARNING
,"Bad file format reading the append only file");
7946 /* Write an object into a file in the bulk format $<count>\r\n<payload>\r\n */
7947 static int fwriteBulkObject(FILE *fp
, robj
*obj
) {
7951 /* Avoid the incr/decr ref count business if possible to help
7952 * copy-on-write (we are often in a child process when this function
7954 * Also makes sure that key objects don't get incrRefCount-ed when VM
7956 if (obj
->encoding
!= REDIS_ENCODING_RAW
) {
7957 obj
= getDecodedObject(obj
);
7960 snprintf(buf
,sizeof(buf
),"$%ld\r\n",(long)sdslen(obj
->ptr
));
7961 if (fwrite(buf
,strlen(buf
),1,fp
) == 0) goto err
;
7962 if (sdslen(obj
->ptr
) && fwrite(obj
->ptr
,sdslen(obj
->ptr
),1,fp
) == 0)
7964 if (fwrite("\r\n",2,1,fp
) == 0) goto err
;
7965 if (decrrc
) decrRefCount(obj
);
7968 if (decrrc
) decrRefCount(obj
);
/* Write a binary-safe string into a file in the bulk format
 * $<count>\r\n<payload>\r\n
 *
 * 'fp' is the destination stream, 's' points to 'len' bytes of payload
 * (the length is explicit, so the payload may contain any byte).
 *
 * Returns 1 if header, payload and trailing CRLF were all written,
 * 0 as soon as one of the writes fails. */
static int fwriteBulkString(FILE *fp, char *s, unsigned long len) {
    char buf[128];

    /* 'len' is unsigned long, so it must be formatted with %lu: %ld would
     * be a format/argument mismatch. */
    snprintf(buf,sizeof(buf),"$%lu\r\n",len);
    if (fwrite(buf,strlen(buf),1,fp) == 0) return 0;
    /* fwrite() of zero bytes returns 0 even when nothing went wrong, so an
     * empty payload is simply not written. */
    if (len && fwrite(s,len,1,fp) == 0) return 0;
    if (fwrite("\r\n",2,1,fp) == 0) return 0;
    return 1;
}
/* Emit the double 'd' on 'fp' as a bulk string: $<count>\r\n<payload>\r\n
 * The payload is the "%.17g" rendering of the value.
 * Returns 0 as soon as one write fails, 1 otherwise. */
static int fwriteBulkDouble(FILE *fp, double d) {
    char payload[128], header[128];
    size_t paylen;

    snprintf(payload,sizeof(payload),"%.17g\r\n",d);
    paylen = strlen(payload);
    /* The CRLF already appended to the payload is not part of the count. */
    snprintf(header,sizeof(header),"$%lu\r\n",(unsigned long)paylen-2);
    if (!fwrite(header,strlen(header),1,fp)) return 0;
    if (!fwrite(payload,paylen,1,fp)) return 0;
    return 1;
}
/* Emit the long 'l' on 'fp' as a bulk string: $<count>\r\n<payload>\r\n
 * where the payload is the decimal representation of the value.
 * Returns 0 if a write fails, 1 when everything was written. */
static int fwriteBulkLong(FILE *fp, long l) {
    char digits[128], prefix[128];
    unsigned long ndigits;

    snprintf(digits,sizeof(digits),"%ld\r\n",l);
    /* The trailing CRLF is stored in 'digits' but is not part of the
     * advertised length. */
    ndigits = (unsigned long)strlen(digits)-2;
    snprintf(prefix,sizeof(prefix),"$%lu\r\n",ndigits);
    if (fwrite(prefix,strlen(prefix),1,fp) == 0 ||
        fwrite(digits,strlen(digits),1,fp) == 0) return 0;
    return 1;
}
8006 /* Write a sequence of commands able to fully rebuild the dataset into
8007 * "filename". Used both by REWRITEAOF and BGREWRITEAOF. */
8008 static int rewriteAppendOnlyFile(char *filename
) {
8009 dictIterator
*di
= NULL
;
8014 time_t now
= time(NULL
);
8016 /* Note that we have to use a different temp name here compared to the
8017 * one used by rewriteAppendOnlyFileBackground() function. */
8018 snprintf(tmpfile
,256,"temp-rewriteaof-%d.aof", (int) getpid());
8019 fp
= fopen(tmpfile
,"w");
8021 redisLog(REDIS_WARNING
, "Failed rewriting the append only file: %s", strerror(errno
));
8024 for (j
= 0; j
< server
.dbnum
; j
++) {
8025 char selectcmd
[] = "*2\r\n$6\r\nSELECT\r\n";
8026 redisDb
*db
= server
.db
+j
;
8028 if (dictSize(d
) == 0) continue;
8029 di
= dictGetIterator(d
);
8035 /* SELECT the new DB */
8036 if (fwrite(selectcmd
,sizeof(selectcmd
)-1,1,fp
) == 0) goto werr
;
8037 if (fwriteBulkLong(fp
,j
) == 0) goto werr
;
8039 /* Iterate this DB writing every entry */
8040 while((de
= dictNext(di
)) != NULL
) {
8045 key
= dictGetEntryKey(de
);
8046 /* If the value for this key is swapped, load a preview in memory.
8047 * We use a "swapped" flag to remember if we need to free the
8048 * value object instead to just increment the ref count anyway
8049 * in order to avoid copy-on-write of pages if we are forked() */
8050 if (!server
.vm_enabled
|| key
->storage
== REDIS_VM_MEMORY
||
8051 key
->storage
== REDIS_VM_SWAPPING
) {
8052 o
= dictGetEntryVal(de
);
8055 o
= vmPreviewObject(key
);
8058 expiretime
= getExpire(db
,key
);
8060 /* Save the key and associated value */
8061 if (o
->type
== REDIS_STRING
) {
8062 /* Emit a SET command */
8063 char cmd
[]="*3\r\n$3\r\nSET\r\n";
8064 if (fwrite(cmd
,sizeof(cmd
)-1,1,fp
) == 0) goto werr
;
8066 if (fwriteBulkObject(fp
,key
) == 0) goto werr
;
8067 if (fwriteBulkObject(fp
,o
) == 0) goto werr
;
8068 } else if (o
->type
== REDIS_LIST
) {
8069 /* Emit the RPUSHes needed to rebuild the list */
8070 list
*list
= o
->ptr
;
8074 listRewind(list
,&li
);
8075 while((ln
= listNext(&li
))) {
8076 char cmd
[]="*3\r\n$5\r\nRPUSH\r\n";
8077 robj
*eleobj
= listNodeValue(ln
);
8079 if (fwrite(cmd
,sizeof(cmd
)-1,1,fp
) == 0) goto werr
;
8080 if (fwriteBulkObject(fp
,key
) == 0) goto werr
;
8081 if (fwriteBulkObject(fp
,eleobj
) == 0) goto werr
;
8083 } else if (o
->type
== REDIS_SET
) {
8084 /* Emit the SADDs needed to rebuild the set */
8086 dictIterator
*di
= dictGetIterator(set
);
8089 while((de
= dictNext(di
)) != NULL
) {
8090 char cmd
[]="*3\r\n$4\r\nSADD\r\n";
8091 robj
*eleobj
= dictGetEntryKey(de
);
8093 if (fwrite(cmd
,sizeof(cmd
)-1,1,fp
) == 0) goto werr
;
8094 if (fwriteBulkObject(fp
,key
) == 0) goto werr
;
8095 if (fwriteBulkObject(fp
,eleobj
) == 0) goto werr
;
8097 dictReleaseIterator(di
);
8098 } else if (o
->type
== REDIS_ZSET
) {
8099 /* Emit the ZADDs needed to rebuild the sorted set */
8101 dictIterator
*di
= dictGetIterator(zs
->dict
);
8104 while((de
= dictNext(di
)) != NULL
) {
8105 char cmd
[]="*4\r\n$4\r\nZADD\r\n";
8106 robj
*eleobj
= dictGetEntryKey(de
);
8107 double *score
= dictGetEntryVal(de
);
8109 if (fwrite(cmd
,sizeof(cmd
)-1,1,fp
) == 0) goto werr
;
8110 if (fwriteBulkObject(fp
,key
) == 0) goto werr
;
8111 if (fwriteBulkDouble(fp
,*score
) == 0) goto werr
;
8112 if (fwriteBulkObject(fp
,eleobj
) == 0) goto werr
;
8114 dictReleaseIterator(di
);
8115 } else if (o
->type
== REDIS_HASH
) {
8116 char cmd
[]="*4\r\n$4\r\nHSET\r\n";
8118 /* Emit the HSETs needed to rebuild the hash */
8119 if (o
->encoding
== REDIS_ENCODING_ZIPMAP
) {
8120 unsigned char *p
= zipmapRewind(o
->ptr
);
8121 unsigned char *field
, *val
;
8122 unsigned int flen
, vlen
;
8124 while((p
= zipmapNext(p
,&field
,&flen
,&val
,&vlen
)) != NULL
) {
8125 if (fwrite(cmd
,sizeof(cmd
)-1,1,fp
) == 0) goto werr
;
8126 if (fwriteBulkObject(fp
,key
) == 0) goto werr
;
8127 if (fwriteBulkString(fp
,(char*)field
,flen
) == -1)
8129 if (fwriteBulkString(fp
,(char*)val
,vlen
) == -1)
8133 dictIterator
*di
= dictGetIterator(o
->ptr
);
8136 while((de
= dictNext(di
)) != NULL
) {
8137 robj
*field
= dictGetEntryKey(de
);
8138 robj
*val
= dictGetEntryVal(de
);
8140 if (fwrite(cmd
,sizeof(cmd
)-1,1,fp
) == 0) goto werr
;
8141 if (fwriteBulkObject(fp
,key
) == 0) goto werr
;
8142 if (fwriteBulkObject(fp
,field
) == -1) return -1;
8143 if (fwriteBulkObject(fp
,val
) == -1) return -1;
8145 dictReleaseIterator(di
);
8150 /* Save the expire time */
8151 if (expiretime
!= -1) {
8152 char cmd
[]="*3\r\n$8\r\nEXPIREAT\r\n";
8153 /* If this key is already expired skip it */
8154 if (expiretime
< now
) continue;
8155 if (fwrite(cmd
,sizeof(cmd
)-1,1,fp
) == 0) goto werr
;
8156 if (fwriteBulkObject(fp
,key
) == 0) goto werr
;
8157 if (fwriteBulkLong(fp
,expiretime
) == 0) goto werr
;
8159 if (swapped
) decrRefCount(o
);
8161 dictReleaseIterator(di
);
8164 /* Make sure data will not remain on the OS's output buffers */
8169 /* Use RENAME to make sure the DB file is changed atomically only
8170 * if the generate DB file is ok. */
8171 if (rename(tmpfile
,filename
) == -1) {
8172 redisLog(REDIS_WARNING
,"Error moving temp append only file on the final destination: %s", strerror(errno
));
8176 redisLog(REDIS_NOTICE
,"SYNC append only file rewrite performed");
8182 redisLog(REDIS_WARNING
,"Write error writing append only file on disk: %s", strerror(errno
));
8183 if (di
) dictReleaseIterator(di
);
8187 /* This is how rewriting of the append only file in background works:
8189 * 1) The user calls BGREWRITEAOF
8190 * 2) Redis calls this function, that forks():
8191 * 2a) the child rewrite the append only file in a temp file.
8192 * 2b) the parent accumulates differences in server.bgrewritebuf.
8193 * 3) When the child has finished '2a' it exits.
8194 * 4) The parent will trap the exit code, if it's OK, will append the
8195 * data accumulated into server.bgrewritebuf into the temp file, and
8196 * finally will rename(2) the temp file in the actual file name.
8197 * Then the new file is reopened as the new append only file. Profit!
8199 static int rewriteAppendOnlyFileBackground(void) {
8202 if (server
.bgrewritechildpid
!= -1) return REDIS_ERR
;
8203 if (server
.vm_enabled
) waitEmptyIOJobsQueue();
8204 if ((childpid
= fork()) == 0) {
8208 if (server
.vm_enabled
) vmReopenSwapFile();
8210 snprintf(tmpfile
,256,"temp-rewriteaof-bg-%d.aof", (int) getpid());
8211 if (rewriteAppendOnlyFile(tmpfile
) == REDIS_OK
) {
8218 if (childpid
== -1) {
8219 redisLog(REDIS_WARNING
,
8220 "Can't rewrite append only file in background: fork: %s",
8224 redisLog(REDIS_NOTICE
,
8225 "Background append only file rewriting started by pid %d",childpid
);
8226 server
.bgrewritechildpid
= childpid
;
8227 updateDictResizePolicy();
8228 /* We set appendseldb to -1 in order to force the next call to the
8229 * feedAppendOnlyFile() to issue a SELECT command, so the differences
8230 * accumulated by the parent into server.bgrewritebuf will start
8231 * with a SELECT statement and it will be safe to merge. */
8232 server
.appendseldb
= -1;
8235 return REDIS_OK
; /* unreached */
8238 static void bgrewriteaofCommand(redisClient
*c
) {
8239 if (server
.bgrewritechildpid
!= -1) {
8240 addReplySds(c
,sdsnew("-ERR background append only file rewriting already in progress\r\n"));
8243 if (rewriteAppendOnlyFileBackground() == REDIS_OK
) {
8244 char *status
= "+Background append only file rewriting started\r\n";
8245 addReplySds(c
,sdsnew(status
));
8247 addReply(c
,shared
.err
);
8251 static void aofRemoveTempFile(pid_t childpid
) {
8254 snprintf(tmpfile
,256,"temp-rewriteaof-bg-%d.aof", (int) childpid
);
8258 /* Virtual Memory is composed mainly of two subsystems:
8259 * - Blocking Virtual Memory
8260 * - Threaded Virtual Memory I/O
8261 * The two parts are not fully decoupled, but functions are split among two
8262 * different sections of the source code (delimited by comments) in order to
8263 * make more clear what functionality is about the blocking VM and what about
8264 * the threaded (not blocking) VM.
8268 * Redis VM is a blocking VM (one that blocks reading swapped values from
8269 * disk into memory when a value swapped out is needed in memory) that is made
8270 * unblocking by trying to examine the command argument vector in order to
8271 * load in background values that will likely be needed in order to exec
8272 * the command. The command is executed only once all the relevant keys
8273 * are loaded into memory.
8275 * This is basically almost as simple as a blocking VM, but almost as parallel
8276 * as a fully non-blocking VM.
8279 /* =================== Virtual Memory - Blocking Side ====================== */
8281 /* substitute the first occurrence of '%p' with the process pid in the
8282 * swap file name. */
8283 static void expandVmSwapFilename(void) {
8284 char *p
= strstr(server
.vm_swap_file
,"%p");
8290 new = sdscat(new,server
.vm_swap_file
);
8291 new = sdscatprintf(new,"%ld",(long) getpid());
8292 new = sdscat(new,p
+2);
8293 zfree(server
.vm_swap_file
);
8294 server
.vm_swap_file
= new;
8297 static void vmInit(void) {
8302 if (server
.vm_max_threads
!= 0)
8303 zmalloc_enable_thread_safeness(); /* we need thread safe zmalloc() */
8305 expandVmSwapFilename();
8306 redisLog(REDIS_NOTICE
,"Using '%s' as swap file",server
.vm_swap_file
);
8307 if ((server
.vm_fp
= fopen(server
.vm_swap_file
,"r+b")) == NULL
) {
8308 server
.vm_fp
= fopen(server
.vm_swap_file
,"w+b");
8310 if (server
.vm_fp
== NULL
) {
8311 redisLog(REDIS_WARNING
,
8312 "Impossible to open the swap file: %s. Exiting.",
8316 server
.vm_fd
= fileno(server
.vm_fp
);
8317 server
.vm_next_page
= 0;
8318 server
.vm_near_pages
= 0;
8319 server
.vm_stats_used_pages
= 0;
8320 server
.vm_stats_swapped_objects
= 0;
8321 server
.vm_stats_swapouts
= 0;
8322 server
.vm_stats_swapins
= 0;
8323 totsize
= server
.vm_pages
*server
.vm_page_size
;
8324 redisLog(REDIS_NOTICE
,"Allocating %lld bytes of swap file",totsize
);
8325 if (ftruncate(server
.vm_fd
,totsize
) == -1) {
8326 redisLog(REDIS_WARNING
,"Can't ftruncate swap file: %s. Exiting.",
8330 redisLog(REDIS_NOTICE
,"Swap file allocated with success");
8332 server
.vm_bitmap
= zmalloc((server
.vm_pages
+7)/8);
8333 redisLog(REDIS_VERBOSE
,"Allocated %lld bytes page table for %lld pages",
8334 (long long) (server
.vm_pages
+7)/8, server
.vm_pages
);
8335 memset(server
.vm_bitmap
,0,(server
.vm_pages
+7)/8);
8337 /* Initialize threaded I/O (used by Virtual Memory) */
8338 server
.io_newjobs
= listCreate();
8339 server
.io_processing
= listCreate();
8340 server
.io_processed
= listCreate();
8341 server
.io_ready_clients
= listCreate();
8342 pthread_mutex_init(&server
.io_mutex
,NULL
);
8343 pthread_mutex_init(&server
.obj_freelist_mutex
,NULL
);
8344 pthread_mutex_init(&server
.io_swapfile_mutex
,NULL
);
8345 server
.io_active_threads
= 0;
8346 if (pipe(pipefds
) == -1) {
8347 redisLog(REDIS_WARNING
,"Unable to intialized VM: pipe(2): %s. Exiting."
8351 server
.io_ready_pipe_read
= pipefds
[0];
8352 server
.io_ready_pipe_write
= pipefds
[1];
8353 redisAssert(anetNonBlock(NULL
,server
.io_ready_pipe_read
) != ANET_ERR
);
8354 /* LZF requires a lot of stack */
8355 pthread_attr_init(&server
.io_threads_attr
);
8356 pthread_attr_getstacksize(&server
.io_threads_attr
, &stacksize
);
8357 while (stacksize
< REDIS_THREAD_STACK_SIZE
) stacksize
*= 2;
8358 pthread_attr_setstacksize(&server
.io_threads_attr
, stacksize
);
8359 /* Listen for events in the threaded I/O pipe */
8360 if (aeCreateFileEvent(server
.el
, server
.io_ready_pipe_read
, AE_READABLE
,
8361 vmThreadedIOCompletedJob
, NULL
) == AE_ERR
)
8362 oom("creating file event");
8365 /* Mark the page as used */
8366 static void vmMarkPageUsed(off_t page
) {
8367 off_t byte
= page
/8;
8369 redisAssert(vmFreePage(page
) == 1);
8370 server
.vm_bitmap
[byte
] |= 1<<bit
;
8373 /* Mark N contiguous pages as used, with 'page' being the first. */
8374 static void vmMarkPagesUsed(off_t page
, off_t count
) {
8377 for (j
= 0; j
< count
; j
++)
8378 vmMarkPageUsed(page
+j
);
8379 server
.vm_stats_used_pages
+= count
;
8380 redisLog(REDIS_DEBUG
,"Mark USED pages: %lld pages at %lld\n",
8381 (long long)count
, (long long)page
);
8384 /* Mark the page as free */
8385 static void vmMarkPageFree(off_t page
) {
8386 off_t byte
= page
/8;
8388 redisAssert(vmFreePage(page
) == 0);
8389 server
.vm_bitmap
[byte
] &= ~(1<<bit
);
8392 /* Mark N contiguous pages as free, with 'page' being the first. */
8393 static void vmMarkPagesFree(off_t page
, off_t count
) {
8396 for (j
= 0; j
< count
; j
++)
8397 vmMarkPageFree(page
+j
);
8398 server
.vm_stats_used_pages
-= count
;
8399 redisLog(REDIS_DEBUG
,"Mark FREE pages: %lld pages at %lld\n",
8400 (long long)count
, (long long)page
);
8403 /* Test if the page is free */
8404 static int vmFreePage(off_t page
) {
8405 off_t byte
= page
/8;
8407 return (server
.vm_bitmap
[byte
] & (1<<bit
)) == 0;
8410 /* Find N contiguous free pages storing the first page of the cluster in *first.
8411 * Returns REDIS_OK if it was able to find N contiguous pages, otherwise
8412 * REDIS_ERR is returned.
8414 * This function uses a simple algorithm: we try to allocate
8415 * REDIS_VM_MAX_NEAR_PAGES sequentially, when we reach this limit we start
8416 * again from the start of the swap file searching for free spaces.
8418 * If it looks pretty clear that there are no free pages near our offset
8419 * we try to find less populated places doing a forward jump of
8420 * REDIS_VM_MAX_RANDOM_JUMP, then we start scanning again a few pages
8421 * without hurry, and then we jump again and so forth...
8423 * This function can be improved using a free list to avoid to guess
8424 * too much, since we could collect data about freed pages.
8426 * note: I implemented this function just after watching an episode of
8427 * Battlestar Galactica, where the hybrid was continuing to say "JUMP!"
8429 static int vmFindContiguousPages(off_t
*first
, off_t n
) {
8430 off_t base
, offset
= 0, since_jump
= 0, numfree
= 0;
8432 if (server
.vm_near_pages
== REDIS_VM_MAX_NEAR_PAGES
) {
8433 server
.vm_near_pages
= 0;
8434 server
.vm_next_page
= 0;
8436 server
.vm_near_pages
++; /* Yet another try for pages near to the old ones */
8437 base
= server
.vm_next_page
;
8439 while(offset
< server
.vm_pages
) {
8440 off_t
this = base
+offset
;
8442 /* If we overflow, restart from page zero */
8443 if (this >= server
.vm_pages
) {
8444 this -= server
.vm_pages
;
8446 /* Just overflowed, what we found on tail is no longer
8447 * interesting, as it's no longer contiguous. */
8451 if (vmFreePage(this)) {
8452 /* This is a free page */
8454 /* Already got N free pages? Return to the caller, with success */
8456 *first
= this-(n
-1);
8457 server
.vm_next_page
= this+1;
8458 redisLog(REDIS_DEBUG
, "FOUND CONTIGUOUS PAGES: %lld pages at %lld\n", (long long) n
, (long long) *first
);
8462 /* The current one is not a free page */
8466 /* Fast-forward if the current page is not free and we already
8467 * searched enough near this place. */
8469 if (!numfree
&& since_jump
>= REDIS_VM_MAX_RANDOM_JUMP
/4) {
8470 offset
+= random() % REDIS_VM_MAX_RANDOM_JUMP
;
8472 /* Note that even if we rewind after the jump, we are don't need
8473 * to make sure numfree is set to zero as we only jump *if* it
8474 * is set to zero. */
8476 /* Otherwise just check the next page */
8483 /* Write the specified object at the specified page of the swap file */
8484 static int vmWriteObjectOnSwap(robj
*o
, off_t page
) {
8485 if (server
.vm_enabled
) pthread_mutex_lock(&server
.io_swapfile_mutex
);
8486 if (fseeko(server
.vm_fp
,page
*server
.vm_page_size
,SEEK_SET
) == -1) {
8487 if (server
.vm_enabled
) pthread_mutex_unlock(&server
.io_swapfile_mutex
);
8488 redisLog(REDIS_WARNING
,
8489 "Critical VM problem in vmWriteObjectOnSwap(): can't seek: %s",
8493 rdbSaveObject(server
.vm_fp
,o
);
8494 fflush(server
.vm_fp
);
8495 if (server
.vm_enabled
) pthread_mutex_unlock(&server
.io_swapfile_mutex
);
8499 /* Swap the 'val' object relative to 'key' into disk. Store all the information
8500 * needed to later retrieve the object into the key object.
8501 * If we can't find enough contiguous empty pages to swap the object on disk
8502 * REDIS_ERR is returned. */
8503 static int vmSwapObjectBlocking(robj
*key
, robj
*val
) {
8504 off_t pages
= rdbSavedObjectPages(val
,NULL
);
8507 assert(key
->storage
== REDIS_VM_MEMORY
);
8508 assert(key
->refcount
== 1);
8509 if (vmFindContiguousPages(&page
,pages
) == REDIS_ERR
) return REDIS_ERR
;
8510 if (vmWriteObjectOnSwap(val
,page
) == REDIS_ERR
) return REDIS_ERR
;
8511 key
->vm
.page
= page
;
8512 key
->vm
.usedpages
= pages
;
8513 key
->storage
= REDIS_VM_SWAPPED
;
8514 key
->vtype
= val
->type
;
8515 decrRefCount(val
); /* Deallocate the object from memory. */
8516 vmMarkPagesUsed(page
,pages
);
8517 redisLog(REDIS_DEBUG
,"VM: object %s swapped out at %lld (%lld pages)",
8518 (unsigned char*) key
->ptr
,
8519 (unsigned long long) page
, (unsigned long long) pages
);
8520 server
.vm_stats_swapped_objects
++;
8521 server
.vm_stats_swapouts
++;
8525 static robj
*vmReadObjectFromSwap(off_t page
, int type
) {
8528 if (server
.vm_enabled
) pthread_mutex_lock(&server
.io_swapfile_mutex
);
8529 if (fseeko(server
.vm_fp
,page
*server
.vm_page_size
,SEEK_SET
) == -1) {
8530 redisLog(REDIS_WARNING
,
8531 "Unrecoverable VM problem in vmReadObjectFromSwap(): can't seek: %s",
8535 o
= rdbLoadObject(type
,server
.vm_fp
);
8537 redisLog(REDIS_WARNING
, "Unrecoverable VM problem in vmReadObjectFromSwap(): can't load object from swap file: %s", strerror(errno
));
8540 if (server
.vm_enabled
) pthread_mutex_unlock(&server
.io_swapfile_mutex
);
8544 /* Load the value object relative to the 'key' object from swap to memory.
8545 * The newly allocated object is returned.
8547 * If preview is true the unserialized object is returned to the caller but
8548 * no changes are made to the key object, nor the pages are marked as freed */
8549 static robj
*vmGenericLoadObject(robj
*key
, int preview
) {
8552 redisAssert(key
->storage
== REDIS_VM_SWAPPED
|| key
->storage
== REDIS_VM_LOADING
);
8553 val
= vmReadObjectFromSwap(key
->vm
.page
,key
->vtype
);
8555 key
->storage
= REDIS_VM_MEMORY
;
8556 key
->vm
.atime
= server
.unixtime
;
8557 vmMarkPagesFree(key
->vm
.page
,key
->vm
.usedpages
);
8558 redisLog(REDIS_DEBUG
, "VM: object %s loaded from disk",
8559 (unsigned char*) key
->ptr
);
8560 server
.vm_stats_swapped_objects
--;
8562 redisLog(REDIS_DEBUG
, "VM: object %s previewed from disk",
8563 (unsigned char*) key
->ptr
);
8565 server
.vm_stats_swapins
++;
8569 /* Plain object loading, from swap to memory */
8570 static robj
*vmLoadObject(robj
*key
) {
8571 /* If we are loading the object in background, stop it, we
8572 * need to load this object synchronously ASAP. */
8573 if (key
->storage
== REDIS_VM_LOADING
)
8574 vmCancelThreadedIOJob(key
);
8575 return vmGenericLoadObject(key
,0);
8578 /* Just load the value on disk, without to modify the key.
8579 * This is useful when we want to perform some operation on the value
8580 * without to really bring it from swap to memory, like while saving the
8581 * dataset or rewriting the append only log. */
8582 static robj
*vmPreviewObject(robj
*key
) {
8583 return vmGenericLoadObject(key
,1);
8586 /* How a good candidate is this object for swapping?
8587 * The better candidate it is, the greater the returned value.
8589 * Currently we try to perform a fast estimation of the object size in
8590 * memory, and combine it with aging informations.
8592 * Basically swappability = idle-time * log(estimated size)
8594 * Bigger objects are preferred over smaller objects, but not
8595 * proportionally, this is why we use the logarithm. This algorithm is
8596 * just a first try and will probably be tuned later. */
8597 static double computeObjectSwappability(robj
*o
) {
8598 time_t age
= server
.unixtime
- o
->vm
.atime
;
8602 struct dictEntry
*de
;
8605 if (age
<= 0) return 0;
8608 if (o
->encoding
!= REDIS_ENCODING_RAW
) {
8611 asize
= sdslen(o
->ptr
)+sizeof(*o
)+sizeof(long)*2;
8616 listNode
*ln
= listFirst(l
);
8618 asize
= sizeof(list
);
8620 robj
*ele
= ln
->value
;
8623 elesize
= (ele
->encoding
== REDIS_ENCODING_RAW
) ?
8624 (sizeof(*o
)+sdslen(ele
->ptr
)) :
8626 asize
+= (sizeof(listNode
)+elesize
)*listLength(l
);
8631 z
= (o
->type
== REDIS_ZSET
);
8632 d
= z
? ((zset
*)o
->ptr
)->dict
: o
->ptr
;
8634 asize
= sizeof(dict
)+(sizeof(struct dictEntry
*)*dictSlots(d
));
8635 if (z
) asize
+= sizeof(zset
)-sizeof(dict
);
8640 de
= dictGetRandomKey(d
);
8641 ele
= dictGetEntryKey(de
);
8642 elesize
= (ele
->encoding
== REDIS_ENCODING_RAW
) ?
8643 (sizeof(*o
)+sdslen(ele
->ptr
)) :
8645 asize
+= (sizeof(struct dictEntry
)+elesize
)*dictSize(d
);
8646 if (z
) asize
+= sizeof(zskiplistNode
)*dictSize(d
);
8650 if (o
->encoding
== REDIS_ENCODING_ZIPMAP
) {
8651 unsigned char *p
= zipmapRewind((unsigned char*)o
->ptr
);
8652 unsigned int len
= zipmapLen((unsigned char*)o
->ptr
);
8653 unsigned int klen
, vlen
;
8654 unsigned char *key
, *val
;
8656 if ((p
= zipmapNext(p
,&key
,&klen
,&val
,&vlen
)) == NULL
) {
8660 asize
= len
*(klen
+vlen
+3);
8661 } else if (o
->encoding
== REDIS_ENCODING_HT
) {
8663 asize
= sizeof(dict
)+(sizeof(struct dictEntry
*)*dictSlots(d
));
8668 de
= dictGetRandomKey(d
);
8669 ele
= dictGetEntryKey(de
);
8670 elesize
= (ele
->encoding
== REDIS_ENCODING_RAW
) ?
8671 (sizeof(*o
)+sdslen(ele
->ptr
)) :
8673 ele
= dictGetEntryVal(de
);
8674 elesize
= (ele
->encoding
== REDIS_ENCODING_RAW
) ?
8675 (sizeof(*o
)+sdslen(ele
->ptr
)) :
8677 asize
+= (sizeof(struct dictEntry
)+elesize
)*dictSize(d
);
8682 return (double)age
*log(1+asize
);
8685 /* Try to swap an object that's a good candidate for swapping.
8686 * Returns REDIS_OK if the object was swapped, REDIS_ERR if it's not possible
8687 * to swap any object at all.
8689 * If 'usethreads' is true, Redis will try to swap the object in background
8690 * using I/O threads. */
8691 static int vmSwapOneObject(int usethreads
) {
8693 struct dictEntry
*best
= NULL
;
8694 double best_swappability
= 0;
8695 redisDb
*best_db
= NULL
;
8698 for (j
= 0; j
< server
.dbnum
; j
++) {
8699 redisDb
*db
= server
.db
+j
;
8700 /* Why maxtries is set to 100?
8701 * Because this way (usually) we'll find 1 object even if just 1% - 2%
8702 * are swappable objects */
8705 if (dictSize(db
->dict
) == 0) continue;
8706 for (i
= 0; i
< 5; i
++) {
8708 double swappability
;
8710 if (maxtries
) maxtries
--;
8711 de
= dictGetRandomKey(db
->dict
);
8712 key
= dictGetEntryKey(de
);
8713 val
= dictGetEntryVal(de
);
8714 /* Only swap objects that are currently in memory.
8716 * Also don't swap shared objects if threaded VM is on, as we
8717 * try to ensure that the main thread does not touch the
8718 * object while the I/O thread is using it, but we can't
8719 * control other keys without adding additional mutex. */
8720 if (key
->storage
!= REDIS_VM_MEMORY
||
8721 (server
.vm_max_threads
!= 0 && val
->refcount
!= 1)) {
8722 if (maxtries
) i
--; /* don't count this try */
8725 swappability
= computeObjectSwappability(val
);
8726 if (!best
|| swappability
> best_swappability
) {
8728 best_swappability
= swappability
;
8733 if (best
== NULL
) return REDIS_ERR
;
8734 key
= dictGetEntryKey(best
);
8735 val
= dictGetEntryVal(best
);
8737 redisLog(REDIS_DEBUG
,"Key with best swappability: %s, %f",
8738 key
->ptr
, best_swappability
);
8740 /* Unshare the key if needed */
8741 if (key
->refcount
> 1) {
8742 robj
*newkey
= dupStringObject(key
);
8744 key
= dictGetEntryKey(best
) = newkey
;
8748 vmSwapObjectThreaded(key
,val
,best_db
);
8751 if (vmSwapObjectBlocking(key
,val
) == REDIS_OK
) {
8752 dictGetEntryVal(best
) = NULL
;
/* Try to swap one object out without using the I/O threads: this is
 * vmSwapOneObject() called with 'usethreads' set to 0.
 * The return value is the one of vmSwapOneObject(): REDIS_OK if an object
 * was swapped, REDIS_ERR if it's not possible to swap any object at all. */
static int vmSwapOneObjectBlocking(void) {
    return vmSwapOneObject(0);
}
/* Try to swap one object out in background using the I/O threads: this is
 * vmSwapOneObject() called with 'usethreads' set to 1.
 * The return value is the one of vmSwapOneObject(): REDIS_OK if an object
 * was swapped, REDIS_ERR if it's not possible to swap any object at all. */
static int vmSwapOneObjectThreaded(void) {
    return vmSwapOneObject(1);
}
8768 /* Return true if it's safe to swap out objects in a given moment.
8769 * Basically we don't want to swap objects out while there is a BGSAVE
8770 * or a BGAEOREWRITE running in backgroud. */
8771 static int vmCanSwapOut(void) {
8772 return (server
.bgsavechildpid
== -1 && server
.bgrewritechildpid
== -1);
8775 /* Delete a key if swapped. Returns 1 if the key was found, was swapped
8776 * and was deleted. Otherwise 0 is returned. */
8777 static int deleteIfSwapped(redisDb
*db
, robj
*key
) {
8781 if ((de
= dictFind(db
->dict
,key
)) == NULL
) return 0;
8782 foundkey
= dictGetEntryKey(de
);
8783 if (foundkey
->storage
== REDIS_VM_MEMORY
) return 0;
8788 /* =================== Virtual Memory - Threaded I/O ======================= */
/* Release the value attached to an I/O job: when the job type is
 * PREPARE_SWAP, DO_SWAP or LOAD and j->val is not NULL, the reference held
 * on j->val is dropped with decrRefCount(). */
8790 static void freeIOJob(iojob
*j
) {
8791 if ((j
->type
== REDIS_IOJOB_PREPARE_SWAP
||
8792 j
->type
== REDIS_IOJOB_DO_SWAP
||
8793 j
->type
== REDIS_IOJOB_LOAD
) && j
->val
!= NULL
)
8794 decrRefCount(j
->val
);
8795 /* We don't decrRefCount the j->key field as we didn't increment
8796 * the count when creating I/O jobs. This is because the key field here is
8797 * just used as an identifier and if a key is removed the job should
8798 * never be touched again. */
8802 /* Every time a thread finishes a job, it writes a byte into the write side
8803 * of a unix pipe in order to "awake" the main thread, and this function
 * is called. */
8805 static void vmThreadedIOCompletedJob(aeEventLoop
*el
, int fd
, void *privdata
,
8809 int retval
, processed
= 0, toprocess
= -1, trytoswap
= 1;
8811 REDIS_NOTUSED(mask
);
8812 REDIS_NOTUSED(privdata
);
8814 /* For every byte we read in the read side of the pipe, there is one
8815 * I/O job completed to process. */
8816 while((retval
= read(fd
,buf
,1)) == 1) {
8820 struct dictEntry
*de
;
8822 redisLog(REDIS_DEBUG
,"Processing I/O completed job");
8824 /* Get the processed element (the oldest one) */
8826 assert(listLength(server
.io_processed
) != 0);
8827 if (toprocess
== -1) {
8828 toprocess
= (listLength(server
.io_processed
)*REDIS_MAX_COMPLETED_JOBS_PROCESSED
)/100;
8829 if (toprocess
<= 0) toprocess
= 1;
8831 ln
= listFirst(server
.io_processed
);
8833 listDelNode(server
.io_processed
,ln
);
8835 /* If this job is marked as canceled, just ignore it */
8840 /* Post process it in the main thread, as there are things we
8841 * can do just here to avoid race conditions and/or invasive locks */
8842 redisLog(REDIS_DEBUG
,"Job %p type: %d, key at %p (%s) refcount: %d\n", (void*) j
, j
->type
, (void*)j
->key
, (char*)j
->key
->ptr
, j
->key
->refcount
);
8843 de
= dictFind(j
->db
->dict
,j
->key
);
8845 key
= dictGetEntryKey(de
);
8846 if (j
->type
== REDIS_IOJOB_LOAD
) {
8849 /* Key loaded, bring it at home */
8850 key
->storage
= REDIS_VM_MEMORY
;
8851 key
->vm
.atime
= server
.unixtime
;
8852 vmMarkPagesFree(key
->vm
.page
,key
->vm
.usedpages
);
8853 redisLog(REDIS_DEBUG
, "VM: object %s loaded from disk (threaded)",
8854 (unsigned char*) key
->ptr
);
8855 server
.vm_stats_swapped_objects
--;
8856 server
.vm_stats_swapins
++;
8857 dictGetEntryVal(de
) = j
->val
;
8858 incrRefCount(j
->val
);
8861 /* Handle clients waiting for this key to be loaded. */
8862 handleClientsBlockedOnSwappedKey(db
,key
);
8863 } else if (j
->type
== REDIS_IOJOB_PREPARE_SWAP
) {
8864 /* Now we know the amount of pages required to swap this object.
8865 * Let's find some space for it, and queue this task again
8866 * rebranded as REDIS_IOJOB_DO_SWAP. */
8867 if (!vmCanSwapOut() ||
8868 vmFindContiguousPages(&j
->page
,j
->pages
) == REDIS_ERR
)
8870 /* Ooops... no space or we can't swap as there is
8871 * a fork()ed Redis trying to save stuff on disk. */
8873 key
->storage
= REDIS_VM_MEMORY
; /* undo operation */
8875 /* Note that we need to mark this pages as used now,
8876 * if the job will be canceled, we'll mark them as freed
8878 vmMarkPagesUsed(j
->page
,j
->pages
);
8879 j
->type
= REDIS_IOJOB_DO_SWAP
;
8884 } else if (j
->type
== REDIS_IOJOB_DO_SWAP
) {
8887 /* Key swapped. We can finally free some memory. */
8888 if (key
->storage
!= REDIS_VM_SWAPPING
) {
8889 printf("key->storage: %d\n",key
->storage
);
8890 printf("key->name: %s\n",(char*)key
->ptr
);
8891 printf("key->refcount: %d\n",key
->refcount
);
8892 printf("val: %p\n",(void*)j
->val
);
8893 printf("val->type: %d\n",j
->val
->type
);
8894 printf("val->ptr: %s\n",(char*)j
->val
->ptr
);
8896 redisAssert(key
->storage
== REDIS_VM_SWAPPING
);
8897 val
= dictGetEntryVal(de
);
8898 key
->vm
.page
= j
->page
;
8899 key
->vm
.usedpages
= j
->pages
;
8900 key
->storage
= REDIS_VM_SWAPPED
;
8901 key
->vtype
= j
->val
->type
;
8902 decrRefCount(val
); /* Deallocate the object from memory. */
8903 dictGetEntryVal(de
) = NULL
;
8904 redisLog(REDIS_DEBUG
,
8905 "VM: object %s swapped out at %lld (%lld pages) (threaded)",
8906 (unsigned char*) key
->ptr
,
8907 (unsigned long long) j
->page
, (unsigned long long) j
->pages
);
8908 server
.vm_stats_swapped_objects
++;
8909 server
.vm_stats_swapouts
++;
8911 /* Put a few more swap requests in queue if we are still
8913 if (trytoswap
&& vmCanSwapOut() &&
8914 zmalloc_used_memory() > server
.vm_max_memory
)
8919 more
= listLength(server
.io_newjobs
) <
8920 (unsigned) server
.vm_max_threads
;
8922 /* Don't waste CPU time if swappable objects are rare. */
8923 if (vmSwapOneObjectThreaded() == REDIS_ERR
) {
8931 if (processed
== toprocess
) return;
8933 if (retval
< 0 && errno
!= EAGAIN
) {
8934 redisLog(REDIS_WARNING
,
8935 "WARNING: read(2) error in vmThreadedIOCompletedJob() %s",
8940 static void lockThreadedIO(void) {
8941 pthread_mutex_lock(&server
.io_mutex
);
8944 static void unlockThreadedIO(void) {
8945 pthread_mutex_unlock(&server
.io_mutex
);
8948 /* Remove the specified object from the threaded I/O queue if still not
8949 * processed, otherwise make sure to flag it as canceled. */
8950 static void vmCancelThreadedIOJob(robj
*o
) {
8952 server
.io_newjobs
, /* 0 */
8953 server
.io_processing
, /* 1 */
8954 server
.io_processed
/* 2 */
8958 assert(o
->storage
== REDIS_VM_LOADING
|| o
->storage
== REDIS_VM_SWAPPING
);
8961 /* Search for a matching key in one of the queues */
8962 for (i
= 0; i
< 3; i
++) {
8966 listRewind(lists
[i
],&li
);
8967 while ((ln
= listNext(&li
)) != NULL
) {
8968 iojob
*job
= ln
->value
;
8970 if (job
->canceled
) continue; /* Skip this, already canceled. */
8971 if (job
->key
== o
) {
8972 redisLog(REDIS_DEBUG
,"*** CANCELED %p (%s) (type %d) (LIST ID %d)\n",
8973 (void*)job
, (char*)o
->ptr
, job
->type
, i
);
8974 /* Mark the pages as free since the swap didn't happened
8975 * or happened but is now discarded. */
8976 if (i
!= 1 && job
->type
== REDIS_IOJOB_DO_SWAP
)
8977 vmMarkPagesFree(job
->page
,job
->pages
);
8978 /* Cancel the job. It depends on the list the job is
8981 case 0: /* io_newjobs */
8982 /* If the job was yet not processed the best thing to do
8983 * is to remove it from the queue at all */
8985 listDelNode(lists
[i
],ln
);
8987 case 1: /* io_processing */
8988 /* Oh Shi- the thread is messing with the Job:
8990 * Probably it's accessing the object if this is a
8991 * PREPARE_SWAP or DO_SWAP job.
8992 * If it's a LOAD job it may be reading from disk and
8993 * if we don't wait for the job to terminate before to
8994 * cancel it, maybe in a few microseconds data can be
8995 * corrupted in this pages. So the short story is:
8997 * Better to wait for the job to move into the
8998 * next queue (processed)... */
9000 /* We try again and again until the job is completed. */
9002 /* But let's wait some time for the I/O thread
9003 * to finish with this job. After all this condition
9004 * should be very rare. */
9007 case 2: /* io_processed */
9008 /* The job was already processed, that's easy...
9009 * just mark it as canceled so that we'll ignore it
9010 * when processing completed jobs. */
9014 /* Finally we have to adjust the storage type of the object
9015 * in order to "UNDO" the operaiton. */
9016 if (o
->storage
== REDIS_VM_LOADING
)
9017 o
->storage
= REDIS_VM_SWAPPED
;
9018 else if (o
->storage
== REDIS_VM_SWAPPING
)
9019 o
->storage
= REDIS_VM_MEMORY
;
9026 assert(1 != 1); /* We should never reach this */
9029 static void *IOThreadEntryPoint(void *arg
) {
9034 pthread_detach(pthread_self());
9036 /* Get a new job to process */
9038 if (listLength(server
.io_newjobs
) == 0) {
9039 /* No new jobs in queue, exit. */
9040 redisLog(REDIS_DEBUG
,"Thread %ld exiting, nothing to do",
9041 (long) pthread_self());
9042 server
.io_active_threads
--;
9046 ln
= listFirst(server
.io_newjobs
);
9048 listDelNode(server
.io_newjobs
,ln
);
9049 /* Add the job in the processing queue */
9050 j
->thread
= pthread_self();
9051 listAddNodeTail(server
.io_processing
,j
);
9052 ln
= listLast(server
.io_processing
); /* We use ln later to remove it */
9054 redisLog(REDIS_DEBUG
,"Thread %ld got a new job (type %d): %p about key '%s'",
9055 (long) pthread_self(), j
->type
, (void*)j
, (char*)j
->key
->ptr
);
9057 /* Process the Job */
9058 if (j
->type
== REDIS_IOJOB_LOAD
) {
9059 j
->val
= vmReadObjectFromSwap(j
->page
,j
->key
->vtype
);
9060 } else if (j
->type
== REDIS_IOJOB_PREPARE_SWAP
) {
9061 FILE *fp
= fopen("/dev/null","w+");
9062 j
->pages
= rdbSavedObjectPages(j
->val
,fp
);
9064 } else if (j
->type
== REDIS_IOJOB_DO_SWAP
) {
9065 if (vmWriteObjectOnSwap(j
->val
,j
->page
) == REDIS_ERR
)
9069 /* Done: insert the job into the processed queue */
9070 redisLog(REDIS_DEBUG
,"Thread %ld completed the job: %p (key %s)",
9071 (long) pthread_self(), (void*)j
, (char*)j
->key
->ptr
);
9073 listDelNode(server
.io_processing
,ln
);
9074 listAddNodeTail(server
.io_processed
,j
);
9077 /* Signal the main thread there is new stuff to process */
9078 assert(write(server
.io_ready_pipe_write
,"x",1) == 1);
9080 return NULL
; /* never reached */
9083 static void spawnIOThread(void) {
9085 sigset_t mask
, omask
;
9089 sigaddset(&mask
,SIGCHLD
);
9090 sigaddset(&mask
,SIGHUP
);
9091 sigaddset(&mask
,SIGPIPE
);
9092 pthread_sigmask(SIG_SETMASK
, &mask
, &omask
);
9093 while ((err
= pthread_create(&thread
,&server
.io_threads_attr
,IOThreadEntryPoint
,NULL
)) != 0) {
9094 redisLog(REDIS_WARNING
,"Unable to spawn an I/O thread: %s",
9098 pthread_sigmask(SIG_SETMASK
, &omask
, NULL
);
9099 server
.io_active_threads
++;
9102 /* We need to wait for the last thread to exit before we are able to
9103 * fork() in order to BGSAVE or BGREWRITEAOF. */
9104 static void waitEmptyIOJobsQueue(void) {
9106 int io_processed_len
;
9109 if (listLength(server
.io_newjobs
) == 0 &&
9110 listLength(server
.io_processing
) == 0 &&
9111 server
.io_active_threads
== 0)
9116 /* While waiting for empty jobs queue condition we post-process some
9117 * finshed job, as I/O threads may be hanging trying to write against
9118 * the io_ready_pipe_write FD but there are so much pending jobs that
9120 io_processed_len
= listLength(server
.io_processed
);
9122 if (io_processed_len
) {
9123 vmThreadedIOCompletedJob(NULL
,server
.io_ready_pipe_read
,NULL
,0);
9124 usleep(1000); /* 1 millisecond */
9126 usleep(10000); /* 10 milliseconds */
/* Open the file named by server.vm_swap_file again in "r+b" mode, storing
 * the new stream in server.vm_fp and its descriptor in server.vm_fd.
 * A failed fopen() is logged at warning level. */
9131 static void vmReopenSwapFile(void) {
9132 /* Note: we don't close the old stream, as we are in the child process
9133 * and don't want to interfere at all with the original file object. */
9134 server
.vm_fp
= fopen(server
.vm_swap_file
,"r+b");
9135 if (server
.vm_fp
== NULL
) {
9136 redisLog(REDIS_WARNING
,"Can't re-open the VM swap file: %s. Exiting.",
9137 server
.vm_swap_file
);
9140 server
.vm_fd
= fileno(server
.vm_fp
);
9143 /* Log the job at debug level and append it to the tail of
 * server.io_newjobs. This function must be called with the threaded I/O
 * lock held. */
9144 static void queueIOJob(iojob
*j
) {
9145 redisLog(REDIS_DEBUG
,"Queued IO Job %p type %d about key '%s'\n",
9146 (void*)j
, j
->type
, (char*)j
->key
->ptr
);
9147 listAddNodeTail(server
.io_newjobs
,j
);
/* Taken only while fewer threads are active than vm_max_threads allows. */
9148 if (server
.io_active_threads
< server
.vm_max_threads
)
9152 static int vmSwapObjectThreaded(robj
*key
, robj
*val
, redisDb
*db
) {
9155 assert(key
->storage
== REDIS_VM_MEMORY
);
9156 assert(key
->refcount
== 1);
9158 j
= zmalloc(sizeof(*j
));
9159 j
->type
= REDIS_IOJOB_PREPARE_SWAP
;
9165 j
->thread
= (pthread_t
) -1;
9166 key
->storage
= REDIS_VM_SWAPPING
;
9174 /* ============ Virtual Memory - Blocking clients on missing keys =========== */
9176 /* This function makes the client 'c' wait for the key 'key' to be loaded.
9177 * If there is not already a job loading the key, it is created.
9178 * The key is added to the io_keys list in the client structure, and also
9179 * to the hash table mapping swapped keys to waiting clients, that is,
9180 * the io_keys dictionary of the client's database (c->db->io_keys). */
9181 static int waitForSwappedKey(redisClient
*c
, robj
*key
) {
9182 struct dictEntry
*de
;
9186 /* If the key does not exist or is already in RAM we don't need to
9187 * block the client at all. */
9188 de
= dictFind(c
->db
->dict
,key
);
9189 if (de
== NULL
) return 0;
9190 o
= dictGetEntryKey(de
);
9191 if (o
->storage
== REDIS_VM_MEMORY
) {
9193 } else if (o
->storage
== REDIS_VM_SWAPPING
) {
9194 /* We were swapping the key, undo it! */
9195 vmCancelThreadedIOJob(o
);
9199 /* OK: the key is either swapped, or being loaded just now. */
9201 /* Add the key to the list of keys this client is waiting for.
9202 * This maps clients to keys they are waiting for. */
9203 listAddNodeTail(c
->io_keys
,key
);
9206 /* Add the client to the swapped keys => clients waiting map. */
9207 de
= dictFind(c
->db
->io_keys
,key
);
9211 /* For every key we take a list of clients blocked for it */
9213 retval
= dictAdd(c
->db
->io_keys
,key
,l
);
9215 assert(retval
== DICT_OK
);
9217 l
= dictGetEntryVal(de
);
9219 listAddNodeTail(l
,c
);
9221 /* Are we already loading the key from disk? If not create a job */
9222 if (o
->storage
== REDIS_VM_SWAPPED
) {
9225 o
->storage
= REDIS_VM_LOADING
;
9226 j
= zmalloc(sizeof(*j
));
9227 j
->type
= REDIS_IOJOB_LOAD
;
9230 j
->key
->vtype
= o
->vtype
;
9231 j
->page
= o
->vm
.page
;
9234 j
->thread
= (pthread_t
) -1;
9242 /* Preload keys needed for the ZUNION and ZINTER commands. */
9243 static void zunionInterBlockClientOnSwappedKeys(redisClient
*c
) {
9245 num
= atoi(c
->argv
[2]->ptr
);
9246 for (i
= 0; i
< num
; i
++) {
9247 waitForSwappedKey(c
,c
->argv
[3+i
]);
9251 /* Is this client attempting to run a command against swapped keys?
9252 * If so, block it ASAP, load the keys in background, then resume it.
9254 * The important idea about this function is that it can fail! If keys will
9255 * still be swapped when the client is resumed, this key lookups will
9256 * just block loading keys from disk. In practical terms this should only
9257 * happen with SORT BY command or if there is a bug in this function.
9259 * Return 1 if the client is marked as blocked, 0 if the client can
9260 * continue as the keys it is going to access appear to be in memory. */
9261 static int blockClientOnSwappedKeys(struct redisCommand
*cmd
, redisClient
*c
) {
9264 if (cmd
->vm_preload_proc
!= NULL
) {
9265 cmd
->vm_preload_proc(c
);
9267 if (cmd
->vm_firstkey
== 0) return 0;
9268 last
= cmd
->vm_lastkey
;
9269 if (last
< 0) last
= c
->argc
+last
;
9270 for (j
= cmd
->vm_firstkey
; j
<= last
; j
+= cmd
->vm_keystep
)
9271 waitForSwappedKey(c
,c
->argv
[j
]);
9274 /* If the client was blocked for at least one key, mark it as blocked. */
9275 if (listLength(c
->io_keys
)) {
9276 c
->flags
|= REDIS_IO_WAIT
;
9277 aeDeleteFileEvent(server
.el
,c
->fd
,AE_READABLE
);
9278 server
.vm_blocked_clients
++;
9285 /* Remove the 'key' from the list of blocked keys for a given client.
9287 * The function returns 1 when there are no longer blocking keys after
9288 * the current one was removed (and the client can be unblocked). */
9289 static int dontWaitForSwappedKey(redisClient
*c
, robj
*key
) {
9293 struct dictEntry
*de
;
9295 /* Remove the key from the list of keys this client is waiting for. */
9296 listRewind(c
->io_keys
,&li
);
9297 while ((ln
= listNext(&li
)) != NULL
) {
9298 if (compareStringObjects(ln
->value
,key
) == 0) {
9299 listDelNode(c
->io_keys
,ln
);
9305 /* Remove the client form the key => waiting clients map. */
9306 de
= dictFind(c
->db
->io_keys
,key
);
9308 l
= dictGetEntryVal(de
);
9309 ln
= listSearchKey(l
,c
);
9312 if (listLength(l
) == 0)
9313 dictDelete(c
->db
->io_keys
,key
);
9315 return listLength(c
->io_keys
) == 0;
9318 static void handleClientsBlockedOnSwappedKey(redisDb
*db
, robj
*key
) {
9319 struct dictEntry
*de
;
9324 de
= dictFind(db
->io_keys
,key
);
9327 l
= dictGetEntryVal(de
);
9328 len
= listLength(l
);
9329 /* Note: we can't use something like while(listLength(l)) as the list
9330 * can be freed by the calling function when we remove the last element. */
9333 redisClient
*c
= ln
->value
;
9335 if (dontWaitForSwappedKey(c
,key
)) {
9336 /* Put the client in the list of clients ready to go as we
9337 * loaded all the keys about it. */
9338 listAddNodeTail(server
.io_ready_clients
,c
);
9343 /* =========================== Remote Configuration ========================= */
9345 static void configSetCommand(redisClient
*c
) {
9346 robj
*o
= getDecodedObject(c
->argv
[3]);
9347 if (!strcasecmp(c
->argv
[2]->ptr
,"dbfilename")) {
9348 zfree(server
.dbfilename
);
9349 server
.dbfilename
= zstrdup(o
->ptr
);
9350 } else if (!strcasecmp(c
->argv
[2]->ptr
,"requirepass")) {
9351 zfree(server
.requirepass
);
9352 server
.requirepass
= zstrdup(o
->ptr
);
9353 } else if (!strcasecmp(c
->argv
[2]->ptr
,"masterauth")) {
9354 zfree(server
.masterauth
);
9355 server
.masterauth
= zstrdup(o
->ptr
);
9356 } else if (!strcasecmp(c
->argv
[2]->ptr
,"maxmemory")) {
9357 server
.maxmemory
= strtoll(o
->ptr
, NULL
, 10);
9359 addReplySds(c
,sdscatprintf(sdsempty(),
9360 "-ERR not supported CONFIG parameter %s\r\n",
9361 (char*)c
->argv
[2]->ptr
));
9366 addReply(c
,shared
.ok
);
9369 static void configGetCommand(redisClient
*c
) {
9370 robj
*o
= getDecodedObject(c
->argv
[2]);
9371 robj
*lenobj
= createObject(REDIS_STRING
,NULL
);
9372 char *pattern
= o
->ptr
;
9376 decrRefCount(lenobj
);
9378 if (stringmatch(pattern
,"dbfilename",0)) {
9379 addReplyBulkCString(c
,"dbfilename");
9380 addReplyBulkCString(c
,server
.dbfilename
);
9383 if (stringmatch(pattern
,"requirepass",0)) {
9384 addReplyBulkCString(c
,"requirepass");
9385 addReplyBulkCString(c
,server
.requirepass
);
9388 if (stringmatch(pattern
,"masterauth",0)) {
9389 addReplyBulkCString(c
,"masterauth");
9390 addReplyBulkCString(c
,server
.masterauth
);
9393 if (stringmatch(pattern
,"maxmemory",0)) {
9396 snprintf(buf
,128,"%llu\n",server
.maxmemory
);
9397 addReplyBulkCString(c
,"maxmemory");
9398 addReplyBulkCString(c
,buf
);
9402 lenobj
->ptr
= sdscatprintf(sdsempty(),"*%d\r\n",matches
*2);
9405 static void configCommand(redisClient
*c
) {
9406 if (!strcasecmp(c
->argv
[1]->ptr
,"set")) {
9407 if (c
->argc
!= 4) goto badarity
;
9408 configSetCommand(c
);
9409 } else if (!strcasecmp(c
->argv
[1]->ptr
,"get")) {
9410 if (c
->argc
!= 3) goto badarity
;
9411 configGetCommand(c
);
9412 } else if (!strcasecmp(c
->argv
[1]->ptr
,"resetstat")) {
9413 if (c
->argc
!= 2) goto badarity
;
9414 server
.stat_numcommands
= 0;
9415 server
.stat_numconnections
= 0;
9416 server
.stat_expiredkeys
= 0;
9417 server
.stat_starttime
= time(NULL
);
9418 addReply(c
,shared
.ok
);
9420 addReplySds(c
,sdscatprintf(sdsempty(),
9421 "-ERR CONFIG subcommand must be one of GET, SET, RESETSTAT\r\n"));
9426 addReplySds(c
,sdscatprintf(sdsempty(),
9427 "-ERR Wrong number of arguments for CONFIG %s\r\n",
9428 (char*) c
->argv
[1]->ptr
));
9431 /* =========================== Pubsub implementation ======================== */
9433 static void freePubsubPattern(void *p
) {
9434 pubsubPattern
*pat
= p
;
9436 decrRefCount(pat
->pattern
);
9440 static int listMatchPubsubPattern(void *a
, void *b
) {
9441 pubsubPattern
*pa
= a
, *pb
= b
;
9443 return (pa
->client
== pb
->client
) &&
9444 (compareStringObjects(pa
->pattern
,pb
->pattern
) == 0);
9447 /* Subscribe a client to a channel. Returns 1 if the operation succeeded, or
9448 * 0 if the client was already subscribed to that channel. */
9449 static int pubsubSubscribeChannel(redisClient
*c
, robj
*channel
) {
9450 struct dictEntry
*de
;
9451 list
*clients
= NULL
;
9454 /* Add the channel to the client -> channels hash table */
9455 if (dictAdd(c
->pubsub_channels
,channel
,NULL
) == DICT_OK
) {
9457 incrRefCount(channel
);
9458 /* Add the client to the channel -> list of clients hash table */
9459 de
= dictFind(server
.pubsub_channels
,channel
);
9461 clients
= listCreate();
9462 dictAdd(server
.pubsub_channels
,channel
,clients
);
9463 incrRefCount(channel
);
9465 clients
= dictGetEntryVal(de
);
9467 listAddNodeTail(clients
,c
);
9469 /* Notify the client */
9470 addReply(c
,shared
.mbulk3
);
9471 addReply(c
,shared
.subscribebulk
);
9472 addReplyBulk(c
,channel
);
9473 addReplyLong(c
,dictSize(c
->pubsub_channels
)+listLength(c
->pubsub_patterns
));
9477 /* Unsubscribe a client from a channel. Returns 1 if the operation succeeded, or
9478 * 0 if the client was not subscribed to the specified channel. */
9479 static int pubsubUnsubscribeChannel(redisClient
*c
, robj
*channel
, int notify
) {
9480 struct dictEntry
*de
;
9485 /* Remove the channel from the client -> channels hash table */
9486 incrRefCount(channel
); /* channel may be just a pointer to the same object
9487 we have in the hash tables. Protect it... */
9488 if (dictDelete(c
->pubsub_channels
,channel
) == DICT_OK
) {
9490 /* Remove the client from the channel -> clients list hash table */
9491 de
= dictFind(server
.pubsub_channels
,channel
);
9493 clients
= dictGetEntryVal(de
);
9494 ln
= listSearchKey(clients
,c
);
9496 listDelNode(clients
,ln
);
9497 if (listLength(clients
) == 0) {
9498 /* Free the list and associated hash entry at all if this was
9499 * the latest client, so that it will be possible to abuse
9500 * Redis PUBSUB creating millions of channels. */
9501 dictDelete(server
.pubsub_channels
,channel
);
9504 /* Notify the client */
9506 addReply(c
,shared
.mbulk3
);
9507 addReply(c
,shared
.unsubscribebulk
);
9508 addReplyBulk(c
,channel
);
9509 addReplyLong(c
,dictSize(c
->pubsub_channels
)+
9510 listLength(c
->pubsub_patterns
));
9513 decrRefCount(channel
); /* it is finally safe to release it */
9517 /* Subscribe a client to a pattern. Returns 1 if the operation succeeded, or 0 if the client was already subscribed to that pattern. */
9518 static int pubsubSubscribePattern(redisClient
*c
, robj
*pattern
) {
9521 if (listSearchKey(c
->pubsub_patterns
,pattern
) == NULL
) {
9524 listAddNodeTail(c
->pubsub_patterns
,pattern
);
9525 incrRefCount(pattern
);
9526 pat
= zmalloc(sizeof(*pat
));
9527 pat
->pattern
= getDecodedObject(pattern
);
9529 listAddNodeTail(server
.pubsub_patterns
,pat
);
9531 /* Notify the client */
9532 addReply(c
,shared
.mbulk3
);
9533 addReply(c
,shared
.psubscribebulk
);
9534 addReplyBulk(c
,pattern
);
9535 addReplyLong(c
,dictSize(c
->pubsub_channels
)+listLength(c
->pubsub_patterns
));
9539 /* Unsubscribe a client from a pattern. Returns 1 if the operation succeeded, or
9540 * 0 if the client was not subscribed to the specified pattern. */
9541 static int pubsubUnsubscribePattern(redisClient
*c
, robj
*pattern
, int notify
) {
9546 incrRefCount(pattern
); /* Protect the object. May be the same we remove */
9547 if ((ln
= listSearchKey(c
->pubsub_patterns
,pattern
)) != NULL
) {
9549 listDelNode(c
->pubsub_patterns
,ln
);
9551 pat
.pattern
= pattern
;
9552 ln
= listSearchKey(server
.pubsub_patterns
,&pat
);
9553 listDelNode(server
.pubsub_patterns
,ln
);
9555 /* Notify the client */
9557 addReply(c
,shared
.mbulk3
);
9558 addReply(c
,shared
.punsubscribebulk
);
9559 addReplyBulk(c
,pattern
);
9560 addReplyLong(c
,dictSize(c
->pubsub_channels
)+
9561 listLength(c
->pubsub_patterns
));
9563 decrRefCount(pattern
);
9567 /* Unsubscribe from all the channels. Return the number of channels the
9568 * client was subscribed to. */
9569 static int pubsubUnsubscribeAllChannels(redisClient
*c
, int notify
) {
9570 dictIterator
*di
= dictGetIterator(c
->pubsub_channels
);
9574 while((de
= dictNext(di
)) != NULL
) {
9575 robj
*channel
= dictGetEntryKey(de
);
9577 count
+= pubsubUnsubscribeChannel(c
,channel
,notify
);
9579 dictReleaseIterator(di
);
9583 /* Unsubscribe from all the patterns. Return the number of patterns the
9584 * client was subscribed to. */
9585 static int pubsubUnsubscribeAllPatterns(redisClient
*c
, int notify
) {
9590 listRewind(c
->pubsub_patterns
,&li
);
9591 while ((ln
= listNext(&li
)) != NULL
) {
9592 robj
*pattern
= ln
->value
;
9594 count
+= pubsubUnsubscribePattern(c
,pattern
,notify
);
9599 /* Publish a message */
9600 static int pubsubPublishMessage(robj
*channel
, robj
*message
) {
9602 struct dictEntry
*de
;
9606 /* Send to clients listening for that channel */
9607 de
= dictFind(server
.pubsub_channels
,channel
);
9609 list
*list
= dictGetEntryVal(de
);
9613 listRewind(list
,&li
);
9614 while ((ln
= listNext(&li
)) != NULL
) {
9615 redisClient
*c
= ln
->value
;
9617 addReply(c
,shared
.mbulk3
);
9618 addReply(c
,shared
.messagebulk
);
9619 addReplyBulk(c
,channel
);
9620 addReplyBulk(c
,message
);
9624 /* Send to clients listening to matching channels */
9625 if (listLength(server
.pubsub_patterns
)) {
9626 listRewind(server
.pubsub_patterns
,&li
);
9627 channel
= getDecodedObject(channel
);
9628 while ((ln
= listNext(&li
)) != NULL
) {
9629 pubsubPattern
*pat
= ln
->value
;
9631 if (stringmatchlen((char*)pat
->pattern
->ptr
,
9632 sdslen(pat
->pattern
->ptr
),
9633 (char*)channel
->ptr
,
9634 sdslen(channel
->ptr
),0)) {
9635 addReply(pat
->client
,shared
.mbulk3
);
9636 addReply(pat
->client
,shared
.messagebulk
);
9637 addReplyBulk(pat
->client
,channel
);
9638 addReplyBulk(pat
->client
,message
);
9642 decrRefCount(channel
);
9647 static void subscribeCommand(redisClient
*c
) {
9650 for (j
= 1; j
< c
->argc
; j
++)
9651 pubsubSubscribeChannel(c
,c
->argv
[j
]);
9654 static void unsubscribeCommand(redisClient
*c
) {
9656 pubsubUnsubscribeAllChannels(c
,1);
9661 for (j
= 1; j
< c
->argc
; j
++)
9662 pubsubUnsubscribeChannel(c
,c
->argv
[j
],1);
9666 static void psubscribeCommand(redisClient
*c
) {
9669 for (j
= 1; j
< c
->argc
; j
++)
9670 pubsubSubscribePattern(c
,c
->argv
[j
]);
9673 static void punsubscribeCommand(redisClient
*c
) {
9675 pubsubUnsubscribeAllPatterns(c
,1);
9680 for (j
= 1; j
< c
->argc
; j
++)
9681 pubsubUnsubscribePattern(c
,c
->argv
[j
],1);
9685 static void publishCommand(redisClient
*c
) {
9686 int receivers
= pubsubPublishMessage(c
->argv
[1],c
->argv
[2]);
9687 addReplyLong(c
,receivers
);
9690 /* ================================= Debugging ============================== */
9692 static void debugCommand(redisClient
*c
) {
9693 if (!strcasecmp(c
->argv
[1]->ptr
,"segfault")) {
9695 } else if (!strcasecmp(c
->argv
[1]->ptr
,"reload")) {
9696 if (rdbSave(server
.dbfilename
) != REDIS_OK
) {
9697 addReply(c
,shared
.err
);
9701 if (rdbLoad(server
.dbfilename
) != REDIS_OK
) {
9702 addReply(c
,shared
.err
);
9705 redisLog(REDIS_WARNING
,"DB reloaded by DEBUG RELOAD");
9706 addReply(c
,shared
.ok
);
9707 } else if (!strcasecmp(c
->argv
[1]->ptr
,"loadaof")) {
9709 if (loadAppendOnlyFile(server
.appendfilename
) != REDIS_OK
) {
9710 addReply(c
,shared
.err
);
9713 redisLog(REDIS_WARNING
,"Append Only File loaded by DEBUG LOADAOF");
9714 addReply(c
,shared
.ok
);
9715 } else if (!strcasecmp(c
->argv
[1]->ptr
,"object") && c
->argc
== 3) {
9716 dictEntry
*de
= dictFind(c
->db
->dict
,c
->argv
[2]);
9720 addReply(c
,shared
.nokeyerr
);
9723 key
= dictGetEntryKey(de
);
9724 val
= dictGetEntryVal(de
);
9725 if (!server
.vm_enabled
|| (key
->storage
== REDIS_VM_MEMORY
||
9726 key
->storage
== REDIS_VM_SWAPPING
)) {
9730 if (val
->encoding
< (sizeof(strencoding
)/sizeof(char*))) {
9731 strenc
= strencoding
[val
->encoding
];
9733 snprintf(buf
,64,"unknown encoding %d\n", val
->encoding
);
9736 addReplySds(c
,sdscatprintf(sdsempty(),
9737 "+Key at:%p refcount:%d, value at:%p refcount:%d "
9738 "encoding:%s serializedlength:%lld\r\n",
9739 (void*)key
, key
->refcount
, (void*)val
, val
->refcount
,
9740 strenc
, (long long) rdbSavedObjectLen(val
,NULL
)));
9742 addReplySds(c
,sdscatprintf(sdsempty(),
9743 "+Key at:%p refcount:%d, value swapped at: page %llu "
9744 "using %llu pages\r\n",
9745 (void*)key
, key
->refcount
, (unsigned long long) key
->vm
.page
,
9746 (unsigned long long) key
->vm
.usedpages
));
9748 } else if (!strcasecmp(c
->argv
[1]->ptr
,"swapin") && c
->argc
== 3) {
9749 lookupKeyRead(c
->db
,c
->argv
[2]);
9750 addReply(c
,shared
.ok
);
9751 } else if (!strcasecmp(c
->argv
[1]->ptr
,"swapout") && c
->argc
== 3) {
9752 dictEntry
*de
= dictFind(c
->db
->dict
,c
->argv
[2]);
9755 if (!server
.vm_enabled
) {
9756 addReplySds(c
,sdsnew("-ERR Virtual Memory is disabled\r\n"));
9760 addReply(c
,shared
.nokeyerr
);
9763 key
= dictGetEntryKey(de
);
9764 val
= dictGetEntryVal(de
);
9765 /* If the key is shared we want to create a copy */
9766 if (key
->refcount
> 1) {
9767 robj
*newkey
= dupStringObject(key
);
9769 key
= dictGetEntryKey(de
) = newkey
;
9772 if (key
->storage
!= REDIS_VM_MEMORY
) {
9773 addReplySds(c
,sdsnew("-ERR This key is not in memory\r\n"));
9774 } else if (vmSwapObjectBlocking(key
,val
) == REDIS_OK
) {
9775 dictGetEntryVal(de
) = NULL
;
9776 addReply(c
,shared
.ok
);
9778 addReply(c
,shared
.err
);
9781 addReplySds(c
,sdsnew(
9782 "-ERR Syntax error, try DEBUG [SEGFAULT|OBJECT <key>|SWAPIN <key>|SWAPOUT <key>|RELOAD]\r\n"));
9786 static void _redisAssert(char *estr
, char *file
, int line
) {
9787 redisLog(REDIS_WARNING
,"=== ASSERTION FAILED ===");
9788 redisLog(REDIS_WARNING
,"==> %s:%d '%s' is not true\n",file
,line
,estr
);
9789 #ifdef HAVE_BACKTRACE
9790 redisLog(REDIS_WARNING
,"(forcing SIGSEGV in order to print the stack trace)");
9795 /* =================================== Main! ================================ */
9798 int linuxOvercommitMemoryValue(void) {
9799 FILE *fp
= fopen("/proc/sys/vm/overcommit_memory","r");
9803 if (fgets(buf
,64,fp
) == NULL
) {
9812 void linuxOvercommitMemoryWarning(void) {
9813 if (linuxOvercommitMemoryValue() == 0) {
9814 redisLog(REDIS_WARNING
,"WARNING overcommit_memory is set to 0! Background save may fail under low condition memory. To fix this issue add 'vm.overcommit_memory = 1' to /etc/sysctl.conf and then reboot or run the command 'sysctl vm.overcommit_memory=1' for this to take effect.");
9817 #endif /* __linux__ */
9819 static void daemonize(void) {
9823 if (fork() != 0) exit(0); /* parent exits */
9824 setsid(); /* create a new session */
9826 /* Every output goes to /dev/null. If Redis is daemonized but
9827 * the 'logfile' is set to 'stdout' in the configuration file
9828 * it will not log at all. */
9829 if ((fd
= open("/dev/null", O_RDWR
, 0)) != -1) {
9830 dup2(fd
, STDIN_FILENO
);
9831 dup2(fd
, STDOUT_FILENO
);
9832 dup2(fd
, STDERR_FILENO
);
9833 if (fd
> STDERR_FILENO
) close(fd
);
9835 /* Try to write the pid file */
9836 fp
= fopen(server
.pidfile
,"w");
9838 fprintf(fp
,"%d\n",getpid());
9843 static void version() {
9844 printf("Redis server version %s\n", REDIS_VERSION
);
9848 static void usage() {
9849 fprintf(stderr
,"Usage: ./redis-server [/path/to/redis.conf]\n");
9850 fprintf(stderr
," ./redis-server - (read config from stdin)\n");
9854 int main(int argc
, char **argv
) {
9859 if (strcmp(argv
[1], "-v") == 0 ||
9860 strcmp(argv
[1], "--version") == 0) version();
9861 if (strcmp(argv
[1], "--help") == 0) usage();
9862 resetServerSaveParams();
9863 loadServerConfig(argv
[1]);
9864 } else if ((argc
> 2)) {
9867 redisLog(REDIS_WARNING
,"Warning: no config file specified, using the default config. In order to specify a config file use 'redis-server /path/to/redis.conf'");
9869 if (server
.daemonize
) daemonize();
9871 redisLog(REDIS_NOTICE
,"Server started, Redis version " REDIS_VERSION
);
9873 linuxOvercommitMemoryWarning();
9876 if (server
.appendonly
) {
9877 if (loadAppendOnlyFile(server
.appendfilename
) == REDIS_OK
)
9878 redisLog(REDIS_NOTICE
,"DB loaded from append only file: %ld seconds",time(NULL
)-start
);
9880 if (rdbLoad(server
.dbfilename
) == REDIS_OK
)
9881 redisLog(REDIS_NOTICE
,"DB loaded from disk: %ld seconds",time(NULL
)-start
);
9883 redisLog(REDIS_NOTICE
,"The server is now ready to accept connections on port %d", server
.port
);
9884 aeSetBeforeSleepProc(server
.el
,beforeSleep
);
9886 aeDeleteEventLoop(server
.el
);
9890 /* ============================= Backtrace support ========================= */
9892 #ifdef HAVE_BACKTRACE
9893 static char *findFuncName(void *pointer
, unsigned long *offset
);
9895 static void *getMcontextEip(ucontext_t
*uc
) {
9896 #if defined(__FreeBSD__)
9897 return (void*) uc
->uc_mcontext
.mc_eip
;
9898 #elif defined(__dietlibc__)
9899 return (void*) uc
->uc_mcontext
.eip
;
9900 #elif defined(__APPLE__) && !defined(MAC_OS_X_VERSION_10_6)
9902 return (void*) uc
->uc_mcontext
->__ss
.__rip
;
9904 return (void*) uc
->uc_mcontext
->__ss
.__eip
;
9906 #elif defined(__APPLE__) && defined(MAC_OS_X_VERSION_10_6)
9907 #if defined(_STRUCT_X86_THREAD_STATE64) && !defined(__i386__)
9908 return (void*) uc
->uc_mcontext
->__ss
.__rip
;
9910 return (void*) uc
->uc_mcontext
->__ss
.__eip
;
9912 #elif defined(__i386__) || defined(__X86_64__) || defined(__x86_64__)
9913 return (void*) uc
->uc_mcontext
.gregs
[REG_EIP
]; /* Linux 32/64 bit */
9914 #elif defined(__ia64__) /* Linux IA64 */
9915 return (void*) uc
->uc_mcontext
.sc_ip
;
9921 static void segvHandler(int sig
, siginfo_t
*info
, void *secret
) {
9923 char **messages
= NULL
;
9924 int i
, trace_size
= 0;
9925 unsigned long offset
=0;
9926 ucontext_t
*uc
= (ucontext_t
*) secret
;
9928 REDIS_NOTUSED(info
);
9930 redisLog(REDIS_WARNING
,
9931 "======= Ooops! Redis %s got signal: -%d- =======", REDIS_VERSION
, sig
);
9932 infostring
= genRedisInfoString();
9933 redisLog(REDIS_WARNING
, "%s",infostring
);
9934 /* It's not safe to sdsfree() the returned string under memory
9935 * corruption conditions. Let it leak as we are going to abort */
9937 trace_size
= backtrace(trace
, 100);
9938 /* overwrite sigaction with caller's address */
9939 if (getMcontextEip(uc
) != NULL
) {
9940 trace
[1] = getMcontextEip(uc
);
9942 messages
= backtrace_symbols(trace
, trace_size
);
9944 for (i
=1; i
<trace_size
; ++i
) {
9945 char *fn
= findFuncName(trace
[i
], &offset
), *p
;
9947 p
= strchr(messages
[i
],'+');
9948 if (!fn
|| (p
&& ((unsigned long)strtol(p
+1,NULL
,10)) < offset
)) {
9949 redisLog(REDIS_WARNING
,"%s", messages
[i
]);
9951 redisLog(REDIS_WARNING
,"%d redis-server %p %s + %d", i
, trace
[i
], fn
, (unsigned int)offset
);
9954 /* free(messages); Don't call free() with possibly corrupted memory. */
9958 static void setupSigSegvAction(void) {
9959 struct sigaction act
;
9961 sigemptyset (&act
.sa_mask
);
9962 /* When the SA_SIGINFO flag is set in sa_flags then sa_sigaction
9963 * is used. Otherwise, sa_handler is used */
9964 act
.sa_flags
= SA_NODEFER
| SA_ONSTACK
| SA_RESETHAND
| SA_SIGINFO
;
9965 act
.sa_sigaction
= segvHandler
;
9966 sigaction (SIGSEGV
, &act
, NULL
);
9967 sigaction (SIGBUS
, &act
, NULL
);
9968 sigaction (SIGFPE
, &act
, NULL
);
9969 sigaction (SIGILL
, &act
, NULL
);
9970 sigaction (SIGBUS
, &act
, NULL
);
9974 #include "staticsymbols.h"
9975 /* This function try to convert a pointer into a function name. It's used in
9976 * oreder to provide a backtrace under segmentation fault that's able to
9977 * display functions declared as static (otherwise the backtrace is useless). */
9978 static char *findFuncName(void *pointer
, unsigned long *offset
){
9980 unsigned long off
, minoff
= 0;
9982 /* Try to match against the Symbol with the smallest offset */
9983 for (i
=0; symsTable
[i
].pointer
; i
++) {
9984 unsigned long lp
= (unsigned long) pointer
;
9986 if (lp
!= (unsigned long)-1 && lp
>= symsTable
[i
].pointer
) {
9987 off
=lp
-symsTable
[i
].pointer
;
9988 if (ret
< 0 || off
< minoff
) {
9994 if (ret
== -1) return NULL
;
9996 return symsTable
[ret
].name
;
9998 #else /* HAVE_BACKTRACE */
/* Variant of setupSigSegvAction() compiled when HAVE_BACKTRACE is not
 * defined. NOTE(review): no statement of the body is visible in this
 * listing; presumably it is an empty stub -- confirm. */
9999 static void setupSigSegvAction(void) {
10001 #endif /* HAVE_BACKTRACE */