2 * Copyright (c) 2009-2010, Salvatore Sanfilippo <antirez at gmail dot com>
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions are met:
8 * * Redistributions of source code must retain the above copyright notice,
9 * this list of conditions and the following disclaimer.
10 * * Redistributions in binary form must reproduce the above copyright
11 * notice, this list of conditions and the following disclaimer in the
12 * documentation and/or other materials provided with the distribution.
13 * * Neither the name of Redis nor the names of its contributors may be used
14 * to endorse or promote products derived from this software without
15 * specific prior written permission.
17 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
18 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
21 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
22 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
23 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
24 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
25 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
26 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
27 * POSSIBILITY OF SUCH DAMAGE.
30 #define REDIS_VERSION "1.3.8"
40 #define __USE_POSIX199309
47 #endif /* HAVE_BACKTRACE */
55 #include <arpa/inet.h>
60 #include <sys/resource.h>
67 #include "solarisfixes.h"
71 #include "ae.h" /* Event driven programming library */
72 #include "sds.h" /* Dynamic safe strings */
73 #include "anet.h" /* Networking the easy way */
74 #include "dict.h" /* Hash tables */
75 #include "adlist.h" /* Linked lists */
76 #include "zmalloc.h" /* total memory usage aware version of malloc/free */
77 #include "lzf.h" /* LZF compression library */
78 #include "pqsort.h" /* Partial qsort for SORT+LIMIT */
85 /* Static server configuration */
86 #define REDIS_SERVERPORT 6379 /* TCP port */
87 #define REDIS_MAXIDLETIME (60*5) /* default client timeout */
88 #define REDIS_IOBUF_LEN 1024
89 #define REDIS_LOADBUF_LEN 1024
90 #define REDIS_STATIC_ARGS 8
91 #define REDIS_DEFAULT_DBNUM 16
92 #define REDIS_CONFIGLINE_MAX 1024
93 #define REDIS_OBJFREELIST_MAX 1000000 /* Max number of objects to cache */
94 #define REDIS_MAX_SYNC_TIME 60 /* Slave can't take more to sync */
95 #define REDIS_EXPIRELOOKUPS_PER_CRON 10 /* lookup 10 expires per loop */
96 #define REDIS_MAX_WRITE_PER_EVENT (1024*64)
97 #define REDIS_REQUEST_MAX_SIZE (1024*1024*256) /* max bytes in inline command */
99 /* If more than REDIS_WRITEV_THRESHOLD write packets are pending use writev */
100 #define REDIS_WRITEV_THRESHOLD 3
101 /* Max number of iovecs used for each writev call */
102 #define REDIS_WRITEV_IOVEC_COUNT 256
104 /* Hash table parameters */
105 #define REDIS_HT_MINFILL 10 /* Minimal hash table fill 10% */
108 #define REDIS_CMD_BULK 1 /* Bulk write command */
109 #define REDIS_CMD_INLINE 2 /* Inline command */
110 /* REDIS_CMD_DENYOOM deserves a longer comment: all the commands marked with
111 this flag will return an error when the 'maxmemory' option is set in the
112 config file and the server is using more than maxmemory bytes of memory.
113 In short these commands are denied on low memory conditions. */
114 #define REDIS_CMD_DENYOOM 4
115 #define REDIS_CMD_FORCE_REPLICATION 8 /* Force replication even if dirty is 0 */
118 #define REDIS_STRING 0
124 /* Object encodings. Some kinds of objects like Strings and Hashes can be
125 * internally represented in multiple ways. The 'encoding' field of the object
126 * is set to one of these values for this object. */
127 #define REDIS_ENCODING_RAW 0 /* Raw representation */
128 #define REDIS_ENCODING_INT 1 /* Encoded as integer */
129 #define REDIS_ENCODING_ZIPMAP 2 /* Encoded as zipmap */
130 #define REDIS_ENCODING_HT 3 /* Encoded as a hash table */
132 static char* strencoding
[] = {
133 "raw", "int", "zipmap", "hashtable"
136 /* Object types only used for dumping to disk */
137 #define REDIS_EXPIRETIME 253
138 #define REDIS_SELECTDB 254
139 #define REDIS_EOF 255
141 /* Defines related to the dump file format. To store 32 bits lengths for short
142 * keys requires a lot of space, so we check the most significant 2 bits of
143 * the first byte to interpret the length:
145 * 00|000000 => if the two MSB are 00 the len is the 6 bits of this byte
146 * 01|000000 00000000 => 01, the len is 14 bits, 6 bits + 8 bits of next byte
147 * 10|000000 [32 bit integer] => if it's 10, a full 32 bit len will follow
148 * 11|000000 this means: specially encoded object will follow. The six bits
149 * number specify the kind of object that follows.
150 * See the REDIS_RDB_ENC_* defines.
152 * Lengths up to 63 are stored using a single byte, most DB keys, and many
153 * values, will fit inside. */
154 #define REDIS_RDB_6BITLEN 0
155 #define REDIS_RDB_14BITLEN 1
156 #define REDIS_RDB_32BITLEN 2
157 #define REDIS_RDB_ENCVAL 3
158 #define REDIS_RDB_LENERR UINT_MAX
160 /* When a length of a string object stored on disk has the first two bits
161 * set, the remaining six bits specify a special encoding for the object
162 * according to the following defines: */
163 #define REDIS_RDB_ENC_INT8 0 /* 8 bit signed integer */
164 #define REDIS_RDB_ENC_INT16 1 /* 16 bit signed integer */
165 #define REDIS_RDB_ENC_INT32 2 /* 32 bit signed integer */
166 #define REDIS_RDB_ENC_LZF 3 /* string compressed with LZF */
168 /* Virtual memory object->where field. */
169 #define REDIS_VM_MEMORY 0 /* The object is on memory */
170 #define REDIS_VM_SWAPPED 1 /* The object is on disk */
171 #define REDIS_VM_SWAPPING 2 /* Redis is swapping this object on disk */
172 #define REDIS_VM_LOADING 3 /* Redis is loading this object from disk */
174 /* Virtual memory static configuration stuff.
175 * Check vmFindContiguousPages() to know more about these magic numbers. */
176 #define REDIS_VM_MAX_NEAR_PAGES 65536
177 #define REDIS_VM_MAX_RANDOM_JUMP 4096
178 #define REDIS_VM_MAX_THREADS 32
179 #define REDIS_THREAD_STACK_SIZE (1024*1024*4)
180 /* The following is the *percentage* of completed I/O jobs to process when the
181 * handler is called. While Virtual Memory I/O operations are performed by
182 * threads, these operations must be processed by the main thread when completed
183 * in order to take effect. */
184 #define REDIS_MAX_COMPLETED_JOBS_PROCESSED 1
187 #define REDIS_SLAVE 1 /* This client is a slave server */
188 #define REDIS_MASTER 2 /* This client is a master server */
189 #define REDIS_MONITOR 4 /* This client is a slave monitor, see MONITOR */
190 #define REDIS_MULTI 8 /* This client is in a MULTI context */
191 #define REDIS_BLOCKED 16 /* The client is waiting in a blocking operation */
192 #define REDIS_IO_WAIT 32 /* The client is waiting for Virtual Memory I/O */
194 /* Slave replication state - slave side */
195 #define REDIS_REPL_NONE 0 /* No active replication */
196 #define REDIS_REPL_CONNECT 1 /* Must connect to master */
197 #define REDIS_REPL_CONNECTED 2 /* Connected to master */
199 /* Slave replication state - from the point of view of master
200 * Note that in SEND_BULK and ONLINE state the slave receives new updates
201 * in its output queue. In the WAIT_BGSAVE state instead the server is waiting
202 * to start the next background saving in order to send updates to it. */
203 #define REDIS_REPL_WAIT_BGSAVE_START 3 /* master waits bgsave to start feeding it */
204 #define REDIS_REPL_WAIT_BGSAVE_END 4 /* master waits bgsave to start bulk DB transmission */
205 #define REDIS_REPL_SEND_BULK 5 /* master is sending the bulk DB */
206 #define REDIS_REPL_ONLINE 6 /* bulk DB already transmitted, receive updates */
208 /* List related stuff */
212 /* Sort operations */
213 #define REDIS_SORT_GET 0
214 #define REDIS_SORT_ASC 1
215 #define REDIS_SORT_DESC 2
216 #define REDIS_SORTKEY_MAX 1024
219 #define REDIS_DEBUG 0
220 #define REDIS_VERBOSE 1
221 #define REDIS_NOTICE 2
222 #define REDIS_WARNING 3
224 /* Anti-warning macro... */
225 #define REDIS_NOTUSED(V) ((void) V)
227 #define ZSKIPLIST_MAXLEVEL 32 /* Should be enough for 2^32 elements */
228 #define ZSKIPLIST_P 0.25 /* Skiplist P = 1/4 */
230 /* Append only defines */
231 #define APPENDFSYNC_NO 0
232 #define APPENDFSYNC_ALWAYS 1
233 #define APPENDFSYNC_EVERYSEC 2
235 /* Hashes related defaults */
236 #define REDIS_HASH_MAX_ZIPMAP_ENTRIES 64
237 #define REDIS_HASH_MAX_ZIPMAP_VALUE 512
239 /* We can print the stacktrace, so our assert is defined this way:
 *
 * redisAssert(_e): evaluates _e; when it is false, passes the stringified
 * expression plus __FILE__ and __LINE__ to _redisAssert() and then calls
 * _exit(1).
 *
 * redisPanic(_e): passes the stringified argument plus __FILE__ and __LINE__
 * to _redisPanic() and then calls _exit(1).
 *
 * Both expansions are fully parenthesized so that each macro behaves as one
 * single expression wherever it is used (as an operand, as a function
 * argument, after a cast, ...), instead of splitting at the comma. */
240 #define redisAssert(_e) ((_e)?(void)0 : (_redisAssert(#_e,__FILE__,__LINE__),_exit(1)))
241 #define redisPanic(_e) (_redisPanic(#_e,__FILE__,__LINE__),_exit(1))
242 static void _redisAssert(char *estr
, char *file
, int line
);
243 static void _redisPanic(char *msg
, char *file
, int line
);
245 /*================================= Data types ============================== */
247 /* A redis object, that is a type able to hold a string / list / set */
249 /* The VM object structure: where an object is stored on disk (page,
 * usedpages) and when it was last accessed (atime). */
250 struct redisObjectVM
{
251 off_t page
; /* the page at which the object is stored on disk */
252 off_t usedpages
; /* number of pages used on disk */
253 time_t atime
; /* Last access time */
256 /* The actual Redis Object */
257 typedef struct redisObject
{
260 unsigned char encoding
;
261 unsigned char storage
; /* If this object is a key, where is the value?
262 * REDIS_VM_MEMORY, REDIS_VM_SWAPPED, ... */
263 unsigned char vtype
; /* If this object is a key, and value is swapped out,
264 * this is the type of the swapped out object. */
266 /* VM fields, these are only allocated if VM is active, otherwise the
267 * object allocation function will just allocate
268 * sizeof(redisObject) minus sizeof(redisObjectVM), so using
269 * Redis without VM active will not have any overhead. */
270 struct redisObjectVM vm
;
273 /* Macro used to initialize a Redis object allocated on the stack.
274 * Note that this macro is taken near the structure definition to make sure
275 * we'll update it when the structure is changed, to avoid bugs like
276 * bug #85 introduced exactly in this way. */
277 #define initStaticStringObject(_var,_ptr) do { \
279 _var.type = REDIS_STRING; \
280 _var.encoding = REDIS_ENCODING_RAW; \
282 if (server.vm_enabled) _var.storage = REDIS_VM_MEMORY; \
285 typedef struct redisDb
{
286 dict
*dict
; /* The keyspace for this DB */
287 dict
*expires
; /* Timeout of keys with a timeout set */
288 dict
*blockingkeys
; /* Keys with clients waiting for data (BLPOP) */
289 dict
*io_keys
; /* Keys with clients waiting for VM I/O */
293 /* Client MULTI/EXEC state */
294 typedef struct multiCmd
{
297 struct redisCommand
*cmd
;
300 typedef struct multiState
{
301 multiCmd
*commands
; /* Array of MULTI commands */
302 int count
; /* Total number of MULTI commands */
305 /* With multiplexing we need to take per-client state.
306 * Clients are taken in a linked list. */
307 typedef struct redisClient
{
312 robj
**argv
, **mbargv
;
314 int bulklen
; /* bulk read len. -1 if not in bulk read mode */
315 int multibulk
; /* multi bulk command format active */
318 time_t lastinteraction
; /* time of the last interaction, used for timeout */
319 int flags
; /* REDIS_SLAVE | REDIS_MONITOR | REDIS_MULTI ... */
320 int slaveseldb
; /* slave selected db, if this client is a slave */
321 int authenticated
; /* when requirepass is non-NULL */
322 int replstate
; /* replication state if this is a slave */
323 int repldbfd
; /* replication DB file descriptor */
324 long repldboff
; /* replication DB file offset */
325 off_t repldbsize
; /* replication DB file size */
326 multiState mstate
; /* MULTI/EXEC state */
327 robj
**blockingkeys
; /* The key we are waiting to terminate a blocking
328 * operation such as BLPOP. Otherwise NULL. */
329 int blockingkeysnum
; /* Number of blocking keys */
330 time_t blockingto
; /* Blocking operation timeout. If UNIX current time
331 * is >= blockingto then the operation timed out. */
332 list
*io_keys
; /* Keys this client is waiting to be loaded from the
333 * swap file in order to continue. */
334 dict
*pubsub_channels
; /* channels a client is interested in (SUBSCRIBE) */
335 list
*pubsub_patterns
; /* patterns a client is interested in (SUBSCRIBE) */
343 /* Global server state structure */
348 long long dirty
; /* changes to DB from the last save */
350 list
*slaves
, *monitors
;
351 char neterr
[ANET_ERR_LEN
];
353 int cronloops
; /* number of times the cron function run */
354 list
*objfreelist
; /* A list of freed objects to avoid malloc() */
355 time_t lastsave
; /* Unix time of the last successful save */
356 /* Fields used only for stats */
357 time_t stat_starttime
; /* server start time */
358 long long stat_numcommands
; /* number of processed commands */
359 long long stat_numconnections
; /* number of connections received */
360 long long stat_expiredkeys
; /* number of expired keys */
373 pid_t bgsavechildpid
;
374 pid_t bgrewritechildpid
;
375 sds bgrewritebuf
; /* buffer taken by parent during append only rewrite */
376 struct saveparam
*saveparams
;
381 char *appendfilename
;
386 /* Replication related */
391 redisClient
*master
; /* client that is master for this slave */
393 unsigned int maxclients
;
394 unsigned long long maxmemory
;
395 unsigned int blpop_blocked_clients
;
396 unsigned int vm_blocked_clients
;
397 /* Sort parameters - qsort_r() is only available under BSD so we
398 * have to take this state global, in order to pass it to sortCompare() */
402 /* Virtual memory configuration */
407 unsigned long long vm_max_memory
;
409 size_t hash_max_zipmap_entries
;
410 size_t hash_max_zipmap_value
;
411 /* Virtual memory state */
414 off_t vm_next_page
; /* Next probably empty page */
415 off_t vm_near_pages
; /* Number of pages allocated sequentially */
416 unsigned char *vm_bitmap
; /* Bitmap of free/used pages */
417 time_t unixtime
; /* Unix time sampled every second. */
418 /* Virtual memory I/O threads stuff */
419 /* An I/O thread processes an element taken from the io_newjobs queue and
420 * puts the result of the operation in the io_processed list. While the
421 * job is being processed, it's put on the io_processing queue. */
422 list
*io_newjobs
; /* List of VM I/O jobs yet to be processed */
423 list
*io_processing
; /* List of VM I/O jobs being processed */
424 list
*io_processed
; /* List of VM I/O jobs already processed */
425 list
*io_ready_clients
; /* Clients ready to be unblocked. All keys loaded */
426 pthread_mutex_t io_mutex
; /* lock to access io_jobs/io_done/io_thread_job */
427 pthread_mutex_t obj_freelist_mutex
; /* safe redis objects creation/free */
428 pthread_mutex_t io_swapfile_mutex
; /* So we can lseek + write */
429 pthread_attr_t io_threads_attr
; /* attributes for threads creation */
430 int io_active_threads
; /* Number of running I/O threads */
431 int vm_max_threads
; /* Max number of I/O threads running at the same time */
432 /* Our main thread is blocked on the event loop, looking for sockets ready
433 * to be read or written, so when a threaded I/O operation is ready to be
434 * processed by the main thread, the I/O thread will use a unix pipe to
435 * awake the main thread. The following are the two pipe FDs. */
436 int io_ready_pipe_read
;
437 int io_ready_pipe_write
;
438 /* Virtual memory stats */
439 unsigned long long vm_stats_used_pages
;
440 unsigned long long vm_stats_swapped_objects
;
441 unsigned long long vm_stats_swapouts
;
442 unsigned long long vm_stats_swapins
;
444 dict
*pubsub_channels
; /* Map channels to list of subscribed clients */
445 list
*pubsub_patterns
; /* A list of pubsub_patterns */
450 typedef struct pubsubPattern
{
455 typedef void redisCommandProc(redisClient
*c
);
456 struct redisCommand
{
458 redisCommandProc
*proc
;
461 /* Use a function to determine which keys need to be loaded
462 * in the background prior to executing this command. Takes precedence
463 * over vm_firstkey and others, ignored when NULL */
464 redisCommandProc
*vm_preload_proc
;
465 /* What keys should be loaded in background when calling this command? */
466 int vm_firstkey
; /* The first argument that's a key (0 = no keys) */
467 int vm_lastkey
; /* The last argument that's a key */
468 int vm_keystep
; /* The step between first and last key */
471 struct redisFunctionSym
{
473 unsigned long pointer
;
476 typedef struct _redisSortObject
{
484 typedef struct _redisSortOperation
{
487 } redisSortOperation
;
489 /* ZSETs use a specialized version of Skiplists */
491 typedef struct zskiplistNode
{
492 struct zskiplistNode
**forward
;
493 struct zskiplistNode
*backward
;
499 typedef struct zskiplist
{
500 struct zskiplistNode
*header
, *tail
;
501 unsigned long length
;
505 typedef struct zset
{
510 /* Our shared "common" objects */
512 #define REDIS_SHARED_INTEGERS 10000
513 struct sharedObjectsStruct
{
514 robj
*crlf
, *ok
, *err
, *emptybulk
, *czero
, *cone
, *pong
, *space
,
515 *colon
, *nullbulk
, *nullmultibulk
, *queued
,
516 *emptymultibulk
, *wrongtypeerr
, *nokeyerr
, *syntaxerr
, *sameobjecterr
,
517 *outofrangeerr
, *plus
,
518 *select0
, *select1
, *select2
, *select3
, *select4
,
519 *select5
, *select6
, *select7
, *select8
, *select9
,
520 *messagebulk
, *subscribebulk
, *unsubscribebulk
, *mbulk3
,
521 *psubscribebulk
, *punsubscribebulk
, *integers
[REDIS_SHARED_INTEGERS
];
524 /* Global vars that are actually used as constants. The following double
525 * values are used for double on-disk serialization, and are initialized
526 * at runtime to avoid strange compiler optimizations. */
528 static double R_Zero
, R_PosInf
, R_NegInf
, R_Nan
;
530 /* VM threaded I/O request message */
531 #define REDIS_IOJOB_LOAD 0 /* Load from disk to memory */
532 #define REDIS_IOJOB_PREPARE_SWAP 1 /* Compute needed pages */
533 #define REDIS_IOJOB_DO_SWAP 2 /* Swap from memory to disk */
534 typedef struct iojob
{
535 int type
; /* Request type, REDIS_IOJOB_* */
536 redisDb
*db
;/* Redis database */
537 robj
*key
; /* This I/O request is about swapping this key */
538 robj
*val
; /* the value to swap for REDIS_IOJOB_*_SWAP, otherwise this
539 * field is populated by the I/O thread for REDIS_IOJOB_LOAD. */
540 off_t page
; /* Swap page where to read/write the object */
541 off_t pages
; /* Swap pages needed to save object. PREPARE_SWAP return val */
542 int canceled
; /* True if this command was canceled by blocking side of VM */
543 pthread_t thread
; /* ID of the thread processing this entry */
546 /*================================ Prototypes =============================== */
548 static void freeStringObject(robj
*o
);
549 static void freeListObject(robj
*o
);
550 static void freeSetObject(robj
*o
);
551 static void decrRefCount(void *o
);
552 static robj
*createObject(int type
, void *ptr
);
553 static void freeClient(redisClient
*c
);
554 static int rdbLoad(char *filename
);
555 static void addReply(redisClient
*c
, robj
*obj
);
556 static void addReplySds(redisClient
*c
, sds s
);
557 static void incrRefCount(robj
*o
);
558 static int rdbSaveBackground(char *filename
);
559 static robj
*createStringObject(char *ptr
, size_t len
);
560 static robj
*dupStringObject(robj
*o
);
561 static void replicationFeedSlaves(list
*slaves
, int dictid
, robj
**argv
, int argc
);
562 static void feedAppendOnlyFile(struct redisCommand
*cmd
, int dictid
, robj
**argv
, int argc
);
563 static int syncWithMaster(void);
564 static robj
*tryObjectEncoding(robj
*o
);
565 static robj
*getDecodedObject(robj
*o
);
566 static int removeExpire(redisDb
*db
, robj
*key
);
567 static int expireIfNeeded(redisDb
*db
, robj
*key
);
568 static int deleteIfVolatile(redisDb
*db
, robj
*key
);
569 static int deleteIfSwapped(redisDb
*db
, robj
*key
);
570 static int deleteKey(redisDb
*db
, robj
*key
);
571 static time_t getExpire(redisDb
*db
, robj
*key
);
572 static int setExpire(redisDb
*db
, robj
*key
, time_t when
);
573 static void updateSlavesWaitingBgsave(int bgsaveerr
);
574 static void freeMemoryIfNeeded(void);
575 static int processCommand(redisClient
*c
);
576 static void setupSigSegvAction(void);
577 static void rdbRemoveTempFile(pid_t childpid
);
578 static void aofRemoveTempFile(pid_t childpid
);
579 static size_t stringObjectLen(robj
*o
);
580 static void processInputBuffer(redisClient
*c
);
581 static zskiplist
*zslCreate(void);
582 static void zslFree(zskiplist
*zsl
);
583 static void zslInsert(zskiplist
*zsl
, double score
, robj
*obj
);
584 static void sendReplyToClientWritev(aeEventLoop
*el
, int fd
, void *privdata
, int mask
);
585 static void initClientMultiState(redisClient
*c
);
586 static void freeClientMultiState(redisClient
*c
);
587 static void queueMultiCommand(redisClient
*c
, struct redisCommand
*cmd
);
588 static void unblockClientWaitingData(redisClient
*c
);
589 static int handleClientsWaitingListPush(redisClient
*c
, robj
*key
, robj
*ele
);
590 static void vmInit(void);
591 static void vmMarkPagesFree(off_t page
, off_t count
);
592 static robj
*vmLoadObject(robj
*key
);
593 static robj
*vmPreviewObject(robj
*key
);
594 static int vmSwapOneObjectBlocking(void);
595 static int vmSwapOneObjectThreaded(void);
596 static int vmCanSwapOut(void);
597 static int tryFreeOneObjectFromFreelist(void);
598 static void acceptHandler(aeEventLoop
*el
, int fd
, void *privdata
, int mask
);
599 static void vmThreadedIOCompletedJob(aeEventLoop
*el
, int fd
, void *privdata
, int mask
);
600 static void vmCancelThreadedIOJob(robj
*o
);
601 static void lockThreadedIO(void);
602 static void unlockThreadedIO(void);
603 static int vmSwapObjectThreaded(robj
*key
, robj
*val
, redisDb
*db
);
604 static void freeIOJob(iojob
*j
);
605 static void queueIOJob(iojob
*j
);
606 static int vmWriteObjectOnSwap(robj
*o
, off_t page
);
607 static robj
*vmReadObjectFromSwap(off_t page
, int type
);
608 static void waitEmptyIOJobsQueue(void);
609 static void vmReopenSwapFile(void);
610 static int vmFreePage(off_t page
);
611 static void zunionInterBlockClientOnSwappedKeys(redisClient
*c
);
612 static int blockClientOnSwappedKeys(struct redisCommand
*cmd
, redisClient
*c
);
613 static int dontWaitForSwappedKey(redisClient
*c
, robj
*key
);
614 static void handleClientsBlockedOnSwappedKey(redisDb
*db
, robj
*key
);
615 static void readQueryFromClient(aeEventLoop
*el
, int fd
, void *privdata
, int mask
);
616 static struct redisCommand
*lookupCommand(char *name
);
617 static void call(redisClient
*c
, struct redisCommand
*cmd
);
618 static void resetClient(redisClient
*c
);
619 static void convertToRealHash(robj
*o
);
620 static int pubsubUnsubscribeAllChannels(redisClient
*c
, int notify
);
621 static int pubsubUnsubscribeAllPatterns(redisClient
*c
, int notify
);
622 static void freePubsubPattern(void *p
);
623 static int listMatchPubsubPattern(void *a
, void *b
);
624 static int compareStringObjects(robj
*a
, robj
*b
);
627 static void authCommand(redisClient
*c
);
628 static void pingCommand(redisClient
*c
);
629 static void echoCommand(redisClient
*c
);
630 static void setCommand(redisClient
*c
);
631 static void setnxCommand(redisClient
*c
);
632 static void getCommand(redisClient
*c
);
633 static void delCommand(redisClient
*c
);
634 static void existsCommand(redisClient
*c
);
635 static void incrCommand(redisClient
*c
);
636 static void decrCommand(redisClient
*c
);
637 static void incrbyCommand(redisClient
*c
);
638 static void decrbyCommand(redisClient
*c
);
639 static void selectCommand(redisClient
*c
);
640 static void randomkeyCommand(redisClient
*c
);
641 static void keysCommand(redisClient
*c
);
642 static void dbsizeCommand(redisClient
*c
);
643 static void lastsaveCommand(redisClient
*c
);
644 static void saveCommand(redisClient
*c
);
645 static void bgsaveCommand(redisClient
*c
);
646 static void bgrewriteaofCommand(redisClient
*c
);
647 static void shutdownCommand(redisClient
*c
);
648 static void moveCommand(redisClient
*c
);
649 static void renameCommand(redisClient
*c
);
650 static void renamenxCommand(redisClient
*c
);
651 static void lpushCommand(redisClient
*c
);
652 static void rpushCommand(redisClient
*c
);
653 static void lpopCommand(redisClient
*c
);
654 static void rpopCommand(redisClient
*c
);
655 static void llenCommand(redisClient
*c
);
656 static void lindexCommand(redisClient
*c
);
657 static void lrangeCommand(redisClient
*c
);
658 static void ltrimCommand(redisClient
*c
);
659 static void typeCommand(redisClient
*c
);
660 static void lsetCommand(redisClient
*c
);
661 static void saddCommand(redisClient
*c
);
662 static void sremCommand(redisClient
*c
);
663 static void smoveCommand(redisClient
*c
);
664 static void sismemberCommand(redisClient
*c
);
665 static void scardCommand(redisClient
*c
);
666 static void spopCommand(redisClient
*c
);
667 static void srandmemberCommand(redisClient
*c
);
668 static void sinterCommand(redisClient
*c
);
669 static void sinterstoreCommand(redisClient
*c
);
670 static void sunionCommand(redisClient
*c
);
671 static void sunionstoreCommand(redisClient
*c
);
672 static void sdiffCommand(redisClient
*c
);
673 static void sdiffstoreCommand(redisClient
*c
);
674 static void syncCommand(redisClient
*c
);
675 static void flushdbCommand(redisClient
*c
);
676 static void flushallCommand(redisClient
*c
);
677 static void sortCommand(redisClient
*c
);
678 static void lremCommand(redisClient
*c
);
679 static void rpoplpushcommand(redisClient
*c
); /* NOTE(review): lower-case "command" suffix, unlike the sibling
   * *Command handlers; the "rpoplpush" cmdTable entry uses this same
   * spelling, so a rename must change both together. */
680 static void infoCommand(redisClient
*c
);
681 static void mgetCommand(redisClient
*c
);
682 static void monitorCommand(redisClient
*c
);
683 static void expireCommand(redisClient
*c
);
684 static void expireatCommand(redisClient
*c
);
685 static void getsetCommand(redisClient
*c
);
686 static void ttlCommand(redisClient
*c
);
687 static void slaveofCommand(redisClient
*c
);
688 static void debugCommand(redisClient
*c
);
689 static void msetCommand(redisClient
*c
);
690 static void msetnxCommand(redisClient
*c
);
691 static void zaddCommand(redisClient
*c
);
692 static void zincrbyCommand(redisClient
*c
);
693 static void zrangeCommand(redisClient
*c
);
694 static void zrangebyscoreCommand(redisClient
*c
);
695 static void zcountCommand(redisClient
*c
);
696 static void zrevrangeCommand(redisClient
*c
);
697 static void zcardCommand(redisClient
*c
);
698 static void zremCommand(redisClient
*c
);
699 static void zscoreCommand(redisClient
*c
);
700 static void zremrangebyscoreCommand(redisClient
*c
);
701 static void multiCommand(redisClient
*c
);
702 static void execCommand(redisClient
*c
);
703 static void discardCommand(redisClient
*c
);
704 static void blpopCommand(redisClient
*c
);
705 static void brpopCommand(redisClient
*c
);
706 static void appendCommand(redisClient
*c
);
707 static void substrCommand(redisClient
*c
);
708 static void zrankCommand(redisClient
*c
);
709 static void zrevrankCommand(redisClient
*c
);
710 static void hsetCommand(redisClient
*c
);
711 static void hsetnxCommand(redisClient
*c
);
712 static void hgetCommand(redisClient
*c
);
713 static void hmsetCommand(redisClient
*c
);
714 static void hmgetCommand(redisClient
*c
);
715 static void hdelCommand(redisClient
*c
);
716 static void hlenCommand(redisClient
*c
);
717 static void zremrangebyrankCommand(redisClient
*c
);
718 static void zunionCommand(redisClient
*c
);
719 static void zinterCommand(redisClient
*c
);
720 static void hkeysCommand(redisClient
*c
);
721 static void hvalsCommand(redisClient
*c
);
722 static void hgetallCommand(redisClient
*c
);
723 static void hexistsCommand(redisClient
*c
);
724 static void configCommand(redisClient
*c
);
725 static void hincrbyCommand(redisClient
*c
);
726 static void subscribeCommand(redisClient
*c
);
727 static void unsubscribeCommand(redisClient
*c
);
728 static void psubscribeCommand(redisClient
*c
);
729 static void punsubscribeCommand(redisClient
*c
);
730 static void publishCommand(redisClient
*c
);
732 /*================================= Globals ================================= */
735 static struct redisServer server
; /* server global state */
736 static struct redisCommand cmdTable
[] = {
737 {"get",getCommand
,2,REDIS_CMD_INLINE
,NULL
,1,1,1},
738 {"set",setCommand
,3,REDIS_CMD_BULK
|REDIS_CMD_DENYOOM
,NULL
,0,0,0},
739 {"setnx",setnxCommand
,3,REDIS_CMD_BULK
|REDIS_CMD_DENYOOM
,NULL
,0,0,0},
740 {"append",appendCommand
,3,REDIS_CMD_BULK
|REDIS_CMD_DENYOOM
,NULL
,1,1,1},
741 {"substr",substrCommand
,4,REDIS_CMD_INLINE
,NULL
,1,1,1},
742 {"del",delCommand
,-2,REDIS_CMD_INLINE
,NULL
,0,0,0},
743 {"exists",existsCommand
,2,REDIS_CMD_INLINE
,NULL
,1,1,1},
744 {"incr",incrCommand
,2,REDIS_CMD_INLINE
|REDIS_CMD_DENYOOM
,NULL
,1,1,1},
745 {"decr",decrCommand
,2,REDIS_CMD_INLINE
|REDIS_CMD_DENYOOM
,NULL
,1,1,1},
746 {"mget",mgetCommand
,-2,REDIS_CMD_INLINE
,NULL
,1,-1,1},
747 {"rpush",rpushCommand
,3,REDIS_CMD_BULK
|REDIS_CMD_DENYOOM
,NULL
,1,1,1},
748 {"lpush",lpushCommand
,3,REDIS_CMD_BULK
|REDIS_CMD_DENYOOM
,NULL
,1,1,1},
749 {"rpop",rpopCommand
,2,REDIS_CMD_INLINE
,NULL
,1,1,1},
750 {"lpop",lpopCommand
,2,REDIS_CMD_INLINE
,NULL
,1,1,1},
751 {"brpop",brpopCommand
,-3,REDIS_CMD_INLINE
,NULL
,1,1,1},
752 {"blpop",blpopCommand
,-3,REDIS_CMD_INLINE
,NULL
,1,1,1},
753 {"llen",llenCommand
,2,REDIS_CMD_INLINE
,NULL
,1,1,1},
754 {"lindex",lindexCommand
,3,REDIS_CMD_INLINE
,NULL
,1,1,1},
755 {"lset",lsetCommand
,4,REDIS_CMD_BULK
|REDIS_CMD_DENYOOM
,NULL
,1,1,1},
756 {"lrange",lrangeCommand
,4,REDIS_CMD_INLINE
,NULL
,1,1,1},
757 {"ltrim",ltrimCommand
,4,REDIS_CMD_INLINE
,NULL
,1,1,1},
758 {"lrem",lremCommand
,4,REDIS_CMD_BULK
,NULL
,1,1,1},
759 {"rpoplpush",rpoplpushcommand
,3,REDIS_CMD_INLINE
|REDIS_CMD_DENYOOM
,NULL
,1,2,1},
760 {"sadd",saddCommand
,3,REDIS_CMD_BULK
|REDIS_CMD_DENYOOM
,NULL
,1,1,1},
761 {"srem",sremCommand
,3,REDIS_CMD_BULK
,NULL
,1,1,1},
762 {"smove",smoveCommand
,4,REDIS_CMD_BULK
,NULL
,1,2,1},
763 {"sismember",sismemberCommand
,3,REDIS_CMD_BULK
,NULL
,1,1,1},
764 {"scard",scardCommand
,2,REDIS_CMD_INLINE
,NULL
,1,1,1},
765 {"spop",spopCommand
,2,REDIS_CMD_INLINE
,NULL
,1,1,1},
766 {"srandmember",srandmemberCommand
,2,REDIS_CMD_INLINE
,NULL
,1,1,1},
767 {"sinter",sinterCommand
,-2,REDIS_CMD_INLINE
|REDIS_CMD_DENYOOM
,NULL
,1,-1,1},
768 {"sinterstore",sinterstoreCommand
,-3,REDIS_CMD_INLINE
|REDIS_CMD_DENYOOM
,NULL
,2,-1,1},
769 {"sunion",sunionCommand
,-2,REDIS_CMD_INLINE
|REDIS_CMD_DENYOOM
,NULL
,1,-1,1},
770 {"sunionstore",sunionstoreCommand
,-3,REDIS_CMD_INLINE
|REDIS_CMD_DENYOOM
,NULL
,2,-1,1},
771 {"sdiff",sdiffCommand
,-2,REDIS_CMD_INLINE
|REDIS_CMD_DENYOOM
,NULL
,1,-1,1},
772 {"sdiffstore",sdiffstoreCommand
,-3,REDIS_CMD_INLINE
|REDIS_CMD_DENYOOM
,NULL
,2,-1,1},
773 {"smembers",sinterCommand
,2,REDIS_CMD_INLINE
,NULL
,1,1,1},
774 {"zadd",zaddCommand
,4,REDIS_CMD_BULK
|REDIS_CMD_DENYOOM
,NULL
,1,1,1},
775 {"zincrby",zincrbyCommand
,4,REDIS_CMD_BULK
|REDIS_CMD_DENYOOM
,NULL
,1,1,1},
776 {"zrem",zremCommand
,3,REDIS_CMD_BULK
,NULL
,1,1,1},
777 {"zremrangebyscore",zremrangebyscoreCommand
,4,REDIS_CMD_INLINE
,NULL
,1,1,1},
778 {"zremrangebyrank",zremrangebyrankCommand
,4,REDIS_CMD_INLINE
,NULL
,1,1,1},
779 {"zunion",zunionCommand
,-4,REDIS_CMD_INLINE
|REDIS_CMD_DENYOOM
,zunionInterBlockClientOnSwappedKeys
,0,0,0},
780 {"zinter",zinterCommand
,-4,REDIS_CMD_INLINE
|REDIS_CMD_DENYOOM
,zunionInterBlockClientOnSwappedKeys
,0,0,0},
781 {"zrange",zrangeCommand
,-4,REDIS_CMD_INLINE
,NULL
,1,1,1},
782 {"zrangebyscore",zrangebyscoreCommand
,-4,REDIS_CMD_INLINE
,NULL
,1,1,1},
783 {"zcount",zcountCommand
,4,REDIS_CMD_INLINE
,NULL
,1,1,1},
784 {"zrevrange",zrevrangeCommand
,-4,REDIS_CMD_INLINE
,NULL
,1,1,1},
785 {"zcard",zcardCommand
,2,REDIS_CMD_INLINE
,NULL
,1,1,1},
786 {"zscore",zscoreCommand
,3,REDIS_CMD_BULK
|REDIS_CMD_DENYOOM
,NULL
,1,1,1},
787 {"zrank",zrankCommand
,3,REDIS_CMD_BULK
,NULL
,1,1,1},
788 {"zrevrank",zrevrankCommand
,3,REDIS_CMD_BULK
,NULL
,1,1,1},
789 {"hset",hsetCommand
,4,REDIS_CMD_BULK
|REDIS_CMD_DENYOOM
,NULL
,1,1,1},
790 {"hsetnx",hsetnxCommand
,4,REDIS_CMD_BULK
|REDIS_CMD_DENYOOM
,NULL
,1,1,1},
791 {"hget",hgetCommand
,3,REDIS_CMD_BULK
,NULL
,1,1,1},
792 {"hmset",hmsetCommand
,-4,REDIS_CMD_BULK
|REDIS_CMD_DENYOOM
,NULL
,1,1,1},
793 {"hmget",hmgetCommand
,-3,REDIS_CMD_BULK
,NULL
,1,1,1},
794 {"hincrby",hincrbyCommand
,4,REDIS_CMD_INLINE
|REDIS_CMD_DENYOOM
,NULL
,1,1,1},
795 {"hdel",hdelCommand
,3,REDIS_CMD_BULK
,NULL
,1,1,1},
796 {"hlen",hlenCommand
,2,REDIS_CMD_INLINE
,NULL
,1,1,1},
797 {"hkeys",hkeysCommand
,2,REDIS_CMD_INLINE
,NULL
,1,1,1},
798 {"hvals",hvalsCommand
,2,REDIS_CMD_INLINE
,NULL
,1,1,1},
799 {"hgetall",hgetallCommand
,2,REDIS_CMD_INLINE
,NULL
,1,1,1},
800 {"hexists",hexistsCommand
,3,REDIS_CMD_BULK
,NULL
,1,1,1},
801 {"incrby",incrbyCommand
,3,REDIS_CMD_INLINE
|REDIS_CMD_DENYOOM
,NULL
,1,1,1},
802 {"decrby",decrbyCommand
,3,REDIS_CMD_INLINE
|REDIS_CMD_DENYOOM
,NULL
,1,1,1},
803 {"getset",getsetCommand
,3,REDIS_CMD_BULK
|REDIS_CMD_DENYOOM
,NULL
,1,1,1},
804 {"mset",msetCommand
,-3,REDIS_CMD_BULK
|REDIS_CMD_DENYOOM
,NULL
,1,-1,2},
805 {"msetnx",msetnxCommand
,-3,REDIS_CMD_BULK
|REDIS_CMD_DENYOOM
,NULL
,1,-1,2},
806 {"randomkey",randomkeyCommand
,1,REDIS_CMD_INLINE
,NULL
,0,0,0},
807 {"select",selectCommand
,2,REDIS_CMD_INLINE
,NULL
,0,0,0},
808 {"move",moveCommand
,3,REDIS_CMD_INLINE
,NULL
,1,1,1},
809 {"rename",renameCommand
,3,REDIS_CMD_INLINE
,NULL
,1,1,1},
810 {"renamenx",renamenxCommand
,3,REDIS_CMD_INLINE
,NULL
,1,1,1},
811 {"expire",expireCommand
,3,REDIS_CMD_INLINE
,NULL
,0,0,0},
812 {"expireat",expireatCommand
,3,REDIS_CMD_INLINE
,NULL
,0,0,0},
813 {"keys",keysCommand
,2,REDIS_CMD_INLINE
,NULL
,0,0,0},
814 {"dbsize",dbsizeCommand
,1,REDIS_CMD_INLINE
,NULL
,0,0,0},
815 {"auth",authCommand
,2,REDIS_CMD_INLINE
,NULL
,0,0,0},
816 {"ping",pingCommand
,1,REDIS_CMD_INLINE
,NULL
,0,0,0},
817 {"echo",echoCommand
,2,REDIS_CMD_BULK
,NULL
,0,0,0},
818 {"save",saveCommand
,1,REDIS_CMD_INLINE
,NULL
,0,0,0},
819 {"bgsave",bgsaveCommand
,1,REDIS_CMD_INLINE
,NULL
,0,0,0},
820 {"bgrewriteaof",bgrewriteaofCommand
,1,REDIS_CMD_INLINE
,NULL
,0,0,0},
821 {"shutdown",shutdownCommand
,1,REDIS_CMD_INLINE
,NULL
,0,0,0},
822 {"lastsave",lastsaveCommand
,1,REDIS_CMD_INLINE
,NULL
,0,0,0},
823 {"type",typeCommand
,2,REDIS_CMD_INLINE
,NULL
,1,1,1},
824 {"multi",multiCommand
,1,REDIS_CMD_INLINE
,NULL
,0,0,0},
825 {"exec",execCommand
,1,REDIS_CMD_INLINE
|REDIS_CMD_DENYOOM
,NULL
,0,0,0},
826 {"discard",discardCommand
,1,REDIS_CMD_INLINE
,NULL
,0,0,0},
827 {"sync",syncCommand
,1,REDIS_CMD_INLINE
,NULL
,0,0,0},
828 {"flushdb",flushdbCommand
,1,REDIS_CMD_INLINE
,NULL
,0,0,0},
829 {"flushall",flushallCommand
,1,REDIS_CMD_INLINE
,NULL
,0,0,0},
830 {"sort",sortCommand
,-2,REDIS_CMD_INLINE
|REDIS_CMD_DENYOOM
,NULL
,1,1,1},
831 {"info",infoCommand
,1,REDIS_CMD_INLINE
,NULL
,0,0,0},
832 {"monitor",monitorCommand
,1,REDIS_CMD_INLINE
,NULL
,0,0,0},
833 {"ttl",ttlCommand
,2,REDIS_CMD_INLINE
,NULL
,1,1,1},
834 {"slaveof",slaveofCommand
,3,REDIS_CMD_INLINE
,NULL
,0,0,0},
835 {"debug",debugCommand
,-2,REDIS_CMD_INLINE
,NULL
,0,0,0},
836 {"config",configCommand
,-2,REDIS_CMD_BULK
,NULL
,0,0,0},
837 {"subscribe",subscribeCommand
,-2,REDIS_CMD_INLINE
,NULL
,0,0,0},
838 {"unsubscribe",unsubscribeCommand
,-1,REDIS_CMD_INLINE
,NULL
,0,0,0},
839 {"psubscribe",psubscribeCommand
,-2,REDIS_CMD_INLINE
,NULL
,0,0,0},
840 {"punsubscribe",punsubscribeCommand
,-1,REDIS_CMD_INLINE
,NULL
,0,0,0},
841 {"publish",publishCommand
,3,REDIS_CMD_BULK
|REDIS_CMD_FORCE_REPLICATION
,NULL
,0,0,0},
842 {NULL
,NULL
,0,0,NULL
,0,0,0}
845 /*============================ Utility functions ============================ */
847 /* Glob-style pattern matching. */
848 static int stringmatchlen(const char *pattern
, int patternLen
,
849 const char *string
, int stringLen
, int nocase
)
854 while (pattern
[1] == '*') {
859 return 1; /* match */
861 if (stringmatchlen(pattern
+1, patternLen
-1,
862 string
, stringLen
, nocase
))
863 return 1; /* match */
867 return 0; /* no match */
871 return 0; /* no match */
881 not = pattern
[0] == '^';
888 if (pattern
[0] == '\\') {
891 if (pattern
[0] == string
[0])
893 } else if (pattern
[0] == ']') {
895 } else if (patternLen
== 0) {
899 } else if (pattern
[1] == '-' && patternLen
>= 3) {
900 int start
= pattern
[0];
901 int end
= pattern
[2];
909 start
= tolower(start
);
915 if (c
>= start
&& c
<= end
)
919 if (pattern
[0] == string
[0])
922 if (tolower((int)pattern
[0]) == tolower((int)string
[0]))
932 return 0; /* no match */
938 if (patternLen
>= 2) {
945 if (pattern
[0] != string
[0])
946 return 0; /* no match */
948 if (tolower((int)pattern
[0]) != tolower((int)string
[0]))
949 return 0; /* no match */
957 if (stringLen
== 0) {
958 while(*pattern
== '*') {
965 if (patternLen
== 0 && stringLen
== 0)
/* Convenience wrapper around stringmatchlen() for NUL-terminated input:
 * both lengths are measured with strlen() and 'nocase' is forwarded
 * unchanged. The return value is whatever stringmatchlen() reports. */
static int stringmatch(const char *pattern, const char *string, int nocase) {
    int patternLen = strlen(pattern);
    int stringLen = strlen(string);

    return stringmatchlen(pattern,patternLen,string,stringLen,nocase);
}
974 static void redisLog(int level
, const char *fmt
, ...) {
978 fp
= (server
.logfile
== NULL
) ? stdout
: fopen(server
.logfile
,"a");
982 if (level
>= server
.verbosity
) {
988 strftime(buf
,64,"%d %b %H:%M:%S",localtime(&now
));
989 fprintf(fp
,"[%d] %s %c ",(int)getpid(),buf
,c
[level
]);
990 vfprintf(fp
, fmt
, ap
);
996 if (server
.logfile
) fclose(fp
);
999 /*====================== Hash table type implementation ==================== */
1001 /* This is a hash table type that uses the SDS dynamic strings library as
1002 * keys and redis objects as values (objects can hold SDS strings,
1005 static void dictVanillaFree(void *privdata
, void *val
)
1007 DICT_NOTUSED(privdata
);
1011 static void dictListDestructor(void *privdata
, void *val
)
1013 DICT_NOTUSED(privdata
);
1014 listRelease((list
*)val
);
1017 static int sdsDictKeyCompare(void *privdata
, const void *key1
,
1021 DICT_NOTUSED(privdata
);
1023 l1
= sdslen((sds
)key1
);
1024 l2
= sdslen((sds
)key2
);
1025 if (l1
!= l2
) return 0;
1026 return memcmp(key1
, key2
, l1
) == 0;
1029 static void dictRedisObjectDestructor(void *privdata
, void *val
)
1031 DICT_NOTUSED(privdata
);
1033 if (val
== NULL
) return; /* Values of swapped out keys as set to NULL */
1037 static int dictObjKeyCompare(void *privdata
, const void *key1
,
1040 const robj
*o1
= key1
, *o2
= key2
;
1041 return sdsDictKeyCompare(privdata
,o1
->ptr
,o2
->ptr
);
1044 static unsigned int dictObjHash(const void *key
) {
1045 const robj
*o
= key
;
1046 return dictGenHashFunction(o
->ptr
, sdslen((sds
)o
->ptr
));
1049 static int dictEncObjKeyCompare(void *privdata
, const void *key1
,
1052 robj
*o1
= (robj
*) key1
, *o2
= (robj
*) key2
;
1055 if (o1
->encoding
== REDIS_ENCODING_INT
&&
1056 o2
->encoding
== REDIS_ENCODING_INT
&&
1057 o1
->ptr
== o2
->ptr
) return 1;
1059 o1
= getDecodedObject(o1
);
1060 o2
= getDecodedObject(o2
);
1061 cmp
= sdsDictKeyCompare(privdata
,o1
->ptr
,o2
->ptr
);
1067 static unsigned int dictEncObjHash(const void *key
) {
1068 robj
*o
= (robj
*) key
;
1070 if (o
->encoding
== REDIS_ENCODING_RAW
) {
1071 return dictGenHashFunction(o
->ptr
, sdslen((sds
)o
->ptr
));
1073 if (o
->encoding
== REDIS_ENCODING_INT
) {
1077 len
= snprintf(buf
,32,"%ld",(long)o
->ptr
);
1078 return dictGenHashFunction((unsigned char*)buf
, len
);
1082 o
= getDecodedObject(o
);
1083 hash
= dictGenHashFunction(o
->ptr
, sdslen((sds
)o
->ptr
));
1090 /* Sets type and expires */
1091 static dictType setDictType
= {
1092 dictEncObjHash
, /* hash function */
1095 dictEncObjKeyCompare
, /* key compare */
1096 dictRedisObjectDestructor
, /* key destructor */
1097 NULL
/* val destructor */
1100 /* Sorted sets hash (note: a skiplist is used in addition to the hash table) */
1101 static dictType zsetDictType
= {
1102 dictEncObjHash
, /* hash function */
1105 dictEncObjKeyCompare
, /* key compare */
1106 dictRedisObjectDestructor
, /* key destructor */
1107 dictVanillaFree
/* val destructor of malloc(sizeof(double)) */
1111 static dictType dbDictType
= {
1112 dictObjHash
, /* hash function */
1115 dictObjKeyCompare
, /* key compare */
1116 dictRedisObjectDestructor
, /* key destructor */
1117 dictRedisObjectDestructor
/* val destructor */
1121 static dictType keyptrDictType
= {
1122 dictObjHash
, /* hash function */
1125 dictObjKeyCompare
, /* key compare */
1126 dictRedisObjectDestructor
, /* key destructor */
1127 NULL
/* val destructor */
1130 /* Hash type hash table (note that small hashes are represented with zipmaps) */
1131 static dictType hashDictType
= {
1132 dictEncObjHash
, /* hash function */
1135 dictEncObjKeyCompare
, /* key compare */
1136 dictRedisObjectDestructor
, /* key destructor */
1137 dictRedisObjectDestructor
/* val destructor */
1140 /* Keylist hash table type has unencoded redis objects as keys and
1141 * lists as values. It's used for blocking operations (BLPOP) and to
1142 * map swapped keys to a list of clients waiting for these keys to be loaded. */
1143 static dictType keylistDictType
= {
1144 dictObjHash
, /* hash function */
1147 dictObjKeyCompare
, /* key compare */
1148 dictRedisObjectDestructor
, /* key destructor */
1149 dictListDestructor
/* val destructor */
1152 static void version();
1154 /* ========================= Random utility functions ======================= */
1156 /* Redis generally does not try to recover from out of memory conditions
1157 * when allocating objects or strings, it is not clear if it will be possible
1158 * to report this condition to the client since the networking layer itself
1159 * is based on heap allocation for send buffers, so we simply abort.
1160 * At least the code will be simpler to read... */
1161 static void oom(const char *msg
) {
1162 redisLog(REDIS_WARNING
, "%s: Out of memory\n",msg
);
1167 /* ====================== Redis server networking stuff ===================== */
1168 static void closeTimedoutClients(void) {
1171 time_t now
= time(NULL
);
1174 listRewind(server
.clients
,&li
);
1175 while ((ln
= listNext(&li
)) != NULL
) {
1176 c
= listNodeValue(ln
);
1177 if (server
.maxidletime
&&
1178 !(c
->flags
& REDIS_SLAVE
) && /* no timeout for slaves */
1179 !(c
->flags
& REDIS_MASTER
) && /* no timeout for masters */
1180 dictSize(c
->pubsub_channels
) == 0 && /* no timeout for pubsub */
1181 listLength(c
->pubsub_patterns
) == 0 &&
1182 (now
- c
->lastinteraction
> server
.maxidletime
))
1184 redisLog(REDIS_VERBOSE
,"Closing idle client");
1186 } else if (c
->flags
& REDIS_BLOCKED
) {
1187 if (c
->blockingto
!= 0 && c
->blockingto
< now
) {
1188 addReply(c
,shared
.nullmultibulk
);
1189 unblockClientWaitingData(c
);
1195 static int htNeedsResize(dict
*dict
) {
1196 long long size
, used
;
1198 size
= dictSlots(dict
);
1199 used
= dictSize(dict
);
1200 return (size
&& used
&& size
> DICT_HT_INITIAL_SIZE
&&
1201 (used
*100/size
< REDIS_HT_MINFILL
));
1204 /* If the percentage of used slots in the HT reaches REDIS_HT_MINFILL
1205 * we resize the hash table to save memory */
1206 static void tryResizeHashTables(void) {
1209 for (j
= 0; j
< server
.dbnum
; j
++) {
1210 if (htNeedsResize(server
.db
[j
].dict
))
1211 dictResize(server
.db
[j
].dict
);
1212 if (htNeedsResize(server
.db
[j
].expires
))
1213 dictResize(server
.db
[j
].expires
);
1217 /* Our hash table implementation performs rehashing incrementally while
1218 * we write/read from the hash table. Still if the server is idle, the hash
1219 * table will use two tables for a long time. So we try to use 1 millisecond
1220 * of CPU time at every serverCron() loop in order to rehash some key. */
1221 static void incrementallyRehash(void) {
1224 for (j
= 0; j
< server
.dbnum
; j
++) {
1225 if (dictIsRehashing(server
.db
[j
].dict
)) {
1226 dictRehashMilliseconds(server
.db
[j
].dict
,1);
1227 break; /* already used our millisecond for this loop... */
1232 /* A background saving child (BGSAVE) terminated its work. Handle this. */
1233 void backgroundSaveDoneHandler(int statloc
) {
1234 int exitcode
= WEXITSTATUS(statloc
);
1235 int bysignal
= WIFSIGNALED(statloc
);
1237 if (!bysignal
&& exitcode
== 0) {
1238 redisLog(REDIS_NOTICE
,
1239 "Background saving terminated with success");
1241 server
.lastsave
= time(NULL
);
1242 } else if (!bysignal
&& exitcode
!= 0) {
1243 redisLog(REDIS_WARNING
, "Background saving error");
1245 redisLog(REDIS_WARNING
,
1246 "Background saving terminated by signal %d", WTERMSIG(statloc
));
1247 rdbRemoveTempFile(server
.bgsavechildpid
);
1249 server
.bgsavechildpid
= -1;
1250 /* Possibly there are slaves waiting for a BGSAVE in order to be served
1251 * (the first stage of SYNC is a bulk transfer of dump.rdb) */
1252 updateSlavesWaitingBgsave(exitcode
== 0 ? REDIS_OK
: REDIS_ERR
);
1255 /* A background append only file rewriting (BGREWRITEAOF) terminated its work.
1257 void backgroundRewriteDoneHandler(int statloc
) {
1258 int exitcode
= WEXITSTATUS(statloc
);
1259 int bysignal
= WIFSIGNALED(statloc
);
1261 if (!bysignal
&& exitcode
== 0) {
1265 redisLog(REDIS_NOTICE
,
1266 "Background append only file rewriting terminated with success");
1267 /* Now it's time to flush the differences accumulated by the parent */
1268 snprintf(tmpfile
,256,"temp-rewriteaof-bg-%d.aof", (int) server
.bgrewritechildpid
);
1269 fd
= open(tmpfile
,O_WRONLY
|O_APPEND
);
1271 redisLog(REDIS_WARNING
, "Not able to open the temp append only file produced by the child: %s", strerror(errno
));
1274 /* Flush our data... */
1275 if (write(fd
,server
.bgrewritebuf
,sdslen(server
.bgrewritebuf
)) !=
1276 (signed) sdslen(server
.bgrewritebuf
)) {
1277 redisLog(REDIS_WARNING
, "Error or short write trying to flush the parent diff of the append log file in the child temp file: %s", strerror(errno
));
1281 redisLog(REDIS_NOTICE
,"Parent diff flushed into the new append log file with success (%lu bytes)",sdslen(server
.bgrewritebuf
));
1282 /* Now our work is to rename the temp file into the stable file. And
1283 * switch the file descriptor used by the server for append only. */
1284 if (rename(tmpfile
,server
.appendfilename
) == -1) {
1285 redisLog(REDIS_WARNING
,"Can't rename the temp append only file into the stable one: %s", strerror(errno
));
1289 /* Mission completed... almost */
1290 redisLog(REDIS_NOTICE
,"Append only file successfully rewritten.");
1291 if (server
.appendfd
!= -1) {
1292 /* If append only is actually enabled... */
1293 close(server
.appendfd
);
1294 server
.appendfd
= fd
;
1296 server
.appendseldb
= -1; /* Make sure it will issue SELECT */
1297 redisLog(REDIS_NOTICE
,"The new append only file was selected for future appends.");
1299 /* If append only is disabled we just generate a dump in this
1300 * format. Why not? */
1303 } else if (!bysignal
&& exitcode
!= 0) {
1304 redisLog(REDIS_WARNING
, "Background append only file rewriting error");
1306 redisLog(REDIS_WARNING
,
1307 "Background append only file rewriting terminated by signal %d",
1311 sdsfree(server
.bgrewritebuf
);
1312 server
.bgrewritebuf
= sdsempty();
1313 aofRemoveTempFile(server
.bgrewritechildpid
);
1314 server
.bgrewritechildpid
= -1;
1317 /* This function is called once a background process of some kind terminates,
1318 * as we want to avoid resizing the hash tables when there is a child in order
1319 * to play well with copy-on-write (otherwise when a resize happens lots of
1320 * memory pages are copied). The goal of this function is to update the ability
1321 * for dict.c to resize the hash tables according to whether or not we have
1322 * running children. */
1323 static void updateDictResizePolicy(void) {
1324 if (server
.bgsavechildpid
== -1 && server
.bgrewritechildpid
== -1)
1327 dictDisableResize();
1330 static int serverCron(struct aeEventLoop
*eventLoop
, long long id
, void *clientData
) {
1331 int j
, loops
= server
.cronloops
++;
1332 REDIS_NOTUSED(eventLoop
);
1334 REDIS_NOTUSED(clientData
);
1336 /* We take a cached value of the unix time in the global state because
1337 * with virtual memory and aging there is the need to store the current time
1338 * in objects at every object access, and accuracy is not needed.
1339 * To access a global var is faster than calling time(NULL) */
1340 server
.unixtime
= time(NULL
);
1342 /* Show some info about non-empty databases */
1343 for (j
= 0; j
< server
.dbnum
; j
++) {
1344 long long size
, used
, vkeys
;
1346 size
= dictSlots(server
.db
[j
].dict
);
1347 used
= dictSize(server
.db
[j
].dict
);
1348 vkeys
= dictSize(server
.db
[j
].expires
);
1349 if (!(loops
% 50) && (used
|| vkeys
)) {
1350 redisLog(REDIS_VERBOSE
,"DB %d: %lld keys (%lld volatile) in %lld slots HT.",j
,used
,vkeys
,size
);
1351 /* dictPrintStats(server.dict); */
1355 /* We don't want to resize the hash tables while a background saving
1356 * is in progress: the saving child is created using fork() that is
1357 * implemented with a copy-on-write semantic in most modern systems, so
1358 * if we resize the HT while there is the saving child at work actually
1359 * a lot of memory movements in the parent will cause a lot of pages
1361 if (server
.bgsavechildpid
== -1 && server
.bgrewritechildpid
== -1) {
1362 if (!(loops
% 10)) tryResizeHashTables();
1363 if (server
.activerehashing
) incrementallyRehash();
1366 /* Show information about connected clients */
1367 if (!(loops
% 50)) {
1368 redisLog(REDIS_VERBOSE
,"%d clients connected (%d slaves), %zu bytes in use",
1369 listLength(server
.clients
)-listLength(server
.slaves
),
1370 listLength(server
.slaves
),
1371 zmalloc_used_memory());
1374 /* Close connections of timedout clients */
1375 if ((server
.maxidletime
&& !(loops
% 100)) || server
.blpop_blocked_clients
)
1376 closeTimedoutClients();
1378 /* Check if a background saving or AOF rewrite in progress terminated */
1379 if (server
.bgsavechildpid
!= -1 || server
.bgrewritechildpid
!= -1) {
1383 if ((pid
= wait3(&statloc
,WNOHANG
,NULL
)) != 0) {
1384 if (pid
== server
.bgsavechildpid
) {
1385 backgroundSaveDoneHandler(statloc
);
1387 backgroundRewriteDoneHandler(statloc
);
1389 updateDictResizePolicy();
1392 /* If there is not a background saving in progress check if
1393 * we have to save now */
1394 time_t now
= time(NULL
);
1395 for (j
= 0; j
< server
.saveparamslen
; j
++) {
1396 struct saveparam
*sp
= server
.saveparams
+j
;
1398 if (server
.dirty
>= sp
->changes
&&
1399 now
-server
.lastsave
> sp
->seconds
) {
1400 redisLog(REDIS_NOTICE
,"%d changes in %d seconds. Saving...",
1401 sp
->changes
, sp
->seconds
);
1402 rdbSaveBackground(server
.dbfilename
);
1408 /* Try to expire a few timed out keys. The algorithm used is adaptive and
1409 * will use few CPU cycles if there are few expiring keys, otherwise
1410 * it will get more aggressive to avoid that too much memory is used by
1411 * keys that can be removed from the keyspace. */
1412 for (j
= 0; j
< server
.dbnum
; j
++) {
1414 redisDb
*db
= server
.db
+j
;
1416 /* Continue to expire if at the end of the cycle more than 25%
1417 * of the keys were expired. */
1419 long num
= dictSize(db
->expires
);
1420 time_t now
= time(NULL
);
1423 if (num
> REDIS_EXPIRELOOKUPS_PER_CRON
)
1424 num
= REDIS_EXPIRELOOKUPS_PER_CRON
;
1429 if ((de
= dictGetRandomKey(db
->expires
)) == NULL
) break;
1430 t
= (time_t) dictGetEntryVal(de
);
1432 deleteKey(db
,dictGetEntryKey(de
));
1434 server
.stat_expiredkeys
++;
1437 } while (expired
> REDIS_EXPIRELOOKUPS_PER_CRON
/4);
1440 /* Swap a few keys on disk if we are over the memory limit and VM
1441 * is enabled. Try to free objects from the free list first. */
1442 if (vmCanSwapOut()) {
1443 while (server
.vm_enabled
&& zmalloc_used_memory() >
1444 server
.vm_max_memory
)
1448 if (tryFreeOneObjectFromFreelist() == REDIS_OK
) continue;
1449 retval
= (server
.vm_max_threads
== 0) ?
1450 vmSwapOneObjectBlocking() :
1451 vmSwapOneObjectThreaded();
1452 if (retval
== REDIS_ERR
&& !(loops
% 300) &&
1453 zmalloc_used_memory() >
1454 (server
.vm_max_memory
+server
.vm_max_memory
/10))
1456 redisLog(REDIS_WARNING
,"WARNING: vm-max-memory limit exceeded by more than 10%% but unable to swap more objects out!");
1458 /* Note that when using threaded I/O we free just one object,
1459 * because anyway when the I/O thread in charge to swap this
1460 * object out will finish, the handler of completed jobs
1461 * will try to swap more objects if we are still out of memory. */
1462 if (retval
== REDIS_ERR
|| server
.vm_max_threads
> 0) break;
1466 /* Check if we should connect to a MASTER */
1467 if (server
.replstate
== REDIS_REPL_CONNECT
&& !(loops
% 10)) {
1468 redisLog(REDIS_NOTICE
,"Connecting to MASTER...");
1469 if (syncWithMaster() == REDIS_OK
) {
1470 redisLog(REDIS_NOTICE
,"MASTER <-> SLAVE sync succeeded");
1476 /* This function gets called every time Redis is entering the
1477 * main loop of the event driven library, that is, before to sleep
1478 * for ready file descriptors. */
1479 static void beforeSleep(struct aeEventLoop
*eventLoop
) {
1480 REDIS_NOTUSED(eventLoop
);
1482 if (server
.vm_enabled
&& listLength(server
.io_ready_clients
)) {
1486 listRewind(server
.io_ready_clients
,&li
);
1487 while((ln
= listNext(&li
))) {
1488 redisClient
*c
= ln
->value
;
1489 struct redisCommand
*cmd
;
1491 /* Resume the client. */
1492 listDelNode(server
.io_ready_clients
,ln
);
1493 c
->flags
&= (~REDIS_IO_WAIT
);
1494 server
.vm_blocked_clients
--;
1495 aeCreateFileEvent(server
.el
, c
->fd
, AE_READABLE
,
1496 readQueryFromClient
, c
);
1497 cmd
= lookupCommand(c
->argv
[0]->ptr
);
1498 assert(cmd
!= NULL
);
1501 /* There may be more data to process in the input buffer. */
1502 if (c
->querybuf
&& sdslen(c
->querybuf
) > 0)
1503 processInputBuffer(c
);
1508 static void createSharedObjects(void) {
1511 shared
.crlf
= createObject(REDIS_STRING
,sdsnew("\r\n"));
1512 shared
.ok
= createObject(REDIS_STRING
,sdsnew("+OK\r\n"));
1513 shared
.err
= createObject(REDIS_STRING
,sdsnew("-ERR\r\n"));
1514 shared
.emptybulk
= createObject(REDIS_STRING
,sdsnew("$0\r\n\r\n"));
1515 shared
.czero
= createObject(REDIS_STRING
,sdsnew(":0\r\n"));
1516 shared
.cone
= createObject(REDIS_STRING
,sdsnew(":1\r\n"));
1517 shared
.nullbulk
= createObject(REDIS_STRING
,sdsnew("$-1\r\n"));
1518 shared
.nullmultibulk
= createObject(REDIS_STRING
,sdsnew("*-1\r\n"));
1519 shared
.emptymultibulk
= createObject(REDIS_STRING
,sdsnew("*0\r\n"));
1520 shared
.pong
= createObject(REDIS_STRING
,sdsnew("+PONG\r\n"));
1521 shared
.queued
= createObject(REDIS_STRING
,sdsnew("+QUEUED\r\n"));
1522 shared
.wrongtypeerr
= createObject(REDIS_STRING
,sdsnew(
1523 "-ERR Operation against a key holding the wrong kind of value\r\n"));
1524 shared
.nokeyerr
= createObject(REDIS_STRING
,sdsnew(
1525 "-ERR no such key\r\n"));
1526 shared
.syntaxerr
= createObject(REDIS_STRING
,sdsnew(
1527 "-ERR syntax error\r\n"));
1528 shared
.sameobjecterr
= createObject(REDIS_STRING
,sdsnew(
1529 "-ERR source and destination objects are the same\r\n"));
1530 shared
.outofrangeerr
= createObject(REDIS_STRING
,sdsnew(
1531 "-ERR index out of range\r\n"));
1532 shared
.space
= createObject(REDIS_STRING
,sdsnew(" "));
1533 shared
.colon
= createObject(REDIS_STRING
,sdsnew(":"));
1534 shared
.plus
= createObject(REDIS_STRING
,sdsnew("+"));
1535 shared
.select0
= createStringObject("select 0\r\n",10);
1536 shared
.select1
= createStringObject("select 1\r\n",10);
1537 shared
.select2
= createStringObject("select 2\r\n",10);
1538 shared
.select3
= createStringObject("select 3\r\n",10);
1539 shared
.select4
= createStringObject("select 4\r\n",10);
1540 shared
.select5
= createStringObject("select 5\r\n",10);
1541 shared
.select6
= createStringObject("select 6\r\n",10);
1542 shared
.select7
= createStringObject("select 7\r\n",10);
1543 shared
.select8
= createStringObject("select 8\r\n",10);
1544 shared
.select9
= createStringObject("select 9\r\n",10);
1545 shared
.messagebulk
= createStringObject("$7\r\nmessage\r\n",13);
1546 shared
.subscribebulk
= createStringObject("$9\r\nsubscribe\r\n",15);
1547 shared
.unsubscribebulk
= createStringObject("$11\r\nunsubscribe\r\n",18);
1548 shared
.psubscribebulk
= createStringObject("$10\r\npsubscribe\r\n",17);
1549 shared
.punsubscribebulk
= createStringObject("$12\r\npunsubscribe\r\n",19);
1550 shared
.mbulk3
= createStringObject("*3\r\n",4);
1551 for (j
= 0; j
< REDIS_SHARED_INTEGERS
; j
++) {
1552 shared
.integers
[j
] = createObject(REDIS_STRING
,(void*)(long)j
);
1553 shared
.integers
[j
]->encoding
= REDIS_ENCODING_INT
;
1557 static void appendServerSaveParams(time_t seconds
, int changes
) {
1558 server
.saveparams
= zrealloc(server
.saveparams
,sizeof(struct saveparam
)*(server
.saveparamslen
+1));
1559 server
.saveparams
[server
.saveparamslen
].seconds
= seconds
;
1560 server
.saveparams
[server
.saveparamslen
].changes
= changes
;
1561 server
.saveparamslen
++;
1564 static void resetServerSaveParams() {
1565 zfree(server
.saveparams
);
1566 server
.saveparams
= NULL
;
1567 server
.saveparamslen
= 0;
1570 static void initServerConfig() {
1571 server
.dbnum
= REDIS_DEFAULT_DBNUM
;
1572 server
.port
= REDIS_SERVERPORT
;
1573 server
.verbosity
= REDIS_VERBOSE
;
1574 server
.maxidletime
= REDIS_MAXIDLETIME
;
1575 server
.saveparams
= NULL
;
1576 server
.logfile
= NULL
; /* NULL = log on standard output */
1577 server
.bindaddr
= NULL
;
1578 server
.glueoutputbuf
= 1;
1579 server
.daemonize
= 0;
1580 server
.appendonly
= 0;
1581 server
.appendfsync
= APPENDFSYNC_ALWAYS
;
1582 server
.lastfsync
= time(NULL
);
1583 server
.appendfd
= -1;
1584 server
.appendseldb
= -1; /* Make sure the first time will not match */
1585 server
.pidfile
= zstrdup("/var/run/redis.pid");
1586 server
.dbfilename
= zstrdup("dump.rdb");
1587 server
.appendfilename
= zstrdup("appendonly.aof");
1588 server
.requirepass
= NULL
;
1589 server
.shareobjects
= 0;
1590 server
.rdbcompression
= 1;
1591 server
.activerehashing
= 1;
1592 server
.maxclients
= 0;
1593 server
.blpop_blocked_clients
= 0;
1594 server
.maxmemory
= 0;
1595 server
.vm_enabled
= 0;
1596 server
.vm_swap_file
= zstrdup("/tmp/redis-%p.vm");
1597 server
.vm_page_size
= 256; /* 256 bytes per page */
1598 server
.vm_pages
= 1024*1024*100; /* 104 millions of pages */
1599 server
.vm_max_memory
= 1024LL*1024*1024*1; /* 1 GB of RAM */
1600 server
.vm_max_threads
= 4;
1601 server
.vm_blocked_clients
= 0;
1602 server
.hash_max_zipmap_entries
= REDIS_HASH_MAX_ZIPMAP_ENTRIES
;
1603 server
.hash_max_zipmap_value
= REDIS_HASH_MAX_ZIPMAP_VALUE
;
1605 resetServerSaveParams();
1607 appendServerSaveParams(60*60,1); /* save after 1 hour and 1 change */
1608 appendServerSaveParams(300,100); /* save after 5 minutes and 100 changes */
1609 appendServerSaveParams(60,10000); /* save after 1 minute and 10000 changes */
1610 /* Replication related */
1612 server
.masterauth
= NULL
;
1613 server
.masterhost
= NULL
;
1614 server
.masterport
= 6379;
1615 server
.master
= NULL
;
1616 server
.replstate
= REDIS_REPL_NONE
;
1618 /* Double constants initialization */
1620 R_PosInf
= 1.0/R_Zero
;
1621 R_NegInf
= -1.0/R_Zero
;
1622 R_Nan
= R_Zero
/R_Zero
;
1625 static void initServer() {
1628 signal(SIGHUP
, SIG_IGN
);
1629 signal(SIGPIPE
, SIG_IGN
);
1630 setupSigSegvAction();
1632 server
.devnull
= fopen("/dev/null","w");
1633 if (server
.devnull
== NULL
) {
1634 redisLog(REDIS_WARNING
, "Can't open /dev/null: %s", server
.neterr
);
1637 server
.clients
= listCreate();
1638 server
.slaves
= listCreate();
1639 server
.monitors
= listCreate();
1640 server
.objfreelist
= listCreate();
1641 createSharedObjects();
1642 server
.el
= aeCreateEventLoop();
1643 server
.db
= zmalloc(sizeof(redisDb
)*server
.dbnum
);
1644 server
.fd
= anetTcpServer(server
.neterr
, server
.port
, server
.bindaddr
);
1645 if (server
.fd
== -1) {
1646 redisLog(REDIS_WARNING
, "Opening TCP port: %s", server
.neterr
);
1649 for (j
= 0; j
< server
.dbnum
; j
++) {
1650 server
.db
[j
].dict
= dictCreate(&dbDictType
,NULL
);
1651 server
.db
[j
].expires
= dictCreate(&keyptrDictType
,NULL
);
1652 server
.db
[j
].blockingkeys
= dictCreate(&keylistDictType
,NULL
);
1653 if (server
.vm_enabled
)
1654 server
.db
[j
].io_keys
= dictCreate(&keylistDictType
,NULL
);
1655 server
.db
[j
].id
= j
;
1657 server
.pubsub_channels
= dictCreate(&keylistDictType
,NULL
);
1658 server
.pubsub_patterns
= listCreate();
1659 listSetFreeMethod(server
.pubsub_patterns
,freePubsubPattern
);
1660 listSetMatchMethod(server
.pubsub_patterns
,listMatchPubsubPattern
);
1661 server
.cronloops
= 0;
1662 server
.bgsavechildpid
= -1;
1663 server
.bgrewritechildpid
= -1;
1664 server
.bgrewritebuf
= sdsempty();
1665 server
.lastsave
= time(NULL
);
1667 server
.stat_numcommands
= 0;
1668 server
.stat_numconnections
= 0;
1669 server
.stat_expiredkeys
= 0;
1670 server
.stat_starttime
= time(NULL
);
1671 server
.unixtime
= time(NULL
);
1672 aeCreateTimeEvent(server
.el
, 1, serverCron
, NULL
, NULL
);
1673 if (aeCreateFileEvent(server
.el
, server
.fd
, AE_READABLE
,
1674 acceptHandler
, NULL
) == AE_ERR
) oom("creating file event");
1676 if (server
.appendonly
) {
1677 int flags
= O_WRONLY
|O_APPEND
|O_CREAT
;
1679 #ifdef HAVE_O_DIRECT
1680 if (server
.appendfsync
== APPENDFSYNC_ALWAYS
) {
1682 server
.appendfsync
= APPENDFSYNC_NO
;
1686 server
.appendfd
= open(server
.appendfilename
,flags
,0644);
1687 if (server
.appendfd
== -1) {
1688 redisLog(REDIS_WARNING
, "Can't open the append-only file: %s",
1694 if (server
.vm_enabled
) vmInit();
1697 /* Empty the whole database */
1698 static long long emptyDb() {
1700 long long removed
= 0;
1702 for (j
= 0; j
< server
.dbnum
; j
++) {
1703 removed
+= dictSize(server
.db
[j
].dict
);
1704 dictEmpty(server
.db
[j
].dict
);
1705 dictEmpty(server
.db
[j
].expires
);
1710 static int yesnotoi(char *s
) {
1711 if (!strcasecmp(s
,"yes")) return 1;
1712 else if (!strcasecmp(s
,"no")) return 0;
1716 /* I agree, this is a very rudimentary way to load a configuration...
1717 will improve later if the config gets more complex */
1718 static void loadServerConfig(char *filename
) {
1720 char buf
[REDIS_CONFIGLINE_MAX
+1], *err
= NULL
;
1724 if (filename
[0] == '-' && filename
[1] == '\0')
1727 if ((fp
= fopen(filename
,"r")) == NULL
) {
1728 redisLog(REDIS_WARNING
, "Fatal error, can't open config file '%s'", filename
);
1733 while(fgets(buf
,REDIS_CONFIGLINE_MAX
+1,fp
) != NULL
) {
1739 line
= sdstrim(line
," \t\r\n");
1741 /* Skip comments and blank lines*/
1742 if (line
[0] == '#' || line
[0] == '\0') {
1747 /* Split into arguments */
1748 argv
= sdssplitlen(line
,sdslen(line
)," ",1,&argc
);
1749 sdstolower(argv
[0]);
1751 /* Execute config directives */
1752 if (!strcasecmp(argv
[0],"timeout") && argc
== 2) {
1753 server
.maxidletime
= atoi(argv
[1]);
1754 if (server
.maxidletime
< 0) {
1755 err
= "Invalid timeout value"; goto loaderr
;
1757 } else if (!strcasecmp(argv
[0],"port") && argc
== 2) {
1758 server
.port
= atoi(argv
[1]);
1759 if (server
.port
< 1 || server
.port
> 65535) {
1760 err
= "Invalid port"; goto loaderr
;
1762 } else if (!strcasecmp(argv
[0],"bind") && argc
== 2) {
1763 server
.bindaddr
= zstrdup(argv
[1]);
1764 } else if (!strcasecmp(argv
[0],"save") && argc
== 3) {
1765 int seconds
= atoi(argv
[1]);
1766 int changes
= atoi(argv
[2]);
1767 if (seconds
< 1 || changes
< 0) {
1768 err
= "Invalid save parameters"; goto loaderr
;
1770 appendServerSaveParams(seconds
,changes
);
1771 } else if (!strcasecmp(argv
[0],"dir") && argc
== 2) {
1772 if (chdir(argv
[1]) == -1) {
1773 redisLog(REDIS_WARNING
,"Can't chdir to '%s': %s",
1774 argv
[1], strerror(errno
));
1777 } else if (!strcasecmp(argv
[0],"loglevel") && argc
== 2) {
1778 if (!strcasecmp(argv
[1],"debug")) server
.verbosity
= REDIS_DEBUG
;
1779 else if (!strcasecmp(argv
[1],"verbose")) server
.verbosity
= REDIS_VERBOSE
;
1780 else if (!strcasecmp(argv
[1],"notice")) server
.verbosity
= REDIS_NOTICE
;
1781 else if (!strcasecmp(argv
[1],"warning")) server
.verbosity
= REDIS_WARNING
;
1783 err
= "Invalid log level. Must be one of debug, notice, warning";
1786 } else if (!strcasecmp(argv
[0],"logfile") && argc
== 2) {
1789 server
.logfile
= zstrdup(argv
[1]);
1790 if (!strcasecmp(server
.logfile
,"stdout")) {
1791 zfree(server
.logfile
);
1792 server
.logfile
= NULL
;
1794 if (server
.logfile
) {
1795 /* Test if we are able to open the file. The server will not
1796 * be able to abort just for this problem later... */
1797 logfp
= fopen(server
.logfile
,"a");
1798 if (logfp
== NULL
) {
1799 err
= sdscatprintf(sdsempty(),
1800 "Can't open the log file: %s", strerror(errno
));
1805 } else if (!strcasecmp(argv
[0],"databases") && argc
== 2) {
1806 server
.dbnum
= atoi(argv
[1]);
1807 if (server
.dbnum
< 1) {
1808 err
= "Invalid number of databases"; goto loaderr
;
1810 } else if (!strcasecmp(argv
[0],"include") && argc
== 2) {
1811 loadServerConfig(argv
[1]);
1812 } else if (!strcasecmp(argv
[0],"maxclients") && argc
== 2) {
1813 server
.maxclients
= atoi(argv
[1]);
1814 } else if (!strcasecmp(argv
[0],"maxmemory") && argc
== 2) {
1815 server
.maxmemory
= strtoll(argv
[1], NULL
, 10);
1816 } else if (!strcasecmp(argv
[0],"slaveof") && argc
== 3) {
1817 server
.masterhost
= sdsnew(argv
[1]);
1818 server
.masterport
= atoi(argv
[2]);
1819 server
.replstate
= REDIS_REPL_CONNECT
;
1820 } else if (!strcasecmp(argv
[0],"masterauth") && argc
== 2) {
1821 server
.masterauth
= zstrdup(argv
[1]);
1822 } else if (!strcasecmp(argv
[0],"glueoutputbuf") && argc
== 2) {
1823 if ((server
.glueoutputbuf
= yesnotoi(argv
[1])) == -1) {
1824 err
= "argument must be 'yes' or 'no'"; goto loaderr
;
1826 } else if (!strcasecmp(argv
[0],"shareobjects") && argc
== 2) {
1827 if ((server
.shareobjects
= yesnotoi(argv
[1])) == -1) {
1828 err
= "argument must be 'yes' or 'no'"; goto loaderr
;
1830 } else if (!strcasecmp(argv
[0],"rdbcompression") && argc
== 2) {
1831 if ((server
.rdbcompression
= yesnotoi(argv
[1])) == -1) {
1832 err
= "argument must be 'yes' or 'no'"; goto loaderr
;
1834 } else if (!strcasecmp(argv
[0],"activerehashing") && argc
== 2) {
1835 if ((server
.activerehashing
= yesnotoi(argv
[1])) == -1) {
1836 err
= "argument must be 'yes' or 'no'"; goto loaderr
;
1838 } else if (!strcasecmp(argv
[0],"daemonize") && argc
== 2) {
1839 if ((server
.daemonize
= yesnotoi(argv
[1])) == -1) {
1840 err
= "argument must be 'yes' or 'no'"; goto loaderr
;
1842 } else if (!strcasecmp(argv
[0],"appendonly") && argc
== 2) {
1843 if ((server
.appendonly
= yesnotoi(argv
[1])) == -1) {
1844 err
= "argument must be 'yes' or 'no'"; goto loaderr
;
1846 } else if (!strcasecmp(argv
[0],"appendfsync") && argc
== 2) {
1847 if (!strcasecmp(argv
[1],"no")) {
1848 server
.appendfsync
= APPENDFSYNC_NO
;
1849 } else if (!strcasecmp(argv
[1],"always")) {
1850 server
.appendfsync
= APPENDFSYNC_ALWAYS
;
1851 } else if (!strcasecmp(argv
[1],"everysec")) {
1852 server
.appendfsync
= APPENDFSYNC_EVERYSEC
;
1854 err
= "argument must be 'no', 'always' or 'everysec'";
1857 } else if (!strcasecmp(argv
[0],"requirepass") && argc
== 2) {
1858 server
.requirepass
= zstrdup(argv
[1]);
1859 } else if (!strcasecmp(argv
[0],"pidfile") && argc
== 2) {
1860 zfree(server
.pidfile
);
1861 server
.pidfile
= zstrdup(argv
[1]);
1862 } else if (!strcasecmp(argv
[0],"dbfilename") && argc
== 2) {
1863 zfree(server
.dbfilename
);
1864 server
.dbfilename
= zstrdup(argv
[1]);
1865 } else if (!strcasecmp(argv
[0],"vm-enabled") && argc
== 2) {
1866 if ((server
.vm_enabled
= yesnotoi(argv
[1])) == -1) {
1867 err
= "argument must be 'yes' or 'no'"; goto loaderr
;
1869 } else if (!strcasecmp(argv
[0],"vm-swap-file") && argc
== 2) {
1870 zfree(server
.vm_swap_file
);
1871 server
.vm_swap_file
= zstrdup(argv
[1]);
1872 } else if (!strcasecmp(argv
[0],"vm-max-memory") && argc
== 2) {
1873 server
.vm_max_memory
= strtoll(argv
[1], NULL
, 10);
1874 } else if (!strcasecmp(argv
[0],"vm-page-size") && argc
== 2) {
1875 server
.vm_page_size
= strtoll(argv
[1], NULL
, 10);
1876 } else if (!strcasecmp(argv
[0],"vm-pages") && argc
== 2) {
1877 server
.vm_pages
= strtoll(argv
[1], NULL
, 10);
1878 } else if (!strcasecmp(argv
[0],"vm-max-threads") && argc
== 2) {
1879 server
.vm_max_threads
= strtoll(argv
[1], NULL
, 10);
1880 } else if (!strcasecmp(argv
[0],"hash-max-zipmap-entries") && argc
== 2){
1881 server
.hash_max_zipmap_entries
= strtol(argv
[1], NULL
, 10);
1882 } else if (!strcasecmp(argv
[0],"hash-max-zipmap-value") && argc
== 2){
1883 server
.hash_max_zipmap_value
= strtol(argv
[1], NULL
, 10);
1884 } else if (!strcasecmp(argv
[0],"vm-max-threads") && argc
== 2) {
1885 server
.vm_max_threads
= strtoll(argv
[1], NULL
, 10);
1887 err
= "Bad directive or wrong number of arguments"; goto loaderr
;
1889 for (j
= 0; j
< argc
; j
++)
1894 if (fp
!= stdin
) fclose(fp
);
1898 fprintf(stderr
, "\n*** FATAL CONFIG FILE ERROR ***\n");
1899 fprintf(stderr
, "Reading the configuration file, at line %d\n", linenum
);
1900 fprintf(stderr
, ">>> '%s'\n", line
);
1901 fprintf(stderr
, "%s\n", err
);
/* Drop one reference on every object held in the client's argv vector and
 * in its multi-bulk mbargv vector. */
1905 static void freeClientArgv(redisClient
*c
) {
1908 for (j
= 0; j
< c
->argc
; j
++)
1909 decrRefCount(c
->argv
[j
]);
1910 for (j
= 0; j
< c
->mbargc
; j
++)
1911 decrRefCount(c
->mbargv
[j
]);
/* Tear down a client: free its query buffer, unblock it if it is blocked,
 * unsubscribe it from pub/sub, delete its readable/writable file events,
 * unlink it from server.clients (and from the VM io_ready_clients list when
 * applicable), perform the master/slave bookkeeping and release the
 * per-client containers. */
1916 static void freeClient(redisClient
*c
) {
1919 /* Note that if the client we are freeing is blocked in a blocking
1920 * call, we have to set querybuf to NULL *before* calling
1921 * unblockClientWaitingData(), so that processInputBuffer() does not get
1922 * called. Also it is important to remove the file events after
1923 * this, because this call adds the READABLE event. */
1924 sdsfree(c
->querybuf
);
1926 if (c
->flags
& REDIS_BLOCKED
)
1927 unblockClientWaitingData(c
);
1929 /* Unsubscribe from all the pubsub channels */
1930 pubsubUnsubscribeAllChannels(c
,0);
1931 pubsubUnsubscribeAllPatterns(c
,0);
1932 dictRelease(c
->pubsub_channels
);
1933 listRelease(c
->pubsub_patterns
);
1934 /* Obvious cleanup */
1935 aeDeleteFileEvent(server
.el
,c
->fd
,AE_READABLE
);
1936 aeDeleteFileEvent(server
.el
,c
->fd
,AE_WRITABLE
);
1937 listRelease(c
->reply
);
1940 /* Remove from the list of clients */
1941 ln
= listSearchKey(server
.clients
,c
);
1942 redisAssert(ln
!= NULL
);
1943 listDelNode(server
.clients
,ln
);
1944 /* Remove from the list of clients waiting for swapped keys */
1945 if (c
->flags
& REDIS_IO_WAIT
&& listLength(c
->io_keys
) == 0) {
1946 ln
= listSearchKey(server
.io_ready_clients
,c
);
1948 listDelNode(server
.io_ready_clients
,ln
);
1949 server
.vm_blocked_clients
--;
/* Stop waiting for every key still listed in io_keys */
1952 while (server
.vm_enabled
&& listLength(c
->io_keys
)) {
1953 ln
= listFirst(c
->io_keys
);
1954 dontWaitForSwappedKey(c
,ln
->value
);
1956 listRelease(c
->io_keys
);
1957 /* Master/slave cleanup */
1958 if (c
->flags
& REDIS_SLAVE
) {
1959 if (c
->replstate
== REDIS_REPL_SEND_BULK
&& c
->repldbfd
!= -1)
1961 list
*l
= (c
->flags
& REDIS_MONITOR
) ? server
.monitors
: server
.slaves
;
1962 ln
= listSearchKey(l
,c
);
1963 redisAssert(ln
!= NULL
);
/* The freed client was our master: forget it and go back to the
 * CONNECT replication state. */
1966 if (c
->flags
& REDIS_MASTER
) {
1967 server
.master
= NULL
;
1968 server
.replstate
= REDIS_REPL_CONNECT
;
1970 /* Release memory */
1973 freeClientMultiState(c
);
/* Glue small reply objects together: objects from c->reply are copied into a
 * stack buffer while the running total fits in GLUEREPLY_UP_TO bytes, the
 * copied nodes are deleted, and one new string object holding the combined
 * bytes is pushed at the head of the list. The function can return without
 * gluing anything while copylen is still 0. */
1977 #define GLUEREPLY_UP_TO (1024)
1978 static void glueReplyBuffersIfNeeded(redisClient
*c
) {
1980 char buf
[GLUEREPLY_UP_TO
];
1985 listRewind(c
->reply
,&li
);
1986 while((ln
= listNext(&li
))) {
1990 objlen
= sdslen(o
->ptr
);
1991 if (copylen
+ objlen
<= GLUEREPLY_UP_TO
) {
1992 memcpy(buf
+copylen
,o
->ptr
,objlen
);
1994 listDelNode(c
->reply
,ln
);
1996 if (copylen
== 0) return;
2000 /* Now the output buffer is empty, add the new single element */
2001 o
= createObject(REDIS_STRING
,sdsnewlen(buf
,copylen
));
2002 listAddNodeHead(c
->reply
,o
);
/* Writable-event handler installed by addReply(): sends the objects queued
 * in c->reply to fd, using c->sentlen to remember how much of the head
 * object has already been written. The work is handed to
 * sendReplyToClientWritev() when glueoutputbuf is off, the list is longer
 * than REDIS_WRITEV_THRESHOLD and the client is not a master. Once the list
 * is empty the writable event is removed. */
2005 static void sendReplyToClient(aeEventLoop
*el
, int fd
, void *privdata
, int mask
) {
2006 redisClient
*c
= privdata
;
2007 int nwritten
= 0, totwritten
= 0, objlen
;
2010 REDIS_NOTUSED(mask
);
2012 /* Use writev() if we have enough buffers to send */
2013 if (!server
.glueoutputbuf
&&
2014 listLength(c
->reply
) > REDIS_WRITEV_THRESHOLD
&&
2015 !(c
->flags
& REDIS_MASTER
))
2017 sendReplyToClientWritev(el
, fd
, privdata
, mask
);
2021 while(listLength(c
->reply
)) {
2022 if (server
.glueoutputbuf
&& listLength(c
->reply
) > 1)
2023 glueReplyBuffersIfNeeded(c
);
2025 o
= listNodeValue(listFirst(c
->reply
));
2026 objlen
= sdslen(o
->ptr
);
2029 listDelNode(c
->reply
,listFirst(c
->reply
));
2033 if (c
->flags
& REDIS_MASTER
) {
2034 /* Don't reply to a master: the remaining bytes are simply accounted as written */
2035 nwritten
= objlen
- c
->sentlen
;
2037 nwritten
= write(fd
, ((char*)o
->ptr
)+c
->sentlen
, objlen
- c
->sentlen
);
2038 if (nwritten
<= 0) break;
2040 c
->sentlen
+= nwritten
;
2041 totwritten
+= nwritten
;
2042 /* If we fully sent the object on head go to the next one */
2043 if (c
->sentlen
== objlen
) {
2044 listDelNode(c
->reply
,listFirst(c
->reply
));
2047 /* Note that we avoid sending more than REDIS_MAX_WRITE_PER_EVENT
2048 * bytes, in a single threaded server it's a good idea to serve
2049 * other clients as well, even if a very large request comes from
2050 * super fast link that is always able to accept data (in real world
2051 * scenario think about 'KEYS *' against the loopback interface) */
2052 if (totwritten
> REDIS_MAX_WRITE_PER_EVENT
) break;
2054 if (nwritten
== -1) {
2055 if (errno
== EAGAIN
) {
2058 redisLog(REDIS_VERBOSE
,
2059 "Error writing to client: %s", strerror(errno
));
2064 if (totwritten
> 0) c
->lastinteraction
= time(NULL
);
2065 if (listLength(c
->reply
) == 0) {
2067 aeDeleteFileEvent(server
.el
,c
->fd
,AE_WRITABLE
);
/* writev() based variant of sendReplyToClient(): up to
 * REDIS_WRITEV_IOVEC_COUNT reply objects are described in an iovec array
 * (the first entry starts at c->sentlen) and written with a single writev()
 * call. Afterwards fully written objects are removed from c->reply and the
 * leftover byte count is added to c->sentlen. The writable event is removed
 * once the reply list is empty. */
2071 static void sendReplyToClientWritev(aeEventLoop
*el
, int fd
, void *privdata
, int mask
)
2073 redisClient
*c
= privdata
;
2074 int nwritten
= 0, totwritten
= 0, objlen
, willwrite
;
2076 struct iovec iov
[REDIS_WRITEV_IOVEC_COUNT
];
2077 int offset
, ion
= 0;
2079 REDIS_NOTUSED(mask
);
2082 while (listLength(c
->reply
)) {
2083 offset
= c
->sentlen
;
2087 /* fill-in the iov[] array */
2088 for(node
= listFirst(c
->reply
); node
; node
= listNextNode(node
)) {
2089 o
= listNodeValue(node
);
2090 objlen
= sdslen(o
->ptr
);
2092 if (totwritten
+ objlen
- offset
> REDIS_MAX_WRITE_PER_EVENT
)
2095 if(ion
== REDIS_WRITEV_IOVEC_COUNT
)
2096 break; /* no more iovecs */
2098 iov
[ion
].iov_base
= ((char*)o
->ptr
) + offset
;
2099 iov
[ion
].iov_len
= objlen
- offset
;
2100 willwrite
+= objlen
- offset
;
2101 offset
= 0; /* just for the first item */
2108 /* write all collected blocks at once */
2109 if((nwritten
= writev(fd
, iov
, ion
)) < 0) {
2110 if (errno
!= EAGAIN
) {
2111 redisLog(REDIS_VERBOSE
,
2112 "Error writing to client: %s", strerror(errno
));
2119 totwritten
+= nwritten
;
2120 offset
= c
->sentlen
;
2122 /* remove written robjs from c->reply */
2123 while (nwritten
&& listLength(c
->reply
)) {
2124 o
= listNodeValue(listFirst(c
->reply
));
2125 objlen
= sdslen(o
->ptr
);
2127 if(nwritten
>= objlen
- offset
) {
2128 listDelNode(c
->reply
, listFirst(c
->reply
));
2129 nwritten
-= objlen
- offset
;
2133 c
->sentlen
+= nwritten
;
2141 c
->lastinteraction
= time(NULL
);
2143 if (listLength(c
->reply
) == 0) {
2145 aeDeleteFileEvent(server
.el
,c
->fd
,AE_WRITABLE
);
/* Scan cmdTable (terminated by an entry whose name is NULL) for a command
 * whose name matches `name` case-insensitively, and return the address of
 * the matching entry. */
2149 static struct redisCommand
*lookupCommand(char *name
) {
2151 while(cmdTable
[j
].name
!= NULL
) {
2152 if (!strcasecmp(name
,cmdTable
[j
].name
)) return &cmdTable
[j
];
2158 /* resetClient prepares the client to process the next command. */
2159 static void resetClient(redisClient
*c
) {
2165 /* Call() is the core of Redis execution of a command.
 *
 * The change of server.dirty measured around the call decides propagation:
 * a non-zero delta feeds the append only file (when enabled) and the
 * slaves; commands flagged REDIS_CMD_FORCE_REPLICATION are sent to slaves
 * regardless. Monitors are fed whenever at least one is attached, and
 * server.stat_numcommands is incremented. */
2166 static void call(redisClient
*c
, struct redisCommand
*cmd
) {
2169 dirty
= server
.dirty
;
2171 dirty
= server
.dirty
-dirty
;
2173 if (server
.appendonly
&& dirty
)
2174 feedAppendOnlyFile(cmd
,c
->db
->id
,c
->argv
,c
->argc
);
2175 if ((dirty
|| cmd
->flags
& REDIS_CMD_FORCE_REPLICATION
) &&
2176 listLength(server
.slaves
))
2177 replicationFeedSlaves(server
.slaves
,c
->db
->id
,c
->argv
,c
->argc
);
2178 if (listLength(server
.monitors
))
2179 replicationFeedSlaves(server
.monitors
,c
->db
->id
,c
->argv
,c
->argc
);
2180 server
.stat_numcommands
++;
2183 /* If this function gets called we already read a whole
2184 * command, arguments are in the client argv/argc fields.
2185 * processCommand() executes the command or prepares the
2186 * server for a bulk read from the client.
2188 * If 1 is returned the client is still alive and valid and
2189 * other operations can be performed by the caller. Otherwise
2190 * if 0 is returned the client was destroyed (i.e. after QUIT). */
2191 static int processCommand(redisClient
*c
) {
2192 struct redisCommand
*cmd
;
2194 /* Free some memory if needed (maxmemory setting) */
2195 if (server
.maxmemory
) freeMemoryIfNeeded();
2197 /* Handle the multi bulk command type. This is an alternative protocol
2198 * supported by Redis in order to receive commands that are composed of
2199 * multiple binary-safe "bulk" arguments. The latency of processing is
2200 * a bit higher but this allows things like multi-sets, so if this
2201 * protocol is used only for MSET and similar commands this is a big win. */
2202 if (c
->multibulk
== 0 && c
->argc
== 1 && ((char*)(c
->argv
[0]->ptr
))[0] == '*') {
2203 c
->multibulk
= atoi(((char*)c
->argv
[0]->ptr
)+1);
2204 if (c
->multibulk
<= 0) {
2208 decrRefCount(c
->argv
[c
->argc
-1]);
2212 } else if (c
->multibulk
) {
2213 if (c
->bulklen
== -1) {
2214 if (((char*)c
->argv
[0]->ptr
)[0] != '$') {
2215 addReplySds(c
,sdsnew("-ERR multi bulk protocol error\r\n"));
2219 int bulklen
= atoi(((char*)c
->argv
[0]->ptr
)+1);
2220 decrRefCount(c
->argv
[0]);
2221 if (bulklen
< 0 || bulklen
> 1024*1024*1024) {
2223 addReplySds(c
,sdsnew("-ERR invalid bulk write count\r\n"));
2228 c
->bulklen
= bulklen
+2; /* add two bytes for CR+LF */
2232 c
->mbargv
= zrealloc(c
->mbargv
,(sizeof(robj
*))*(c
->mbargc
+1));
2233 c
->mbargv
[c
->mbargc
] = c
->argv
[0];
2237 if (c
->multibulk
== 0) {
2241 /* Here we need to swap the multi-bulk argc/argv with the
2242 * normal argc/argv of the client structure. */
2244 c
->argv
= c
->mbargv
;
2245 c
->mbargv
= auxargv
;
2248 c
->argc
= c
->mbargc
;
2249 c
->mbargc
= auxargc
;
2251 /* We need to set bulklen to something different than -1
2252 * in order for the code below to process the command without
2253 * to try to read the last argument of a bulk command as
2254 * a special argument. */
2256 /* continue below and process the command */
2263 /* -- end of multi bulk commands processing -- */
2265 /* The QUIT command is handled as a special case. Normal command
2266 * procs are unable to close the client connection safely */
2267 if (!strcasecmp(c
->argv
[0]->ptr
,"quit")) {
2272 /* Now lookup the command and check ASAP about trivial error conditions
2273 * such wrong arity, bad command name and so forth. */
2274 cmd
= lookupCommand(c
->argv
[0]->ptr
);
2277 sdscatprintf(sdsempty(), "-ERR unknown command '%s'\r\n",
2278 (char*)c
->argv
[0]->ptr
));
2281 } else if ((cmd
->arity
> 0 && cmd
->arity
!= c
->argc
) ||
2282 (c
->argc
< -cmd
->arity
)) {
2284 sdscatprintf(sdsempty(),
2285 "-ERR wrong number of arguments for '%s' command\r\n",
2289 } else if (cmd
->flags
& REDIS_CMD_BULK
&& c
->bulklen
== -1) {
2290 /* This is a bulk command, we have to read the last argument yet. */
2291 int bulklen
= atoi(c
->argv
[c
->argc
-1]->ptr
);
2293 decrRefCount(c
->argv
[c
->argc
-1]);
2294 if (bulklen
< 0 || bulklen
> 1024*1024*1024) {
2296 addReplySds(c
,sdsnew("-ERR invalid bulk write count\r\n"));
2301 c
->bulklen
= bulklen
+2; /* add two bytes for CR+LF */
2302 /* It is possible that the bulk read is already in the
2303 * buffer. Check this condition and handle it accordingly.
2304 * This is just a fast path, alternative to call processInputBuffer().
2305 * It's a good idea since the code is small and this condition
2306 * happens most of the times. */
2307 if ((signed)sdslen(c
->querybuf
) >= c
->bulklen
) {
2308 c
->argv
[c
->argc
] = createStringObject(c
->querybuf
,c
->bulklen
-2);
2310 c
->querybuf
= sdsrange(c
->querybuf
,c
->bulklen
,-1);
2312 /* Otherwise return... there is to read the last argument
2313 * from the socket. */
2317 /* Let's try to encode the bulk object to save space. */
2318 if (cmd
->flags
& REDIS_CMD_BULK
)
2319 c
->argv
[c
->argc
-1] = tryObjectEncoding(c
->argv
[c
->argc
-1]);
2321 /* Check if the user is authenticated */
2322 if (server
.requirepass
&& !c
->authenticated
&& cmd
->proc
!= authCommand
) {
2323 addReplySds(c
,sdsnew("-ERR operation not permitted\r\n"));
2328 /* Handle the maxmemory directive */
2329 if (server
.maxmemory
&& (cmd
->flags
& REDIS_CMD_DENYOOM
) &&
2330 zmalloc_used_memory() > server
.maxmemory
)
2332 addReplySds(c
,sdsnew("-ERR command not allowed when used memory > 'maxmemory'\r\n"));
2337 /* Only allow SUBSCRIBE and UNSUBSCRIBE in the context of Pub/Sub */
2338 if ((dictSize(c
->pubsub_channels
) > 0 || listLength(c
->pubsub_patterns
) > 0)
2340 cmd
->proc
!= subscribeCommand
&& cmd
->proc
!= unsubscribeCommand
&&
2341 cmd
->proc
!= psubscribeCommand
&& cmd
->proc
!= punsubscribeCommand
) {
2342 addReplySds(c
,sdsnew("-ERR only (P)SUBSCRIBE / (P)UNSUBSCRIBE / QUIT allowed in this context\r\n"));
2347 /* Exec the command */
2348 if (c
->flags
& REDIS_MULTI
&& cmd
->proc
!= execCommand
&& cmd
->proc
!= discardCommand
) {
2349 queueMultiCommand(c
,cmd
);
2350 addReply(c
,shared
.queued
);
2352 if (server
.vm_enabled
&& server
.vm_max_threads
> 0 &&
2353 blockClientOnSwappedKeys(cmd
,c
)) return 1;
2357 /* Prepare the client for the next command */
/* Send the command argv/argc, executed against database `dictid`, to every
 * client in `slaves` (also used with the monitors list, see call()). The
 * command is re-encoded as a multi bulk request built from temporary length
 * objects plus the original arguments. A slave whose slaveseldb differs from
 * dictid first receives a SELECT for that database; slaves still in the
 * REDIS_REPL_WAIT_BGSAVE_START state are skipped. */
2362 static void replicationFeedSlaves(list
*slaves
, int dictid
, robj
**argv
, int argc
) {
2367 /* We need 1+(ARGS*3) objects since commands are using the new protocol
2368 * and we need 1 object for the first "*<count>\r\n" multibulk count, then
2369 * for every additional object we have "$<count>\r\n" + object + "\r\n". */
2370 robj
*static_outv
[REDIS_STATIC_ARGS
*3+1];
2373 if (argc
<= REDIS_STATIC_ARGS
) {
2376 outv
= zmalloc(sizeof(robj
*)*(argc
*3+1));
2379 lenobj
= createObject(REDIS_STRING
,
2380 sdscatprintf(sdsempty(), "*%d\r\n", argc
));
2381 lenobj
->refcount
= 0;
2382 outv
[outc
++] = lenobj
;
2383 for (j
= 0; j
< argc
; j
++) {
2384 lenobj
= createObject(REDIS_STRING
,
2385 sdscatprintf(sdsempty(),"$%lu\r\n",
2386 (unsigned long) stringObjectLen(argv
[j
])));
2387 lenobj
->refcount
= 0;
2388 outv
[outc
++] = lenobj
;
2389 outv
[outc
++] = argv
[j
];
2390 outv
[outc
++] = shared
.crlf
;
2393 /* Increment all the refcounts at start and decrement at end in order to
2394 * be sure to free objects if there is no slave in a replication state
2395 * able to be fed with commands */
2396 for (j
= 0; j
< outc
; j
++) incrRefCount(outv
[j
]);
2397 listRewind(slaves
,&li
);
2398 while((ln
= listNext(&li
))) {
2399 redisClient
*slave
= ln
->value
;
2401 /* Don't feed slaves that are still waiting for BGSAVE to start */
2402 if (slave
->replstate
== REDIS_REPL_WAIT_BGSAVE_START
) continue;
2404 /* Feed all the other slaves, MONITORs and so on */
2405 if (slave
->slaveseldb
!= dictid
) {
2409 case 0: selectcmd
= shared
.select0
; break;
2410 case 1: selectcmd
= shared
.select1
; break;
2411 case 2: selectcmd
= shared
.select2
; break;
2412 case 3: selectcmd
= shared
.select3
; break;
2413 case 4: selectcmd
= shared
.select4
; break;
2414 case 5: selectcmd
= shared
.select5
; break;
2415 case 6: selectcmd
= shared
.select6
; break;
2416 case 7: selectcmd
= shared
.select7
; break;
2417 case 8: selectcmd
= shared
.select8
; break;
2418 case 9: selectcmd
= shared
.select9
; break;
2420 selectcmd
= createObject(REDIS_STRING
,
2421 sdscatprintf(sdsempty(),"select %d\r\n",dictid
));
2422 selectcmd
->refcount
= 0;
2425 addReply(slave
,selectcmd
);
2426 slave
->slaveseldb
= dictid
;
2428 for (j
= 0; j
< outc
; j
++) addReply(slave
,outv
[j
]);
2430 for (j
= 0; j
< outc
; j
++) decrRefCount(outv
[j
]);
2431 if (outv
!= static_outv
) zfree(outv
);
/* Parse the client's query buffer. When c->bulklen is -1 the next
 * newline-terminated line is cut from the buffer, split on spaces into
 * arguments and passed to processCommand(); otherwise the pending bulk
 * payload (c->bulklen bytes including the trailing CRLF) is taken as the
 * last argument once it is fully buffered. Processing repeats while the
 * client stays valid and data remains in the buffer. */
2434 static void processInputBuffer(redisClient
*c
) {
2436 /* Before processing the input buffer, make sure the client is not
2437 * waiting for a blocking operation such as BLPOP. Note that the first
2438 * iteration the client is never blocked, otherwise the processInputBuffer
2439 * would not be called at all, but after the execution of the first commands
2440 * in the input buffer the client may be blocked, and the "goto again"
2441 * will try to reiterate. The following line will make it return asap. */
2442 if (c
->flags
& REDIS_BLOCKED
|| c
->flags
& REDIS_IO_WAIT
) return;
2443 if (c
->bulklen
== -1) {
2444 /* Read the first line of the query */
2445 char *p
= strchr(c
->querybuf
,'\n');
2452 query
= c
->querybuf
;
2453 c
->querybuf
= sdsempty();
2454 querylen
= 1+(p
-(query
));
2455 if (sdslen(query
) > querylen
) {
2456 /* leave data after the first line of the query in the buffer */
2457 c
->querybuf
= sdscatlen(c
->querybuf
,query
+querylen
,sdslen(query
)-querylen
);
2459 *p
= '\0'; /* remove "\n" */
2460 if (*(p
-1) == '\r') *(p
-1) = '\0'; /* and "\r" if any */
2461 sdsupdatelen(query
);
2463 /* Now we can split the query in arguments */
2464 argv
= sdssplitlen(query
,sdslen(query
)," ",1,&argc
);
2467 if (c
->argv
) zfree(c
->argv
);
2468 c
->argv
= zmalloc(sizeof(robj
*)*argc
);
2470 for (j
= 0; j
< argc
; j
++) {
2471 if (sdslen(argv
[j
])) {
2472 c
->argv
[c
->argc
] = createObject(REDIS_STRING
,argv
[j
]);
2480 /* Execute the command. If the client is still valid
2481 * after processCommand() return and there is something
2482 * on the query buffer try to process the next command. */
2483 if (processCommand(c
) && sdslen(c
->querybuf
)) goto again
;
2485 /* Nothing to process, argc == 0. Just process the query
2486 * buffer if it's not empty or return to the caller */
2487 if (sdslen(c
->querybuf
)) goto again
;
2490 } else if (sdslen(c
->querybuf
) >= REDIS_REQUEST_MAX_SIZE
) {
2491 redisLog(REDIS_VERBOSE
, "Client protocol error");
2496 /* Bulk read handling. Note that if we are at this point
2497 the client already sent a command terminated with a newline,
2498 we are reading the bulk data that is actually the last
2499 argument of the command. */
2500 int qbl
= sdslen(c
->querybuf
);
2502 if (c
->bulklen
<= qbl
) {
2503 /* Copy everything but the final CRLF as final argument */
2504 c
->argv
[c
->argc
] = createStringObject(c
->querybuf
,c
->bulklen
-2);
2506 c
->querybuf
= sdsrange(c
->querybuf
,c
->bulklen
,-1);
2507 /* Process the command. If the client is still valid after
2508 * the processing and there is more data in the buffer
2509 * try to parse it. */
2510 if (processCommand(c
) && sdslen(c
->querybuf
)) goto again
;
/* Readable-event handler registered by createClient(): reads up to
 * REDIS_IOBUF_LEN bytes from fd, appends them to c->querybuf, refreshes
 * c->lastinteraction and runs processInputBuffer(). Read errors other than
 * EAGAIN and a zero-length read (peer closed) are logged at verbose level. */
2516 static void readQueryFromClient(aeEventLoop
*el
, int fd
, void *privdata
, int mask
) {
2517 redisClient
*c
= (redisClient
*) privdata
;
2518 char buf
[REDIS_IOBUF_LEN
];
2521 REDIS_NOTUSED(mask
);
2523 nread
= read(fd
, buf
, REDIS_IOBUF_LEN
);
2525 if (errno
== EAGAIN
) {
2528 redisLog(REDIS_VERBOSE
, "Reading from client: %s",strerror(errno
));
2532 } else if (nread
== 0) {
2533 redisLog(REDIS_VERBOSE
, "Client closed connection");
2538 c
->querybuf
= sdscatlen(c
->querybuf
, buf
, nread
);
2539 c
->lastinteraction
= time(NULL
);
2543 processInputBuffer(c
);
/* Make server.db[id] the client's current database, after checking that id
 * lies within [0, server.dbnum). */
2546 static int selectDb(redisClient
*c
, int id
) {
2547 if (id
< 0 || id
>= server
.dbnum
)
2549 c
->db
= &server
.db
[id
];
/* Dup method of the client reply list (set in createClient()): takes one
 * more reference on the object. */
2553 static void *dupClientReplyValue(void *o
) {
2554 incrRefCount((robj
*)o
);
/* List match method: non-zero when compareStringObjects() returns 0 for the
 * two objects, i.e. it reports them as equal. */
2558 static int listMatchObjects(void *a
, void *b
) {
2559 return compareStringObjects(a
,b
) == 0;
/* Allocate and initialise the client state for socket fd. The socket is
 * passed through anetNonBlock() and anetTcpNoDelay(), the reply, io_keys and
 * pub/sub containers are created with their free/dup/match methods,
 * readQueryFromClient is registered as the readable handler, and the client
 * is appended to server.clients. Returns NULL when the allocation fails. */
2562 static redisClient
*createClient(int fd
) {
2563 redisClient
*c
= zmalloc(sizeof(*c
));
2565 anetNonBlock(NULL
,fd
);
2566 anetTcpNoDelay(NULL
,fd
);
2567 if (!c
) return NULL
;
2570 c
->querybuf
= sdsempty();
2579 c
->lastinteraction
= time(NULL
);
2580 c
->authenticated
= 0;
2581 c
->replstate
= REDIS_REPL_NONE
;
2582 c
->reply
= listCreate();
2583 listSetFreeMethod(c
->reply
,decrRefCount
);
2584 listSetDupMethod(c
->reply
,dupClientReplyValue
);
2585 c
->blockingkeys
= NULL
;
2586 c
->blockingkeysnum
= 0;
2587 c
->io_keys
= listCreate();
2588 listSetFreeMethod(c
->io_keys
,decrRefCount
);
2589 c
->pubsub_channels
= dictCreate(&setDictType
,NULL
);
2590 c
->pubsub_patterns
= listCreate();
2591 listSetFreeMethod(c
->pubsub_patterns
,decrRefCount
);
2592 listSetMatchMethod(c
->pubsub_patterns
,listMatchObjects
);
2593 if (aeCreateFileEvent(server
.el
, c
->fd
, AE_READABLE
,
2594 readQueryFromClient
, c
) == AE_ERR
) {
2598 listAddNodeTail(server
.clients
,c
);
2599 initClientMultiState(c
);
/* Queue obj at the tail of the client's reply list. If the list is empty
 * and the client's replstate is REDIS_REPL_NONE or REDIS_REPL_ONLINE, the
 * writable handler sendReplyToClient is installed first; when that
 * registration fails the function returns without queueing anything.
 * With VM enabled, an object whose storage is not REDIS_VM_MEMORY is
 * duplicated before being queued. */
2603 static void addReply(redisClient
*c
, robj
*obj
) {
2604 if (listLength(c
->reply
) == 0 &&
2605 (c
->replstate
== REDIS_REPL_NONE
||
2606 c
->replstate
== REDIS_REPL_ONLINE
) &&
2607 aeCreateFileEvent(server
.el
, c
->fd
, AE_WRITABLE
,
2608 sendReplyToClient
, c
) == AE_ERR
) return;
2610 if (server
.vm_enabled
&& obj
->storage
!= REDIS_VM_MEMORY
) {
2611 obj
= dupStringObject(obj
);
2612 obj
->refcount
= 0; /* getDecodedObject() will increment the refcount */
2614 listAddNodeTail(c
->reply
,getDecodedObject(obj
));
/* Reply helper taking an sds string: s is wrapped in a REDIS_STRING
 * object. */
2617 static void addReplySds(redisClient
*c
, sds s
) {
2618 robj
*o
= createObject(REDIS_STRING
,s
);
/* Reply with the double d, formatted with "%.17g" and sent as a bulk
 * string: "$<len>\r\n<text>\r\n". */
2623 static void addReplyDouble(redisClient
*c
, double d
) {
2626 snprintf(buf
,sizeof(buf
),"%.17g",d
);
2627 addReplySds(c
,sdscatprintf(sdsempty(),"$%lu\r\n%s\r\n",
2628 (unsigned long) strlen(buf
),buf
));
/* Reply with the integer l. The shared czero / cone objects are used for
 * the small special cases; other values are formatted as ":<l>\r\n". */
2631 static void addReplyLong(redisClient
*c
, long l
) {
2636 addReply(c
,shared
.czero
);
2638 } else if (l
== 1) {
2639 addReply(c
,shared
.cone
);
2642 len
= snprintf(buf
,sizeof(buf
),":%ld\r\n",l
);
2643 addReplySds(c
,sdsnewlen(buf
,len
));
/* Same as addReplyLong() but for a long long value: shared czero / cone
 * objects for the small special cases, ":<ll>\r\n" otherwise. */
2646 static void addReplyLongLong(redisClient
*c
, long long ll
) {
2651 addReply(c
,shared
.czero
);
2653 } else if (ll
== 1) {
2654 addReply(c
,shared
.cone
);
2657 len
= snprintf(buf
,sizeof(buf
),":%lld\r\n",ll
);
2658 addReplySds(c
,sdsnewlen(buf
,len
));
/* Same as addReplyLong() but for an unsigned long value: shared czero /
 * cone objects for the small special cases, ":<ul>\r\n" otherwise. */
2661 static void addReplyUlong(redisClient
*c
, unsigned long ul
) {
2666 addReply(c
,shared
.czero
);
2668 } else if (ul
== 1) {
2669 addReply(c
,shared
.cone
);
2672 len
= snprintf(buf
,sizeof(buf
),":%lu\r\n",ul
);
2673 addReplySds(c
,sdsnewlen(buf
,len
));
/* Emit the "$<len>\r\n" header of a bulk reply for obj. For raw encoded
 * strings len is sdslen(obj->ptr); otherwise obj->ptr is read as a long and
 * len is the number of characters its base 10 representation takes. */
2676 static void addReplyBulkLen(redisClient
*c
, robj
*obj
) {
2679 if (obj
->encoding
== REDIS_ENCODING_RAW
) {
2680 len
= sdslen(obj
->ptr
);
2682 long n
= (long)obj
->ptr
;
2684 /* Compute how many bytes will take this integer as a radix 10 string */
2690 while((n
= n
/10) != 0) {
2694 addReplySds(c
,sdscatprintf(sdsempty(),"$%lu\r\n",(unsigned long)len
));
/* Send obj as a bulk reply: the length header comes from addReplyBulkLen()
 * and the reply is terminated by shared.crlf. */
2697 static void addReplyBulk(redisClient
*c
, robj
*obj
) {
2698 addReplyBulkLen(c
,obj
);
2700 addReply(c
,shared
.crlf
);
2703 /* In the CONFIG command we need to add vanilla C strings as bulk replies.
 * One branch answers with shared.nullbulk; otherwise the strlen(s) bytes of
 * s are wrapped in a new string object. */
2704 static void addReplyBulkCString(redisClient
*c
, char *s
) {
2706 addReply(c
,shared
.nullbulk
);
2708 robj
*o
= createStringObject(s
,strlen(s
));
/* File event handler for the listening socket fd: accepts one connection
 * with anetAccept(), creates the client state for it, enforces the
 * maxclients limit (sending a best-effort error to the client in excess)
 * and increments server.stat_numconnections. */
2714 static void acceptHandler(aeEventLoop
*el
, int fd
, void *privdata
, int mask
) {
2719 REDIS_NOTUSED(mask
);
2720 REDIS_NOTUSED(privdata
);
2722 cfd
= anetAccept(server
.neterr
, fd
, cip
, &cport
);
2723 if (cfd
== AE_ERR
) {
2724 redisLog(REDIS_VERBOSE
,"Accepting client connection: %s", server
.neterr
);
2727 redisLog(REDIS_VERBOSE
,"Accepted %s:%d", cip
, cport
);
2728 if ((c
= createClient(cfd
)) == NULL
) {
2729 redisLog(REDIS_WARNING
,"Error allocating resoures for the client");
2730 close(cfd
); /* May be already closed, just ignore errors */
2733 /* If maxclient directive is set and this is one client more... close the
2734 * connection. Note that we create the client instead of checking before
2735 * for this condition, since now the socket is already set in nonblocking
2736 * mode and we can send an error for free using the Kernel I/O */
2737 if (server
.maxclients
&& listLength(server
.clients
) > server
.maxclients
) {
2738 char *err
= "-ERR max number of clients reached\r\n";
2740 /* That's a best effort error message, don't check write errors */
2741 if (write(c
->fd
,err
,strlen(err
)) == -1) {
2742 /* Nothing to do, Just to avoid the warning... */
2747 server
.stat_numconnections
++;
2750 /* ======================= Redis objects implementation ===================== */
/* Create an object of the given type. An object from server.objfreelist is
 * reused when the list is not empty; the free list is accessed under
 * obj_freelist_mutex when VM is enabled. A fresh allocation is
 * sizeof(struct redisObjectVM) bytes smaller when VM is disabled. New
 * objects start raw encoded and, with VM enabled, in REDIS_VM_MEMORY
 * storage with vm.atime set to server.unixtime. */
2752 static robj
*createObject(int type
, void *ptr
) {
2755 if (server
.vm_enabled
) pthread_mutex_lock(&server
.obj_freelist_mutex
);
2756 if (listLength(server
.objfreelist
)) {
2757 listNode
*head
= listFirst(server
.objfreelist
);
2758 o
= listNodeValue(head
);
2759 listDelNode(server
.objfreelist
,head
);
2760 if (server
.vm_enabled
) pthread_mutex_unlock(&server
.obj_freelist_mutex
);
2762 if (server
.vm_enabled
) {
2763 pthread_mutex_unlock(&server
.obj_freelist_mutex
);
2764 o
= zmalloc(sizeof(*o
));
2766 o
= zmalloc(sizeof(*o
)-sizeof(struct redisObjectVM
));
2770 o
->encoding
= REDIS_ENCODING_RAW
;
2773 if (server
.vm_enabled
) {
2774 /* Note that this code may run in the context of an I/O thread
2775 * and accessing to server.unixtime in theory is an error
2776 * (no locks). But in practice this is safe, and even if we read
2777 * garbage Redis will not fail, as it's just a statistical info */
2778 o
->vm
.atime
= server
.unixtime
;
2779 o
->storage
= REDIS_VM_MEMORY
;
/* Create a string object whose payload is a new sds string built from the
 * len bytes at ptr. */
2784 static robj
*createStringObject(char *ptr
, size_t len
) {
2785 return createObject(REDIS_STRING
,sdsnewlen(ptr
,len
));
/* Return a string object representing value. For
 * 0 <= value < REDIS_SHARED_INTEGERS the shared integer object is used,
 * with one more reference taken on it. Otherwise a new object is created:
 * integer encoded (value stored in the pointer) when value fits in a long,
 * or holding the "%lld" text as an sds string when it does not. */
2788 static robj
*createStringObjectFromLongLong(long long value
) {
2790 if (value
>= 0 && value
< REDIS_SHARED_INTEGERS
) {
2791 incrRefCount(shared
.integers
[value
]);
2792 o
= shared
.integers
[value
];
2794 o
= createObject(REDIS_STRING
, NULL
);
2795 if (value
>= LONG_MIN
&& value
<= LONG_MAX
) {
2796 o
->encoding
= REDIS_ENCODING_INT
;
2797 o
->ptr
= (void*)((long)value
);
2799 o
->ptr
= sdscatprintf(sdsempty(),"%lld",value
);
/* Create a new string object from the bytes of o. The source must be raw
 * encoded, which is asserted. */
2805 static robj
*dupStringObject(robj
*o
) {
2806 assert(o
->encoding
== REDIS_ENCODING_RAW
);
2807 return createStringObject(o
->ptr
,sdslen(o
->ptr
));
/* Create a list object backed by a new list whose free method is
 * decrRefCount. */
2810 static robj
*createListObject(void) {
2811 list
*l
= listCreate();
2813 listSetFreeMethod(l
,decrRefCount
);
2814 return createObject(REDIS_LIST
,l
);
/* Create a set object backed by a new dict of type setDictType. */
2817 static robj
*createSetObject(void) {
2818 dict
*d
= dictCreate(&setDictType
,NULL
);
2819 return createObject(REDIS_SET
,d
);
/* Create a hash object whose payload is a new zipmap and whose encoding is
 * REDIS_ENCODING_ZIPMAP. */
2822 static robj
*createHashObject(void) {
2823 /* All the Hashes start as zipmaps. Will be automatically converted
2824 * into hash tables if there are enough elements or big elements
2826 unsigned char *zm
= zipmapNew();
2827 robj
*o
= createObject(REDIS_HASH
,zm
);
2828 o
->encoding
= REDIS_ENCODING_ZIPMAP
;
/* Create a sorted set object: a zset structure holding a dict of type
 * zsetDictType and the structure returned by zslCreate(). */
2832 static robj
*createZsetObject(void) {
2833 zset
*zs
= zmalloc(sizeof(*zs
));
2835 zs
->dict
= dictCreate(&zsetDictType
,NULL
);
2836 zs
->zsl
= zslCreate();
2837 return createObject(REDIS_ZSET
,zs
);
/* Free the payload of a string object. The release is conditional on raw
 * encoding; integer encoded strings keep their value in the pointer itself
 * (see createStringObjectFromLongLong()). */
2840 static void freeStringObject(robj
*o
) {
2841 if (o
->encoding
== REDIS_ENCODING_RAW
) {
/* Release the list stored in o->ptr. */
2846 static void freeListObject(robj
*o
) {
2847 listRelease((list
*) o
->ptr
);
/* Release the dict stored in o->ptr. */
2850 static void freeSetObject(robj
*o
) {
2851 dictRelease((dict
*) o
->ptr
);
/* Release the components of the sorted set held by o; its dict is released
 * with dictRelease(). */
2854 static void freeZsetObject(robj
*o
) {
2857 dictRelease(zs
->dict
);
/* Free the payload of a hash object according to its encoding (hash table
 * or zipmap). An unknown encoding triggers a panic. */
2862 static void freeHashObject(robj
*o
) {
2863 switch (o
->encoding
) {
2864 case REDIS_ENCODING_HT
:
2865 dictRelease((dict
*) o
->ptr
);
2867 case REDIS_ENCODING_ZIPMAP
:
2871 redisPanic("Unknown hash encoding type");
/* Take one more reference on o; the counterpart of decrRefCount(). */
2876 static void incrRefCount(robj
*o
) {
/* Drop one reference on obj; a reference count that is already <= 0 is a
 * fatal error. With VM enabled, objects in REDIS_VM_SWAPPED or
 * REDIS_VM_LOADING storage take a dedicated path: they must be strings, a
 * pending load is cancelled, and their swap pages are marked free. In the
 * normal path, when the count reaches zero the payload is freed according
 * to the object type and the object itself is offered to
 * server.objfreelist, which is bounded by REDIS_OBJFREELIST_MAX. */
2880 static void decrRefCount(void *obj
) {
2883 if (o
->refcount
<= 0) redisPanic("decrRefCount against refcount <= 0");
2884 /* Object is a key of a swapped out value, or in the process of being
2886 if (server
.vm_enabled
&&
2887 (o
->storage
== REDIS_VM_SWAPPED
|| o
->storage
== REDIS_VM_LOADING
))
2889 if (o
->storage
== REDIS_VM_LOADING
) vmCancelThreadedIOJob(obj
);
2890 redisAssert(o
->type
== REDIS_STRING
);
2891 freeStringObject(o
);
2892 vmMarkPagesFree(o
->vm
.page
,o
->vm
.usedpages
);
2893 pthread_mutex_lock(&server
.obj_freelist_mutex
);
2894 if (listLength(server
.objfreelist
) > REDIS_OBJFREELIST_MAX
||
2895 !listAddNodeHead(server
.objfreelist
,o
))
2897 pthread_mutex_unlock(&server
.obj_freelist_mutex
);
2898 server
.vm_stats_swapped_objects
--;
2901 /* Object is in memory, or in the process of being swapped out. */
2902 if (--(o
->refcount
) == 0) {
2903 if (server
.vm_enabled
&& o
->storage
== REDIS_VM_SWAPPING
)
2904 vmCancelThreadedIOJob(obj
);
2906 case REDIS_STRING
: freeStringObject(o
); break;
2907 case REDIS_LIST
: freeListObject(o
); break;
2908 case REDIS_SET
: freeSetObject(o
); break;
2909 case REDIS_ZSET
: freeZsetObject(o
); break;
2910 case REDIS_HASH
: freeHashObject(o
); break;
2911 default: redisPanic("Unknown object type"); break;
2913 if (server
.vm_enabled
) pthread_mutex_lock(&server
.obj_freelist_mutex
);
2914 if (listLength(server
.objfreelist
) > REDIS_OBJFREELIST_MAX
||
2915 !listAddNodeHead(server
.objfreelist
,o
))
2917 if (server
.vm_enabled
) pthread_mutex_unlock(&server
.obj_freelist_mutex
);
2921 static robj
*lookupKey(redisDb
*db
, robj
*key
) {
2922 dictEntry
*de
= dictFind(db
->dict
,key
);
2924 robj
*key
= dictGetEntryKey(de
);
2925 robj
*val
= dictGetEntryVal(de
);
2927 if (server
.vm_enabled
) {
2928 if (key
->storage
== REDIS_VM_MEMORY
||
2929 key
->storage
== REDIS_VM_SWAPPING
)
2931 /* If we were swapping the object out, stop it, this key
2933 if (key
->storage
== REDIS_VM_SWAPPING
)
2934 vmCancelThreadedIOJob(key
);
2935 /* Update the access time of the key for the aging algorithm. */
2936 key
->vm
.atime
= server
.unixtime
;
2938 int notify
= (key
->storage
== REDIS_VM_LOADING
);
2940 /* Our value was swapped on disk. Bring it at home. */
2941 redisAssert(val
== NULL
);
2942 val
= vmLoadObject(key
);
2943 dictGetEntryVal(de
) = val
;
2945 /* Clients blocked by the VM subsystem may be waiting for
2947 if (notify
) handleClientsBlockedOnSwappedKey(db
,key
);
2956 static robj
*lookupKeyRead(redisDb
*db
, robj
*key
) {
2957 expireIfNeeded(db
,key
);
2958 return lookupKey(db
,key
);
2961 static robj
*lookupKeyWrite(redisDb
*db
, robj
*key
) {
2962 deleteIfVolatile(db
,key
);
2963 return lookupKey(db
,key
);
2966 static robj
*lookupKeyReadOrReply(redisClient
*c
, robj
*key
, robj
*reply
) {
2967 robj
*o
= lookupKeyRead(c
->db
, key
);
2968 if (!o
) addReply(c
,reply
);
2972 static robj
*lookupKeyWriteOrReply(redisClient
*c
, robj
*key
, robj
*reply
) {
2973 robj
*o
= lookupKeyWrite(c
->db
, key
);
2974 if (!o
) addReply(c
,reply
);
2978 static int checkType(redisClient
*c
, robj
*o
, int type
) {
2979 if (o
->type
!= type
) {
2980 addReply(c
,shared
.wrongtypeerr
);
2986 static int deleteKey(redisDb
*db
, robj
*key
) {
2989 /* We need to protect key from destruction: after the first dictDelete()
2990 * it may happen that 'key' is no longer valid if we don't increment
2991 * its count. This may happen when we get the object reference directly
2992 * from the hash table with dictRandomKey() or dict iterators */
2994 if (dictSize(db
->expires
)) dictDelete(db
->expires
,key
);
2995 retval
= dictDelete(db
->dict
,key
);
2998 return retval
== DICT_OK
;
3001 /* Check if the nul-terminated string 's' can be represented by a long
3002 * (that is, is a number that fits into long without any other space or
3003 * character before or after the digits).
3005 * If so, the function returns REDIS_OK and *longval is set to the value
3006 * of the number. Otherwise REDIS_ERR is returned */
3007 static int isStringRepresentableAsLong(sds s
, long *longval
) {
3008 char buf
[32], *endptr
;
3012 value
= strtol(s
, &endptr
, 10);
3013 if (endptr
[0] != '\0') return REDIS_ERR
;
3014 slen
= snprintf(buf
,32,"%ld",value
);
3016 /* If the number converted back into a string is not identical
3017 * then it's not possible to encode the string as integer */
3018 if (sdslen(s
) != (unsigned)slen
|| memcmp(buf
,s
,slen
)) return REDIS_ERR
;
3019 if (longval
) *longval
= value
;
3023 /* Try to encode a string object in order to save space */
3024 static robj
*tryObjectEncoding(robj
*o
) {
3028 if (o
->encoding
!= REDIS_ENCODING_RAW
)
3029 return o
; /* Already encoded */
3031 /* It's not safe to encode shared objects: shared objects can be shared
3032 * everywhere in the "object space" of Redis. Encoded objects can only
3033 * appear as "values" (and not, for instance, as keys) */
3034 if (o
->refcount
> 1) return o
;
3036 /* Currently we try to encode only strings */
3037 redisAssert(o
->type
== REDIS_STRING
);
3039 /* Check if we can represent this string as a long integer */
3040 if (isStringRepresentableAsLong(s
,&value
) == REDIS_ERR
) return o
;
3042 /* Ok, this object can be encoded */
3043 if (value
>= 0 && value
< REDIS_SHARED_INTEGERS
) {
3045 incrRefCount(shared
.integers
[value
]);
3046 return shared
.integers
[value
];
3048 o
->encoding
= REDIS_ENCODING_INT
;
3050 o
->ptr
= (void*) value
;
3055 /* Get a decoded version of an encoded object (returned as a new object).
3056 * If the object is already raw-encoded just increment the ref count. */
3057 static robj
*getDecodedObject(robj
*o
) {
3060 if (o
->encoding
== REDIS_ENCODING_RAW
) {
3064 if (o
->type
== REDIS_STRING
&& o
->encoding
== REDIS_ENCODING_INT
) {
3067 snprintf(buf
,32,"%ld",(long)o
->ptr
);
3068 dec
= createStringObject(buf
,strlen(buf
));
3071 redisPanic("Unknown encoding type");
3075 /* Compare two string objects via strcmp() or alike.
3076 * Note that the objects may be integer-encoded. In such a case we
3077 * use snprintf() to get a string representation of the numbers on the stack
3078 * and compare the strings, it's much faster than calling getDecodedObject().
3080 * Important note: if objects are not integer encoded, but binary-safe strings,
3081 * sdscmp() from sds.c will apply memcmp() so this function can be considered
3083 static int compareStringObjects(robj
*a
, robj
*b
) {
3084 redisAssert(a
->type
== REDIS_STRING
&& b
->type
== REDIS_STRING
);
3085 char bufa
[128], bufb
[128], *astr
, *bstr
;
3088 if (a
== b
) return 0;
3089 if (a
->encoding
!= REDIS_ENCODING_RAW
) {
3090 snprintf(bufa
,sizeof(bufa
),"%ld",(long) a
->ptr
);
3096 if (b
->encoding
!= REDIS_ENCODING_RAW
) {
3097 snprintf(bufb
,sizeof(bufb
),"%ld",(long) b
->ptr
);
3103 return bothsds
? sdscmp(astr
,bstr
) : strcmp(astr
,bstr
);
3106 static size_t stringObjectLen(robj
*o
) {
3107 redisAssert(o
->type
== REDIS_STRING
);
3108 if (o
->encoding
== REDIS_ENCODING_RAW
) {
3109 return sdslen(o
->ptr
);
3113 return snprintf(buf
,32,"%ld",(long)o
->ptr
);
3117 static int getDoubleFromObject(robj
*o
, double *target
) {
3124 redisAssert(o
->type
== REDIS_STRING
);
3125 if (o
->encoding
== REDIS_ENCODING_RAW
) {
3126 value
= strtod(o
->ptr
, &eptr
);
3127 if (eptr
[0] != '\0') return REDIS_ERR
;
3128 } else if (o
->encoding
== REDIS_ENCODING_INT
) {
3129 value
= (long)o
->ptr
;
3131 redisAssert(1 != 1);
3139 static int getDoubleFromObjectOrReply(redisClient
*c
, robj
*o
, double *target
, const char *msg
) {
3141 if (getDoubleFromObject(o
, &value
) != REDIS_OK
) {
3143 addReplySds(c
, sdscatprintf(sdsempty(), "-ERR %s\r\n", msg
));
3145 addReplySds(c
, sdsnew("-ERR value is not a double\r\n"));
3154 static int getLongLongFromObject(robj
*o
, long long *target
) {
3161 redisAssert(o
->type
== REDIS_STRING
);
3162 if (o
->encoding
== REDIS_ENCODING_RAW
) {
3163 value
= strtoll(o
->ptr
, &eptr
, 10);
3164 if (eptr
[0] != '\0') return REDIS_ERR
;
3165 } else if (o
->encoding
== REDIS_ENCODING_INT
) {
3166 value
= (long)o
->ptr
;
3168 redisAssert(1 != 1);
3176 static int getLongLongFromObjectOrReply(redisClient
*c
, robj
*o
, long long *target
, const char *msg
) {
3178 if (getLongLongFromObject(o
, &value
) != REDIS_OK
) {
3180 addReplySds(c
, sdscatprintf(sdsempty(), "-ERR %s\r\n", msg
));
3182 addReplySds(c
, sdsnew("-ERR value is not an integer\r\n"));
3191 static int getLongFromObjectOrReply(redisClient
*c
, robj
*o
, long *target
, const char *msg
) {
3194 if (getLongLongFromObjectOrReply(c
, o
, &value
, msg
) != REDIS_OK
) return REDIS_ERR
;
3195 if (value
< LONG_MIN
|| value
> LONG_MAX
) {
3197 addReplySds(c
, sdscatprintf(sdsempty(), "-ERR %s\r\n", msg
));
3199 addReplySds(c
, sdsnew("-ERR value is out of range\r\n"));
3208 /*============================ RDB saving/loading =========================== */
3210 static int rdbSaveType(FILE *fp
, unsigned char type
) {
3211 if (fwrite(&type
,1,1,fp
) == 0) return -1;
3215 static int rdbSaveTime(FILE *fp
, time_t t
) {
3216 int32_t t32
= (int32_t) t
;
3217 if (fwrite(&t32
,4,1,fp
) == 0) return -1;
3221 /* check rdbLoadLen() comments for more info */
3222 static int rdbSaveLen(FILE *fp
, uint32_t len
) {
3223 unsigned char buf
[2];
3226 /* Save a 6 bit len */
3227 buf
[0] = (len
&0xFF)|(REDIS_RDB_6BITLEN
<<6);
3228 if (fwrite(buf
,1,1,fp
) == 0) return -1;
3229 } else if (len
< (1<<14)) {
3230 /* Save a 14 bit len */
3231 buf
[0] = ((len
>>8)&0xFF)|(REDIS_RDB_14BITLEN
<<6);
3233 if (fwrite(buf
,2,1,fp
) == 0) return -1;
3235 /* Save a 32 bit len */
3236 buf
[0] = (REDIS_RDB_32BITLEN
<<6);
3237 if (fwrite(buf
,1,1,fp
) == 0) return -1;
3239 if (fwrite(&len
,4,1,fp
) == 0) return -1;
3244 /* String objects in the form "2391" "-100" without any space and with a
3245 * range of values that can fit in an 8, 16 or 32 bit signed value can be
3246 * encoded as integers to save space */
3247 static int rdbTryIntegerEncoding(char *s
, size_t len
, unsigned char *enc
) {
3249 char *endptr
, buf
[32];
3251 /* Check if it's possible to encode this value as a number */
3252 value
= strtoll(s
, &endptr
, 10);
3253 if (endptr
[0] != '\0') return 0;
3254 snprintf(buf
,32,"%lld",value
);
3256 /* If the number converted back into a string is not identical
3257 * then it's not possible to encode the string as integer */
3258 if (strlen(buf
) != len
|| memcmp(buf
,s
,len
)) return 0;
3260 /* Finally check if it fits in our ranges */
3261 if (value
>= -(1<<7) && value
<= (1<<7)-1) {
3262 enc
[0] = (REDIS_RDB_ENCVAL
<<6)|REDIS_RDB_ENC_INT8
;
3263 enc
[1] = value
&0xFF;
3265 } else if (value
>= -(1<<15) && value
<= (1<<15)-1) {
3266 enc
[0] = (REDIS_RDB_ENCVAL
<<6)|REDIS_RDB_ENC_INT16
;
3267 enc
[1] = value
&0xFF;
3268 enc
[2] = (value
>>8)&0xFF;
3270 } else if (value
>= -((long long)1<<31) && value
<= ((long long)1<<31)-1) {
3271 enc
[0] = (REDIS_RDB_ENCVAL
<<6)|REDIS_RDB_ENC_INT32
;
3272 enc
[1] = value
&0xFF;
3273 enc
[2] = (value
>>8)&0xFF;
3274 enc
[3] = (value
>>16)&0xFF;
3275 enc
[4] = (value
>>24)&0xFF;
3282 static int rdbSaveLzfStringObject(FILE *fp
, unsigned char *s
, size_t len
) {
3283 size_t comprlen
, outlen
;
3287 /* We require at least four bytes compression for this to be worth it */
3288 if (len
<= 4) return 0;
3290 if ((out
= zmalloc(outlen
+1)) == NULL
) return 0;
3291 comprlen
= lzf_compress(s
, len
, out
, outlen
);
3292 if (comprlen
== 0) {
3296 /* Data compressed! Let's save it on disk */
3297 byte
= (REDIS_RDB_ENCVAL
<<6)|REDIS_RDB_ENC_LZF
;
3298 if (fwrite(&byte
,1,1,fp
) == 0) goto writeerr
;
3299 if (rdbSaveLen(fp
,comprlen
) == -1) goto writeerr
;
3300 if (rdbSaveLen(fp
,len
) == -1) goto writeerr
;
3301 if (fwrite(out
,comprlen
,1,fp
) == 0) goto writeerr
;
3310 /* Save a string object as [len][data] on disk. If the object is a string
3311 * representation of an integer value we try to save it in a special form */
3312 static int rdbSaveRawString(FILE *fp
, unsigned char *s
, size_t len
) {
3315 /* Try integer encoding */
3317 unsigned char buf
[5];
3318 if ((enclen
= rdbTryIntegerEncoding((char*)s
,len
,buf
)) > 0) {
3319 if (fwrite(buf
,enclen
,1,fp
) == 0) return -1;
3324 /* Try LZF compression - under 20 bytes it's unable to compress even
3325 * aaaaaaaaaaaaaaaaaa so skip it */
3326 if (server
.rdbcompression
&& len
> 20) {
3329 retval
= rdbSaveLzfStringObject(fp
,s
,len
);
3330 if (retval
== -1) return -1;
3331 if (retval
> 0) return 0;
3332 /* retval == 0 means data can't be compressed, save the old way */
3335 /* Store verbatim */
3336 if (rdbSaveLen(fp
,len
) == -1) return -1;
3337 if (len
&& fwrite(s
,len
,1,fp
) == 0) return -1;
3341 /* Like rdbSaveRawString() but handles encoded objects */
3342 static int rdbSaveStringObject(FILE *fp
, robj
*obj
) {
3345 /* Avoid incr/decr ref count business when possible.
3346 * This plays well with copy-on-write given that we are probably
3347 * in a child process (BGSAVE). Also this makes sure key objects
3348 * of swapped objects are not incRefCount-ed (an assert does not allow
3349 * this in order to avoid bugs) */
3350 if (obj
->encoding
!= REDIS_ENCODING_RAW
) {
3351 obj
= getDecodedObject(obj
);
3352 retval
= rdbSaveRawString(fp
,obj
->ptr
,sdslen(obj
->ptr
));
3355 retval
= rdbSaveRawString(fp
,obj
->ptr
,sdslen(obj
->ptr
));
3360 /* Save a double value. Doubles are saved as strings prefixed by an unsigned
3361 * 8 bit integer specifying the length of the representation.
3362 * This 8 bit integer has special values in order to specify the following
3368 static int rdbSaveDoubleValue(FILE *fp
, double val
) {
3369 unsigned char buf
[128];
3375 } else if (!isfinite(val
)) {
3377 buf
[0] = (val
< 0) ? 255 : 254;
3379 snprintf((char*)buf
+1,sizeof(buf
)-1,"%.17g",val
);
3380 buf
[0] = strlen((char*)buf
+1);
3383 if (fwrite(buf
,len
,1,fp
) == 0) return -1;
3387 /* Save a Redis object. */
3388 static int rdbSaveObject(FILE *fp
, robj
*o
) {
3389 if (o
->type
== REDIS_STRING
) {
3390 /* Save a string value */
3391 if (rdbSaveStringObject(fp
,o
) == -1) return -1;
3392 } else if (o
->type
== REDIS_LIST
) {
3393 /* Save a list value */
3394 list
*list
= o
->ptr
;
3398 if (rdbSaveLen(fp
,listLength(list
)) == -1) return -1;
3399 listRewind(list
,&li
);
3400 while((ln
= listNext(&li
))) {
3401 robj
*eleobj
= listNodeValue(ln
);
3403 if (rdbSaveStringObject(fp
,eleobj
) == -1) return -1;
3405 } else if (o
->type
== REDIS_SET
) {
3406 /* Save a set value */
3408 dictIterator
*di
= dictGetIterator(set
);
3411 if (rdbSaveLen(fp
,dictSize(set
)) == -1) return -1;
3412 while((de
= dictNext(di
)) != NULL
) {
3413 robj
*eleobj
= dictGetEntryKey(de
);
3415 if (rdbSaveStringObject(fp
,eleobj
) == -1) return -1;
3417 dictReleaseIterator(di
);
3418 } else if (o
->type
== REDIS_ZSET
) {
3419 /* Save a sorted set value */
3421 dictIterator
*di
= dictGetIterator(zs
->dict
);
3424 if (rdbSaveLen(fp
,dictSize(zs
->dict
)) == -1) return -1;
3425 while((de
= dictNext(di
)) != NULL
) {
3426 robj
*eleobj
= dictGetEntryKey(de
);
3427 double *score
= dictGetEntryVal(de
);
3429 if (rdbSaveStringObject(fp
,eleobj
) == -1) return -1;
3430 if (rdbSaveDoubleValue(fp
,*score
) == -1) return -1;
3432 dictReleaseIterator(di
);
3433 } else if (o
->type
== REDIS_HASH
) {
3434 /* Save a hash value */
3435 if (o
->encoding
== REDIS_ENCODING_ZIPMAP
) {
3436 unsigned char *p
= zipmapRewind(o
->ptr
);
3437 unsigned int count
= zipmapLen(o
->ptr
);
3438 unsigned char *key
, *val
;
3439 unsigned int klen
, vlen
;
3441 if (rdbSaveLen(fp
,count
) == -1) return -1;
3442 while((p
= zipmapNext(p
,&key
,&klen
,&val
,&vlen
)) != NULL
) {
3443 if (rdbSaveRawString(fp
,key
,klen
) == -1) return -1;
3444 if (rdbSaveRawString(fp
,val
,vlen
) == -1) return -1;
3447 dictIterator
*di
= dictGetIterator(o
->ptr
);
3450 if (rdbSaveLen(fp
,dictSize((dict
*)o
->ptr
)) == -1) return -1;
3451 while((de
= dictNext(di
)) != NULL
) {
3452 robj
*key
= dictGetEntryKey(de
);
3453 robj
*val
= dictGetEntryVal(de
);
3455 if (rdbSaveStringObject(fp
,key
) == -1) return -1;
3456 if (rdbSaveStringObject(fp
,val
) == -1) return -1;
3458 dictReleaseIterator(di
);
3461 redisPanic("Unknown object type");
3466 /* Return the length the object will have on disk if saved with
3467 * the rdbSaveObject() function. Currently we use a trick to get
3468 * this length with very little changes to the code. In the future
3469 * we could switch to a faster solution. */
3470 static off_t
rdbSavedObjectLen(robj
*o
, FILE *fp
) {
3471 if (fp
== NULL
) fp
= server
.devnull
;
3473 assert(rdbSaveObject(fp
,o
) != 1);
3477 /* Return the number of pages required to save this object in the swap file */
3478 static off_t
rdbSavedObjectPages(robj
*o
, FILE *fp
) {
3479 off_t bytes
= rdbSavedObjectLen(o
,fp
);
3481 return (bytes
+(server
.vm_page_size
-1))/server
.vm_page_size
;
3484 /* Save the DB on disk. Return REDIS_ERR on error, REDIS_OK on success */
3485 static int rdbSave(char *filename
) {
3486 dictIterator
*di
= NULL
;
3491 time_t now
= time(NULL
);
3493 /* Wait for I/O threads to terminate, just in case this is a
3494 * foreground-saving, to avoid seeking the swap file descriptor at the
3496 if (server
.vm_enabled
)
3497 waitEmptyIOJobsQueue();
3499 snprintf(tmpfile
,256,"temp-%d.rdb", (int) getpid());
3500 fp
= fopen(tmpfile
,"w");
3502 redisLog(REDIS_WARNING
, "Failed saving the DB: %s", strerror(errno
));
3505 if (fwrite("REDIS0001",9,1,fp
) == 0) goto werr
;
3506 for (j
= 0; j
< server
.dbnum
; j
++) {
3507 redisDb
*db
= server
.db
+j
;
3509 if (dictSize(d
) == 0) continue;
3510 di
= dictGetIterator(d
);
3516 /* Write the SELECT DB opcode */
3517 if (rdbSaveType(fp
,REDIS_SELECTDB
) == -1) goto werr
;
3518 if (rdbSaveLen(fp
,j
) == -1) goto werr
;
3520 /* Iterate this DB writing every entry */
3521 while((de
= dictNext(di
)) != NULL
) {
3522 robj
*key
= dictGetEntryKey(de
);
3523 robj
*o
= dictGetEntryVal(de
);
3524 time_t expiretime
= getExpire(db
,key
);
3526 /* Save the expire time */
3527 if (expiretime
!= -1) {
3528 /* If this key is already expired skip it */
3529 if (expiretime
< now
) continue;
3530 if (rdbSaveType(fp
,REDIS_EXPIRETIME
) == -1) goto werr
;
3531 if (rdbSaveTime(fp
,expiretime
) == -1) goto werr
;
3533 /* Save the key and associated value. This requires special
3534 * handling if the value is swapped out. */
3535 if (!server
.vm_enabled
|| key
->storage
== REDIS_VM_MEMORY
||
3536 key
->storage
== REDIS_VM_SWAPPING
) {
3537 /* Save type, key, value */
3538 if (rdbSaveType(fp
,o
->type
) == -1) goto werr
;
3539 if (rdbSaveStringObject(fp
,key
) == -1) goto werr
;
3540 if (rdbSaveObject(fp
,o
) == -1) goto werr
;
3542 /* REDIS_VM_SWAPPED or REDIS_VM_LOADING */
3544 /* Get a preview of the object in memory */
3545 po
= vmPreviewObject(key
);
3546 /* Save type, key, value */
3547 if (rdbSaveType(fp
,key
->vtype
) == -1) goto werr
;
3548 if (rdbSaveStringObject(fp
,key
) == -1) goto werr
;
3549 if (rdbSaveObject(fp
,po
) == -1) goto werr
;
3550 /* Remove the loaded object from memory */
3554 dictReleaseIterator(di
);
3557 if (rdbSaveType(fp
,REDIS_EOF
) == -1) goto werr
;
3559 /* Make sure data will not remain on the OS's output buffers */
3564 /* Use RENAME to make sure the DB file is changed atomically only
3565 * if the generated DB file is ok. */
3566 if (rename(tmpfile
,filename
) == -1) {
3567 redisLog(REDIS_WARNING
,"Error moving temp DB file on the final destination: %s", strerror(errno
));
3571 redisLog(REDIS_NOTICE
,"DB saved on disk");
3573 server
.lastsave
= time(NULL
);
3579 redisLog(REDIS_WARNING
,"Write error saving DB on disk: %s", strerror(errno
));
3580 if (di
) dictReleaseIterator(di
);
3584 static int rdbSaveBackground(char *filename
) {
3587 if (server
.bgsavechildpid
!= -1) return REDIS_ERR
;
3588 if (server
.vm_enabled
) waitEmptyIOJobsQueue();
3589 if ((childpid
= fork()) == 0) {
3591 if (server
.vm_enabled
) vmReopenSwapFile();
3593 if (rdbSave(filename
) == REDIS_OK
) {
3600 if (childpid
== -1) {
3601 redisLog(REDIS_WARNING
,"Can't save in background: fork: %s",
3605 redisLog(REDIS_NOTICE
,"Background saving started by pid %d",childpid
);
3606 server
.bgsavechildpid
= childpid
;
3607 updateDictResizePolicy();
3610 return REDIS_OK
; /* unreached */
3613 static void rdbRemoveTempFile(pid_t childpid
) {
3616 snprintf(tmpfile
,256,"temp-%d.rdb", (int) childpid
);
3620 static int rdbLoadType(FILE *fp
) {
3622 if (fread(&type
,1,1,fp
) == 0) return -1;
3626 static time_t rdbLoadTime(FILE *fp
) {
3628 if (fread(&t32
,4,1,fp
) == 0) return -1;
3629 return (time_t) t32
;
3632 /* Load an encoded length from the DB, see the REDIS_RDB_* defines on the top
3633 * of this file for a description of how these are stored on disk.
3635 * isencoded is set to 1 if the value read is not actually a length but
3636 * an "encoding type", check the above comments for more info */
3637 static uint32_t rdbLoadLen(FILE *fp
, int *isencoded
) {
3638 unsigned char buf
[2];
3642 if (isencoded
) *isencoded
= 0;
3643 if (fread(buf
,1,1,fp
) == 0) return REDIS_RDB_LENERR
;
3644 type
= (buf
[0]&0xC0)>>6;
3645 if (type
== REDIS_RDB_6BITLEN
) {
3646 /* Read a 6 bit len */
3648 } else if (type
== REDIS_RDB_ENCVAL
) {
3649 /* Read a 6 bit len encoding type */
3650 if (isencoded
) *isencoded
= 1;
3652 } else if (type
== REDIS_RDB_14BITLEN
) {
3653 /* Read a 14 bit len */
3654 if (fread(buf
+1,1,1,fp
) == 0) return REDIS_RDB_LENERR
;
3655 return ((buf
[0]&0x3F)<<8)|buf
[1];
3657 /* Read a 32 bit len */
3658 if (fread(&len
,4,1,fp
) == 0) return REDIS_RDB_LENERR
;
3663 static robj
*rdbLoadIntegerObject(FILE *fp
, int enctype
) {
3664 unsigned char enc
[4];
3667 if (enctype
== REDIS_RDB_ENC_INT8
) {
3668 if (fread(enc
,1,1,fp
) == 0) return NULL
;
3669 val
= (signed char)enc
[0];
3670 } else if (enctype
== REDIS_RDB_ENC_INT16
) {
3672 if (fread(enc
,2,1,fp
) == 0) return NULL
;
3673 v
= enc
[0]|(enc
[1]<<8);
3675 } else if (enctype
== REDIS_RDB_ENC_INT32
) {
3677 if (fread(enc
,4,1,fp
) == 0) return NULL
;
3678 v
= enc
[0]|(enc
[1]<<8)|(enc
[2]<<16)|(enc
[3]<<24);
3681 val
= 0; /* anti-warning */
3682 redisPanic("Unknown RDB integer encoding type");
3684 return createObject(REDIS_STRING
,sdscatprintf(sdsempty(),"%lld",val
));
3687 static robj
*rdbLoadLzfStringObject(FILE*fp
) {
3688 unsigned int len
, clen
;
3689 unsigned char *c
= NULL
;
3692 if ((clen
= rdbLoadLen(fp
,NULL
)) == REDIS_RDB_LENERR
) return NULL
;
3693 if ((len
= rdbLoadLen(fp
,NULL
)) == REDIS_RDB_LENERR
) return NULL
;
3694 if ((c
= zmalloc(clen
)) == NULL
) goto err
;
3695 if ((val
= sdsnewlen(NULL
,len
)) == NULL
) goto err
;
3696 if (fread(c
,clen
,1,fp
) == 0) goto err
;
3697 if (lzf_decompress(c
,clen
,val
,len
) == 0) goto err
;
3699 return createObject(REDIS_STRING
,val
);
3706 static robj
*rdbLoadStringObject(FILE*fp
) {
3711 len
= rdbLoadLen(fp
,&isencoded
);
3714 case REDIS_RDB_ENC_INT8
:
3715 case REDIS_RDB_ENC_INT16
:
3716 case REDIS_RDB_ENC_INT32
:
3717 return rdbLoadIntegerObject(fp
,len
);
3718 case REDIS_RDB_ENC_LZF
:
3719 return rdbLoadLzfStringObject(fp
);
3721 redisPanic("Unknown RDB encoding type");
3725 if (len
== REDIS_RDB_LENERR
) return NULL
;
3726 val
= sdsnewlen(NULL
,len
);
3727 if (len
&& fread(val
,len
,1,fp
) == 0) {
3731 return createObject(REDIS_STRING
,val
);
3734 /* For information about double serialization check rdbSaveDoubleValue() */
3735 static int rdbLoadDoubleValue(FILE *fp
, double *val
) {
3739 if (fread(&len
,1,1,fp
) == 0) return -1;
3741 case 255: *val
= R_NegInf
; return 0;
3742 case 254: *val
= R_PosInf
; return 0;
3743 case 253: *val
= R_Nan
; return 0;
3745 if (fread(buf
,len
,1,fp
) == 0) return -1;
3747 sscanf(buf
, "%lg", val
);
3752 /* Load a Redis object of the specified type from the specified file.
3753 * On success a newly allocated object is returned, otherwise NULL. */
3754 static robj
*rdbLoadObject(int type
, FILE *fp
) {
3757 redisLog(REDIS_DEBUG
,"LOADING OBJECT %d (at %d)\n",type
,ftell(fp
));
3758 if (type
== REDIS_STRING
) {
3759 /* Read string value */
3760 if ((o
= rdbLoadStringObject(fp
)) == NULL
) return NULL
;
3761 o
= tryObjectEncoding(o
);
3762 } else if (type
== REDIS_LIST
|| type
== REDIS_SET
) {
3763 /* Read list/set value */
3766 if ((listlen
= rdbLoadLen(fp
,NULL
)) == REDIS_RDB_LENERR
) return NULL
;
3767 o
= (type
== REDIS_LIST
) ? createListObject() : createSetObject();
3768 /* It's faster to expand the dict to the right size asap in order
3769 * to avoid rehashing */
3770 if (type
== REDIS_SET
&& listlen
> DICT_HT_INITIAL_SIZE
)
3771 dictExpand(o
->ptr
,listlen
);
3772 /* Load every single element of the list/set */
3776 if ((ele
= rdbLoadStringObject(fp
)) == NULL
) return NULL
;
3777 ele
= tryObjectEncoding(ele
);
3778 if (type
== REDIS_LIST
) {
3779 listAddNodeTail((list
*)o
->ptr
,ele
);
3781 dictAdd((dict
*)o
->ptr
,ele
,NULL
);
3784 } else if (type
== REDIS_ZSET
) {
3785 /* Read sorted set value */
3789 if ((zsetlen
= rdbLoadLen(fp
,NULL
)) == REDIS_RDB_LENERR
) return NULL
;
3790 o
= createZsetObject();
3792 /* Load every single element of the sorted set */
3795 double *score
= zmalloc(sizeof(double));
3797 if ((ele
= rdbLoadStringObject(fp
)) == NULL
) return NULL
;
3798 ele
= tryObjectEncoding(ele
);
3799 if (rdbLoadDoubleValue(fp
,score
) == -1) return NULL
;
3800 dictAdd(zs
->dict
,ele
,score
);
3801 zslInsert(zs
->zsl
,*score
,ele
);
3802 incrRefCount(ele
); /* added to skiplist */
3804 } else if (type
== REDIS_HASH
) {
3807 if ((hashlen
= rdbLoadLen(fp
,NULL
)) == REDIS_RDB_LENERR
) return NULL
;
3808 o
= createHashObject();
3809 /* Too many entries? Use a hash table. */
3810 if (hashlen
> server
.hash_max_zipmap_entries
)
3811 convertToRealHash(o
);
3812 /* Load every key/value, then set it into the zipmap or hash
3813 * table, as needed. */
3817 if ((key
= rdbLoadStringObject(fp
)) == NULL
) return NULL
;
3818 if ((val
= rdbLoadStringObject(fp
)) == NULL
) return NULL
;
3819 /* If we are using a zipmap and a key or value is too big
3820 * the object is converted to real hash table encoding. */
3821 if (o
->encoding
!= REDIS_ENCODING_HT
&&
3822 (sdslen(key
->ptr
) > server
.hash_max_zipmap_value
||
3823 sdslen(val
->ptr
) > server
.hash_max_zipmap_value
))
3825 convertToRealHash(o
);
3828 if (o
->encoding
== REDIS_ENCODING_ZIPMAP
) {
3829 unsigned char *zm
= o
->ptr
;
3831 zm
= zipmapSet(zm
,key
->ptr
,sdslen(key
->ptr
),
3832 val
->ptr
,sdslen(val
->ptr
),NULL
);
3837 key
= tryObjectEncoding(key
);
3838 val
= tryObjectEncoding(val
);
3839 dictAdd((dict
*)o
->ptr
,key
,val
);
3843 redisPanic("Unknown object type");
3848 static int rdbLoad(char *filename
) {
3850 robj
*keyobj
= NULL
;
3852 int type
, retval
, rdbver
;
3853 dict
*d
= server
.db
[0].dict
;
3854 redisDb
*db
= server
.db
+0;
3856 time_t expiretime
= -1, now
= time(NULL
);
3857 long long loadedkeys
= 0;
3859 fp
= fopen(filename
,"r");
3860 if (!fp
) return REDIS_ERR
;
3861 if (fread(buf
,9,1,fp
) == 0) goto eoferr
;
3863 if (memcmp(buf
,"REDIS",5) != 0) {
3865 redisLog(REDIS_WARNING
,"Wrong signature trying to load DB from file");
3868 rdbver
= atoi(buf
+5);
3871 redisLog(REDIS_WARNING
,"Can't handle RDB format version %d",rdbver
);
3878 if ((type
= rdbLoadType(fp
)) == -1) goto eoferr
;
3879 if (type
== REDIS_EXPIRETIME
) {
3880 if ((expiretime
= rdbLoadTime(fp
)) == -1) goto eoferr
;
3881 /* We read the time so we need to read the object type again */
3882 if ((type
= rdbLoadType(fp
)) == -1) goto eoferr
;
3884 if (type
== REDIS_EOF
) break;
3885 /* Handle SELECT DB opcode as a special case */
3886 if (type
== REDIS_SELECTDB
) {
3887 if ((dbid
= rdbLoadLen(fp
,NULL
)) == REDIS_RDB_LENERR
)
3889 if (dbid
>= (unsigned)server
.dbnum
) {
3890 redisLog(REDIS_WARNING
,"FATAL: Data file was created with a Redis server configured to handle more than %d databases. Exiting\n", server
.dbnum
);
3893 db
= server
.db
+dbid
;
3898 if ((keyobj
= rdbLoadStringObject(fp
)) == NULL
) goto eoferr
;
3900 if ((o
= rdbLoadObject(type
,fp
)) == NULL
) goto eoferr
;
3901 /* Add the new object in the hash table */
3902 retval
= dictAdd(d
,keyobj
,o
);
3903 if (retval
== DICT_ERR
) {
3904 redisLog(REDIS_WARNING
,"Loading DB, duplicated key (%s) found! Unrecoverable error, exiting now.", keyobj
->ptr
);
3907 /* Set the expire time if needed */
3908 if (expiretime
!= -1) {
3909 setExpire(db
,keyobj
,expiretime
);
3910 /* Delete this key if already expired */
3911 if (expiretime
< now
) deleteKey(db
,keyobj
);
3915 /* Handle swapping while loading big datasets when VM is on */
3917 if (server
.vm_enabled
&& (loadedkeys
% 5000) == 0) {
3918 while (zmalloc_used_memory() > server
.vm_max_memory
) {
3919 if (vmSwapOneObjectBlocking() == REDIS_ERR
) break;
3926 eoferr
: /* unexpected end of file is handled here with a fatal exit */
3927 if (keyobj
) decrRefCount(keyobj
);
3928 redisLog(REDIS_WARNING
,"Short read or OOM loading DB. Unrecoverable error, aborting now.");
3930 return REDIS_ERR
; /* Just to avoid warning */
3933 /*================================== Commands =============================== */
3935 static void authCommand(redisClient
*c
) {
3936 if (!server
.requirepass
|| !strcmp(c
->argv
[1]->ptr
, server
.requirepass
)) {
3937 c
->authenticated
= 1;
3938 addReply(c
,shared
.ok
);
3940 c
->authenticated
= 0;
3941 addReplySds(c
,sdscatprintf(sdsempty(),"-ERR invalid password\r\n"));
3945 static void pingCommand(redisClient
*c
) {
3946 addReply(c
,shared
.pong
);
3949 static void echoCommand(redisClient
*c
) {
3950 addReplyBulk(c
,c
->argv
[1]);
3953 /*=================================== Strings =============================== */
3955 static void setGenericCommand(redisClient
*c
, int nx
) {
3958 if (nx
) deleteIfVolatile(c
->db
,c
->argv
[1]);
3959 retval
= dictAdd(c
->db
->dict
,c
->argv
[1],c
->argv
[2]);
3960 if (retval
== DICT_ERR
) {
3962 /* If the key is about a swapped value, we want a new key object
3963 * to overwrite the old. So we delete the old key in the database.
3964 * This will also make sure that swap pages about the old object
3965 * will be marked as free. */
3966 if (server
.vm_enabled
&& deleteIfSwapped(c
->db
,c
->argv
[1]))
3967 incrRefCount(c
->argv
[1]);
3968 dictReplace(c
->db
->dict
,c
->argv
[1],c
->argv
[2]);
3969 incrRefCount(c
->argv
[2]);
3971 addReply(c
,shared
.czero
);
3975 incrRefCount(c
->argv
[1]);
3976 incrRefCount(c
->argv
[2]);
3979 removeExpire(c
->db
,c
->argv
[1]);
3980 addReply(c
, nx
? shared
.cone
: shared
.ok
);
3983 static void setCommand(redisClient
*c
) {
3984 setGenericCommand(c
,0);
3987 static void setnxCommand(redisClient
*c
) {
3988 setGenericCommand(c
,1);
3991 static int getGenericCommand(redisClient
*c
) {
3994 if ((o
= lookupKeyReadOrReply(c
,c
->argv
[1],shared
.nullbulk
)) == NULL
)
3997 if (o
->type
!= REDIS_STRING
) {
3998 addReply(c
,shared
.wrongtypeerr
);
4006 static void getCommand(redisClient
*c
) {
4007 getGenericCommand(c
);
4010 static void getsetCommand(redisClient
*c
) {
4011 if (getGenericCommand(c
) == REDIS_ERR
) return;
4012 if (dictAdd(c
->db
->dict
,c
->argv
[1],c
->argv
[2]) == DICT_ERR
) {
4013 dictReplace(c
->db
->dict
,c
->argv
[1],c
->argv
[2]);
4015 incrRefCount(c
->argv
[1]);
4017 incrRefCount(c
->argv
[2]);
4019 removeExpire(c
->db
,c
->argv
[1]);
4022 static void mgetCommand(redisClient
*c
) {
4025 addReplySds(c
,sdscatprintf(sdsempty(),"*%d\r\n",c
->argc
-1));
4026 for (j
= 1; j
< c
->argc
; j
++) {
4027 robj
*o
= lookupKeyRead(c
->db
,c
->argv
[j
]);
4029 addReply(c
,shared
.nullbulk
);
4031 if (o
->type
!= REDIS_STRING
) {
4032 addReply(c
,shared
.nullbulk
);
4040 static void msetGenericCommand(redisClient
*c
, int nx
) {
4041 int j
, busykeys
= 0;
4043 if ((c
->argc
% 2) == 0) {
4044 addReplySds(c
,sdsnew("-ERR wrong number of arguments for MSET\r\n"));
4047 /* Handle the NX flag. The MSETNX semantic is to return zero and
4048 * set nothing at all if at least one key already exists. */
4050 for (j
= 1; j
< c
->argc
; j
+= 2) {
4051 if (lookupKeyWrite(c
->db
,c
->argv
[j
]) != NULL
) {
4057 addReply(c
, shared
.czero
);
4061 for (j
= 1; j
< c
->argc
; j
+= 2) {
4064 c
->argv
[j
+1] = tryObjectEncoding(c
->argv
[j
+1]);
4065 retval
= dictAdd(c
->db
->dict
,c
->argv
[j
],c
->argv
[j
+1]);
4066 if (retval
== DICT_ERR
) {
4067 dictReplace(c
->db
->dict
,c
->argv
[j
],c
->argv
[j
+1]);
4068 incrRefCount(c
->argv
[j
+1]);
4070 incrRefCount(c
->argv
[j
]);
4071 incrRefCount(c
->argv
[j
+1]);
4073 removeExpire(c
->db
,c
->argv
[j
]);
4075 server
.dirty
+= (c
->argc
-1)/2;
4076 addReply(c
, nx
? shared
.cone
: shared
.ok
);
4079 static void msetCommand(redisClient
*c
) {
4080 msetGenericCommand(c
,0);
4083 static void msetnxCommand(redisClient
*c
) {
4084 msetGenericCommand(c
,1);
4087 static void incrDecrCommand(redisClient
*c
, long long incr
) {
4092 o
= lookupKeyWrite(c
->db
,c
->argv
[1]);
4094 if (getLongLongFromObjectOrReply(c
, o
, &value
, NULL
) != REDIS_OK
) return;
4097 o
= createObject(REDIS_STRING
,sdscatprintf(sdsempty(),"%lld",value
));
4098 o
= tryObjectEncoding(o
);
4099 retval
= dictAdd(c
->db
->dict
,c
->argv
[1],o
);
4100 if (retval
== DICT_ERR
) {
4101 dictReplace(c
->db
->dict
,c
->argv
[1],o
);
4102 removeExpire(c
->db
,c
->argv
[1]);
4104 incrRefCount(c
->argv
[1]);
4107 addReply(c
,shared
.colon
);
4109 addReply(c
,shared
.crlf
);
4112 static void incrCommand(redisClient
*c
) {
4113 incrDecrCommand(c
,1);
4116 static void decrCommand(redisClient
*c
) {
4117 incrDecrCommand(c
,-1);
4120 static void incrbyCommand(redisClient
*c
) {
4123 if (getLongLongFromObjectOrReply(c
, c
->argv
[2], &incr
, NULL
) != REDIS_OK
) return;
4124 incrDecrCommand(c
,incr
);
4127 static void decrbyCommand(redisClient
*c
) {
4130 if (getLongLongFromObjectOrReply(c
, c
->argv
[2], &incr
, NULL
) != REDIS_OK
) return;
4131 incrDecrCommand(c
,-incr
);
4134 static void appendCommand(redisClient
*c
) {
4139 o
= lookupKeyWrite(c
->db
,c
->argv
[1]);
4141 /* Create the key */
4142 retval
= dictAdd(c
->db
->dict
,c
->argv
[1],c
->argv
[2]);
4143 incrRefCount(c
->argv
[1]);
4144 incrRefCount(c
->argv
[2]);
4145 totlen
= stringObjectLen(c
->argv
[2]);
4149 de
= dictFind(c
->db
->dict
,c
->argv
[1]);
4152 o
= dictGetEntryVal(de
);
4153 if (o
->type
!= REDIS_STRING
) {
4154 addReply(c
,shared
.wrongtypeerr
);
4157 /* If the object is specially encoded or shared we have to make
4159 if (o
->refcount
!= 1 || o
->encoding
!= REDIS_ENCODING_RAW
) {
4160 robj
*decoded
= getDecodedObject(o
);
4162 o
= createStringObject(decoded
->ptr
, sdslen(decoded
->ptr
));
4163 decrRefCount(decoded
);
4164 dictReplace(c
->db
->dict
,c
->argv
[1],o
);
4167 if (c
->argv
[2]->encoding
== REDIS_ENCODING_RAW
) {
4168 o
->ptr
= sdscatlen(o
->ptr
,
4169 c
->argv
[2]->ptr
, sdslen(c
->argv
[2]->ptr
));
4171 o
->ptr
= sdscatprintf(o
->ptr
, "%ld",
4172 (unsigned long) c
->argv
[2]->ptr
);
4174 totlen
= sdslen(o
->ptr
);
4177 addReplySds(c
,sdscatprintf(sdsempty(),":%lu\r\n",(unsigned long)totlen
));
4180 static void substrCommand(redisClient
*c
) {
4182 long start
= atoi(c
->argv
[2]->ptr
);
4183 long end
= atoi(c
->argv
[3]->ptr
);
4184 size_t rangelen
, strlen
;
4187 if ((o
= lookupKeyReadOrReply(c
,c
->argv
[1],shared
.nullbulk
)) == NULL
||
4188 checkType(c
,o
,REDIS_STRING
)) return;
4190 o
= getDecodedObject(o
);
4191 strlen
= sdslen(o
->ptr
);
4193 /* convert negative indexes */
4194 if (start
< 0) start
= strlen
+start
;
4195 if (end
< 0) end
= strlen
+end
;
4196 if (start
< 0) start
= 0;
4197 if (end
< 0) end
= 0;
4199 /* indexes sanity checks */
4200 if (start
> end
|| (size_t)start
>= strlen
) {
4201 /* Out of range start or start > end result in null reply */
4202 addReply(c
,shared
.nullbulk
);
4206 if ((size_t)end
>= strlen
) end
= strlen
-1;
4207 rangelen
= (end
-start
)+1;
4209 /* Return the result */
4210 addReplySds(c
,sdscatprintf(sdsempty(),"$%zu\r\n",rangelen
));
4211 range
= sdsnewlen((char*)o
->ptr
+start
,rangelen
);
4212 addReplySds(c
,range
);
4213 addReply(c
,shared
.crlf
);
4217 /* ========================= Type agnostic commands ========================= */
4219 static void delCommand(redisClient
*c
) {
4222 for (j
= 1; j
< c
->argc
; j
++) {
4223 if (deleteKey(c
->db
,c
->argv
[j
])) {
4228 addReplyLong(c
,deleted
);
4231 static void existsCommand(redisClient
*c
) {
4232 addReply(c
,lookupKeyRead(c
->db
,c
->argv
[1]) ? shared
.cone
: shared
.czero
);
4235 static void selectCommand(redisClient
*c
) {
4236 int id
= atoi(c
->argv
[1]->ptr
);
4238 if (selectDb(c
,id
) == REDIS_ERR
) {
4239 addReplySds(c
,sdsnew("-ERR invalid DB index\r\n"));
4241 addReply(c
,shared
.ok
);
4245 static void randomkeyCommand(redisClient
*c
) {
4249 de
= dictGetRandomKey(c
->db
->dict
);
4250 if (!de
|| expireIfNeeded(c
->db
,dictGetEntryKey(de
)) == 0) break;
4253 addReply(c
,shared
.plus
);
4254 addReply(c
,shared
.crlf
);
4256 addReply(c
,shared
.plus
);
4257 addReply(c
,dictGetEntryKey(de
));
4258 addReply(c
,shared
.crlf
);
4262 static void keysCommand(redisClient
*c
) {
4265 sds pattern
= c
->argv
[1]->ptr
;
4266 int plen
= sdslen(pattern
);
4267 unsigned long numkeys
= 0;
4268 robj
*lenobj
= createObject(REDIS_STRING
,NULL
);
4270 di
= dictGetIterator(c
->db
->dict
);
4272 decrRefCount(lenobj
);
4273 while((de
= dictNext(di
)) != NULL
) {
4274 robj
*keyobj
= dictGetEntryKey(de
);
4276 sds key
= keyobj
->ptr
;
4277 if ((pattern
[0] == '*' && pattern
[1] == '\0') ||
4278 stringmatchlen(pattern
,plen
,key
,sdslen(key
),0)) {
4279 if (expireIfNeeded(c
->db
,keyobj
) == 0) {
4280 addReplyBulk(c
,keyobj
);
4285 dictReleaseIterator(di
);
4286 lenobj
->ptr
= sdscatprintf(sdsempty(),"*%lu\r\n",numkeys
);
4289 static void dbsizeCommand(redisClient
*c
) {
4291 sdscatprintf(sdsempty(),":%lu\r\n",dictSize(c
->db
->dict
)));
4294 static void lastsaveCommand(redisClient
*c
) {
4296 sdscatprintf(sdsempty(),":%lu\r\n",server
.lastsave
));
4299 static void typeCommand(redisClient
*c
) {
4303 o
= lookupKeyRead(c
->db
,c
->argv
[1]);
4308 case REDIS_STRING
: type
= "+string"; break;
4309 case REDIS_LIST
: type
= "+list"; break;
4310 case REDIS_SET
: type
= "+set"; break;
4311 case REDIS_ZSET
: type
= "+zset"; break;
4312 case REDIS_HASH
: type
= "+hash"; break;
4313 default: type
= "+unknown"; break;
4316 addReplySds(c
,sdsnew(type
));
4317 addReply(c
,shared
.crlf
);
4320 static void saveCommand(redisClient
*c
) {
4321 if (server
.bgsavechildpid
!= -1) {
4322 addReplySds(c
,sdsnew("-ERR background save in progress\r\n"));
4325 if (rdbSave(server
.dbfilename
) == REDIS_OK
) {
4326 addReply(c
,shared
.ok
);
4328 addReply(c
,shared
.err
);
4332 static void bgsaveCommand(redisClient
*c
) {
4333 if (server
.bgsavechildpid
!= -1) {
4334 addReplySds(c
,sdsnew("-ERR background save already in progress\r\n"));
4337 if (rdbSaveBackground(server
.dbfilename
) == REDIS_OK
) {
4338 char *status
= "+Background saving started\r\n";
4339 addReplySds(c
,sdsnew(status
));
4341 addReply(c
,shared
.err
);
4345 static void shutdownCommand(redisClient
*c
) {
4346 redisLog(REDIS_WARNING
,"User requested shutdown, saving DB...");
4347 /* Kill the saving child if there is a background saving in progress.
4348 We want to avoid race conditions, for instance our saving child may
4349 overwrite the synchronous save done by SHUTDOWN. */
4350 if (server
.bgsavechildpid
!= -1) {
4351 redisLog(REDIS_WARNING
,"There is a live saving child. Killing it!");
4352 kill(server
.bgsavechildpid
,SIGKILL
);
4353 rdbRemoveTempFile(server
.bgsavechildpid
);
4355 if (server
.appendonly
) {
4356 /* Append only file: fsync() the AOF and exit */
4357 fsync(server
.appendfd
);
4358 if (server
.vm_enabled
) unlink(server
.vm_swap_file
);
4361 /* Snapshotting. Perform a SYNC SAVE and exit */
4362 if (rdbSave(server
.dbfilename
) == REDIS_OK
) {
4363 if (server
.daemonize
)
4364 unlink(server
.pidfile
);
4365 redisLog(REDIS_WARNING
,"%zu bytes used at exit",zmalloc_used_memory());
4366 redisLog(REDIS_WARNING
,"Server exit now, bye bye...");
4367 if (server
.vm_enabled
) unlink(server
.vm_swap_file
);
4370 /* Ooops.. error saving! The best we can do is to continue
4371 * operating. Note that if there was a background saving process,
4372 * in the next cron() Redis will be notified that the background
4373 * saving aborted, handling special stuff like slaves pending for
4374 * synchronization... */
4375 redisLog(REDIS_WARNING
,"Error trying to save the DB, can't exit");
4377 sdsnew("-ERR can't quit, problems saving the DB\r\n"));
4382 static void renameGenericCommand(redisClient
*c
, int nx
) {
4385 /* To use the same key as src and dst is probably an error */
4386 if (sdscmp(c
->argv
[1]->ptr
,c
->argv
[2]->ptr
) == 0) {
4387 addReply(c
,shared
.sameobjecterr
);
4391 if ((o
= lookupKeyWriteOrReply(c
,c
->argv
[1],shared
.nokeyerr
)) == NULL
)
4395 deleteIfVolatile(c
->db
,c
->argv
[2]);
4396 if (dictAdd(c
->db
->dict
,c
->argv
[2],o
) == DICT_ERR
) {
4399 addReply(c
,shared
.czero
);
4402 dictReplace(c
->db
->dict
,c
->argv
[2],o
);
4404 incrRefCount(c
->argv
[2]);
4406 deleteKey(c
->db
,c
->argv
[1]);
4408 addReply(c
,nx
? shared
.cone
: shared
.ok
);
4411 static void renameCommand(redisClient
*c
) {
4412 renameGenericCommand(c
,0);
4415 static void renamenxCommand(redisClient
*c
) {
4416 renameGenericCommand(c
,1);
4419 static void moveCommand(redisClient
*c
) {
4424 /* Obtain source and target DB pointers */
4427 if (selectDb(c
,atoi(c
->argv
[2]->ptr
)) == REDIS_ERR
) {
4428 addReply(c
,shared
.outofrangeerr
);
4432 selectDb(c
,srcid
); /* Back to the source DB */
4434 /* If the user is moving using as target the same
4435 * DB as the source DB it is probably an error. */
4437 addReply(c
,shared
.sameobjecterr
);
4441 /* Check if the element exists and get a reference */
4442 o
= lookupKeyWrite(c
->db
,c
->argv
[1]);
4444 addReply(c
,shared
.czero
);
4448 /* Try to add the element to the target DB */
4449 deleteIfVolatile(dst
,c
->argv
[1]);
4450 if (dictAdd(dst
->dict
,c
->argv
[1],o
) == DICT_ERR
) {
4451 addReply(c
,shared
.czero
);
4454 incrRefCount(c
->argv
[1]);
4457 /* OK! key moved, free the entry in the source DB */
4458 deleteKey(src
,c
->argv
[1]);
4460 addReply(c
,shared
.cone
);
4463 /* =================================== Lists ================================ */
4464 static void pushGenericCommand(redisClient
*c
, int where
) {
4468 lobj
= lookupKeyWrite(c
->db
,c
->argv
[1]);
4470 if (handleClientsWaitingListPush(c
,c
->argv
[1],c
->argv
[2])) {
4471 addReply(c
,shared
.cone
);
4474 lobj
= createListObject();
4476 if (where
== REDIS_HEAD
) {
4477 listAddNodeHead(list
,c
->argv
[2]);
4479 listAddNodeTail(list
,c
->argv
[2]);
4481 dictAdd(c
->db
->dict
,c
->argv
[1],lobj
);
4482 incrRefCount(c
->argv
[1]);
4483 incrRefCount(c
->argv
[2]);
4485 if (lobj
->type
!= REDIS_LIST
) {
4486 addReply(c
,shared
.wrongtypeerr
);
4489 if (handleClientsWaitingListPush(c
,c
->argv
[1],c
->argv
[2])) {
4490 addReply(c
,shared
.cone
);
4494 if (where
== REDIS_HEAD
) {
4495 listAddNodeHead(list
,c
->argv
[2]);
4497 listAddNodeTail(list
,c
->argv
[2]);
4499 incrRefCount(c
->argv
[2]);
4502 addReplySds(c
,sdscatprintf(sdsempty(),":%d\r\n",listLength(list
)));
4505 static void lpushCommand(redisClient
*c
) {
4506 pushGenericCommand(c
,REDIS_HEAD
);
4509 static void rpushCommand(redisClient
*c
) {
4510 pushGenericCommand(c
,REDIS_TAIL
);
4513 static void llenCommand(redisClient
*c
) {
4517 if ((o
= lookupKeyReadOrReply(c
,c
->argv
[1],shared
.czero
)) == NULL
||
4518 checkType(c
,o
,REDIS_LIST
)) return;
4521 addReplyUlong(c
,listLength(l
));
4524 static void lindexCommand(redisClient
*c
) {
4526 int index
= atoi(c
->argv
[2]->ptr
);
4530 if ((o
= lookupKeyReadOrReply(c
,c
->argv
[1],shared
.nullbulk
)) == NULL
||
4531 checkType(c
,o
,REDIS_LIST
)) return;
4534 ln
= listIndex(list
, index
);
4536 addReply(c
,shared
.nullbulk
);
4538 robj
*ele
= listNodeValue(ln
);
4539 addReplyBulk(c
,ele
);
4543 static void lsetCommand(redisClient
*c
) {
4545 int index
= atoi(c
->argv
[2]->ptr
);
4549 if ((o
= lookupKeyWriteOrReply(c
,c
->argv
[1],shared
.nokeyerr
)) == NULL
||
4550 checkType(c
,o
,REDIS_LIST
)) return;
4553 ln
= listIndex(list
, index
);
4555 addReply(c
,shared
.outofrangeerr
);
4557 robj
*ele
= listNodeValue(ln
);
4560 listNodeValue(ln
) = c
->argv
[3];
4561 incrRefCount(c
->argv
[3]);
4562 addReply(c
,shared
.ok
);
4567 static void popGenericCommand(redisClient
*c
, int where
) {
4572 if ((o
= lookupKeyWriteOrReply(c
,c
->argv
[1],shared
.nullbulk
)) == NULL
||
4573 checkType(c
,o
,REDIS_LIST
)) return;
4576 if (where
== REDIS_HEAD
)
4577 ln
= listFirst(list
);
4579 ln
= listLast(list
);
4582 addReply(c
,shared
.nullbulk
);
4584 robj
*ele
= listNodeValue(ln
);
4585 addReplyBulk(c
,ele
);
4586 listDelNode(list
,ln
);
4587 if (listLength(list
) == 0) deleteKey(c
->db
,c
->argv
[1]);
4592 static void lpopCommand(redisClient
*c
) {
4593 popGenericCommand(c
,REDIS_HEAD
);
4596 static void rpopCommand(redisClient
*c
) {
4597 popGenericCommand(c
,REDIS_TAIL
);
4600 static void lrangeCommand(redisClient
*c
) {
4602 int start
= atoi(c
->argv
[2]->ptr
);
4603 int end
= atoi(c
->argv
[3]->ptr
);
4610 if ((o
= lookupKeyReadOrReply(c
,c
->argv
[1],shared
.emptymultibulk
)) == NULL
4611 || checkType(c
,o
,REDIS_LIST
)) return;
4613 llen
= listLength(list
);
4615 /* convert negative indexes */
4616 if (start
< 0) start
= llen
+start
;
4617 if (end
< 0) end
= llen
+end
;
4618 if (start
< 0) start
= 0;
4619 if (end
< 0) end
= 0;
4621 /* indexes sanity checks */
4622 if (start
> end
|| start
>= llen
) {
4623 /* Out of range start or start > end result in empty list */
4624 addReply(c
,shared
.emptymultibulk
);
4627 if (end
>= llen
) end
= llen
-1;
4628 rangelen
= (end
-start
)+1;
4630 /* Return the result in form of a multi-bulk reply */
4631 ln
= listIndex(list
, start
);
4632 addReplySds(c
,sdscatprintf(sdsempty(),"*%d\r\n",rangelen
));
4633 for (j
= 0; j
< rangelen
; j
++) {
4634 ele
= listNodeValue(ln
);
4635 addReplyBulk(c
,ele
);
4640 static void ltrimCommand(redisClient
*c
) {
4642 int start
= atoi(c
->argv
[2]->ptr
);
4643 int end
= atoi(c
->argv
[3]->ptr
);
4645 int j
, ltrim
, rtrim
;
4649 if ((o
= lookupKeyWriteOrReply(c
,c
->argv
[1],shared
.ok
)) == NULL
||
4650 checkType(c
,o
,REDIS_LIST
)) return;
4652 llen
= listLength(list
);
4654 /* convert negative indexes */
4655 if (start
< 0) start
= llen
+start
;
4656 if (end
< 0) end
= llen
+end
;
4657 if (start
< 0) start
= 0;
4658 if (end
< 0) end
= 0;
4660 /* indexes sanity checks */
4661 if (start
> end
|| start
>= llen
) {
4662 /* Out of range start or start > end result in empty list */
4666 if (end
>= llen
) end
= llen
-1;
4671 /* Remove list elements to perform the trim */
4672 for (j
= 0; j
< ltrim
; j
++) {
4673 ln
= listFirst(list
);
4674 listDelNode(list
,ln
);
4676 for (j
= 0; j
< rtrim
; j
++) {
4677 ln
= listLast(list
);
4678 listDelNode(list
,ln
);
4680 if (listLength(list
) == 0) deleteKey(c
->db
,c
->argv
[1]);
4682 addReply(c
,shared
.ok
);
4685 static void lremCommand(redisClient
*c
) {
4688 listNode
*ln
, *next
;
4689 int toremove
= atoi(c
->argv
[2]->ptr
);
4693 if ((o
= lookupKeyWriteOrReply(c
,c
->argv
[1],shared
.czero
)) == NULL
||
4694 checkType(c
,o
,REDIS_LIST
)) return;
4698 toremove
= -toremove
;
4701 ln
= fromtail
? list
->tail
: list
->head
;
4703 robj
*ele
= listNodeValue(ln
);
4705 next
= fromtail
? ln
->prev
: ln
->next
;
4706 if (compareStringObjects(ele
,c
->argv
[3]) == 0) {
4707 listDelNode(list
,ln
);
4710 if (toremove
&& removed
== toremove
) break;
4714 if (listLength(list
) == 0) deleteKey(c
->db
,c
->argv
[1]);
4715 addReplySds(c
,sdscatprintf(sdsempty(),":%d\r\n",removed
));
4718 /* This is the semantic of this command:
4719 * RPOPLPUSH srclist dstlist:
4720 * IF LLEN(srclist) > 0
4721 * element = RPOP srclist
4722 * LPUSH dstlist element
4729 * The idea is to be able to get an element from a list in a reliable way
4730 * since the element is not just returned but pushed against another list
4731 * as well. This command was originally proposed by Ezra Zygmuntowicz.
4733 static void rpoplpushcommand(redisClient
*c
) {
4738 if ((sobj
= lookupKeyWriteOrReply(c
,c
->argv
[1],shared
.nullbulk
)) == NULL
||
4739 checkType(c
,sobj
,REDIS_LIST
)) return;
4740 srclist
= sobj
->ptr
;
4741 ln
= listLast(srclist
);
4744 addReply(c
,shared
.nullbulk
);
4746 robj
*dobj
= lookupKeyWrite(c
->db
,c
->argv
[2]);
4747 robj
*ele
= listNodeValue(ln
);
4750 if (dobj
&& dobj
->type
!= REDIS_LIST
) {
4751 addReply(c
,shared
.wrongtypeerr
);
4755 /* Add the element to the target list (unless it's directly
4756 * passed to some BLPOP-ing client) */
4757 if (!handleClientsWaitingListPush(c
,c
->argv
[2],ele
)) {
4759 /* Create the list if the key does not exist */
4760 dobj
= createListObject();
4761 dictAdd(c
->db
->dict
,c
->argv
[2],dobj
);
4762 incrRefCount(c
->argv
[2]);
4764 dstlist
= dobj
->ptr
;
4765 listAddNodeHead(dstlist
,ele
);
4769 /* Send the element to the client as reply as well */
4770 addReplyBulk(c
,ele
);
4772 /* Finally remove the element from the source list */
4773 listDelNode(srclist
,ln
);
4774 if (listLength(srclist
) == 0) deleteKey(c
->db
,c
->argv
[1]);
4779 /* ==================================== Sets ================================ */
4781 static void saddCommand(redisClient
*c
) {
4784 set
= lookupKeyWrite(c
->db
,c
->argv
[1]);
4786 set
= createSetObject();
4787 dictAdd(c
->db
->dict
,c
->argv
[1],set
);
4788 incrRefCount(c
->argv
[1]);
4790 if (set
->type
!= REDIS_SET
) {
4791 addReply(c
,shared
.wrongtypeerr
);
4795 if (dictAdd(set
->ptr
,c
->argv
[2],NULL
) == DICT_OK
) {
4796 incrRefCount(c
->argv
[2]);
4798 addReply(c
,shared
.cone
);
4800 addReply(c
,shared
.czero
);
4804 static void sremCommand(redisClient
*c
) {
4807 if ((set
= lookupKeyWriteOrReply(c
,c
->argv
[1],shared
.czero
)) == NULL
||
4808 checkType(c
,set
,REDIS_SET
)) return;
4810 if (dictDelete(set
->ptr
,c
->argv
[2]) == DICT_OK
) {
4812 if (htNeedsResize(set
->ptr
)) dictResize(set
->ptr
);
4813 if (dictSize((dict
*)set
->ptr
) == 0) deleteKey(c
->db
,c
->argv
[1]);
4814 addReply(c
,shared
.cone
);
4816 addReply(c
,shared
.czero
);
4820 static void smoveCommand(redisClient
*c
) {
4821 robj
*srcset
, *dstset
;
4823 srcset
= lookupKeyWrite(c
->db
,c
->argv
[1]);
4824 dstset
= lookupKeyWrite(c
->db
,c
->argv
[2]);
4826 /* If the source key does not exist return 0, if it's of the wrong type
4828 if (srcset
== NULL
|| srcset
->type
!= REDIS_SET
) {
4829 addReply(c
, srcset
? shared
.wrongtypeerr
: shared
.czero
);
4832 /* Error if the destination key is not a set as well */
4833 if (dstset
&& dstset
->type
!= REDIS_SET
) {
4834 addReply(c
,shared
.wrongtypeerr
);
4837 /* Remove the element from the source set */
4838 if (dictDelete(srcset
->ptr
,c
->argv
[3]) == DICT_ERR
) {
4839 /* Key not found in the src set! return zero */
4840 addReply(c
,shared
.czero
);
4843 if (dictSize((dict
*)srcset
->ptr
) == 0 && srcset
!= dstset
)
4844 deleteKey(c
->db
,c
->argv
[1]);
4846 /* Add the element to the destination set */
4848 dstset
= createSetObject();
4849 dictAdd(c
->db
->dict
,c
->argv
[2],dstset
);
4850 incrRefCount(c
->argv
[2]);
4852 if (dictAdd(dstset
->ptr
,c
->argv
[3],NULL
) == DICT_OK
)
4853 incrRefCount(c
->argv
[3]);
4854 addReply(c
,shared
.cone
);
4857 static void sismemberCommand(redisClient
*c
) {
4860 if ((set
= lookupKeyReadOrReply(c
,c
->argv
[1],shared
.czero
)) == NULL
||
4861 checkType(c
,set
,REDIS_SET
)) return;
4863 if (dictFind(set
->ptr
,c
->argv
[2]))
4864 addReply(c
,shared
.cone
);
4866 addReply(c
,shared
.czero
);
4869 static void scardCommand(redisClient
*c
) {
4873 if ((o
= lookupKeyReadOrReply(c
,c
->argv
[1],shared
.czero
)) == NULL
||
4874 checkType(c
,o
,REDIS_SET
)) return;
4877 addReplyUlong(c
,dictSize(s
));
4880 static void spopCommand(redisClient
*c
) {
4884 if ((set
= lookupKeyWriteOrReply(c
,c
->argv
[1],shared
.nullbulk
)) == NULL
||
4885 checkType(c
,set
,REDIS_SET
)) return;
4887 de
= dictGetRandomKey(set
->ptr
);
4889 addReply(c
,shared
.nullbulk
);
4891 robj
*ele
= dictGetEntryKey(de
);
4893 addReplyBulk(c
,ele
);
4894 dictDelete(set
->ptr
,ele
);
4895 if (htNeedsResize(set
->ptr
)) dictResize(set
->ptr
);
4896 if (dictSize((dict
*)set
->ptr
) == 0) deleteKey(c
->db
,c
->argv
[1]);
4901 static void srandmemberCommand(redisClient
*c
) {
4905 if ((set
= lookupKeyReadOrReply(c
,c
->argv
[1],shared
.nullbulk
)) == NULL
||
4906 checkType(c
,set
,REDIS_SET
)) return;
4908 de
= dictGetRandomKey(set
->ptr
);
4910 addReply(c
,shared
.nullbulk
);
4912 robj
*ele
= dictGetEntryKey(de
);
4914 addReplyBulk(c
,ele
);
4918 static int qsortCompareSetsByCardinality(const void *s1
, const void *s2
) {
4919 dict
**d1
= (void*) s1
, **d2
= (void*) s2
;
4921 return dictSize(*d1
)-dictSize(*d2
);
4924 static void sinterGenericCommand(redisClient
*c
, robj
**setskeys
, unsigned long setsnum
, robj
*dstkey
) {
4925 dict
**dv
= zmalloc(sizeof(dict
*)*setsnum
);
4928 robj
*lenobj
= NULL
, *dstset
= NULL
;
4929 unsigned long j
, cardinality
= 0;
4931 for (j
= 0; j
< setsnum
; j
++) {
4935 lookupKeyWrite(c
->db
,setskeys
[j
]) :
4936 lookupKeyRead(c
->db
,setskeys
[j
]);
4940 if (deleteKey(c
->db
,dstkey
))
4942 addReply(c
,shared
.czero
);
4944 addReply(c
,shared
.emptymultibulk
);
4948 if (setobj
->type
!= REDIS_SET
) {
4950 addReply(c
,shared
.wrongtypeerr
);
4953 dv
[j
] = setobj
->ptr
;
4955 /* Sort sets from the smallest to largest, this will improve our
4956 * algorithm's performance */
4957 qsort(dv
,setsnum
,sizeof(dict
*),qsortCompareSetsByCardinality
);
4959 /* The first thing we should output is the total number of elements...
4960 * since this is a multi-bulk write, but at this stage we don't know
4961 * the intersection set size, so we use a trick, append an empty object
4962 * to the output list and save the pointer to later modify it with the
4965 lenobj
= createObject(REDIS_STRING
,NULL
);
4967 decrRefCount(lenobj
);
4969 /* If we have a target key where to store the resulting set
4970 * create this key with an empty set inside */
4971 dstset
= createSetObject();
4974 /* Iterate all the elements of the first (smallest) set, and test
4975 * the element against all the other sets, if at least one set does
4976 * not include the element it is discarded */
4977 di
= dictGetIterator(dv
[0]);
4979 while((de
= dictNext(di
)) != NULL
) {
4982 for (j
= 1; j
< setsnum
; j
++)
4983 if (dictFind(dv
[j
],dictGetEntryKey(de
)) == NULL
) break;
4985 continue; /* at least one set does not contain the member */
4986 ele
= dictGetEntryKey(de
);
4988 addReplyBulk(c
,ele
);
4991 dictAdd(dstset
->ptr
,ele
,NULL
);
4995 dictReleaseIterator(di
);
4998 /* Store the resulting set into the target, if the intersection
4999 * is not an empty set. */
5000 deleteKey(c
->db
,dstkey
);
5001 if (dictSize((dict
*)dstset
->ptr
) > 0) {
5002 dictAdd(c
->db
->dict
,dstkey
,dstset
);
5003 incrRefCount(dstkey
);
5004 addReplyLong(c
,dictSize((dict
*)dstset
->ptr
));
5006 decrRefCount(dstset
);
5007 addReply(c
,shared
.czero
);
5011 lenobj
->ptr
= sdscatprintf(sdsempty(),"*%lu\r\n",cardinality
);
5016 static void sinterCommand(redisClient
*c
) {
5017 sinterGenericCommand(c
,c
->argv
+1,c
->argc
-1,NULL
);
5020 static void sinterstoreCommand(redisClient
*c
) {
5021 sinterGenericCommand(c
,c
->argv
+2,c
->argc
-2,c
->argv
[1]);
5024 #define REDIS_OP_UNION 0
5025 #define REDIS_OP_DIFF 1
5026 #define REDIS_OP_INTER 2
5028 static void sunionDiffGenericCommand(redisClient
*c
, robj
**setskeys
, int setsnum
, robj
*dstkey
, int op
) {
5029 dict
**dv
= zmalloc(sizeof(dict
*)*setsnum
);
5032 robj
*dstset
= NULL
;
5033 int j
, cardinality
= 0;
5035 for (j
= 0; j
< setsnum
; j
++) {
5039 lookupKeyWrite(c
->db
,setskeys
[j
]) :
5040 lookupKeyRead(c
->db
,setskeys
[j
]);
5045 if (setobj
->type
!= REDIS_SET
) {
5047 addReply(c
,shared
.wrongtypeerr
);
5050 dv
[j
] = setobj
->ptr
;
5053 /* We need a temp set object to store our union. If the dstkey
5054 * is not NULL (that is, we are inside an SUNIONSTORE operation) then
5055 * this set object will be the resulting object to set into the target key*/
5056 dstset
= createSetObject();
5058 /* Iterate all the elements of all the sets, add every element a single
5059 * time to the result set */
5060 for (j
= 0; j
< setsnum
; j
++) {
5061 if (op
== REDIS_OP_DIFF
&& j
== 0 && !dv
[j
]) break; /* result set is empty */
5062 if (!dv
[j
]) continue; /* non existing keys are like empty sets */
5064 di
= dictGetIterator(dv
[j
]);
5066 while((de
= dictNext(di
)) != NULL
) {
5069 /* dictAdd will not add the same element multiple times */
5070 ele
= dictGetEntryKey(de
);
5071 if (op
== REDIS_OP_UNION
|| j
== 0) {
5072 if (dictAdd(dstset
->ptr
,ele
,NULL
) == DICT_OK
) {
5076 } else if (op
== REDIS_OP_DIFF
) {
5077 if (dictDelete(dstset
->ptr
,ele
) == DICT_OK
) {
5082 dictReleaseIterator(di
);
5084 /* result set is empty? Exit asap. */
5085 if (op
== REDIS_OP_DIFF
&& cardinality
== 0) break;
5088 /* Output the content of the resulting set, if not in STORE mode */
5090 addReplySds(c
,sdscatprintf(sdsempty(),"*%d\r\n",cardinality
));
5091 di
= dictGetIterator(dstset
->ptr
);
5092 while((de
= dictNext(di
)) != NULL
) {
5095 ele
= dictGetEntryKey(de
);
5096 addReplyBulk(c
,ele
);
5098 dictReleaseIterator(di
);
5099 decrRefCount(dstset
);
5101 /* If we have a target key where to store the resulting set
5102 * create this key with the result set inside */
5103 deleteKey(c
->db
,dstkey
);
5104 if (dictSize((dict
*)dstset
->ptr
) > 0) {
5105 dictAdd(c
->db
->dict
,dstkey
,dstset
);
5106 incrRefCount(dstkey
);
5107 addReplyLong(c
,dictSize((dict
*)dstset
->ptr
));
5109 decrRefCount(dstset
);
5110 addReply(c
,shared
.czero
);
5117 static void sunionCommand(redisClient
*c
) {
5118 sunionDiffGenericCommand(c
,c
->argv
+1,c
->argc
-1,NULL
,REDIS_OP_UNION
);
5121 static void sunionstoreCommand(redisClient
*c
) {
5122 sunionDiffGenericCommand(c
,c
->argv
+2,c
->argc
-2,c
->argv
[1],REDIS_OP_UNION
);
5125 static void sdiffCommand(redisClient
*c
) {
5126 sunionDiffGenericCommand(c
,c
->argv
+1,c
->argc
-1,NULL
,REDIS_OP_DIFF
);
5129 static void sdiffstoreCommand(redisClient
*c
) {
5130 sunionDiffGenericCommand(c
,c
->argv
+2,c
->argc
-2,c
->argv
[1],REDIS_OP_DIFF
);
5133 /* ==================================== ZSets =============================== */
5135 /* ZSETs are ordered sets using two data structures to hold the same elements
5136 * in order to get O(log(N)) INSERT and REMOVE operations into a sorted
5139 * The elements are added to an hash table mapping Redis objects to scores.
5140 * At the same time the elements are added to a skip list mapping scores
5141 * to Redis objects (so objects are sorted by scores in this "view"). */
5143 /* This skiplist implementation is almost a C translation of the original
5144 * algorithm described by William Pugh in "Skip Lists: A Probabilistic
5145 * Alternative to Balanced Trees", modified in three ways:
5146 * a) this implementation allows for repeated values.
5147 * b) the comparison is not just by key (our 'score') but by satellite data.
5148 * c) there is a back pointer, so it's a doubly linked list with the back
5149 * pointers being only at "level 1". This allows to traverse the list
5150 * from tail to head, useful for ZREVRANGE. */
5152 static zskiplistNode
*zslCreateNode(int level
, double score
, robj
*obj
) {
5153 zskiplistNode
*zn
= zmalloc(sizeof(*zn
));
5155 zn
->forward
= zmalloc(sizeof(zskiplistNode
*) * level
);
5157 zn
->span
= zmalloc(sizeof(unsigned int) * (level
- 1));
5163 static zskiplist
*zslCreate(void) {
5167 zsl
= zmalloc(sizeof(*zsl
));
5170 zsl
->header
= zslCreateNode(ZSKIPLIST_MAXLEVEL
,0,NULL
);
5171 for (j
= 0; j
< ZSKIPLIST_MAXLEVEL
; j
++) {
5172 zsl
->header
->forward
[j
] = NULL
;
5174 /* span has space for ZSKIPLIST_MAXLEVEL-1 elements */
5175 if (j
< ZSKIPLIST_MAXLEVEL
-1)
5176 zsl
->header
->span
[j
] = 0;
5178 zsl
->header
->backward
= NULL
;
5183 static void zslFreeNode(zskiplistNode
*node
) {
5184 decrRefCount(node
->obj
);
5185 zfree(node
->forward
);
5190 static void zslFree(zskiplist
*zsl
) {
5191 zskiplistNode
*node
= zsl
->header
->forward
[0], *next
;
5193 zfree(zsl
->header
->forward
);
5194 zfree(zsl
->header
->span
);
5197 next
= node
->forward
[0];
5204 static int zslRandomLevel(void) {
5206 while ((random()&0xFFFF) < (ZSKIPLIST_P
* 0xFFFF))
5208 return (level
<ZSKIPLIST_MAXLEVEL
) ? level
: ZSKIPLIST_MAXLEVEL
;
5211 static void zslInsert(zskiplist
*zsl
, double score
, robj
*obj
) {
5212 zskiplistNode
*update
[ZSKIPLIST_MAXLEVEL
], *x
;
5213 unsigned int rank
[ZSKIPLIST_MAXLEVEL
];
5217 for (i
= zsl
->level
-1; i
>= 0; i
--) {
5218 /* store rank that is crossed to reach the insert position */
5219 rank
[i
] = i
== (zsl
->level
-1) ? 0 : rank
[i
+1];
5221 while (x
->forward
[i
] &&
5222 (x
->forward
[i
]->score
< score
||
5223 (x
->forward
[i
]->score
== score
&&
5224 compareStringObjects(x
->forward
[i
]->obj
,obj
) < 0))) {
5225 rank
[i
] += i
> 0 ? x
->span
[i
-1] : 1;
5230 /* we assume the key is not already inside, since we allow duplicated
5231 * scores, and the re-insertion of score and redis object should never
5232 * happen since the caller of zslInsert() should test in the hash table
5233 * if the element is already inside or not. */
5234 level
= zslRandomLevel();
5235 if (level
> zsl
->level
) {
5236 for (i
= zsl
->level
; i
< level
; i
++) {
5238 update
[i
] = zsl
->header
;
5239 update
[i
]->span
[i
-1] = zsl
->length
;
5243 x
= zslCreateNode(level
,score
,obj
);
5244 for (i
= 0; i
< level
; i
++) {
5245 x
->forward
[i
] = update
[i
]->forward
[i
];
5246 update
[i
]->forward
[i
] = x
;
5248 /* update span covered by update[i] as x is inserted here */
5250 x
->span
[i
-1] = update
[i
]->span
[i
-1] - (rank
[0] - rank
[i
]);
5251 update
[i
]->span
[i
-1] = (rank
[0] - rank
[i
]) + 1;
5255 /* increment span for untouched levels */
5256 for (i
= level
; i
< zsl
->level
; i
++) {
5257 update
[i
]->span
[i
-1]++;
5260 x
->backward
= (update
[0] == zsl
->header
) ? NULL
: update
[0];
5262 x
->forward
[0]->backward
= x
;
5268 /* Internal function used by zslDelete, zslDeleteByScore and zslDeleteByRank */
5269 void zslDeleteNode(zskiplist
*zsl
, zskiplistNode
*x
, zskiplistNode
**update
) {
5271 for (i
= 0; i
< zsl
->level
; i
++) {
5272 if (update
[i
]->forward
[i
] == x
) {
5274 update
[i
]->span
[i
-1] += x
->span
[i
-1] - 1;
5276 update
[i
]->forward
[i
] = x
->forward
[i
];
5278 /* invariant: i > 0, because update[0]->forward[0]
5279 * is always equal to x */
5280 update
[i
]->span
[i
-1] -= 1;
5283 if (x
->forward
[0]) {
5284 x
->forward
[0]->backward
= x
->backward
;
5286 zsl
->tail
= x
->backward
;
5288 while(zsl
->level
> 1 && zsl
->header
->forward
[zsl
->level
-1] == NULL
)
5293 /* Delete an element with matching score/object from the skiplist. */
5294 static int zslDelete(zskiplist
*zsl
, double score
, robj
*obj
) {
5295 zskiplistNode
*update
[ZSKIPLIST_MAXLEVEL
], *x
;
5299 for (i
= zsl
->level
-1; i
>= 0; i
--) {
5300 while (x
->forward
[i
] &&
5301 (x
->forward
[i
]->score
< score
||
5302 (x
->forward
[i
]->score
== score
&&
5303 compareStringObjects(x
->forward
[i
]->obj
,obj
) < 0)))
5307 /* We may have multiple elements with the same score, what we need
5308 * is to find the element with both the right score and object. */
5310 if (x
&& score
== x
->score
&& compareStringObjects(x
->obj
,obj
) == 0) {
5311 zslDeleteNode(zsl
, x
, update
);
5315 return 0; /* not found */
5317 return 0; /* not found */
5320 /* Delete all the elements with score between min and max from the skiplist.
5321 * Min and max are inclusive, so any element with min <= score <= max is deleted.
5322 * Note that this function takes the reference to the hash table view of the
5323 * sorted set, in order to remove the elements from the hash table too. */
5324 static unsigned long zslDeleteRangeByScore(zskiplist
*zsl
, double min
, double max
, dict
*dict
) {
5325 zskiplistNode
*update
[ZSKIPLIST_MAXLEVEL
], *x
;
5326 unsigned long removed
= 0;
5330 for (i
= zsl
->level
-1; i
>= 0; i
--) {
5331 while (x
->forward
[i
] && x
->forward
[i
]->score
< min
)
5335 /* We may have multiple elements with the same score, what we need
5336 * is to find the element with both the right score and object. */
5338 while (x
&& x
->score
<= max
) {
5339 zskiplistNode
*next
= x
->forward
[0];
5340 zslDeleteNode(zsl
, x
, update
);
5341 dictDelete(dict
,x
->obj
);
5346 return removed
; /* not found */
5349 /* Delete all the elements with rank between start and end from the skiplist.
5350 * Start and end are inclusive. Note that start and end need to be 1-based */
5351 static unsigned long zslDeleteRangeByRank(zskiplist
*zsl
, unsigned int start
, unsigned int end
, dict
*dict
) {
5352 zskiplistNode
*update
[ZSKIPLIST_MAXLEVEL
], *x
;
5353 unsigned long traversed
= 0, removed
= 0;
5357 for (i
= zsl
->level
-1; i
>= 0; i
--) {
5358 while (x
->forward
[i
] && (traversed
+ (i
> 0 ? x
->span
[i
-1] : 1)) < start
) {
5359 traversed
+= i
> 0 ? x
->span
[i
-1] : 1;
5367 while (x
&& traversed
<= end
) {
5368 zskiplistNode
*next
= x
->forward
[0];
5369 zslDeleteNode(zsl
, x
, update
);
5370 dictDelete(dict
,x
->obj
);
5379 /* Find the first node having a score equal or greater than the specified one.
5380 * Returns NULL if there is no match. */
5381 static zskiplistNode
*zslFirstWithScore(zskiplist
*zsl
, double score
) {
5386 for (i
= zsl
->level
-1; i
>= 0; i
--) {
5387 while (x
->forward
[i
] && x
->forward
[i
]->score
< score
)
5390 /* We may have multiple elements with the same score, what we need
5391 * is to find the element with both the right score and object. */
5392 return x
->forward
[0];
5395 /* Find the rank for an element by both score and key.
5396 * Returns 0 when the element cannot be found, rank otherwise.
5397 * Note that the rank is 1-based due to the span of zsl->header to the first element. */
5399 static unsigned long zslGetRank(zskiplist
*zsl
, double score
, robj
*o
) {
5401 unsigned long rank
= 0;
5405 for (i
= zsl
->level
-1; i
>= 0; i
--) {
5406 while (x
->forward
[i
] &&
5407 (x
->forward
[i
]->score
< score
||
5408 (x
->forward
[i
]->score
== score
&&
5409 compareStringObjects(x
->forward
[i
]->obj
,o
) <= 0))) {
5410 rank
+= i
> 0 ? x
->span
[i
-1] : 1;
5414 /* x might be equal to zsl->header, so test if obj is non-NULL */
5415 if (x
->obj
&& compareStringObjects(x
->obj
,o
) == 0) {
5422 /* Finds an element by its rank. The rank argument needs to be 1-based. */
5423 zskiplistNode
* zslGetElementByRank(zskiplist
*zsl
, unsigned long rank
) {
5425 unsigned long traversed
= 0;
5429 for (i
= zsl
->level
-1; i
>= 0; i
--) {
5430 while (x
->forward
[i
] && (traversed
+ (i
>0 ? x
->span
[i
-1] : 1)) <= rank
)
5432 traversed
+= i
> 0 ? x
->span
[i
-1] : 1;
5435 if (traversed
== rank
) {
5442 /* The actual Z-commands implementations */
5444 /* This generic command implements both ZADD and ZINCRBY.
5445 * scoreval is the score if the operation is a ZADD (doincrement == 0) or
5446 * the increment if the operation is a ZINCRBY (doincrement == 1). */
5447 static void zaddGenericCommand(redisClient
*c
, robj
*key
, robj
*ele
, double scoreval
, int doincrement
) {
5452 zsetobj
= lookupKeyWrite(c
->db
,key
);
5453 if (zsetobj
== NULL
) {
5454 zsetobj
= createZsetObject();
5455 dictAdd(c
->db
->dict
,key
,zsetobj
);
5458 if (zsetobj
->type
!= REDIS_ZSET
) {
5459 addReply(c
,shared
.wrongtypeerr
);
5465 /* Ok now since we implement both ZADD and ZINCRBY here the code
5466 * needs to handle the two different conditions. It's all about setting
5467 * '*score', that is, the new score to set, to the right value. */
5468 score
= zmalloc(sizeof(double));
5472 /* Read the old score. If the element was not present starts from 0 */
5473 de
= dictFind(zs
->dict
,ele
);
5475 double *oldscore
= dictGetEntryVal(de
);
5476 *score
= *oldscore
+ scoreval
;
5484 /* What follows is a simple remove and re-insert operation that is common
5485 * to both ZADD and ZINCRBY... */
5486 if (dictAdd(zs
->dict
,ele
,score
) == DICT_OK
) {
5487 /* case 1: New element */
5488 incrRefCount(ele
); /* added to hash */
5489 zslInsert(zs
->zsl
,*score
,ele
);
5490 incrRefCount(ele
); /* added to skiplist */
5493 addReplyDouble(c
,*score
);
5495 addReply(c
,shared
.cone
);
5500 /* case 2: Score update operation */
5501 de
= dictFind(zs
->dict
,ele
);
5502 redisAssert(de
!= NULL
);
5503 oldscore
= dictGetEntryVal(de
);
5504 if (*score
!= *oldscore
) {
5507 /* Remove and insert the element in the skip list with new score */
5508 deleted
= zslDelete(zs
->zsl
,*oldscore
,ele
);
5509 redisAssert(deleted
!= 0);
5510 zslInsert(zs
->zsl
,*score
,ele
);
5512 /* Update the score in the hash table */
5513 dictReplace(zs
->dict
,ele
,score
);
5519 addReplyDouble(c
,*score
);
5521 addReply(c
,shared
.czero
);
5525 static void zaddCommand(redisClient
*c
) {
5528 if (getDoubleFromObjectOrReply(c
, c
->argv
[2], &scoreval
, NULL
) != REDIS_OK
) return;
5529 zaddGenericCommand(c
,c
->argv
[1],c
->argv
[3],scoreval
,0);
5532 static void zincrbyCommand(redisClient
*c
) {
5535 if (getDoubleFromObjectOrReply(c
, c
->argv
[2], &scoreval
, NULL
) != REDIS_OK
) return;
5536 zaddGenericCommand(c
,c
->argv
[1],c
->argv
[3],scoreval
,1);
5539 static void zremCommand(redisClient
*c
) {
5546 if ((zsetobj
= lookupKeyWriteOrReply(c
,c
->argv
[1],shared
.czero
)) == NULL
||
5547 checkType(c
,zsetobj
,REDIS_ZSET
)) return;
5550 de
= dictFind(zs
->dict
,c
->argv
[2]);
5552 addReply(c
,shared
.czero
);
5555 /* Delete from the skiplist */
5556 oldscore
= dictGetEntryVal(de
);
5557 deleted
= zslDelete(zs
->zsl
,*oldscore
,c
->argv
[2]);
5558 redisAssert(deleted
!= 0);
5560 /* Delete from the hash table */
5561 dictDelete(zs
->dict
,c
->argv
[2]);
5562 if (htNeedsResize(zs
->dict
)) dictResize(zs
->dict
);
5563 if (dictSize(zs
->dict
) == 0) deleteKey(c
->db
,c
->argv
[1]);
5565 addReply(c
,shared
.cone
);
5568 static void zremrangebyscoreCommand(redisClient
*c
) {
5575 if ((getDoubleFromObjectOrReply(c
, c
->argv
[2], &min
, NULL
) != REDIS_OK
) ||
5576 (getDoubleFromObjectOrReply(c
, c
->argv
[3], &max
, NULL
) != REDIS_OK
)) return;
5578 if ((zsetobj
= lookupKeyWriteOrReply(c
,c
->argv
[1],shared
.czero
)) == NULL
||
5579 checkType(c
,zsetobj
,REDIS_ZSET
)) return;
5582 deleted
= zslDeleteRangeByScore(zs
->zsl
,min
,max
,zs
->dict
);
5583 if (htNeedsResize(zs
->dict
)) dictResize(zs
->dict
);
5584 if (dictSize(zs
->dict
) == 0) deleteKey(c
->db
,c
->argv
[1]);
5585 server
.dirty
+= deleted
;
5586 addReplyLong(c
,deleted
);
5589 static void zremrangebyrankCommand(redisClient
*c
) {
5597 if ((getLongFromObjectOrReply(c
, c
->argv
[2], &start
, NULL
) != REDIS_OK
) ||
5598 (getLongFromObjectOrReply(c
, c
->argv
[3], &end
, NULL
) != REDIS_OK
)) return;
5600 if ((zsetobj
= lookupKeyWriteOrReply(c
,c
->argv
[1],shared
.czero
)) == NULL
||
5601 checkType(c
,zsetobj
,REDIS_ZSET
)) return;
5603 llen
= zs
->zsl
->length
;
5605 /* convert negative indexes */
5606 if (start
< 0) start
= llen
+start
;
5607 if (end
< 0) end
= llen
+end
;
5608 if (start
< 0) start
= 0;
5609 if (end
< 0) end
= 0;
5611 /* indexes sanity checks */
5612 if (start
> end
|| start
>= llen
) {
5613 addReply(c
,shared
.czero
);
5616 if (end
>= llen
) end
= llen
-1;
5618 /* increment start and end because zsl*Rank functions
5619 * use 1-based rank */
5620 deleted
= zslDeleteRangeByRank(zs
->zsl
,start
+1,end
+1,zs
->dict
);
5621 if (htNeedsResize(zs
->dict
)) dictResize(zs
->dict
);
5622 if (dictSize(zs
->dict
) == 0) deleteKey(c
->db
,c
->argv
[1]);
5623 server
.dirty
+= deleted
;
5624 addReplyLong(c
, deleted
);
5632 static int qsortCompareZsetopsrcByCardinality(const void *s1
, const void *s2
) {
5633 zsetopsrc
*d1
= (void*) s1
, *d2
= (void*) s2
;
5634 unsigned long size1
, size2
;
5635 size1
= d1
->dict
? dictSize(d1
->dict
) : 0;
5636 size2
= d2
->dict
? dictSize(d2
->dict
) : 0;
5637 return size1
- size2
;
5640 #define REDIS_AGGR_SUM 1
5641 #define REDIS_AGGR_MIN 2
5642 #define REDIS_AGGR_MAX 3
5644 inline static void zunionInterAggregate(double *target
, double val
, int aggregate
) {
5645 if (aggregate
== REDIS_AGGR_SUM
) {
5646 *target
= *target
+ val
;
5647 } else if (aggregate
== REDIS_AGGR_MIN
) {
5648 *target
= val
< *target
? val
: *target
;
5649 } else if (aggregate
== REDIS_AGGR_MAX
) {
5650 *target
= val
> *target
? val
: *target
;
5653 redisPanic("Unknown ZUNION/INTER aggregate type");
5657 static void zunionInterGenericCommand(redisClient
*c
, robj
*dstkey
, int op
) {
5659 int aggregate
= REDIS_AGGR_SUM
;
5666 /* expect zsetnum input keys to be given */
5667 zsetnum
= atoi(c
->argv
[2]->ptr
);
5669 addReplySds(c
,sdsnew("-ERR at least 1 input key is needed for ZUNION/ZINTER\r\n"));
5673 /* test if the expected number of keys would overflow */
5674 if (3+zsetnum
> c
->argc
) {
5675 addReply(c
,shared
.syntaxerr
);
5679 /* read keys to be used for input */
5680 src
= zmalloc(sizeof(zsetopsrc
) * zsetnum
);
5681 for (i
= 0, j
= 3; i
< zsetnum
; i
++, j
++) {
5682 robj
*zsetobj
= lookupKeyWrite(c
->db
,c
->argv
[j
]);
5686 if (zsetobj
->type
!= REDIS_ZSET
) {
5688 addReply(c
,shared
.wrongtypeerr
);
5691 src
[i
].dict
= ((zset
*)zsetobj
->ptr
)->dict
;
5694 /* default all weights to 1 */
5695 src
[i
].weight
= 1.0;
5698 /* parse optional extra arguments */
5700 int remaining
= c
->argc
- j
;
5703 if (remaining
>= (zsetnum
+ 1) && !strcasecmp(c
->argv
[j
]->ptr
,"weights")) {
5705 for (i
= 0; i
< zsetnum
; i
++, j
++, remaining
--) {
5706 if (getDoubleFromObjectOrReply(c
, c
->argv
[j
], &src
[i
].weight
, NULL
) != REDIS_OK
)
5709 } else if (remaining
>= 2 && !strcasecmp(c
->argv
[j
]->ptr
,"aggregate")) {
5711 if (!strcasecmp(c
->argv
[j
]->ptr
,"sum")) {
5712 aggregate
= REDIS_AGGR_SUM
;
5713 } else if (!strcasecmp(c
->argv
[j
]->ptr
,"min")) {
5714 aggregate
= REDIS_AGGR_MIN
;
5715 } else if (!strcasecmp(c
->argv
[j
]->ptr
,"max")) {
5716 aggregate
= REDIS_AGGR_MAX
;
5719 addReply(c
,shared
.syntaxerr
);
5725 addReply(c
,shared
.syntaxerr
);
5731 /* sort sets from the smallest to largest, this will improve our
5732 * algorithm's performance */
5733 qsort(src
,zsetnum
,sizeof(zsetopsrc
), qsortCompareZsetopsrcByCardinality
);
5735 dstobj
= createZsetObject();
5736 dstzset
= dstobj
->ptr
;
5738 if (op
== REDIS_OP_INTER
) {
5739 /* skip going over all entries if the smallest zset is NULL or empty */
5740 if (src
[0].dict
&& dictSize(src
[0].dict
) > 0) {
5741 /* precondition: as src[0].dict is non-empty and the zsets are ordered
5742 * from small to large, all src[i > 0].dict are non-empty too */
5743 di
= dictGetIterator(src
[0].dict
);
5744 while((de
= dictNext(di
)) != NULL
) {
5745 double *score
= zmalloc(sizeof(double)), value
;
5746 *score
= src
[0].weight
* (*(double*)dictGetEntryVal(de
));
5748 for (j
= 1; j
< zsetnum
; j
++) {
5749 dictEntry
*other
= dictFind(src
[j
].dict
,dictGetEntryKey(de
));
5751 value
= src
[j
].weight
* (*(double*)dictGetEntryVal(other
));
5752 zunionInterAggregate(score
, value
, aggregate
);
5758 /* skip entry when not present in every source dict */
5762 robj
*o
= dictGetEntryKey(de
);
5763 dictAdd(dstzset
->dict
,o
,score
);
5764 incrRefCount(o
); /* added to dictionary */
5765 zslInsert(dstzset
->zsl
,*score
,o
);
5766 incrRefCount(o
); /* added to skiplist */
5769 dictReleaseIterator(di
);
5771 } else if (op
== REDIS_OP_UNION
) {
5772 for (i
= 0; i
< zsetnum
; i
++) {
5773 if (!src
[i
].dict
) continue;
5775 di
= dictGetIterator(src
[i
].dict
);
5776 while((de
= dictNext(di
)) != NULL
) {
5777 /* skip key when already processed */
5778 if (dictFind(dstzset
->dict
,dictGetEntryKey(de
)) != NULL
) continue;
5780 double *score
= zmalloc(sizeof(double)), value
;
5781 *score
= src
[i
].weight
* (*(double*)dictGetEntryVal(de
));
5783 /* because the zsets are sorted by size, it's only possible
5784 * for sets at larger indices to hold this entry */
5785 for (j
= (i
+1); j
< zsetnum
; j
++) {
5786 dictEntry
*other
= dictFind(src
[j
].dict
,dictGetEntryKey(de
));
5788 value
= src
[j
].weight
* (*(double*)dictGetEntryVal(other
));
5789 zunionInterAggregate(score
, value
, aggregate
);
5793 robj
*o
= dictGetEntryKey(de
);
5794 dictAdd(dstzset
->dict
,o
,score
);
5795 incrRefCount(o
); /* added to dictionary */
5796 zslInsert(dstzset
->zsl
,*score
,o
);
5797 incrRefCount(o
); /* added to skiplist */
5799 dictReleaseIterator(di
);
5802 /* unknown operator */
5803 redisAssert(op
== REDIS_OP_INTER
|| op
== REDIS_OP_UNION
);
5806 deleteKey(c
->db
,dstkey
);
5807 if (dstzset
->zsl
->length
) {
5808 dictAdd(c
->db
->dict
,dstkey
,dstobj
);
5809 incrRefCount(dstkey
);
5810 addReplyLong(c
, dstzset
->zsl
->length
);
5813 decrRefCount(dstobj
);
5814 addReply(c
, shared
.czero
);
5819 static void zunionCommand(redisClient
*c
) {
5820 zunionInterGenericCommand(c
,c
->argv
[1], REDIS_OP_UNION
);
5823 static void zinterCommand(redisClient
*c
) {
5824 zunionInterGenericCommand(c
,c
->argv
[1], REDIS_OP_INTER
);
5827 static void zrangeGenericCommand(redisClient
*c
, int reverse
) {
5839 if ((getLongFromObjectOrReply(c
, c
->argv
[2], &start
, NULL
) != REDIS_OK
) ||
5840 (getLongFromObjectOrReply(c
, c
->argv
[3], &end
, NULL
) != REDIS_OK
)) return;
5842 if (c
->argc
== 5 && !strcasecmp(c
->argv
[4]->ptr
,"withscores")) {
5844 } else if (c
->argc
>= 5) {
5845 addReply(c
,shared
.syntaxerr
);
5849 if ((o
= lookupKeyReadOrReply(c
,c
->argv
[1],shared
.emptymultibulk
)) == NULL
5850 || checkType(c
,o
,REDIS_ZSET
)) return;
5855 /* convert negative indexes */
5856 if (start
< 0) start
= llen
+start
;
5857 if (end
< 0) end
= llen
+end
;
5858 if (start
< 0) start
= 0;
5859 if (end
< 0) end
= 0;
5861 /* indexes sanity checks */
5862 if (start
> end
|| start
>= llen
) {
5863 /* Out of range start or start > end result in empty list */
5864 addReply(c
,shared
.emptymultibulk
);
5867 if (end
>= llen
) end
= llen
-1;
5868 rangelen
= (end
-start
)+1;
5870 /* check if starting point is trivial, before searching
5871 * the element in log(N) time */
5873 ln
= start
== 0 ? zsl
->tail
: zslGetElementByRank(zsl
, llen
-start
);
5876 zsl
->header
->forward
[0] : zslGetElementByRank(zsl
, start
+1);
5879 /* Return the result in form of a multi-bulk reply */
5880 addReplySds(c
,sdscatprintf(sdsempty(),"*%d\r\n",
5881 withscores
? (rangelen
*2) : rangelen
));
5882 for (j
= 0; j
< rangelen
; j
++) {
5884 addReplyBulk(c
,ele
);
5886 addReplyDouble(c
,ln
->score
);
5887 ln
= reverse
? ln
->backward
: ln
->forward
[0];
5891 static void zrangeCommand(redisClient
*c
) {
5892 zrangeGenericCommand(c
,0);
5895 static void zrevrangeCommand(redisClient
*c
) {
5896 zrangeGenericCommand(c
,1);
5899 /* This command implements both ZRANGEBYSCORE and ZCOUNT.
5900 * If justcount is non-zero, just the count is returned. */
5901 static void genericZrangebyscoreCommand(redisClient
*c
, int justcount
) {
5904 int minex
= 0, maxex
= 0; /* are min or max exclusive? */
5905 int offset
= 0, limit
= -1;
5909 /* Parse the min-max interval. If one of the values is prefixed
5910 * by the "(" character, it's considered "open". For instance
5911 * ZRANGEBYSCORE zset (1.5 (2.5 will match min < x < max
5912 * ZRANGEBYSCORE zset 1.5 2.5 will instead match min <= x <= max */
5913 if (((char*)c
->argv
[2]->ptr
)[0] == '(') {
5914 min
= strtod((char*)c
->argv
[2]->ptr
+1,NULL
);
5917 min
= strtod(c
->argv
[2]->ptr
,NULL
);
5919 if (((char*)c
->argv
[3]->ptr
)[0] == '(') {
5920 max
= strtod((char*)c
->argv
[3]->ptr
+1,NULL
);
5923 max
= strtod(c
->argv
[3]->ptr
,NULL
);
5926 /* Parse "WITHSCORES": note that if the command was called with
5927 * the name ZCOUNT then we are sure that c->argc == 4, so we'll never
5928 * enter the following paths to parse WITHSCORES and LIMIT. */
5929 if (c
->argc
== 5 || c
->argc
== 8) {
5930 if (strcasecmp(c
->argv
[c
->argc
-1]->ptr
,"withscores") == 0)
5935 if (c
->argc
!= (4 + withscores
) && c
->argc
!= (7 + withscores
))
5939 sdsnew("-ERR wrong number of arguments for ZRANGEBYSCORE\r\n"));
5944 if (c
->argc
== (7 + withscores
) && strcasecmp(c
->argv
[4]->ptr
,"limit")) {
5945 addReply(c
,shared
.syntaxerr
);
5947 } else if (c
->argc
== (7 + withscores
)) {
5948 offset
= atoi(c
->argv
[5]->ptr
);
5949 limit
= atoi(c
->argv
[6]->ptr
);
5950 if (offset
< 0) offset
= 0;
5953 /* Ok, lookup the key and get the range */
5954 o
= lookupKeyRead(c
->db
,c
->argv
[1]);
5956 addReply(c
,justcount
? shared
.czero
: shared
.emptymultibulk
);
5958 if (o
->type
!= REDIS_ZSET
) {
5959 addReply(c
,shared
.wrongtypeerr
);
5961 zset
*zsetobj
= o
->ptr
;
5962 zskiplist
*zsl
= zsetobj
->zsl
;
5964 robj
*ele
, *lenobj
= NULL
;
5965 unsigned long rangelen
= 0;
5967 /* Get the first node with the score >= min, or with
5968 * score > min if 'minex' is true. */
5969 ln
= zslFirstWithScore(zsl
,min
);
5970 while (minex
&& ln
&& ln
->score
== min
) ln
= ln
->forward
[0];
5973 /* No element matching the specified interval */
5974 addReply(c
,justcount
? shared
.czero
: shared
.emptymultibulk
);
5978 /* We don't know in advance how many matching elements there
5979 * are in the list, so we push this object that will represent
5980 * the multi-bulk length in the output buffer, and will "fix"
5983 lenobj
= createObject(REDIS_STRING
,NULL
);
5985 decrRefCount(lenobj
);
5988 while(ln
&& (maxex
? (ln
->score
< max
) : (ln
->score
<= max
))) {
5991 ln
= ln
->forward
[0];
5994 if (limit
== 0) break;
5997 addReplyBulk(c
,ele
);
5999 addReplyDouble(c
,ln
->score
);
6001 ln
= ln
->forward
[0];
6003 if (limit
> 0) limit
--;
6006 addReplyLong(c
,(long)rangelen
);
6008 lenobj
->ptr
= sdscatprintf(sdsempty(),"*%lu\r\n",
6009 withscores
? (rangelen
*2) : rangelen
);
6015 static void zrangebyscoreCommand(redisClient
*c
) {
6016 genericZrangebyscoreCommand(c
,0);
6019 static void zcountCommand(redisClient
*c
) {
6020 genericZrangebyscoreCommand(c
,1);
6023 static void zcardCommand(redisClient
*c
) {
6027 if ((o
= lookupKeyReadOrReply(c
,c
->argv
[1],shared
.czero
)) == NULL
||
6028 checkType(c
,o
,REDIS_ZSET
)) return;
6031 addReplyUlong(c
,zs
->zsl
->length
);
6034 static void zscoreCommand(redisClient
*c
) {
6039 if ((o
= lookupKeyReadOrReply(c
,c
->argv
[1],shared
.nullbulk
)) == NULL
||
6040 checkType(c
,o
,REDIS_ZSET
)) return;
6043 de
= dictFind(zs
->dict
,c
->argv
[2]);
6045 addReply(c
,shared
.nullbulk
);
6047 double *score
= dictGetEntryVal(de
);
6049 addReplyDouble(c
,*score
);
6053 static void zrankGenericCommand(redisClient
*c
, int reverse
) {
6061 if ((o
= lookupKeyReadOrReply(c
,c
->argv
[1],shared
.nullbulk
)) == NULL
||
6062 checkType(c
,o
,REDIS_ZSET
)) return;
6066 de
= dictFind(zs
->dict
,c
->argv
[2]);
6068 addReply(c
,shared
.nullbulk
);
6072 score
= dictGetEntryVal(de
);
6073 rank
= zslGetRank(zsl
, *score
, c
->argv
[2]);
6076 addReplyLong(c
, zsl
->length
- rank
);
6078 addReplyLong(c
, rank
-1);
6081 addReply(c
,shared
.nullbulk
);
6085 static void zrankCommand(redisClient
*c
) {
6086 zrankGenericCommand(c
, 0);
6089 static void zrevrankCommand(redisClient
*c
) {
6090 zrankGenericCommand(c
, 1);
6093 /* ========================= Hashes utility functions ======================= */
6094 #define REDIS_HASH_KEY 1
6095 #define REDIS_HASH_VALUE 2
6097 /* Check the length of a number of objects to see if we need to convert a
6098 * zipmap to a real hash. Note that we only check string encoded objects
6099 * as their string length can be queried in constant time. */
6100 static void hashTryConversion(robj
*subject
, robj
**argv
, int start
, int end
) {
6102 if (subject
->encoding
!= REDIS_ENCODING_ZIPMAP
) return;
6104 for (i
= start
; i
<= end
; i
++) {
6105 if (argv
[i
]->encoding
== REDIS_ENCODING_RAW
&&
6106 sdslen(argv
[i
]->ptr
) > server
.hash_max_zipmap_value
)
6108 convertToRealHash(subject
);
6114 /* Encode given objects in-place when the hash uses a dict. */
6115 static void hashTryObjectEncoding(robj
*subject
, robj
**o1
, robj
**o2
) {
6116 if (subject
->encoding
== REDIS_ENCODING_HT
) {
6117 if (o1
) *o1
= tryObjectEncoding(*o1
);
6118 if (o2
) *o2
= tryObjectEncoding(*o2
);
6122 /* Get the value from a hash identified by key. Returns either a string
6123 * object or NULL if the value cannot be found. The refcount of the object
6124 * is always increased by 1 when the value was found. */
6125 static robj
*hashGet(robj
*o
, robj
*key
) {
6127 if (o
->encoding
== REDIS_ENCODING_ZIPMAP
) {
6130 key
= getDecodedObject(key
);
6131 if (zipmapGet(o
->ptr
,key
->ptr
,sdslen(key
->ptr
),&v
,&vlen
)) {
6132 value
= createStringObject((char*)v
,vlen
);
6136 dictEntry
*de
= dictFind(o
->ptr
,key
);
6138 value
= dictGetEntryVal(de
);
6139 incrRefCount(value
);
6145 /* Test if the key exists in the given hash. Returns 1 if the key
6146 * exists and 0 when it doesn't. */
6147 static int hashExists(robj
*o
, robj
*key
) {
6148 if (o
->encoding
== REDIS_ENCODING_ZIPMAP
) {
6149 key
= getDecodedObject(key
);
6150 if (zipmapExists(o
->ptr
,key
->ptr
,sdslen(key
->ptr
))) {
6156 if (dictFind(o
->ptr
,key
) != NULL
) {
6163 /* Add an element, discard the old if the key already exists.
6164 * Return 0 on insert and 1 on update. */
6165 static int hashSet(robj
*o
, robj
*key
, robj
*value
) {
6167 if (o
->encoding
== REDIS_ENCODING_ZIPMAP
) {
6168 key
= getDecodedObject(key
);
6169 value
= getDecodedObject(value
);
6170 o
->ptr
= zipmapSet(o
->ptr
,
6171 key
->ptr
,sdslen(key
->ptr
),
6172 value
->ptr
,sdslen(value
->ptr
), &update
);
6174 decrRefCount(value
);
6176 /* Check if the zipmap needs to be upgraded to a real hash table */
6177 if (zipmapLen(o
->ptr
) > server
.hash_max_zipmap_entries
)
6178 convertToRealHash(o
);
6180 if (dictReplace(o
->ptr
,key
,value
)) {
6187 incrRefCount(value
);
6192 /* Delete an element from a hash.
6193 * Return 1 on deleted and 0 on not found. */
6194 static int hashDelete(robj
*o
, robj
*key
) {
6196 if (o
->encoding
== REDIS_ENCODING_ZIPMAP
) {
6197 key
= getDecodedObject(key
);
6198 o
->ptr
= zipmapDel(o
->ptr
,key
->ptr
,sdslen(key
->ptr
), &deleted
);
6201 deleted
= dictDelete((dict
*)o
->ptr
,key
) == DICT_OK
;
6202 /* Always check if the dictionary needs a resize after a delete. */
6203 if (deleted
&& htNeedsResize(o
->ptr
)) dictResize(o
->ptr
);
6208 /* Return the number of elements in a hash. */
6209 static unsigned long hashLength(robj
*o
) {
6210 return (o
->encoding
== REDIS_ENCODING_ZIPMAP
) ?
6211 zipmapLen((unsigned char*)o
->ptr
) : dictSize((dict
*)o
->ptr
);
6214 /* Structure to hold hash iteration abstraction. Note that iteration over
6215 * hashes involves both fields and values. Because it is possible that
6216 * not both are required, store pointers in the iterator to avoid
6217 * unnecessary memory allocation for fields/values. */
6221 unsigned char *zk
, *zv
;
6222 unsigned int zklen
, zvlen
;
6228 static hashIterator
*hashInitIterator(robj
*subject
) {
6229 hashIterator
*hi
= zmalloc(sizeof(hashIterator
));
6230 hi
->encoding
= subject
->encoding
;
6231 if (hi
->encoding
== REDIS_ENCODING_ZIPMAP
) {
6232 hi
->zi
= zipmapRewind(subject
->ptr
);
6233 } else if (hi
->encoding
== REDIS_ENCODING_HT
) {
6234 hi
->di
= dictGetIterator(subject
->ptr
);
6241 static void hashReleaseIterator(hashIterator
*hi
) {
6242 if (hi
->encoding
== REDIS_ENCODING_HT
) {
6243 dictReleaseIterator(hi
->di
);
6248 /* Move to the next entry in the hash. Return REDIS_OK when the next entry
6249 * could be found and REDIS_ERR when the iterator reaches the end. */
6250 static int hashNext(hashIterator
*hi
) {
6251 if (hi
->encoding
== REDIS_ENCODING_ZIPMAP
) {
6252 if ((hi
->zi
= zipmapNext(hi
->zi
, &hi
->zk
, &hi
->zklen
,
6253 &hi
->zv
, &hi
->zvlen
)) == NULL
) return REDIS_ERR
;
6255 if ((hi
->de
= dictNext(hi
->di
)) == NULL
) return REDIS_ERR
;
6260 /* Get key or value object at current iteration position.
6261 * This increases the refcount of the field object by 1. */
6262 static robj
*hashCurrent(hashIterator
*hi
, int what
) {
6264 if (hi
->encoding
== REDIS_ENCODING_ZIPMAP
) {
6265 if (what
& REDIS_HASH_KEY
) {
6266 o
= createStringObject((char*)hi
->zk
,hi
->zklen
);
6268 o
= createStringObject((char*)hi
->zv
,hi
->zvlen
);
6271 if (what
& REDIS_HASH_KEY
) {
6272 o
= dictGetEntryKey(hi
->de
);
6274 o
= dictGetEntryVal(hi
->de
);
6281 static robj
*hashLookupWriteOrCreate(redisClient
*c
, robj
*key
) {
6282 robj
*o
= lookupKeyWrite(c
->db
,key
);
6284 o
= createHashObject();
6285 dictAdd(c
->db
->dict
,key
,o
);
6288 if (o
->type
!= REDIS_HASH
) {
6289 addReply(c
,shared
.wrongtypeerr
);
6296 /* ============================= Hash commands ============================== */
6297 static void hsetCommand(redisClient
*c
) {
6301 if ((o
= hashLookupWriteOrCreate(c
,c
->argv
[1])) == NULL
) return;
6302 hashTryConversion(o
,c
->argv
,2,3);
6303 hashTryObjectEncoding(o
,&c
->argv
[2], &c
->argv
[3]);
6304 update
= hashSet(o
,c
->argv
[2],c
->argv
[3]);
6305 addReply(c
, update
? shared
.czero
: shared
.cone
);
6309 static void hsetnxCommand(redisClient
*c
) {
6311 if ((o
= hashLookupWriteOrCreate(c
,c
->argv
[1])) == NULL
) return;
6312 hashTryConversion(o
,c
->argv
,2,3);
6314 if (hashExists(o
, c
->argv
[2])) {
6315 addReply(c
, shared
.czero
);
6317 hashTryObjectEncoding(o
,&c
->argv
[2], &c
->argv
[3]);
6318 hashSet(o
,c
->argv
[2],c
->argv
[3]);
6319 addReply(c
, shared
.cone
);
6324 static void hmsetCommand(redisClient
*c
) {
6328 if ((c
->argc
% 2) == 1) {
6329 addReplySds(c
,sdsnew("-ERR wrong number of arguments for HMSET\r\n"));
6333 if ((o
= hashLookupWriteOrCreate(c
,c
->argv
[1])) == NULL
) return;
6334 hashTryConversion(o
,c
->argv
,2,c
->argc
-1);
6335 for (i
= 2; i
< c
->argc
; i
+= 2) {
6336 hashTryObjectEncoding(o
,&c
->argv
[i
], &c
->argv
[i
+1]);
6337 hashSet(o
,c
->argv
[i
],c
->argv
[i
+1]);
6339 addReply(c
, shared
.ok
);
6343 static void hincrbyCommand(redisClient
*c
) {
6344 long long value
, incr
;
6345 robj
*o
, *current
, *new;
6347 if (getLongLongFromObjectOrReply(c
,c
->argv
[3],&incr
,NULL
) != REDIS_OK
) return;
6348 if ((o
= hashLookupWriteOrCreate(c
,c
->argv
[1])) == NULL
) return;
6349 if ((current
= hashGet(o
,c
->argv
[2])) != NULL
) {
6350 if (current
->encoding
== REDIS_ENCODING_RAW
)
6351 value
= strtoll(current
->ptr
,NULL
,10);
6352 else if (current
->encoding
== REDIS_ENCODING_INT
)
6353 value
= (long)current
->ptr
;
6355 redisAssert(1 != 1);
6356 decrRefCount(current
);
6362 new = createStringObjectFromLongLong(value
);
6363 hashTryObjectEncoding(o
,&c
->argv
[2],NULL
);
6364 hashSet(o
,c
->argv
[2],new);
6366 addReplyLongLong(c
,value
);
6370 static void hgetCommand(redisClient
*c
) {
6372 if ((o
= lookupKeyReadOrReply(c
,c
->argv
[1],shared
.nullbulk
)) == NULL
||
6373 checkType(c
,o
,REDIS_HASH
)) return;
6375 if ((value
= hashGet(o
,c
->argv
[2])) != NULL
) {
6376 addReplyBulk(c
,value
);
6377 decrRefCount(value
);
6379 addReply(c
,shared
.nullbulk
);
6383 static void hmgetCommand(redisClient
*c
) {
6386 o
= lookupKeyRead(c
->db
,c
->argv
[1]);
6387 if (o
!= NULL
&& o
->type
!= REDIS_HASH
) {
6388 addReply(c
,shared
.wrongtypeerr
);
6391 /* Note the check for o != NULL happens inside the loop. This is
6392 * done because objects that cannot be found are considered to be
6393 * an empty hash. The reply should then be a series of NULLs. */
6394 addReplySds(c
,sdscatprintf(sdsempty(),"*%d\r\n",c
->argc
-2));
6395 for (i
= 2; i
< c
->argc
; i
++) {
6396 if (o
!= NULL
&& (value
= hashGet(o
,c
->argv
[i
])) != NULL
) {
6397 addReplyBulk(c
,value
);
6398 decrRefCount(value
);
6400 addReply(c
,shared
.nullbulk
);
6405 static void hdelCommand(redisClient
*c
) {
6407 if ((o
= lookupKeyWriteOrReply(c
,c
->argv
[1],shared
.czero
)) == NULL
||
6408 checkType(c
,o
,REDIS_HASH
)) return;
6410 if (hashDelete(o
,c
->argv
[2])) {
6411 if (hashLength(o
) == 0) deleteKey(c
->db
,c
->argv
[1]);
6412 addReply(c
,shared
.cone
);
6415 addReply(c
,shared
.czero
);
6419 static void hlenCommand(redisClient
*c
) {
6421 if ((o
= lookupKeyReadOrReply(c
,c
->argv
[1],shared
.czero
)) == NULL
||
6422 checkType(c
,o
,REDIS_HASH
)) return;
6424 addReplyUlong(c
,hashLength(o
));
6427 static void genericHgetallCommand(redisClient
*c
, int flags
) {
6428 robj
*o
, *lenobj
, *obj
;
6429 unsigned long count
= 0;
6432 if ((o
= lookupKeyReadOrReply(c
,c
->argv
[1],shared
.emptymultibulk
)) == NULL
6433 || checkType(c
,o
,REDIS_HASH
)) return;
6435 lenobj
= createObject(REDIS_STRING
,NULL
);
6437 decrRefCount(lenobj
);
6439 hi
= hashInitIterator(o
);
6440 while (hashNext(hi
) != REDIS_ERR
) {
6441 if (flags
& REDIS_HASH_KEY
) {
6442 obj
= hashCurrent(hi
,REDIS_HASH_KEY
);
6443 addReplyBulk(c
,obj
);
6447 if (flags
& REDIS_HASH_VALUE
) {
6448 obj
= hashCurrent(hi
,REDIS_HASH_VALUE
);
6449 addReplyBulk(c
,obj
);
6454 hashReleaseIterator(hi
);
6456 lenobj
->ptr
= sdscatprintf(sdsempty(),"*%lu\r\n",count
);
6459 static void hkeysCommand(redisClient
*c
) {
6460 genericHgetallCommand(c
,REDIS_HASH_KEY
);
6463 static void hvalsCommand(redisClient
*c
) {
6464 genericHgetallCommand(c
,REDIS_HASH_VALUE
);
6467 static void hgetallCommand(redisClient
*c
) {
6468 genericHgetallCommand(c
,REDIS_HASH_KEY
|REDIS_HASH_VALUE
);
6471 static void hexistsCommand(redisClient
*c
) {
6473 if ((o
= lookupKeyReadOrReply(c
,c
->argv
[1],shared
.czero
)) == NULL
||
6474 checkType(c
,o
,REDIS_HASH
)) return;
6476 addReply(c
, hashExists(o
,c
->argv
[2]) ? shared
.cone
: shared
.czero
);
6479 static void convertToRealHash(robj
*o
) {
6480 unsigned char *key
, *val
, *p
, *zm
= o
->ptr
;
6481 unsigned int klen
, vlen
;
6482 dict
*dict
= dictCreate(&hashDictType
,NULL
);
6484 assert(o
->type
== REDIS_HASH
&& o
->encoding
!= REDIS_ENCODING_HT
);
6485 p
= zipmapRewind(zm
);
6486 while((p
= zipmapNext(p
,&key
,&klen
,&val
,&vlen
)) != NULL
) {
6487 robj
*keyobj
, *valobj
;
6489 keyobj
= createStringObject((char*)key
,klen
);
6490 valobj
= createStringObject((char*)val
,vlen
);
6491 keyobj
= tryObjectEncoding(keyobj
);
6492 valobj
= tryObjectEncoding(valobj
);
6493 dictAdd(dict
,keyobj
,valobj
);
6495 o
->encoding
= REDIS_ENCODING_HT
;
6500 /* ========================= Non type-specific commands ==================== */
6502 static void flushdbCommand(redisClient
*c
) {
6503 server
.dirty
+= dictSize(c
->db
->dict
);
6504 dictEmpty(c
->db
->dict
);
6505 dictEmpty(c
->db
->expires
);
6506 addReply(c
,shared
.ok
);
6509 static void flushallCommand(redisClient
*c
) {
6510 server
.dirty
+= emptyDb();
6511 addReply(c
,shared
.ok
);
6512 if (server
.bgsavechildpid
!= -1) {
6513 kill(server
.bgsavechildpid
,SIGKILL
);
6514 rdbRemoveTempFile(server
.bgsavechildpid
);
6516 rdbSave(server
.dbfilename
);
6520 static redisSortOperation
*createSortOperation(int type
, robj
*pattern
) {
6521 redisSortOperation
*so
= zmalloc(sizeof(*so
));
6523 so
->pattern
= pattern
;
6527 /* Return the value associated to the key with a name obtained
6528 * substituting the first occurrence of '*' in 'pattern' with 'subst'.
6529 * The returned object will always have its refcount increased by 1
6530 * when it is non-NULL. */
6531 static robj
*lookupKeyByPattern(redisDb
*db
, robj
*pattern
, robj
*subst
) {
6534 robj keyobj
, fieldobj
, *o
;
6535 int prefixlen
, sublen
, postfixlen
, fieldlen
;
6536 /* Exploit the internal sds representation to create a sds string allocated on the stack in order to make this function faster */
6540 char buf
[REDIS_SORTKEY_MAX
+1];
6541 } keyname
, fieldname
;
6543 /* If the pattern is "#" return the substitution object itself in order
6544 * to implement the "SORT ... GET #" feature. */
6545 spat
= pattern
->ptr
;
6546 if (spat
[0] == '#' && spat
[1] == '\0') {
6547 incrRefCount(subst
);
6551 /* The substitution object may be specially encoded. If so we create
6552 * a decoded object on the fly. Otherwise getDecodedObject will just
6553 * increment the ref count, that we'll decrement later. */
6554 subst
= getDecodedObject(subst
);
6557 if (sdslen(spat
)+sdslen(ssub
)-1 > REDIS_SORTKEY_MAX
) return NULL
;
6558 p
= strchr(spat
,'*');
6560 decrRefCount(subst
);
6564 /* Find out if we're dealing with a hash dereference. */
6565 if ((f
= strstr(p
+1, "->")) != NULL
) {
6566 fieldlen
= sdslen(spat
)-(f
-spat
);
6567 /* this also copies \0 character */
6568 memcpy(fieldname
.buf
,f
+2,fieldlen
-1);
6569 fieldname
.len
= fieldlen
-2;
6575 sublen
= sdslen(ssub
);
6576 postfixlen
= sdslen(spat
)-(prefixlen
+1)-fieldlen
;
6577 memcpy(keyname
.buf
,spat
,prefixlen
);
6578 memcpy(keyname
.buf
+prefixlen
,ssub
,sublen
);
6579 memcpy(keyname
.buf
+prefixlen
+sublen
,p
+1,postfixlen
);
6580 keyname
.buf
[prefixlen
+sublen
+postfixlen
] = '\0';
6581 keyname
.len
= prefixlen
+sublen
+postfixlen
;
6582 decrRefCount(subst
);
6584 /* Lookup substituted key */
6585 initStaticStringObject(keyobj
,((char*)&keyname
)+(sizeof(long)*2));
6586 o
= lookupKeyRead(db
,&keyobj
);
6587 if (o
== NULL
) return NULL
;
6590 if (o
->type
!= REDIS_HASH
|| fieldname
.len
< 1) return NULL
;
6592 /* Retrieve value from hash by the field name. This operation
6593 * already increases the refcount of the returned object. */
6594 initStaticStringObject(fieldobj
,((char*)&fieldname
)+(sizeof(long)*2));
6595 o
= hashGet(o
, &fieldobj
);
6597 if (o
->type
!= REDIS_STRING
) return NULL
;
6599 /* Every object that this function returns needs to have its refcount
6600 * increased. sortCommand decreases it again. */
6607 /* sortCompare() is used by qsort in sortCommand(). Given that qsort_r with
6608 * the additional parameter is not standard but BSD-specific, we have to
6609 * pass sorting parameters via the global 'server' structure */
6610 static int sortCompare(const void *s1
, const void *s2
) {
6611 const redisSortObject
*so1
= s1
, *so2
= s2
;
6614 if (!server
.sort_alpha
) {
6615 /* Numeric sorting. Here it's trivial as we precomputed scores */
6616 if (so1
->u
.score
> so2
->u
.score
) {
6618 } else if (so1
->u
.score
< so2
->u
.score
) {
6624 /* Alphanumeric sorting */
6625 if (server
.sort_bypattern
) {
6626 if (!so1
->u
.cmpobj
|| !so2
->u
.cmpobj
) {
6627 /* At least one compare object is NULL */
6628 if (so1
->u
.cmpobj
== so2
->u
.cmpobj
)
6630 else if (so1
->u
.cmpobj
== NULL
)
6635 /* We have both the objects, use strcoll */
6636 cmp
= strcoll(so1
->u
.cmpobj
->ptr
,so2
->u
.cmpobj
->ptr
);
6639 /* Compare elements directly. */
6640 cmp
= compareStringObjects(so1
->obj
,so2
->obj
);
6643 return server
.sort_desc
? -cmp
: cmp
;
6646 /* The SORT command is the most complex command in Redis. Warning: this code
6647 * is optimized for speed and a bit less for readability */
6648 static void sortCommand(redisClient
*c
) {
6651 int desc
= 0, alpha
= 0;
6652 int limit_start
= 0, limit_count
= -1, start
, end
;
6653 int j
, dontsort
= 0, vectorlen
;
6654 int getop
= 0; /* GET operation counter */
6655 robj
*sortval
, *sortby
= NULL
, *storekey
= NULL
;
6656 redisSortObject
*vector
; /* Resulting vector to sort */
6658 /* Lookup the key to sort. It must be of the right types */
6659 sortval
= lookupKeyRead(c
->db
,c
->argv
[1]);
6660 if (sortval
== NULL
) {
6661 addReply(c
,shared
.emptymultibulk
);
6664 if (sortval
->type
!= REDIS_SET
&& sortval
->type
!= REDIS_LIST
&&
6665 sortval
->type
!= REDIS_ZSET
)
6667 addReply(c
,shared
.wrongtypeerr
);
6671 /* Create a list of operations to perform for every sorted element.
6672 * Operations can be GET/DEL/INCR/DECR */
6673 operations
= listCreate();
6674 listSetFreeMethod(operations
,zfree
);
6677 /* Now we need to protect sortval incrementing its count, in the future
6678 * SORT may have options able to overwrite/delete keys during the sorting
6679 * and the sorted key itself may get destroyed */
6680 incrRefCount(sortval
);
6682 /* The SORT command has an SQL-alike syntax, parse it */
6683 while(j
< c
->argc
) {
6684 int leftargs
= c
->argc
-j
-1;
6685 if (!strcasecmp(c
->argv
[j
]->ptr
,"asc")) {
6687 } else if (!strcasecmp(c
->argv
[j
]->ptr
,"desc")) {
6689 } else if (!strcasecmp(c
->argv
[j
]->ptr
,"alpha")) {
6691 } else if (!strcasecmp(c
->argv
[j
]->ptr
,"limit") && leftargs
>= 2) {
6692 limit_start
= atoi(c
->argv
[j
+1]->ptr
);
6693 limit_count
= atoi(c
->argv
[j
+2]->ptr
);
6695 } else if (!strcasecmp(c
->argv
[j
]->ptr
,"store") && leftargs
>= 1) {
6696 storekey
= c
->argv
[j
+1];
6698 } else if (!strcasecmp(c
->argv
[j
]->ptr
,"by") && leftargs
>= 1) {
6699 sortby
= c
->argv
[j
+1];
6700 /* If the BY pattern does not contain '*', i.e. it is constant,
6701 * we don't need to sort nor to lookup the weight keys. */
6702 if (strchr(c
->argv
[j
+1]->ptr
,'*') == NULL
) dontsort
= 1;
6704 } else if (!strcasecmp(c
->argv
[j
]->ptr
,"get") && leftargs
>= 1) {
6705 listAddNodeTail(operations
,createSortOperation(
6706 REDIS_SORT_GET
,c
->argv
[j
+1]));
6710 decrRefCount(sortval
);
6711 listRelease(operations
);
6712 addReply(c
,shared
.syntaxerr
);
6718 /* Load the sorting vector with all the objects to sort */
6719 switch(sortval
->type
) {
6720 case REDIS_LIST
: vectorlen
= listLength((list
*)sortval
->ptr
); break;
6721 case REDIS_SET
: vectorlen
= dictSize((dict
*)sortval
->ptr
); break;
6722 case REDIS_ZSET
: vectorlen
= dictSize(((zset
*)sortval
->ptr
)->dict
); break;
6723 default: vectorlen
= 0; redisPanic("Bad SORT type"); /* Avoid GCC warning */
6725 vector
= zmalloc(sizeof(redisSortObject
)*vectorlen
);
6728 if (sortval
->type
== REDIS_LIST
) {
6729 list
*list
= sortval
->ptr
;
6733 listRewind(list
,&li
);
6734 while((ln
= listNext(&li
))) {
6735 robj
*ele
= ln
->value
;
6736 vector
[j
].obj
= ele
;
6737 vector
[j
].u
.score
= 0;
6738 vector
[j
].u
.cmpobj
= NULL
;
6746 if (sortval
->type
== REDIS_SET
) {
6749 zset
*zs
= sortval
->ptr
;
6753 di
= dictGetIterator(set
);
6754 while((setele
= dictNext(di
)) != NULL
) {
6755 vector
[j
].obj
= dictGetEntryKey(setele
);
6756 vector
[j
].u
.score
= 0;
6757 vector
[j
].u
.cmpobj
= NULL
;
6760 dictReleaseIterator(di
);
6762 redisAssert(j
== vectorlen
);
6764 /* Now it's time to load the right scores in the sorting vector */
6765 if (dontsort
== 0) {
6766 for (j
= 0; j
< vectorlen
; j
++) {
6769 /* lookup value to sort by */
6770 byval
= lookupKeyByPattern(c
->db
,sortby
,vector
[j
].obj
);
6771 if (!byval
) continue;
6773 /* use object itself to sort by */
6774 byval
= vector
[j
].obj
;
6778 if (sortby
) vector
[j
].u
.cmpobj
= getDecodedObject(byval
);
6780 if (byval
->encoding
== REDIS_ENCODING_RAW
) {
6781 vector
[j
].u
.score
= strtod(byval
->ptr
,NULL
);
6782 } else if (byval
->encoding
== REDIS_ENCODING_INT
) {
6783 /* Don't need to decode the object if it's
6784 * integer-encoded (the only encoding supported) so
6785 * far. We can just cast it */
6786 vector
[j
].u
.score
= (long)byval
->ptr
;
6788 redisAssert(1 != 1);
6792 /* when the object was retrieved using lookupKeyByPattern,
6793 * its refcount needs to be decreased. */
6795 decrRefCount(byval
);
6800 /* We are ready to sort the vector... perform a bit of sanity check
6801 * on the LIMIT option too. We'll use a partial version of quicksort. */
6802 start
= (limit_start
< 0) ? 0 : limit_start
;
6803 end
= (limit_count
< 0) ? vectorlen
-1 : start
+limit_count
-1;
6804 if (start
>= vectorlen
) {
6805 start
= vectorlen
-1;
6808 if (end
>= vectorlen
) end
= vectorlen
-1;
6810 if (dontsort
== 0) {
6811 server
.sort_desc
= desc
;
6812 server
.sort_alpha
= alpha
;
6813 server
.sort_bypattern
= sortby
? 1 : 0;
6814 if (sortby
&& (start
!= 0 || end
!= vectorlen
-1))
6815 pqsort(vector
,vectorlen
,sizeof(redisSortObject
),sortCompare
, start
,end
);
6817 qsort(vector
,vectorlen
,sizeof(redisSortObject
),sortCompare
);
6820 /* Send command output to the output buffer, performing the specified
6821 * GET/DEL/INCR/DECR operations if any. */
6822 outputlen
= getop
? getop
*(end
-start
+1) : end
-start
+1;
6823 if (storekey
== NULL
) {
6824 /* STORE option not specified, send the sorting result to the client */
6825 addReplySds(c
,sdscatprintf(sdsempty(),"*%d\r\n",outputlen
));
6826 for (j
= start
; j
<= end
; j
++) {
6830 if (!getop
) addReplyBulk(c
,vector
[j
].obj
);
6831 listRewind(operations
,&li
);
6832 while((ln
= listNext(&li
))) {
6833 redisSortOperation
*sop
= ln
->value
;
6834 robj
*val
= lookupKeyByPattern(c
->db
,sop
->pattern
,
6837 if (sop
->type
== REDIS_SORT_GET
) {
6839 addReply(c
,shared
.nullbulk
);
6841 addReplyBulk(c
,val
);
6845 redisAssert(sop
->type
== REDIS_SORT_GET
); /* always fails */
6850 robj
*listObject
= createListObject();
6851 list
*listPtr
= (list
*) listObject
->ptr
;
6853 /* STORE option specified, set the sorting result as a List object */
6854 for (j
= start
; j
<= end
; j
++) {
6859 listAddNodeTail(listPtr
,vector
[j
].obj
);
6860 incrRefCount(vector
[j
].obj
);
6862 listRewind(operations
,&li
);
6863 while((ln
= listNext(&li
))) {
6864 redisSortOperation
*sop
= ln
->value
;
6865 robj
*val
= lookupKeyByPattern(c
->db
,sop
->pattern
,
6868 if (sop
->type
== REDIS_SORT_GET
) {
6870 listAddNodeTail(listPtr
,createStringObject("",0));
6872 /* We should do an incrRefCount on val because it is
6873 * added to the list, but also a decrRefCount because
6874 * it is returned by lookupKeyByPattern. This results
6875 * in doing nothing at all. */
6876 listAddNodeTail(listPtr
,val
);
6879 redisAssert(sop
->type
== REDIS_SORT_GET
); /* always fails */
6883 if (dictReplace(c
->db
->dict
,storekey
,listObject
)) {
6884 incrRefCount(storekey
);
6886 /* Note: we add 1 because the DB is dirty anyway since even if the
6887 * SORT result is empty a new key is set and maybe the old content
6889 server
.dirty
+= 1+outputlen
;
6890 addReplySds(c
,sdscatprintf(sdsempty(),":%d\r\n",outputlen
));
6894 decrRefCount(sortval
);
6895 listRelease(operations
);
6896 for (j
= 0; j
< vectorlen
; j
++) {
6897 if (alpha
&& vector
[j
].u
.cmpobj
)
6898 decrRefCount(vector
[j
].u
.cmpobj
);
6903 /* Convert an amount of bytes into a human readable string in the form
6904 * of 100B, 2G, 100M, 4K, and so forth. */
6905 static void bytesToHuman(char *s
, unsigned long long n
) {
6910 sprintf(s
,"%lluB",n
);
6912 } else if (n
< (1024*1024)) {
6913 d
= (double)n
/(1024);
6914 sprintf(s
,"%.2fK",d
);
6915 } else if (n
< (1024LL*1024*1024)) {
6916 d
= (double)n
/(1024*1024);
6917 sprintf(s
,"%.2fM",d
);
6918 } else if (n
< (1024LL*1024*1024*1024)) {
6919 d
= (double)n
/(1024LL*1024*1024);
6920 sprintf(s
,"%.2fG",d
);
6924 /* Create the string returned by the INFO command. This is decoupled
6925 * from the INFO command itself as we need to report the same information
6926 * on memory corruption problems. */
6927 static sds
genRedisInfoString(void) {
6929 time_t uptime
= time(NULL
)-server
.stat_starttime
;
6933 bytesToHuman(hmem
,zmalloc_used_memory());
6934 info
= sdscatprintf(sdsempty(),
6935 "redis_version:%s\r\n"
6937 "multiplexing_api:%s\r\n"
6938 "process_id:%ld\r\n"
6939 "uptime_in_seconds:%ld\r\n"
6940 "uptime_in_days:%ld\r\n"
6941 "connected_clients:%d\r\n"
6942 "connected_slaves:%d\r\n"
6943 "blocked_clients:%d\r\n"
6944 "used_memory:%zu\r\n"
6945 "used_memory_human:%s\r\n"
6946 "changes_since_last_save:%lld\r\n"
6947 "bgsave_in_progress:%d\r\n"
6948 "last_save_time:%ld\r\n"
6949 "bgrewriteaof_in_progress:%d\r\n"
6950 "total_connections_received:%lld\r\n"
6951 "total_commands_processed:%lld\r\n"
6952 "expired_keys:%lld\r\n"
6953 "hash_max_zipmap_entries:%ld\r\n"
6954 "hash_max_zipmap_value:%ld\r\n"
6955 "pubsub_channels:%ld\r\n"
6956 "pubsub_patterns:%u\r\n"
6960 (sizeof(long) == 8) ? "64" : "32",
6965 listLength(server
.clients
)-listLength(server
.slaves
),
6966 listLength(server
.slaves
),
6967 server
.blpop_blocked_clients
,
6968 zmalloc_used_memory(),
6971 server
.bgsavechildpid
!= -1,
6973 server
.bgrewritechildpid
!= -1,
6974 server
.stat_numconnections
,
6975 server
.stat_numcommands
,
6976 server
.stat_expiredkeys
,
6977 server
.hash_max_zipmap_entries
,
6978 server
.hash_max_zipmap_value
,
6979 dictSize(server
.pubsub_channels
),
6980 listLength(server
.pubsub_patterns
),
6981 server
.vm_enabled
!= 0,
6982 server
.masterhost
== NULL
? "master" : "slave"
6984 if (server
.masterhost
) {
6985 info
= sdscatprintf(info
,
6986 "master_host:%s\r\n"
6987 "master_port:%d\r\n"
6988 "master_link_status:%s\r\n"
6989 "master_last_io_seconds_ago:%d\r\n"
6992 (server
.replstate
== REDIS_REPL_CONNECTED
) ?
6994 server
.master
? ((int)(time(NULL
)-server
.master
->lastinteraction
)) : -1
6997 if (server
.vm_enabled
) {
6999 info
= sdscatprintf(info
,
7000 "vm_conf_max_memory:%llu\r\n"
7001 "vm_conf_page_size:%llu\r\n"
7002 "vm_conf_pages:%llu\r\n"
7003 "vm_stats_used_pages:%llu\r\n"
7004 "vm_stats_swapped_objects:%llu\r\n"
7005 "vm_stats_swappin_count:%llu\r\n"
7006 "vm_stats_swappout_count:%llu\r\n"
7007 "vm_stats_io_newjobs_len:%lu\r\n"
7008 "vm_stats_io_processing_len:%lu\r\n"
7009 "vm_stats_io_processed_len:%lu\r\n"
7010 "vm_stats_io_active_threads:%lu\r\n"
7011 "vm_stats_blocked_clients:%lu\r\n"
7012 ,(unsigned long long) server
.vm_max_memory
,
7013 (unsigned long long) server
.vm_page_size
,
7014 (unsigned long long) server
.vm_pages
,
7015 (unsigned long long) server
.vm_stats_used_pages
,
7016 (unsigned long long) server
.vm_stats_swapped_objects
,
7017 (unsigned long long) server
.vm_stats_swapins
,
7018 (unsigned long long) server
.vm_stats_swapouts
,
7019 (unsigned long) listLength(server
.io_newjobs
),
7020 (unsigned long) listLength(server
.io_processing
),
7021 (unsigned long) listLength(server
.io_processed
),
7022 (unsigned long) server
.io_active_threads
,
7023 (unsigned long) server
.vm_blocked_clients
7027 for (j
= 0; j
< server
.dbnum
; j
++) {
7028 long long keys
, vkeys
;
7030 keys
= dictSize(server
.db
[j
].dict
);
7031 vkeys
= dictSize(server
.db
[j
].expires
);
7032 if (keys
|| vkeys
) {
7033 info
= sdscatprintf(info
, "db%d:keys=%lld,expires=%lld\r\n",
7040 static void infoCommand(redisClient
*c
) {
7041 sds info
= genRedisInfoString();
7042 addReplySds(c
,sdscatprintf(sdsempty(),"$%lu\r\n",
7043 (unsigned long)sdslen(info
)));
7044 addReplySds(c
,info
);
7045 addReply(c
,shared
.crlf
);
7048 static void monitorCommand(redisClient
*c
) {
7049 /* ignore MONITOR if already slave or in monitor mode */
7050 if (c
->flags
& REDIS_SLAVE
) return;
7052 c
->flags
|= (REDIS_SLAVE
|REDIS_MONITOR
);
7054 listAddNodeTail(server
.monitors
,c
);
7055 addReply(c
,shared
.ok
);
7058 /* ================================= Expire ================================= */
7059 static int removeExpire(redisDb
*db
, robj
*key
) {
7060 if (dictDelete(db
->expires
,key
) == DICT_OK
) {
7067 static int setExpire(redisDb
*db
, robj
*key
, time_t when
) {
7068 if (dictAdd(db
->expires
,key
,(void*)when
) == DICT_ERR
) {
7076 /* Return the expire time of the specified key, or -1 if no expire
7077 * is associated with this key (i.e. the key is non volatile) */
7078 static time_t getExpire(redisDb
*db
, robj
*key
) {
7081 /* No expire? return ASAP */
7082 if (dictSize(db
->expires
) == 0 ||
7083 (de
= dictFind(db
->expires
,key
)) == NULL
) return -1;
7085 return (time_t) dictGetEntryVal(de
);
7088 static int expireIfNeeded(redisDb
*db
, robj
*key
) {
7092 /* No expire? return ASAP */
7093 if (dictSize(db
->expires
) == 0 ||
7094 (de
= dictFind(db
->expires
,key
)) == NULL
) return 0;
7096 /* Lookup the expire */
7097 when
= (time_t) dictGetEntryVal(de
);
7098 if (time(NULL
) <= when
) return 0;
7100 /* Delete the key */
7101 dictDelete(db
->expires
,key
);
7102 server
.stat_expiredkeys
++;
7103 return dictDelete(db
->dict
,key
) == DICT_OK
;
7106 static int deleteIfVolatile(redisDb
*db
, robj
*key
) {
7109 /* No expire? return ASAP */
7110 if (dictSize(db
->expires
) == 0 ||
7111 (de
= dictFind(db
->expires
,key
)) == NULL
) return 0;
7113 /* Delete the key */
7115 server
.stat_expiredkeys
++;
7116 dictDelete(db
->expires
,key
);
7117 return dictDelete(db
->dict
,key
) == DICT_OK
;
7120 static void expireGenericCommand(redisClient
*c
, robj
*key
, robj
*param
, long offset
) {
7124 if (getLongFromObjectOrReply(c
, param
, &seconds
, NULL
) != REDIS_OK
) return;
7128 de
= dictFind(c
->db
->dict
,key
);
7130 addReply(c
,shared
.czero
);
7134 if (deleteKey(c
->db
,key
)) server
.dirty
++;
7135 addReply(c
, shared
.cone
);
7138 time_t when
= time(NULL
)+seconds
;
7139 if (setExpire(c
->db
,key
,when
)) {
7140 addReply(c
,shared
.cone
);
7143 addReply(c
,shared
.czero
);
7149 static void expireCommand(redisClient
*c
) {
7150 expireGenericCommand(c
,c
->argv
[1],c
->argv
[2],0);
7153 static void expireatCommand(redisClient
*c
) {
7154 expireGenericCommand(c
,c
->argv
[1],c
->argv
[2],time(NULL
));
7157 static void ttlCommand(redisClient
*c
) {
7161 expire
= getExpire(c
->db
,c
->argv
[1]);
7163 ttl
= (int) (expire
-time(NULL
));
7164 if (ttl
< 0) ttl
= -1;
7166 addReplySds(c
,sdscatprintf(sdsempty(),":%d\r\n",ttl
));
7169 /* ================================ MULTI/EXEC ============================== */
7171 /* Client state initialization for MULTI/EXEC */
7172 static void initClientMultiState(redisClient
*c
) {
7173 c
->mstate
.commands
= NULL
;
7174 c
->mstate
.count
= 0;
7177 /* Release all the resources associated with MULTI/EXEC state */
7178 static void freeClientMultiState(redisClient
*c
) {
7181 for (j
= 0; j
< c
->mstate
.count
; j
++) {
7183 multiCmd
*mc
= c
->mstate
.commands
+j
;
7185 for (i
= 0; i
< mc
->argc
; i
++)
7186 decrRefCount(mc
->argv
[i
]);
7189 zfree(c
->mstate
.commands
);
7192 /* Add a new command into the MULTI commands queue */
7193 static void queueMultiCommand(redisClient
*c
, struct redisCommand
*cmd
) {
7197 c
->mstate
.commands
= zrealloc(c
->mstate
.commands
,
7198 sizeof(multiCmd
)*(c
->mstate
.count
+1));
7199 mc
= c
->mstate
.commands
+c
->mstate
.count
;
7202 mc
->argv
= zmalloc(sizeof(robj
*)*c
->argc
);
7203 memcpy(mc
->argv
,c
->argv
,sizeof(robj
*)*c
->argc
);
7204 for (j
= 0; j
< c
->argc
; j
++)
7205 incrRefCount(mc
->argv
[j
]);
7209 static void multiCommand(redisClient
*c
) {
7210 c
->flags
|= REDIS_MULTI
;
7211 addReply(c
,shared
.ok
);
7214 static void discardCommand(redisClient
*c
) {
7215 if (!(c
->flags
& REDIS_MULTI
)) {
7216 addReplySds(c
,sdsnew("-ERR DISCARD without MULTI\r\n"));
7220 freeClientMultiState(c
);
7221 initClientMultiState(c
);
7222 c
->flags
&= (~REDIS_MULTI
);
7223 addReply(c
,shared
.ok
);
7226 /* Send a MULTI command to all the slaves and AOF file. Check the execCommand
7227 * implementation for more information. */
7228 static void execCommandReplicateMulti(redisClient
*c
) {
7229 struct redisCommand
*cmd
;
7230 robj
*multistring
= createStringObject("MULTI",5);
7232 cmd
= lookupCommand("multi");
7233 if (server
.appendonly
)
7234 feedAppendOnlyFile(cmd
,c
->db
->id
,&multistring
,1);
7235 if (listLength(server
.slaves
))
7236 replicationFeedSlaves(server
.slaves
,c
->db
->id
,&multistring
,1);
7237 decrRefCount(multistring
);
7240 static void execCommand(redisClient
*c
) {
7245 if (!(c
->flags
& REDIS_MULTI
)) {
7246 addReplySds(c
,sdsnew("-ERR EXEC without MULTI\r\n"));
7250 /* Replicate a MULTI request now that we are sure the block is executed.
7251 * This way we'll deliver the MULTI/..../EXEC block as a whole and
7252 * both the AOF and the replication link will have the same consistency
7253 * and atomicity guarantees. */
7254 execCommandReplicateMulti(c
);
7256 /* Exec all the queued commands */
7257 orig_argv
= c
->argv
;
7258 orig_argc
= c
->argc
;
7259 addReplySds(c
,sdscatprintf(sdsempty(),"*%d\r\n",c
->mstate
.count
));
7260 for (j
= 0; j
< c
->mstate
.count
; j
++) {
7261 c
->argc
= c
->mstate
.commands
[j
].argc
;
7262 c
->argv
= c
->mstate
.commands
[j
].argv
;
7263 call(c
,c
->mstate
.commands
[j
].cmd
);
7265 c
->argv
= orig_argv
;
7266 c
->argc
= orig_argc
;
7267 freeClientMultiState(c
);
7268 initClientMultiState(c
);
7269 c
->flags
&= (~REDIS_MULTI
);
7270 /* Make sure the EXEC command is always replicated / AOF, since we
7271 * always send the MULTI command (we can't know beforehand if the
7272 * next operations will contain at least a modification to the DB). */
7276 /* =========================== Blocking Operations ========================= */
7278 /* Currently Redis blocking operations support is limited to list POP ops,
7279 * so the current implementation is not fully generic, but it is also not
7280 * completely specific so it will not require a rewrite to support new
7281 * kinds of blocking operations in the future.
7283 * Still it's important to note that list blocking operations can be already
7284 * used as a notification mechanism in order to implement other blocking
7285 * operations at application level, so there must be very strong evidence
7286 * of usefulness and generality before new blocking operations are implemented.
7288 * This is how the current blocking POP works, we use BLPOP as example:
7289 * - If the user calls BLPOP and the key exists and contains a non empty list
7290 * then LPOP is called instead. So BLPOP is semantically the same as LPOP
7291 * if there is no need to block.
7292 * - If instead BLPOP is called and the key does not exist or the list is
7293 * empty we need to block. In order to do so we remove the notification for
7294 * new data to read in the client socket (so that we'll not serve new
7295 * requests if the blocking request is not served). Also we put the client
7296 * in a dictionary (db->blockingkeys) mapping keys to a list of clients
7297 * blocking for these keys.
7298 * - If a PUSH operation against a key with blocked clients waiting is
7299 * performed, we serve the first in the list: basically instead of pushing
7300 * the new element inside the list we return it to the (first / oldest)
7301 * blocking client, unblock the client, and remove it from the list.
7303 * The above comment and the source code should be enough in order to understand
7304 * the implementation and modify / fix it later.
7307 /* Set a client in blocking mode for the specified key, with the specified
7309 static void blockForKeys(redisClient
*c
, robj
**keys
, int numkeys
, time_t timeout
) {
7314 c
->blockingkeys
= zmalloc(sizeof(robj
*)*numkeys
);
7315 c
->blockingkeysnum
= numkeys
;
7316 c
->blockingto
= timeout
;
7317 for (j
= 0; j
< numkeys
; j
++) {
7318 /* Add the key in the client structure, to map clients -> keys */
7319 c
->blockingkeys
[j
] = keys
[j
];
7320 incrRefCount(keys
[j
]);
7322 /* And in the other "side", to map keys -> clients */
7323 de
= dictFind(c
->db
->blockingkeys
,keys
[j
]);
7327 /* For every key we take a list of clients blocked for it */
7329 retval
= dictAdd(c
->db
->blockingkeys
,keys
[j
],l
);
7330 incrRefCount(keys
[j
]);
7331 assert(retval
== DICT_OK
);
7333 l
= dictGetEntryVal(de
);
7335 listAddNodeTail(l
,c
);
7337 /* Mark the client as a blocked client */
7338 c
->flags
|= REDIS_BLOCKED
;
7339 server
.blpop_blocked_clients
++;
7342 /* Unblock a client that's waiting in a blocking operation such as BLPOP */
7343 static void unblockClientWaitingData(redisClient
*c
) {
7348 assert(c
->blockingkeys
!= NULL
);
7349 /* The client may wait for multiple keys, so unblock it for every key. */
7350 for (j
= 0; j
< c
->blockingkeysnum
; j
++) {
7351 /* Remove this client from the list of clients waiting for this key. */
7352 de
= dictFind(c
->db
->blockingkeys
,c
->blockingkeys
[j
]);
7354 l
= dictGetEntryVal(de
);
7355 listDelNode(l
,listSearchKey(l
,c
));
7356 /* If the list is empty we need to remove it to avoid wasting memory */
7357 if (listLength(l
) == 0)
7358 dictDelete(c
->db
->blockingkeys
,c
->blockingkeys
[j
]);
7359 decrRefCount(c
->blockingkeys
[j
]);
7361 /* Cleanup the client structure */
7362 zfree(c
->blockingkeys
);
7363 c
->blockingkeys
= NULL
;
7364 c
->flags
&= (~REDIS_BLOCKED
);
7365 server
.blpop_blocked_clients
--;
7366 /* We want to process data if there is some command waiting
7367 * in the input buffer. Note that this is safe even if
7368 * unblockClientWaitingData() gets called from freeClient() because
7369 * freeClient() will be smart enough to call this function
7370 * *after* c->querybuf was set to NULL. */
7371 if (c
->querybuf
&& sdslen(c
->querybuf
) > 0) processInputBuffer(c
);
7374 /* This should be called from any function PUSHing into lists.
7375 * 'c' is the "pushing client", 'key' is the key it is pushing data against,
7376 * 'ele' is the element pushed.
7378 * If the function returns 0 there was no client waiting for a list push
7381 * If the function returns 1 there was a client waiting for a list push
7382 * against this key, the element was passed to this client thus it's not
7383 * needed to actually add it to the list and the caller should return asap. */
7384 static int handleClientsWaitingListPush(redisClient
*c
, robj
*key
, robj
*ele
) {
7385 struct dictEntry
*de
;
7386 redisClient
*receiver
;
7390 de
= dictFind(c
->db
->blockingkeys
,key
);
7391 if (de
== NULL
) return 0;
7392 l
= dictGetEntryVal(de
);
7395 receiver
= ln
->value
;
7397 addReplySds(receiver
,sdsnew("*2\r\n"));
7398 addReplyBulk(receiver
,key
);
7399 addReplyBulk(receiver
,ele
);
7400 unblockClientWaitingData(receiver
);
7404 /* Blocking RPOP/LPOP */
7405 static void blockingPopGenericCommand(redisClient
*c
, int where
) {
7410 for (j
= 1; j
< c
->argc
-1; j
++) {
7411 o
= lookupKeyWrite(c
->db
,c
->argv
[j
]);
7413 if (o
->type
!= REDIS_LIST
) {
7414 addReply(c
,shared
.wrongtypeerr
);
7417 list
*list
= o
->ptr
;
7418 if (listLength(list
) != 0) {
7419 /* If the list contains elements fall back to the usual
7420 * non-blocking POP operation */
7421 robj
*argv
[2], **orig_argv
;
7424 /* We need to alter the command arguments before calling
7425 * popGenericCommand() as the command takes a single key. */
7426 orig_argv
= c
->argv
;
7427 orig_argc
= c
->argc
;
7428 argv
[1] = c
->argv
[j
];
7432 /* Also the return value is different, we need to output
7433 * the multi bulk reply header and the key name. The
7434 * "real" command will add the last element (the value)
7435 * for us. If this sounds like a hack to you it's just
7436 * because it is... */
7437 addReplySds(c
,sdsnew("*2\r\n"));
7438 addReplyBulk(c
,argv
[1]);
7439 popGenericCommand(c
,where
);
7441 /* Fix the client structure with the original stuff */
7442 c
->argv
= orig_argv
;
7443 c
->argc
= orig_argc
;
7449 /* If the list is empty or the key does not exist we must block */
7450 timeout
= strtol(c
->argv
[c
->argc
-1]->ptr
,NULL
,10);
7451 if (timeout
> 0) timeout
+= time(NULL
);
7452 blockForKeys(c
,c
->argv
+1,c
->argc
-2,timeout
);
7455 static void blpopCommand(redisClient
*c
) {
7456 blockingPopGenericCommand(c
,REDIS_HEAD
);
7459 static void brpopCommand(redisClient
*c
) {
7460 blockingPopGenericCommand(c
,REDIS_TAIL
);
7463 /* =============================== Replication ============================= */
7465 static int syncWrite(int fd
, char *ptr
, ssize_t size
, int timeout
) {
7466 ssize_t nwritten
, ret
= size
;
7467 time_t start
= time(NULL
);
7471 if (aeWait(fd
,AE_WRITABLE
,1000) & AE_WRITABLE
) {
7472 nwritten
= write(fd
,ptr
,size
);
7473 if (nwritten
== -1) return -1;
7477 if ((time(NULL
)-start
) > timeout
) {
7485 static int syncRead(int fd
, char *ptr
, ssize_t size
, int timeout
) {
7486 ssize_t nread
, totread
= 0;
7487 time_t start
= time(NULL
);
7491 if (aeWait(fd
,AE_READABLE
,1000) & AE_READABLE
) {
7492 nread
= read(fd
,ptr
,size
);
7493 if (nread
== -1) return -1;
7498 if ((time(NULL
)-start
) > timeout
) {
7506 static int syncReadLine(int fd
, char *ptr
, ssize_t size
, int timeout
) {
7513 if (syncRead(fd
,&c
,1,timeout
) == -1) return -1;
7516 if (nread
&& *(ptr
-1) == '\r') *(ptr
-1) = '\0';
7527 static void syncCommand(redisClient
*c
) {
7528 /* ignore SYNC if already slave or in monitor mode */
7529 if (c
->flags
& REDIS_SLAVE
) return;
7531 /* SYNC can't be issued when the server has pending data to send to
7532 * the client about already issued commands. We need a fresh reply
7533 * buffer registering the differences between the BGSAVE and the current
7534 * dataset, so that we can copy to other slaves if needed. */
7535 if (listLength(c
->reply
) != 0) {
7536 addReplySds(c
,sdsnew("-ERR SYNC is invalid with pending input\r\n"));
7540 redisLog(REDIS_NOTICE
,"Slave ask for synchronization");
7541 /* Here we need to check if there is a background saving operation
7542 * in progress, or if it is required to start one */
7543 if (server
.bgsavechildpid
!= -1) {
7544 /* Ok a background save is in progress. Let's check if it is a good
7545 * one for replication, i.e. if there is another slave that is
7546 * registering differences since the server forked to save */
7551 listRewind(server
.slaves
,&li
);
7552 while((ln
= listNext(&li
))) {
7554 if (slave
->replstate
== REDIS_REPL_WAIT_BGSAVE_END
) break;
7557 /* Perfect, the server is already registering differences for
7558 * another slave. Set the right state, and copy the buffer. */
7559 listRelease(c
->reply
);
7560 c
->reply
= listDup(slave
->reply
);
7561 c
->replstate
= REDIS_REPL_WAIT_BGSAVE_END
;
7562 redisLog(REDIS_NOTICE
,"Waiting for end of BGSAVE for SYNC");
7564 /* No way, we need to wait for the next BGSAVE in order to
7565 * register differences */
7566 c
->replstate
= REDIS_REPL_WAIT_BGSAVE_START
;
7567 redisLog(REDIS_NOTICE
,"Waiting for next BGSAVE for SYNC");
7570 /* Ok we don't have a BGSAVE in progress, let's start one */
7571 redisLog(REDIS_NOTICE
,"Starting BGSAVE for SYNC");
7572 if (rdbSaveBackground(server
.dbfilename
) != REDIS_OK
) {
7573 redisLog(REDIS_NOTICE
,"Replication failed, can't BGSAVE");
7574 addReplySds(c
,sdsnew("-ERR Unalbe to perform background save\r\n"));
7577 c
->replstate
= REDIS_REPL_WAIT_BGSAVE_END
;
7580 c
->flags
|= REDIS_SLAVE
;
7582 listAddNodeTail(server
.slaves
,c
);
7586 static void sendBulkToSlave(aeEventLoop
*el
, int fd
, void *privdata
, int mask
) {
7587 redisClient
*slave
= privdata
;
7589 REDIS_NOTUSED(mask
);
7590 char buf
[REDIS_IOBUF_LEN
];
7591 ssize_t nwritten
, buflen
;
7593 if (slave
->repldboff
== 0) {
7594 /* Write the bulk write count before transferring the DB. In theory here
7595 * we don't know how much room there is in the output buffer of the
7596 * socket, but in practice SO_SNDLOWAT (the minimum count for output
7597 * operations) will never be smaller than the few bytes we need. */
7600 bulkcount
= sdscatprintf(sdsempty(),"$%lld\r\n",(unsigned long long)
7602 if (write(fd
,bulkcount
,sdslen(bulkcount
)) != (signed)sdslen(bulkcount
))
7610 lseek(slave
->repldbfd
,slave
->repldboff
,SEEK_SET
);
7611 buflen
= read(slave
->repldbfd
,buf
,REDIS_IOBUF_LEN
);
7613 redisLog(REDIS_WARNING
,"Read error sending DB to slave: %s",
7614 (buflen
== 0) ? "premature EOF" : strerror(errno
));
7618 if ((nwritten
= write(fd
,buf
,buflen
)) == -1) {
7619 redisLog(REDIS_VERBOSE
,"Write error sending DB to slave: %s",
7624 slave
->repldboff
+= nwritten
;
7625 if (slave
->repldboff
== slave
->repldbsize
) {
7626 close(slave
->repldbfd
);
7627 slave
->repldbfd
= -1;
7628 aeDeleteFileEvent(server
.el
,slave
->fd
,AE_WRITABLE
);
7629 slave
->replstate
= REDIS_REPL_ONLINE
;
7630 if (aeCreateFileEvent(server
.el
, slave
->fd
, AE_WRITABLE
,
7631 sendReplyToClient
, slave
) == AE_ERR
) {
7635 addReplySds(slave
,sdsempty());
7636 redisLog(REDIS_NOTICE
,"Synchronization with slave succeeded");
7640 /* This function is called at the end of every background saving.
7641 * The argument bgsaveerr is REDIS_OK if the background saving succeeded
7642 * otherwise REDIS_ERR is passed to the function.
7644 * The goal of this function is to handle slaves waiting for a successful
7645 * background saving in order to perform non-blocking synchronization. */
7646 static void updateSlavesWaitingBgsave(int bgsaveerr
) {
7648 int startbgsave
= 0;
7651 listRewind(server
.slaves
,&li
);
7652 while((ln
= listNext(&li
))) {
7653 redisClient
*slave
= ln
->value
;
7655 if (slave
->replstate
== REDIS_REPL_WAIT_BGSAVE_START
) {
7657 slave
->replstate
= REDIS_REPL_WAIT_BGSAVE_END
;
7658 } else if (slave
->replstate
== REDIS_REPL_WAIT_BGSAVE_END
) {
7659 struct redis_stat buf
;
7661 if (bgsaveerr
!= REDIS_OK
) {
7663 redisLog(REDIS_WARNING
,"SYNC failed. BGSAVE child returned an error");
7666 if ((slave
->repldbfd
= open(server
.dbfilename
,O_RDONLY
)) == -1 ||
7667 redis_fstat(slave
->repldbfd
,&buf
) == -1) {
7669 redisLog(REDIS_WARNING
,"SYNC failed. Can't open/stat DB after BGSAVE: %s", strerror(errno
));
7672 slave
->repldboff
= 0;
7673 slave
->repldbsize
= buf
.st_size
;
7674 slave
->replstate
= REDIS_REPL_SEND_BULK
;
7675 aeDeleteFileEvent(server
.el
,slave
->fd
,AE_WRITABLE
);
7676 if (aeCreateFileEvent(server
.el
, slave
->fd
, AE_WRITABLE
, sendBulkToSlave
, slave
) == AE_ERR
) {
7683 if (rdbSaveBackground(server
.dbfilename
) != REDIS_OK
) {
7686 listRewind(server
.slaves
,&li
);
7687 redisLog(REDIS_WARNING
,"SYNC failed. BGSAVE failed");
7688 while((ln
= listNext(&li
))) {
7689 redisClient
*slave
= ln
->value
;
7691 if (slave
->replstate
== REDIS_REPL_WAIT_BGSAVE_START
)
7698 static int syncWithMaster(void) {
7699 char buf
[1024], tmpfile
[256], authcmd
[1024];
7701 int fd
= anetTcpConnect(NULL
,server
.masterhost
,server
.masterport
);
7702 int dfd
, maxtries
= 5;
7705 redisLog(REDIS_WARNING
,"Unable to connect to MASTER: %s",
7710 /* AUTH with the master if required. */
7711 if(server
.masterauth
) {
7712 snprintf(authcmd
, 1024, "AUTH %s\r\n", server
.masterauth
);
7713 if (syncWrite(fd
, authcmd
, strlen(server
.masterauth
)+7, 5) == -1) {
7715 redisLog(REDIS_WARNING
,"Unable to AUTH to MASTER: %s",
7719 /* Read the AUTH result. */
7720 if (syncReadLine(fd
,buf
,1024,3600) == -1) {
7722 redisLog(REDIS_WARNING
,"I/O error reading auth result from MASTER: %s",
7726 if (buf
[0] != '+') {
7728 redisLog(REDIS_WARNING
,"Cannot AUTH to MASTER, is the masterauth password correct?");
7733 /* Issue the SYNC command */
7734 if (syncWrite(fd
,"SYNC \r\n",7,5) == -1) {
7736 redisLog(REDIS_WARNING
,"I/O error writing to MASTER: %s",
7740 /* Read the bulk write count */
7741 if (syncReadLine(fd
,buf
,1024,3600) == -1) {
7743 redisLog(REDIS_WARNING
,"I/O error reading bulk count from MASTER: %s",
7747 if (buf
[0] != '$') {
7749 redisLog(REDIS_WARNING
,"Bad protocol from MASTER, the first byte is not '$', are you sure the host and port are right?");
7752 dumpsize
= strtol(buf
+1,NULL
,10);
7753 redisLog(REDIS_NOTICE
,"Receiving %ld bytes data dump from MASTER",dumpsize
);
7754 /* Read the bulk write data on a temp file */
7756 snprintf(tmpfile
,256,
7757 "temp-%d.%ld.rdb",(int)time(NULL
),(long int)getpid());
7758 dfd
= open(tmpfile
,O_CREAT
|O_WRONLY
|O_EXCL
,0644);
7759 if (dfd
!= -1) break;
7764 redisLog(REDIS_WARNING
,"Opening the temp file needed for MASTER <-> SLAVE synchronization: %s",strerror(errno
));
7768 int nread
, nwritten
;
7770 nread
= read(fd
,buf
,(dumpsize
< 1024)?dumpsize
:1024);
7772 redisLog(REDIS_WARNING
,"I/O error trying to sync with MASTER: %s",
7778 nwritten
= write(dfd
,buf
,nread
);
7779 if (nwritten
== -1) {
7780 redisLog(REDIS_WARNING
,"Write error writing to the DB dump file needed for MASTER <-> SLAVE synchrnonization: %s", strerror(errno
));
7788 if (rename(tmpfile
,server
.dbfilename
) == -1) {
7789 redisLog(REDIS_WARNING
,"Failed trying to rename the temp DB into dump.rdb in MASTER <-> SLAVE synchronization: %s", strerror(errno
));
7795 if (rdbLoad(server
.dbfilename
) != REDIS_OK
) {
7796 redisLog(REDIS_WARNING
,"Failed trying to load the MASTER synchronization DB from disk");
7800 server
.master
= createClient(fd
);
7801 server
.master
->flags
|= REDIS_MASTER
;
7802 server
.master
->authenticated
= 1;
7803 server
.replstate
= REDIS_REPL_CONNECTED
;
7807 static void slaveofCommand(redisClient
*c
) {
7808 if (!strcasecmp(c
->argv
[1]->ptr
,"no") &&
7809 !strcasecmp(c
->argv
[2]->ptr
,"one")) {
7810 if (server
.masterhost
) {
7811 sdsfree(server
.masterhost
);
7812 server
.masterhost
= NULL
;
7813 if (server
.master
) freeClient(server
.master
);
7814 server
.replstate
= REDIS_REPL_NONE
;
7815 redisLog(REDIS_NOTICE
,"MASTER MODE enabled (user request)");
7818 sdsfree(server
.masterhost
);
7819 server
.masterhost
= sdsdup(c
->argv
[1]->ptr
);
7820 server
.masterport
= atoi(c
->argv
[2]->ptr
);
7821 if (server
.master
) freeClient(server
.master
);
7822 server
.replstate
= REDIS_REPL_CONNECT
;
7823 redisLog(REDIS_NOTICE
,"SLAVE OF %s:%d enabled (user request)",
7824 server
.masterhost
, server
.masterport
);
7826 addReply(c
,shared
.ok
);
7829 /* ============================ Maxmemory directive ======================== */
7831 /* Try to free one object from the pre-allocated objects free list.
7832 * This is useful under low mem conditions as by default we take 1 million
7833 * free objects allocated. On success REDIS_OK is returned, otherwise
7835 static int tryFreeOneObjectFromFreelist(void) {
7838 if (server
.vm_enabled
) pthread_mutex_lock(&server
.obj_freelist_mutex
);
7839 if (listLength(server
.objfreelist
)) {
7840 listNode
*head
= listFirst(server
.objfreelist
);
7841 o
= listNodeValue(head
);
7842 listDelNode(server
.objfreelist
,head
);
7843 if (server
.vm_enabled
) pthread_mutex_unlock(&server
.obj_freelist_mutex
);
7847 if (server
.vm_enabled
) pthread_mutex_unlock(&server
.obj_freelist_mutex
);
7852 /* This function gets called when 'maxmemory' is set on the config file to limit
7853 * the max memory used by the server, and we are out of memory.
7854 * This function will try to, in order:
7856 * - Free objects from the free list
7857 * - Try to remove keys with an EXPIRE set
7859 * If it is not possible to free enough memory to reach used-memory < maxmemory
7860 * the server will start refusing commands that will enlarge even more the
7863 static void freeMemoryIfNeeded(void) {
7864 while (server
.maxmemory
&& zmalloc_used_memory() > server
.maxmemory
) {
7865 int j
, k
, freed
= 0;
7867 if (tryFreeOneObjectFromFreelist() == REDIS_OK
) continue;
7868 for (j
= 0; j
< server
.dbnum
; j
++) {
7870 robj
*minkey
= NULL
;
7871 struct dictEntry
*de
;
7873 if (dictSize(server
.db
[j
].expires
)) {
7875 /* From a sample of three keys drop the one nearest to
7876 * the natural expire */
7877 for (k
= 0; k
< 3; k
++) {
7880 de
= dictGetRandomKey(server
.db
[j
].expires
);
7881 t
= (time_t) dictGetEntryVal(de
);
7882 if (minttl
== -1 || t
< minttl
) {
7883 minkey
= dictGetEntryKey(de
);
7887 deleteKey(server
.db
+j
,minkey
);
7890 if (!freed
) return; /* nothing to free... */
7894 /* ============================== Append Only file ========================== */
7896 static void feedAppendOnlyFile(struct redisCommand
*cmd
, int dictid
, robj
**argv
, int argc
) {
7897 sds buf
= sdsempty();
7903 /* The DB this command was targetting is not the same as the last command
7904 * we appendend. To issue a SELECT command is needed. */
7905 if (dictid
!= server
.appendseldb
) {
7908 snprintf(seldb
,sizeof(seldb
),"%d",dictid
);
7909 buf
= sdscatprintf(buf
,"*2\r\n$6\r\nSELECT\r\n$%lu\r\n%s\r\n",
7910 (unsigned long)strlen(seldb
),seldb
);
7911 server
.appendseldb
= dictid
;
7914 /* "Fix" the argv vector if the command is EXPIRE. We want to translate
7915 * EXPIREs into EXPIREATs calls */
7916 if (cmd
->proc
== expireCommand
) {
7919 tmpargv
[0] = createStringObject("EXPIREAT",8);
7920 tmpargv
[1] = argv
[1];
7921 incrRefCount(argv
[1]);
7922 when
= time(NULL
)+strtol(argv
[2]->ptr
,NULL
,10);
7923 tmpargv
[2] = createObject(REDIS_STRING
,
7924 sdscatprintf(sdsempty(),"%ld",when
));
7928 /* Append the actual command */
7929 buf
= sdscatprintf(buf
,"*%d\r\n",argc
);
7930 for (j
= 0; j
< argc
; j
++) {
7933 o
= getDecodedObject(o
);
7934 buf
= sdscatprintf(buf
,"$%lu\r\n",(unsigned long)sdslen(o
->ptr
));
7935 buf
= sdscatlen(buf
,o
->ptr
,sdslen(o
->ptr
));
7936 buf
= sdscatlen(buf
,"\r\n",2);
7940 /* Free the objects from the modified argv for EXPIREAT */
7941 if (cmd
->proc
== expireCommand
) {
7942 for (j
= 0; j
< 3; j
++)
7943 decrRefCount(argv
[j
]);
7946 /* We want to perform a single write. This should be guaranteed atomic
7947 * at least if the filesystem we are writing is a real physical one.
7948 * While this will save us against the server being killed I don't think
7949 * there is much to do about the whole server stopping for power problems
7951 nwritten
= write(server
.appendfd
,buf
,sdslen(buf
));
7952 if (nwritten
!= (signed)sdslen(buf
)) {
7953 /* Ooops, we are in troubles. The best thing to do for now is
7954 * to simply exit instead to give the illusion that everything is
7955 * working as expected. */
7956 if (nwritten
== -1) {
7957 redisLog(REDIS_WARNING
,"Exiting on error writing to the append-only file: %s",strerror(errno
));
7959 redisLog(REDIS_WARNING
,"Exiting on short write while writing to the append-only file: %s",strerror(errno
));
7963 /* If a background append only file rewriting is in progress we want to
7964 * accumulate the differences between the child DB and the current one
7965 * in a buffer, so that when the child process will do its work we
7966 * can append the differences to the new append only file. */
7967 if (server
.bgrewritechildpid
!= -1)
7968 server
.bgrewritebuf
= sdscatlen(server
.bgrewritebuf
,buf
,sdslen(buf
));
7972 if (server
.appendfsync
== APPENDFSYNC_ALWAYS
||
7973 (server
.appendfsync
== APPENDFSYNC_EVERYSEC
&&
7974 now
-server
.lastfsync
> 1))
7976 fsync(server
.appendfd
); /* Let's try to get this data on the disk */
7977 server
.lastfsync
= now
;
7981 /* In Redis commands are always executed in the context of a client, so in
7982 * order to load the append only file we need to create a fake client. */
7983 static struct redisClient
*createFakeClient(void) {
7984 struct redisClient
*c
= zmalloc(sizeof(*c
));
7988 c
->querybuf
= sdsempty();
7992 /* We set the fake client as a slave waiting for the synchronization
7993 * so that Redis will not try to send replies to this client. */
7994 c
->replstate
= REDIS_REPL_WAIT_BGSAVE_START
;
7995 c
->reply
= listCreate();
7996 listSetFreeMethod(c
->reply
,decrRefCount
);
7997 listSetDupMethod(c
->reply
,dupClientReplyValue
);
8001 static void freeFakeClient(struct redisClient
*c
) {
8002 sdsfree(c
->querybuf
);
8003 listRelease(c
->reply
);
8007 /* Replay the append log file. On error REDIS_OK is returned. On non fatal
8008 * error (the append only file is zero-length) REDIS_ERR is returned. On
8009 * fatal error an error message is logged and the program exists. */
8010 int loadAppendOnlyFile(char *filename
) {
8011 struct redisClient
*fakeClient
;
8012 FILE *fp
= fopen(filename
,"r");
8013 struct redis_stat sb
;
8014 unsigned long long loadedkeys
= 0;
8016 if (redis_fstat(fileno(fp
),&sb
) != -1 && sb
.st_size
== 0)
8020 redisLog(REDIS_WARNING
,"Fatal error: can't open the append log file for reading: %s",strerror(errno
));
8024 fakeClient
= createFakeClient();
8031 struct redisCommand
*cmd
;
8033 if (fgets(buf
,sizeof(buf
),fp
) == NULL
) {
8039 if (buf
[0] != '*') goto fmterr
;
8041 argv
= zmalloc(sizeof(robj
*)*argc
);
8042 for (j
= 0; j
< argc
; j
++) {
8043 if (fgets(buf
,sizeof(buf
),fp
) == NULL
) goto readerr
;
8044 if (buf
[0] != '$') goto fmterr
;
8045 len
= strtol(buf
+1,NULL
,10);
8046 argsds
= sdsnewlen(NULL
,len
);
8047 if (len
&& fread(argsds
,len
,1,fp
) == 0) goto fmterr
;
8048 argv
[j
] = createObject(REDIS_STRING
,argsds
);
8049 if (fread(buf
,2,1,fp
) == 0) goto fmterr
; /* discard CRLF */
8052 /* Command lookup */
8053 cmd
= lookupCommand(argv
[0]->ptr
);
8055 redisLog(REDIS_WARNING
,"Unknown command '%s' reading the append only file", argv
[0]->ptr
);
8058 /* Try object encoding */
8059 if (cmd
->flags
& REDIS_CMD_BULK
)
8060 argv
[argc
-1] = tryObjectEncoding(argv
[argc
-1]);
8061 /* Run the command in the context of a fake client */
8062 fakeClient
->argc
= argc
;
8063 fakeClient
->argv
= argv
;
8064 cmd
->proc(fakeClient
);
8065 /* Discard the reply objects list from the fake client */
8066 while(listLength(fakeClient
->reply
))
8067 listDelNode(fakeClient
->reply
,listFirst(fakeClient
->reply
));
8068 /* Clean up, ready for the next command */
8069 for (j
= 0; j
< argc
; j
++) decrRefCount(argv
[j
]);
8071 /* Handle swapping while loading big datasets when VM is on */
8073 if (server
.vm_enabled
&& (loadedkeys
% 5000) == 0) {
8074 while (zmalloc_used_memory() > server
.vm_max_memory
) {
8075 if (vmSwapOneObjectBlocking() == REDIS_ERR
) break;
8080 freeFakeClient(fakeClient
);
8085 redisLog(REDIS_WARNING
,"Unexpected end of file reading the append only file");
8087 redisLog(REDIS_WARNING
,"Unrecoverable error reading the append only file: %s", strerror(errno
));
8091 redisLog(REDIS_WARNING
,"Bad file format reading the append only file");
8095 /* Write an object into a file in the bulk format $<count>\r\n<payload>\r\n */
8096 static int fwriteBulkObject(FILE *fp
, robj
*obj
) {
8100 /* Avoid the incr/decr ref count business if possible to help
8101 * copy-on-write (we are often in a child process when this function
8103 * Also makes sure that key objects don't get incrRefCount-ed when VM
8105 if (obj
->encoding
!= REDIS_ENCODING_RAW
) {
8106 obj
= getDecodedObject(obj
);
8109 snprintf(buf
,sizeof(buf
),"$%ld\r\n",(long)sdslen(obj
->ptr
));
8110 if (fwrite(buf
,strlen(buf
),1,fp
) == 0) goto err
;
8111 if (sdslen(obj
->ptr
) && fwrite(obj
->ptr
,sdslen(obj
->ptr
),1,fp
) == 0)
8113 if (fwrite("\r\n",2,1,fp
) == 0) goto err
;
8114 if (decrrc
) decrRefCount(obj
);
8117 if (decrrc
) decrRefCount(obj
);
/* Write a binary-safe string into a file in the bulk format
 * $<count>\r\n<payload>\r\n
 *
 * Exactly 'len' bytes starting at 's' are written as payload, so the data
 * may contain NUL bytes. Returns 1 on success, 0 if any fwrite() fails. */
static int fwriteBulkString(FILE *fp, char *s, unsigned long len) {
    char buf[128];

    /* 'len' is unsigned long: it must be formatted with %lu, otherwise very
     * large lengths would be emitted as negative counts. */
    snprintf(buf,sizeof(buf),"$%lu\r\n",len);
    if (fwrite(buf,strlen(buf),1,fp) == 0) return 0;
    /* Skip the payload write for empty strings: fwrite() of zero bytes
     * returns 0, which would be mistaken for an error. */
    if (len && fwrite(s,len,1,fp) == 0) return 0;
    if (fwrite("\r\n",2,1,fp) == 0) return 0;
    return 1;
}
/* Write a double value in bulk format $<count>\r\n<payload>\r\n
 *
 * The payload is the value rendered with "%.17g".
 * Returns 1 on success, 0 if any fwrite() fails. */
static int fwriteBulkDouble(FILE *fp, double d) {
    char buf[128], dbuf[128];

    /* dbuf already ends with the CRLF terminator, so the payload length
     * advertised in the header is its length minus those two bytes. */
    snprintf(dbuf,sizeof(dbuf),"%.17g\r\n",d);
    snprintf(buf,sizeof(buf),"$%lu\r\n",(unsigned long)strlen(dbuf)-2);
    if (fwrite(buf,strlen(buf),1,fp) == 0) return 0;
    if (fwrite(dbuf,strlen(dbuf),1,fp) == 0) return 0;
    return 1;
}
/* Write a long value in bulk format $<count>\r\n<payload>\r\n
 *
 * The payload is the decimal representation of 'l' (with a leading '-'
 * for negative values). Returns 1 on success, 0 if any fwrite() fails. */
static int fwriteBulkLong(FILE *fp, long l) {
    char buf[128], lbuf[128];

    /* lbuf already ends with the CRLF terminator, so the payload length
     * advertised in the header is its length minus those two bytes. */
    snprintf(lbuf,sizeof(lbuf),"%ld\r\n",l);
    snprintf(buf,sizeof(buf),"$%lu\r\n",(unsigned long)strlen(lbuf)-2);
    if (fwrite(buf,strlen(buf),1,fp) == 0) return 0;
    if (fwrite(lbuf,strlen(lbuf),1,fp) == 0) return 0;
    return 1;
}
8155 /* Write a sequence of commands able to fully rebuild the dataset into
8156 * "filename". Used both by REWRITEAOF and BGREWRITEAOF. */
8157 static int rewriteAppendOnlyFile(char *filename
) {
8158 dictIterator
*di
= NULL
;
8163 time_t now
= time(NULL
);
8165 /* Note that we have to use a different temp name here compared to the
8166 * one used by rewriteAppendOnlyFileBackground() function. */
8167 snprintf(tmpfile
,256,"temp-rewriteaof-%d.aof", (int) getpid());
8168 fp
= fopen(tmpfile
,"w");
8170 redisLog(REDIS_WARNING
, "Failed rewriting the append only file: %s", strerror(errno
));
8173 for (j
= 0; j
< server
.dbnum
; j
++) {
8174 char selectcmd
[] = "*2\r\n$6\r\nSELECT\r\n";
8175 redisDb
*db
= server
.db
+j
;
8177 if (dictSize(d
) == 0) continue;
8178 di
= dictGetIterator(d
);
8184 /* SELECT the new DB */
8185 if (fwrite(selectcmd
,sizeof(selectcmd
)-1,1,fp
) == 0) goto werr
;
8186 if (fwriteBulkLong(fp
,j
) == 0) goto werr
;
8188 /* Iterate this DB writing every entry */
8189 while((de
= dictNext(di
)) != NULL
) {
8194 key
= dictGetEntryKey(de
);
8195 /* If the value for this key is swapped, load a preview in memory.
8196 * We use a "swapped" flag to remember if we need to free the
8197 * value object instead to just increment the ref count anyway
8198 * in order to avoid copy-on-write of pages if we are forked() */
8199 if (!server
.vm_enabled
|| key
->storage
== REDIS_VM_MEMORY
||
8200 key
->storage
== REDIS_VM_SWAPPING
) {
8201 o
= dictGetEntryVal(de
);
8204 o
= vmPreviewObject(key
);
8207 expiretime
= getExpire(db
,key
);
8209 /* Save the key and associated value */
8210 if (o
->type
== REDIS_STRING
) {
8211 /* Emit a SET command */
8212 char cmd
[]="*3\r\n$3\r\nSET\r\n";
8213 if (fwrite(cmd
,sizeof(cmd
)-1,1,fp
) == 0) goto werr
;
8215 if (fwriteBulkObject(fp
,key
) == 0) goto werr
;
8216 if (fwriteBulkObject(fp
,o
) == 0) goto werr
;
8217 } else if (o
->type
== REDIS_LIST
) {
8218 /* Emit the RPUSHes needed to rebuild the list */
8219 list
*list
= o
->ptr
;
8223 listRewind(list
,&li
);
8224 while((ln
= listNext(&li
))) {
8225 char cmd
[]="*3\r\n$5\r\nRPUSH\r\n";
8226 robj
*eleobj
= listNodeValue(ln
);
8228 if (fwrite(cmd
,sizeof(cmd
)-1,1,fp
) == 0) goto werr
;
8229 if (fwriteBulkObject(fp
,key
) == 0) goto werr
;
8230 if (fwriteBulkObject(fp
,eleobj
) == 0) goto werr
;
8232 } else if (o
->type
== REDIS_SET
) {
8233 /* Emit the SADDs needed to rebuild the set */
8235 dictIterator
*di
= dictGetIterator(set
);
8238 while((de
= dictNext(di
)) != NULL
) {
8239 char cmd
[]="*3\r\n$4\r\nSADD\r\n";
8240 robj
*eleobj
= dictGetEntryKey(de
);
8242 if (fwrite(cmd
,sizeof(cmd
)-1,1,fp
) == 0) goto werr
;
8243 if (fwriteBulkObject(fp
,key
) == 0) goto werr
;
8244 if (fwriteBulkObject(fp
,eleobj
) == 0) goto werr
;
8246 dictReleaseIterator(di
);
8247 } else if (o
->type
== REDIS_ZSET
) {
8248 /* Emit the ZADDs needed to rebuild the sorted set */
8250 dictIterator
*di
= dictGetIterator(zs
->dict
);
8253 while((de
= dictNext(di
)) != NULL
) {
8254 char cmd
[]="*4\r\n$4\r\nZADD\r\n";
8255 robj
*eleobj
= dictGetEntryKey(de
);
8256 double *score
= dictGetEntryVal(de
);
8258 if (fwrite(cmd
,sizeof(cmd
)-1,1,fp
) == 0) goto werr
;
8259 if (fwriteBulkObject(fp
,key
) == 0) goto werr
;
8260 if (fwriteBulkDouble(fp
,*score
) == 0) goto werr
;
8261 if (fwriteBulkObject(fp
,eleobj
) == 0) goto werr
;
8263 dictReleaseIterator(di
);
8264 } else if (o
->type
== REDIS_HASH
) {
8265 char cmd
[]="*4\r\n$4\r\nHSET\r\n";
8267 /* Emit the HSETs needed to rebuild the hash */
8268 if (o
->encoding
== REDIS_ENCODING_ZIPMAP
) {
8269 unsigned char *p
= zipmapRewind(o
->ptr
);
8270 unsigned char *field
, *val
;
8271 unsigned int flen
, vlen
;
8273 while((p
= zipmapNext(p
,&field
,&flen
,&val
,&vlen
)) != NULL
) {
8274 if (fwrite(cmd
,sizeof(cmd
)-1,1,fp
) == 0) goto werr
;
8275 if (fwriteBulkObject(fp
,key
) == 0) goto werr
;
8276 if (fwriteBulkString(fp
,(char*)field
,flen
) == -1)
8278 if (fwriteBulkString(fp
,(char*)val
,vlen
) == -1)
8282 dictIterator
*di
= dictGetIterator(o
->ptr
);
8285 while((de
= dictNext(di
)) != NULL
) {
8286 robj
*field
= dictGetEntryKey(de
);
8287 robj
*val
= dictGetEntryVal(de
);
8289 if (fwrite(cmd
,sizeof(cmd
)-1,1,fp
) == 0) goto werr
;
8290 if (fwriteBulkObject(fp
,key
) == 0) goto werr
;
8291 if (fwriteBulkObject(fp
,field
) == -1) return -1;
8292 if (fwriteBulkObject(fp
,val
) == -1) return -1;
8294 dictReleaseIterator(di
);
8297 redisPanic("Unknown object type");
8299 /* Save the expire time */
8300 if (expiretime
!= -1) {
8301 char cmd
[]="*3\r\n$8\r\nEXPIREAT\r\n";
8302 /* If this key is already expired skip it */
8303 if (expiretime
< now
) continue;
8304 if (fwrite(cmd
,sizeof(cmd
)-1,1,fp
) == 0) goto werr
;
8305 if (fwriteBulkObject(fp
,key
) == 0) goto werr
;
8306 if (fwriteBulkLong(fp
,expiretime
) == 0) goto werr
;
8308 if (swapped
) decrRefCount(o
);
8310 dictReleaseIterator(di
);
8313 /* Make sure data will not remain on the OS's output buffers */
8318 /* Use RENAME to make sure the DB file is changed atomically only
8319 * if the generate DB file is ok. */
8320 if (rename(tmpfile
,filename
) == -1) {
8321 redisLog(REDIS_WARNING
,"Error moving temp append only file on the final destination: %s", strerror(errno
));
8325 redisLog(REDIS_NOTICE
,"SYNC append only file rewrite performed");
8331 redisLog(REDIS_WARNING
,"Write error writing append only file on disk: %s", strerror(errno
));
8332 if (di
) dictReleaseIterator(di
);
8336 /* This is how rewriting of the append only file in background works:
8338 * 1) The user calls BGREWRITEAOF
8339 * 2) Redis calls this function, that forks():
8340 * 2a) the child rewrite the append only file in a temp file.
8341 * 2b) the parent accumulates differences in server.bgrewritebuf.
8342 * 3) When the child finishes '2a' it exits.
8343 * 4) The parent will trap the exit code, if it's OK, will append the
8344 * data accumulated into server.bgrewritebuf into the temp file, and
8345 * finally will rename(2) the temp file in the actual file name.
8346 * Then the new file is reopened as the new append only file. Profit!
8348 static int rewriteAppendOnlyFileBackground(void) {
8351 if (server
.bgrewritechildpid
!= -1) return REDIS_ERR
;
8352 if (server
.vm_enabled
) waitEmptyIOJobsQueue();
8353 if ((childpid
= fork()) == 0) {
8357 if (server
.vm_enabled
) vmReopenSwapFile();
8359 snprintf(tmpfile
,256,"temp-rewriteaof-bg-%d.aof", (int) getpid());
8360 if (rewriteAppendOnlyFile(tmpfile
) == REDIS_OK
) {
8367 if (childpid
== -1) {
8368 redisLog(REDIS_WARNING
,
8369 "Can't rewrite append only file in background: fork: %s",
8373 redisLog(REDIS_NOTICE
,
8374 "Background append only file rewriting started by pid %d",childpid
);
8375 server
.bgrewritechildpid
= childpid
;
8376 updateDictResizePolicy();
8377 /* We set appendseldb to -1 in order to force the next call to the
8378 * feedAppendOnlyFile() to issue a SELECT command, so the differences
8379 * accumulated by the parent into server.bgrewritebuf will start
8380 * with a SELECT statement and it will be safe to merge. */
8381 server
.appendseldb
= -1;
8384 return REDIS_OK
; /* unreached */
8387 static void bgrewriteaofCommand(redisClient
*c
) {
8388 if (server
.bgrewritechildpid
!= -1) {
8389 addReplySds(c
,sdsnew("-ERR background append only file rewriting already in progress\r\n"));
8392 if (rewriteAppendOnlyFileBackground() == REDIS_OK
) {
8393 char *status
= "+Background append only file rewriting started\r\n";
8394 addReplySds(c
,sdsnew(status
));
8396 addReply(c
,shared
.err
);
8400 static void aofRemoveTempFile(pid_t childpid
) {
8403 snprintf(tmpfile
,256,"temp-rewriteaof-bg-%d.aof", (int) childpid
);
8407 /* Virtual Memory is composed mainly of two subsystems:
8408 * - Blocking Virtual Memory
8409 * - Threaded Virtual Memory I/O
8410 * The two parts are not fully decoupled, but functions are split among two
8411 * different sections of the source code (delimited by comments) in order to
8412 * make more clear what functionality is about the blocking VM and what about
8413 * the threaded (not blocking) VM.
8417 * Redis VM is a blocking VM (one that blocks reading swapped values from
8418 * disk into memory when a value swapped out is needed in memory) that is made
8419 * unblocking by trying to examine the command argument vector in order to
8420 * load in background values that will likely be needed in order to exec
8421 * the command. The command is executed only once all the relevant keys
8422 * are loaded into memory.
8424 * This is basically almost as simple as a blocking VM, but almost as parallel
8425 * as a fully non-blocking VM.
8428 /* =================== Virtual Memory - Blocking Side ====================== */
8430 /* substitute the first occurrence of '%p' with the process pid in the
8431 * swap file name. */
8432 static void expandVmSwapFilename(void) {
8433 char *p
= strstr(server
.vm_swap_file
,"%p");
8439 new = sdscat(new,server
.vm_swap_file
);
8440 new = sdscatprintf(new,"%ld",(long) getpid());
8441 new = sdscat(new,p
+2);
8442 zfree(server
.vm_swap_file
);
8443 server
.vm_swap_file
= new;
8446 static void vmInit(void) {
8451 if (server
.vm_max_threads
!= 0)
8452 zmalloc_enable_thread_safeness(); /* we need thread safe zmalloc() */
8454 expandVmSwapFilename();
8455 redisLog(REDIS_NOTICE
,"Using '%s' as swap file",server
.vm_swap_file
);
8456 if ((server
.vm_fp
= fopen(server
.vm_swap_file
,"r+b")) == NULL
) {
8457 server
.vm_fp
= fopen(server
.vm_swap_file
,"w+b");
8459 if (server
.vm_fp
== NULL
) {
8460 redisLog(REDIS_WARNING
,
8461 "Impossible to open the swap file: %s. Exiting.",
8465 server
.vm_fd
= fileno(server
.vm_fp
);
8466 server
.vm_next_page
= 0;
8467 server
.vm_near_pages
= 0;
8468 server
.vm_stats_used_pages
= 0;
8469 server
.vm_stats_swapped_objects
= 0;
8470 server
.vm_stats_swapouts
= 0;
8471 server
.vm_stats_swapins
= 0;
8472 totsize
= server
.vm_pages
*server
.vm_page_size
;
8473 redisLog(REDIS_NOTICE
,"Allocating %lld bytes of swap file",totsize
);
8474 if (ftruncate(server
.vm_fd
,totsize
) == -1) {
8475 redisLog(REDIS_WARNING
,"Can't ftruncate swap file: %s. Exiting.",
8479 redisLog(REDIS_NOTICE
,"Swap file allocated with success");
8481 server
.vm_bitmap
= zmalloc((server
.vm_pages
+7)/8);
8482 redisLog(REDIS_VERBOSE
,"Allocated %lld bytes page table for %lld pages",
8483 (long long) (server
.vm_pages
+7)/8, server
.vm_pages
);
8484 memset(server
.vm_bitmap
,0,(server
.vm_pages
+7)/8);
8486 /* Initialize threaded I/O (used by Virtual Memory) */
8487 server
.io_newjobs
= listCreate();
8488 server
.io_processing
= listCreate();
8489 server
.io_processed
= listCreate();
8490 server
.io_ready_clients
= listCreate();
8491 pthread_mutex_init(&server
.io_mutex
,NULL
);
8492 pthread_mutex_init(&server
.obj_freelist_mutex
,NULL
);
8493 pthread_mutex_init(&server
.io_swapfile_mutex
,NULL
);
8494 server
.io_active_threads
= 0;
8495 if (pipe(pipefds
) == -1) {
8496 redisLog(REDIS_WARNING
,"Unable to intialized VM: pipe(2): %s. Exiting."
8500 server
.io_ready_pipe_read
= pipefds
[0];
8501 server
.io_ready_pipe_write
= pipefds
[1];
8502 redisAssert(anetNonBlock(NULL
,server
.io_ready_pipe_read
) != ANET_ERR
);
8503 /* LZF requires a lot of stack */
8504 pthread_attr_init(&server
.io_threads_attr
);
8505 pthread_attr_getstacksize(&server
.io_threads_attr
, &stacksize
);
8506 while (stacksize
< REDIS_THREAD_STACK_SIZE
) stacksize
*= 2;
8507 pthread_attr_setstacksize(&server
.io_threads_attr
, stacksize
);
8508 /* Listen for events in the threaded I/O pipe */
8509 if (aeCreateFileEvent(server
.el
, server
.io_ready_pipe_read
, AE_READABLE
,
8510 vmThreadedIOCompletedJob
, NULL
) == AE_ERR
)
8511 oom("creating file event");
8514 /* Mark the page as used */
8515 static void vmMarkPageUsed(off_t page
) {
8516 off_t byte
= page
/8;
8518 redisAssert(vmFreePage(page
) == 1);
8519 server
.vm_bitmap
[byte
] |= 1<<bit
;
8522 /* Mark N contiguous pages as used, with 'page' being the first. */
8523 static void vmMarkPagesUsed(off_t page
, off_t count
) {
8526 for (j
= 0; j
< count
; j
++)
8527 vmMarkPageUsed(page
+j
);
8528 server
.vm_stats_used_pages
+= count
;
8529 redisLog(REDIS_DEBUG
,"Mark USED pages: %lld pages at %lld\n",
8530 (long long)count
, (long long)page
);
8533 /* Mark the page as free */
8534 static void vmMarkPageFree(off_t page
) {
8535 off_t byte
= page
/8;
8537 redisAssert(vmFreePage(page
) == 0);
8538 server
.vm_bitmap
[byte
] &= ~(1<<bit
);
8541 /* Mark N contiguous pages as free, with 'page' being the first. */
8542 static void vmMarkPagesFree(off_t page
, off_t count
) {
8545 for (j
= 0; j
< count
; j
++)
8546 vmMarkPageFree(page
+j
);
8547 server
.vm_stats_used_pages
-= count
;
8548 redisLog(REDIS_DEBUG
,"Mark FREE pages: %lld pages at %lld\n",
8549 (long long)count
, (long long)page
);
8552 /* Test if the page is free */
8553 static int vmFreePage(off_t page
) {
8554 off_t byte
= page
/8;
8556 return (server
.vm_bitmap
[byte
] & (1<<bit
)) == 0;
8559 /* Find N contiguous free pages storing the first page of the cluster in *first.
8560 * Returns REDIS_OK if it was able to find N contiguous pages, otherwise
8561 * REDIS_ERR is returned.
8563 * This function uses a simple algorithm: we try to allocate
8564 * REDIS_VM_MAX_NEAR_PAGES sequentially, when we reach this limit we start
8565 * again from the start of the swap file searching for free spaces.
8567 * If it looks pretty clear that there are no free pages near our offset
8568 * we try to find less populated places doing a forward jump of
8569 * REDIS_VM_MAX_RANDOM_JUMP, then we start scanning again a few pages
8570 * without hurry, and then we jump again and so forth...
8572 * This function can be improved using a free list to avoid guessing
8573 * too much, since we could collect data about freed pages.
8575 * note: I implemented this function just after watching an episode of
8576 * Battlestar Galactica, where the hybrid was continuing to say "JUMP!"
8578 static int vmFindContiguousPages(off_t
*first
, off_t n
) {
8579 off_t base
, offset
= 0, since_jump
= 0, numfree
= 0;
8581 if (server
.vm_near_pages
== REDIS_VM_MAX_NEAR_PAGES
) {
8582 server
.vm_near_pages
= 0;
8583 server
.vm_next_page
= 0;
8585 server
.vm_near_pages
++; /* Yet another try for pages near to the old ones */
8586 base
= server
.vm_next_page
;
8588 while(offset
< server
.vm_pages
) {
8589 off_t
this = base
+offset
;
8591 /* If we overflow, restart from page zero */
8592 if (this >= server
.vm_pages
) {
8593 this -= server
.vm_pages
;
8595 /* Just overflowed, what we found on tail is no longer
8596 * interesting, as it's no longer contiguous. */
8600 if (vmFreePage(this)) {
8601 /* This is a free page */
8603 /* Already got N free pages? Return to the caller, with success */
8605 *first
= this-(n
-1);
8606 server
.vm_next_page
= this+1;
8607 redisLog(REDIS_DEBUG
, "FOUND CONTIGUOUS PAGES: %lld pages at %lld\n", (long long) n
, (long long) *first
);
8611 /* The current one is not a free page */
8615 /* Fast-forward if the current page is not free and we already
8616 * searched enough near this place. */
8618 if (!numfree
&& since_jump
>= REDIS_VM_MAX_RANDOM_JUMP
/4) {
8619 offset
+= random() % REDIS_VM_MAX_RANDOM_JUMP
;
8621 /* Note that even if we rewind after the jump, we are don't need
8622 * to make sure numfree is set to zero as we only jump *if* it
8623 * is set to zero. */
8625 /* Otherwise just check the next page */
8632 /* Write the specified object at the specified page of the swap file */
8633 static int vmWriteObjectOnSwap(robj
*o
, off_t page
) {
8634 if (server
.vm_enabled
) pthread_mutex_lock(&server
.io_swapfile_mutex
);
8635 if (fseeko(server
.vm_fp
,page
*server
.vm_page_size
,SEEK_SET
) == -1) {
8636 if (server
.vm_enabled
) pthread_mutex_unlock(&server
.io_swapfile_mutex
);
8637 redisLog(REDIS_WARNING
,
8638 "Critical VM problem in vmWriteObjectOnSwap(): can't seek: %s",
8642 rdbSaveObject(server
.vm_fp
,o
);
8643 fflush(server
.vm_fp
);
8644 if (server
.vm_enabled
) pthread_mutex_unlock(&server
.io_swapfile_mutex
);
8648 /* Swap the 'val' object relative to 'key' into disk. Store all the information
8649 * needed to later retrieve the object into the key object.
8650 * If we can't find enough contiguous empty pages to swap the object on disk
8651 * REDIS_ERR is returned. */
8652 static int vmSwapObjectBlocking(robj
*key
, robj
*val
) {
8653 off_t pages
= rdbSavedObjectPages(val
,NULL
);
8656 assert(key
->storage
== REDIS_VM_MEMORY
);
8657 assert(key
->refcount
== 1);
8658 if (vmFindContiguousPages(&page
,pages
) == REDIS_ERR
) return REDIS_ERR
;
8659 if (vmWriteObjectOnSwap(val
,page
) == REDIS_ERR
) return REDIS_ERR
;
8660 key
->vm
.page
= page
;
8661 key
->vm
.usedpages
= pages
;
8662 key
->storage
= REDIS_VM_SWAPPED
;
8663 key
->vtype
= val
->type
;
8664 decrRefCount(val
); /* Deallocate the object from memory. */
8665 vmMarkPagesUsed(page
,pages
);
8666 redisLog(REDIS_DEBUG
,"VM: object %s swapped out at %lld (%lld pages)",
8667 (unsigned char*) key
->ptr
,
8668 (unsigned long long) page
, (unsigned long long) pages
);
8669 server
.vm_stats_swapped_objects
++;
8670 server
.vm_stats_swapouts
++;
8674 static robj
*vmReadObjectFromSwap(off_t page
, int type
) {
8677 if (server
.vm_enabled
) pthread_mutex_lock(&server
.io_swapfile_mutex
);
8678 if (fseeko(server
.vm_fp
,page
*server
.vm_page_size
,SEEK_SET
) == -1) {
8679 redisLog(REDIS_WARNING
,
8680 "Unrecoverable VM problem in vmReadObjectFromSwap(): can't seek: %s",
8684 o
= rdbLoadObject(type
,server
.vm_fp
);
8686 redisLog(REDIS_WARNING
, "Unrecoverable VM problem in vmReadObjectFromSwap(): can't load object from swap file: %s", strerror(errno
));
8689 if (server
.vm_enabled
) pthread_mutex_unlock(&server
.io_swapfile_mutex
);
8693 /* Load the value object relative to the 'key' object from swap to memory.
8694 * The newly allocated object is returned.
8696 * If preview is true the unserialized object is returned to the caller but
8697 * no changes are made to the key object, nor the pages are marked as freed */
8698 static robj
*vmGenericLoadObject(robj
*key
, int preview
) {
8701 redisAssert(key
->storage
== REDIS_VM_SWAPPED
|| key
->storage
== REDIS_VM_LOADING
);
8702 val
= vmReadObjectFromSwap(key
->vm
.page
,key
->vtype
);
8704 key
->storage
= REDIS_VM_MEMORY
;
8705 key
->vm
.atime
= server
.unixtime
;
8706 vmMarkPagesFree(key
->vm
.page
,key
->vm
.usedpages
);
8707 redisLog(REDIS_DEBUG
, "VM: object %s loaded from disk",
8708 (unsigned char*) key
->ptr
);
8709 server
.vm_stats_swapped_objects
--;
8711 redisLog(REDIS_DEBUG
, "VM: object %s previewed from disk",
8712 (unsigned char*) key
->ptr
);
8714 server
.vm_stats_swapins
++;
8718 /* Plain object loading, from swap to memory */
8719 static robj
*vmLoadObject(robj
*key
) {
8720 /* If we are loading the object in background, stop it, we
8721 * need to load this object synchronously ASAP. */
8722 if (key
->storage
== REDIS_VM_LOADING
)
8723 vmCancelThreadedIOJob(key
);
8724 return vmGenericLoadObject(key
,0);
8727 /* Just load the value on disk, without to modify the key.
8728 * This is useful when we want to perform some operation on the value
8729 * without to really bring it from swap to memory, like while saving the
8730 * dataset or rewriting the append only log. */
8731 static robj
*vmPreviewObject(robj
*key
) {
8732 return vmGenericLoadObject(key
,1);
8735 /* How a good candidate is this object for swapping?
8736 * The better candidate it is, the greater the returned value.
8738 * Currently we try to perform a fast estimation of the object size in
8739 * memory, and combine it with aging information.
8741 * Basically swappability = idle-time * log(estimated size)
8743 * Bigger objects are preferred over smaller objects, but not
8744 * proportionally, this is why we use the logarithm. This algorithm is
8745 * just a first try and will probably be tuned later. */
/* Details visible in the body:
 * - age is server.unixtime - o->vm.atime; a non-positive age returns 0;
 * - the size estimate 'asize' is built from sizeof() of the container
 *   structures plus the size of ONE sampled element (first list node, or a
 *   random dict entry) multiplied by the element count;
 * - zipmap-encoded objects use len * (klen + vlen + 3), with klen/vlen taken
 *   from the first zipmapNext() entry;
 * - the result is (double)age * log(1 + asize).
 * NOTE(review): the switch/case heads selecting the per-type branches and
 * several declarations are not visible in this excerpt. */
8746 static double computeObjectSwappability(robj
*o
) {
8747 time_t age
= server
.unixtime
- o
->vm
.atime
;
8751 struct dictEntry
*de
;
8754 if (age
<= 0) return 0;
8757 if (o
->encoding
!= REDIS_ENCODING_RAW
) {
8760 asize
= sdslen(o
->ptr
)+sizeof(*o
)+sizeof(long)*2;
8765 listNode
*ln
= listFirst(l
);
8767 asize
= sizeof(list
);
8769 robj
*ele
= ln
->value
;
8772 elesize
= (ele
->encoding
== REDIS_ENCODING_RAW
) ?
8773 (sizeof(*o
)+sdslen(ele
->ptr
)) :
8775 asize
+= (sizeof(listNode
)+elesize
)*listLength(l
);
8780 z
= (o
->type
== REDIS_ZSET
);
8781 d
= z
? ((zset
*)o
->ptr
)->dict
: o
->ptr
;
8783 asize
= sizeof(dict
)+(sizeof(struct dictEntry
*)*dictSlots(d
));
8784 if (z
) asize
+= sizeof(zset
)-sizeof(dict
);
8789 de
= dictGetRandomKey(d
);
8790 ele
= dictGetEntryKey(de
);
8791 elesize
= (ele
->encoding
== REDIS_ENCODING_RAW
) ?
8792 (sizeof(*o
)+sdslen(ele
->ptr
)) :
8794 asize
+= (sizeof(struct dictEntry
)+elesize
)*dictSize(d
);
8795 if (z
) asize
+= sizeof(zskiplistNode
)*dictSize(d
);
8799 if (o
->encoding
== REDIS_ENCODING_ZIPMAP
) {
8800 unsigned char *p
= zipmapRewind((unsigned char*)o
->ptr
);
8801 unsigned int len
= zipmapLen((unsigned char*)o
->ptr
);
8802 unsigned int klen
, vlen
;
8803 unsigned char *key
, *val
;
8805 if ((p
= zipmapNext(p
,&key
,&klen
,&val
,&vlen
)) == NULL
) {
8809 asize
= len
*(klen
+vlen
+3);
8810 } else if (o
->encoding
== REDIS_ENCODING_HT
) {
8812 asize
= sizeof(dict
)+(sizeof(struct dictEntry
*)*dictSlots(d
));
8817 de
= dictGetRandomKey(d
);
8818 ele
= dictGetEntryKey(de
);
8819 elesize
= (ele
->encoding
== REDIS_ENCODING_RAW
) ?
8820 (sizeof(*o
)+sdslen(ele
->ptr
)) :
8822 ele
= dictGetEntryVal(de
);
8823 elesize
= (ele
->encoding
== REDIS_ENCODING_RAW
) ?
8824 (sizeof(*o
)+sdslen(ele
->ptr
)) :
8826 asize
+= (sizeof(struct dictEntry
)+elesize
)*dictSize(d
);
8831 return (double)age
*log(1+asize
);
8834 /* Try to swap an object that's a good candidate for swapping.
8835 * Returns REDIS_OK if the object was swapped, REDIS_ERR if it's not possible
8836 * to swap any object at all.
8838 * If 'usethreads' is true, Redis will try to swap the object in background
8839 * using I/O threads. */
/* Details visible in the body:
 * - every database with a non-empty dict is sampled with dictGetRandomKey();
 * - a sample is rejected when the key is not REDIS_VM_MEMORY, or when
 *   server.vm_max_threads != 0 and the value refcount is not 1;
 * - the candidate with the highest computeObjectSwappability() wins;
 * - a shared key (refcount > 1) is replaced in the entry by a private copy
 *   made with dupStringObject() before swapping;
 * - the swap is done with vmSwapObjectThreaded() or vmSwapObjectBlocking();
 *   after a successful blocking swap the entry value is set to NULL.
 * NOTE(review): the test on 'usethreads' and the final return statements are
 * not visible in this excerpt. */
8840 static int vmSwapOneObject(int usethreads
) {
8842 struct dictEntry
*best
= NULL
;
8843 double best_swappability
= 0;
8844 redisDb
*best_db
= NULL
;
8847 for (j
= 0; j
< server
.dbnum
; j
++) {
8848 redisDb
*db
= server
.db
+j
;
8849 /* Why maxtries is set to 100?
8850 * Because this way (usually) we'll find 1 object even if just 1% - 2%
8851 * are swappable objects */
8854 if (dictSize(db
->dict
) == 0) continue;
8855 for (i
= 0; i
< 5; i
++) {
8857 double swappability
;
8859 if (maxtries
) maxtries
--;
8860 de
= dictGetRandomKey(db
->dict
);
8861 key
= dictGetEntryKey(de
);
8862 val
= dictGetEntryVal(de
);
8863 /* Only swap objects that are currently in memory.
8865 * Also don't swap shared objects if threaded VM is on, as we
8866 * try to ensure that the main thread does not touch the
8867 * object while the I/O thread is using it, but we can't
8868 * control other keys without adding additional mutex. */
8869 if (key
->storage
!= REDIS_VM_MEMORY
||
8870 (server
.vm_max_threads
!= 0 && val
->refcount
!= 1)) {
8871 if (maxtries
) i
--; /* don't count this try */
8874 swappability
= computeObjectSwappability(val
);
8875 if (!best
|| swappability
> best_swappability
) {
8877 best_swappability
= swappability
;
8882 if (best
== NULL
) return REDIS_ERR
;
8883 key
= dictGetEntryKey(best
);
8884 val
= dictGetEntryVal(best
);
8886 redisLog(REDIS_DEBUG
,"Key with best swappability: %s, %f",
8887 key
->ptr
, best_swappability
);
8889 /* Unshare the key if needed */
8890 if (key
->refcount
> 1) {
8891 robj
*newkey
= dupStringObject(key
);
8893 key
= dictGetEntryKey(best
) = newkey
;
8897 vmSwapObjectThreaded(key
,val
,best_db
);
8900 if (vmSwapObjectBlocking(key
,val
) == REDIS_OK
) {
8901 dictGetEntryVal(best
) = NULL
;
/* Swap out one object synchronously (no I/O threads involved).
 *
 * Thin wrapper around vmSwapOneObject(0); returns its result unchanged.
 * The parameter list is spelled (void) so the definition is a real
 * prototype, consistent with vmCanSwapOut(void) in this file. */
static int vmSwapOneObjectBlocking(void) {
    return vmSwapOneObject(0);
}
/* Swap out one object using the threaded I/O machinery.
 *
 * Thin wrapper around vmSwapOneObject(1); returns its result unchanged.
 * The parameter list is spelled (void) so the definition is a real
 * prototype, consistent with vmCanSwapOut(void) in this file. */
static int vmSwapOneObjectThreaded(void) {
    return vmSwapOneObject(1);
}
8917 /* Return true if it's safe to swap out objects in a given moment.
8918 * Basically we don't want to swap objects out while there is a BGSAVE
8919 * or a BGAEOREWRITE running in backgroud. */
8920 static int vmCanSwapOut(void) {
8921 return (server
.bgsavechildpid
== -1 && server
.bgrewritechildpid
== -1);
8924 /* Delete a key if swapped. Returns 1 if the key was found, was swapped
8925 * and was deleted. Otherwise 0 is returned. */
/* Visible early exits: 0 is returned when 'key' is not present in db->dict
 * and when the stored key object is in REDIS_VM_MEMORY state.
 * NOTE(review): the declarations and the actual delete/return 1 path are not
 * visible in this excerpt. */
8926 static int deleteIfSwapped(redisDb
*db
, robj
*key
) {
8930 if ((de
= dictFind(db
->dict
,key
)) == NULL
) return 0;
8931 foundkey
= dictGetEntryKey(de
);
8932 if (foundkey
->storage
== REDIS_VM_MEMORY
) return 0;
8937 /* =================== Virtual Memory - Threaded I/O ======================= */
/* Release the resources referenced by an I/O job.
 *
 * For PREPARE_SWAP, DO_SWAP and LOAD jobs with a non-NULL j->val, the
 * reference held on the value is dropped with decrRefCount().
 * NOTE(review): the release of the job structure itself is not visible in
 * this excerpt. */
8939 static void freeIOJob(iojob
*j
) {
8940 if ((j
->type
== REDIS_IOJOB_PREPARE_SWAP
||
8941 j
->type
== REDIS_IOJOB_DO_SWAP
||
8942 j
->type
== REDIS_IOJOB_LOAD
) && j
->val
!= NULL
)
8943 decrRefCount(j
->val
);
8944 /* We don't decrRefCount the j->key field as we didn't increment
8945 * the count creating IO Jobs. This is because the key field here is
8946 * just used as an identifier and if a key is removed the Job should
8947 * never be touched again. */
/* Main-thread handler for completed threaded I/O jobs.
 *
 * For every byte read from 'fd' the first job in server.io_processed is
 * removed from that list and post-processed according to j->type:
 * - REDIS_IOJOB_LOAD: the key becomes REDIS_VM_MEMORY, its swap pages are
 *   freed, j->val is installed as the entry value (with an extra reference)
 *   and clients blocked on the key are handled;
 * - REDIS_IOJOB_PREPARE_SWAP: pages are looked up with
 *   vmFindContiguousPages(); on failure, or when vmCanSwapOut() is false,
 *   the key is put back to REDIS_VM_MEMORY, otherwise the pages are marked
 *   used and the job is retyped as REDIS_IOJOB_DO_SWAP;
 * - REDIS_IOJOB_DO_SWAP: the key becomes REDIS_VM_SWAPPED, the in-memory
 *   value is released and the swap statistics are updated.
 * The number of jobs handled per call is capped at 'toprocess', computed as
 * listLength(io_processed) * REDIS_MAX_COMPLETED_JOBS_PROCESSED / 100 with a
 * minimum of 1. 'privdata' and 'mask' are unused.
 * NOTE(review): several lines (declarations, locking calls, job re-queueing,
 * freeIOJob calls) are not visible in this excerpt. */
8951 /* Every time a thread finished a Job, it writes a byte into the write side
8952 * of an unix pipe in order to "awake" the main thread, and this function
8954 static void vmThreadedIOCompletedJob(aeEventLoop
*el
, int fd
, void *privdata
,
8958 int retval
, processed
= 0, toprocess
= -1, trytoswap
= 1;
8960 REDIS_NOTUSED(mask
);
8961 REDIS_NOTUSED(privdata
);
8963 /* For every byte we read in the read side of the pipe, there is one
8964 * I/O job completed to process. */
8965 while((retval
= read(fd
,buf
,1)) == 1) {
8969 struct dictEntry
*de
;
8971 redisLog(REDIS_DEBUG
,"Processing I/O completed job");
8973 /* Get the processed element (the oldest one) */
8975 assert(listLength(server
.io_processed
) != 0);
8976 if (toprocess
== -1) {
8977 toprocess
= (listLength(server
.io_processed
)*REDIS_MAX_COMPLETED_JOBS_PROCESSED
)/100;
8978 if (toprocess
<= 0) toprocess
= 1;
8980 ln
= listFirst(server
.io_processed
);
8982 listDelNode(server
.io_processed
,ln
);
8984 /* If this job is marked as canceled, just ignore it */
8989 /* Post process it in the main thread, as there are things we
8990 * can do just here to avoid race conditions and/or invasive locks */
8991 redisLog(REDIS_DEBUG
,"Job %p type: %d, key at %p (%s) refcount: %d\n", (void*) j
, j
->type
, (void*)j
->key
, (char*)j
->key
->ptr
, j
->key
->refcount
);
8992 de
= dictFind(j
->db
->dict
,j
->key
);
8994 key
= dictGetEntryKey(de
);
8995 if (j
->type
== REDIS_IOJOB_LOAD
) {
8998 /* Key loaded, bring it at home */
8999 key
->storage
= REDIS_VM_MEMORY
;
9000 key
->vm
.atime
= server
.unixtime
;
9001 vmMarkPagesFree(key
->vm
.page
,key
->vm
.usedpages
);
9002 redisLog(REDIS_DEBUG
, "VM: object %s loaded from disk (threaded)",
9003 (unsigned char*) key
->ptr
);
9004 server
.vm_stats_swapped_objects
--;
9005 server
.vm_stats_swapins
++;
9006 dictGetEntryVal(de
) = j
->val
;
9007 incrRefCount(j
->val
);
9010 /* Handle clients waiting for this key to be loaded. */
9011 handleClientsBlockedOnSwappedKey(db
,key
);
9012 } else if (j
->type
== REDIS_IOJOB_PREPARE_SWAP
) {
9013 /* Now we know the amount of pages required to swap this object.
9014 * Let's find some space for it, and queue this task again
9015 * rebranded as REDIS_IOJOB_DO_SWAP. */
9016 if (!vmCanSwapOut() ||
9017 vmFindContiguousPages(&j
->page
,j
->pages
) == REDIS_ERR
)
9019 /* Ooops... no space or we can't swap as there is
9020 * a fork()ed Redis trying to save stuff on disk. */
9022 key
->storage
= REDIS_VM_MEMORY
; /* undo operation */
9024 /* Note that we need to mark this pages as used now,
9025 * if the job will be canceled, we'll mark them as freed
9027 vmMarkPagesUsed(j
->page
,j
->pages
);
9028 j
->type
= REDIS_IOJOB_DO_SWAP
;
9033 } else if (j
->type
== REDIS_IOJOB_DO_SWAP
) {
9036 /* Key swapped. We can finally free some memory. */
9037 if (key
->storage
!= REDIS_VM_SWAPPING
) {
9038 printf("key->storage: %d\n",key
->storage
);
9039 printf("key->name: %s\n",(char*)key
->ptr
);
9040 printf("key->refcount: %d\n",key
->refcount
);
9041 printf("val: %p\n",(void*)j
->val
);
9042 printf("val->type: %d\n",j
->val
->type
);
9043 printf("val->ptr: %s\n",(char*)j
->val
->ptr
);
9045 redisAssert(key
->storage
== REDIS_VM_SWAPPING
);
9046 val
= dictGetEntryVal(de
);
9047 key
->vm
.page
= j
->page
;
9048 key
->vm
.usedpages
= j
->pages
;
9049 key
->storage
= REDIS_VM_SWAPPED
;
9050 key
->vtype
= j
->val
->type
;
9051 decrRefCount(val
); /* Deallocate the object from memory. */
9052 dictGetEntryVal(de
) = NULL
;
9053 redisLog(REDIS_DEBUG
,
9054 "VM: object %s swapped out at %lld (%lld pages) (threaded)",
9055 (unsigned char*) key
->ptr
,
9056 (unsigned long long) j
->page
, (unsigned long long) j
->pages
);
9057 server
.vm_stats_swapped_objects
++;
9058 server
.vm_stats_swapouts
++;
9060 /* Put a few more swap requests in queue if we are still
9062 if (trytoswap
&& vmCanSwapOut() &&
9063 zmalloc_used_memory() > server
.vm_max_memory
)
9068 more
= listLength(server
.io_newjobs
) <
9069 (unsigned) server
.vm_max_threads
;
9071 /* Don't waste CPU time if swappable objects are rare. */
9072 if (vmSwapOneObjectThreaded() == REDIS_ERR
) {
9080 if (processed
== toprocess
) return;
9082 if (retval
< 0 && errno
!= EAGAIN
) {
9083 redisLog(REDIS_WARNING
,
9084 "WARNING: read(2) error in vmThreadedIOCompletedJob() %s",
9089 static void lockThreadedIO(void) {
9090 pthread_mutex_lock(&server
.io_mutex
);
9093 static void unlockThreadedIO(void) {
9094 pthread_mutex_unlock(&server
.io_mutex
);
9097 /* Remove the specified object from the threaded I/O queue if still not
9098 * processed, otherwise make sure to flag it as canceled. */
/* Details visible in the body:
 * - 'o' must be in REDIS_VM_LOADING or REDIS_VM_SWAPPING state (asserted);
 * - the three queues io_newjobs (0), io_processing (1) and io_processed (2)
 *   are scanned for a non-canceled job whose key pointer equals 'o';
 * - pages of a DO_SWAP job are released unless the job is in io_processing;
 * - finally the storage state is rolled back: LOADING becomes SWAPPED and
 *   SWAPPING becomes MEMORY.
 * NOTE(review): the switch head, the per-case statements and the locking
 * calls are not visible in this excerpt. */
9099 static void vmCancelThreadedIOJob(robj
*o
) {
9101 server
.io_newjobs
, /* 0 */
9102 server
.io_processing
, /* 1 */
9103 server
.io_processed
/* 2 */
9107 assert(o
->storage
== REDIS_VM_LOADING
|| o
->storage
== REDIS_VM_SWAPPING
);
9110 /* Search for a matching key in one of the queues */
9111 for (i
= 0; i
< 3; i
++) {
9115 listRewind(lists
[i
],&li
);
9116 while ((ln
= listNext(&li
)) != NULL
) {
9117 iojob
*job
= ln
->value
;
9119 if (job
->canceled
) continue; /* Skip this, already canceled. */
9120 if (job
->key
== o
) {
9121 redisLog(REDIS_DEBUG
,"*** CANCELED %p (%s) (type %d) (LIST ID %d)\n",
9122 (void*)job
, (char*)o
->ptr
, job
->type
, i
);
9123 /* Mark the pages as free since the swap didn't happened
9124 * or happened but is now discarded. */
9125 if (i
!= 1 && job
->type
== REDIS_IOJOB_DO_SWAP
)
9126 vmMarkPagesFree(job
->page
,job
->pages
);
9127 /* Cancel the job. It depends on the list the job is
9130 case 0: /* io_newjobs */
9131 /* If the job was yet not processed the best thing to do
9132 * is to remove it from the queue at all */
9134 listDelNode(lists
[i
],ln
);
9136 case 1: /* io_processing */
9137 /* Oh Shi- the thread is messing with the Job:
9139 * Probably it's accessing the object if this is a
9140 * PREPARE_SWAP or DO_SWAP job.
9141 * If it's a LOAD job it may be reading from disk and
9142 * if we don't wait for the job to terminate before to
9143 * cancel it, maybe in a few microseconds data can be
9144 * corrupted in this pages. So the short story is:
9146 * Better to wait for the job to move into the
9147 * next queue (processed)... */
9149 /* We try again and again until the job is completed. */
9151 /* But let's wait some time for the I/O thread
9152 * to finish with this job. After all this condition
9153 * should be very rare. */
9156 case 2: /* io_processed */
9157 /* The job was already processed, that's easy...
9158 * just mark it as canceled so that we'll ignore it
9159 * when processing completed jobs. */
9163 /* Finally we have to adjust the storage type of the object
9164 * in order to "UNDO" the operation. */
9165 if (o
->storage
== REDIS_VM_LOADING
)
9166 o
->storage
= REDIS_VM_SWAPPED
;
9167 else if (o
->storage
== REDIS_VM_SWAPPING
)
9168 o
->storage
= REDIS_VM_MEMORY
;
9175 assert(1 != 1); /* We should never reach this */
/* Entry point of an I/O worker thread.
 *
 * The thread detaches itself, then repeatedly takes the first job from
 * server.io_newjobs, moves it to server.io_processing, executes it and
 * finally moves it to server.io_processed, writing one byte to
 * server.io_ready_pipe_write to wake up the main thread. When io_newjobs is
 * empty, server.io_active_threads is decremented ("exiting" is logged).
 * Job execution by type:
 * - REDIS_IOJOB_LOAD: j->val = vmReadObjectFromSwap(j->page, key vtype);
 * - REDIS_IOJOB_PREPARE_SWAP: j->pages = rdbSavedObjectPages(j->val, fp)
 *   with fp opened on /dev/null;
 * - REDIS_IOJOB_DO_SWAP: vmWriteObjectOnSwap(j->val, j->page).
 * NOTE(review): the write(2) to the pipe is wrapped in assert(); if the file
 * were built with NDEBUG the wake-up write would be compiled out.
 * NOTE(review): the fopen("/dev/null") result is not checked in the visible
 * lines. The loop head, the locking calls and several declarations are not
 * visible in this excerpt. */
9178 static void *IOThreadEntryPoint(void *arg
) {
9183 pthread_detach(pthread_self());
9185 /* Get a new job to process */
9187 if (listLength(server
.io_newjobs
) == 0) {
9188 /* No new jobs in queue, exit. */
9189 redisLog(REDIS_DEBUG
,"Thread %ld exiting, nothing to do",
9190 (long) pthread_self());
9191 server
.io_active_threads
--;
9195 ln
= listFirst(server
.io_newjobs
);
9197 listDelNode(server
.io_newjobs
,ln
);
9198 /* Add the job in the processing queue */
9199 j
->thread
= pthread_self();
9200 listAddNodeTail(server
.io_processing
,j
);
9201 ln
= listLast(server
.io_processing
); /* We use ln later to remove it */
9203 redisLog(REDIS_DEBUG
,"Thread %ld got a new job (type %d): %p about key '%s'",
9204 (long) pthread_self(), j
->type
, (void*)j
, (char*)j
->key
->ptr
);
9206 /* Process the Job */
9207 if (j
->type
== REDIS_IOJOB_LOAD
) {
9208 j
->val
= vmReadObjectFromSwap(j
->page
,j
->key
->vtype
);
9209 } else if (j
->type
== REDIS_IOJOB_PREPARE_SWAP
) {
9210 FILE *fp
= fopen("/dev/null","w+");
9211 j
->pages
= rdbSavedObjectPages(j
->val
,fp
);
9213 } else if (j
->type
== REDIS_IOJOB_DO_SWAP
) {
9214 if (vmWriteObjectOnSwap(j
->val
,j
->page
) == REDIS_ERR
)
9218 /* Done: insert the job into the processed queue */
9219 redisLog(REDIS_DEBUG
,"Thread %ld completed the job: %p (key %s)",
9220 (long) pthread_self(), (void*)j
, (char*)j
->key
->ptr
);
9222 listDelNode(server
.io_processing
,ln
);
9223 listAddNodeTail(server
.io_processed
,j
);
9226 /* Signal the main thread there is new stuff to process */
9227 assert(write(server
.io_ready_pipe_write
,"x",1) == 1);
9229 return NULL
; /* never reached */
/* Start one more I/O worker thread running IOThreadEntryPoint().
 *
 * SIGCHLD, SIGHUP and SIGPIPE are added to the signal mask installed with
 * pthread_sigmask() around pthread_create(), and the previous mask is
 * restored afterwards. pthread_create() is retried in a loop while it
 * fails, logging a warning each time. On success
 * server.io_active_threads is incremented.
 * NOTE(review): the mask initialisation and the body of the retry loop
 * after the log call are not visible in this excerpt. */
9232 static void spawnIOThread(void) {
9234 sigset_t mask
, omask
;
9238 sigaddset(&mask
,SIGCHLD
);
9239 sigaddset(&mask
,SIGHUP
);
9240 sigaddset(&mask
,SIGPIPE
);
9241 pthread_sigmask(SIG_SETMASK
, &mask
, &omask
);
9242 while ((err
= pthread_create(&thread
,&server
.io_threads_attr
,IOThreadEntryPoint
,NULL
)) != 0) {
9243 redisLog(REDIS_WARNING
,"Unable to spawn an I/O thread: %s",
9247 pthread_sigmask(SIG_SETMASK
, &omask
, NULL
);
9248 server
.io_active_threads
++;
9251 /* We need to wait for the last thread to exit before we are able to
9252 * fork() in order to BGSAVE or BGREWRITEAOF. */
/* The wait condition visible below is: io_newjobs empty, io_processing
 * empty and server.io_active_threads == 0. While waiting, completed jobs
 * are drained by calling vmThreadedIOCompletedJob() directly, sleeping 1 ms
 * after draining and 10 ms otherwise.
 * NOTE(review): the loop head, the locking calls and the exit statement are
 * not visible in this excerpt. */
9253 static void waitEmptyIOJobsQueue(void) {
9255 int io_processed_len
;
9258 if (listLength(server
.io_newjobs
) == 0 &&
9259 listLength(server
.io_processing
) == 0 &&
9260 server
.io_active_threads
== 0)
9265 /* While waiting for empty jobs queue condition we post-process some
9266 * finished job, as I/O threads may be hanging trying to write against
9267 * the io_ready_pipe_write FD but there are so much pending jobs that
9269 io_processed_len
= listLength(server
.io_processed
);
9271 if (io_processed_len
) {
9272 vmThreadedIOCompletedJob(NULL
,server
.io_ready_pipe_read
,NULL
,0);
9273 usleep(1000); /* 1 millisecond */
9275 usleep(10000); /* 10 milliseconds */
/* Re-open the swap file named server.vm_swap_file in "r+b" mode, storing
 * the new stream in server.vm_fp and its descriptor in server.vm_fd.
 * A failure to open is logged at REDIS_WARNING level ("Exiting.").
 * NOTE(review): the statement following the log call is not visible in this
 * excerpt. */
9280 static void vmReopenSwapFile(void) {
9281 /* Note: we don't close the old one as we are in the child process
9282 * and don't want to mess at all with the original file object. */
9283 server
.vm_fp
= fopen(server
.vm_swap_file
,"r+b");
9284 if (server
.vm_fp
== NULL
) {
9285 redisLog(REDIS_WARNING
,"Can't re-open the VM swap file: %s. Exiting.",
9286 server
.vm_swap_file
);
9289 server
.vm_fd
= fileno(server
.vm_fp
);
9292 /* This function must be called while with threaded IO locked */
/* Appends job 'j' to the tail of server.io_newjobs after logging it at
 * REDIS_DEBUG level. The trailing test compares the number of active I/O
 * threads with server.vm_max_threads.
 * NOTE(review): the statement guarded by that test is not visible in this
 * excerpt -- presumably it spawns a new I/O thread. */
9293 static void queueIOJob(iojob
*j
) {
9294 redisLog(REDIS_DEBUG
,"Queued IO Job %p type %d about key '%s'\n",
9295 (void*)j
, j
->type
, (char*)j
->key
->ptr
);
9296 listAddNodeTail(server
.io_newjobs
,j
);
9297 if (server
.io_active_threads
< server
.vm_max_threads
)
/* Start swapping out the pair key/val of database 'db' in background.
 *
 * The key must be in REDIS_VM_MEMORY state and must not be shared
 * (refcount == 1); both conditions are asserted. A new job of type
 * REDIS_IOJOB_PREPARE_SWAP is allocated and the key is flagged as
 * REDIS_VM_SWAPPING.
 * NOTE(review): the remaining job field assignments, the queueing of the
 * job and the return value are not visible in this excerpt. */
9301 static int vmSwapObjectThreaded(robj
*key
, robj
*val
, redisDb
*db
) {
9304 assert(key
->storage
== REDIS_VM_MEMORY
);
9305 assert(key
->refcount
== 1);
9307 j
= zmalloc(sizeof(*j
));
9308 j
->type
= REDIS_IOJOB_PREPARE_SWAP
;
9314 j
->thread
= (pthread_t
) -1;
9315 key
->storage
= REDIS_VM_SWAPPING
;
9323 /* ============ Virtual Memory - Blocking clients on missing keys =========== */
9325 /* This function makes the client 'c' waiting for the key 'key' to be loaded.
9326 * If there is not already a job loading the key, it is created.
9327 * The key is added to the io_keys list in the client structure, and also
9328 * in the hash table mapping swapped keys to waiting clients, that is,
9329 * server.io_waited_keys. */
/* Details visible in the body:
 * - 0 is returned when the key is not present in c->db->dict;
 * - a key in REDIS_VM_SWAPPING state has its swap job canceled;
 * - otherwise 'key' is appended to c->io_keys and 'c' is appended to the
 *   per-key client list stored in c->db->io_keys (created on first use);
 * - when the stored key is REDIS_VM_SWAPPED it is flagged REDIS_VM_LOADING
 *   and a REDIS_IOJOB_LOAD job is allocated for it.
 * NOTE(review): the header mentions server.io_waited_keys, while the code
 * below uses c->db->io_keys. The return statements of the early-exit
 * branches and the job queueing are not visible in this excerpt. */
9330 static int waitForSwappedKey(redisClient
*c
, robj
*key
) {
9331 struct dictEntry
*de
;
9335 /* If the key does not exist or is already in RAM we don't need to
9336 * block the client at all. */
9337 de
= dictFind(c
->db
->dict
,key
);
9338 if (de
== NULL
) return 0;
9339 o
= dictGetEntryKey(de
);
9340 if (o
->storage
== REDIS_VM_MEMORY
) {
9342 } else if (o
->storage
== REDIS_VM_SWAPPING
) {
9343 /* We were swapping the key, undo it! */
9344 vmCancelThreadedIOJob(o
);
9348 /* OK: the key is either swapped, or being loaded just now. */
9350 /* Add the key to the list of keys this client is waiting for.
9351 * This maps clients to keys they are waiting for. */
9352 listAddNodeTail(c
->io_keys
,key
);
9355 /* Add the client to the swapped keys => clients waiting map. */
9356 de
= dictFind(c
->db
->io_keys
,key
);
9360 /* For every key we take a list of clients blocked for it */
9362 retval
= dictAdd(c
->db
->io_keys
,key
,l
);
9364 assert(retval
== DICT_OK
);
9366 l
= dictGetEntryVal(de
);
9368 listAddNodeTail(l
,c
);
9370 /* Are we already loading the key from disk? If not create a job */
9371 if (o
->storage
== REDIS_VM_SWAPPED
) {
9374 o
->storage
= REDIS_VM_LOADING
;
9375 j
= zmalloc(sizeof(*j
));
9376 j
->type
= REDIS_IOJOB_LOAD
;
9379 j
->key
->vtype
= o
->vtype
;
9380 j
->page
= o
->vm
.page
;
9383 j
->thread
= (pthread_t
) -1;
9391 /* Preload keys needed for the ZUNION and ZINTER commands. */
9392 static void zunionInterBlockClientOnSwappedKeys(redisClient
*c
) {
9394 num
= atoi(c
->argv
[2]->ptr
);
9395 for (i
= 0; i
< num
; i
++) {
9396 waitForSwappedKey(c
,c
->argv
[3+i
]);
9400 /* Is this client attempting to run a command against swapped keys?
9401 * If so, block it ASAP, load the keys in background, then resume it.
9403 * The important idea about this function is that it can fail! If keys will
9404 * still be swapped when the client is resumed, this key lookups will
9405 * just block loading keys from disk. In practical terms this should only
9406 * happen with SORT BY command or if there is a bug in this function.
9408 * Return 1 if the client is marked as blocked, 0 if the client can
9409 * continue as the keys it is going to access appear to be in memory. */
/* Details visible in the body:
 * - a command with a vm_preload_proc delegates key preloading to it;
 * - the generic path returns 0 when cmd->vm_firstkey is 0, otherwise it
 *   calls waitForSwappedKey() for argv[vm_firstkey..last] with step
 *   vm_keystep, where a negative vm_lastkey counts from the end (argc+last);
 * - a client with a non-empty io_keys list gets REDIS_IO_WAIT set, its
 *   readable file event removed and server.vm_blocked_clients incremented.
 * NOTE(review): the else head between the two paths and the final return
 * statements are not visible in this excerpt. */
9410 static int blockClientOnSwappedKeys(struct redisCommand
*cmd
, redisClient
*c
) {
9413 if (cmd
->vm_preload_proc
!= NULL
) {
9414 cmd
->vm_preload_proc(c
);
9416 if (cmd
->vm_firstkey
== 0) return 0;
9417 last
= cmd
->vm_lastkey
;
9418 if (last
< 0) last
= c
->argc
+last
;
9419 for (j
= cmd
->vm_firstkey
; j
<= last
; j
+= cmd
->vm_keystep
)
9420 waitForSwappedKey(c
,c
->argv
[j
]);
9423 /* If the client was blocked for at least one key, mark it as blocked. */
9424 if (listLength(c
->io_keys
)) {
9425 c
->flags
|= REDIS_IO_WAIT
;
9426 aeDeleteFileEvent(server
.el
,c
->fd
,AE_READABLE
);
9427 server
.vm_blocked_clients
++;
9434 /* Remove the 'key' from the list of blocked keys for a given client.
9436 * The function returns 1 when there are no longer blocking keys after
9437 * the current one was removed (and the client can be unblocked). */
/* Two structures are updated: the client's own c->io_keys list (matched
 * with compareStringObjects()) and the per-key client list stored in
 * c->db->io_keys, whose dictionary entry is deleted once that list is empty.
 * NOTE(review): the declarations, the loop break and the removal of the
 * client node found by listSearchKey() are not visible in this excerpt. */
9438 static int dontWaitForSwappedKey(redisClient
*c
, robj
*key
) {
9442 struct dictEntry
*de
;
9444 /* Remove the key from the list of keys this client is waiting for. */
9445 listRewind(c
->io_keys
,&li
);
9446 while ((ln
= listNext(&li
)) != NULL
) {
9447 if (compareStringObjects(ln
->value
,key
) == 0) {
9448 listDelNode(c
->io_keys
,ln
);
9454 /* Remove the client from the key => waiting clients map. */
9455 de
= dictFind(c
->db
->io_keys
,key
);
9457 l
= dictGetEntryVal(de
);
9458 ln
= listSearchKey(l
,c
);
9461 if (listLength(l
) == 0)
9462 dictDelete(c
->db
->io_keys
,key
);
9464 return listLength(c
->io_keys
) == 0;
/* Called once 'key' of database 'db' is available: every client found in
 * the list stored under 'key' in db->io_keys stops waiting for that key
 * (dontWaitForSwappedKey()), and clients with no other pending key are
 * appended to server.io_ready_clients.
 * The list length is sampled once up front, see the note in the body.
 * NOTE(review): the declarations, the missing-entry check and the loop head
 * are not visible in this excerpt. */
9467 static void handleClientsBlockedOnSwappedKey(redisDb
*db
, robj
*key
) {
9468 struct dictEntry
*de
;
9473 de
= dictFind(db
->io_keys
,key
);
9476 l
= dictGetEntryVal(de
);
9477 len
= listLength(l
);
9478 /* Note: we can't use something like while(listLength(l)) as the list
9479 * can be freed by the calling function when we remove the last element. */
9482 redisClient
*c
= ln
->value
;
9484 if (dontWaitForSwappedKey(c
,key
)) {
9485 /* Put the client in the list of clients ready to go as we
9486 * loaded all the keys about it. */
9487 listAddNodeTail(server
.io_ready_clients
,c
);
9492 /* =========================== Remote Configuration ========================= */
/* CONFIG SET <parameter> <value>.
 *
 * The parameter name (argv[2]) is matched case-insensitively against
 * "dbfilename", "requirepass", "masterauth" and "maxmemory". The three
 * string parameters free the old value and store a zstrdup() copy of the
 * decoded argv[3]; "maxmemory" is parsed with strtoll() in base 10. Any
 * other name produces a "-ERR not supported CONFIG parameter" reply,
 * otherwise shared.ok is sent.
 * NOTE(review): strtoll() is called without error or range checking in the
 * visible line. The release of the decoded object 'o' and the return after
 * the error reply are not visible in this excerpt. */
9494 static void configSetCommand(redisClient
*c
) {
9495 robj
*o
= getDecodedObject(c
->argv
[3]);
9496 if (!strcasecmp(c
->argv
[2]->ptr
,"dbfilename")) {
9497 zfree(server
.dbfilename
);
9498 server
.dbfilename
= zstrdup(o
->ptr
);
9499 } else if (!strcasecmp(c
->argv
[2]->ptr
,"requirepass")) {
9500 zfree(server
.requirepass
);
9501 server
.requirepass
= zstrdup(o
->ptr
);
9502 } else if (!strcasecmp(c
->argv
[2]->ptr
,"masterauth")) {
9503 zfree(server
.masterauth
);
9504 server
.masterauth
= zstrdup(o
->ptr
);
9505 } else if (!strcasecmp(c
->argv
[2]->ptr
,"maxmemory")) {
9506 server
.maxmemory
= strtoll(o
->ptr
, NULL
, 10);
9508 addReplySds(c
,sdscatprintf(sdsempty(),
9509 "-ERR not supported CONFIG parameter %s\r\n",
9510 (char*)c
->argv
[2]->ptr
));
9515 addReply(c
,shared
.ok
);
/* CONFIG GET <pattern>.
 *
 * The decoded argv[2] is used as a glob pattern and tested with
 * stringmatch() against "dbfilename", "requirepass", "masterauth" and
 * "maxmemory"; each match emits the parameter name followed by its current
 * value as bulk replies. 'lenobj' is a placeholder object whose content is
 * filled in at the end with the multi-bulk header "*<matches*2>\r\n".
 * NOTE(review): the maxmemory value is formatted with "%llu\n", so the
 * reported value carries a trailing newline -- confirm whether intended.
 * The 'matches' counter updates and the release of 'o' are not visible in
 * this excerpt. */
9518 static void configGetCommand(redisClient
*c
) {
9519 robj
*o
= getDecodedObject(c
->argv
[2]);
9520 robj
*lenobj
= createObject(REDIS_STRING
,NULL
);
9521 char *pattern
= o
->ptr
;
9525 decrRefCount(lenobj
);
9527 if (stringmatch(pattern
,"dbfilename",0)) {
9528 addReplyBulkCString(c
,"dbfilename");
9529 addReplyBulkCString(c
,server
.dbfilename
);
9532 if (stringmatch(pattern
,"requirepass",0)) {
9533 addReplyBulkCString(c
,"requirepass");
9534 addReplyBulkCString(c
,server
.requirepass
);
9537 if (stringmatch(pattern
,"masterauth",0)) {
9538 addReplyBulkCString(c
,"masterauth");
9539 addReplyBulkCString(c
,server
.masterauth
);
9542 if (stringmatch(pattern
,"maxmemory",0)) {
9545 snprintf(buf
,128,"%llu\n",server
.maxmemory
);
9546 addReplyBulkCString(c
,"maxmemory");
9547 addReplyBulkCString(c
,buf
);
9551 lenobj
->ptr
= sdscatprintf(sdsempty(),"*%d\r\n",matches
*2);
/* CONFIG command dispatcher.
 *
 * argv[1] selects the subcommand, case-insensitively:
 * - "set"       requires argc == 4 and calls configSetCommand();
 * - "get"       requires argc == 3 and calls configGetCommand();
 * - "resetstat" requires argc == 2, zeroes stat_numcommands,
 *               stat_numconnections and stat_expiredkeys, resets
 *               stat_starttime to time(NULL) and replies shared.ok.
 * A wrong argument count jumps to 'badarity', which replies with
 * "-ERR Wrong number of arguments for CONFIG <subcommand>"; an unknown
 * subcommand gets the "must be one of GET, SET, RESETSTAT" error.
 * NOTE(review): the final else head, the return before the label and the
 * 'badarity:' label line are not visible in this excerpt. */
9554 static void configCommand(redisClient
*c
) {
9555 if (!strcasecmp(c
->argv
[1]->ptr
,"set")) {
9556 if (c
->argc
!= 4) goto badarity
;
9557 configSetCommand(c
);
9558 } else if (!strcasecmp(c
->argv
[1]->ptr
,"get")) {
9559 if (c
->argc
!= 3) goto badarity
;
9560 configGetCommand(c
);
9561 } else if (!strcasecmp(c
->argv
[1]->ptr
,"resetstat")) {
9562 if (c
->argc
!= 2) goto badarity
;
9563 server
.stat_numcommands
= 0;
9564 server
.stat_numconnections
= 0;
9565 server
.stat_expiredkeys
= 0;
9566 server
.stat_starttime
= time(NULL
);
9567 addReply(c
,shared
.ok
);
9569 addReplySds(c
,sdscatprintf(sdsempty(),
9570 "-ERR CONFIG subcommand must be one of GET, SET, RESETSTAT\r\n"));
9575 addReplySds(c
,sdscatprintf(sdsempty(),
9576 "-ERR Wrong number of arguments for CONFIG %s\r\n",
9577 (char*) c
->argv
[1]->ptr
));
9580 /* =========================== Pubsub implementation ======================== */
/* Free callback for a pubsubPattern element: 'p' is treated as a
 * pubsubPattern pointer and the reference held on its pattern object is
 * dropped with decrRefCount().
 * NOTE(review): one line after the decrRefCount() call is not visible in
 * this excerpt -- presumably it releases the structure itself; confirm. */
9582 static void freePubsubPattern(void *p
) {
9583 pubsubPattern
*pat
= p
;
9585 decrRefCount(pat
->pattern
);
9589 static int listMatchPubsubPattern(void *a
, void *b
) {
9590 pubsubPattern
*pa
= a
, *pb
= b
;
9592 return (pa
->client
== pb
->client
) &&
9593 (compareStringObjects(pa
->pattern
,pb
->pattern
) == 0);
9596 /* Subscribe a client to a channel. Returns 1 if the operation succeeded, or
9597 * 0 if the client was already subscribed to that channel. */
/* Two tables are kept in sync: c->pubsub_channels (channel set of the
 * client) and server.pubsub_channels (channel => list of clients). Each
 * table that stores 'channel' as a key takes its own reference with
 * incrRefCount(). The subscribe notification (mbulk3, "subscribe", channel,
 * total subscription count) is then sent to the client.
 * NOTE(review): the 'retval' bookkeeping, the test on 'de' and the return
 * statement are not visible in this excerpt. */
9598 static int pubsubSubscribeChannel(redisClient
*c
, robj
*channel
) {
9599 struct dictEntry
*de
;
9600 list
*clients
= NULL
;
9603 /* Add the channel to the client -> channels hash table */
9604 if (dictAdd(c
->pubsub_channels
,channel
,NULL
) == DICT_OK
) {
9606 incrRefCount(channel
);
9607 /* Add the client to the channel -> list of clients hash table */
9608 de
= dictFind(server
.pubsub_channels
,channel
);
9610 clients
= listCreate();
9611 dictAdd(server
.pubsub_channels
,channel
,clients
);
9612 incrRefCount(channel
);
9614 clients
= dictGetEntryVal(de
);
9616 listAddNodeTail(clients
,c
);
9618 /* Notify the client */
9619 addReply(c
,shared
.mbulk3
);
9620 addReply(c
,shared
.subscribebulk
);
9621 addReplyBulk(c
,channel
);
9622 addReplyLong(c
,dictSize(c
->pubsub_channels
)+listLength(c
->pubsub_patterns
));
9626 /* Unsubscribe a client from a channel. Returns 1 if the operation succeeded, or
9627 * 0 if the client was not subscribed to the specified channel. */
/* 'channel' is protected with an extra reference for the duration of the
 * call, because it may be the very object stored in the hash tables that
 * are modified here; the reference is dropped at the end.
 * NOTE(review): the test on 'notify' guarding the reply and the return
 * statement are not visible in this excerpt. */
9628 static int pubsubUnsubscribeChannel(redisClient
*c
, robj
*channel
, int notify
) {
9629 struct dictEntry
*de
;
9634 /* Remove the channel from the client -> channels hash table */
9635 incrRefCount(channel
); /* channel may be just a pointer to the same object
9636 we have in the hash tables. Protect it... */
9637 if (dictDelete(c
->pubsub_channels
,channel
) == DICT_OK
) {
9639 /* Remove the client from the channel -> clients list hash table */
9640 de
= dictFind(server
.pubsub_channels
,channel
);
9642 clients
= dictGetEntryVal(de
);
9643 ln
= listSearchKey(clients
,c
);
9645 listDelNode(clients
,ln
);
9646 if (listLength(clients
) == 0) {
9647 /* Free the list and associated hash entry at all if this was
9648 * the latest client, so that it will not be possible to abuse
9649 * Redis PUBSUB creating millions of channels. */
9650 dictDelete(server
.pubsub_channels
,channel
);
9653 /* Notify the client */
9655 addReply(c
,shared
.mbulk3
);
9656 addReply(c
,shared
.unsubscribebulk
);
9657 addReplyBulk(c
,channel
);
9658 addReplyLong(c
,dictSize(c
->pubsub_channels
)+
9659 listLength(c
->pubsub_patterns
));
9662 decrRefCount(channel
); /* it is finally safe to release it */
9666 /* Subscribe a client to a pattern. Returns 1 if the operation succeeded,
 * or 0 if the client was already subscribed to that pattern. */
/* A new subscription appends 'pattern' to c->pubsub_patterns (taking a
 * reference) and appends a freshly allocated pubsubPattern, holding the
 * decoded pattern, to server.pubsub_patterns. The psubscribe notification
 * is then sent to the client.
 * NOTE(review): the assignment of the pubsubPattern client field, the
 * 'retval' bookkeeping and the return statement are not visible in this
 * excerpt. */
9667 static int pubsubSubscribePattern(redisClient
*c
, robj
*pattern
) {
9670 if (listSearchKey(c
->pubsub_patterns
,pattern
) == NULL
) {
9673 listAddNodeTail(c
->pubsub_patterns
,pattern
);
9674 incrRefCount(pattern
);
9675 pat
= zmalloc(sizeof(*pat
));
9676 pat
->pattern
= getDecodedObject(pattern
);
9678 listAddNodeTail(server
.pubsub_patterns
,pat
);
9680 /* Notify the client */
9681 addReply(c
,shared
.mbulk3
);
9682 addReply(c
,shared
.psubscribebulk
);
9683 addReplyBulk(c
,pattern
);
9684 addReplyLong(c
,dictSize(c
->pubsub_channels
)+listLength(c
->pubsub_patterns
));
9688 /* Unsubscribe a client from a pattern. Returns 1 if the operation succeeded, or
9689 * 0 if the client was not subscribed to the specified pattern. */
/* The pattern is removed from c->pubsub_patterns and the matching
 * pubsubPattern is removed from server.pubsub_patterns, located with
 * listSearchKey() through a stack-allocated lookup key 'pat'.
 * NOTE(review): the assignment of the lookup key's client field, the test
 * on 'notify' and the return statement are not visible in this excerpt. */
9690 static int pubsubUnsubscribePattern(redisClient
*c
, robj
*pattern
, int notify
) {
9695 incrRefCount(pattern
); /* Protect the object. May be the same we remove */
9696 if ((ln
= listSearchKey(c
->pubsub_patterns
,pattern
)) != NULL
) {
9698 listDelNode(c
->pubsub_patterns
,ln
);
9700 pat
.pattern
= pattern
;
9701 ln
= listSearchKey(server
.pubsub_patterns
,&pat
);
9702 listDelNode(server
.pubsub_patterns
,ln
);
9704 /* Notify the client */
9706 addReply(c
,shared
.mbulk3
);
9707 addReply(c
,shared
.punsubscribebulk
);
9708 addReplyBulk(c
,pattern
);
9709 addReplyLong(c
,dictSize(c
->pubsub_channels
)+
9710 listLength(c
->pubsub_patterns
));
9712 decrRefCount(pattern
);
9716 /* Unsubscribe from all the channels. Return the number of channels the
9717 * client was subscribed from. */
/* Iterates c->pubsub_channels with a dict iterator and sums the results of
 * pubsubUnsubscribeChannel() for each channel; 'notify' is forwarded
 * unchanged. The iterator is released before returning.
 * NOTE(review): the declarations and the return statement are not visible
 * in this excerpt. */
9718 static int pubsubUnsubscribeAllChannels(redisClient
*c
, int notify
) {
9719 dictIterator
*di
= dictGetIterator(c
->pubsub_channels
);
9723 while((de
= dictNext(di
)) != NULL
) {
9724 robj
*channel
= dictGetEntryKey(de
);
9726 count
+= pubsubUnsubscribeChannel(c
,channel
,notify
);
9728 dictReleaseIterator(di
);
9732 /* Unsubscribe from all the patterns. Return the number of patterns the
9733 * client was subscribed from. */
/* Walks c->pubsub_patterns with a list iterator and sums the results of
 * pubsubUnsubscribePattern() for each pattern; 'notify' is forwarded
 * unchanged.
 * NOTE(review): the declarations and the return statement are not visible
 * in this excerpt. */
9734 static int pubsubUnsubscribeAllPatterns(redisClient
*c
, int notify
) {
9739 listRewind(c
->pubsub_patterns
,&li
);
9740 while ((ln
= listNext(&li
)) != NULL
) {
9741 robj
*pattern
= ln
->value
;
9743 count
+= pubsubUnsubscribePattern(c
,pattern
,notify
);
9748 /* Publish a message */
/* Delivery happens in two passes:
 * 1. clients found in the list stored under 'channel' in
 *    server.pubsub_channels receive mbulk3 / "message" / channel / message;
 * 2. when server.pubsub_patterns is not empty, 'channel' is decoded with
 *    getDecodedObject() and every pubsubPattern whose pattern matches the
 *    channel name (stringmatchlen()) has the same reply sent to its client;
 *    the decoded channel reference is released afterwards.
 * NOTE(review): the local variable 'list' has the same name as the 'list'
 * type. The receivers counter and the return statement are not visible in
 * this excerpt. */
9749 static int pubsubPublishMessage(robj
*channel
, robj
*message
) {
9751 struct dictEntry
*de
;
9755 /* Send to clients listening for that channel */
9756 de
= dictFind(server
.pubsub_channels
,channel
);
9758 list
*list
= dictGetEntryVal(de
);
9762 listRewind(list
,&li
);
9763 while ((ln
= listNext(&li
)) != NULL
) {
9764 redisClient
*c
= ln
->value
;
9766 addReply(c
,shared
.mbulk3
);
9767 addReply(c
,shared
.messagebulk
);
9768 addReplyBulk(c
,channel
);
9769 addReplyBulk(c
,message
);
9773 /* Send to clients listening to matching channels */
9774 if (listLength(server
.pubsub_patterns
)) {
9775 listRewind(server
.pubsub_patterns
,&li
);
9776 channel
= getDecodedObject(channel
);
9777 while ((ln
= listNext(&li
)) != NULL
) {
9778 pubsubPattern
*pat
= ln
->value
;
9780 if (stringmatchlen((char*)pat
->pattern
->ptr
,
9781 sdslen(pat
->pattern
->ptr
),
9782 (char*)channel
->ptr
,
9783 sdslen(channel
->ptr
),0)) {
9784 addReply(pat
->client
,shared
.mbulk3
);
9785 addReply(pat
->client
,shared
.messagebulk
);
9786 addReplyBulk(pat
->client
,channel
);
9787 addReplyBulk(pat
->client
,message
);
9791 decrRefCount(channel
);
9796 static void subscribeCommand(redisClient
*c
) {
9799 for (j
= 1; j
< c
->argc
; j
++)
9800 pubsubSubscribeChannel(c
,c
->argv
[j
]);
/* UNSUBSCRIBE [channel ...]
 *
 * Two paths are visible: unsubscribing from all the channels with
 * notification enabled, and unsubscribing from each channel listed in
 * argv[1..argc-1] with notification enabled.
 * NOTE(review): the condition selecting between the two paths is not
 * visible in this excerpt -- presumably the first one is taken when no
 * channel argument is given; confirm. */
9803 static void unsubscribeCommand(redisClient
*c
) {
9805 pubsubUnsubscribeAllChannels(c
,1);
9810 for (j
= 1; j
< c
->argc
; j
++)
9811 pubsubUnsubscribeChannel(c
,c
->argv
[j
],1);
9815 static void psubscribeCommand(redisClient
*c
) {
9818 for (j
= 1; j
< c
->argc
; j
++)
9819 pubsubSubscribePattern(c
,c
->argv
[j
]);
/* PUNSUBSCRIBE [pattern ...]
 *
 * Two paths are visible: unsubscribing from all the patterns with
 * notification enabled, and unsubscribing from each pattern listed in
 * argv[1..argc-1] with notification enabled.
 * NOTE(review): the condition selecting between the two paths is not
 * visible in this excerpt -- presumably the first one is taken when no
 * pattern argument is given; confirm. */
9822 static void punsubscribeCommand(redisClient
*c
) {
9824 pubsubUnsubscribeAllPatterns(c
,1);
9829 for (j
= 1; j
< c
->argc
; j
++)
9830 pubsubUnsubscribePattern(c
,c
->argv
[j
],1);
9834 static void publishCommand(redisClient
*c
) {
9835 int receivers
= pubsubPublishMessage(c
->argv
[1],c
->argv
[2]);
9836 addReplyLong(c
,receivers
);
9839 /* ================================= Debugging ============================== */
/* Handler for the DEBUG command. It dispatches on c->argv[1], compared
 * case-insensitively with strcasecmp(), to one of the subcommands visible
 * below: segfault, reload, loadaof, object, swapin, swapout. The final reply
 * in the function is the "-ERR Syntax error" message.
 * NOTE(review): the embedded line numbers skip in several places, so some
 * statements of each branch (declarations, returns, closing braces and
 * possibly more) are not shown in this listing. */
9841 static void debugCommand(redisClient
*c
) {
9842 if (!strcasecmp(c
->argv
[1]->ptr
,"segfault")) {
/* "reload": write the dataset with rdbSave() and read it back with rdbLoad();
 * a failure of either call is answered with shared.err. */
9844 } else if (!strcasecmp(c
->argv
[1]->ptr
,"reload")) {
9845 if (rdbSave(server
.dbfilename
) != REDIS_OK
) {
9846 addReply(c
,shared
.err
);
9850 if (rdbLoad(server
.dbfilename
) != REDIS_OK
) {
9851 addReply(c
,shared
.err
);
9854 redisLog(REDIS_WARNING
,"DB reloaded by DEBUG RELOAD");
9855 addReply(c
,shared
.ok
);
/* "loadaof": load server.appendfilename with loadAppendOnlyFile(); failure is
 * answered with shared.err, success is logged and answered with shared.ok. */
9856 } else if (!strcasecmp(c
->argv
[1]->ptr
,"loadaof")) {
9858 if (loadAppendOnlyFile(server
.appendfilename
) != REDIS_OK
) {
9859 addReply(c
,shared
.err
);
9862 redisLog(REDIS_WARNING
,"Append Only File loaded by DEBUG LOADAOF");
9863 addReply(c
,shared
.ok
);
/* "object <key>" (exactly 3 arguments): look the key up directly in the
 * database dict and describe the key and value objects. */
9864 } else if (!strcasecmp(c
->argv
[1]->ptr
,"object") && c
->argc
== 3) {
9865 dictEntry
*de
= dictFind(c
->db
->dict
,c
->argv
[2]);
9869 addReply(c
,shared
.nokeyerr
);
9872 key
= dictGetEntryKey(de
);
9873 val
= dictGetEntryVal(de
);
/* First reply form, used when VM is disabled or the key storage is
 * REDIS_VM_MEMORY / REDIS_VM_SWAPPING: addresses, refcounts, encoding name
 * and serialized length. */
9874 if (!server
.vm_enabled
|| (key
->storage
== REDIS_VM_MEMORY
||
9875 key
->storage
== REDIS_VM_SWAPPING
)) {
/* The encoding is used as an index into strencoding[] only when it is inside
 * the table; the snprintf() below formats an "unknown encoding" text. */
9879 if (val
->encoding
< (sizeof(strencoding
)/sizeof(char*))) {
9880 strenc
= strencoding
[val
->encoding
];
9882 snprintf(buf
,64,"unknown encoding %d\n", val
->encoding
);
9885 addReplySds(c
,sdscatprintf(sdsempty(),
9886 "+Key at:%p refcount:%d, value at:%p refcount:%d "
9887 "encoding:%s serializedlength:%lld\r\n",
9888 (void*)key
, key
->refcount
, (void*)val
, val
->refcount
,
9889 strenc
, (long long) rdbSavedObjectLen(val
,NULL
)));
/* Second reply form: reports key->vm.page and key->vm.usedpages.
 * NOTE(review): presumably the else-branch of the test above; the line
 * joining the two is not shown. */
9891 addReplySds(c
,sdscatprintf(sdsempty(),
9892 "+Key at:%p refcount:%d, value swapped at: page %llu "
9893 "using %llu pages\r\n",
9894 (void*)key
, key
->refcount
, (unsigned long long) key
->vm
.page
,
9895 (unsigned long long) key
->vm
.usedpages
));
/* "swapin <key>" (exactly 3 arguments): performs a lookupKeyRead() on the key
 * and replies shared.ok. NOTE(review): presumably the lookup itself is what
 * brings a swapped value back into memory -- confirm. */
9897 } else if (!strcasecmp(c
->argv
[1]->ptr
,"swapin") && c
->argc
== 3) {
9898 lookupKeyRead(c
->db
,c
->argv
[2]);
9899 addReply(c
,shared
.ok
);
/* "swapout <key>" (exactly 3 arguments): swap the value of the key out with
 * vmSwapObjectBlocking(). */
9900 } else if (!strcasecmp(c
->argv
[1]->ptr
,"swapout") && c
->argc
== 3) {
9901 dictEntry
*de
= dictFind(c
->db
->dict
,c
->argv
[2]);
9904 if (!server
.vm_enabled
) {
9905 addReplySds(c
,sdsnew("-ERR Virtual Memory is disabled\r\n"));
9909 addReply(c
,shared
.nokeyerr
);
9912 key
= dictGetEntryKey(de
);
9913 val
= dictGetEntryVal(de
);
9914 /* If the key is shared we want to create a copy */
9915 if (key
->refcount
> 1) {
9916 robj
*newkey
= dupStringObject(key
);
/* The dict entry is updated to point at the private copy as well. */
9918 key
= dictGetEntryKey(de
) = newkey
;
/* Only keys whose storage is REDIS_VM_MEMORY are swapped; on success the
 * value slot of the dict entry is set to NULL. */
9921 if (key
->storage
!= REDIS_VM_MEMORY
) {
9922 addReplySds(c
,sdsnew("-ERR This key is not in memory\r\n"));
9923 } else if (vmSwapObjectBlocking(key
,val
) == REDIS_OK
) {
9924 dictGetEntryVal(de
) = NULL
;
9925 addReply(c
,shared
.ok
);
9927 addReply(c
,shared
.err
);
/* Reply for input that matched none of the subcommands above. */
9930 addReplySds(c
,sdsnew(
9931 "-ERR Syntax error, try DEBUG [SEGFAULT|OBJECT <key>|SWAPIN <key>|SWAPOUT <key>|RELOAD]\r\n"));
/* Report a failed assertion. Logs, at REDIS_WARNING level, a banner followed
 * by the file name, line number and the text of the expression that was not
 * true. When HAVE_BACKTRACE is defined it also logs that a SIGSEGV is being
 * forced in order to print the stack trace.
 * NOTE(review): the statement that actually forces the signal, and the
 * closing #endif, are not shown in this listing. */
9935 static void _redisAssert(char *estr
, char *file
, int line
) {
9936 redisLog(REDIS_WARNING
,"=== ASSERTION FAILED ===");
9937 redisLog(REDIS_WARNING
,"==> %s:%d '%s' is not true\n",file
,line
,estr
);
9938 #ifdef HAVE_BACKTRACE
9939 redisLog(REDIS_WARNING
,"(forcing SIGSEGV in order to print the stack trace)");
/* Report an unrecoverable condition. Logs, at REDIS_WARNING level, a banner
 * followed by the message, the file name and the line number. When
 * HAVE_BACKTRACE is defined it also logs that a SIGSEGV is being forced in
 * order to print the stack trace.
 * NOTE(review): the statement that actually forces the signal, and the
 * closing #endif, are not shown in this listing. */
9944 static void _redisPanic(char *msg
, char *file
, int line
) {
9945 redisLog(REDIS_WARNING
,"!!! Software Failure. Press left mouse button to continue");
9946 redisLog(REDIS_WARNING
,"Guru Meditation: %s #%s:%d",msg
,file
,line
);
9947 #ifdef HAVE_BACKTRACE
9948 redisLog(REDIS_WARNING
,"(forcing SIGSEGV in order to print the stack trace)");
9953 /* =================================== Main! ================================ */
/* Read the kernel overcommit setting: opens /proc/sys/vm/overcommit_memory
 * for reading and fetches up to one line (at most 63 characters) into buf
 * with fgets().
 * NOTE(review): the declaration of buf, the handling of a failed fopen(),
 * the fclose() calls and the return statements are not shown in this
 * listing; the caller compares the result with 0, so presumably the parsed
 * integer is returned -- confirm. */
9956 int linuxOvercommitMemoryValue(void) {
9957 FILE *fp
= fopen("/proc/sys/vm/overcommit_memory","r");
/* A NULL result from fgets() (error or end of file) takes a separate path. */
9961 if (fgets(buf
,64,fp
) == NULL
) {
9970 void linuxOvercommitMemoryWarning(void) {
9971 if (linuxOvercommitMemoryValue() == 0) {
9972 redisLog(REDIS_WARNING
,"WARNING overcommit_memory is set to 0! Background save may fail under low memory condition. To fix this issue add 'vm.overcommit_memory = 1' to /etc/sysctl.conf and then reboot or run the command 'sysctl vm.overcommit_memory=1' for this to take effect.");
9975 #endif /* __linux__ */
9977 static void daemonize(void) {
9981 if (fork() != 0) exit(0); /* parent exits */
9982 setsid(); /* create a new session */
9984 /* Every output goes to /dev/null. If Redis is daemonized but
9985 * the 'logfile' is set to 'stdout' in the configuration file
9986 * it will not log at all. */
9987 if ((fd
= open("/dev/null", O_RDWR
, 0)) != -1) {
9988 dup2(fd
, STDIN_FILENO
);
9989 dup2(fd
, STDOUT_FILENO
);
9990 dup2(fd
, STDERR_FILENO
);
9991 if (fd
> STDERR_FILENO
) close(fd
);
9993 /* Try to write the pid file */
9994 fp
= fopen(server
.pidfile
,"w");
9996 fprintf(fp
,"%d\n",getpid());
/* Print "Redis server version <REDIS_VERSION>" followed by a newline on
 * standard output.
 * NOTE(review): the rest of the body is not shown in this listing; main()
 * calls this for -v / --version, so presumably it terminates the process
 * afterwards -- confirm. */
10001 static void version() {
10002 printf("Redis server version %s\n", REDIS_VERSION
);
/* Print the two-line command line synopsis on standard error: the form taking
 * a configuration file path and the form reading the configuration from
 * standard input ("-").
 * NOTE(review): the rest of the body is not shown in this listing; presumably
 * the process terminates afterwards -- confirm. */
10006 static void usage() {
10007 fprintf(stderr
,"Usage: ./redis-server [/path/to/redis.conf]\n");
10008 fprintf(stderr
," ./redis-server - (read config from stdin)\n");
/* Program entry point. The visible steps, in order: initialize the default
 * configuration, process the command line, optionally daemonize, log the
 * start-up notice, load the dataset (append only file or RDB file), install
 * the before-sleep hook on the event loop and finally delete the event loop.
 * NOTE(review): the embedded line numbers skip in several places (for
 * example 10016, 10023-10024, 10028, 10043), so the argc test that opens the
 * first branch, the server initialization and the call that runs the event
 * loop are not shown in this listing. */
10012 int main(int argc
, char **argv
) {
10015 initServerConfig();
/* Single-argument forms: "-v" / "--version", "--help", otherwise argv[1] is
 * passed to loadServerConfig() after the save parameters are reset. */
10017 if (strcmp(argv
[1], "-v") == 0 ||
10018 strcmp(argv
[1], "--version") == 0) version();
10019 if (strcmp(argv
[1], "--help") == 0) usage();
10020 resetServerSaveParams();
10021 loadServerConfig(argv
[1]);
10022 } else if ((argc
> 2)) {
10025 redisLog(REDIS_WARNING
,"Warning: no config file specified, using the default config. In order to specify a config file use 'redis-server /path/to/redis.conf'");
10027 if (server
.daemonize
) daemonize();
10029 redisLog(REDIS_NOTICE
,"Server started, Redis version " REDIS_VERSION
);
10031 linuxOvercommitMemoryWarning();
/* Load the dataset and log how many seconds the load took. The append only
 * file is used when server.appendonly is set; the rdbLoad() call below is the
 * other source. */
10033 start
= time(NULL
);
10034 if (server
.appendonly
) {
10035 if (loadAppendOnlyFile(server
.appendfilename
) == REDIS_OK
)
10036 redisLog(REDIS_NOTICE
,"DB loaded from append only file: %ld seconds",time(NULL
)-start
);
10038 if (rdbLoad(server
.dbfilename
) == REDIS_OK
)
10039 redisLog(REDIS_NOTICE
,"DB loaded from disk: %ld seconds",time(NULL
)-start
);
10041 redisLog(REDIS_NOTICE
,"The server is now ready to accept connections on port %d", server
.port
);
/* beforeSleep is registered as the event loop's before-sleep callback. */
10042 aeSetBeforeSleepProc(server
.el
,beforeSleep
);
10044 aeDeleteEventLoop(server
.el
);
10048 /* ============================= Backtrace support ========================= */
10050 #ifdef HAVE_BACKTRACE
10051 static char *findFuncName(void *pointer
, unsigned long *offset
);
/* Return, as a void pointer, the saved program counter stored in the machine
 * context of 'uc'. The member that is read depends on the platform selected
 * by the preprocessor: FreeBSD, dietlibc, Apple (before and from
 * MAC_OS_X_VERSION_10_6, with __rip and __eip variants), Linux on
 * i386/x86_64, and Linux on IA64.
 * NOTE(review): some preprocessor lines (#if / #else / #endif, numbers
 * 10059, 10061, 10063, 10067, 10069, 10074 onwards) are not shown in this
 * listing, including whatever is returned on unlisted platforms; the caller
 * tests the result against NULL. */
10053 static void *getMcontextEip(ucontext_t
*uc
) {
10054 #if defined(__FreeBSD__)
10055 return (void*) uc
->uc_mcontext
.mc_eip
;
10056 #elif defined(__dietlibc__)
10057 return (void*) uc
->uc_mcontext
.eip
;
10058 #elif defined(__APPLE__) && !defined(MAC_OS_X_VERSION_10_6)
10060 return (void*) uc
->uc_mcontext
->__ss
.__rip
;
10062 return (void*) uc
->uc_mcontext
->__ss
.__eip
;
10064 #elif defined(__APPLE__) && defined(MAC_OS_X_VERSION_10_6)
10065 #if defined(_STRUCT_X86_THREAD_STATE64) && !defined(__i386__)
10066 return (void*) uc
->uc_mcontext
->__ss
.__rip
;
10068 return (void*) uc
->uc_mcontext
->__ss
.__eip
;
10070 #elif defined(__i386__) || defined(__X86_64__) || defined(__x86_64__)
10071 return (void*) uc
->uc_mcontext
.gregs
[REG_EIP
]; /* Linux 32/64 bit */
10072 #elif defined(__ia64__) /* Linux IA64 */
10073 return (void*) uc
->uc_mcontext
.sc_ip
;
/* Signal handler installed with SA_SIGINFO by setupSigSegvAction(). It logs,
 * at REDIS_WARNING level, a banner with the Redis version and the signal
 * number, the string produced by genRedisInfoString(), and then a stack
 * trace with one log line per frame.
 *
 * sig:    number of the signal received.
 * info:   unused (marked with REDIS_NOTUSED).
 * secret: the third sigaction handler argument, used as a ucontext_t pointer.
 *
 * NOTE(review): the declarations of 'trace' and 'infostring' and the end of
 * the function are not shown in this listing. */
10079 static void segvHandler(int sig
, siginfo_t
*info
, void *secret
) {
10081 char **messages
= NULL
;
10082 int i
, trace_size
= 0;
10083 unsigned long offset
=0;
10084 ucontext_t
*uc
= (ucontext_t
*) secret
;
10086 REDIS_NOTUSED(info
);
10088 redisLog(REDIS_WARNING
,
10089 "======= Ooops! Redis %s got signal: -%d- =======", REDIS_VERSION
, sig
);
10090 infostring
= genRedisInfoString();
10091 redisLog(REDIS_WARNING
, "%s",infostring
);
10092 /* It's not safe to sdsfree() the returned string under memory
10093 * corruption conditions. Let it leak as we are going to abort */
/* Collect at most 100 return addresses. */
10095 trace_size
= backtrace(trace
, 100);
10096 /* overwrite sigaction with caller's address */
10097 if (getMcontextEip(uc
) != NULL
) {
10098 trace
[1] = getMcontextEip(uc
);
10100 messages
= backtrace_symbols(trace
, trace_size
);
/* Frame 0 is not printed: the loop starts from index 1. */
10102 for (i
=1; i
<trace_size
; ++i
) {
10103 char *fn
= findFuncName(trace
[i
], &offset
), *p
;
/* The backtrace_symbols() text is logged when findFuncName() found nothing,
 * or when the number following '+' in that text is smaller than the offset
 * computed by findFuncName(). The other redisLog() call below prints the
 * name and offset obtained from findFuncName(). */
10105 p
= strchr(messages
[i
],'+');
10106 if (!fn
|| (p
&& ((unsigned long)strtol(p
+1,NULL
,10)) < offset
)) {
10107 redisLog(REDIS_WARNING
,"%s", messages
[i
]);
10109 redisLog(REDIS_WARNING
,"%d redis-server %p %s + %d", i
, trace
[i
], fn
, (unsigned int)offset
);
10112 /* free(messages); Don't call free() with possibly corrupted memory. */
10116 static void setupSigSegvAction(void) {
10117 struct sigaction act
;
10119 sigemptyset (&act
.sa_mask
);
10120 /* When the SA_SIGINFO flag is set in sa_flags then sa_sigaction
10121 * is used. Otherwise, sa_handler is used */
10122 act
.sa_flags
= SA_NODEFER
| SA_ONSTACK
| SA_RESETHAND
| SA_SIGINFO
;
10123 act
.sa_sigaction
= segvHandler
;
10124 sigaction (SIGSEGV
, &act
, NULL
);
10125 sigaction (SIGBUS
, &act
, NULL
);
10126 sigaction (SIGFPE
, &act
, NULL
);
10127 sigaction (SIGILL
, &act
, NULL
);
10128 sigaction (SIGBUS
, &act
, NULL
);
10132 #include "staticsymbols.h"
10133 /* This function tries to convert a pointer into a function name. It's used in
10134 * order to provide a backtrace under segmentation fault that's able to
10135 * display functions declared as static (otherwise the backtrace is useless). */
/* Look 'pointer' up in symsTable[] (the table is walked until an entry whose
 * .pointer is zero) and return the .name of the selected entry, or NULL when
 * no entry was selected (ret == -1).
 * NOTE(review): the declarations of 'i' and 'ret', the statements that record
 * the best candidate and any store through 'offset' are not shown in this
 * listing; the caller reads *offset after the call, so presumably the
 * distance from the symbol start is written there -- confirm. */
10136 static char *findFuncName(void *pointer
, unsigned long *offset
){
10138 unsigned long off
, minoff
= 0;
10140 /* Try to match against the Symbol with the smallest offset */
10141 for (i
=0; symsTable
[i
].pointer
; i
++) {
10142 unsigned long lp
= (unsigned long) pointer
;
/* Only symbols located at or before the address are candidates; the all-ones
 * address is never matched. */
10144 if (lp
!= (unsigned long)-1 && lp
>= symsTable
[i
].pointer
) {
10145 off
=lp
-symsTable
[i
].pointer
;
/* Taken for the first candidate (ret still negative) or for a candidate
 * closer to the address than the current minimum. */
10146 if (ret
< 0 || off
< minoff
) {
10152 if (ret
== -1) return NULL
;
10154 return symsTable
[ret
].name
;
10156 #else /* HAVE_BACKTRACE */
/* Variant of setupSigSegvAction() compiled when HAVE_BACKTRACE is not
 * defined. NOTE(review): no statements are visible in its body in this
 * listing; presumably it is an empty stub -- confirm. */
10157 static void setupSigSegvAction(void) {
10159 #endif /* HAVE_BACKTRACE */