2 * Copyright (c) 2009-2010, Salvatore Sanfilippo <antirez at gmail dot com>
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions are met:
8 * * Redistributions of source code must retain the above copyright notice,
9 * this list of conditions and the following disclaimer.
10 * * Redistributions in binary form must reproduce the above copyright
11 * notice, this list of conditions and the following disclaimer in the
12 * documentation and/or other materials provided with the distribution.
13 * * Neither the name of Redis nor the names of its contributors may be used
14 * to endorse or promote products derived from this software without
15 * specific prior written permission.
17 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
18 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
21 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
22 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
23 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
24 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
25 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
26 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
27 * POSSIBILITY OF SUCH DAMAGE.
30 #define REDIS_VERSION "1.3.4"
40 #define __USE_POSIX199309
47 #endif /* HAVE_BACKTRACE */
55 #include <arpa/inet.h>
59 #include <sys/resource.h>
66 #include "solarisfixes.h"
70 #include "ae.h" /* Event driven programming library */
71 #include "sds.h" /* Dynamic safe strings */
72 #include "anet.h" /* Networking the easy way */
73 #include "dict.h" /* Hash tables */
74 #include "adlist.h" /* Linked lists */
75 #include "zmalloc.h" /* total memory usage aware version of malloc/free */
76 #include "lzf.h" /* LZF compression library */
77 #include "pqsort.h" /* Partial qsort for SORT+LIMIT */
84 /* Static server configuration */
85 #define REDIS_SERVERPORT 6379 /* TCP port */
86 #define REDIS_MAXIDLETIME (60*5) /* default client timeout */
87 #define REDIS_IOBUF_LEN 1024
88 #define REDIS_LOADBUF_LEN 1024
89 #define REDIS_STATIC_ARGS 4
90 #define REDIS_DEFAULT_DBNUM 16
91 #define REDIS_CONFIGLINE_MAX 1024
92 #define REDIS_OBJFREELIST_MAX 1000000 /* Max number of objects to cache */
93 #define REDIS_MAX_SYNC_TIME 60 /* Slave can't take more to sync */
94 #define REDIS_EXPIRELOOKUPS_PER_CRON 100 /* try to expire 100 keys/second */
95 #define REDIS_MAX_WRITE_PER_EVENT (1024*64)
96 #define REDIS_REQUEST_MAX_SIZE (1024*1024*256) /* max bytes in inline command */
98 /* If more than REDIS_WRITEV_THRESHOLD write packets are pending use writev */
99 #define REDIS_WRITEV_THRESHOLD 3
100 /* Max number of iovecs used for each writev call */
101 #define REDIS_WRITEV_IOVEC_COUNT 256
103 /* Hash table parameters */
104 #define REDIS_HT_MINFILL 10 /* Minimal hash table fill 10% */
107 #define REDIS_CMD_BULK 1 /* Bulk write command */
108 #define REDIS_CMD_INLINE 2 /* Inline command */
109 /* REDIS_CMD_DENYOOM deserves a longer comment: all the commands marked with
110 this flag will return an error when the 'maxmemory' option is set in the
111 config file and the server is using more than maxmemory bytes of memory.
112 In short these commands are denied on low memory conditions. */
113 #define REDIS_CMD_DENYOOM 4
116 #define REDIS_STRING 0
122 /* Objects encoding. Some kind of objects like Strings and Hashes can be
123 * internally represented in multiple ways. The 'encoding' field of the object
124 * is set to one of these values for this object. */
125 #define REDIS_ENCODING_RAW 0 /* Raw representation */
126 #define REDIS_ENCODING_INT 1 /* Encoded as integer */
127 #define REDIS_ENCODING_ZIPMAP 2 /* Encoded as zipmap */
128 #define REDIS_ENCODING_HT 3 /* Encoded as an hash table */
130 /* Object types only used for dumping to disk */
131 #define REDIS_EXPIRETIME 253
132 #define REDIS_SELECTDB 254
133 #define REDIS_EOF 255
135 /* Defines related to the dump file format. To store 32 bits lengths for short
136 * keys requires a lot of space, so we check the most significant 2 bits of
137 * the first byte to interpret the length:
139 * 00|000000 => if the two MSB are 00 the len is the 6 bits of this byte
140 * 01|000000 00000000 => 01, the len is 14 bits, 6 bits + 8 bits of next byte
141 * 10|000000 [32 bit integer] => if it's 10, a full 32 bit len will follow
142 * 11|000000 this means: specially encoded object will follow. The six bits
143 * number specify the kind of object that follows.
144 * See the REDIS_RDB_ENC_* defines.
146 * Lengths up to 63 are stored using a single byte, most DB keys, and many
147 * values, will fit inside. */
148 #define REDIS_RDB_6BITLEN 0
149 #define REDIS_RDB_14BITLEN 1
150 #define REDIS_RDB_32BITLEN 2
151 #define REDIS_RDB_ENCVAL 3
152 #define REDIS_RDB_LENERR UINT_MAX
154 /* When a length of a string object stored on disk has the first two bits
155 * set, the remaining six bits specify a special encoding for the object
156 * according to the following defines: */
157 #define REDIS_RDB_ENC_INT8 0 /* 8 bit signed integer */
158 #define REDIS_RDB_ENC_INT16 1 /* 16 bit signed integer */
159 #define REDIS_RDB_ENC_INT32 2 /* 32 bit signed integer */
160 #define REDIS_RDB_ENC_LZF 3 /* string compressed with FASTLZ */
162 /* Virtual memory object->where field. */
163 #define REDIS_VM_MEMORY 0 /* The object is on memory */
164 #define REDIS_VM_SWAPPED 1 /* The object is on disk */
165 #define REDIS_VM_SWAPPING 2 /* Redis is swapping this object on disk */
166 #define REDIS_VM_LOADING 3 /* Redis is loading this object from disk */
168 /* Virtual memory static configuration stuff.
169 * Check vmFindContiguousPages() to know more about these magic numbers. */
170 #define REDIS_VM_MAX_NEAR_PAGES 65536
171 #define REDIS_VM_MAX_RANDOM_JUMP 4096
172 #define REDIS_VM_MAX_THREADS 32
173 #define REDIS_THREAD_STACK_SIZE (1024*1024*4)
174 /* The following is the *percentage* of completed I/O jobs to process when the
175 * handler is called. While Virtual Memory I/O operations are performed by
176 * threads, these operations must be processed by the main thread when completed
177 * in order to take effect. */
178 #define REDIS_MAX_COMPLETED_JOBS_PROCESSED 1
181 #define REDIS_SLAVE 1 /* This client is a slave server */
182 #define REDIS_MASTER 2 /* This client is a master server */
183 #define REDIS_MONITOR 4 /* This client is a slave monitor, see MONITOR */
184 #define REDIS_MULTI 8 /* This client is in a MULTI context */
185 #define REDIS_BLOCKED 16 /* The client is waiting in a blocking operation */
186 #define REDIS_IO_WAIT 32 /* The client is waiting for Virtual Memory I/O */
188 /* Slave replication state - slave side */
189 #define REDIS_REPL_NONE 0 /* No active replication */
190 #define REDIS_REPL_CONNECT 1 /* Must connect to master */
191 #define REDIS_REPL_CONNECTED 2 /* Connected to master */
193 /* Slave replication state - from the point of view of master
194 * Note that in SEND_BULK and ONLINE state the slave receives new updates
195 * in its output queue. In the WAIT_BGSAVE state instead the server is waiting
196 * to start the next background saving in order to send updates to it. */
197 #define REDIS_REPL_WAIT_BGSAVE_START 3 /* master waits bgsave to start feeding it */
198 #define REDIS_REPL_WAIT_BGSAVE_END 4 /* master waits bgsave to start bulk DB transmission */
199 #define REDIS_REPL_SEND_BULK 5 /* master is sending the bulk DB */
200 #define REDIS_REPL_ONLINE 6 /* bulk DB already transmitted, receive updates */
202 /* List related stuff */
206 /* Sort operations */
207 #define REDIS_SORT_GET 0
208 #define REDIS_SORT_ASC 1
209 #define REDIS_SORT_DESC 2
210 #define REDIS_SORTKEY_MAX 1024
213 #define REDIS_DEBUG 0
214 #define REDIS_VERBOSE 1
215 #define REDIS_NOTICE 2
216 #define REDIS_WARNING 3
218 /* Anti-warning macro... */
219 #define REDIS_NOTUSED(V) ((void) V)
221 #define ZSKIPLIST_MAXLEVEL 32 /* Should be enough for 2^32 elements */
222 #define ZSKIPLIST_P 0.25 /* Skiplist P = 1/4 */
224 /* Append only defines */
225 #define APPENDFSYNC_NO 0
226 #define APPENDFSYNC_ALWAYS 1
227 #define APPENDFSYNC_EVERYSEC 2
229 /* Hashes related defaults */
230 #define REDIS_HASH_MAX_ZIPMAP_ENTRIES 64
231 #define REDIS_HASH_MAX_ZIPMAP_VALUE 512
233 /* We can print the stacktrace, so our assert is defined this way: */
/* redisAssert(_e): if the expression _e is false, report it by calling
 * _redisAssert() with the stringified expression, __FILE__ and __LINE__,
 * then terminate the process with _exit(1). If _e is true the macro
 * evaluates to (void)0. */
234 #define redisAssert(_e) ((_e)?(void)0 : (_redisAssert(#_e,__FILE__,__LINE__),_exit(1)))
/* Helper invoked by redisAssert() on failure; it receives the text of the
 * failed expression, the source file name and the line number.
 * (Its definition is not visible in this part of the file.) */
235 static void _redisAssert(char *estr
, char *file
, int line
);
237 /*================================= Data types ============================== */
239 /* A redis object, that is a type able to hold a string / list / set */
241 /* The VM object structure */
242 struct redisObjectVM
{
243 off_t page
; /* the page at which the object is stored on disk */
244 off_t usedpages
; /* number of pages used on disk */
245 time_t atime
; /* Last access time */
248 /* The actual Redis Object */
249 typedef struct redisObject
{
252 unsigned char encoding
;
253 unsigned char storage
; /* If this object is a key, where is the value?
254 * REDIS_VM_MEMORY, REDIS_VM_SWAPPED, ... */
255 unsigned char vtype
; /* If this object is a key, and value is swapped out,
256 * this is the type of the swapped out object. */
258 /* VM fields, these are only allocated if VM is active, otherwise the
259 * object allocation function will just allocate
260 * sizeof(redisObject) minus sizeof(redisObjectVM), so using
261 * Redis without VM active will not have any overhead. */
262 struct redisObjectVM vm
;
265 /* Macro used to initialize a Redis object allocated on the stack.
266 * Note that this macro is taken near the structure definition to make sure
267 * we'll update it when the structure is changed, to avoid bugs like
268 * bug #85 introduced exactly in this way. */
269 #define initStaticStringObject(_var,_ptr) do { \
271 _var.type = REDIS_STRING; \
272 _var.encoding = REDIS_ENCODING_RAW; \
274 if (server.vm_enabled) _var.storage = REDIS_VM_MEMORY; \
277 typedef struct redisDb
{
278 dict
*dict
; /* The keyspace for this DB */
279 dict
*expires
; /* Timeout of keys with a timeout set */
280 dict
*blockingkeys
; /* Keys with clients waiting for data (BLPOP) */
281 dict
*io_keys
; /* Keys with clients waiting for VM I/O */
285 /* Client MULTI/EXEC state */
286 typedef struct multiCmd
{
289 struct redisCommand
*cmd
;
292 typedef struct multiState
{
293 multiCmd
*commands
; /* Array of MULTI commands */
294 int count
; /* Total number of MULTI commands */
297 /* With multiplexing we need to take per-client state.
298 * Clients are taken in a linked list. */
299 typedef struct redisClient
{
304 robj
**argv
, **mbargv
;
306 int bulklen
; /* bulk read len. -1 if not in bulk read mode */
307 int multibulk
; /* multi bulk command format active */
310 time_t lastinteraction
; /* time of the last interaction, used for timeout */
311 int flags
; /* REDIS_SLAVE | REDIS_MONITOR | REDIS_MULTI ... */
312 int slaveseldb
; /* slave selected db, if this client is a slave */
313 int authenticated
; /* when requirepass is non-NULL */
314 int replstate
; /* replication state if this is a slave */
315 int repldbfd
; /* replication DB file descriptor */
316 long repldboff
; /* replication DB file offset */
317 off_t repldbsize
; /* replication DB file size */
318 multiState mstate
; /* MULTI/EXEC state */
319 robj
**blockingkeys
; /* The key we are waiting to terminate a blocking
320 * operation such as BLPOP. Otherwise NULL. */
321 int blockingkeysnum
; /* Number of blocking keys */
322 time_t blockingto
; /* Blocking operation timeout. If UNIX current time
323 * is >= blockingto then the operation timed out. */
324 list
*io_keys
; /* Keys this client is waiting to be loaded from the
325 * swap file in order to continue. */
333 /* Global server state structure */
338 dict
*sharingpool
; /* Pool used for object sharing */
339 unsigned int sharingpoolsize
;
340 long long dirty
; /* changes to DB from the last save */
342 list
*slaves
, *monitors
;
343 char neterr
[ANET_ERR_LEN
];
345 int cronloops
; /* number of times the cron function run */
346 list
*objfreelist
; /* A list of freed objects to avoid malloc() */
347 time_t lastsave
; /* Unix time of last successful save */
348 /* Fields used only for stats */
349 time_t stat_starttime
; /* server start time */
350 long long stat_numcommands
; /* number of processed commands */
351 long long stat_numconnections
; /* number of connections received */
364 pid_t bgsavechildpid
;
365 pid_t bgrewritechildpid
;
366 sds bgrewritebuf
; /* buffer taken by parent during append only rewrite */
367 struct saveparam
*saveparams
;
372 char *appendfilename
;
376 /* Replication related */
381 redisClient
*master
; /* client that is master for this slave */
383 unsigned int maxclients
;
384 unsigned long long maxmemory
;
385 unsigned int blpop_blocked_clients
;
386 unsigned int vm_blocked_clients
;
387 /* Sort parameters - qsort_r() is only available under BSD so we
388 * have to take this state global, in order to pass it to sortCompare() */
392 /* Virtual memory configuration */
397 unsigned long long vm_max_memory
;
399 size_t hash_max_zipmap_entries
;
400 size_t hash_max_zipmap_value
;
401 /* Virtual memory state */
404 off_t vm_next_page
; /* Next probably empty page */
405 off_t vm_near_pages
; /* Number of pages allocated sequentially */
406 unsigned char *vm_bitmap
; /* Bitmap of free/used pages */
407 time_t unixtime
; /* Unix time sampled every second. */
408 /* Virtual memory I/O threads stuff */
409 /* An I/O thread processes an element taken from the io_newjobs queue and
410 * puts the result of the operation in the io_processed list. While the
411 * job is being processed, it's put on the io_processing queue. */
412 list
*io_newjobs
; /* List of VM I/O jobs yet to be processed */
413 list
*io_processing
; /* List of VM I/O jobs being processed */
414 list
*io_processed
; /* List of VM I/O jobs already processed */
415 list
*io_ready_clients
; /* Clients ready to be unblocked. All keys loaded */
416 pthread_mutex_t io_mutex
; /* lock to access io_jobs/io_done/io_thread_job */
417 pthread_mutex_t obj_freelist_mutex
; /* safe redis objects creation/free */
418 pthread_mutex_t io_swapfile_mutex
; /* So we can lseek + write */
419 pthread_attr_t io_threads_attr
; /* attributes for threads creation */
420 int io_active_threads
; /* Number of running I/O threads */
421 int vm_max_threads
; /* Max number of I/O threads running at the same time */
422 /* Our main thread is blocked on the event loop, looking for sockets ready
423 * to be read or written, so when a threaded I/O operation is ready to be
424 * processed by the main thread, the I/O thread will use a unix pipe to
425 * awake the main thread. The following are the two pipe FDs. */
426 int io_ready_pipe_read
;
427 int io_ready_pipe_write
;
428 /* Virtual memory stats */
429 unsigned long long vm_stats_used_pages
;
430 unsigned long long vm_stats_swapped_objects
;
431 unsigned long long vm_stats_swapouts
;
432 unsigned long long vm_stats_swapins
;
436 typedef void redisCommandProc(redisClient
*c
);
437 struct redisCommand
{
439 redisCommandProc
*proc
;
442 /* What keys should be loaded in background when calling this command? */
443 int vm_firstkey
; /* The first argument that's a key (0 = no keys) */
444 int vm_lastkey
; /* The last argument that's a key */
445 int vm_keystep
; /* The step between first and last key */
448 struct redisFunctionSym
{
450 unsigned long pointer
;
453 typedef struct _redisSortObject
{
461 typedef struct _redisSortOperation
{
464 } redisSortOperation
;
466 /* ZSETs use a specialized version of Skiplists */
468 typedef struct zskiplistNode
{
469 struct zskiplistNode
**forward
;
470 struct zskiplistNode
*backward
;
476 typedef struct zskiplist
{
477 struct zskiplistNode
*header
, *tail
;
478 unsigned long length
;
482 typedef struct zset
{
487 /* Our shared "common" objects */
489 struct sharedObjectsStruct
{
490 robj
*crlf
, *ok
, *err
, *emptybulk
, *czero
, *cone
, *pong
, *space
,
491 *colon
, *nullbulk
, *nullmultibulk
, *queued
,
492 *emptymultibulk
, *wrongtypeerr
, *nokeyerr
, *syntaxerr
, *sameobjecterr
,
493 *outofrangeerr
, *plus
,
494 *select0
, *select1
, *select2
, *select3
, *select4
,
495 *select5
, *select6
, *select7
, *select8
, *select9
;
498 /* Global vars that are actually used as constants. The following double
499 * values are used for double on-disk serialization, and are initialized
500 * at runtime to avoid strange compiler optimizations. */
502 static double R_Zero
, R_PosInf
, R_NegInf
, R_Nan
;
504 /* VM threaded I/O request message */
505 #define REDIS_IOJOB_LOAD 0 /* Load from disk to memory */
506 #define REDIS_IOJOB_PREPARE_SWAP 1 /* Compute needed pages */
507 #define REDIS_IOJOB_DO_SWAP 2 /* Swap from memory to disk */
508 typedef struct iojob
{
509 int type
; /* Request type, REDIS_IOJOB_* */
510 redisDb
*db
;/* Redis database */
511 robj
*key
; /* This I/O request is about swapping this key */
512 robj
*val
; /* the value to swap for REDIS_IOREQ_*_SWAP, otherwise this
513 * field is populated by the I/O thread for REDIS_IOREQ_LOAD. */
514 off_t page
; /* Swap page where to read/write the object */
515 off_t pages
; /* Swap pages needed to save object. PREPARE_SWAP return val */
516 int canceled
; /* True if this command was canceled by blocking side of VM */
517 pthread_t thread
; /* ID of the thread processing this entry */
520 /*================================ Prototypes =============================== */
522 static void freeStringObject(robj
*o
);
523 static void freeListObject(robj
*o
);
524 static void freeSetObject(robj
*o
);
525 static void decrRefCount(void *o
);
526 static robj
*createObject(int type
, void *ptr
);
527 static void freeClient(redisClient
*c
);
528 static int rdbLoad(char *filename
);
529 static void addReply(redisClient
*c
, robj
*obj
);
530 static void addReplySds(redisClient
*c
, sds s
);
531 static void incrRefCount(robj
*o
);
532 static int rdbSaveBackground(char *filename
);
533 static robj
*createStringObject(char *ptr
, size_t len
);
534 static robj
*dupStringObject(robj
*o
);
535 static void replicationFeedSlaves(list
*slaves
, struct redisCommand
*cmd
, int dictid
, robj
**argv
, int argc
);
536 static void feedAppendOnlyFile(struct redisCommand
*cmd
, int dictid
, robj
**argv
, int argc
);
537 static int syncWithMaster(void);
538 static robj
*tryObjectSharing(robj
*o
);
539 static int tryObjectEncoding(robj
*o
);
540 static robj
*getDecodedObject(robj
*o
);
541 static int removeExpire(redisDb
*db
, robj
*key
);
542 static int expireIfNeeded(redisDb
*db
, robj
*key
);
543 static int deleteIfVolatile(redisDb
*db
, robj
*key
);
544 static int deleteIfSwapped(redisDb
*db
, robj
*key
);
545 static int deleteKey(redisDb
*db
, robj
*key
);
546 static time_t getExpire(redisDb
*db
, robj
*key
);
547 static int setExpire(redisDb
*db
, robj
*key
, time_t when
);
548 static void updateSlavesWaitingBgsave(int bgsaveerr
);
549 static void freeMemoryIfNeeded(void);
550 static int processCommand(redisClient
*c
);
551 static void setupSigSegvAction(void);
552 static void rdbRemoveTempFile(pid_t childpid
);
553 static void aofRemoveTempFile(pid_t childpid
);
554 static size_t stringObjectLen(robj
*o
);
555 static void processInputBuffer(redisClient
*c
);
556 static zskiplist
*zslCreate(void);
557 static void zslFree(zskiplist
*zsl
);
558 static void zslInsert(zskiplist
*zsl
, double score
, robj
*obj
);
559 static void sendReplyToClientWritev(aeEventLoop
*el
, int fd
, void *privdata
, int mask
);
560 static void initClientMultiState(redisClient
*c
);
561 static void freeClientMultiState(redisClient
*c
);
562 static void queueMultiCommand(redisClient
*c
, struct redisCommand
*cmd
);
563 static void unblockClientWaitingData(redisClient
*c
);
564 static int handleClientsWaitingListPush(redisClient
*c
, robj
*key
, robj
*ele
);
565 static void vmInit(void);
566 static void vmMarkPagesFree(off_t page
, off_t count
);
567 static robj
*vmLoadObject(robj
*key
);
568 static robj
*vmPreviewObject(robj
*key
);
569 static int vmSwapOneObjectBlocking(void);
570 static int vmSwapOneObjectThreaded(void);
571 static int vmCanSwapOut(void);
572 static int tryFreeOneObjectFromFreelist(void);
573 static void acceptHandler(aeEventLoop
*el
, int fd
, void *privdata
, int mask
);
574 static void vmThreadedIOCompletedJob(aeEventLoop
*el
, int fd
, void *privdata
, int mask
);
575 static void vmCancelThreadedIOJob(robj
*o
);
576 static void lockThreadedIO(void);
577 static void unlockThreadedIO(void);
578 static int vmSwapObjectThreaded(robj
*key
, robj
*val
, redisDb
*db
);
579 static void freeIOJob(iojob
*j
);
580 static void queueIOJob(iojob
*j
);
581 static int vmWriteObjectOnSwap(robj
*o
, off_t page
);
582 static robj
*vmReadObjectFromSwap(off_t page
, int type
);
583 static void waitEmptyIOJobsQueue(void);
584 static void vmReopenSwapFile(void);
585 static int vmFreePage(off_t page
);
586 static int blockClientOnSwappedKeys(struct redisCommand
*cmd
, redisClient
*c
);
587 static int dontWaitForSwappedKey(redisClient
*c
, robj
*key
);
588 static void handleClientsBlockedOnSwappedKey(redisDb
*db
, robj
*key
);
589 static void readQueryFromClient(aeEventLoop
*el
, int fd
, void *privdata
, int mask
);
590 static struct redisCommand
*lookupCommand(char *name
);
591 static void call(redisClient
*c
, struct redisCommand
*cmd
);
592 static void resetClient(redisClient
*c
);
594 static void authCommand(redisClient
*c
);
595 static void pingCommand(redisClient
*c
);
596 static void echoCommand(redisClient
*c
);
597 static void setCommand(redisClient
*c
);
598 static void setnxCommand(redisClient
*c
);
599 static void getCommand(redisClient
*c
);
600 static void delCommand(redisClient
*c
);
601 static void existsCommand(redisClient
*c
);
602 static void incrCommand(redisClient
*c
);
603 static void decrCommand(redisClient
*c
);
604 static void incrbyCommand(redisClient
*c
);
605 static void decrbyCommand(redisClient
*c
);
606 static void selectCommand(redisClient
*c
);
607 static void randomkeyCommand(redisClient
*c
);
608 static void keysCommand(redisClient
*c
);
609 static void dbsizeCommand(redisClient
*c
);
610 static void lastsaveCommand(redisClient
*c
);
611 static void saveCommand(redisClient
*c
);
612 static void bgsaveCommand(redisClient
*c
);
613 static void bgrewriteaofCommand(redisClient
*c
);
614 static void shutdownCommand(redisClient
*c
);
615 static void moveCommand(redisClient
*c
);
616 static void renameCommand(redisClient
*c
);
617 static void renamenxCommand(redisClient
*c
);
618 static void lpushCommand(redisClient
*c
);
619 static void rpushCommand(redisClient
*c
);
620 static void lpopCommand(redisClient
*c
);
621 static void rpopCommand(redisClient
*c
);
622 static void llenCommand(redisClient
*c
);
623 static void lindexCommand(redisClient
*c
);
624 static void lrangeCommand(redisClient
*c
);
625 static void ltrimCommand(redisClient
*c
);
626 static void typeCommand(redisClient
*c
);
627 static void lsetCommand(redisClient
*c
);
628 static void saddCommand(redisClient
*c
);
629 static void sremCommand(redisClient
*c
);
630 static void smoveCommand(redisClient
*c
);
631 static void sismemberCommand(redisClient
*c
);
632 static void scardCommand(redisClient
*c
);
633 static void spopCommand(redisClient
*c
);
634 static void srandmemberCommand(redisClient
*c
);
635 static void sinterCommand(redisClient
*c
);
636 static void sinterstoreCommand(redisClient
*c
);
637 static void sunionCommand(redisClient
*c
);
638 static void sunionstoreCommand(redisClient
*c
);
639 static void sdiffCommand(redisClient
*c
);
640 static void sdiffstoreCommand(redisClient
*c
);
641 static void syncCommand(redisClient
*c
);
642 static void flushdbCommand(redisClient
*c
);
643 static void flushallCommand(redisClient
*c
);
644 static void sortCommand(redisClient
*c
);
645 static void lremCommand(redisClient
*c
);
646 static void rpoplpushcommand(redisClient
*c
);
647 static void infoCommand(redisClient
*c
);
648 static void mgetCommand(redisClient
*c
);
649 static void monitorCommand(redisClient
*c
);
650 static void expireCommand(redisClient
*c
);
651 static void expireatCommand(redisClient
*c
);
652 static void getsetCommand(redisClient
*c
);
653 static void ttlCommand(redisClient
*c
);
654 static void slaveofCommand(redisClient
*c
);
655 static void debugCommand(redisClient
*c
);
656 static void msetCommand(redisClient
*c
);
657 static void msetnxCommand(redisClient
*c
);
658 static void zaddCommand(redisClient
*c
);
659 static void zincrbyCommand(redisClient
*c
);
660 static void zrangeCommand(redisClient
*c
);
661 static void zrangebyscoreCommand(redisClient
*c
);
662 static void zcountCommand(redisClient
*c
);
663 static void zrevrangeCommand(redisClient
*c
);
664 static void zcardCommand(redisClient
*c
);
665 static void zremCommand(redisClient
*c
);
666 static void zscoreCommand(redisClient
*c
);
667 static void zremrangebyscoreCommand(redisClient
*c
);
668 static void multiCommand(redisClient
*c
);
669 static void execCommand(redisClient
*c
);
670 static void discardCommand(redisClient
*c
);
671 static void blpopCommand(redisClient
*c
);
672 static void brpopCommand(redisClient
*c
);
673 static void appendCommand(redisClient
*c
);
674 static void substrCommand(redisClient
*c
);
675 static void zrankCommand(redisClient
*c
);
676 static void hsetCommand(redisClient
*c
);
677 static void hgetCommand(redisClient
*c
);
678 static void zmergeCommand(redisClient
*c
);
679 static void zmergeweighedCommand(redisClient
*c
);
681 /*================================= Globals ================================= */
684 static struct redisServer server
; /* server global state */
685 static struct redisCommand cmdTable
[] = {
686 {"get",getCommand
,2,REDIS_CMD_INLINE
,1,1,1},
687 {"set",setCommand
,3,REDIS_CMD_BULK
|REDIS_CMD_DENYOOM
,0,0,0},
688 {"setnx",setnxCommand
,3,REDIS_CMD_BULK
|REDIS_CMD_DENYOOM
,0,0,0},
689 {"append",appendCommand
,3,REDIS_CMD_BULK
|REDIS_CMD_DENYOOM
,1,1,1},
690 {"substr",substrCommand
,4,REDIS_CMD_INLINE
,1,1,1},
691 {"del",delCommand
,-2,REDIS_CMD_INLINE
,0,0,0},
692 {"exists",existsCommand
,2,REDIS_CMD_INLINE
,1,1,1},
693 {"incr",incrCommand
,2,REDIS_CMD_INLINE
|REDIS_CMD_DENYOOM
,1,1,1},
694 {"decr",decrCommand
,2,REDIS_CMD_INLINE
|REDIS_CMD_DENYOOM
,1,1,1},
695 {"mget",mgetCommand
,-2,REDIS_CMD_INLINE
,1,-1,1},
696 {"rpush",rpushCommand
,3,REDIS_CMD_BULK
|REDIS_CMD_DENYOOM
,1,1,1},
697 {"lpush",lpushCommand
,3,REDIS_CMD_BULK
|REDIS_CMD_DENYOOM
,1,1,1},
698 {"rpop",rpopCommand
,2,REDIS_CMD_INLINE
,1,1,1},
699 {"lpop",lpopCommand
,2,REDIS_CMD_INLINE
,1,1,1},
700 {"brpop",brpopCommand
,-3,REDIS_CMD_INLINE
,1,1,1},
701 {"blpop",blpopCommand
,-3,REDIS_CMD_INLINE
,1,1,1},
702 {"llen",llenCommand
,2,REDIS_CMD_INLINE
,1,1,1},
703 {"lindex",lindexCommand
,3,REDIS_CMD_INLINE
,1,1,1},
704 {"lset",lsetCommand
,4,REDIS_CMD_BULK
|REDIS_CMD_DENYOOM
,1,1,1},
705 {"lrange",lrangeCommand
,4,REDIS_CMD_INLINE
,1,1,1},
706 {"ltrim",ltrimCommand
,4,REDIS_CMD_INLINE
,1,1,1},
707 {"lrem",lremCommand
,4,REDIS_CMD_BULK
,1,1,1},
708 {"rpoplpush",rpoplpushcommand
,3,REDIS_CMD_INLINE
|REDIS_CMD_DENYOOM
,1,2,1},
709 {"sadd",saddCommand
,3,REDIS_CMD_BULK
|REDIS_CMD_DENYOOM
,1,1,1},
710 {"srem",sremCommand
,3,REDIS_CMD_BULK
,1,1,1},
711 {"smove",smoveCommand
,4,REDIS_CMD_BULK
,1,2,1},
712 {"sismember",sismemberCommand
,3,REDIS_CMD_BULK
,1,1,1},
713 {"scard",scardCommand
,2,REDIS_CMD_INLINE
,1,1,1},
714 {"spop",spopCommand
,2,REDIS_CMD_INLINE
,1,1,1},
715 {"srandmember",srandmemberCommand
,2,REDIS_CMD_INLINE
,1,1,1},
716 {"sinter",sinterCommand
,-2,REDIS_CMD_INLINE
|REDIS_CMD_DENYOOM
,1,-1,1},
717 {"sinterstore",sinterstoreCommand
,-3,REDIS_CMD_INLINE
|REDIS_CMD_DENYOOM
,2,-1,1},
718 {"sunion",sunionCommand
,-2,REDIS_CMD_INLINE
|REDIS_CMD_DENYOOM
,1,-1,1},
719 {"sunionstore",sunionstoreCommand
,-3,REDIS_CMD_INLINE
|REDIS_CMD_DENYOOM
,2,-1,1},
720 {"sdiff",sdiffCommand
,-2,REDIS_CMD_INLINE
|REDIS_CMD_DENYOOM
,1,-1,1},
721 {"sdiffstore",sdiffstoreCommand
,-3,REDIS_CMD_INLINE
|REDIS_CMD_DENYOOM
,2,-1,1},
722 {"smembers",sinterCommand
,2,REDIS_CMD_INLINE
,1,1,1},
723 {"zadd",zaddCommand
,4,REDIS_CMD_BULK
|REDIS_CMD_DENYOOM
,1,1,1},
724 {"zincrby",zincrbyCommand
,4,REDIS_CMD_BULK
|REDIS_CMD_DENYOOM
,1,1,1},
725 {"zrem",zremCommand
,3,REDIS_CMD_BULK
,1,1,1},
726 {"zremrangebyscore",zremrangebyscoreCommand
,4,REDIS_CMD_INLINE
,1,1,1},
727 {"zmerge",zmergeCommand
,-3,REDIS_CMD_INLINE
|REDIS_CMD_DENYOOM
,2,-1,1},
728 {"zmergeweighed",zmergeweighedCommand
,-4,REDIS_CMD_INLINE
|REDIS_CMD_DENYOOM
,2,-2,2},
729 {"zrange",zrangeCommand
,-4,REDIS_CMD_INLINE
,1,1,1},
730 {"zrangebyscore",zrangebyscoreCommand
,-4,REDIS_CMD_INLINE
,1,1,1},
731 {"zcount",zcountCommand
,4,REDIS_CMD_INLINE
,1,1,1},
732 {"zrevrange",zrevrangeCommand
,-4,REDIS_CMD_INLINE
,1,1,1},
733 {"zcard",zcardCommand
,2,REDIS_CMD_INLINE
,1,1,1},
734 {"zscore",zscoreCommand
,3,REDIS_CMD_BULK
|REDIS_CMD_DENYOOM
,1,1,1},
735 {"zrank",zrankCommand
,3,REDIS_CMD_INLINE
,1,1,1},
736 {"hset",hsetCommand
,4,REDIS_CMD_BULK
|REDIS_CMD_DENYOOM
,1,1,1},
737 {"hget",hgetCommand
,3,REDIS_CMD_BULK
,1,1,1},
738 {"incrby",incrbyCommand
,3,REDIS_CMD_INLINE
|REDIS_CMD_DENYOOM
,1,1,1},
739 {"decrby",decrbyCommand
,3,REDIS_CMD_INLINE
|REDIS_CMD_DENYOOM
,1,1,1},
740 {"getset",getsetCommand
,3,REDIS_CMD_BULK
|REDIS_CMD_DENYOOM
,1,1,1},
741 {"mset",msetCommand
,-3,REDIS_CMD_BULK
|REDIS_CMD_DENYOOM
,1,-1,2},
742 {"msetnx",msetnxCommand
,-3,REDIS_CMD_BULK
|REDIS_CMD_DENYOOM
,1,-1,2},
743 {"randomkey",randomkeyCommand
,1,REDIS_CMD_INLINE
,0,0,0},
744 {"select",selectCommand
,2,REDIS_CMD_INLINE
,0,0,0},
745 {"move",moveCommand
,3,REDIS_CMD_INLINE
,1,1,1},
746 {"rename",renameCommand
,3,REDIS_CMD_INLINE
,1,1,1},
747 {"renamenx",renamenxCommand
,3,REDIS_CMD_INLINE
,1,1,1},
748 {"expire",expireCommand
,3,REDIS_CMD_INLINE
,0,0,0},
749 {"expireat",expireatCommand
,3,REDIS_CMD_INLINE
,0,0,0},
750 {"keys",keysCommand
,2,REDIS_CMD_INLINE
,0,0,0},
751 {"dbsize",dbsizeCommand
,1,REDIS_CMD_INLINE
,0,0,0},
752 {"auth",authCommand
,2,REDIS_CMD_INLINE
,0,0,0},
753 {"ping",pingCommand
,1,REDIS_CMD_INLINE
,0,0,0},
754 {"echo",echoCommand
,2,REDIS_CMD_BULK
,0,0,0},
755 {"save",saveCommand
,1,REDIS_CMD_INLINE
,0,0,0},
756 {"bgsave",bgsaveCommand
,1,REDIS_CMD_INLINE
,0,0,0},
757 {"bgrewriteaof",bgrewriteaofCommand
,1,REDIS_CMD_INLINE
,0,0,0},
758 {"shutdown",shutdownCommand
,1,REDIS_CMD_INLINE
,0,0,0},
759 {"lastsave",lastsaveCommand
,1,REDIS_CMD_INLINE
,0,0,0},
760 {"type",typeCommand
,2,REDIS_CMD_INLINE
,1,1,1},
761 {"multi",multiCommand
,1,REDIS_CMD_INLINE
,0,0,0},
762 {"exec",execCommand
,1,REDIS_CMD_INLINE
,0,0,0},
763 {"discard",discardCommand
,1,REDIS_CMD_INLINE
,0,0,0},
764 {"sync",syncCommand
,1,REDIS_CMD_INLINE
,0,0,0},
765 {"flushdb",flushdbCommand
,1,REDIS_CMD_INLINE
,0,0,0},
766 {"flushall",flushallCommand
,1,REDIS_CMD_INLINE
,0,0,0},
767 {"sort",sortCommand
,-2,REDIS_CMD_INLINE
|REDIS_CMD_DENYOOM
,1,1,1},
768 {"info",infoCommand
,1,REDIS_CMD_INLINE
,0,0,0},
769 {"monitor",monitorCommand
,1,REDIS_CMD_INLINE
,0,0,0},
770 {"ttl",ttlCommand
,2,REDIS_CMD_INLINE
,1,1,1},
771 {"slaveof",slaveofCommand
,3,REDIS_CMD_INLINE
,0,0,0},
772 {"debug",debugCommand
,-2,REDIS_CMD_INLINE
,0,0,0},
773 {NULL
,NULL
,0,0,0,0,0}
776 /*============================ Utility functions ============================ */
778 /* Glob-style pattern matching. */
779 int stringmatchlen(const char *pattern
, int patternLen
,
780 const char *string
, int stringLen
, int nocase
)
785 while (pattern
[1] == '*') {
790 return 1; /* match */
792 if (stringmatchlen(pattern
+1, patternLen
-1,
793 string
, stringLen
, nocase
))
794 return 1; /* match */
798 return 0; /* no match */
802 return 0; /* no match */
812 not = pattern
[0] == '^';
819 if (pattern
[0] == '\\') {
822 if (pattern
[0] == string
[0])
824 } else if (pattern
[0] == ']') {
826 } else if (patternLen
== 0) {
830 } else if (pattern
[1] == '-' && patternLen
>= 3) {
831 int start
= pattern
[0];
832 int end
= pattern
[2];
840 start
= tolower(start
);
846 if (c
>= start
&& c
<= end
)
850 if (pattern
[0] == string
[0])
853 if (tolower((int)pattern
[0]) == tolower((int)string
[0]))
863 return 0; /* no match */
869 if (patternLen
>= 2) {
876 if (pattern
[0] != string
[0])
877 return 0; /* no match */
879 if (tolower((int)pattern
[0]) != tolower((int)string
[0]))
880 return 0; /* no match */
888 if (stringLen
== 0) {
889 while(*pattern
== '*') {
896 if (patternLen
== 0 && stringLen
== 0)
901 static void redisLog(int level
, const char *fmt
, ...) {
905 fp
= (server
.logfile
== NULL
) ? stdout
: fopen(server
.logfile
,"a");
909 if (level
>= server
.verbosity
) {
915 strftime(buf
,64,"%d %b %H:%M:%S",localtime(&now
));
916 fprintf(fp
,"[%d] %s %c ",(int)getpid(),buf
,c
[level
]);
917 vfprintf(fp
, fmt
, ap
);
923 if (server
.logfile
) fclose(fp
);
926 /*====================== Hash table type implementation ==================== */
928 /* This is a hash table type that uses the SDS dynamic strings library as
929 * keys and redis objects as values (objects can hold SDS strings,
932 static void dictVanillaFree(void *privdata
, void *val
)
934 DICT_NOTUSED(privdata
);
938 static void dictListDestructor(void *privdata
, void *val
)
940 DICT_NOTUSED(privdata
);
941 listRelease((list
*)val
);
944 static int sdsDictKeyCompare(void *privdata
, const void *key1
,
948 DICT_NOTUSED(privdata
);
950 l1
= sdslen((sds
)key1
);
951 l2
= sdslen((sds
)key2
);
952 if (l1
!= l2
) return 0;
953 return memcmp(key1
, key2
, l1
) == 0;
956 static void dictRedisObjectDestructor(void *privdata
, void *val
)
958 DICT_NOTUSED(privdata
);
960 if (val
== NULL
) return; /* Values of swapped out keys as set to NULL */
964 static int dictObjKeyCompare(void *privdata
, const void *key1
,
967 const robj
*o1
= key1
, *o2
= key2
;
968 return sdsDictKeyCompare(privdata
,o1
->ptr
,o2
->ptr
);
971 static unsigned int dictObjHash(const void *key
) {
973 return dictGenHashFunction(o
->ptr
, sdslen((sds
)o
->ptr
));
976 static int dictEncObjKeyCompare(void *privdata
, const void *key1
,
979 robj
*o1
= (robj
*) key1
, *o2
= (robj
*) key2
;
982 o1
= getDecodedObject(o1
);
983 o2
= getDecodedObject(o2
);
984 cmp
= sdsDictKeyCompare(privdata
,o1
->ptr
,o2
->ptr
);
990 static unsigned int dictEncObjHash(const void *key
) {
991 robj
*o
= (robj
*) key
;
993 if (o
->encoding
== REDIS_ENCODING_RAW
) {
994 return dictGenHashFunction(o
->ptr
, sdslen((sds
)o
->ptr
));
996 if (o
->encoding
== REDIS_ENCODING_INT
) {
1000 len
= snprintf(buf
,32,"%ld",(long)o
->ptr
);
1001 return dictGenHashFunction((unsigned char*)buf
, len
);
1005 o
= getDecodedObject(o
);
1006 hash
= dictGenHashFunction(o
->ptr
, sdslen((sds
)o
->ptr
));
1013 /* Sets type and expires */
1014 static dictType setDictType
= {
1015 dictEncObjHash
, /* hash function */
1018 dictEncObjKeyCompare
, /* key compare */
1019 dictRedisObjectDestructor
, /* key destructor */
1020 NULL
/* val destructor */
1023 /* Sorted sets hash (note: a skiplist is used in addition to the hash table) */
1024 static dictType zsetDictType
= {
1025 dictEncObjHash
, /* hash function */
1028 dictEncObjKeyCompare
, /* key compare */
1029 dictRedisObjectDestructor
, /* key destructor */
1030 dictVanillaFree
/* val destructor of malloc(sizeof(double)) */
1034 static dictType dbDictType
= {
1035 dictObjHash
, /* hash function */
1038 dictObjKeyCompare
, /* key compare */
1039 dictRedisObjectDestructor
, /* key destructor */
1040 dictRedisObjectDestructor
/* val destructor */
1044 static dictType keyptrDictType
= {
1045 dictObjHash
, /* hash function */
1048 dictObjKeyCompare
, /* key compare */
1049 dictRedisObjectDestructor
, /* key destructor */
1050 NULL
/* val destructor */
1053 /* Hash type hash table (note that small hashes are represented with zipmaps) */
1054 static dictType hashDictType
= {
1055 dictEncObjHash
, /* hash function */
1058 dictEncObjKeyCompare
, /* key compare */
1059 dictRedisObjectDestructor
, /* key destructor */
1060 dictRedisObjectDestructor
/* val destructor */
1063 /* Keylist hash table type has unencoded redis objects as keys and
1064 * lists as values. It's used for blocking operations (BLPOP) and to
1065 * map swapped keys to a list of clients waiting for these keys to be loaded. */
1066 static dictType keylistDictType
= {
1067 dictObjHash
, /* hash function */
1070 dictObjKeyCompare
, /* key compare */
1071 dictRedisObjectDestructor
, /* key destructor */
1072 dictListDestructor
/* val destructor */
1075 /* ========================= Random utility functions ======================= */
1077 /* Redis generally does not try to recover from out of memory conditions
1078 * when allocating objects or strings, it is not clear if it will be possible
1079 * to report this condition to the client since the networking layer itself
1080 * is based on heap allocation for send buffers, so we simply abort.
1081 * At least the code will be simpler to read... */
1082 static void oom(const char *msg
) {
1083 redisLog(REDIS_WARNING
, "%s: Out of memory\n",msg
);
1088 /* ====================== Redis server networking stuff ===================== */
1089 static void closeTimedoutClients(void) {
1092 time_t now
= time(NULL
);
1095 listRewind(server
.clients
,&li
);
1096 while ((ln
= listNext(&li
)) != NULL
) {
1097 c
= listNodeValue(ln
);
1098 if (server
.maxidletime
&&
1099 !(c
->flags
& REDIS_SLAVE
) && /* no timeout for slaves */
1100 !(c
->flags
& REDIS_MASTER
) && /* no timeout for masters */
1101 (now
- c
->lastinteraction
> server
.maxidletime
))
1103 redisLog(REDIS_VERBOSE
,"Closing idle client");
1105 } else if (c
->flags
& REDIS_BLOCKED
) {
1106 if (c
->blockingto
!= 0 && c
->blockingto
< now
) {
1107 addReply(c
,shared
.nullmultibulk
);
1108 unblockClientWaitingData(c
);
1114 static int htNeedsResize(dict
*dict
) {
1115 long long size
, used
;
1117 size
= dictSlots(dict
);
1118 used
= dictSize(dict
);
1119 return (size
&& used
&& size
> DICT_HT_INITIAL_SIZE
&&
1120 (used
*100/size
< REDIS_HT_MINFILL
));
1123 /* If the percentage of used slots in the HT reaches REDIS_HT_MINFILL
1124 * we resize the hash table to save memory */
1125 static void tryResizeHashTables(void) {
1128 for (j
= 0; j
< server
.dbnum
; j
++) {
1129 if (htNeedsResize(server
.db
[j
].dict
)) {
1130 redisLog(REDIS_VERBOSE
,"The hash table %d is too sparse, resize it...",j
);
1131 dictResize(server
.db
[j
].dict
);
1132 redisLog(REDIS_VERBOSE
,"Hash table %d resized.",j
);
1134 if (htNeedsResize(server
.db
[j
].expires
))
1135 dictResize(server
.db
[j
].expires
);
1139 /* A background saving child (BGSAVE) terminated its work. Handle this. */
1140 void backgroundSaveDoneHandler(int statloc
) {
1141 int exitcode
= WEXITSTATUS(statloc
);
1142 int bysignal
= WIFSIGNALED(statloc
);
1144 if (!bysignal
&& exitcode
== 0) {
1145 redisLog(REDIS_NOTICE
,
1146 "Background saving terminated with success");
1148 server
.lastsave
= time(NULL
);
1149 } else if (!bysignal
&& exitcode
!= 0) {
1150 redisLog(REDIS_WARNING
, "Background saving error");
1152 redisLog(REDIS_WARNING
,
1153 "Background saving terminated by signal");
1154 rdbRemoveTempFile(server
.bgsavechildpid
);
1156 server
.bgsavechildpid
= -1;
1157 /* Possibly there are slaves waiting for a BGSAVE in order to be served
1158 * (the first stage of SYNC is a bulk transfer of dump.rdb) */
1159 updateSlavesWaitingBgsave(exitcode
== 0 ? REDIS_OK
: REDIS_ERR
);
1162 /* A background append only file rewriting (BGREWRITEAOF) terminated its work.
1164 void backgroundRewriteDoneHandler(int statloc
) {
1165 int exitcode
= WEXITSTATUS(statloc
);
1166 int bysignal
= WIFSIGNALED(statloc
);
1168 if (!bysignal
&& exitcode
== 0) {
1172 redisLog(REDIS_NOTICE
,
1173 "Background append only file rewriting terminated with success");
1174 /* Now it's time to flush the differences accumulated by the parent */
1175 snprintf(tmpfile
,256,"temp-rewriteaof-bg-%d.aof", (int) server
.bgrewritechildpid
);
1176 fd
= open(tmpfile
,O_WRONLY
|O_APPEND
);
1178 redisLog(REDIS_WARNING
, "Not able to open the temp append only file produced by the child: %s", strerror(errno
));
1181 /* Flush our data... */
1182 if (write(fd
,server
.bgrewritebuf
,sdslen(server
.bgrewritebuf
)) !=
1183 (signed) sdslen(server
.bgrewritebuf
)) {
1184 redisLog(REDIS_WARNING
, "Error or short write trying to flush the parent diff of the append log file in the child temp file: %s", strerror(errno
));
1188 redisLog(REDIS_NOTICE
,"Parent diff flushed into the new append log file with success (%lu bytes)",sdslen(server
.bgrewritebuf
));
1189 /* Now our work is to rename the temp file into the stable file. And
1190 * switch the file descriptor used by the server for append only. */
1191 if (rename(tmpfile
,server
.appendfilename
) == -1) {
1192 redisLog(REDIS_WARNING
,"Can't rename the temp append only file into the stable one: %s", strerror(errno
));
1196 /* Mission completed... almost */
1197 redisLog(REDIS_NOTICE
,"Append only file successfully rewritten.");
1198 if (server
.appendfd
!= -1) {
1199 /* If append only is actually enabled... */
1200 close(server
.appendfd
);
1201 server
.appendfd
= fd
;
1203 server
.appendseldb
= -1; /* Make sure it will issue SELECT */
1204 redisLog(REDIS_NOTICE
,"The new append only file was selected for future appends.");
1206 /* If append only is disabled we just generate a dump in this
1207 * format. Why not? */
1210 } else if (!bysignal
&& exitcode
!= 0) {
1211 redisLog(REDIS_WARNING
, "Background append only file rewriting error");
1213 redisLog(REDIS_WARNING
,
1214 "Background append only file rewriting terminated by signal");
1217 sdsfree(server
.bgrewritebuf
);
1218 server
.bgrewritebuf
= sdsempty();
1219 aofRemoveTempFile(server
.bgrewritechildpid
);
1220 server
.bgrewritechildpid
= -1;
1223 static int serverCron(struct aeEventLoop
*eventLoop
, long long id
, void *clientData
) {
1224 int j
, loops
= server
.cronloops
++;
1225 REDIS_NOTUSED(eventLoop
);
1227 REDIS_NOTUSED(clientData
);
1229 /* We take a cached value of the unix time in the global state because
1230 * with virtual memory and aging we need to store the current time
1231 * in objects at every object access, and accuracy is not needed.
1232 * Accessing a global var is faster than calling time(NULL) */
1233 server
.unixtime
= time(NULL
);
1235 /* Show some info about non-empty databases */
1236 for (j
= 0; j
< server
.dbnum
; j
++) {
1237 long long size
, used
, vkeys
;
1239 size
= dictSlots(server
.db
[j
].dict
);
1240 used
= dictSize(server
.db
[j
].dict
);
1241 vkeys
= dictSize(server
.db
[j
].expires
);
1242 if (!(loops
% 5) && (used
|| vkeys
)) {
1243 redisLog(REDIS_VERBOSE
,"DB %d: %lld keys (%lld volatile) in %lld slots HT.",j
,used
,vkeys
,size
);
1244 /* dictPrintStats(server.dict); */
1248 /* We don't want to resize the hash tables while a background saving
1249 * is in progress: the saving child is created using fork() that is
1250 * implemented with a copy-on-write semantic in most modern systems, so
1251 * if we resize the HT while there is the saving child at work actually
1252 * a lot of memory movements in the parent will cause a lot of pages
1254 if (server
.bgsavechildpid
== -1) tryResizeHashTables();
1256 /* Show information about connected clients */
1258 redisLog(REDIS_VERBOSE
,"%d clients connected (%d slaves), %zu bytes in use, %d shared objects",
1259 listLength(server
.clients
)-listLength(server
.slaves
),
1260 listLength(server
.slaves
),
1261 zmalloc_used_memory(),
1262 dictSize(server
.sharingpool
));
1265 /* Close connections of timedout clients */
1266 if ((server
.maxidletime
&& !(loops
% 10)) || server
.blpop_blocked_clients
)
1267 closeTimedoutClients();
1269 /* Check if a background saving or AOF rewrite in progress terminated */
1270 if (server
.bgsavechildpid
!= -1 || server
.bgrewritechildpid
!= -1) {
1274 if ((pid
= wait3(&statloc
,WNOHANG
,NULL
)) != 0) {
1275 if (pid
== server
.bgsavechildpid
) {
1276 backgroundSaveDoneHandler(statloc
);
1278 backgroundRewriteDoneHandler(statloc
);
1282 /* If there is not a background saving in progress check if
1283 * we have to save now */
1284 time_t now
= time(NULL
);
1285 for (j
= 0; j
< server
.saveparamslen
; j
++) {
1286 struct saveparam
*sp
= server
.saveparams
+j
;
1288 if (server
.dirty
>= sp
->changes
&&
1289 now
-server
.lastsave
> sp
->seconds
) {
1290 redisLog(REDIS_NOTICE
,"%d changes in %d seconds. Saving...",
1291 sp
->changes
, sp
->seconds
);
1292 rdbSaveBackground(server
.dbfilename
);
1298 /* Try to expire a few timed out keys. The algorithm used is adaptive and
1299 * will use few CPU cycles if there are few expiring keys, otherwise
1300 * it will get more aggressive to avoid that too much memory is used by
1301 * keys that can be removed from the keyspace. */
1302 for (j
= 0; j
< server
.dbnum
; j
++) {
1304 redisDb
*db
= server
.db
+j
;
1306 /* Continue to expire if at the end of the cycle more than 25%
1307 * of the keys were expired. */
1309 long num
= dictSize(db
->expires
);
1310 time_t now
= time(NULL
);
1313 if (num
> REDIS_EXPIRELOOKUPS_PER_CRON
)
1314 num
= REDIS_EXPIRELOOKUPS_PER_CRON
;
1319 if ((de
= dictGetRandomKey(db
->expires
)) == NULL
) break;
1320 t
= (time_t) dictGetEntryVal(de
);
1322 deleteKey(db
,dictGetEntryKey(de
));
1326 } while (expired
> REDIS_EXPIRELOOKUPS_PER_CRON
/4);
1329 /* Swap a few keys on disk if we are over the memory limit and VM
1330 * is enabled. Try to free objects from the free list first. */
1331 if (vmCanSwapOut()) {
1332 while (server
.vm_enabled
&& zmalloc_used_memory() >
1333 server
.vm_max_memory
)
1337 if (tryFreeOneObjectFromFreelist() == REDIS_OK
) continue;
1338 retval
= (server
.vm_max_threads
== 0) ?
1339 vmSwapOneObjectBlocking() :
1340 vmSwapOneObjectThreaded();
1341 if (retval
== REDIS_ERR
&& (loops
% 30) == 0 &&
1342 zmalloc_used_memory() >
1343 (server
.vm_max_memory
+server
.vm_max_memory
/10))
1345 redisLog(REDIS_WARNING
,"WARNING: vm-max-memory limit exceeded by more than 10%% but unable to swap more objects out!");
1347 /* Note that when using threaded I/O we free just one object,
1348 * because anyway when the I/O thread in charge of swapping this
1349 * object out finishes, the handler of completed jobs
1350 * will try to swap more objects if we are still out of memory. */
1351 if (retval
== REDIS_ERR
|| server
.vm_max_threads
> 0) break;
1355 /* Check if we should connect to a MASTER */
1356 if (server
.replstate
== REDIS_REPL_CONNECT
) {
1357 redisLog(REDIS_NOTICE
,"Connecting to MASTER...");
1358 if (syncWithMaster() == REDIS_OK
) {
1359 redisLog(REDIS_NOTICE
,"MASTER <-> SLAVE sync succeeded");
1365 /* This function gets called every time Redis is entering the
1366 * main loop of the event driven library, that is, before sleeping
1367 * for ready file descriptors. */
1368 static void beforeSleep(struct aeEventLoop
*eventLoop
) {
1369 REDIS_NOTUSED(eventLoop
);
1371 if (server
.vm_enabled
&& listLength(server
.io_ready_clients
)) {
1375 listRewind(server
.io_ready_clients
,&li
);
1376 while((ln
= listNext(&li
))) {
1377 redisClient
*c
= ln
->value
;
1378 struct redisCommand
*cmd
;
1380 /* Resume the client. */
1381 listDelNode(server
.io_ready_clients
,ln
);
1382 c
->flags
&= (~REDIS_IO_WAIT
);
1383 server
.vm_blocked_clients
--;
1384 aeCreateFileEvent(server
.el
, c
->fd
, AE_READABLE
,
1385 readQueryFromClient
, c
);
1386 cmd
= lookupCommand(c
->argv
[0]->ptr
);
1387 assert(cmd
!= NULL
);
1390 /* There may be more data to process in the input buffer. */
1391 if (c
->querybuf
&& sdslen(c
->querybuf
) > 0)
1392 processInputBuffer(c
);
1397 static void createSharedObjects(void) {
1398 shared
.crlf
= createObject(REDIS_STRING
,sdsnew("\r\n"));
1399 shared
.ok
= createObject(REDIS_STRING
,sdsnew("+OK\r\n"));
1400 shared
.err
= createObject(REDIS_STRING
,sdsnew("-ERR\r\n"));
1401 shared
.emptybulk
= createObject(REDIS_STRING
,sdsnew("$0\r\n\r\n"));
1402 shared
.czero
= createObject(REDIS_STRING
,sdsnew(":0\r\n"));
1403 shared
.cone
= createObject(REDIS_STRING
,sdsnew(":1\r\n"));
1404 shared
.nullbulk
= createObject(REDIS_STRING
,sdsnew("$-1\r\n"));
1405 shared
.nullmultibulk
= createObject(REDIS_STRING
,sdsnew("*-1\r\n"));
1406 shared
.emptymultibulk
= createObject(REDIS_STRING
,sdsnew("*0\r\n"));
1407 shared
.pong
= createObject(REDIS_STRING
,sdsnew("+PONG\r\n"));
1408 shared
.queued
= createObject(REDIS_STRING
,sdsnew("+QUEUED\r\n"));
1409 shared
.wrongtypeerr
= createObject(REDIS_STRING
,sdsnew(
1410 "-ERR Operation against a key holding the wrong kind of value\r\n"));
1411 shared
.nokeyerr
= createObject(REDIS_STRING
,sdsnew(
1412 "-ERR no such key\r\n"));
1413 shared
.syntaxerr
= createObject(REDIS_STRING
,sdsnew(
1414 "-ERR syntax error\r\n"));
1415 shared
.sameobjecterr
= createObject(REDIS_STRING
,sdsnew(
1416 "-ERR source and destination objects are the same\r\n"));
1417 shared
.outofrangeerr
= createObject(REDIS_STRING
,sdsnew(
1418 "-ERR index out of range\r\n"));
1419 shared
.space
= createObject(REDIS_STRING
,sdsnew(" "));
1420 shared
.colon
= createObject(REDIS_STRING
,sdsnew(":"));
1421 shared
.plus
= createObject(REDIS_STRING
,sdsnew("+"));
1422 shared
.select0
= createStringObject("select 0\r\n",10);
1423 shared
.select1
= createStringObject("select 1\r\n",10);
1424 shared
.select2
= createStringObject("select 2\r\n",10);
1425 shared
.select3
= createStringObject("select 3\r\n",10);
1426 shared
.select4
= createStringObject("select 4\r\n",10);
1427 shared
.select5
= createStringObject("select 5\r\n",10);
1428 shared
.select6
= createStringObject("select 6\r\n",10);
1429 shared
.select7
= createStringObject("select 7\r\n",10);
1430 shared
.select8
= createStringObject("select 8\r\n",10);
1431 shared
.select9
= createStringObject("select 9\r\n",10);
1434 static void appendServerSaveParams(time_t seconds
, int changes
) {
1435 server
.saveparams
= zrealloc(server
.saveparams
,sizeof(struct saveparam
)*(server
.saveparamslen
+1));
1436 server
.saveparams
[server
.saveparamslen
].seconds
= seconds
;
1437 server
.saveparams
[server
.saveparamslen
].changes
= changes
;
1438 server
.saveparamslen
++;
1441 static void resetServerSaveParams() {
1442 zfree(server
.saveparams
);
1443 server
.saveparams
= NULL
;
1444 server
.saveparamslen
= 0;
1447 static void initServerConfig() {
1448 server
.dbnum
= REDIS_DEFAULT_DBNUM
;
1449 server
.port
= REDIS_SERVERPORT
;
1450 server
.verbosity
= REDIS_VERBOSE
;
1451 server
.maxidletime
= REDIS_MAXIDLETIME
;
1452 server
.saveparams
= NULL
;
1453 server
.logfile
= NULL
; /* NULL = log on standard output */
1454 server
.bindaddr
= NULL
;
1455 server
.glueoutputbuf
= 1;
1456 server
.daemonize
= 0;
1457 server
.appendonly
= 0;
1458 server
.appendfsync
= APPENDFSYNC_ALWAYS
;
1459 server
.lastfsync
= time(NULL
);
1460 server
.appendfd
= -1;
1461 server
.appendseldb
= -1; /* Make sure the first time will not match */
1462 server
.pidfile
= "/var/run/redis.pid";
1463 server
.dbfilename
= "dump.rdb";
1464 server
.appendfilename
= "appendonly.aof";
1465 server
.requirepass
= NULL
;
1466 server
.shareobjects
= 0;
1467 server
.rdbcompression
= 1;
1468 server
.sharingpoolsize
= 1024;
1469 server
.maxclients
= 0;
1470 server
.blpop_blocked_clients
= 0;
1471 server
.maxmemory
= 0;
1472 server
.vm_enabled
= 0;
1473 server
.vm_swap_file
= zstrdup("/tmp/redis-%p.vm");
1474 server
.vm_page_size
= 256; /* 256 bytes per page */
1475 server
.vm_pages
= 1024*1024*100; /* 104 millions of pages */
1476 server
.vm_max_memory
= 1024LL*1024*1024*1; /* 1 GB of RAM */
1477 server
.vm_max_threads
= 4;
1478 server
.vm_blocked_clients
= 0;
1479 server
.hash_max_zipmap_entries
= REDIS_HASH_MAX_ZIPMAP_ENTRIES
;
1480 server
.hash_max_zipmap_value
= REDIS_HASH_MAX_ZIPMAP_VALUE
;
1482 resetServerSaveParams();
1484 appendServerSaveParams(60*60,1); /* save after 1 hour and 1 change */
1485 appendServerSaveParams(300,100); /* save after 5 minutes and 100 changes */
1486 appendServerSaveParams(60,10000); /* save after 1 minute and 10000 changes */
1487 /* Replication related */
1489 server
.masterauth
= NULL
;
1490 server
.masterhost
= NULL
;
1491 server
.masterport
= 6379;
1492 server
.master
= NULL
;
1493 server
.replstate
= REDIS_REPL_NONE
;
1495 /* Double constants initialization */
1497 R_PosInf
= 1.0/R_Zero
;
1498 R_NegInf
= -1.0/R_Zero
;
1499 R_Nan
= R_Zero
/R_Zero
;
1502 static void initServer() {
1505 signal(SIGHUP
, SIG_IGN
);
1506 signal(SIGPIPE
, SIG_IGN
);
1507 setupSigSegvAction();
1509 server
.devnull
= fopen("/dev/null","w");
1510 if (server
.devnull
== NULL
) {
1511 redisLog(REDIS_WARNING
, "Can't open /dev/null: %s", server
.neterr
);
1514 server
.clients
= listCreate();
1515 server
.slaves
= listCreate();
1516 server
.monitors
= listCreate();
1517 server
.objfreelist
= listCreate();
1518 createSharedObjects();
1519 server
.el
= aeCreateEventLoop();
1520 server
.db
= zmalloc(sizeof(redisDb
)*server
.dbnum
);
1521 server
.sharingpool
= dictCreate(&setDictType
,NULL
);
1522 server
.fd
= anetTcpServer(server
.neterr
, server
.port
, server
.bindaddr
);
1523 if (server
.fd
== -1) {
1524 redisLog(REDIS_WARNING
, "Opening TCP port: %s", server
.neterr
);
1527 for (j
= 0; j
< server
.dbnum
; j
++) {
1528 server
.db
[j
].dict
= dictCreate(&dbDictType
,NULL
);
1529 server
.db
[j
].expires
= dictCreate(&keyptrDictType
,NULL
);
1530 server
.db
[j
].blockingkeys
= dictCreate(&keylistDictType
,NULL
);
1531 if (server
.vm_enabled
)
1532 server
.db
[j
].io_keys
= dictCreate(&keylistDictType
,NULL
);
1533 server
.db
[j
].id
= j
;
1535 server
.cronloops
= 0;
1536 server
.bgsavechildpid
= -1;
1537 server
.bgrewritechildpid
= -1;
1538 server
.bgrewritebuf
= sdsempty();
1539 server
.lastsave
= time(NULL
);
1541 server
.stat_numcommands
= 0;
1542 server
.stat_numconnections
= 0;
1543 server
.stat_starttime
= time(NULL
);
1544 server
.unixtime
= time(NULL
);
1545 aeCreateTimeEvent(server
.el
, 1, serverCron
, NULL
, NULL
);
1546 if (aeCreateFileEvent(server
.el
, server
.fd
, AE_READABLE
,
1547 acceptHandler
, NULL
) == AE_ERR
) oom("creating file event");
1549 if (server
.appendonly
) {
1550 server
.appendfd
= open(server
.appendfilename
,O_WRONLY
|O_APPEND
|O_CREAT
,0644);
1551 if (server
.appendfd
== -1) {
1552 redisLog(REDIS_WARNING
, "Can't open the append-only file: %s",
1558 if (server
.vm_enabled
) vmInit();
1561 /* Empty the whole database */
1562 static long long emptyDb() {
1564 long long removed
= 0;
1566 for (j
= 0; j
< server
.dbnum
; j
++) {
1567 removed
+= dictSize(server
.db
[j
].dict
);
1568 dictEmpty(server
.db
[j
].dict
);
1569 dictEmpty(server
.db
[j
].expires
);
/* Convert a configuration yes/no argument into an integer.
 *
 * The comparison ignores case. Returns 1 for "yes", 0 for "no" and -1 for
 * anything else; callers treat -1 as a configuration error. */
static int yesnotoi(char *s) {
    if (strcasecmp(s,"yes") == 0)
        return 1;
    if (strcasecmp(s,"no") == 0)
        return 0;
    return -1;
}
1580 /* I agree, this is a very rudimentary way to load a configuration...
1581 will improve later if the config gets more complex */
1582 static void loadServerConfig(char *filename
) {
1584 char buf
[REDIS_CONFIGLINE_MAX
+1], *err
= NULL
;
1588 if (filename
[0] == '-' && filename
[1] == '\0')
1591 if ((fp
= fopen(filename
,"r")) == NULL
) {
1592 redisLog(REDIS_WARNING
,"Fatal error, can't open config file");
1597 while(fgets(buf
,REDIS_CONFIGLINE_MAX
+1,fp
) != NULL
) {
1603 line
= sdstrim(line
," \t\r\n");
1605 /* Skip comments and blank lines*/
1606 if (line
[0] == '#' || line
[0] == '\0') {
1611 /* Split into arguments */
1612 argv
= sdssplitlen(line
,sdslen(line
)," ",1,&argc
);
1613 sdstolower(argv
[0]);
1615 /* Execute config directives */
1616 if (!strcasecmp(argv
[0],"timeout") && argc
== 2) {
1617 server
.maxidletime
= atoi(argv
[1]);
1618 if (server
.maxidletime
< 0) {
1619 err
= "Invalid timeout value"; goto loaderr
;
1621 } else if (!strcasecmp(argv
[0],"port") && argc
== 2) {
1622 server
.port
= atoi(argv
[1]);
1623 if (server
.port
< 1 || server
.port
> 65535) {
1624 err
= "Invalid port"; goto loaderr
;
1626 } else if (!strcasecmp(argv
[0],"bind") && argc
== 2) {
1627 server
.bindaddr
= zstrdup(argv
[1]);
1628 } else if (!strcasecmp(argv
[0],"save") && argc
== 3) {
1629 int seconds
= atoi(argv
[1]);
1630 int changes
= atoi(argv
[2]);
1631 if (seconds
< 1 || changes
< 0) {
1632 err
= "Invalid save parameters"; goto loaderr
;
1634 appendServerSaveParams(seconds
,changes
);
1635 } else if (!strcasecmp(argv
[0],"dir") && argc
== 2) {
1636 if (chdir(argv
[1]) == -1) {
1637 redisLog(REDIS_WARNING
,"Can't chdir to '%s': %s",
1638 argv
[1], strerror(errno
));
1641 } else if (!strcasecmp(argv
[0],"loglevel") && argc
== 2) {
1642 if (!strcasecmp(argv
[1],"debug")) server
.verbosity
= REDIS_DEBUG
;
1643 else if (!strcasecmp(argv
[1],"verbose")) server
.verbosity
= REDIS_VERBOSE
;
1644 else if (!strcasecmp(argv
[1],"notice")) server
.verbosity
= REDIS_NOTICE
;
1645 else if (!strcasecmp(argv
[1],"warning")) server
.verbosity
= REDIS_WARNING
;
1647 err
= "Invalid log level. Must be one of debug, notice, warning";
1650 } else if (!strcasecmp(argv
[0],"logfile") && argc
== 2) {
1653 server
.logfile
= zstrdup(argv
[1]);
1654 if (!strcasecmp(server
.logfile
,"stdout")) {
1655 zfree(server
.logfile
);
1656 server
.logfile
= NULL
;
1658 if (server
.logfile
) {
1659 /* Test if we are able to open the file. The server will not
1660 * be able to abort just for this problem later... */
1661 logfp
= fopen(server
.logfile
,"a");
1662 if (logfp
== NULL
) {
1663 err
= sdscatprintf(sdsempty(),
1664 "Can't open the log file: %s", strerror(errno
));
1669 } else if (!strcasecmp(argv
[0],"databases") && argc
== 2) {
1670 server
.dbnum
= atoi(argv
[1]);
1671 if (server
.dbnum
< 1) {
1672 err
= "Invalid number of databases"; goto loaderr
;
1674 } else if (!strcasecmp(argv
[0],"maxclients") && argc
== 2) {
1675 server
.maxclients
= atoi(argv
[1]);
1676 } else if (!strcasecmp(argv
[0],"maxmemory") && argc
== 2) {
1677 server
.maxmemory
= strtoll(argv
[1], NULL
, 10);
1678 } else if (!strcasecmp(argv
[0],"slaveof") && argc
== 3) {
1679 server
.masterhost
= sdsnew(argv
[1]);
1680 server
.masterport
= atoi(argv
[2]);
1681 server
.replstate
= REDIS_REPL_CONNECT
;
1682 } else if (!strcasecmp(argv
[0],"masterauth") && argc
== 2) {
1683 server
.masterauth
= zstrdup(argv
[1]);
1684 } else if (!strcasecmp(argv
[0],"glueoutputbuf") && argc
== 2) {
1685 if ((server
.glueoutputbuf
= yesnotoi(argv
[1])) == -1) {
1686 err
= "argument must be 'yes' or 'no'"; goto loaderr
;
1688 } else if (!strcasecmp(argv
[0],"shareobjects") && argc
== 2) {
1689 if ((server
.shareobjects
= yesnotoi(argv
[1])) == -1) {
1690 err
= "argument must be 'yes' or 'no'"; goto loaderr
;
1692 } else if (!strcasecmp(argv
[0],"rdbcompression") && argc
== 2) {
1693 if ((server
.rdbcompression
= yesnotoi(argv
[1])) == -1) {
1694 err
= "argument must be 'yes' or 'no'"; goto loaderr
;
1696 } else if (!strcasecmp(argv
[0],"shareobjectspoolsize") && argc
== 2) {
1697 server
.sharingpoolsize
= atoi(argv
[1]);
1698 if (server
.sharingpoolsize
< 1) {
1699 err
= "invalid object sharing pool size"; goto loaderr
;
1701 } else if (!strcasecmp(argv
[0],"daemonize") && argc
== 2) {
1702 if ((server
.daemonize
= yesnotoi(argv
[1])) == -1) {
1703 err
= "argument must be 'yes' or 'no'"; goto loaderr
;
1705 } else if (!strcasecmp(argv
[0],"appendonly") && argc
== 2) {
1706 if ((server
.appendonly
= yesnotoi(argv
[1])) == -1) {
1707 err
= "argument must be 'yes' or 'no'"; goto loaderr
;
1709 } else if (!strcasecmp(argv
[0],"appendfsync") && argc
== 2) {
1710 if (!strcasecmp(argv
[1],"no")) {
1711 server
.appendfsync
= APPENDFSYNC_NO
;
1712 } else if (!strcasecmp(argv
[1],"always")) {
1713 server
.appendfsync
= APPENDFSYNC_ALWAYS
;
1714 } else if (!strcasecmp(argv
[1],"everysec")) {
1715 server
.appendfsync
= APPENDFSYNC_EVERYSEC
;
1717 err
= "argument must be 'no', 'always' or 'everysec'";
1720 } else if (!strcasecmp(argv
[0],"requirepass") && argc
== 2) {
1721 server
.requirepass
= zstrdup(argv
[1]);
1722 } else if (!strcasecmp(argv
[0],"pidfile") && argc
== 2) {
1723 server
.pidfile
= zstrdup(argv
[1]);
1724 } else if (!strcasecmp(argv
[0],"dbfilename") && argc
== 2) {
1725 server
.dbfilename
= zstrdup(argv
[1]);
1726 } else if (!strcasecmp(argv
[0],"vm-enabled") && argc
== 2) {
1727 if ((server
.vm_enabled
= yesnotoi(argv
[1])) == -1) {
1728 err
= "argument must be 'yes' or 'no'"; goto loaderr
;
1730 } else if (!strcasecmp(argv
[0],"vm-swap-file") && argc
== 2) {
1731 zfree(server
.vm_swap_file
);
1732 server
.vm_swap_file
= zstrdup(argv
[1]);
1733 } else if (!strcasecmp(argv
[0],"vm-max-memory") && argc
== 2) {
1734 server
.vm_max_memory
= strtoll(argv
[1], NULL
, 10);
1735 } else if (!strcasecmp(argv
[0],"vm-page-size") && argc
== 2) {
1736 server
.vm_page_size
= strtoll(argv
[1], NULL
, 10);
1737 } else if (!strcasecmp(argv
[0],"vm-pages") && argc
== 2) {
1738 server
.vm_pages
= strtoll(argv
[1], NULL
, 10);
1739 } else if (!strcasecmp(argv
[0],"vm-max-threads") && argc
== 2) {
1740 server
.vm_max_threads
= strtoll(argv
[1], NULL
, 10);
1741 } else if (!strcasecmp(argv
[0],"hash-max-zipmap-entries") && argc
== 2){
1742 server
.hash_max_zipmap_entries
= strtol(argv
[1], NULL
, 10);
1743 } else if (!strcasecmp(argv
[0],"hash-max-zipmap-value") && argc
== 2){
1744 server
.hash_max_zipmap_value
= strtol(argv
[1], NULL
, 10);
1745 } else if (!strcasecmp(argv
[0],"vm-max-threads") && argc
== 2) {
1746 server
.vm_max_threads
= strtoll(argv
[1], NULL
, 10);
1748 err
= "Bad directive or wrong number of arguments"; goto loaderr
;
1750 for (j
= 0; j
< argc
; j
++)
1755 if (fp
!= stdin
) fclose(fp
);
1759 fprintf(stderr
, "\n*** FATAL CONFIG FILE ERROR ***\n");
1760 fprintf(stderr
, "Reading the configuration file, at line %d\n", linenum
);
1761 fprintf(stderr
, ">>> '%s'\n", line
);
1762 fprintf(stderr
, "%s\n", err
);
1766 static void freeClientArgv(redisClient
*c
) {
1769 for (j
= 0; j
< c
->argc
; j
++)
1770 decrRefCount(c
->argv
[j
]);
1771 for (j
= 0; j
< c
->mbargc
; j
++)
1772 decrRefCount(c
->mbargv
[j
]);
1777 static void freeClient(redisClient
*c
) {
1780 /* Note that if the client we are freeing is blocked into a blocking
1781 * call, we have to set querybuf to NULL *before* calling
1782 * unblockClientWaitingData() so that processInputBuffer() will not get
1783 * called. Also it is important to remove the file events after
1784 * this, because this call adds the READABLE event. */
1785 sdsfree(c
->querybuf
);
1787 if (c
->flags
& REDIS_BLOCKED
)
1788 unblockClientWaitingData(c
);
1790 aeDeleteFileEvent(server
.el
,c
->fd
,AE_READABLE
);
1791 aeDeleteFileEvent(server
.el
,c
->fd
,AE_WRITABLE
);
1792 listRelease(c
->reply
);
1795 /* Remove from the list of clients */
1796 ln
= listSearchKey(server
.clients
,c
);
1797 redisAssert(ln
!= NULL
);
1798 listDelNode(server
.clients
,ln
);
1799 /* Remove from the list of clients waiting for swapped keys */
1800 if (c
->flags
& REDIS_IO_WAIT
&& listLength(c
->io_keys
) == 0) {
1801 ln
= listSearchKey(server
.io_ready_clients
,c
);
1803 listDelNode(server
.io_ready_clients
,ln
);
1804 server
.vm_blocked_clients
--;
1807 while (server
.vm_enabled
&& listLength(c
->io_keys
)) {
1808 ln
= listFirst(c
->io_keys
);
1809 dontWaitForSwappedKey(c
,ln
->value
);
1811 listRelease(c
->io_keys
);
1813 if (c
->flags
& REDIS_SLAVE
) {
1814 if (c
->replstate
== REDIS_REPL_SEND_BULK
&& c
->repldbfd
!= -1)
1816 list
*l
= (c
->flags
& REDIS_MONITOR
) ? server
.monitors
: server
.slaves
;
1817 ln
= listSearchKey(l
,c
);
1818 redisAssert(ln
!= NULL
);
1821 if (c
->flags
& REDIS_MASTER
) {
1822 server
.master
= NULL
;
1823 server
.replstate
= REDIS_REPL_CONNECT
;
1827 freeClientMultiState(c
);
1831 #define GLUEREPLY_UP_TO (1024)
1832 static void glueReplyBuffersIfNeeded(redisClient
*c
) {
1834 char buf
[GLUEREPLY_UP_TO
];
1839 listRewind(c
->reply
,&li
);
1840 while((ln
= listNext(&li
))) {
1844 objlen
= sdslen(o
->ptr
);
1845 if (copylen
+ objlen
<= GLUEREPLY_UP_TO
) {
1846 memcpy(buf
+copylen
,o
->ptr
,objlen
);
1848 listDelNode(c
->reply
,ln
);
1850 if (copylen
== 0) return;
1854 /* Now the output buffer is empty, add the new single element */
1855 o
= createObject(REDIS_STRING
,sdsnewlen(buf
,copylen
));
1856 listAddNodeHead(c
->reply
,o
);
1859 static void sendReplyToClient(aeEventLoop
*el
, int fd
, void *privdata
, int mask
) {
1860 redisClient
*c
= privdata
;
1861 int nwritten
= 0, totwritten
= 0, objlen
;
1864 REDIS_NOTUSED(mask
);
1866 /* Use writev() if we have enough buffers to send */
1867 if (!server
.glueoutputbuf
&&
1868 listLength(c
->reply
) > REDIS_WRITEV_THRESHOLD
&&
1869 !(c
->flags
& REDIS_MASTER
))
1871 sendReplyToClientWritev(el
, fd
, privdata
, mask
);
1875 while(listLength(c
->reply
)) {
1876 if (server
.glueoutputbuf
&& listLength(c
->reply
) > 1)
1877 glueReplyBuffersIfNeeded(c
);
1879 o
= listNodeValue(listFirst(c
->reply
));
1880 objlen
= sdslen(o
->ptr
);
1883 listDelNode(c
->reply
,listFirst(c
->reply
));
1887 if (c
->flags
& REDIS_MASTER
) {
1888 /* Don't reply to a master */
1889 nwritten
= objlen
- c
->sentlen
;
1891 nwritten
= write(fd
, ((char*)o
->ptr
)+c
->sentlen
, objlen
- c
->sentlen
);
1892 if (nwritten
<= 0) break;
1894 c
->sentlen
+= nwritten
;
1895 totwritten
+= nwritten
;
1896 /* If we fully sent the object on head go to the next one */
1897 if (c
->sentlen
== objlen
) {
1898 listDelNode(c
->reply
,listFirst(c
->reply
));
        /* Note that we avoid sending more than REDIS_MAX_WRITE_PER_EVENT
         * bytes: in a single threaded server it's a good idea to serve
         * other clients as well, even if a very large request comes from
         * a super fast link that is always able to accept data (in a real
         * world scenario think about 'KEYS *' against the loopback
         * interface). */
1906 if (totwritten
> REDIS_MAX_WRITE_PER_EVENT
) break;
1908 if (nwritten
== -1) {
1909 if (errno
== EAGAIN
) {
1912 redisLog(REDIS_VERBOSE
,
1913 "Error writing to client: %s", strerror(errno
));
1918 if (totwritten
> 0) c
->lastinteraction
= time(NULL
);
1919 if (listLength(c
->reply
) == 0) {
1921 aeDeleteFileEvent(server
.el
,c
->fd
,AE_WRITABLE
);
1925 static void sendReplyToClientWritev(aeEventLoop
*el
, int fd
, void *privdata
, int mask
)
1927 redisClient
*c
= privdata
;
1928 int nwritten
= 0, totwritten
= 0, objlen
, willwrite
;
1930 struct iovec iov
[REDIS_WRITEV_IOVEC_COUNT
];
1931 int offset
, ion
= 0;
1933 REDIS_NOTUSED(mask
);
1936 while (listLength(c
->reply
)) {
1937 offset
= c
->sentlen
;
1941 /* fill-in the iov[] array */
1942 for(node
= listFirst(c
->reply
); node
; node
= listNextNode(node
)) {
1943 o
= listNodeValue(node
);
1944 objlen
= sdslen(o
->ptr
);
1946 if (totwritten
+ objlen
- offset
> REDIS_MAX_WRITE_PER_EVENT
)
1949 if(ion
== REDIS_WRITEV_IOVEC_COUNT
)
1950 break; /* no more iovecs */
1952 iov
[ion
].iov_base
= ((char*)o
->ptr
) + offset
;
1953 iov
[ion
].iov_len
= objlen
- offset
;
1954 willwrite
+= objlen
- offset
;
1955 offset
= 0; /* just for the first item */
1962 /* write all collected blocks at once */
1963 if((nwritten
= writev(fd
, iov
, ion
)) < 0) {
1964 if (errno
!= EAGAIN
) {
1965 redisLog(REDIS_VERBOSE
,
1966 "Error writing to client: %s", strerror(errno
));
1973 totwritten
+= nwritten
;
1974 offset
= c
->sentlen
;
1976 /* remove written robjs from c->reply */
1977 while (nwritten
&& listLength(c
->reply
)) {
1978 o
= listNodeValue(listFirst(c
->reply
));
1979 objlen
= sdslen(o
->ptr
);
1981 if(nwritten
>= objlen
- offset
) {
1982 listDelNode(c
->reply
, listFirst(c
->reply
));
1983 nwritten
-= objlen
- offset
;
1987 c
->sentlen
+= nwritten
;
1995 c
->lastinteraction
= time(NULL
);
1997 if (listLength(c
->reply
) == 0) {
1999 aeDeleteFileEvent(server
.el
,c
->fd
,AE_WRITABLE
);
2003 static struct redisCommand
*lookupCommand(char *name
) {
2005 while(cmdTable
[j
].name
!= NULL
) {
2006 if (!strcasecmp(name
,cmdTable
[j
].name
)) return &cmdTable
[j
];
/* resetClient prepares the client to process the next command */
2013 static void resetClient(redisClient
*c
) {
2019 /* Call() is the core of Redis execution of a command */
2020 static void call(redisClient
*c
, struct redisCommand
*cmd
) {
2023 dirty
= server
.dirty
;
2025 if (server
.appendonly
&& server
.dirty
-dirty
)
2026 feedAppendOnlyFile(cmd
,c
->db
->id
,c
->argv
,c
->argc
);
2027 if (server
.dirty
-dirty
&& listLength(server
.slaves
))
2028 replicationFeedSlaves(server
.slaves
,cmd
,c
->db
->id
,c
->argv
,c
->argc
);
2029 if (listLength(server
.monitors
))
2030 replicationFeedSlaves(server
.monitors
,cmd
,c
->db
->id
,c
->argv
,c
->argc
);
2031 server
.stat_numcommands
++;
/* If this function gets called we already read a whole
 * command, arguments are in the client argv/argc fields.
 * processCommand() executes the command or prepares the
 * server for a bulk read from the client.
 *
 * If 1 is returned the client is still alive and valid and
 * other operations can be performed by the caller. Otherwise
 * if 0 is returned the client was destroyed (i.e. after QUIT). */
2042 static int processCommand(redisClient
*c
) {
2043 struct redisCommand
*cmd
;
2045 /* Free some memory if needed (maxmemory setting) */
2046 if (server
.maxmemory
) freeMemoryIfNeeded();
2048 /* Handle the multi bulk command type. This is an alternative protocol
2049 * supported by Redis in order to receive commands that are composed of
2050 * multiple binary-safe "bulk" arguments. The latency of processing is
2051 * a bit higher but this allows things like multi-sets, so if this
2052 * protocol is used only for MSET and similar commands this is a big win. */
2053 if (c
->multibulk
== 0 && c
->argc
== 1 && ((char*)(c
->argv
[0]->ptr
))[0] == '*') {
2054 c
->multibulk
= atoi(((char*)c
->argv
[0]->ptr
)+1);
2055 if (c
->multibulk
<= 0) {
2059 decrRefCount(c
->argv
[c
->argc
-1]);
2063 } else if (c
->multibulk
) {
2064 if (c
->bulklen
== -1) {
2065 if (((char*)c
->argv
[0]->ptr
)[0] != '$') {
2066 addReplySds(c
,sdsnew("-ERR multi bulk protocol error\r\n"));
2070 int bulklen
= atoi(((char*)c
->argv
[0]->ptr
)+1);
2071 decrRefCount(c
->argv
[0]);
2072 if (bulklen
< 0 || bulklen
> 1024*1024*1024) {
2074 addReplySds(c
,sdsnew("-ERR invalid bulk write count\r\n"));
2079 c
->bulklen
= bulklen
+2; /* add two bytes for CR+LF */
2083 c
->mbargv
= zrealloc(c
->mbargv
,(sizeof(robj
*))*(c
->mbargc
+1));
2084 c
->mbargv
[c
->mbargc
] = c
->argv
[0];
2088 if (c
->multibulk
== 0) {
2092 /* Here we need to swap the multi-bulk argc/argv with the
2093 * normal argc/argv of the client structure. */
2095 c
->argv
= c
->mbargv
;
2096 c
->mbargv
= auxargv
;
2099 c
->argc
= c
->mbargc
;
2100 c
->mbargc
= auxargc
;
2102 /* We need to set bulklen to something different than -1
2103 * in order for the code below to process the command without
2104 * to try to read the last argument of a bulk command as
2105 * a special argument. */
2107 /* continue below and process the command */
2114 /* -- end of multi bulk commands processing -- */
2116 /* The QUIT command is handled as a special case. Normal command
2117 * procs are unable to close the client connection safely */
2118 if (!strcasecmp(c
->argv
[0]->ptr
,"quit")) {
2123 /* Now lookup the command and check ASAP about trivial error conditions
2124 * such wrong arity, bad command name and so forth. */
2125 cmd
= lookupCommand(c
->argv
[0]->ptr
);
2128 sdscatprintf(sdsempty(), "-ERR unknown command '%s'\r\n",
2129 (char*)c
->argv
[0]->ptr
));
2132 } else if ((cmd
->arity
> 0 && cmd
->arity
!= c
->argc
) ||
2133 (c
->argc
< -cmd
->arity
)) {
2135 sdscatprintf(sdsempty(),
2136 "-ERR wrong number of arguments for '%s' command\r\n",
2140 } else if (server
.maxmemory
&& cmd
->flags
& REDIS_CMD_DENYOOM
&& zmalloc_used_memory() > server
.maxmemory
) {
2141 addReplySds(c
,sdsnew("-ERR command not allowed when used memory > 'maxmemory'\r\n"));
2144 } else if (cmd
->flags
& REDIS_CMD_BULK
&& c
->bulklen
== -1) {
2145 /* This is a bulk command, we have to read the last argument yet. */
2146 int bulklen
= atoi(c
->argv
[c
->argc
-1]->ptr
);
2148 decrRefCount(c
->argv
[c
->argc
-1]);
2149 if (bulklen
< 0 || bulklen
> 1024*1024*1024) {
2151 addReplySds(c
,sdsnew("-ERR invalid bulk write count\r\n"));
2156 c
->bulklen
= bulklen
+2; /* add two bytes for CR+LF */
2157 /* It is possible that the bulk read is already in the
2158 * buffer. Check this condition and handle it accordingly.
2159 * This is just a fast path, alternative to call processInputBuffer().
2160 * It's a good idea since the code is small and this condition
2161 * happens most of the times. */
2162 if ((signed)sdslen(c
->querybuf
) >= c
->bulklen
) {
2163 c
->argv
[c
->argc
] = createStringObject(c
->querybuf
,c
->bulklen
-2);
2165 c
->querybuf
= sdsrange(c
->querybuf
,c
->bulklen
,-1);
2167 /* Otherwise return... there is to read the last argument
2168 * from the socket. */
2172 /* Let's try to share objects on the command arguments vector */
2173 if (server
.shareobjects
) {
2175 for(j
= 1; j
< c
->argc
; j
++)
2176 c
->argv
[j
] = tryObjectSharing(c
->argv
[j
]);
2178 /* Let's try to encode the bulk object to save space. */
2179 if (cmd
->flags
& REDIS_CMD_BULK
)
2180 tryObjectEncoding(c
->argv
[c
->argc
-1]);
2182 /* Check if the user is authenticated */
2183 if (server
.requirepass
&& !c
->authenticated
&& cmd
->proc
!= authCommand
) {
2184 addReplySds(c
,sdsnew("-ERR operation not permitted\r\n"));
2189 /* Exec the command */
2190 if (c
->flags
& REDIS_MULTI
&& cmd
->proc
!= execCommand
&& cmd
->proc
!= discardCommand
) {
2191 queueMultiCommand(c
,cmd
);
2192 addReply(c
,shared
.queued
);
2194 if (server
.vm_enabled
&& server
.vm_max_threads
> 0 &&
2195 blockClientOnSwappedKeys(cmd
,c
)) return 1;
2199 /* Prepare the client for the next command */
2204 static void replicationFeedSlaves(list
*slaves
, struct redisCommand
*cmd
, int dictid
, robj
**argv
, int argc
) {
2209 /* (args*2)+1 is enough room for args, spaces, newlines */
2210 robj
*static_outv
[REDIS_STATIC_ARGS
*2+1];
2212 if (argc
<= REDIS_STATIC_ARGS
) {
2215 outv
= zmalloc(sizeof(robj
*)*(argc
*2+1));
2218 for (j
= 0; j
< argc
; j
++) {
2219 if (j
!= 0) outv
[outc
++] = shared
.space
;
2220 if ((cmd
->flags
& REDIS_CMD_BULK
) && j
== argc
-1) {
2223 lenobj
= createObject(REDIS_STRING
,
2224 sdscatprintf(sdsempty(),"%lu\r\n",
2225 (unsigned long) stringObjectLen(argv
[j
])));
2226 lenobj
->refcount
= 0;
2227 outv
[outc
++] = lenobj
;
2229 outv
[outc
++] = argv
[j
];
2231 outv
[outc
++] = shared
.crlf
;
2233 /* Increment all the refcounts at start and decrement at end in order to
2234 * be sure to free objects if there is no slave in a replication state
2235 * able to be feed with commands */
2236 for (j
= 0; j
< outc
; j
++) incrRefCount(outv
[j
]);
2237 listRewind(slaves
,&li
);
2238 while((ln
= listNext(&li
))) {
2239 redisClient
*slave
= ln
->value
;
2241 /* Don't feed slaves that are still waiting for BGSAVE to start */
2242 if (slave
->replstate
== REDIS_REPL_WAIT_BGSAVE_START
) continue;
2244 /* Feed all the other slaves, MONITORs and so on */
2245 if (slave
->slaveseldb
!= dictid
) {
2249 case 0: selectcmd
= shared
.select0
; break;
2250 case 1: selectcmd
= shared
.select1
; break;
2251 case 2: selectcmd
= shared
.select2
; break;
2252 case 3: selectcmd
= shared
.select3
; break;
2253 case 4: selectcmd
= shared
.select4
; break;
2254 case 5: selectcmd
= shared
.select5
; break;
2255 case 6: selectcmd
= shared
.select6
; break;
2256 case 7: selectcmd
= shared
.select7
; break;
2257 case 8: selectcmd
= shared
.select8
; break;
2258 case 9: selectcmd
= shared
.select9
; break;
2260 selectcmd
= createObject(REDIS_STRING
,
2261 sdscatprintf(sdsempty(),"select %d\r\n",dictid
));
2262 selectcmd
->refcount
= 0;
2265 addReply(slave
,selectcmd
);
2266 slave
->slaveseldb
= dictid
;
2268 for (j
= 0; j
< outc
; j
++) addReply(slave
,outv
[j
]);
2270 for (j
= 0; j
< outc
; j
++) decrRefCount(outv
[j
]);
2271 if (outv
!= static_outv
) zfree(outv
);
2274 static void processInputBuffer(redisClient
*c
) {
    /* Before processing the input buffer, make sure the client is not
     * waiting for a blocking operation such as BLPOP. Note that on the first
     * iteration the client is never blocked, otherwise processInputBuffer()
     * would not be called at all, but after the execution of the first
     * commands in the input buffer the client may be blocked, and the
     * "goto again" will try to reiterate. The following line makes it
     * return asap. */
2282 if (c
->flags
& REDIS_BLOCKED
|| c
->flags
& REDIS_IO_WAIT
) return;
2283 if (c
->bulklen
== -1) {
2284 /* Read the first line of the query */
2285 char *p
= strchr(c
->querybuf
,'\n');
2292 query
= c
->querybuf
;
2293 c
->querybuf
= sdsempty();
2294 querylen
= 1+(p
-(query
));
2295 if (sdslen(query
) > querylen
) {
2296 /* leave data after the first line of the query in the buffer */
2297 c
->querybuf
= sdscatlen(c
->querybuf
,query
+querylen
,sdslen(query
)-querylen
);
2299 *p
= '\0'; /* remove "\n" */
2300 if (*(p
-1) == '\r') *(p
-1) = '\0'; /* and "\r" if any */
2301 sdsupdatelen(query
);
2303 /* Now we can split the query in arguments */
2304 argv
= sdssplitlen(query
,sdslen(query
)," ",1,&argc
);
2307 if (c
->argv
) zfree(c
->argv
);
2308 c
->argv
= zmalloc(sizeof(robj
*)*argc
);
2310 for (j
= 0; j
< argc
; j
++) {
2311 if (sdslen(argv
[j
])) {
2312 c
->argv
[c
->argc
] = createObject(REDIS_STRING
,argv
[j
]);
2320 /* Execute the command. If the client is still valid
2321 * after processCommand() return and there is something
2322 * on the query buffer try to process the next command. */
2323 if (processCommand(c
) && sdslen(c
->querybuf
)) goto again
;
2325 /* Nothing to process, argc == 0. Just process the query
2326 * buffer if it's not empty or return to the caller */
2327 if (sdslen(c
->querybuf
)) goto again
;
2330 } else if (sdslen(c
->querybuf
) >= REDIS_REQUEST_MAX_SIZE
) {
2331 redisLog(REDIS_VERBOSE
, "Client protocol error");
2336 /* Bulk read handling. Note that if we are at this point
2337 the client already sent a command terminated with a newline,
2338 we are reading the bulk data that is actually the last
2339 argument of the command. */
2340 int qbl
= sdslen(c
->querybuf
);
2342 if (c
->bulklen
<= qbl
) {
2343 /* Copy everything but the final CRLF as final argument */
2344 c
->argv
[c
->argc
] = createStringObject(c
->querybuf
,c
->bulklen
-2);
2346 c
->querybuf
= sdsrange(c
->querybuf
,c
->bulklen
,-1);
2347 /* Process the command. If the client is still valid after
2348 * the processing and there is more data in the buffer
2349 * try to parse it. */
2350 if (processCommand(c
) && sdslen(c
->querybuf
)) goto again
;
2356 static void readQueryFromClient(aeEventLoop
*el
, int fd
, void *privdata
, int mask
) {
2357 redisClient
*c
= (redisClient
*) privdata
;
2358 char buf
[REDIS_IOBUF_LEN
];
2361 REDIS_NOTUSED(mask
);
2363 nread
= read(fd
, buf
, REDIS_IOBUF_LEN
);
2365 if (errno
== EAGAIN
) {
2368 redisLog(REDIS_VERBOSE
, "Reading from client: %s",strerror(errno
));
2372 } else if (nread
== 0) {
2373 redisLog(REDIS_VERBOSE
, "Client closed connection");
2378 c
->querybuf
= sdscatlen(c
->querybuf
, buf
, nread
);
2379 c
->lastinteraction
= time(NULL
);
2383 if (!(c
->flags
& REDIS_BLOCKED
))
2384 processInputBuffer(c
);
2387 static int selectDb(redisClient
*c
, int id
) {
2388 if (id
< 0 || id
>= server
.dbnum
)
2390 c
->db
= &server
.db
[id
];
2394 static void *dupClientReplyValue(void *o
) {
2395 incrRefCount((robj
*)o
);
2399 static redisClient
*createClient(int fd
) {
2400 redisClient
*c
= zmalloc(sizeof(*c
));
2402 anetNonBlock(NULL
,fd
);
2403 anetTcpNoDelay(NULL
,fd
);
2404 if (!c
) return NULL
;
2407 c
->querybuf
= sdsempty();
2416 c
->lastinteraction
= time(NULL
);
2417 c
->authenticated
= 0;
2418 c
->replstate
= REDIS_REPL_NONE
;
2419 c
->reply
= listCreate();
2420 listSetFreeMethod(c
->reply
,decrRefCount
);
2421 listSetDupMethod(c
->reply
,dupClientReplyValue
);
2422 c
->blockingkeys
= NULL
;
2423 c
->blockingkeysnum
= 0;
2424 c
->io_keys
= listCreate();
2425 listSetFreeMethod(c
->io_keys
,decrRefCount
);
2426 if (aeCreateFileEvent(server
.el
, c
->fd
, AE_READABLE
,
2427 readQueryFromClient
, c
) == AE_ERR
) {
2431 listAddNodeTail(server
.clients
,c
);
2432 initClientMultiState(c
);
2436 static void addReply(redisClient
*c
, robj
*obj
) {
2437 if (listLength(c
->reply
) == 0 &&
2438 (c
->replstate
== REDIS_REPL_NONE
||
2439 c
->replstate
== REDIS_REPL_ONLINE
) &&
2440 aeCreateFileEvent(server
.el
, c
->fd
, AE_WRITABLE
,
2441 sendReplyToClient
, c
) == AE_ERR
) return;
2443 if (server
.vm_enabled
&& obj
->storage
!= REDIS_VM_MEMORY
) {
2444 obj
= dupStringObject(obj
);
2445 obj
->refcount
= 0; /* getDecodedObject() will increment the refcount */
2447 listAddNodeTail(c
->reply
,getDecodedObject(obj
));
2450 static void addReplySds(redisClient
*c
, sds s
) {
2451 robj
*o
= createObject(REDIS_STRING
,s
);
2456 static void addReplyDouble(redisClient
*c
, double d
) {
2459 snprintf(buf
,sizeof(buf
),"%.17g",d
);
2460 addReplySds(c
,sdscatprintf(sdsempty(),"$%lu\r\n%s\r\n",
2461 (unsigned long) strlen(buf
),buf
));
2464 static void addReplyLong(redisClient
*c
, long l
) {
2468 len
= snprintf(buf
,sizeof(buf
),":%ld\r\n",l
);
2469 addReplySds(c
,sdsnewlen(buf
,len
));
2472 static void addReplyBulkLen(redisClient
*c
, robj
*obj
) {
2475 if (obj
->encoding
== REDIS_ENCODING_RAW
) {
2476 len
= sdslen(obj
->ptr
);
2478 long n
= (long)obj
->ptr
;
2480 /* Compute how many bytes will take this integer as a radix 10 string */
2486 while((n
= n
/10) != 0) {
2490 addReplySds(c
,sdscatprintf(sdsempty(),"$%lu\r\n",(unsigned long)len
));
2493 static void acceptHandler(aeEventLoop
*el
, int fd
, void *privdata
, int mask
) {
2498 REDIS_NOTUSED(mask
);
2499 REDIS_NOTUSED(privdata
);
2501 cfd
= anetAccept(server
.neterr
, fd
, cip
, &cport
);
2502 if (cfd
== AE_ERR
) {
2503 redisLog(REDIS_VERBOSE
,"Accepting client connection: %s", server
.neterr
);
2506 redisLog(REDIS_VERBOSE
,"Accepted %s:%d", cip
, cport
);
2507 if ((c
= createClient(cfd
)) == NULL
) {
2508 redisLog(REDIS_WARNING
,"Error allocating resoures for the client");
2509 close(cfd
); /* May be already closed, just ingore errors */
    /* If the maxclients directive is set and this is one client more... close
     * the connection. Note that we create the client instead of checking for
     * this condition beforehand, since now the socket is already set in
     * nonblocking mode and we can send an error for free using the Kernel
     * I/O */
2516 if (server
.maxclients
&& listLength(server
.clients
) > server
.maxclients
) {
2517 char *err
= "-ERR max number of clients reached\r\n";
2519 /* That's a best effort error message, don't check write errors */
2520 if (write(c
->fd
,err
,strlen(err
)) == -1) {
2521 /* Nothing to do, Just to avoid the warning... */
2526 server
.stat_numconnections
++;
2529 /* ======================= Redis objects implementation ===================== */
2531 static robj
*createObject(int type
, void *ptr
) {
2534 if (server
.vm_enabled
) pthread_mutex_lock(&server
.obj_freelist_mutex
);
2535 if (listLength(server
.objfreelist
)) {
2536 listNode
*head
= listFirst(server
.objfreelist
);
2537 o
= listNodeValue(head
);
2538 listDelNode(server
.objfreelist
,head
);
2539 if (server
.vm_enabled
) pthread_mutex_unlock(&server
.obj_freelist_mutex
);
2541 if (server
.vm_enabled
) {
2542 pthread_mutex_unlock(&server
.obj_freelist_mutex
);
2543 o
= zmalloc(sizeof(*o
));
2545 o
= zmalloc(sizeof(*o
)-sizeof(struct redisObjectVM
));
2549 o
->encoding
= REDIS_ENCODING_RAW
;
2552 if (server
.vm_enabled
) {
2553 /* Note that this code may run in the context of an I/O thread
2554 * and accessing to server.unixtime in theory is an error
2555 * (no locks). But in practice this is safe, and even if we read
2556 * garbage Redis will not fail, as it's just a statistical info */
2557 o
->vm
.atime
= server
.unixtime
;
2558 o
->storage
= REDIS_VM_MEMORY
;
2563 static robj
*createStringObject(char *ptr
, size_t len
) {
2564 return createObject(REDIS_STRING
,sdsnewlen(ptr
,len
));
2567 static robj
*dupStringObject(robj
*o
) {
2568 assert(o
->encoding
== REDIS_ENCODING_RAW
);
2569 return createStringObject(o
->ptr
,sdslen(o
->ptr
));
2572 static robj
*createListObject(void) {
2573 list
*l
= listCreate();
2575 listSetFreeMethod(l
,decrRefCount
);
2576 return createObject(REDIS_LIST
,l
);
2579 static robj
*createSetObject(void) {
2580 dict
*d
= dictCreate(&setDictType
,NULL
);
2581 return createObject(REDIS_SET
,d
);
2584 static robj
*createHashObject(void) {
2585 /* All the Hashes start as zipmaps. Will be automatically converted
2586 * into hash tables if there are enough elements or big elements
2588 unsigned char *zm
= zipmapNew();
2589 robj
*o
= createObject(REDIS_HASH
,zm
);
2590 o
->encoding
= REDIS_ENCODING_ZIPMAP
;
2594 static robj
*createZsetObject(void) {
2595 zset
*zs
= zmalloc(sizeof(*zs
));
2597 zs
->dict
= dictCreate(&zsetDictType
,NULL
);
2598 zs
->zsl
= zslCreate();
2599 return createObject(REDIS_ZSET
,zs
);
2602 static void freeStringObject(robj
*o
) {
2603 if (o
->encoding
== REDIS_ENCODING_RAW
) {
2608 static void freeListObject(robj
*o
) {
2609 listRelease((list
*) o
->ptr
);
2612 static void freeSetObject(robj
*o
) {
2613 dictRelease((dict
*) o
->ptr
);
2616 static void freeZsetObject(robj
*o
) {
2619 dictRelease(zs
->dict
);
2624 static void freeHashObject(robj
*o
) {
2625 switch (o
->encoding
) {
2626 case REDIS_ENCODING_HT
:
2627 dictRelease((dict
*) o
->ptr
);
2629 case REDIS_ENCODING_ZIPMAP
:
2638 static void incrRefCount(robj
*o
) {
2639 redisAssert(!server
.vm_enabled
|| o
->storage
== REDIS_VM_MEMORY
);
2643 static void decrRefCount(void *obj
) {
2646 /* Object is a key of a swapped out value, or in the process of being
2648 if (server
.vm_enabled
&&
2649 (o
->storage
== REDIS_VM_SWAPPED
|| o
->storage
== REDIS_VM_LOADING
))
2651 if (o
->storage
== REDIS_VM_SWAPPED
|| o
->storage
== REDIS_VM_LOADING
) {
2652 redisAssert(o
->refcount
== 1);
2654 if (o
->storage
== REDIS_VM_LOADING
) vmCancelThreadedIOJob(obj
);
2655 redisAssert(o
->type
== REDIS_STRING
);
2656 freeStringObject(o
);
2657 vmMarkPagesFree(o
->vm
.page
,o
->vm
.usedpages
);
2658 pthread_mutex_lock(&server
.obj_freelist_mutex
);
2659 if (listLength(server
.objfreelist
) > REDIS_OBJFREELIST_MAX
||
2660 !listAddNodeHead(server
.objfreelist
,o
))
2662 pthread_mutex_unlock(&server
.obj_freelist_mutex
);
2663 server
.vm_stats_swapped_objects
--;
2666 /* Object is in memory, or in the process of being swapped out. */
2667 if (--(o
->refcount
) == 0) {
2668 if (server
.vm_enabled
&& o
->storage
== REDIS_VM_SWAPPING
)
2669 vmCancelThreadedIOJob(obj
);
2671 case REDIS_STRING
: freeStringObject(o
); break;
2672 case REDIS_LIST
: freeListObject(o
); break;
2673 case REDIS_SET
: freeSetObject(o
); break;
2674 case REDIS_ZSET
: freeZsetObject(o
); break;
2675 case REDIS_HASH
: freeHashObject(o
); break;
2676 default: redisAssert(0 != 0); break;
2678 if (server
.vm_enabled
) pthread_mutex_lock(&server
.obj_freelist_mutex
);
2679 if (listLength(server
.objfreelist
) > REDIS_OBJFREELIST_MAX
||
2680 !listAddNodeHead(server
.objfreelist
,o
))
2682 if (server
.vm_enabled
) pthread_mutex_unlock(&server
.obj_freelist_mutex
);
2686 static robj
*lookupKey(redisDb
*db
, robj
*key
) {
2687 dictEntry
*de
= dictFind(db
->dict
,key
);
2689 robj
*key
= dictGetEntryKey(de
);
2690 robj
*val
= dictGetEntryVal(de
);
2692 if (server
.vm_enabled
) {
2693 if (key
->storage
== REDIS_VM_MEMORY
||
2694 key
->storage
== REDIS_VM_SWAPPING
)
2696 /* If we were swapping the object out, stop it, this key
2698 if (key
->storage
== REDIS_VM_SWAPPING
)
2699 vmCancelThreadedIOJob(key
);
2700 /* Update the access time of the key for the aging algorithm. */
2701 key
->vm
.atime
= server
.unixtime
;
2703 int notify
= (key
->storage
== REDIS_VM_LOADING
);
2705 /* Our value was swapped on disk. Bring it at home. */
2706 redisAssert(val
== NULL
);
2707 val
= vmLoadObject(key
);
2708 dictGetEntryVal(de
) = val
;
2710 /* Clients blocked by the VM subsystem may be waiting for
2712 if (notify
) handleClientsBlockedOnSwappedKey(db
,key
);
2721 static robj
*lookupKeyRead(redisDb
*db
, robj
*key
) {
2722 expireIfNeeded(db
,key
);
2723 return lookupKey(db
,key
);
2726 static robj
*lookupKeyWrite(redisDb
*db
, robj
*key
) {
2727 deleteIfVolatile(db
,key
);
2728 return lookupKey(db
,key
);
2731 static int deleteKey(redisDb
*db
, robj
*key
) {
2734 /* We need to protect key from destruction: after the first dictDelete()
2735 * it may happen that 'key' is no longer valid if we don't increment
2736 * it's count. This may happen when we get the object reference directly
2737 * from the hash table with dictRandomKey() or dict iterators */
2739 if (dictSize(db
->expires
)) dictDelete(db
->expires
,key
);
2740 retval
= dictDelete(db
->dict
,key
);
2743 return retval
== DICT_OK
;
2746 /* Try to share an object against the shared objects pool */
2747 static robj
*tryObjectSharing(robj
*o
) {
2748 struct dictEntry
*de
;
2751 if (o
== NULL
|| server
.shareobjects
== 0) return o
;
2753 redisAssert(o
->type
== REDIS_STRING
);
2754 de
= dictFind(server
.sharingpool
,o
);
2756 robj
*shared
= dictGetEntryKey(de
);
2758 c
= ((unsigned long) dictGetEntryVal(de
))+1;
2759 dictGetEntryVal(de
) = (void*) c
;
2760 incrRefCount(shared
);
    /* Here we are using a stream algorithm: every time an object is
     * shared we increment its count, every time there is a miss we
     * decrement the counter of a random object. If this object reaches
     * zero we remove the object and put the current object instead. */
2768 if (dictSize(server
.sharingpool
) >=
2769 server
.sharingpoolsize
) {
2770 de
= dictGetRandomKey(server
.sharingpool
);
2771 redisAssert(de
!= NULL
);
2772 c
= ((unsigned long) dictGetEntryVal(de
))-1;
2773 dictGetEntryVal(de
) = (void*) c
;
2775 dictDelete(server
.sharingpool
,de
->key
);
2778 c
= 0; /* If the pool is empty we want to add this object */
2783 retval
= dictAdd(server
.sharingpool
,o
,(void*)1);
2784 redisAssert(retval
== DICT_OK
);
2791 /* Check if the nul-terminated string 's' can be represented by a long
2792 * (that is, is a number that fits into long without any other space or
2793 * character before or after the digits).
2795 * If so, the function returns REDIS_OK and *longval is set to the value
2796 * of the number. Otherwise REDIS_ERR is returned */
2797 static int isStringRepresentableAsLong(sds s
, long *longval
) {
2798 char buf
[32], *endptr
;
2802 value
= strtol(s
, &endptr
, 10);
2803 if (endptr
[0] != '\0') return REDIS_ERR
;
2804 slen
= snprintf(buf
,32,"%ld",value
);
2806 /* If the number converted back into a string is not identical
2807 * then it's not possible to encode the string as integer */
2808 if (sdslen(s
) != (unsigned)slen
|| memcmp(buf
,s
,slen
)) return REDIS_ERR
;
2809 if (longval
) *longval
= value
;
2813 /* Try to encode a string object in order to save space */
2814 static int tryObjectEncoding(robj
*o
) {
2818 if (o
->encoding
!= REDIS_ENCODING_RAW
)
2819 return REDIS_ERR
; /* Already encoded */
2821 /* It's not save to encode shared objects: shared objects can be shared
2822 * everywhere in the "object space" of Redis. Encoded objects can only
2823 * appear as "values" (and not, for instance, as keys) */
2824 if (o
->refcount
> 1) return REDIS_ERR
;
2826 /* Currently we try to encode only strings */
2827 redisAssert(o
->type
== REDIS_STRING
);
2829 /* Check if we can represent this string as a long integer */
2830 if (isStringRepresentableAsLong(s
,&value
) == REDIS_ERR
) return REDIS_ERR
;
2832 /* Ok, this object can be encoded */
2833 o
->encoding
= REDIS_ENCODING_INT
;
2835 o
->ptr
= (void*) value
;
2839 /* Get a decoded version of an encoded object (returned as a new object).
2840 * If the object is already raw-encoded just increment the ref count. */
2841 static robj
*getDecodedObject(robj
*o
) {
2844 if (o
->encoding
== REDIS_ENCODING_RAW
) {
2848 if (o
->type
== REDIS_STRING
&& o
->encoding
== REDIS_ENCODING_INT
) {
2851 snprintf(buf
,32,"%ld",(long)o
->ptr
);
2852 dec
= createStringObject(buf
,strlen(buf
));
2855 redisAssert(1 != 1);
/* Compare two string objects via strcmp() or alike.
 * Note that the objects may be integer-encoded. In such a case we
 * use snprintf() to get a string representation of the numbers on the stack
 * and compare the strings, it's much faster than calling getDecodedObject().
 *
 * Important note: if objects are not integer encoded, but binary-safe strings,
 * sdscmp() from sds.c will apply memcmp() so this function can be considered
 * binary safe. */
2867 static int compareStringObjects(robj
*a
, robj
*b
) {
2868 redisAssert(a
->type
== REDIS_STRING
&& b
->type
== REDIS_STRING
);
2869 char bufa
[128], bufb
[128], *astr
, *bstr
;
2872 if (a
== b
) return 0;
2873 if (a
->encoding
!= REDIS_ENCODING_RAW
) {
2874 snprintf(bufa
,sizeof(bufa
),"%ld",(long) a
->ptr
);
2880 if (b
->encoding
!= REDIS_ENCODING_RAW
) {
2881 snprintf(bufb
,sizeof(bufb
),"%ld",(long) b
->ptr
);
2887 return bothsds
? sdscmp(astr
,bstr
) : strcmp(astr
,bstr
);
2890 static size_t stringObjectLen(robj
*o
) {
2891 redisAssert(o
->type
== REDIS_STRING
);
2892 if (o
->encoding
== REDIS_ENCODING_RAW
) {
2893 return sdslen(o
->ptr
);
2897 return snprintf(buf
,32,"%ld",(long)o
->ptr
);
2901 /*============================ RDB saving/loading =========================== */
/* Write the one-byte 'type' to 'fp'.
 *
 * Returns 0 on success, -1 if fwrite() could not write the byte. */
static int rdbSaveType(FILE *fp, unsigned char type) {
    if (fwrite(&type,1,1,fp) == 0) return -1;
    return 0;
}
/* Write the time 't' to 'fp' as a 32 bit signed integer.
 *
 * The value is narrowed to int32_t and its in-memory bytes are written
 * as-is (no byte-order conversion is applied here).
 *
 * Returns 0 on success, -1 if fwrite() could not write the value. */
static int rdbSaveTime(FILE *fp, time_t t) {
    int32_t t32 = (int32_t) t;

    if (fwrite(&t32,sizeof(t32),1,fp) == 0) return -1;
    return 0;
}
2914 /* check rdbLoadLen() comments for more info */
2915 static int rdbSaveLen(FILE *fp
, uint32_t len
) {
2916 unsigned char buf
[2];
2919 /* Save a 6 bit len */
2920 buf
[0] = (len
&0xFF)|(REDIS_RDB_6BITLEN
<<6);
2921 if (fwrite(buf
,1,1,fp
) == 0) return -1;
2922 } else if (len
< (1<<14)) {
2923 /* Save a 14 bit len */
2924 buf
[0] = ((len
>>8)&0xFF)|(REDIS_RDB_14BITLEN
<<6);
2926 if (fwrite(buf
,2,1,fp
) == 0) return -1;
2928 /* Save a 32 bit len */
2929 buf
[0] = (REDIS_RDB_32BITLEN
<<6);
2930 if (fwrite(buf
,1,1,fp
) == 0) return -1;
2932 if (fwrite(&len
,4,1,fp
) == 0) return -1;
2937 /* String objects in the form "2391" "-100" without any space and with a
2938 * range of values that can fit in an 8, 16 or 32 bit signed value can be
2939 * encoded as integers to save space */
2940 static int rdbTryIntegerEncoding(sds s
, unsigned char *enc
) {
2942 char *endptr
, buf
[32];
2944 /* Check if it's possible to encode this value as a number */
2945 value
= strtoll(s
, &endptr
, 10);
2946 if (endptr
[0] != '\0') return 0;
2947 snprintf(buf
,32,"%lld",value
);
2949 /* If the number converted back into a string is not identical
2950 * then it's not possible to encode the string as integer */
2951 if (strlen(buf
) != sdslen(s
) || memcmp(buf
,s
,sdslen(s
))) return 0;
2953 /* Finally check if it fits in our ranges */
2954 if (value
>= -(1<<7) && value
<= (1<<7)-1) {
2955 enc
[0] = (REDIS_RDB_ENCVAL
<<6)|REDIS_RDB_ENC_INT8
;
2956 enc
[1] = value
&0xFF;
2958 } else if (value
>= -(1<<15) && value
<= (1<<15)-1) {
2959 enc
[0] = (REDIS_RDB_ENCVAL
<<6)|REDIS_RDB_ENC_INT16
;
2960 enc
[1] = value
&0xFF;
2961 enc
[2] = (value
>>8)&0xFF;
2963 } else if (value
>= -((long long)1<<31) && value
<= ((long long)1<<31)-1) {
2964 enc
[0] = (REDIS_RDB_ENCVAL
<<6)|REDIS_RDB_ENC_INT32
;
2965 enc
[1] = value
&0xFF;
2966 enc
[2] = (value
>>8)&0xFF;
2967 enc
[3] = (value
>>16)&0xFF;
2968 enc
[4] = (value
>>24)&0xFF;
2975 static int rdbSaveLzfStringObject(FILE *fp
, robj
*obj
) {
2976 unsigned int comprlen
, outlen
;
2980 /* We require at least four bytes compression for this to be worth it */
2981 outlen
= sdslen(obj
->ptr
)-4;
2982 if (outlen
<= 0) return 0;
2983 if ((out
= zmalloc(outlen
+1)) == NULL
) return 0;
2984 comprlen
= lzf_compress(obj
->ptr
, sdslen(obj
->ptr
), out
, outlen
);
2985 if (comprlen
== 0) {
2989 /* Data compressed! Let's save it on disk */
2990 byte
= (REDIS_RDB_ENCVAL
<<6)|REDIS_RDB_ENC_LZF
;
2991 if (fwrite(&byte
,1,1,fp
) == 0) goto writeerr
;
2992 if (rdbSaveLen(fp
,comprlen
) == -1) goto writeerr
;
2993 if (rdbSaveLen(fp
,sdslen(obj
->ptr
)) == -1) goto writeerr
;
2994 if (fwrite(out
,comprlen
,1,fp
) == 0) goto writeerr
;
3003 /* Save a string objet as [len][data] on disk. If the object is a string
3004 * representation of an integer value we try to safe it in a special form */
3005 static int rdbSaveStringObjectRaw(FILE *fp
, robj
*obj
) {
3009 len
= sdslen(obj
->ptr
);
3011 /* Try integer encoding */
3013 unsigned char buf
[5];
3014 if ((enclen
= rdbTryIntegerEncoding(obj
->ptr
,buf
)) > 0) {
3015 if (fwrite(buf
,enclen
,1,fp
) == 0) return -1;
3020 /* Try LZF compression - under 20 bytes it's unable to compress even
3021 * aaaaaaaaaaaaaaaaaa so skip it */
3022 if (server
.rdbcompression
&& len
> 20) {
3025 retval
= rdbSaveLzfStringObject(fp
,obj
);
3026 if (retval
== -1) return -1;
3027 if (retval
> 0) return 0;
3028 /* retval == 0 means data can't be compressed, save the old way */
3031 /* Store verbatim */
3032 if (rdbSaveLen(fp
,len
) == -1) return -1;
3033 if (len
&& fwrite(obj
->ptr
,len
,1,fp
) == 0) return -1;
3037 /* Like rdbSaveStringObjectRaw() but handle encoded objects */
3038 static int rdbSaveStringObject(FILE *fp
, robj
*obj
) {
3041 /* Avoid incr/decr ref count business when possible.
3042 * This plays well with copy-on-write given that we are probably
3043 * in a child process (BGSAVE). Also this makes sure key objects
3044 * of swapped objects are not incRefCount-ed (an assert does not allow
3045 * this in order to avoid bugs) */
3046 if (obj
->encoding
!= REDIS_ENCODING_RAW
) {
3047 obj
= getDecodedObject(obj
);
3048 retval
= rdbSaveStringObjectRaw(fp
,obj
);
3051 retval
= rdbSaveStringObjectRaw(fp
,obj
);
/* Save a double value. Doubles are saved as strings prefixed by an unsigned
 * 8 bit integer specifying the length of the representation.
 * This 8 bit integer has special values in order to specify the following
 * conditions:
 * 253: not a number
 * 254: + inf
 * 255: - inf
 *
 * Returns 0 on success, -1 on write error. */
static int rdbSaveDoubleValue(FILE *fp, double val) {
    unsigned char out[128];
    int outlen = 1; /* the special values only need the prefix byte */

    if (isnan(val)) {
        out[0] = 253;
    } else if (!isfinite(val)) {
        out[0] = (val < 0) ? 255 : 254;
    } else {
        /* Finite number: "%.17g" text right after the length byte. */
        snprintf((char*)out+1,sizeof(out)-1,"%.17g",val);
        out[0] = strlen((char*)out+1);
        outlen = out[0]+1;
    }
    return (fwrite(out,outlen,1,fp) == 0) ? -1 : 0;
}
3083 /* Save a Redis object. */
3084 static int rdbSaveObject(FILE *fp
, robj
*o
) {
3085 if (o
->type
== REDIS_STRING
) {
3086 /* Save a string value */
3087 if (rdbSaveStringObject(fp
,o
) == -1) return -1;
3088 } else if (o
->type
== REDIS_LIST
) {
3089 /* Save a list value */
3090 list
*list
= o
->ptr
;
3094 if (rdbSaveLen(fp
,listLength(list
)) == -1) return -1;
3095 listRewind(list
,&li
);
3096 while((ln
= listNext(&li
))) {
3097 robj
*eleobj
= listNodeValue(ln
);
3099 if (rdbSaveStringObject(fp
,eleobj
) == -1) return -1;
3101 } else if (o
->type
== REDIS_SET
) {
3102 /* Save a set value */
3104 dictIterator
*di
= dictGetIterator(set
);
3107 if (rdbSaveLen(fp
,dictSize(set
)) == -1) return -1;
3108 while((de
= dictNext(di
)) != NULL
) {
3109 robj
*eleobj
= dictGetEntryKey(de
);
3111 if (rdbSaveStringObject(fp
,eleobj
) == -1) return -1;
3113 dictReleaseIterator(di
);
3114 } else if (o
->type
== REDIS_ZSET
) {
3115 /* Save a set value */
3117 dictIterator
*di
= dictGetIterator(zs
->dict
);
3120 if (rdbSaveLen(fp
,dictSize(zs
->dict
)) == -1) return -1;
3121 while((de
= dictNext(di
)) != NULL
) {
3122 robj
*eleobj
= dictGetEntryKey(de
);
3123 double *score
= dictGetEntryVal(de
);
3125 if (rdbSaveStringObject(fp
,eleobj
) == -1) return -1;
3126 if (rdbSaveDoubleValue(fp
,*score
) == -1) return -1;
3128 dictReleaseIterator(di
);
3130 redisAssert(0 != 0);
3135 /* Return the length the object will have on disk if saved with
3136 * the rdbSaveObject() function. Currently we use a trick to get
3137 * this length with very little changes to the code. In the future
3138 * we could switch to a faster solution. */
3139 static off_t
rdbSavedObjectLen(robj
*o
, FILE *fp
) {
3140 if (fp
== NULL
) fp
= server
.devnull
;
3142 assert(rdbSaveObject(fp
,o
) != 1);
3146 /* Return the number of pages required to save this object in the swap file */
3147 static off_t
rdbSavedObjectPages(robj
*o
, FILE *fp
) {
3148 off_t bytes
= rdbSavedObjectLen(o
,fp
);
3150 return (bytes
+(server
.vm_page_size
-1))/server
.vm_page_size
;
3153 /* Save the DB on disk. Return REDIS_ERR on error, REDIS_OK on success */
3154 static int rdbSave(char *filename
) {
3155 dictIterator
*di
= NULL
;
3160 time_t now
= time(NULL
);
3162 /* Wait for I/O therads to terminate, just in case this is a
3163 * foreground-saving, to avoid seeking the swap file descriptor at the
3165 if (server
.vm_enabled
)
3166 waitEmptyIOJobsQueue();
3168 snprintf(tmpfile
,256,"temp-%d.rdb", (int) getpid());
3169 fp
= fopen(tmpfile
,"w");
3171 redisLog(REDIS_WARNING
, "Failed saving the DB: %s", strerror(errno
));
3174 if (fwrite("REDIS0001",9,1,fp
) == 0) goto werr
;
3175 for (j
= 0; j
< server
.dbnum
; j
++) {
3176 redisDb
*db
= server
.db
+j
;
3178 if (dictSize(d
) == 0) continue;
3179 di
= dictGetIterator(d
);
3185 /* Write the SELECT DB opcode */
3186 if (rdbSaveType(fp
,REDIS_SELECTDB
) == -1) goto werr
;
3187 if (rdbSaveLen(fp
,j
) == -1) goto werr
;
3189 /* Iterate this DB writing every entry */
3190 while((de
= dictNext(di
)) != NULL
) {
3191 robj
*key
= dictGetEntryKey(de
);
3192 robj
*o
= dictGetEntryVal(de
);
3193 time_t expiretime
= getExpire(db
,key
);
3195 /* Save the expire time */
3196 if (expiretime
!= -1) {
3197 /* If this key is already expired skip it */
3198 if (expiretime
< now
) continue;
3199 if (rdbSaveType(fp
,REDIS_EXPIRETIME
) == -1) goto werr
;
3200 if (rdbSaveTime(fp
,expiretime
) == -1) goto werr
;
3202 /* Save the key and associated value. This requires special
3203 * handling if the value is swapped out. */
3204 if (!server
.vm_enabled
|| key
->storage
== REDIS_VM_MEMORY
||
3205 key
->storage
== REDIS_VM_SWAPPING
) {
3206 /* Save type, key, value */
3207 if (rdbSaveType(fp
,o
->type
) == -1) goto werr
;
3208 if (rdbSaveStringObject(fp
,key
) == -1) goto werr
;
3209 if (rdbSaveObject(fp
,o
) == -1) goto werr
;
3211 /* REDIS_VM_SWAPPED or REDIS_VM_LOADING */
3213 /* Get a preview of the object in memory */
3214 po
= vmPreviewObject(key
);
3215 /* Save type, key, value */
3216 if (rdbSaveType(fp
,key
->vtype
) == -1) goto werr
;
3217 if (rdbSaveStringObject(fp
,key
) == -1) goto werr
;
3218 if (rdbSaveObject(fp
,po
) == -1) goto werr
;
3219 /* Remove the loaded object from memory */
3223 dictReleaseIterator(di
);
3226 if (rdbSaveType(fp
,REDIS_EOF
) == -1) goto werr
;
3228 /* Make sure data will not remain on the OS's output buffers */
3233 /* Use RENAME to make sure the DB file is changed atomically only
3234 * if the generate DB file is ok. */
3235 if (rename(tmpfile
,filename
) == -1) {
3236 redisLog(REDIS_WARNING
,"Error moving temp DB file on the final destination: %s", strerror(errno
));
3240 redisLog(REDIS_NOTICE
,"DB saved on disk");
3242 server
.lastsave
= time(NULL
);
3248 redisLog(REDIS_WARNING
,"Write error saving DB on disk: %s", strerror(errno
));
3249 if (di
) dictReleaseIterator(di
);
3253 static int rdbSaveBackground(char *filename
) {
3256 if (server
.bgsavechildpid
!= -1) return REDIS_ERR
;
3257 if (server
.vm_enabled
) waitEmptyIOJobsQueue();
3258 if ((childpid
= fork()) == 0) {
3260 if (server
.vm_enabled
) vmReopenSwapFile();
3262 if (rdbSave(filename
) == REDIS_OK
) {
3269 if (childpid
== -1) {
3270 redisLog(REDIS_WARNING
,"Can't save in background: fork: %s",
3274 redisLog(REDIS_NOTICE
,"Background saving started by pid %d",childpid
);
3275 server
.bgsavechildpid
= childpid
;
3278 return REDIS_OK
; /* unreached */
/* Remove the temp file "temp-<pid>.rdb" that a saving child with pid
 * 'childpid' uses as the target of its dump. Errors from unlink() are
 * ignored. */
static void rdbRemoveTempFile(pid_t childpid) {
    char path[256];

    snprintf(path,256,"temp-%d.rdb", (int) childpid);
    unlink(path);
}
/* Read a one byte type/opcode from 'fp'.
 * Returns the byte as a value in the 0-255 range, or -1 on short read. */
static int rdbLoadType(FILE *fp) {
    unsigned char opcode;

    return (fread(&opcode,1,1,fp) == 0) ? -1 : opcode;
}
/* Read a timestamp stored as a raw 32 bit signed integer from 'fp'.
 * Returns the time read, or -1 on short read. */
static time_t rdbLoadTime(FILE *fp) {
    int32_t stamp;

    if (fread(&stamp,4,1,fp) == 0)
        return -1;
    return (time_t) stamp;
}
3300 /* Load an encoded length from the DB, see the REDIS_RDB_* defines on the top
3301 * of this file for a description of how this are stored on disk.
3303 * isencoded is set to 1 if the readed length is not actually a length but
3304 * an "encoding type", check the above comments for more info */
3305 static uint32_t rdbLoadLen(FILE *fp
, int *isencoded
) {
3306 unsigned char buf
[2];
3310 if (isencoded
) *isencoded
= 0;
3311 if (fread(buf
,1,1,fp
) == 0) return REDIS_RDB_LENERR
;
3312 type
= (buf
[0]&0xC0)>>6;
3313 if (type
== REDIS_RDB_6BITLEN
) {
3314 /* Read a 6 bit len */
3316 } else if (type
== REDIS_RDB_ENCVAL
) {
3317 /* Read a 6 bit len encoding type */
3318 if (isencoded
) *isencoded
= 1;
3320 } else if (type
== REDIS_RDB_14BITLEN
) {
3321 /* Read a 14 bit len */
3322 if (fread(buf
+1,1,1,fp
) == 0) return REDIS_RDB_LENERR
;
3323 return ((buf
[0]&0x3F)<<8)|buf
[1];
3325 /* Read a 32 bit len */
3326 if (fread(&len
,4,1,fp
) == 0) return REDIS_RDB_LENERR
;
3331 static robj
*rdbLoadIntegerObject(FILE *fp
, int enctype
) {
3332 unsigned char enc
[4];
3335 if (enctype
== REDIS_RDB_ENC_INT8
) {
3336 if (fread(enc
,1,1,fp
) == 0) return NULL
;
3337 val
= (signed char)enc
[0];
3338 } else if (enctype
== REDIS_RDB_ENC_INT16
) {
3340 if (fread(enc
,2,1,fp
) == 0) return NULL
;
3341 v
= enc
[0]|(enc
[1]<<8);
3343 } else if (enctype
== REDIS_RDB_ENC_INT32
) {
3345 if (fread(enc
,4,1,fp
) == 0) return NULL
;
3346 v
= enc
[0]|(enc
[1]<<8)|(enc
[2]<<16)|(enc
[3]<<24);
3349 val
= 0; /* anti-warning */
3352 return createObject(REDIS_STRING
,sdscatprintf(sdsempty(),"%lld",val
));
3355 static robj
*rdbLoadLzfStringObject(FILE*fp
) {
3356 unsigned int len
, clen
;
3357 unsigned char *c
= NULL
;
3360 if ((clen
= rdbLoadLen(fp
,NULL
)) == REDIS_RDB_LENERR
) return NULL
;
3361 if ((len
= rdbLoadLen(fp
,NULL
)) == REDIS_RDB_LENERR
) return NULL
;
3362 if ((c
= zmalloc(clen
)) == NULL
) goto err
;
3363 if ((val
= sdsnewlen(NULL
,len
)) == NULL
) goto err
;
3364 if (fread(c
,clen
,1,fp
) == 0) goto err
;
3365 if (lzf_decompress(c
,clen
,val
,len
) == 0) goto err
;
3367 return createObject(REDIS_STRING
,val
);
3374 static robj
*rdbLoadStringObject(FILE*fp
) {
3379 len
= rdbLoadLen(fp
,&isencoded
);
3382 case REDIS_RDB_ENC_INT8
:
3383 case REDIS_RDB_ENC_INT16
:
3384 case REDIS_RDB_ENC_INT32
:
3385 return tryObjectSharing(rdbLoadIntegerObject(fp
,len
));
3386 case REDIS_RDB_ENC_LZF
:
3387 return tryObjectSharing(rdbLoadLzfStringObject(fp
));
3393 if (len
== REDIS_RDB_LENERR
) return NULL
;
3394 val
= sdsnewlen(NULL
,len
);
3395 if (len
&& fread(val
,len
,1,fp
) == 0) {
3399 return tryObjectSharing(createObject(REDIS_STRING
,val
));
3402 /* For information about double serialization check rdbSaveDoubleValue() */
3403 static int rdbLoadDoubleValue(FILE *fp
, double *val
) {
3407 if (fread(&len
,1,1,fp
) == 0) return -1;
3409 case 255: *val
= R_NegInf
; return 0;
3410 case 254: *val
= R_PosInf
; return 0;
3411 case 253: *val
= R_Nan
; return 0;
3413 if (fread(buf
,len
,1,fp
) == 0) return -1;
3415 sscanf(buf
, "%lg", val
);
3420 /* Load a Redis object of the specified type from the specified file.
3421 * On success a newly allocated object is returned, otherwise NULL. */
3422 static robj
*rdbLoadObject(int type
, FILE *fp
) {
3425 if (type
== REDIS_STRING
) {
3426 /* Read string value */
3427 if ((o
= rdbLoadStringObject(fp
)) == NULL
) return NULL
;
3428 tryObjectEncoding(o
);
3429 } else if (type
== REDIS_LIST
|| type
== REDIS_SET
) {
3430 /* Read list/set value */
3433 if ((listlen
= rdbLoadLen(fp
,NULL
)) == REDIS_RDB_LENERR
) return NULL
;
3434 o
= (type
== REDIS_LIST
) ? createListObject() : createSetObject();
3435 /* It's faster to expand the dict to the right size asap in order
3436 * to avoid rehashing */
3437 if (type
== REDIS_SET
&& listlen
> DICT_HT_INITIAL_SIZE
)
3438 dictExpand(o
->ptr
,listlen
);
3439 /* Load every single element of the list/set */
3443 if ((ele
= rdbLoadStringObject(fp
)) == NULL
) return NULL
;
3444 tryObjectEncoding(ele
);
3445 if (type
== REDIS_LIST
) {
3446 listAddNodeTail((list
*)o
->ptr
,ele
);
3448 dictAdd((dict
*)o
->ptr
,ele
,NULL
);
3451 } else if (type
== REDIS_ZSET
) {
3452 /* Read list/set value */
3456 if ((zsetlen
= rdbLoadLen(fp
,NULL
)) == REDIS_RDB_LENERR
) return NULL
;
3457 o
= createZsetObject();
3459 /* Load every single element of the list/set */
3462 double *score
= zmalloc(sizeof(double));
3464 if ((ele
= rdbLoadStringObject(fp
)) == NULL
) return NULL
;
3465 tryObjectEncoding(ele
);
3466 if (rdbLoadDoubleValue(fp
,score
) == -1) return NULL
;
3467 dictAdd(zs
->dict
,ele
,score
);
3468 zslInsert(zs
->zsl
,*score
,ele
);
3469 incrRefCount(ele
); /* added to skiplist */
3472 redisAssert(0 != 0);
3477 static int rdbLoad(char *filename
) {
3479 robj
*keyobj
= NULL
;
3481 int type
, retval
, rdbver
;
3482 dict
*d
= server
.db
[0].dict
;
3483 redisDb
*db
= server
.db
+0;
3485 time_t expiretime
= -1, now
= time(NULL
);
3486 long long loadedkeys
= 0;
3488 fp
= fopen(filename
,"r");
3489 if (!fp
) return REDIS_ERR
;
3490 if (fread(buf
,9,1,fp
) == 0) goto eoferr
;
3492 if (memcmp(buf
,"REDIS",5) != 0) {
3494 redisLog(REDIS_WARNING
,"Wrong signature trying to load DB from file");
3497 rdbver
= atoi(buf
+5);
3500 redisLog(REDIS_WARNING
,"Can't handle RDB format version %d",rdbver
);
3507 if ((type
= rdbLoadType(fp
)) == -1) goto eoferr
;
3508 if (type
== REDIS_EXPIRETIME
) {
3509 if ((expiretime
= rdbLoadTime(fp
)) == -1) goto eoferr
;
3510 /* We read the time so we need to read the object type again */
3511 if ((type
= rdbLoadType(fp
)) == -1) goto eoferr
;
3513 if (type
== REDIS_EOF
) break;
3514 /* Handle SELECT DB opcode as a special case */
3515 if (type
== REDIS_SELECTDB
) {
3516 if ((dbid
= rdbLoadLen(fp
,NULL
)) == REDIS_RDB_LENERR
)
3518 if (dbid
>= (unsigned)server
.dbnum
) {
3519 redisLog(REDIS_WARNING
,"FATAL: Data file was created with a Redis server configured to handle more than %d databases. Exiting\n", server
.dbnum
);
3522 db
= server
.db
+dbid
;
3527 if ((keyobj
= rdbLoadStringObject(fp
)) == NULL
) goto eoferr
;
3529 if ((o
= rdbLoadObject(type
,fp
)) == NULL
) goto eoferr
;
3530 /* Add the new object in the hash table */
3531 retval
= dictAdd(d
,keyobj
,o
);
3532 if (retval
== DICT_ERR
) {
3533 redisLog(REDIS_WARNING
,"Loading DB, duplicated key (%s) found! Unrecoverable error, exiting now.", keyobj
->ptr
);
3536 /* Set the expire time if needed */
3537 if (expiretime
!= -1) {
3538 setExpire(db
,keyobj
,expiretime
);
3539 /* Delete this key if already expired */
3540 if (expiretime
< now
) deleteKey(db
,keyobj
);
3544 /* Handle swapping while loading big datasets when VM is on */
3546 if (server
.vm_enabled
&& (loadedkeys
% 5000) == 0) {
3547 while (zmalloc_used_memory() > server
.vm_max_memory
) {
3548 if (vmSwapOneObjectBlocking() == REDIS_ERR
) break;
3555 eoferr
: /* unexpected end of file is handled here with a fatal exit */
3556 if (keyobj
) decrRefCount(keyobj
);
3557 redisLog(REDIS_WARNING
,"Short read or OOM loading DB. Unrecoverable error, aborting now.");
3559 return REDIS_ERR
; /* Just to avoid warning */
3562 /*================================== Commands =============================== */
3564 static void authCommand(redisClient
*c
) {
3565 if (!server
.requirepass
|| !strcmp(c
->argv
[1]->ptr
, server
.requirepass
)) {
3566 c
->authenticated
= 1;
3567 addReply(c
,shared
.ok
);
3569 c
->authenticated
= 0;
3570 addReplySds(c
,sdscatprintf(sdsempty(),"-ERR invalid password\r\n"));
3574 static void pingCommand(redisClient
*c
) {
3575 addReply(c
,shared
.pong
);
3578 static void echoCommand(redisClient
*c
) {
3579 addReplyBulkLen(c
,c
->argv
[1]);
3580 addReply(c
,c
->argv
[1]);
3581 addReply(c
,shared
.crlf
);
3584 /*=================================== Strings =============================== */
3586 static void setGenericCommand(redisClient
*c
, int nx
) {
3589 if (nx
) deleteIfVolatile(c
->db
,c
->argv
[1]);
3590 retval
= dictAdd(c
->db
->dict
,c
->argv
[1],c
->argv
[2]);
3591 if (retval
== DICT_ERR
) {
3593 /* If the key is about a swapped value, we want a new key object
3594 * to overwrite the old. So we delete the old key in the database.
3595 * This will also make sure that swap pages about the old object
3596 * will be marked as free. */
3597 if (server
.vm_enabled
&& deleteIfSwapped(c
->db
,c
->argv
[1]))
3598 incrRefCount(c
->argv
[1]);
3599 dictReplace(c
->db
->dict
,c
->argv
[1],c
->argv
[2]);
3600 incrRefCount(c
->argv
[2]);
3602 addReply(c
,shared
.czero
);
3606 incrRefCount(c
->argv
[1]);
3607 incrRefCount(c
->argv
[2]);
3610 removeExpire(c
->db
,c
->argv
[1]);
3611 addReply(c
, nx
? shared
.cone
: shared
.ok
);
3614 static void setCommand(redisClient
*c
) {
3615 setGenericCommand(c
,0);
3618 static void setnxCommand(redisClient
*c
) {
3619 setGenericCommand(c
,1);
3622 static int getGenericCommand(redisClient
*c
) {
3623 robj
*o
= lookupKeyRead(c
->db
,c
->argv
[1]);
3626 addReply(c
,shared
.nullbulk
);
3629 if (o
->type
!= REDIS_STRING
) {
3630 addReply(c
,shared
.wrongtypeerr
);
3633 addReplyBulkLen(c
,o
);
3635 addReply(c
,shared
.crlf
);
3641 static void getCommand(redisClient
*c
) {
3642 getGenericCommand(c
);
3645 static void getsetCommand(redisClient
*c
) {
3646 if (getGenericCommand(c
) == REDIS_ERR
) return;
3647 if (dictAdd(c
->db
->dict
,c
->argv
[1],c
->argv
[2]) == DICT_ERR
) {
3648 dictReplace(c
->db
->dict
,c
->argv
[1],c
->argv
[2]);
3650 incrRefCount(c
->argv
[1]);
3652 incrRefCount(c
->argv
[2]);
3654 removeExpire(c
->db
,c
->argv
[1]);
3657 static void mgetCommand(redisClient
*c
) {
3660 addReplySds(c
,sdscatprintf(sdsempty(),"*%d\r\n",c
->argc
-1));
3661 for (j
= 1; j
< c
->argc
; j
++) {
3662 robj
*o
= lookupKeyRead(c
->db
,c
->argv
[j
]);
3664 addReply(c
,shared
.nullbulk
);
3666 if (o
->type
!= REDIS_STRING
) {
3667 addReply(c
,shared
.nullbulk
);
3669 addReplyBulkLen(c
,o
);
3671 addReply(c
,shared
.crlf
);
3677 static void msetGenericCommand(redisClient
*c
, int nx
) {
3678 int j
, busykeys
= 0;
3680 if ((c
->argc
% 2) == 0) {
3681 addReplySds(c
,sdsnew("-ERR wrong number of arguments for MSET\r\n"));
3684 /* Handle the NX flag. The MSETNX semantic is to return zero and don't
3685 * set nothing at all if at least one already key exists. */
3687 for (j
= 1; j
< c
->argc
; j
+= 2) {
3688 if (lookupKeyWrite(c
->db
,c
->argv
[j
]) != NULL
) {
3694 addReply(c
, shared
.czero
);
3698 for (j
= 1; j
< c
->argc
; j
+= 2) {
3701 tryObjectEncoding(c
->argv
[j
+1]);
3702 retval
= dictAdd(c
->db
->dict
,c
->argv
[j
],c
->argv
[j
+1]);
3703 if (retval
== DICT_ERR
) {
3704 dictReplace(c
->db
->dict
,c
->argv
[j
],c
->argv
[j
+1]);
3705 incrRefCount(c
->argv
[j
+1]);
3707 incrRefCount(c
->argv
[j
]);
3708 incrRefCount(c
->argv
[j
+1]);
3710 removeExpire(c
->db
,c
->argv
[j
]);
3712 server
.dirty
+= (c
->argc
-1)/2;
3713 addReply(c
, nx
? shared
.cone
: shared
.ok
);
3716 static void msetCommand(redisClient
*c
) {
3717 msetGenericCommand(c
,0);
3720 static void msetnxCommand(redisClient
*c
) {
3721 msetGenericCommand(c
,1);
3724 static void incrDecrCommand(redisClient
*c
, long long incr
) {
3729 o
= lookupKeyWrite(c
->db
,c
->argv
[1]);
3733 if (o
->type
!= REDIS_STRING
) {
3738 if (o
->encoding
== REDIS_ENCODING_RAW
)
3739 value
= strtoll(o
->ptr
, &eptr
, 10);
3740 else if (o
->encoding
== REDIS_ENCODING_INT
)
3741 value
= (long)o
->ptr
;
3743 redisAssert(1 != 1);
3748 o
= createObject(REDIS_STRING
,sdscatprintf(sdsempty(),"%lld",value
));
3749 tryObjectEncoding(o
);
3750 retval
= dictAdd(c
->db
->dict
,c
->argv
[1],o
);
3751 if (retval
== DICT_ERR
) {
3752 dictReplace(c
->db
->dict
,c
->argv
[1],o
);
3753 removeExpire(c
->db
,c
->argv
[1]);
3755 incrRefCount(c
->argv
[1]);
3758 addReply(c
,shared
.colon
);
3760 addReply(c
,shared
.crlf
);
3763 static void incrCommand(redisClient
*c
) {
3764 incrDecrCommand(c
,1);
3767 static void decrCommand(redisClient
*c
) {
3768 incrDecrCommand(c
,-1);
3771 static void incrbyCommand(redisClient
*c
) {
3772 long long incr
= strtoll(c
->argv
[2]->ptr
, NULL
, 10);
3773 incrDecrCommand(c
,incr
);
3776 static void decrbyCommand(redisClient
*c
) {
3777 long long incr
= strtoll(c
->argv
[2]->ptr
, NULL
, 10);
3778 incrDecrCommand(c
,-incr
);
3781 static void appendCommand(redisClient
*c
) {
3786 o
= lookupKeyWrite(c
->db
,c
->argv
[1]);
3788 /* Create the key */
3789 retval
= dictAdd(c
->db
->dict
,c
->argv
[1],c
->argv
[2]);
3790 incrRefCount(c
->argv
[1]);
3791 incrRefCount(c
->argv
[2]);
3792 totlen
= stringObjectLen(c
->argv
[2]);
3796 de
= dictFind(c
->db
->dict
,c
->argv
[1]);
3799 o
= dictGetEntryVal(de
);
3800 if (o
->type
!= REDIS_STRING
) {
3801 addReply(c
,shared
.wrongtypeerr
);
3804 /* If the object is specially encoded or shared we have to make
3806 if (o
->refcount
!= 1 || o
->encoding
!= REDIS_ENCODING_RAW
) {
3807 robj
*decoded
= getDecodedObject(o
);
3809 o
= createStringObject(decoded
->ptr
, sdslen(decoded
->ptr
));
3810 decrRefCount(decoded
);
3811 dictReplace(c
->db
->dict
,c
->argv
[1],o
);
3814 if (c
->argv
[2]->encoding
== REDIS_ENCODING_RAW
) {
3815 o
->ptr
= sdscatlen(o
->ptr
,
3816 c
->argv
[2]->ptr
, sdslen(c
->argv
[2]->ptr
));
3818 o
->ptr
= sdscatprintf(o
->ptr
, "%ld",
3819 (unsigned long) c
->argv
[2]->ptr
);
3821 totlen
= sdslen(o
->ptr
);
3824 addReplySds(c
,sdscatprintf(sdsempty(),":%lu\r\n",(unsigned long)totlen
));
3827 static void substrCommand(redisClient
*c
) {
3829 long start
= atoi(c
->argv
[2]->ptr
);
3830 long end
= atoi(c
->argv
[3]->ptr
);
3832 o
= lookupKeyRead(c
->db
,c
->argv
[1]);
3834 addReply(c
,shared
.nullbulk
);
3836 if (o
->type
!= REDIS_STRING
) {
3837 addReply(c
,shared
.wrongtypeerr
);
3839 size_t rangelen
, strlen
;
3842 o
= getDecodedObject(o
);
3843 strlen
= sdslen(o
->ptr
);
3845 /* convert negative indexes */
3846 if (start
< 0) start
= strlen
+start
;
3847 if (end
< 0) end
= strlen
+end
;
3848 if (start
< 0) start
= 0;
3849 if (end
< 0) end
= 0;
3851 /* indexes sanity checks */
3852 if (start
> end
|| (size_t)start
>= strlen
) {
3853 /* Out of range start or start > end result in null reply */
3854 addReply(c
,shared
.nullbulk
);
3858 if ((size_t)end
>= strlen
) end
= strlen
-1;
3859 rangelen
= (end
-start
)+1;
3861 /* Return the result */
3862 addReplySds(c
,sdscatprintf(sdsempty(),"$%lu\r\n",rangelen
));
3863 range
= sdsnewlen((char*)o
->ptr
+start
,rangelen
);
3864 addReplySds(c
,range
);
3865 addReply(c
,shared
.crlf
);
3871 /* ========================= Type agnostic commands ========================= */
3873 static void delCommand(redisClient
*c
) {
3876 for (j
= 1; j
< c
->argc
; j
++) {
3877 if (deleteKey(c
->db
,c
->argv
[j
])) {
3884 addReply(c
,shared
.czero
);
3887 addReply(c
,shared
.cone
);
3890 addReplySds(c
,sdscatprintf(sdsempty(),":%d\r\n",deleted
));
3895 static void existsCommand(redisClient
*c
) {
3896 addReply(c
,lookupKeyRead(c
->db
,c
->argv
[1]) ? shared
.cone
: shared
.czero
);
3899 static void selectCommand(redisClient
*c
) {
3900 int id
= atoi(c
->argv
[1]->ptr
);
3902 if (selectDb(c
,id
) == REDIS_ERR
) {
3903 addReplySds(c
,sdsnew("-ERR invalid DB index\r\n"));
3905 addReply(c
,shared
.ok
);
3909 static void randomkeyCommand(redisClient
*c
) {
3913 de
= dictGetRandomKey(c
->db
->dict
);
3914 if (!de
|| expireIfNeeded(c
->db
,dictGetEntryKey(de
)) == 0) break;
3917 addReply(c
,shared
.plus
);
3918 addReply(c
,shared
.crlf
);
3920 addReply(c
,shared
.plus
);
3921 addReply(c
,dictGetEntryKey(de
));
3922 addReply(c
,shared
.crlf
);
3926 static void keysCommand(redisClient
*c
) {
3929 sds pattern
= c
->argv
[1]->ptr
;
3930 int plen
= sdslen(pattern
);
3931 unsigned long numkeys
= 0;
3932 robj
*lenobj
= createObject(REDIS_STRING
,NULL
);
3934 di
= dictGetIterator(c
->db
->dict
);
3936 decrRefCount(lenobj
);
3937 while((de
= dictNext(di
)) != NULL
) {
3938 robj
*keyobj
= dictGetEntryKey(de
);
3940 sds key
= keyobj
->ptr
;
3941 if ((pattern
[0] == '*' && pattern
[1] == '\0') ||
3942 stringmatchlen(pattern
,plen
,key
,sdslen(key
),0)) {
3943 if (expireIfNeeded(c
->db
,keyobj
) == 0) {
3944 addReplyBulkLen(c
,keyobj
);
3946 addReply(c
,shared
.crlf
);
3951 dictReleaseIterator(di
);
3952 lenobj
->ptr
= sdscatprintf(sdsempty(),"*%lu\r\n",numkeys
);
3955 static void dbsizeCommand(redisClient
*c
) {
3957 sdscatprintf(sdsempty(),":%lu\r\n",dictSize(c
->db
->dict
)));
3960 static void lastsaveCommand(redisClient
*c
) {
3962 sdscatprintf(sdsempty(),":%lu\r\n",server
.lastsave
));
3965 static void typeCommand(redisClient
*c
) {
3969 o
= lookupKeyRead(c
->db
,c
->argv
[1]);
3974 case REDIS_STRING
: type
= "+string"; break;
3975 case REDIS_LIST
: type
= "+list"; break;
3976 case REDIS_SET
: type
= "+set"; break;
3977 case REDIS_ZSET
: type
= "+zset"; break;
3978 default: type
= "unknown"; break;
3981 addReplySds(c
,sdsnew(type
));
3982 addReply(c
,shared
.crlf
);
3985 static void saveCommand(redisClient
*c
) {
3986 if (server
.bgsavechildpid
!= -1) {
3987 addReplySds(c
,sdsnew("-ERR background save in progress\r\n"));
3990 if (rdbSave(server
.dbfilename
) == REDIS_OK
) {
3991 addReply(c
,shared
.ok
);
3993 addReply(c
,shared
.err
);
3997 static void bgsaveCommand(redisClient
*c
) {
3998 if (server
.bgsavechildpid
!= -1) {
3999 addReplySds(c
,sdsnew("-ERR background save already in progress\r\n"));
4002 if (rdbSaveBackground(server
.dbfilename
) == REDIS_OK
) {
4003 char *status
= "+Background saving started\r\n";
4004 addReplySds(c
,sdsnew(status
));
4006 addReply(c
,shared
.err
);
4010 static void shutdownCommand(redisClient
*c
) {
4011 redisLog(REDIS_WARNING
,"User requested shutdown, saving DB...");
4012 /* Kill the saving child if there is a background saving in progress.
4013 We want to avoid race conditions, for instance our saving child may
4014 overwrite the synchronous saving did by SHUTDOWN. */
4015 if (server
.bgsavechildpid
!= -1) {
4016 redisLog(REDIS_WARNING
,"There is a live saving child. Killing it!");
4017 kill(server
.bgsavechildpid
,SIGKILL
);
4018 rdbRemoveTempFile(server
.bgsavechildpid
);
4020 if (server
.appendonly
) {
4021 /* Append only file: fsync() the AOF and exit */
4022 fsync(server
.appendfd
);
4023 if (server
.vm_enabled
) unlink(server
.vm_swap_file
);
4026 /* Snapshotting. Perform a SYNC SAVE and exit */
4027 if (rdbSave(server
.dbfilename
) == REDIS_OK
) {
4028 if (server
.daemonize
)
4029 unlink(server
.pidfile
);
4030 redisLog(REDIS_WARNING
,"%zu bytes used at exit",zmalloc_used_memory());
4031 redisLog(REDIS_WARNING
,"Server exit now, bye bye...");
4032 if (server
.vm_enabled
) unlink(server
.vm_swap_file
);
4035 /* Ooops.. error saving! The best we can do is to continue operating.
4036 * Note that if there was a background saving process, in the next
4037 * cron() Redis will be notified that the background saving aborted,
4038 * handling special stuff like slaves pending for synchronization... */
4039 redisLog(REDIS_WARNING
,"Error trying to save the DB, can't exit");
4040 addReplySds(c
,sdsnew("-ERR can't quit, problems saving the DB\r\n"));
4045 static void renameGenericCommand(redisClient
*c
, int nx
) {
4048 /* To use the same key as src and dst is probably an error */
4049 if (sdscmp(c
->argv
[1]->ptr
,c
->argv
[2]->ptr
) == 0) {
4050 addReply(c
,shared
.sameobjecterr
);
4054 o
= lookupKeyWrite(c
->db
,c
->argv
[1]);
4056 addReply(c
,shared
.nokeyerr
);
4060 deleteIfVolatile(c
->db
,c
->argv
[2]);
4061 if (dictAdd(c
->db
->dict
,c
->argv
[2],o
) == DICT_ERR
) {
4064 addReply(c
,shared
.czero
);
4067 dictReplace(c
->db
->dict
,c
->argv
[2],o
);
4069 incrRefCount(c
->argv
[2]);
4071 deleteKey(c
->db
,c
->argv
[1]);
4073 addReply(c
,nx
? shared
.cone
: shared
.ok
);
4076 static void renameCommand(redisClient
*c
) {
4077 renameGenericCommand(c
,0);
4080 static void renamenxCommand(redisClient
*c
) {
4081 renameGenericCommand(c
,1);
4084 static void moveCommand(redisClient
*c
) {
4089 /* Obtain source and target DB pointers */
4092 if (selectDb(c
,atoi(c
->argv
[2]->ptr
)) == REDIS_ERR
) {
4093 addReply(c
,shared
.outofrangeerr
);
4097 selectDb(c
,srcid
); /* Back to the source DB */
4099 /* If the user is moving using as target the same
4100 * DB as the source DB it is probably an error. */
4102 addReply(c
,shared
.sameobjecterr
);
4106 /* Check if the element exists and get a reference */
4107 o
= lookupKeyWrite(c
->db
,c
->argv
[1]);
4109 addReply(c
,shared
.czero
);
4113 /* Try to add the element to the target DB */
4114 deleteIfVolatile(dst
,c
->argv
[1]);
4115 if (dictAdd(dst
->dict
,c
->argv
[1],o
) == DICT_ERR
) {
4116 addReply(c
,shared
.czero
);
4119 incrRefCount(c
->argv
[1]);
4122 /* OK! key moved, free the entry in the source DB */
4123 deleteKey(src
,c
->argv
[1]);
4125 addReply(c
,shared
.cone
);
4128 /* =================================== Lists ================================ */
4129 static void pushGenericCommand(redisClient
*c
, int where
) {
4133 lobj
= lookupKeyWrite(c
->db
,c
->argv
[1]);
4135 if (handleClientsWaitingListPush(c
,c
->argv
[1],c
->argv
[2])) {
4136 addReply(c
,shared
.cone
);
4139 lobj
= createListObject();
4141 if (where
== REDIS_HEAD
) {
4142 listAddNodeHead(list
,c
->argv
[2]);
4144 listAddNodeTail(list
,c
->argv
[2]);
4146 dictAdd(c
->db
->dict
,c
->argv
[1],lobj
);
4147 incrRefCount(c
->argv
[1]);
4148 incrRefCount(c
->argv
[2]);
4150 if (lobj
->type
!= REDIS_LIST
) {
4151 addReply(c
,shared
.wrongtypeerr
);
4154 if (handleClientsWaitingListPush(c
,c
->argv
[1],c
->argv
[2])) {
4155 addReply(c
,shared
.cone
);
4159 if (where
== REDIS_HEAD
) {
4160 listAddNodeHead(list
,c
->argv
[2]);
4162 listAddNodeTail(list
,c
->argv
[2]);
4164 incrRefCount(c
->argv
[2]);
4167 addReplySds(c
,sdscatprintf(sdsempty(),":%d\r\n",listLength(list
)));
4170 static void lpushCommand(redisClient
*c
) {
4171 pushGenericCommand(c
,REDIS_HEAD
);
4174 static void rpushCommand(redisClient
*c
) {
4175 pushGenericCommand(c
,REDIS_TAIL
);
4178 static void llenCommand(redisClient
*c
) {
4182 o
= lookupKeyRead(c
->db
,c
->argv
[1]);
4184 addReply(c
,shared
.czero
);
4187 if (o
->type
!= REDIS_LIST
) {
4188 addReply(c
,shared
.wrongtypeerr
);
4191 addReplySds(c
,sdscatprintf(sdsempty(),":%d\r\n",listLength(l
)));
4196 static void lindexCommand(redisClient
*c
) {
4198 int index
= atoi(c
->argv
[2]->ptr
);
4200 o
= lookupKeyRead(c
->db
,c
->argv
[1]);
4202 addReply(c
,shared
.nullbulk
);
4204 if (o
->type
!= REDIS_LIST
) {
4205 addReply(c
,shared
.wrongtypeerr
);
4207 list
*list
= o
->ptr
;
4210 ln
= listIndex(list
, index
);
4212 addReply(c
,shared
.nullbulk
);
4214 robj
*ele
= listNodeValue(ln
);
4215 addReplyBulkLen(c
,ele
);
4217 addReply(c
,shared
.crlf
);
4223 static void lsetCommand(redisClient
*c
) {
4225 int index
= atoi(c
->argv
[2]->ptr
);
4227 o
= lookupKeyWrite(c
->db
,c
->argv
[1]);
4229 addReply(c
,shared
.nokeyerr
);
4231 if (o
->type
!= REDIS_LIST
) {
4232 addReply(c
,shared
.wrongtypeerr
);
4234 list
*list
= o
->ptr
;
4237 ln
= listIndex(list
, index
);
4239 addReply(c
,shared
.outofrangeerr
);
4241 robj
*ele
= listNodeValue(ln
);
4244 listNodeValue(ln
) = c
->argv
[3];
4245 incrRefCount(c
->argv
[3]);
4246 addReply(c
,shared
.ok
);
4253 static void popGenericCommand(redisClient
*c
, int where
) {
4256 o
= lookupKeyWrite(c
->db
,c
->argv
[1]);
4258 addReply(c
,shared
.nullbulk
);
4260 if (o
->type
!= REDIS_LIST
) {
4261 addReply(c
,shared
.wrongtypeerr
);
4263 list
*list
= o
->ptr
;
4266 if (where
== REDIS_HEAD
)
4267 ln
= listFirst(list
);
4269 ln
= listLast(list
);
4272 addReply(c
,shared
.nullbulk
);
4274 robj
*ele
= listNodeValue(ln
);
4275 addReplyBulkLen(c
,ele
);
4277 addReply(c
,shared
.crlf
);
4278 listDelNode(list
,ln
);
4285 static void lpopCommand(redisClient
*c
) {
4286 popGenericCommand(c
,REDIS_HEAD
);
4289 static void rpopCommand(redisClient
*c
) {
4290 popGenericCommand(c
,REDIS_TAIL
);
4293 static void lrangeCommand(redisClient
*c
) {
4295 int start
= atoi(c
->argv
[2]->ptr
);
4296 int end
= atoi(c
->argv
[3]->ptr
);
4298 o
= lookupKeyRead(c
->db
,c
->argv
[1]);
4300 addReply(c
,shared
.nullmultibulk
);
4302 if (o
->type
!= REDIS_LIST
) {
4303 addReply(c
,shared
.wrongtypeerr
);
4305 list
*list
= o
->ptr
;
4307 int llen
= listLength(list
);
4311 /* convert negative indexes */
4312 if (start
< 0) start
= llen
+start
;
4313 if (end
< 0) end
= llen
+end
;
4314 if (start
< 0) start
= 0;
4315 if (end
< 0) end
= 0;
4317 /* indexes sanity checks */
4318 if (start
> end
|| start
>= llen
) {
4319 /* Out of range start or start > end result in empty list */
4320 addReply(c
,shared
.emptymultibulk
);
4323 if (end
>= llen
) end
= llen
-1;
4324 rangelen
= (end
-start
)+1;
4326 /* Return the result in form of a multi-bulk reply */
4327 ln
= listIndex(list
, start
);
4328 addReplySds(c
,sdscatprintf(sdsempty(),"*%d\r\n",rangelen
));
4329 for (j
= 0; j
< rangelen
; j
++) {
4330 ele
= listNodeValue(ln
);
4331 addReplyBulkLen(c
,ele
);
4333 addReply(c
,shared
.crlf
);
4340 static void ltrimCommand(redisClient
*c
) {
4342 int start
= atoi(c
->argv
[2]->ptr
);
4343 int end
= atoi(c
->argv
[3]->ptr
);
4345 o
= lookupKeyWrite(c
->db
,c
->argv
[1]);
4347 addReply(c
,shared
.ok
);
4349 if (o
->type
!= REDIS_LIST
) {
4350 addReply(c
,shared
.wrongtypeerr
);
4352 list
*list
= o
->ptr
;
4354 int llen
= listLength(list
);
4355 int j
, ltrim
, rtrim
;
4357 /* convert negative indexes */
4358 if (start
< 0) start
= llen
+start
;
4359 if (end
< 0) end
= llen
+end
;
4360 if (start
< 0) start
= 0;
4361 if (end
< 0) end
= 0;
4363 /* indexes sanity checks */
4364 if (start
> end
|| start
>= llen
) {
4365 /* Out of range start or start > end result in empty list */
4369 if (end
>= llen
) end
= llen
-1;
4374 /* Remove list elements to perform the trim */
4375 for (j
= 0; j
< ltrim
; j
++) {
4376 ln
= listFirst(list
);
4377 listDelNode(list
,ln
);
4379 for (j
= 0; j
< rtrim
; j
++) {
4380 ln
= listLast(list
);
4381 listDelNode(list
,ln
);
4384 addReply(c
,shared
.ok
);
4389 static void lremCommand(redisClient
*c
) {
4392 o
= lookupKeyWrite(c
->db
,c
->argv
[1]);
4394 addReply(c
,shared
.czero
);
4396 if (o
->type
!= REDIS_LIST
) {
4397 addReply(c
,shared
.wrongtypeerr
);
4399 list
*list
= o
->ptr
;
4400 listNode
*ln
, *next
;
4401 int toremove
= atoi(c
->argv
[2]->ptr
);
4406 toremove
= -toremove
;
4409 ln
= fromtail
? list
->tail
: list
->head
;
4411 robj
*ele
= listNodeValue(ln
);
4413 next
= fromtail
? ln
->prev
: ln
->next
;
4414 if (compareStringObjects(ele
,c
->argv
[3]) == 0) {
4415 listDelNode(list
,ln
);
4418 if (toremove
&& removed
== toremove
) break;
4422 addReplySds(c
,sdscatprintf(sdsempty(),":%d\r\n",removed
));
4427 /* These are the semantics of this command:
4428 * RPOPLPUSH srclist dstlist:
4429 * IF LLEN(srclist) > 0
4430 * element = RPOP srclist
4431 * LPUSH dstlist element
4438 * The idea is to be able to get an element from a list in a reliable way
4439 * since the element is not just returned but pushed onto another list
4440 * as well. This command was originally proposed by Ezra Zygmuntowicz.
4442 static void rpoplpushcommand(redisClient
*c
) {
4445 sobj
= lookupKeyWrite(c
->db
,c
->argv
[1]);
4447 addReply(c
,shared
.nullbulk
);
4449 if (sobj
->type
!= REDIS_LIST
) {
4450 addReply(c
,shared
.wrongtypeerr
);
4452 list
*srclist
= sobj
->ptr
;
4453 listNode
*ln
= listLast(srclist
);
4456 addReply(c
,shared
.nullbulk
);
4458 robj
*dobj
= lookupKeyWrite(c
->db
,c
->argv
[2]);
4459 robj
*ele
= listNodeValue(ln
);
4462 if (dobj
&& dobj
->type
!= REDIS_LIST
) {
4463 addReply(c
,shared
.wrongtypeerr
);
4467 /* Add the element to the target list (unless it's directly
4468 * passed to some BLPOP-ing client) */
4469 if (!handleClientsWaitingListPush(c
,c
->argv
[2],ele
)) {
4471 /* Create the list if the key does not exist */
4472 dobj
= createListObject();
4473 dictAdd(c
->db
->dict
,c
->argv
[2],dobj
);
4474 incrRefCount(c
->argv
[2]);
4476 dstlist
= dobj
->ptr
;
4477 listAddNodeHead(dstlist
,ele
);
4481 /* Send the element to the client as reply as well */
4482 addReplyBulkLen(c
,ele
);
4484 addReply(c
,shared
.crlf
);
4486 /* Finally remove the element from the source list */
4487 listDelNode(srclist
,ln
);
4495 /* ==================================== Sets ================================ */
4497 static void saddCommand(redisClient
*c
) {
4500 set
= lookupKeyWrite(c
->db
,c
->argv
[1]);
4502 set
= createSetObject();
4503 dictAdd(c
->db
->dict
,c
->argv
[1],set
);
4504 incrRefCount(c
->argv
[1]);
4506 if (set
->type
!= REDIS_SET
) {
4507 addReply(c
,shared
.wrongtypeerr
);
4511 if (dictAdd(set
->ptr
,c
->argv
[2],NULL
) == DICT_OK
) {
4512 incrRefCount(c
->argv
[2]);
4514 addReply(c
,shared
.cone
);
4516 addReply(c
,shared
.czero
);
4520 static void sremCommand(redisClient
*c
) {
4523 set
= lookupKeyWrite(c
->db
,c
->argv
[1]);
4525 addReply(c
,shared
.czero
);
4527 if (set
->type
!= REDIS_SET
) {
4528 addReply(c
,shared
.wrongtypeerr
);
4531 if (dictDelete(set
->ptr
,c
->argv
[2]) == DICT_OK
) {
4533 if (htNeedsResize(set
->ptr
)) dictResize(set
->ptr
);
4534 addReply(c
,shared
.cone
);
4536 addReply(c
,shared
.czero
);
4541 static void smoveCommand(redisClient
*c
) {
4542 robj
*srcset
, *dstset
;
4544 srcset
= lookupKeyWrite(c
->db
,c
->argv
[1]);
4545 dstset
= lookupKeyWrite(c
->db
,c
->argv
[2]);
4547 /* If the source key does not exist return 0, if it's of the wrong type
4549 if (srcset
== NULL
|| srcset
->type
!= REDIS_SET
) {
4550 addReply(c
, srcset
? shared
.wrongtypeerr
: shared
.czero
);
4553 /* Error if the destination key is not a set as well */
4554 if (dstset
&& dstset
->type
!= REDIS_SET
) {
4555 addReply(c
,shared
.wrongtypeerr
);
4558 /* Remove the element from the source set */
4559 if (dictDelete(srcset
->ptr
,c
->argv
[3]) == DICT_ERR
) {
4560 /* Key not found in the src set! return zero */
4561 addReply(c
,shared
.czero
);
4565 /* Add the element to the destination set */
4567 dstset
= createSetObject();
4568 dictAdd(c
->db
->dict
,c
->argv
[2],dstset
);
4569 incrRefCount(c
->argv
[2]);
4571 if (dictAdd(dstset
->ptr
,c
->argv
[3],NULL
) == DICT_OK
)
4572 incrRefCount(c
->argv
[3]);
4573 addReply(c
,shared
.cone
);
4576 static void sismemberCommand(redisClient
*c
) {
4579 set
= lookupKeyRead(c
->db
,c
->argv
[1]);
4581 addReply(c
,shared
.czero
);
4583 if (set
->type
!= REDIS_SET
) {
4584 addReply(c
,shared
.wrongtypeerr
);
4587 if (dictFind(set
->ptr
,c
->argv
[2]))
4588 addReply(c
,shared
.cone
);
4590 addReply(c
,shared
.czero
);
4594 static void scardCommand(redisClient
*c
) {
4598 o
= lookupKeyRead(c
->db
,c
->argv
[1]);
4600 addReply(c
,shared
.czero
);
4603 if (o
->type
!= REDIS_SET
) {
4604 addReply(c
,shared
.wrongtypeerr
);
4607 addReplySds(c
,sdscatprintf(sdsempty(),":%lu\r\n",
4613 static void spopCommand(redisClient
*c
) {
4617 set
= lookupKeyWrite(c
->db
,c
->argv
[1]);
4619 addReply(c
,shared
.nullbulk
);
4621 if (set
->type
!= REDIS_SET
) {
4622 addReply(c
,shared
.wrongtypeerr
);
4625 de
= dictGetRandomKey(set
->ptr
);
4627 addReply(c
,shared
.nullbulk
);
4629 robj
*ele
= dictGetEntryKey(de
);
4631 addReplyBulkLen(c
,ele
);
4633 addReply(c
,shared
.crlf
);
4634 dictDelete(set
->ptr
,ele
);
4635 if (htNeedsResize(set
->ptr
)) dictResize(set
->ptr
);
4641 static void srandmemberCommand(redisClient
*c
) {
4645 set
= lookupKeyRead(c
->db
,c
->argv
[1]);
4647 addReply(c
,shared
.nullbulk
);
4649 if (set
->type
!= REDIS_SET
) {
4650 addReply(c
,shared
.wrongtypeerr
);
4653 de
= dictGetRandomKey(set
->ptr
);
4655 addReply(c
,shared
.nullbulk
);
4657 robj
*ele
= dictGetEntryKey(de
);
4659 addReplyBulkLen(c
,ele
);
4661 addReply(c
,shared
.crlf
);
4666 static int qsortCompareSetsByCardinality(const void *s1
, const void *s2
) {
4667 dict
**d1
= (void*) s1
, **d2
= (void*) s2
;
4669 return dictSize(*d1
)-dictSize(*d2
);
4672 static void sinterGenericCommand(redisClient
*c
, robj
**setskeys
, unsigned long setsnum
, robj
*dstkey
) {
4673 dict
**dv
= zmalloc(sizeof(dict
*)*setsnum
);
4676 robj
*lenobj
= NULL
, *dstset
= NULL
;
4677 unsigned long j
, cardinality
= 0;
4679 for (j
= 0; j
< setsnum
; j
++) {
4683 lookupKeyWrite(c
->db
,setskeys
[j
]) :
4684 lookupKeyRead(c
->db
,setskeys
[j
]);
4688 if (deleteKey(c
->db
,dstkey
))
4690 addReply(c
,shared
.czero
);
4692 addReply(c
,shared
.nullmultibulk
);
4696 if (setobj
->type
!= REDIS_SET
) {
4698 addReply(c
,shared
.wrongtypeerr
);
4701 dv
[j
] = setobj
->ptr
;
4703 /* Sort sets from the smallest to largest, this will improve our
4704 * algorithm's performance */
4705 qsort(dv
,setsnum
,sizeof(dict
*),qsortCompareSetsByCardinality
);
4707 /* The first thing we should output is the total number of elements...
4708 * since this is a multi-bulk write, but at this stage we don't know
4709 * the intersection set size, so we use a trick, append an empty object
4710 * to the output list and save the pointer to later modify it with the
4713 lenobj
= createObject(REDIS_STRING
,NULL
);
4715 decrRefCount(lenobj
);
4717 /* If we have a target key where to store the resulting set
4718 * create this key with an empty set inside */
4719 dstset
= createSetObject();
4722 /* Iterate all the elements of the first (smallest) set, and test
4723 * the element against all the other sets, if at least one set does
4724 * not include the element it is discarded */
4725 di
= dictGetIterator(dv
[0]);
4727 while((de
= dictNext(di
)) != NULL
) {
4730 for (j
= 1; j
< setsnum
; j
++)
4731 if (dictFind(dv
[j
],dictGetEntryKey(de
)) == NULL
) break;
4733 continue; /* at least one set does not contain the member */
4734 ele
= dictGetEntryKey(de
);
4736 addReplyBulkLen(c
,ele
);
4738 addReply(c
,shared
.crlf
);
4741 dictAdd(dstset
->ptr
,ele
,NULL
);
4745 dictReleaseIterator(di
);
4748 /* Store the resulting set into the target */
4749 deleteKey(c
->db
,dstkey
);
4750 dictAdd(c
->db
->dict
,dstkey
,dstset
);
4751 incrRefCount(dstkey
);
4755 lenobj
->ptr
= sdscatprintf(sdsempty(),"*%lu\r\n",cardinality
);
4757 addReplySds(c
,sdscatprintf(sdsempty(),":%lu\r\n",
4758 dictSize((dict
*)dstset
->ptr
)));
4764 static void sinterCommand(redisClient
*c
) {
4765 sinterGenericCommand(c
,c
->argv
+1,c
->argc
-1,NULL
);
4768 static void sinterstoreCommand(redisClient
*c
) {
4769 sinterGenericCommand(c
,c
->argv
+2,c
->argc
-2,c
->argv
[1]);
4772 #define REDIS_OP_UNION 0
4773 #define REDIS_OP_DIFF 1
4775 static void sunionDiffGenericCommand(redisClient
*c
, robj
**setskeys
, int setsnum
, robj
*dstkey
, int op
) {
4776 dict
**dv
= zmalloc(sizeof(dict
*)*setsnum
);
4779 robj
*dstset
= NULL
;
4780 int j
, cardinality
= 0;
4782 for (j
= 0; j
< setsnum
; j
++) {
4786 lookupKeyWrite(c
->db
,setskeys
[j
]) :
4787 lookupKeyRead(c
->db
,setskeys
[j
]);
4792 if (setobj
->type
!= REDIS_SET
) {
4794 addReply(c
,shared
.wrongtypeerr
);
4797 dv
[j
] = setobj
->ptr
;
4800 /* We need a temp set object to store our union. If the dstkey
4801 * is not NULL (that is, we are inside an SUNIONSTORE operation) then
4802 * this set object will be the resulting object to set into the target key*/
4803 dstset
= createSetObject();
4805 /* Iterate all the elements of all the sets, add every element a single
4806 * time to the result set */
4807 for (j
= 0; j
< setsnum
; j
++) {
4808 if (op
== REDIS_OP_DIFF
&& j
== 0 && !dv
[j
]) break; /* result set is empty */
4809 if (!dv
[j
]) continue; /* non existing keys are like empty sets */
4811 di
= dictGetIterator(dv
[j
]);
4813 while((de
= dictNext(di
)) != NULL
) {
4816 /* dictAdd will not add the same element multiple times */
4817 ele
= dictGetEntryKey(de
);
4818 if (op
== REDIS_OP_UNION
|| j
== 0) {
4819 if (dictAdd(dstset
->ptr
,ele
,NULL
) == DICT_OK
) {
4823 } else if (op
== REDIS_OP_DIFF
) {
4824 if (dictDelete(dstset
->ptr
,ele
) == DICT_OK
) {
4829 dictReleaseIterator(di
);
4831 if (op
== REDIS_OP_DIFF
&& cardinality
== 0) break; /* result set is empty */
4834 /* Output the content of the resulting set, if not in STORE mode */
4836 addReplySds(c
,sdscatprintf(sdsempty(),"*%d\r\n",cardinality
));
4837 di
= dictGetIterator(dstset
->ptr
);
4838 while((de
= dictNext(di
)) != NULL
) {
4841 ele
= dictGetEntryKey(de
);
4842 addReplyBulkLen(c
,ele
);
4844 addReply(c
,shared
.crlf
);
4846 dictReleaseIterator(di
);
4848 /* If we have a target key where to store the resulting set
4849 * create this key with the result set inside */
4850 deleteKey(c
->db
,dstkey
);
4851 dictAdd(c
->db
->dict
,dstkey
,dstset
);
4852 incrRefCount(dstkey
);
4857 decrRefCount(dstset
);
4859 addReplySds(c
,sdscatprintf(sdsempty(),":%lu\r\n",
4860 dictSize((dict
*)dstset
->ptr
)));
4866 static void sunionCommand(redisClient
*c
) {
4867 sunionDiffGenericCommand(c
,c
->argv
+1,c
->argc
-1,NULL
,REDIS_OP_UNION
);
4870 static void sunionstoreCommand(redisClient
*c
) {
4871 sunionDiffGenericCommand(c
,c
->argv
+2,c
->argc
-2,c
->argv
[1],REDIS_OP_UNION
);
4874 static void sdiffCommand(redisClient
*c
) {
4875 sunionDiffGenericCommand(c
,c
->argv
+1,c
->argc
-1,NULL
,REDIS_OP_DIFF
);
4878 static void sdiffstoreCommand(redisClient
*c
) {
4879 sunionDiffGenericCommand(c
,c
->argv
+2,c
->argc
-2,c
->argv
[1],REDIS_OP_DIFF
);
4882 /* ==================================== ZSets =============================== */
4884 /* ZSETs are ordered sets using two data structures to hold the same elements
4885 * in order to get O(log(N)) INSERT and REMOVE operations into a sorted
4888 * The elements are added to a hash table mapping Redis objects to scores.
4889 * At the same time the elements are added to a skip list mapping scores
4890 * to Redis objects (so objects are sorted by scores in this "view"). */
4892 /* This skiplist implementation is almost a C translation of the original
4893 * algorithm described by William Pugh in "Skip Lists: A Probabilistic
4894 * Alternative to Balanced Trees", modified in three ways:
4895 * a) this implementation allows for repeated values.
4896 * b) the comparison is not just by key (our 'score') but by satellite data.
4897 * c) there is a back pointer, so it's a doubly linked list with the back
4898 * pointers being only at "level 1". This allows traversing the list
4899 * from tail to head, useful for ZREVRANGE. */
4901 static zskiplistNode
*zslCreateNode(int level
, double score
, robj
*obj
) {
4902 zskiplistNode
*zn
= zmalloc(sizeof(*zn
));
4904 zn
->forward
= zmalloc(sizeof(zskiplistNode
*) * level
);
4906 zn
->span
= zmalloc(sizeof(unsigned int) * (level
- 1));
4912 static zskiplist
*zslCreate(void) {
4916 zsl
= zmalloc(sizeof(*zsl
));
4919 zsl
->header
= zslCreateNode(ZSKIPLIST_MAXLEVEL
,0,NULL
);
4920 for (j
= 0; j
< ZSKIPLIST_MAXLEVEL
; j
++) {
4921 zsl
->header
->forward
[j
] = NULL
;
4923 /* span has space for ZSKIPLIST_MAXLEVEL-1 elements */
4924 if (j
< ZSKIPLIST_MAXLEVEL
-1)
4925 zsl
->header
->span
[j
] = 0;
4927 zsl
->header
->backward
= NULL
;
4932 static void zslFreeNode(zskiplistNode
*node
) {
4933 decrRefCount(node
->obj
);
4934 zfree(node
->forward
);
4939 static void zslFree(zskiplist
*zsl
) {
4940 zskiplistNode
*node
= zsl
->header
->forward
[0], *next
;
4942 zfree(zsl
->header
->forward
);
4943 zfree(zsl
->header
->span
);
4946 next
= node
->forward
[0];
4953 static int zslRandomLevel(void) {
4955 while ((random()&0xFFFF) < (ZSKIPLIST_P
* 0xFFFF))
4960 static void zslInsert(zskiplist
*zsl
, double score
, robj
*obj
) {
4961 zskiplistNode
*update
[ZSKIPLIST_MAXLEVEL
], *x
;
4962 unsigned int rank
[ZSKIPLIST_MAXLEVEL
];
4966 for (i
= zsl
->level
-1; i
>= 0; i
--) {
4967 /* store rank that is crossed to reach the insert position */
4968 rank
[i
] = i
== (zsl
->level
-1) ? 0 : rank
[i
+1];
4970 while (x
->forward
[i
] &&
4971 (x
->forward
[i
]->score
< score
||
4972 (x
->forward
[i
]->score
== score
&&
4973 compareStringObjects(x
->forward
[i
]->obj
,obj
) < 0))) {
4974 rank
[i
] += i
> 0 ? x
->span
[i
-1] : 1;
4979 /* we assume the key is not already inside, since we allow duplicated
4980 * scores, and the re-insertion of score and redis object should never
4981 * happen since the caller of zslInsert() should test in the hash table
4982 * if the element is already inside or not. */
4983 level
= zslRandomLevel();
4984 if (level
> zsl
->level
) {
4985 for (i
= zsl
->level
; i
< level
; i
++) {
4987 update
[i
] = zsl
->header
;
4988 update
[i
]->span
[i
-1] = zsl
->length
;
4992 x
= zslCreateNode(level
,score
,obj
);
4993 for (i
= 0; i
< level
; i
++) {
4994 x
->forward
[i
] = update
[i
]->forward
[i
];
4995 update
[i
]->forward
[i
] = x
;
4997 /* update span covered by update[i] as x is inserted here */
4999 x
->span
[i
-1] = update
[i
]->span
[i
-1] - (rank
[0] - rank
[i
]);
5000 update
[i
]->span
[i
-1] = (rank
[0] - rank
[i
]) + 1;
5004 /* increment span for untouched levels */
5005 for (i
= level
; i
< zsl
->level
; i
++) {
5006 update
[i
]->span
[i
-1]++;
5009 x
->backward
= (update
[0] == zsl
->header
) ? NULL
: update
[0];
5011 x
->forward
[0]->backward
= x
;
5017 /* Delete an element with matching score/object from the skiplist. */
5018 static int zslDelete(zskiplist
*zsl
, double score
, robj
*obj
) {
5019 zskiplistNode
*update
[ZSKIPLIST_MAXLEVEL
], *x
;
5023 for (i
= zsl
->level
-1; i
>= 0; i
--) {
5024 while (x
->forward
[i
] &&
5025 (x
->forward
[i
]->score
< score
||
5026 (x
->forward
[i
]->score
== score
&&
5027 compareStringObjects(x
->forward
[i
]->obj
,obj
) < 0)))
5031 /* We may have multiple elements with the same score, what we need
5032 * is to find the element with both the right score and object. */
5034 if (x
&& score
== x
->score
&& compareStringObjects(x
->obj
,obj
) == 0) {
5035 for (i
= 0; i
< zsl
->level
; i
++) {
5036 if (update
[i
]->forward
[i
] == x
) {
5038 update
[i
]->span
[i
-1] += x
->span
[i
-1] - 1;
5040 update
[i
]->forward
[i
] = x
->forward
[i
];
5042 /* invariant: i > 0, because update[0]->forward[0]
5043 * is always equal to x */
5044 update
[i
]->span
[i
-1] -= 1;
5047 if (x
->forward
[0]) {
5048 x
->forward
[0]->backward
= x
->backward
;
5050 zsl
->tail
= x
->backward
;
5053 while(zsl
->level
> 1 && zsl
->header
->forward
[zsl
->level
-1] == NULL
)
5058 return 0; /* not found */
5060 return 0; /* not found */
5063 /* Delete all the elements with score between min and max from the skiplist.
5064 * Min and max are inclusive, so an element with score >= min && score <= max is deleted.
5065 * Note that this function takes the reference to the hash table view of the
5066 * sorted set, in order to remove the elements from the hash table too. */
5067 static unsigned long zslDeleteRange(zskiplist
*zsl
, double min
, double max
, dict
*dict
) {
5068 zskiplistNode
*update
[ZSKIPLIST_MAXLEVEL
], *x
;
5069 unsigned long removed
= 0;
5073 for (i
= zsl
->level
-1; i
>= 0; i
--) {
5074 while (x
->forward
[i
] && x
->forward
[i
]->score
< min
)
5078 /* No object is matched here: every node with min <= score <= max is
5079 * unlinked below and its entry removed from the hash table as well. */
5081 while (x
&& x
->score
<= max
) {
5082 zskiplistNode
*next
;
5084 for (i
= 0; i
< zsl
->level
; i
++) {
5085 if (update
[i
]->forward
[i
] == x
) {
5087 update
[i
]->span
[i
-1] += x
->span
[i
-1] - 1;
5089 update
[i
]->forward
[i
] = x
->forward
[i
];
5091 /* invariant: i > 0, because update[0]->forward[0]
5092 * is always equal to x */
5093 update
[i
]->span
[i
-1] -= 1;
5096 if (x
->forward
[0]) {
5097 x
->forward
[0]->backward
= x
->backward
;
5099 zsl
->tail
= x
->backward
;
5101 next
= x
->forward
[0];
5102 dictDelete(dict
,x
->obj
);
5104 while(zsl
->level
> 1 && zsl
->header
->forward
[zsl
->level
-1] == NULL
)
5110 return removed
; /* not found */
5113 /* Find the first node having a score equal to or greater than the specified one.
5114 * Returns NULL if there is no match. */
5115 static zskiplistNode
*zslFirstWithScore(zskiplist
*zsl
, double score
) {
5120 for (i
= zsl
->level
-1; i
>= 0; i
--) {
5121 while (x
->forward
[i
] && x
->forward
[i
]->score
< score
)
5124 /* The node following x at the lowest level is the first one whose score
5125 * is >= the requested score, or NULL when there is no such node. */
5126 return x
->forward
[0];
5129 /* Find the rank for an element by both score and key.
5130 * Returns 0 when the element cannot be found, rank otherwise.
5131 * Note that the rank is 1-based due to the span of zsl->header to the
5133 static unsigned long zslGetRank(zskiplist
*zsl
, double score
, robj
*o
) {
5135 unsigned long rank
= 0;
5139 for (i
= zsl
->level
-1; i
>= 0; i
--) {
5140 while (x
->forward
[i
] &&
5141 (x
->forward
[i
]->score
< score
||
5142 (x
->forward
[i
]->score
== score
&&
5143 compareStringObjects(x
->forward
[i
]->obj
,o
) <= 0))) {
5144 rank
+= i
> 0 ? x
->span
[i
-1] : 1;
5148 /* x might be equal to zsl->header, so test if obj is non-NULL */
5149 if (x
->obj
&& compareStringObjects(x
->obj
,o
) == 0) {
5156 /* Finds an element by its rank. The rank argument needs to be 1-based. */
5157 zskiplistNode
* zslGetElementByRank(zskiplist
*zsl
, unsigned long rank
) {
5159 unsigned long traversed
= 0;
5163 for (i
= zsl
->level
-1; i
>= 0; i
--) {
5164 while (x
->forward
[i
] && (traversed
+ (i
> 0 ? x
->span
[i
-1] : 1)) <= rank
) {
5165 traversed
+= i
> 0 ? x
->span
[i
-1] : 1;
5169 if (traversed
== rank
) {
5176 /* The actual Z-commands implementations */
5178 /* This generic command implements both ZADD and ZINCRBY.
5179 * scoreval is the score if the operation is a ZADD (doincrement == 0) or
5180 * the increment if the operation is a ZINCRBY (doincrement == 1). */
5181 static void zaddGenericCommand(redisClient
*c
, robj
*key
, robj
*ele
, double scoreval
, int doincrement
) {
5186 zsetobj
= lookupKeyWrite(c
->db
,key
);
5187 if (zsetobj
== NULL
) {
5188 zsetobj
= createZsetObject();
5189 dictAdd(c
->db
->dict
,key
,zsetobj
);
5192 if (zsetobj
->type
!= REDIS_ZSET
) {
5193 addReply(c
,shared
.wrongtypeerr
);
5199 /* Ok now since we implement both ZADD and ZINCRBY here the code
5200 * needs to handle the two different conditions. It's all about setting
5201 * '*score', that is, the new score to set, to the right value. */
5202 score
= zmalloc(sizeof(double));
5206 /* Read the old score. If the element was not present starts from 0 */
5207 de
= dictFind(zs
->dict
,ele
);
5209 double *oldscore
= dictGetEntryVal(de
);
5210 *score
= *oldscore
+ scoreval
;
5218 /* What follows is a simple remove and re-insert operation that is common
5219 * to both ZADD and ZINCRBY... */
5220 if (dictAdd(zs
->dict
,ele
,score
) == DICT_OK
) {
5221 /* case 1: New element */
5222 incrRefCount(ele
); /* added to hash */
5223 zslInsert(zs
->zsl
,*score
,ele
);
5224 incrRefCount(ele
); /* added to skiplist */
5227 addReplyDouble(c
,*score
);
5229 addReply(c
,shared
.cone
);
5234 /* case 2: Score update operation */
5235 de
= dictFind(zs
->dict
,ele
);
5236 redisAssert(de
!= NULL
);
5237 oldscore
= dictGetEntryVal(de
);
5238 if (*score
!= *oldscore
) {
5241 /* Remove and insert the element in the skip list with new score */
5242 deleted
= zslDelete(zs
->zsl
,*oldscore
,ele
);
5243 redisAssert(deleted
!= 0);
5244 zslInsert(zs
->zsl
,*score
,ele
);
5246 /* Update the score in the hash table */
5247 dictReplace(zs
->dict
,ele
,score
);
5253 addReplyDouble(c
,*score
);
5255 addReply(c
,shared
.czero
);
5259 static void zaddCommand(redisClient
*c
) {
5262 scoreval
= strtod(c
->argv
[2]->ptr
,NULL
);
5263 zaddGenericCommand(c
,c
->argv
[1],c
->argv
[3],scoreval
,0);
5266 static void zincrbyCommand(redisClient
*c
) {
5269 scoreval
= strtod(c
->argv
[2]->ptr
,NULL
);
5270 zaddGenericCommand(c
,c
->argv
[1],c
->argv
[3],scoreval
,1);
5273 static void zremCommand(redisClient
*c
) {
5277 zsetobj
= lookupKeyWrite(c
->db
,c
->argv
[1]);
5278 if (zsetobj
== NULL
) {
5279 addReply(c
,shared
.czero
);
5285 if (zsetobj
->type
!= REDIS_ZSET
) {
5286 addReply(c
,shared
.wrongtypeerr
);
5290 de
= dictFind(zs
->dict
,c
->argv
[2]);
5292 addReply(c
,shared
.czero
);
5295 /* Delete from the skiplist */
5296 oldscore
= dictGetEntryVal(de
);
5297 deleted
= zslDelete(zs
->zsl
,*oldscore
,c
->argv
[2]);
5298 redisAssert(deleted
!= 0);
5300 /* Delete from the hash table */
5301 dictDelete(zs
->dict
,c
->argv
[2]);
5302 if (htNeedsResize(zs
->dict
)) dictResize(zs
->dict
);
5304 addReply(c
,shared
.cone
);
5308 static void zremrangebyscoreCommand(redisClient
*c
) {
5309 double min
= strtod(c
->argv
[2]->ptr
,NULL
);
5310 double max
= strtod(c
->argv
[3]->ptr
,NULL
);
5314 zsetobj
= lookupKeyWrite(c
->db
,c
->argv
[1]);
5315 if (zsetobj
== NULL
) {
5316 addReply(c
,shared
.czero
);
5320 if (zsetobj
->type
!= REDIS_ZSET
) {
5321 addReply(c
,shared
.wrongtypeerr
);
5325 deleted
= zslDeleteRange(zs
->zsl
,min
,max
,zs
->dict
);
5326 if (htNeedsResize(zs
->dict
)) dictResize(zs
->dict
);
5327 server
.dirty
+= deleted
;
5328 addReplySds(c
,sdscatprintf(sdsempty(),":%lu\r\n",deleted
));
5332 /* This command merges 2 or more zsets to a destination. When an element
5333 * does not exist in a certain set, score 0 is assumed. The score for an
5334 * element across sets is summed. */
5335 static void zmergeGenericCommand(redisClient
*c
, int readweights
) {
5339 robj
*dstkey
= c
->argv
[1], *dstobj
;
5344 zsetnum
= c
->argc
-2;
5346 /* force number of arguments to be even */
5347 if (zsetnum
% 2 > 0) {
5348 addReplySds(c
,sdsnew("-ERR wrong number of arguments for ZMERGEWEIGHED\r\n"));
5354 addReply(c
,shared
.syntaxerr
);
5358 srcdict
= zmalloc(sizeof(dict
*) * zsetnum
);
5359 weights
= zmalloc(sizeof(double) * zsetnum
);
5360 for (i
= 0; i
< zsetnum
; i
++) {
5363 weights
[i
] = strtod(c
->argv
[j
+1]->ptr
, NULL
);
5369 robj
*zsetobj
= lookupKeyWrite(c
->db
,c
->argv
[j
]);
5373 if (zsetobj
->type
!= REDIS_ZSET
) {
5376 addReply(c
,shared
.wrongtypeerr
);
5379 srcdict
[i
] = ((zset
*)zsetobj
->ptr
)->dict
;
5383 dstobj
= createZsetObject();
5385 for (i
= 0; i
< zsetnum
; i
++) {
5386 if (!srcdict
[i
]) continue;
5388 di
= dictGetIterator(srcdict
[i
]);
5389 while((de
= dictNext(di
)) != NULL
) {
5390 /* skip key when already processed */
5391 if (dictFind(dst
->dict
,dictGetEntryKey(de
)) != NULL
) continue;
5393 double *score
= zmalloc(sizeof(double));
5395 for (j
= 0; j
< zsetnum
; j
++) {
5396 if (!srcdict
[j
]) continue;
5398 dictEntry
*other
= dictFind(srcdict
[j
],dictGetEntryKey(de
));
5400 *score
= *score
+ weights
[j
] * (*(double*)dictGetEntryVal(other
));
5404 robj
*o
= dictGetEntryKey(de
);
5405 dictAdd(dst
->dict
,o
,score
);
5406 incrRefCount(o
); /* added to dictionary */
5407 zslInsert(dst
->zsl
,*score
,o
);
5408 incrRefCount(o
); /* added to skiplist */
5410 dictReleaseIterator(di
);
5413 deleteKey(c
->db
,dstkey
);
5414 dictAdd(c
->db
->dict
,dstkey
,dstobj
);
5415 incrRefCount(dstkey
);
5417 addReplyLong(c
, dst
->zsl
->length
);
5423 static void zmergeCommand(redisClient
*c
) {
5424 zmergeGenericCommand(c
,0);
5427 static void zmergeweighedCommand(redisClient
*c
) {
5428 zmergeGenericCommand(c
,1);
5431 static void zrangeGenericCommand(redisClient
*c
, int reverse
) {
5433 int start
= atoi(c
->argv
[2]->ptr
);
5434 int end
= atoi(c
->argv
[3]->ptr
);
5437 if (c
->argc
== 5 && !strcasecmp(c
->argv
[4]->ptr
,"withscores")) {
5439 } else if (c
->argc
>= 5) {
5440 addReply(c
,shared
.syntaxerr
);
5444 o
= lookupKeyRead(c
->db
,c
->argv
[1]);
5446 addReply(c
,shared
.nullmultibulk
);
5448 if (o
->type
!= REDIS_ZSET
) {
5449 addReply(c
,shared
.wrongtypeerr
);
5451 zset
*zsetobj
= o
->ptr
;
5452 zskiplist
*zsl
= zsetobj
->zsl
;
5455 int llen
= zsl
->length
;
5459 /* convert negative indexes */
5460 if (start
< 0) start
= llen
+start
;
5461 if (end
< 0) end
= llen
+end
;
5462 if (start
< 0) start
= 0;
5463 if (end
< 0) end
= 0;
5465 /* indexes sanity checks */
5466 if (start
> end
|| start
>= llen
) {
5467 /* Out of range start or start > end result in empty list */
5468 addReply(c
,shared
.emptymultibulk
);
5471 if (end
>= llen
) end
= llen
-1;
5472 rangelen
= (end
-start
)+1;
5474 /* check if starting point is trivial, before searching
5475 * the element in log(N) time */
5477 ln
= start
== 0 ? zsl
->tail
: zslGetElementByRank(zsl
, llen
- start
);
5479 ln
= start
== 0 ? zsl
->header
->forward
[0] : zslGetElementByRank(zsl
, start
+ 1);
5482 /* Return the result in form of a multi-bulk reply */
5483 addReplySds(c
,sdscatprintf(sdsempty(),"*%d\r\n",
5484 withscores
? (rangelen
*2) : rangelen
));
5485 for (j
= 0; j
< rangelen
; j
++) {
5487 addReplyBulkLen(c
,ele
);
5489 addReply(c
,shared
.crlf
);
5491 addReplyDouble(c
,ln
->score
);
5492 ln
= reverse
? ln
->backward
: ln
->forward
[0];
5498 static void zrangeCommand(redisClient
*c
) {
5499 zrangeGenericCommand(c
,0);
5502 static void zrevrangeCommand(redisClient
*c
) {
5503 zrangeGenericCommand(c
,1);
5506 /* This command implements both ZRANGEBYSCORE and ZCOUNT.
5507 * If justcount is non-zero, just the count is returned. */
5508 static void genericZrangebyscoreCommand(redisClient
*c
, int justcount
) {
5511 int minex
= 0, maxex
= 0; /* are min or max exclusive? */
5512 int offset
= 0, limit
= -1;
5516 /* Parse the min-max interval. If one of the values is prefixed
5517 * by the "(" character, it's considered "open". For instance
5518 * ZRANGEBYSCORE zset (1.5 (2.5 will match min < x < max
5519 * ZRANGEBYSCORE zset 1.5 2.5 will instead match min <= x <= max */
5520 if (((char*)c
->argv
[2]->ptr
)[0] == '(') {
5521 min
= strtod((char*)c
->argv
[2]->ptr
+1,NULL
);
5524 min
= strtod(c
->argv
[2]->ptr
,NULL
);
5526 if (((char*)c
->argv
[3]->ptr
)[0] == '(') {
5527 max
= strtod((char*)c
->argv
[3]->ptr
+1,NULL
);
5530 max
= strtod(c
->argv
[3]->ptr
,NULL
);
5533 /* Parse "WITHSCORES": note that if the command was called with
5534 * the name ZCOUNT then we are sure that c->argc == 4, so we'll never
5535 * enter the following paths to parse WITHSCORES and LIMIT. */
5536 if (c
->argc
== 5 || c
->argc
== 8) {
5537 if (strcasecmp(c
->argv
[c
->argc
-1]->ptr
,"withscores") == 0)
5542 if (c
->argc
!= (4 + withscores
) && c
->argc
!= (7 + withscores
))
5546 sdsnew("-ERR wrong number of arguments for ZRANGEBYSCORE\r\n"));
5551 if (c
->argc
== (7 + withscores
) && strcasecmp(c
->argv
[4]->ptr
,"limit")) {
5552 addReply(c
,shared
.syntaxerr
);
5554 } else if (c
->argc
== (7 + withscores
)) {
5555 offset
= atoi(c
->argv
[5]->ptr
);
5556 limit
= atoi(c
->argv
[6]->ptr
);
5557 if (offset
< 0) offset
= 0;
5560 /* Ok, lookup the key and get the range */
5561 o
= lookupKeyRead(c
->db
,c
->argv
[1]);
5563 addReply(c
,justcount
? shared
.czero
: shared
.nullmultibulk
);
5565 if (o
->type
!= REDIS_ZSET
) {
5566 addReply(c
,shared
.wrongtypeerr
);
5568 zset
*zsetobj
= o
->ptr
;
5569 zskiplist
*zsl
= zsetobj
->zsl
;
5571 robj
*ele
, *lenobj
= NULL
;
5572 unsigned long rangelen
= 0;
5574 /* Get the first node with the score >= min, or with
5575 * score > min if 'minex' is true. */
5576 ln
= zslFirstWithScore(zsl
,min
);
5577 while (minex
&& ln
&& ln
->score
== min
) ln
= ln
->forward
[0];
5580 /* No element matching the specified interval */
5581 addReply(c
,justcount
? shared
.czero
: shared
.emptymultibulk
);
5585 /* We don't know in advance how many matching elements there
5586 * are in the list, so we push this object that will represent
5587 * the multi-bulk length in the output buffer, and will "fix"
5590 lenobj
= createObject(REDIS_STRING
,NULL
);
5592 decrRefCount(lenobj
);
5595 while(ln
&& (maxex
? (ln
->score
< max
) : (ln
->score
<= max
))) {
5598 ln
= ln
->forward
[0];
5601 if (limit
== 0) break;
5604 addReplyBulkLen(c
,ele
);
5606 addReply(c
,shared
.crlf
);
5608 addReplyDouble(c
,ln
->score
);
5610 ln
= ln
->forward
[0];
5612 if (limit
> 0) limit
--;
5615 addReplyLong(c
,(long)rangelen
);
5617 lenobj
->ptr
= sdscatprintf(sdsempty(),"*%lu\r\n",
5618 withscores
? (rangelen
*2) : rangelen
);
5624 static void zrangebyscoreCommand(redisClient
*c
) {
5625 genericZrangebyscoreCommand(c
,0);
5628 static void zcountCommand(redisClient
*c
) {
5629 genericZrangebyscoreCommand(c
,1);
5632 static void zcardCommand(redisClient
*c
) {
5636 o
= lookupKeyRead(c
->db
,c
->argv
[1]);
5638 addReply(c
,shared
.czero
);
5641 if (o
->type
!= REDIS_ZSET
) {
5642 addReply(c
,shared
.wrongtypeerr
);
5645 addReplySds(c
,sdscatprintf(sdsempty(),":%lu\r\n",zs
->zsl
->length
));
5650 static void zscoreCommand(redisClient
*c
) {
5654 o
= lookupKeyRead(c
->db
,c
->argv
[1]);
5656 addReply(c
,shared
.nullbulk
);
5659 if (o
->type
!= REDIS_ZSET
) {
5660 addReply(c
,shared
.wrongtypeerr
);
5665 de
= dictFind(zs
->dict
,c
->argv
[2]);
5667 addReply(c
,shared
.nullbulk
);
5669 double *score
= dictGetEntryVal(de
);
5671 addReplyDouble(c
,*score
);
5677 static void zrankCommand(redisClient
*c
) {
5679 o
= lookupKeyRead(c
->db
,c
->argv
[1]);
5681 addReply(c
,shared
.nullbulk
);
5684 if (o
->type
!= REDIS_ZSET
) {
5685 addReply(c
,shared
.wrongtypeerr
);
5688 zskiplist
*zsl
= zs
->zsl
;
5692 de
= dictFind(zs
->dict
,c
->argv
[2]);
5694 addReply(c
,shared
.nullbulk
);
5698 double *score
= dictGetEntryVal(de
);
5699 rank
= zslGetRank(zsl
, *score
, c
->argv
[2]);
5701 addReplyLong(c
, rank
-1);
5703 addReply(c
,shared
.nullbulk
);
5708 /* =================================== Hashes =============================== */
5709 static void hsetCommand(redisClient
*c
) {
5711 robj
*o
= lookupKeyWrite(c
->db
,c
->argv
[1]);
5714 o
= createHashObject();
5715 dictAdd(c
->db
->dict
,c
->argv
[1],o
);
5716 incrRefCount(c
->argv
[1]);
5718 if (o
->type
!= REDIS_HASH
) {
5719 addReply(c
,shared
.wrongtypeerr
);
5723 if (o
->encoding
== REDIS_ENCODING_ZIPMAP
) {
5724 unsigned char *zm
= o
->ptr
;
5726 zm
= zipmapSet(zm
,c
->argv
[2]->ptr
,sdslen(c
->argv
[2]->ptr
),
5727 c
->argv
[3]->ptr
,sdslen(c
->argv
[3]->ptr
),&update
);
5730 if (dictAdd(o
->ptr
,c
->argv
[2],c
->argv
[3]) == DICT_OK
) {
5731 incrRefCount(c
->argv
[2]);
5735 incrRefCount(c
->argv
[3]);
5738 addReplySds(c
,sdscatprintf(sdsempty(),":%d\r\n",update
== 0));
5741 static void hgetCommand(redisClient
*c
) {
5742 robj
*o
= lookupKeyRead(c
->db
,c
->argv
[1]);
5745 addReply(c
,shared
.nullbulk
);
5748 if (o
->encoding
== REDIS_ENCODING_ZIPMAP
) {
5749 unsigned char *zm
= o
->ptr
;
5753 if (zipmapGet(zm
,c
->argv
[2]->ptr
,sdslen(c
->argv
[2]->ptr
), &val
,&vlen
)) {
5754 addReplySds(c
,sdscatprintf(sdsempty(),"$%u\r\n", vlen
));
5755 addReplySds(c
,sdsnewlen(val
,vlen
));
5756 addReply(c
,shared
.crlf
);
5759 addReply(c
,shared
.nullbulk
);
5763 struct dictEntry
*de
;
5765 de
= dictFind(o
->ptr
,c
->argv
[2]);
5767 addReply(c
,shared
.nullbulk
);
5769 robj
*e
= dictGetEntryVal(de
);
5771 addReplyBulkLen(c
,e
);
5773 addReply(c
,shared
.crlf
);
5779 /* ========================= Non type-specific commands ==================== */
5781 static void flushdbCommand(redisClient
*c
) {
5782 server
.dirty
+= dictSize(c
->db
->dict
);
5783 dictEmpty(c
->db
->dict
);
5784 dictEmpty(c
->db
->expires
);
5785 addReply(c
,shared
.ok
);
5788 static void flushallCommand(redisClient
*c
) {
5789 server
.dirty
+= emptyDb();
5790 addReply(c
,shared
.ok
);
5791 rdbSave(server
.dbfilename
);
5795 static redisSortOperation
*createSortOperation(int type
, robj
*pattern
) {
5796 redisSortOperation
*so
= zmalloc(sizeof(*so
));
5798 so
->pattern
= pattern
;
5802 /* Return the value associated to the key with a name obtained
5803 * substituting the first occurrence of '*' in 'pattern' with 'subst' */
5804 static robj
*lookupKeyByPattern(redisDb
*db
, robj
*pattern
, robj
*subst
) {
5808 int prefixlen
, sublen
, postfixlen
;
5809 /* Exploit the internal sds representation to create a sds string allocated on the stack in order to make this function faster */
5813 char buf
[REDIS_SORTKEY_MAX
+1];
5816 /* If the pattern is "#" return the substitution object itself in order
5817 * to implement the "SORT ... GET #" feature. */
5818 spat
= pattern
->ptr
;
5819 if (spat
[0] == '#' && spat
[1] == '\0') {
5823 /* The substitution object may be specially encoded. If so we create
5824 * a decoded object on the fly. Otherwise getDecodedObject will just
5825 * increment the ref count, that we'll decrement later. */
5826 subst
= getDecodedObject(subst
);
5829 if (sdslen(spat
)+sdslen(ssub
)-1 > REDIS_SORTKEY_MAX
) return NULL
;
5830 p
= strchr(spat
,'*');
5832 decrRefCount(subst
);
5837 sublen
= sdslen(ssub
);
5838 postfixlen
= sdslen(spat
)-(prefixlen
+1);
5839 memcpy(keyname
.buf
,spat
,prefixlen
);
5840 memcpy(keyname
.buf
+prefixlen
,ssub
,sublen
);
5841 memcpy(keyname
.buf
+prefixlen
+sublen
,p
+1,postfixlen
);
5842 keyname
.buf
[prefixlen
+sublen
+postfixlen
] = '\0';
5843 keyname
.len
= prefixlen
+sublen
+postfixlen
;
5845 initStaticStringObject(keyobj
,((char*)&keyname
)+(sizeof(long)*2))
5846 decrRefCount(subst
);
5848 /* printf("lookup '%s' => %p\n", keyname.buf,de); */
5849 return lookupKeyRead(db
,&keyobj
);
5852 /* sortCompare() is used by qsort in sortCommand(). Given that qsort_r with
5853 * the additional parameter is not standard but BSD-specific, we have to
5854 * pass sorting parameters via the global 'server' structure */
5855 static int sortCompare(const void *s1
, const void *s2
) {
5856 const redisSortObject
*so1
= s1
, *so2
= s2
;
5859 if (!server
.sort_alpha
) {
5860 /* Numeric sorting. Here it's trivial as we precomputed scores */
5861 if (so1
->u
.score
> so2
->u
.score
) {
5863 } else if (so1
->u
.score
< so2
->u
.score
) {
5869 /* Alphanumeric sorting */
5870 if (server
.sort_bypattern
) {
5871 if (!so1
->u
.cmpobj
|| !so2
->u
.cmpobj
) {
5872 /* At least one compare object is NULL */
5873 if (so1
->u
.cmpobj
== so2
->u
.cmpobj
)
5875 else if (so1
->u
.cmpobj
== NULL
)
5880 /* We have both the objects, use strcoll */
5881 cmp
= strcoll(so1
->u
.cmpobj
->ptr
,so2
->u
.cmpobj
->ptr
);
5884 /* Compare elements directly */
5887 dec1
= getDecodedObject(so1
->obj
);
5888 dec2
= getDecodedObject(so2
->obj
);
5889 cmp
= strcoll(dec1
->ptr
,dec2
->ptr
);
5894 return server
.sort_desc
? -cmp
: cmp
;
5897 /* The SORT command is the most complex command in Redis. Warning: this code
5898 * is optimized for speed and a bit less for readability */
5899 static void sortCommand(redisClient
*c
) {
5902 int desc
= 0, alpha
= 0;
5903 int limit_start
= 0, limit_count
= -1, start
, end
;
5904 int j
, dontsort
= 0, vectorlen
;
5905 int getop
= 0; /* GET operation counter */
5906 robj
*sortval
, *sortby
= NULL
, *storekey
= NULL
;
5907 redisSortObject
*vector
; /* Resulting vector to sort */
5909 /* Lookup the key to sort. It must be of the right types */
5910 sortval
= lookupKeyRead(c
->db
,c
->argv
[1]);
5911 if (sortval
== NULL
) {
5912 addReply(c
,shared
.nullmultibulk
);
5915 if (sortval
->type
!= REDIS_SET
&& sortval
->type
!= REDIS_LIST
&&
5916 sortval
->type
!= REDIS_ZSET
)
5918 addReply(c
,shared
.wrongtypeerr
);
5922 /* Create a list of operations to perform for every sorted element.
5923 * Operations can be GET/DEL/INCR/DECR */
5924 operations
= listCreate();
5925 listSetFreeMethod(operations
,zfree
);
5928 /* Now we need to protect sortval incrementing its count, in the future
5929 * SORT may have options able to overwrite/delete keys during the sorting
5930 * and the sorted key itself may get destroyed */
5931 incrRefCount(sortval
);
5933 /* The SORT command has an SQL-alike syntax, parse it */
5934 while(j
< c
->argc
) {
5935 int leftargs
= c
->argc
-j
-1;
5936 if (!strcasecmp(c
->argv
[j
]->ptr
,"asc")) {
5938 } else if (!strcasecmp(c
->argv
[j
]->ptr
,"desc")) {
5940 } else if (!strcasecmp(c
->argv
[j
]->ptr
,"alpha")) {
5942 } else if (!strcasecmp(c
->argv
[j
]->ptr
,"limit") && leftargs
>= 2) {
5943 limit_start
= atoi(c
->argv
[j
+1]->ptr
);
5944 limit_count
= atoi(c
->argv
[j
+2]->ptr
);
5946 } else if (!strcasecmp(c
->argv
[j
]->ptr
,"store") && leftargs
>= 1) {
5947 storekey
= c
->argv
[j
+1];
5949 } else if (!strcasecmp(c
->argv
[j
]->ptr
,"by") && leftargs
>= 1) {
5950 sortby
= c
->argv
[j
+1];
5951 /* If the BY pattern does not contain '*', i.e. it is constant,
5952 * we don't need to sort nor to lookup the weight keys. */
5953 if (strchr(c
->argv
[j
+1]->ptr
,'*') == NULL
) dontsort
= 1;
5955 } else if (!strcasecmp(c
->argv
[j
]->ptr
,"get") && leftargs
>= 1) {
5956 listAddNodeTail(operations
,createSortOperation(
5957 REDIS_SORT_GET
,c
->argv
[j
+1]));
5961 decrRefCount(sortval
);
5962 listRelease(operations
);
5963 addReply(c
,shared
.syntaxerr
);
5969 /* Load the sorting vector with all the objects to sort */
5970 switch(sortval
->type
) {
5971 case REDIS_LIST
: vectorlen
= listLength((list
*)sortval
->ptr
); break;
5972 case REDIS_SET
: vectorlen
= dictSize((dict
*)sortval
->ptr
); break;
5973 case REDIS_ZSET
: vectorlen
= dictSize(((zset
*)sortval
->ptr
)->dict
); break;
5974 default: vectorlen
= 0; redisAssert(0); /* Avoid GCC warning */
5976 vector
= zmalloc(sizeof(redisSortObject
)*vectorlen
);
5979 if (sortval
->type
== REDIS_LIST
) {
5980 list
*list
= sortval
->ptr
;
5984 listRewind(list
,&li
);
5985 while((ln
= listNext(&li
))) {
5986 robj
*ele
= ln
->value
;
5987 vector
[j
].obj
= ele
;
5988 vector
[j
].u
.score
= 0;
5989 vector
[j
].u
.cmpobj
= NULL
;
5997 if (sortval
->type
== REDIS_SET
) {
6000 zset
*zs
= sortval
->ptr
;
6004 di
= dictGetIterator(set
);
6005 while((setele
= dictNext(di
)) != NULL
) {
6006 vector
[j
].obj
= dictGetEntryKey(setele
);
6007 vector
[j
].u
.score
= 0;
6008 vector
[j
].u
.cmpobj
= NULL
;
6011 dictReleaseIterator(di
);
6013 redisAssert(j
== vectorlen
);
6015 /* Now it's time to load the right scores in the sorting vector */
6016 if (dontsort
== 0) {
6017 for (j
= 0; j
< vectorlen
; j
++) {
6021 byval
= lookupKeyByPattern(c
->db
,sortby
,vector
[j
].obj
);
6022 if (!byval
|| byval
->type
!= REDIS_STRING
) continue;
6024 vector
[j
].u
.cmpobj
= getDecodedObject(byval
);
6026 if (byval
->encoding
== REDIS_ENCODING_RAW
) {
6027 vector
[j
].u
.score
= strtod(byval
->ptr
,NULL
);
6029 /* Don't need to decode the object if it's
6030 * integer-encoded (the only encoding supported) so
6031 * far. We can just cast it */
6032 if (byval
->encoding
== REDIS_ENCODING_INT
) {
6033 vector
[j
].u
.score
= (long)byval
->ptr
;
6035 redisAssert(1 != 1);
6040 if (vector
[j
].obj
->encoding
== REDIS_ENCODING_RAW
)
6041 vector
[j
].u
.score
= strtod(vector
[j
].obj
->ptr
,NULL
);
6043 if (vector
[j
].obj
->encoding
== REDIS_ENCODING_INT
)
6044 vector
[j
].u
.score
= (long) vector
[j
].obj
->ptr
;
6046 redisAssert(1 != 1);
6053 /* We are ready to sort the vector... perform a bit of sanity check
6054 * on the LIMIT option too. We'll use a partial version of quicksort. */
6055 start
= (limit_start
< 0) ? 0 : limit_start
;
6056 end
= (limit_count
< 0) ? vectorlen
-1 : start
+limit_count
-1;
6057 if (start
>= vectorlen
) {
6058 start
= vectorlen
-1;
6061 if (end
>= vectorlen
) end
= vectorlen
-1;
6063 if (dontsort
== 0) {
6064 server
.sort_desc
= desc
;
6065 server
.sort_alpha
= alpha
;
6066 server
.sort_bypattern
= sortby
? 1 : 0;
6067 if (sortby
&& (start
!= 0 || end
!= vectorlen
-1))
6068 pqsort(vector
,vectorlen
,sizeof(redisSortObject
),sortCompare
, start
,end
);
6070 qsort(vector
,vectorlen
,sizeof(redisSortObject
),sortCompare
);
6073 /* Send command output to the output buffer, performing the specified
6074 * GET/DEL/INCR/DECR operations if any. */
6075 outputlen
= getop
? getop
*(end
-start
+1) : end
-start
+1;
6076 if (storekey
== NULL
) {
6077 /* STORE option not specified, send the sorting result to client */
6078 addReplySds(c
,sdscatprintf(sdsempty(),"*%d\r\n",outputlen
));
6079 for (j
= start
; j
<= end
; j
++) {
6084 addReplyBulkLen(c
,vector
[j
].obj
);
6085 addReply(c
,vector
[j
].obj
);
6086 addReply(c
,shared
.crlf
);
6088 listRewind(operations
,&li
);
6089 while((ln
= listNext(&li
))) {
6090 redisSortOperation
*sop
= ln
->value
;
6091 robj
*val
= lookupKeyByPattern(c
->db
,sop
->pattern
,
6094 if (sop
->type
== REDIS_SORT_GET
) {
6095 if (!val
|| val
->type
!= REDIS_STRING
) {
6096 addReply(c
,shared
.nullbulk
);
6098 addReplyBulkLen(c
,val
);
6100 addReply(c
,shared
.crlf
);
6103 redisAssert(sop
->type
== REDIS_SORT_GET
); /* always fails */
6108 robj
*listObject
= createListObject();
6109 list
*listPtr
= (list
*) listObject
->ptr
;
6111 /* STORE option specified, set the sorting result as a List object */
6112 for (j
= start
; j
<= end
; j
++) {
6117 listAddNodeTail(listPtr
,vector
[j
].obj
);
6118 incrRefCount(vector
[j
].obj
);
6120 listRewind(operations
,&li
);
6121 while((ln
= listNext(&li
))) {
6122 redisSortOperation
*sop
= ln
->value
;
6123 robj
*val
= lookupKeyByPattern(c
->db
,sop
->pattern
,
6126 if (sop
->type
== REDIS_SORT_GET
) {
6127 if (!val
|| val
->type
!= REDIS_STRING
) {
6128 listAddNodeTail(listPtr
,createStringObject("",0));
6130 listAddNodeTail(listPtr
,val
);
6134 redisAssert(sop
->type
== REDIS_SORT_GET
); /* always fails */
6138 if (dictReplace(c
->db
->dict
,storekey
,listObject
)) {
6139 incrRefCount(storekey
);
6141 /* Note: we add 1 because the DB is dirty anyway since even if the
6142 * SORT result is empty a new key is set and maybe the old content
6144 server
.dirty
+= 1+outputlen
;
6145 addReplySds(c
,sdscatprintf(sdsempty(),":%d\r\n",outputlen
));
6149 decrRefCount(sortval
);
6150 listRelease(operations
);
6151 for (j
= 0; j
< vectorlen
; j
++) {
6152 if (sortby
&& alpha
&& vector
[j
].u
.cmpobj
)
6153 decrRefCount(vector
[j
].u
.cmpobj
);
6158 /* Convert an amount of bytes into a human readable string in the form
6159 * of 100B, 2G, 100M, 4K, and so forth. */
6160 static void bytesToHuman(char *s
, unsigned long long n
) {
6165 sprintf(s
,"%lluB",n
);
6167 } else if (n
< (1024*1024)) {
6168 d
= (double)n
/(1024);
6169 sprintf(s
,"%.2fK",d
);
6170 } else if (n
< (1024LL*1024*1024)) {
6171 d
= (double)n
/(1024*1024);
6172 sprintf(s
,"%.2fM",d
);
6173 } else if (n
< (1024LL*1024*1024*1024)) {
6174 d
= (double)n
/(1024LL*1024*1024);
6175 sprintf(s
,"%.2fG",d
);
6179 /* Create the string returned by the INFO command. This is decoupled
6180 * from the INFO command itself as we need to report the same information
6181 * on memory corruption problems. */
6182 static sds
genRedisInfoString(void) {
6184 time_t uptime
= time(NULL
)-server
.stat_starttime
;
6188 bytesToHuman(hmem
,zmalloc_used_memory());
6189 info
= sdscatprintf(sdsempty(),
6190 "redis_version:%s\r\n"
6192 "multiplexing_api:%s\r\n"
6193 "process_id:%ld\r\n"
6194 "uptime_in_seconds:%ld\r\n"
6195 "uptime_in_days:%ld\r\n"
6196 "connected_clients:%d\r\n"
6197 "connected_slaves:%d\r\n"
6198 "blocked_clients:%d\r\n"
6199 "used_memory:%zu\r\n"
6200 "used_memory_human:%s\r\n"
6201 "changes_since_last_save:%lld\r\n"
6202 "bgsave_in_progress:%d\r\n"
6203 "last_save_time:%ld\r\n"
6204 "bgrewriteaof_in_progress:%d\r\n"
6205 "total_connections_received:%lld\r\n"
6206 "total_commands_processed:%lld\r\n"
6210 (sizeof(long) == 8) ? "64" : "32",
6215 listLength(server
.clients
)-listLength(server
.slaves
),
6216 listLength(server
.slaves
),
6217 server
.blpop_blocked_clients
,
6218 zmalloc_used_memory(),
6221 server
.bgsavechildpid
!= -1,
6223 server
.bgrewritechildpid
!= -1,
6224 server
.stat_numconnections
,
6225 server
.stat_numcommands
,
6226 server
.vm_enabled
!= 0,
6227 server
.masterhost
== NULL
? "master" : "slave"
6229 if (server
.masterhost
) {
6230 info
= sdscatprintf(info
,
6231 "master_host:%s\r\n"
6232 "master_port:%d\r\n"
6233 "master_link_status:%s\r\n"
6234 "master_last_io_seconds_ago:%d\r\n"
6237 (server
.replstate
== REDIS_REPL_CONNECTED
) ?
6239 server
.master
? ((int)(time(NULL
)-server
.master
->lastinteraction
)) : -1
6242 if (server
.vm_enabled
) {
6244 info
= sdscatprintf(info
,
6245 "vm_conf_max_memory:%llu\r\n"
6246 "vm_conf_page_size:%llu\r\n"
6247 "vm_conf_pages:%llu\r\n"
6248 "vm_stats_used_pages:%llu\r\n"
6249 "vm_stats_swapped_objects:%llu\r\n"
6250 "vm_stats_swappin_count:%llu\r\n"
6251 "vm_stats_swappout_count:%llu\r\n"
6252 "vm_stats_io_newjobs_len:%lu\r\n"
6253 "vm_stats_io_processing_len:%lu\r\n"
6254 "vm_stats_io_processed_len:%lu\r\n"
6255 "vm_stats_io_active_threads:%lu\r\n"
6256 "vm_stats_blocked_clients:%lu\r\n"
6257 ,(unsigned long long) server
.vm_max_memory
,
6258 (unsigned long long) server
.vm_page_size
,
6259 (unsigned long long) server
.vm_pages
,
6260 (unsigned long long) server
.vm_stats_used_pages
,
6261 (unsigned long long) server
.vm_stats_swapped_objects
,
6262 (unsigned long long) server
.vm_stats_swapins
,
6263 (unsigned long long) server
.vm_stats_swapouts
,
6264 (unsigned long) listLength(server
.io_newjobs
),
6265 (unsigned long) listLength(server
.io_processing
),
6266 (unsigned long) listLength(server
.io_processed
),
6267 (unsigned long) server
.io_active_threads
,
6268 (unsigned long) server
.vm_blocked_clients
6272 for (j
= 0; j
< server
.dbnum
; j
++) {
6273 long long keys
, vkeys
;
6275 keys
= dictSize(server
.db
[j
].dict
);
6276 vkeys
= dictSize(server
.db
[j
].expires
);
6277 if (keys
|| vkeys
) {
6278 info
= sdscatprintf(info
, "db%d:keys=%lld,expires=%lld\r\n",
6285 static void infoCommand(redisClient
*c
) {
6286 sds info
= genRedisInfoString();
6287 addReplySds(c
,sdscatprintf(sdsempty(),"$%lu\r\n",
6288 (unsigned long)sdslen(info
)));
6289 addReplySds(c
,info
);
6290 addReply(c
,shared
.crlf
);
6293 static void monitorCommand(redisClient
*c
) {
6294 /* ignore MONITOR if already slave or in monitor mode */
6295 if (c
->flags
& REDIS_SLAVE
) return;
6297 c
->flags
|= (REDIS_SLAVE
|REDIS_MONITOR
);
6299 listAddNodeTail(server
.monitors
,c
);
6300 addReply(c
,shared
.ok
);
6303 /* ================================= Expire ================================= */
6304 static int removeExpire(redisDb
*db
, robj
*key
) {
6305 if (dictDelete(db
->expires
,key
) == DICT_OK
) {
6312 static int setExpire(redisDb
*db
, robj
*key
, time_t when
) {
6313 if (dictAdd(db
->expires
,key
,(void*)when
) == DICT_ERR
) {
6321 /* Return the expire time of the specified key, or -1 if no expire
6322 * is associated with this key (i.e. the key is non volatile) */
6323 static time_t getExpire(redisDb
*db
, robj
*key
) {
6326 /* No expire? return ASAP */
6327 if (dictSize(db
->expires
) == 0 ||
6328 (de
= dictFind(db
->expires
,key
)) == NULL
) return -1;
6330 return (time_t) dictGetEntryVal(de
);
6333 static int expireIfNeeded(redisDb
*db
, robj
*key
) {
6337 /* No expire? return ASAP */
6338 if (dictSize(db
->expires
) == 0 ||
6339 (de
= dictFind(db
->expires
,key
)) == NULL
) return 0;
6341 /* Lookup the expire */
6342 when
= (time_t) dictGetEntryVal(de
);
6343 if (time(NULL
) <= when
) return 0;
6345 /* Delete the key */
6346 dictDelete(db
->expires
,key
);
6347 return dictDelete(db
->dict
,key
) == DICT_OK
;
6350 static int deleteIfVolatile(redisDb
*db
, robj
*key
) {
6353 /* No expire? return ASAP */
6354 if (dictSize(db
->expires
) == 0 ||
6355 (de
= dictFind(db
->expires
,key
)) == NULL
) return 0;
6357 /* Delete the key */
6359 dictDelete(db
->expires
,key
);
6360 return dictDelete(db
->dict
,key
) == DICT_OK
;
6363 static void expireGenericCommand(redisClient
*c
, robj
*key
, time_t seconds
) {
6366 de
= dictFind(c
->db
->dict
,key
);
6368 addReply(c
,shared
.czero
);
6372 if (deleteKey(c
->db
,key
)) server
.dirty
++;
6373 addReply(c
, shared
.cone
);
6376 time_t when
= time(NULL
)+seconds
;
6377 if (setExpire(c
->db
,key
,when
)) {
6378 addReply(c
,shared
.cone
);
6381 addReply(c
,shared
.czero
);
6387 static void expireCommand(redisClient
*c
) {
6388 expireGenericCommand(c
,c
->argv
[1],strtol(c
->argv
[2]->ptr
,NULL
,10));
6391 static void expireatCommand(redisClient
*c
) {
6392 expireGenericCommand(c
,c
->argv
[1],strtol(c
->argv
[2]->ptr
,NULL
,10)-time(NULL
));
6395 static void ttlCommand(redisClient
*c
) {
6399 expire
= getExpire(c
->db
,c
->argv
[1]);
6401 ttl
= (int) (expire
-time(NULL
));
6402 if (ttl
< 0) ttl
= -1;
6404 addReplySds(c
,sdscatprintf(sdsempty(),":%d\r\n",ttl
));
6407 /* ================================ MULTI/EXEC ============================== */
6409 /* Client state initialization for MULTI/EXEC */
6410 static void initClientMultiState(redisClient
*c
) {
6411 c
->mstate
.commands
= NULL
;
6412 c
->mstate
.count
= 0;
6415 /* Release all the resources associated with MULTI/EXEC state */
6416 static void freeClientMultiState(redisClient
*c
) {
6419 for (j
= 0; j
< c
->mstate
.count
; j
++) {
6421 multiCmd
*mc
= c
->mstate
.commands
+j
;
6423 for (i
= 0; i
< mc
->argc
; i
++)
6424 decrRefCount(mc
->argv
[i
]);
6427 zfree(c
->mstate
.commands
);
6430 /* Add a new command into the MULTI commands queue */
6431 static void queueMultiCommand(redisClient
*c
, struct redisCommand
*cmd
) {
6435 c
->mstate
.commands
= zrealloc(c
->mstate
.commands
,
6436 sizeof(multiCmd
)*(c
->mstate
.count
+1));
6437 mc
= c
->mstate
.commands
+c
->mstate
.count
;
6440 mc
->argv
= zmalloc(sizeof(robj
*)*c
->argc
);
6441 memcpy(mc
->argv
,c
->argv
,sizeof(robj
*)*c
->argc
);
6442 for (j
= 0; j
< c
->argc
; j
++)
6443 incrRefCount(mc
->argv
[j
]);
6447 static void multiCommand(redisClient
*c
) {
6448 c
->flags
|= REDIS_MULTI
;
6449 addReply(c
,shared
.ok
);
6452 static void discardCommand(redisClient
*c
) {
6453 if (!(c
->flags
& REDIS_MULTI
)) {
6454 addReplySds(c
,sdsnew("-ERR DISCARD without MULTI\r\n"));
6458 freeClientMultiState(c
);
6459 initClientMultiState(c
);
6460 c
->flags
&= (~REDIS_MULTI
);
6461 addReply(c
,shared
.ok
);
6464 static void execCommand(redisClient
*c
) {
6469 if (!(c
->flags
& REDIS_MULTI
)) {
6470 addReplySds(c
,sdsnew("-ERR EXEC without MULTI\r\n"));
6474 orig_argv
= c
->argv
;
6475 orig_argc
= c
->argc
;
6476 addReplySds(c
,sdscatprintf(sdsempty(),"*%d\r\n",c
->mstate
.count
));
6477 for (j
= 0; j
< c
->mstate
.count
; j
++) {
6478 c
->argc
= c
->mstate
.commands
[j
].argc
;
6479 c
->argv
= c
->mstate
.commands
[j
].argv
;
6480 call(c
,c
->mstate
.commands
[j
].cmd
);
6482 c
->argv
= orig_argv
;
6483 c
->argc
= orig_argc
;
6484 freeClientMultiState(c
);
6485 initClientMultiState(c
);
6486 c
->flags
&= (~REDIS_MULTI
);
6489 /* =========================== Blocking Operations ========================= */
6491 /* Currently Redis blocking operations support is limited to list POP ops,
6492 * so the current implementation is not fully generic, but it is also not
6493 * completely specific so it will not require a rewrite to support new
6494 * kinds of blocking operations in the future.
6496 * Still it's important to note that list blocking operations can be already
6497 * used as a notification mechanism in order to implement other blocking
6498 * operations at application level, so there must be very strong evidence
6499 * of usefulness and generality before new blocking operations are implemented.
6501 * This is how the current blocking POP works, we use BLPOP as example:
6502 * - If the user calls BLPOP and the key exists and contains a non empty list
6503 * then LPOP is called instead. So BLPOP is semantically the same as LPOP
6504 * if there is no need to block.
6505 * - If instead BLPOP is called and the key does not exist or the list is
6506 * empty we need to block. In order to do so we remove the notification for
6507 * new data to read in the client socket (so that we'll not serve new
6508 * requests if the blocking request is not served). Also we put the client
6509 * in a dictionary (db->blockingkeys) mapping keys to a list of clients
6510 * blocking for these keys.
6511 * - If a PUSH operation against a key with blocked clients waiting is
6512 * performed, we serve the first in the list: basically instead of pushing
6513 * the new element inside the list we return it to the (first / oldest)
6514 * blocking client, unblock the client, and remove it from the list.
6516 * The above comment and the source code should be enough in order to understand
6517 * the implementation and modify / fix it later.
6520 /* Set a client in blocking mode for the specified key, with the specified
6522 static void blockForKeys(redisClient
*c
, robj
**keys
, int numkeys
, time_t timeout
) {
6527 c
->blockingkeys
= zmalloc(sizeof(robj
*)*numkeys
);
6528 c
->blockingkeysnum
= numkeys
;
6529 c
->blockingto
= timeout
;
6530 for (j
= 0; j
< numkeys
; j
++) {
6531 /* Add the key in the client structure, to map clients -> keys */
6532 c
->blockingkeys
[j
] = keys
[j
];
6533 incrRefCount(keys
[j
]);
6535 /* And in the other "side", to map keys -> clients */
6536 de
= dictFind(c
->db
->blockingkeys
,keys
[j
]);
6540 /* For every key we take a list of clients blocked for it */
6542 retval
= dictAdd(c
->db
->blockingkeys
,keys
[j
],l
);
6543 incrRefCount(keys
[j
]);
6544 assert(retval
== DICT_OK
);
6546 l
= dictGetEntryVal(de
);
6548 listAddNodeTail(l
,c
);
6550 /* Mark the client as a blocked client */
6551 c
->flags
|= REDIS_BLOCKED
;
6552 server
.blpop_blocked_clients
++;
6555 /* Unblock a client that's waiting in a blocking operation such as BLPOP */
6556 static void unblockClientWaitingData(redisClient
*c
) {
6561 assert(c
->blockingkeys
!= NULL
);
6562 /* The client may wait for multiple keys, so unblock it for every key. */
6563 for (j
= 0; j
< c
->blockingkeysnum
; j
++) {
6564 /* Remove this client from the list of clients waiting for this key. */
6565 de
= dictFind(c
->db
->blockingkeys
,c
->blockingkeys
[j
]);
6567 l
= dictGetEntryVal(de
);
6568 listDelNode(l
,listSearchKey(l
,c
));
6569 /* If the list is empty we need to remove it to avoid wasting memory */
6570 if (listLength(l
) == 0)
6571 dictDelete(c
->db
->blockingkeys
,c
->blockingkeys
[j
]);
6572 decrRefCount(c
->blockingkeys
[j
]);
6574 /* Cleanup the client structure */
6575 zfree(c
->blockingkeys
);
6576 c
->blockingkeys
= NULL
;
6577 c
->flags
&= (~REDIS_BLOCKED
);
6578 server
.blpop_blocked_clients
--;
6579 /* We want to process data if there is some command waiting
6580 * in the input buffer. Note that this is safe even if
6581 * unblockClientWaitingData() gets called from freeClient() because
6582 * freeClient() will be smart enough to call this function
6583 * *after* c->querybuf was set to NULL. */
6584 if (c
->querybuf
&& sdslen(c
->querybuf
) > 0) processInputBuffer(c
);
6587 /* This should be called from any function PUSHing into lists.
6588 * 'c' is the "pushing client", 'key' is the key it is pushing data against,
6589 * 'ele' is the element pushed.
6591 * If the function returns 0 there was no client waiting for a list push
6594 * If the function returns 1 there was a client waiting for a list push
6595 * against this key, the element was passed to this client thus it's not
6596 * needed to actually add it to the list and the caller should return asap. */
6597 static int handleClientsWaitingListPush(redisClient
*c
, robj
*key
, robj
*ele
) {
6598 struct dictEntry
*de
;
6599 redisClient
*receiver
;
6603 de
= dictFind(c
->db
->blockingkeys
,key
);
6604 if (de
== NULL
) return 0;
6605 l
= dictGetEntryVal(de
);
6608 receiver
= ln
->value
;
6610 addReplySds(receiver
,sdsnew("*2\r\n"));
6611 addReplyBulkLen(receiver
,key
);
6612 addReply(receiver
,key
);
6613 addReply(receiver
,shared
.crlf
);
6614 addReplyBulkLen(receiver
,ele
);
6615 addReply(receiver
,ele
);
6616 addReply(receiver
,shared
.crlf
);
6617 unblockClientWaitingData(receiver
);
6621 /* Blocking RPOP/LPOP */
6622 static void blockingPopGenericCommand(redisClient
*c
, int where
) {
6627 for (j
= 1; j
< c
->argc
-1; j
++) {
6628 o
= lookupKeyWrite(c
->db
,c
->argv
[j
]);
6630 if (o
->type
!= REDIS_LIST
) {
6631 addReply(c
,shared
.wrongtypeerr
);
6634 list
*list
= o
->ptr
;
6635 if (listLength(list
) != 0) {
6636 /* If the list contains elements fall back to the usual
6637 * non-blocking POP operation */
6638 robj
*argv
[2], **orig_argv
;
6641 /* We need to alter the command arguments before calling
6642 * popGenericCommand() as the command takes a single key. */
6643 orig_argv
= c
->argv
;
6644 orig_argc
= c
->argc
;
6645 argv
[1] = c
->argv
[j
];
6649 /* Also the return value is different, we need to output
6650 * the multi bulk reply header and the key name. The
6651 * "real" command will add the last element (the value)
6652 * for us. If this sounds like a hack to you it's just
6653 * because it is... */
6654 addReplySds(c
,sdsnew("*2\r\n"));
6655 addReplyBulkLen(c
,argv
[1]);
6656 addReply(c
,argv
[1]);
6657 addReply(c
,shared
.crlf
);
6658 popGenericCommand(c
,where
);
6660 /* Fix the client structure with the original stuff */
6661 c
->argv
= orig_argv
;
6662 c
->argc
= orig_argc
;
6668 /* If the list is empty or the key does not exist we must block */
6669 timeout
= strtol(c
->argv
[c
->argc
-1]->ptr
,NULL
,10);
6670 if (timeout
> 0) timeout
+= time(NULL
);
6671 blockForKeys(c
,c
->argv
+1,c
->argc
-2,timeout
);
6674 static void blpopCommand(redisClient
*c
) {
6675 blockingPopGenericCommand(c
,REDIS_HEAD
);
6678 static void brpopCommand(redisClient
*c
) {
6679 blockingPopGenericCommand(c
,REDIS_TAIL
);
6682 /* =============================== Replication ============================= */
6684 static int syncWrite(int fd
, char *ptr
, ssize_t size
, int timeout
) {
6685 ssize_t nwritten
, ret
= size
;
6686 time_t start
= time(NULL
);
6690 if (aeWait(fd
,AE_WRITABLE
,1000) & AE_WRITABLE
) {
6691 nwritten
= write(fd
,ptr
,size
);
6692 if (nwritten
== -1) return -1;
6696 if ((time(NULL
)-start
) > timeout
) {
6704 static int syncRead(int fd
, char *ptr
, ssize_t size
, int timeout
) {
6705 ssize_t nread
, totread
= 0;
6706 time_t start
= time(NULL
);
6710 if (aeWait(fd
,AE_READABLE
,1000) & AE_READABLE
) {
6711 nread
= read(fd
,ptr
,size
);
6712 if (nread
== -1) return -1;
6717 if ((time(NULL
)-start
) > timeout
) {
6725 static int syncReadLine(int fd
, char *ptr
, ssize_t size
, int timeout
) {
6732 if (syncRead(fd
,&c
,1,timeout
) == -1) return -1;
6735 if (nread
&& *(ptr
-1) == '\r') *(ptr
-1) = '\0';
6746 static void syncCommand(redisClient
*c
) {
6747 /* ignore SYNC if already slave or in monitor mode */
6748 if (c
->flags
& REDIS_SLAVE
) return;
6750 /* SYNC can't be issued when the server has pending data to send to
6751 * the client about already issued commands. We need a fresh reply
6752 * buffer registering the differences between the BGSAVE and the current
6753 * dataset, so that we can copy to other slaves if needed. */
6754 if (listLength(c
->reply
) != 0) {
6755 addReplySds(c
,sdsnew("-ERR SYNC is invalid with pending input\r\n"));
6759 redisLog(REDIS_NOTICE
,"Slave ask for synchronization");
6760 /* Here we need to check if there is a background saving operation
6761 * in progress, or if it is required to start one */
6762 if (server
.bgsavechildpid
!= -1) {
6763 /* Ok a background save is in progress. Let's check if it is a good
6764 * one for replication, i.e. if there is another slave that is
6765 * registering differences since the server forked to save */
6770 listRewind(server
.slaves
,&li
);
6771 while((ln
= listNext(&li
))) {
6773 if (slave
->replstate
== REDIS_REPL_WAIT_BGSAVE_END
) break;
6776 /* Perfect, the server is already registering differences for
6777 * another slave. Set the right state, and copy the buffer. */
6778 listRelease(c
->reply
);
6779 c
->reply
= listDup(slave
->reply
);
6780 c
->replstate
= REDIS_REPL_WAIT_BGSAVE_END
;
6781 redisLog(REDIS_NOTICE
,"Waiting for end of BGSAVE for SYNC");
6783 /* No way, we need to wait for the next BGSAVE in order to
6784 * register differences */
6785 c
->replstate
= REDIS_REPL_WAIT_BGSAVE_START
;
6786 redisLog(REDIS_NOTICE
,"Waiting for next BGSAVE for SYNC");
6789 /* Ok we don't have a BGSAVE in progress, let's start one */
6790 redisLog(REDIS_NOTICE
,"Starting BGSAVE for SYNC");
6791 if (rdbSaveBackground(server
.dbfilename
) != REDIS_OK
) {
6792 redisLog(REDIS_NOTICE
,"Replication failed, can't BGSAVE");
6793 addReplySds(c
,sdsnew("-ERR Unalbe to perform background save\r\n"));
6796 c
->replstate
= REDIS_REPL_WAIT_BGSAVE_END
;
6799 c
->flags
|= REDIS_SLAVE
;
6801 listAddNodeTail(server
.slaves
,c
);
6805 static void sendBulkToSlave(aeEventLoop
*el
, int fd
, void *privdata
, int mask
) {
6806 redisClient
*slave
= privdata
;
6808 REDIS_NOTUSED(mask
);
6809 char buf
[REDIS_IOBUF_LEN
];
6810 ssize_t nwritten
, buflen
;
6812 if (slave
->repldboff
== 0) {
6813 /* Write the bulk write count before transferring the DB. In theory here
6814 * we don't know how much room there is in the output buffer of the
6815 * socket, but in practice SO_SNDLOWAT (the minimum count for output
6816 * operations) will never be smaller than the few bytes we need. */
6819 bulkcount
= sdscatprintf(sdsempty(),"$%lld\r\n",(unsigned long long)
6821 if (write(fd
,bulkcount
,sdslen(bulkcount
)) != (signed)sdslen(bulkcount
))
6829 lseek(slave
->repldbfd
,slave
->repldboff
,SEEK_SET
);
6830 buflen
= read(slave
->repldbfd
,buf
,REDIS_IOBUF_LEN
);
6832 redisLog(REDIS_WARNING
,"Read error sending DB to slave: %s",
6833 (buflen
== 0) ? "premature EOF" : strerror(errno
));
6837 if ((nwritten
= write(fd
,buf
,buflen
)) == -1) {
6838 redisLog(REDIS_VERBOSE
,"Write error sending DB to slave: %s",
6843 slave
->repldboff
+= nwritten
;
6844 if (slave
->repldboff
== slave
->repldbsize
) {
6845 close(slave
->repldbfd
);
6846 slave
->repldbfd
= -1;
6847 aeDeleteFileEvent(server
.el
,slave
->fd
,AE_WRITABLE
);
6848 slave
->replstate
= REDIS_REPL_ONLINE
;
6849 if (aeCreateFileEvent(server
.el
, slave
->fd
, AE_WRITABLE
,
6850 sendReplyToClient
, slave
) == AE_ERR
) {
6854 addReplySds(slave
,sdsempty());
6855 redisLog(REDIS_NOTICE
,"Synchronization with slave succeeded");
6859 /* This function is called at the end of every background saving.
6860 * The argument bgsaveerr is REDIS_OK if the background saving succeeded
6861 * otherwise REDIS_ERR is passed to the function.
6863 * The goal of this function is to handle slaves waiting for a successful
6864 * background saving in order to perform non-blocking synchronization. */
6865 static void updateSlavesWaitingBgsave(int bgsaveerr
) {
6867 int startbgsave
= 0;
6870 listRewind(server
.slaves
,&li
);
6871 while((ln
= listNext(&li
))) {
6872 redisClient
*slave
= ln
->value
;
6874 if (slave
->replstate
== REDIS_REPL_WAIT_BGSAVE_START
) {
6876 slave
->replstate
= REDIS_REPL_WAIT_BGSAVE_END
;
6877 } else if (slave
->replstate
== REDIS_REPL_WAIT_BGSAVE_END
) {
6878 struct redis_stat buf
;
6880 if (bgsaveerr
!= REDIS_OK
) {
6882 redisLog(REDIS_WARNING
,"SYNC failed. BGSAVE child returned an error");
6885 if ((slave
->repldbfd
= open(server
.dbfilename
,O_RDONLY
)) == -1 ||
6886 redis_fstat(slave
->repldbfd
,&buf
) == -1) {
6888 redisLog(REDIS_WARNING
,"SYNC failed. Can't open/stat DB after BGSAVE: %s", strerror(errno
));
6891 slave
->repldboff
= 0;
6892 slave
->repldbsize
= buf
.st_size
;
6893 slave
->replstate
= REDIS_REPL_SEND_BULK
;
6894 aeDeleteFileEvent(server
.el
,slave
->fd
,AE_WRITABLE
);
6895 if (aeCreateFileEvent(server
.el
, slave
->fd
, AE_WRITABLE
, sendBulkToSlave
, slave
) == AE_ERR
) {
6902 if (rdbSaveBackground(server
.dbfilename
) != REDIS_OK
) {
6905 listRewind(server
.slaves
,&li
);
6906 redisLog(REDIS_WARNING
,"SYNC failed. BGSAVE failed");
6907 while((ln
= listNext(&li
))) {
6908 redisClient
*slave
= ln
->value
;
6910 if (slave
->replstate
== REDIS_REPL_WAIT_BGSAVE_START
)
6917 static int syncWithMaster(void) {
6918 char buf
[1024], tmpfile
[256], authcmd
[1024];
6920 int fd
= anetTcpConnect(NULL
,server
.masterhost
,server
.masterport
);
6924 redisLog(REDIS_WARNING
,"Unable to connect to MASTER: %s",
6929 /* AUTH with the master if required. */
6930 if(server
.masterauth
) {
6931 snprintf(authcmd
, 1024, "AUTH %s\r\n", server
.masterauth
);
6932 if (syncWrite(fd
, authcmd
, strlen(server
.masterauth
)+7, 5) == -1) {
6934 redisLog(REDIS_WARNING
,"Unable to AUTH to MASTER: %s",
6938 /* Read the AUTH result. */
6939 if (syncReadLine(fd
,buf
,1024,3600) == -1) {
6941 redisLog(REDIS_WARNING
,"I/O error reading auth result from MASTER: %s",
6945 if (buf
[0] != '+') {
6947 redisLog(REDIS_WARNING
,"Cannot AUTH to MASTER, is the masterauth password correct?");
6952 /* Issue the SYNC command */
6953 if (syncWrite(fd
,"SYNC \r\n",7,5) == -1) {
6955 redisLog(REDIS_WARNING
,"I/O error writing to MASTER: %s",
6959 /* Read the bulk write count */
6960 if (syncReadLine(fd
,buf
,1024,3600) == -1) {
6962 redisLog(REDIS_WARNING
,"I/O error reading bulk count from MASTER: %s",
6966 if (buf
[0] != '$') {
6968 redisLog(REDIS_WARNING
,"Bad protocol from MASTER, the first byte is not '$', are you sure the host and port are right?");
6971 dumpsize
= strtol(buf
+1,NULL
,10);
6972 redisLog(REDIS_NOTICE
,"Receiving %ld bytes data dump from MASTER",dumpsize
);
6973 /* Read the bulk write data on a temp file */
6974 snprintf(tmpfile
,256,"temp-%d.%ld.rdb",(int)time(NULL
),(long int)random());
6975 dfd
= open(tmpfile
,O_CREAT
|O_WRONLY
,0644);
6978 redisLog(REDIS_WARNING
,"Opening the temp file needed for MASTER <-> SLAVE synchronization: %s",strerror(errno
));
6982 int nread
, nwritten
;
6984 nread
= read(fd
,buf
,(dumpsize
< 1024)?dumpsize
:1024);
6986 redisLog(REDIS_WARNING
,"I/O error trying to sync with MASTER: %s",
6992 nwritten
= write(dfd
,buf
,nread
);
6993 if (nwritten
== -1) {
6994 redisLog(REDIS_WARNING
,"Write error writing to the DB dump file needed for MASTER <-> SLAVE synchrnonization: %s", strerror(errno
));
7002 if (rename(tmpfile
,server
.dbfilename
) == -1) {
7003 redisLog(REDIS_WARNING
,"Failed trying to rename the temp DB into dump.rdb in MASTER <-> SLAVE synchronization: %s", strerror(errno
));
7009 if (rdbLoad(server
.dbfilename
) != REDIS_OK
) {
7010 redisLog(REDIS_WARNING
,"Failed trying to load the MASTER synchronization DB from disk");
7014 server
.master
= createClient(fd
);
7015 server
.master
->flags
|= REDIS_MASTER
;
7016 server
.master
->authenticated
= 1;
7017 server
.replstate
= REDIS_REPL_CONNECTED
;
7021 static void slaveofCommand(redisClient
*c
) {
7022 if (!strcasecmp(c
->argv
[1]->ptr
,"no") &&
7023 !strcasecmp(c
->argv
[2]->ptr
,"one")) {
7024 if (server
.masterhost
) {
7025 sdsfree(server
.masterhost
);
7026 server
.masterhost
= NULL
;
7027 if (server
.master
) freeClient(server
.master
);
7028 server
.replstate
= REDIS_REPL_NONE
;
7029 redisLog(REDIS_NOTICE
,"MASTER MODE enabled (user request)");
7032 sdsfree(server
.masterhost
);
7033 server
.masterhost
= sdsdup(c
->argv
[1]->ptr
);
7034 server
.masterport
= atoi(c
->argv
[2]->ptr
);
7035 if (server
.master
) freeClient(server
.master
);
7036 server
.replstate
= REDIS_REPL_CONNECT
;
7037 redisLog(REDIS_NOTICE
,"SLAVE OF %s:%d enabled (user request)",
7038 server
.masterhost
, server
.masterport
);
7040 addReply(c
,shared
.ok
);
7043 /* ============================ Maxmemory directive ======================== */
7045 /* Try to free one object from the pre-allocated objects free list.
7046 * This is useful under low mem conditions as by default we take 1 million
7047 * free objects allocated. On success REDIS_OK is returned, otherwise
7049 static int tryFreeOneObjectFromFreelist(void) {
7052 if (server
.vm_enabled
) pthread_mutex_lock(&server
.obj_freelist_mutex
);
7053 if (listLength(server
.objfreelist
)) {
7054 listNode
*head
= listFirst(server
.objfreelist
);
7055 o
= listNodeValue(head
);
7056 listDelNode(server
.objfreelist
,head
);
7057 if (server
.vm_enabled
) pthread_mutex_unlock(&server
.obj_freelist_mutex
);
7061 if (server
.vm_enabled
) pthread_mutex_unlock(&server
.obj_freelist_mutex
);
7066 /* This function gets called when 'maxmemory' is set on the config file to limit
7067 * the max memory used by the server, and we are out of memory.
7068 * This function will try to, in order:
7070 * - Free objects from the free list
7071 * - Try to remove keys with an EXPIRE set
7073 * If it is not possible to free enough memory to reach used-memory < maxmemory
7074 * the server will start refusing commands that will enlarge even more the
7077 static void freeMemoryIfNeeded(void) {
7078 while (server
.maxmemory
&& zmalloc_used_memory() > server
.maxmemory
) {
7079 int j
, k
, freed
= 0;
7081 if (tryFreeOneObjectFromFreelist() == REDIS_OK
) continue;
7082 for (j
= 0; j
< server
.dbnum
; j
++) {
7084 robj
*minkey
= NULL
;
7085 struct dictEntry
*de
;
7087 if (dictSize(server
.db
[j
].expires
)) {
7089 /* From a sample of three keys drop the one nearest to
7090 * the natural expire */
7091 for (k
= 0; k
< 3; k
++) {
7094 de
= dictGetRandomKey(server
.db
[j
].expires
);
7095 t
= (time_t) dictGetEntryVal(de
);
7096 if (minttl
== -1 || t
< minttl
) {
7097 minkey
= dictGetEntryKey(de
);
7101 deleteKey(server
.db
+j
,minkey
);
7104 if (!freed
) return; /* nothing to free... */
7108 /* ============================== Append Only file ========================== */
7110 static void feedAppendOnlyFile(struct redisCommand
*cmd
, int dictid
, robj
**argv
, int argc
) {
7111 sds buf
= sdsempty();
7117 /* The DB this command was targeting is not the same as the last command
7118 * we appended. Issuing a SELECT command is needed. */
7119 if (dictid
!= server
.appendseldb
) {
7122 snprintf(seldb
,sizeof(seldb
),"%d",dictid
);
7123 buf
= sdscatprintf(buf
,"*2\r\n$6\r\nSELECT\r\n$%lu\r\n%s\r\n",
7124 (unsigned long)strlen(seldb
),seldb
);
7125 server
.appendseldb
= dictid
;
7128 /* "Fix" the argv vector if the command is EXPIRE. We want to translate
7129 * EXPIREs into EXPIREATs calls */
7130 if (cmd
->proc
== expireCommand
) {
7133 tmpargv
[0] = createStringObject("EXPIREAT",8);
7134 tmpargv
[1] = argv
[1];
7135 incrRefCount(argv
[1]);
7136 when
= time(NULL
)+strtol(argv
[2]->ptr
,NULL
,10);
7137 tmpargv
[2] = createObject(REDIS_STRING
,
7138 sdscatprintf(sdsempty(),"%ld",when
));
7142 /* Append the actual command */
7143 buf
= sdscatprintf(buf
,"*%d\r\n",argc
);
7144 for (j
= 0; j
< argc
; j
++) {
7147 o
= getDecodedObject(o
);
7148 buf
= sdscatprintf(buf
,"$%lu\r\n",(unsigned long)sdslen(o
->ptr
));
7149 buf
= sdscatlen(buf
,o
->ptr
,sdslen(o
->ptr
));
7150 buf
= sdscatlen(buf
,"\r\n",2);
7154 /* Free the objects from the modified argv for EXPIREAT */
7155 if (cmd
->proc
== expireCommand
) {
7156 for (j
= 0; j
< 3; j
++)
7157 decrRefCount(argv
[j
]);
7160 /* We want to perform a single write. This should be guaranteed atomic
7161 * at least if the filesystem we are writing is a real physical one.
7162 * While this will save us against the server being killed I don't think
7163 * there is much to do about the whole server stopping for power problems
7165 nwritten
= write(server
.appendfd
,buf
,sdslen(buf
));
7166 if (nwritten
!= (signed)sdslen(buf
)) {
7167 /* Ooops, we are in trouble. The best thing to do for now is
7168 * to simply exit instead of giving the illusion that everything is
7169 * working as expected. */
7170 if (nwritten
== -1) {
7171 redisLog(REDIS_WARNING
,"Exiting on error writing to the append-only file: %s",strerror(errno
));
7173 redisLog(REDIS_WARNING
,"Exiting on short write while writing to the append-only file: %s",strerror(errno
));
7177 /* If a background append only file rewriting is in progress we want to
7178 * accumulate the differences between the child DB and the current one
7179 * in a buffer, so that when the child process will do its work we
7180 * can append the differences to the new append only file. */
7181 if (server
.bgrewritechildpid
!= -1)
7182 server
.bgrewritebuf
= sdscatlen(server
.bgrewritebuf
,buf
,sdslen(buf
));
7186 if (server
.appendfsync
== APPENDFSYNC_ALWAYS
||
7187 (server
.appendfsync
== APPENDFSYNC_EVERYSEC
&&
7188 now
-server
.lastfsync
> 1))
7190 fsync(server
.appendfd
); /* Let's try to get this data on the disk */
7191 server
.lastfsync
= now
;
7195 /* In Redis commands are always executed in the context of a client, so in
7196 * order to load the append only file we need to create a fake client. */
7197 static struct redisClient
*createFakeClient(void) {
7198 struct redisClient
*c
= zmalloc(sizeof(*c
));
7202 c
->querybuf
= sdsempty();
7206 /* We set the fake client as a slave waiting for the synchronization
7207 * so that Redis will not try to send replies to this client. */
7208 c
->replstate
= REDIS_REPL_WAIT_BGSAVE_START
;
7209 c
->reply
= listCreate();
7210 listSetFreeMethod(c
->reply
,decrRefCount
);
7211 listSetDupMethod(c
->reply
,dupClientReplyValue
);
7215 static void freeFakeClient(struct redisClient
*c
) {
7216 sdsfree(c
->querybuf
);
7217 listRelease(c
->reply
);
7221 /* Replay the append log file. On success REDIS_OK is returned. On non fatal
7222 * error (the append only file is zero-length) REDIS_ERR is returned. On
7223 * fatal error an error message is logged and the program exits. */
7224 int loadAppendOnlyFile(char *filename
) {
7225 struct redisClient
*fakeClient
;
7226 FILE *fp
= fopen(filename
,"r");
7227 struct redis_stat sb
;
7228 unsigned long long loadedkeys
= 0;
7230 if (redis_fstat(fileno(fp
),&sb
) != -1 && sb
.st_size
== 0)
7234 redisLog(REDIS_WARNING
,"Fatal error: can't open the append log file for reading: %s",strerror(errno
));
7238 fakeClient
= createFakeClient();
7245 struct redisCommand
*cmd
;
7247 if (fgets(buf
,sizeof(buf
),fp
) == NULL
) {
7253 if (buf
[0] != '*') goto fmterr
;
7255 argv
= zmalloc(sizeof(robj
*)*argc
);
7256 for (j
= 0; j
< argc
; j
++) {
7257 if (fgets(buf
,sizeof(buf
),fp
) == NULL
) goto readerr
;
7258 if (buf
[0] != '$') goto fmterr
;
7259 len
= strtol(buf
+1,NULL
,10);
7260 argsds
= sdsnewlen(NULL
,len
);
7261 if (len
&& fread(argsds
,len
,1,fp
) == 0) goto fmterr
;
7262 argv
[j
] = createObject(REDIS_STRING
,argsds
);
7263 if (fread(buf
,2,1,fp
) == 0) goto fmterr
; /* discard CRLF */
7266 /* Command lookup */
7267 cmd
= lookupCommand(argv
[0]->ptr
);
7269 redisLog(REDIS_WARNING
,"Unknown command '%s' reading the append only file", argv
[0]->ptr
);
7272 /* Try object sharing and encoding */
7273 if (server
.shareobjects
) {
7275 for(j
= 1; j
< argc
; j
++)
7276 argv
[j
] = tryObjectSharing(argv
[j
]);
7278 if (cmd
->flags
& REDIS_CMD_BULK
)
7279 tryObjectEncoding(argv
[argc
-1]);
7280 /* Run the command in the context of a fake client */
7281 fakeClient
->argc
= argc
;
7282 fakeClient
->argv
= argv
;
7283 cmd
->proc(fakeClient
);
7284 /* Discard the reply objects list from the fake client */
7285 while(listLength(fakeClient
->reply
))
7286 listDelNode(fakeClient
->reply
,listFirst(fakeClient
->reply
));
7287 /* Clean up, ready for the next command */
7288 for (j
= 0; j
< argc
; j
++) decrRefCount(argv
[j
]);
7290 /* Handle swapping while loading big datasets when VM is on */
7292 if (server
.vm_enabled
&& (loadedkeys
% 5000) == 0) {
7293 while (zmalloc_used_memory() > server
.vm_max_memory
) {
7294 if (vmSwapOneObjectBlocking() == REDIS_ERR
) break;
7299 freeFakeClient(fakeClient
);
7304 redisLog(REDIS_WARNING
,"Unexpected end of file reading the append only file");
7306 redisLog(REDIS_WARNING
,"Unrecoverable error reading the append only file: %s", strerror(errno
));
7310 redisLog(REDIS_WARNING
,"Bad file format reading the append only file");
7314 /* Write an object into a file in the bulk format $<count>\r\n<payload>\r\n */
7315 static int fwriteBulk(FILE *fp
, robj
*obj
) {
7319 /* Avoid the incr/decr ref count business if possible to help
7320 * copy-on-write (we are often in a child process when this function
7322 * Also makes sure that key objects don't get incrRefCount-ed when VM
7324 if (obj
->encoding
!= REDIS_ENCODING_RAW
) {
7325 obj
= getDecodedObject(obj
);
7328 snprintf(buf
,sizeof(buf
),"$%ld\r\n",(long)sdslen(obj
->ptr
));
7329 if (fwrite(buf
,strlen(buf
),1,fp
) == 0) goto err
;
7330 if (sdslen(obj
->ptr
) && fwrite(obj
->ptr
,sdslen(obj
->ptr
),1,fp
) == 0)
7332 if (fwrite("\r\n",2,1,fp
) == 0) goto err
;
7333 if (decrrc
) decrRefCount(obj
);
7336 if (decrrc
) decrRefCount(obj
);
7340 /* Write a double value in bulk format $<count>\r\n<payload>\r\n */
7341 static int fwriteBulkDouble(FILE *fp
, double d
) {
7342 char buf
[128], dbuf
[128];
7344 snprintf(dbuf
,sizeof(dbuf
),"%.17g\r\n",d
);
7345 snprintf(buf
,sizeof(buf
),"$%lu\r\n",(unsigned long)strlen(dbuf
)-2);
7346 if (fwrite(buf
,strlen(buf
),1,fp
) == 0) return 0;
7347 if (fwrite(dbuf
,strlen(dbuf
),1,fp
) == 0) return 0;
7351 /* Write a long value in bulk format $<count>\r\n<payload>\r\n */
7352 static int fwriteBulkLong(FILE *fp
, long l
) {
7353 char buf
[128], lbuf
[128];
7355 snprintf(lbuf
,sizeof(lbuf
),"%ld\r\n",l
);
7356 snprintf(buf
,sizeof(buf
),"$%lu\r\n",(unsigned long)strlen(lbuf
)-2);
7357 if (fwrite(buf
,strlen(buf
),1,fp
) == 0) return 0;
7358 if (fwrite(lbuf
,strlen(lbuf
),1,fp
) == 0) return 0;
7362 /* Write a sequence of commands able to fully rebuild the dataset into
7363 * "filename". Used both by REWRITEAOF and BGREWRITEAOF. */
7364 static int rewriteAppendOnlyFile(char *filename
) {
7365 dictIterator
*di
= NULL
;
7370 time_t now
= time(NULL
);
7372 /* Note that we have to use a different temp name here compared to the
7373 * one used by rewriteAppendOnlyFileBackground() function. */
7374 snprintf(tmpfile
,256,"temp-rewriteaof-%d.aof", (int) getpid());
7375 fp
= fopen(tmpfile
,"w");
7377 redisLog(REDIS_WARNING
, "Failed rewriting the append only file: %s", strerror(errno
));
7380 for (j
= 0; j
< server
.dbnum
; j
++) {
7381 char selectcmd
[] = "*2\r\n$6\r\nSELECT\r\n";
7382 redisDb
*db
= server
.db
+j
;
7384 if (dictSize(d
) == 0) continue;
7385 di
= dictGetIterator(d
);
7391 /* SELECT the new DB */
7392 if (fwrite(selectcmd
,sizeof(selectcmd
)-1,1,fp
) == 0) goto werr
;
7393 if (fwriteBulkLong(fp
,j
) == 0) goto werr
;
7395 /* Iterate this DB writing every entry */
7396 while((de
= dictNext(di
)) != NULL
) {
7401 key
= dictGetEntryKey(de
);
7402 /* If the value for this key is swapped, load a preview in memory.
7403 * We use a "swapped" flag to remember if we need to free the
7404 * value object instead to just increment the ref count anyway
7405 * in order to avoid copy-on-write of pages if we are forked() */
7406 if (!server
.vm_enabled
|| key
->storage
== REDIS_VM_MEMORY
||
7407 key
->storage
== REDIS_VM_SWAPPING
) {
7408 o
= dictGetEntryVal(de
);
7411 o
= vmPreviewObject(key
);
7414 expiretime
= getExpire(db
,key
);
7416 /* Save the key and associated value */
7417 if (o
->type
== REDIS_STRING
) {
7418 /* Emit a SET command */
7419 char cmd
[]="*3\r\n$3\r\nSET\r\n";
7420 if (fwrite(cmd
,sizeof(cmd
)-1,1,fp
) == 0) goto werr
;
7422 if (fwriteBulk(fp
,key
) == 0) goto werr
;
7423 if (fwriteBulk(fp
,o
) == 0) goto werr
;
7424 } else if (o
->type
== REDIS_LIST
) {
7425 /* Emit the RPUSHes needed to rebuild the list */
7426 list
*list
= o
->ptr
;
7430 listRewind(list
,&li
);
7431 while((ln
= listNext(&li
))) {
7432 char cmd
[]="*3\r\n$5\r\nRPUSH\r\n";
7433 robj
*eleobj
= listNodeValue(ln
);
7435 if (fwrite(cmd
,sizeof(cmd
)-1,1,fp
) == 0) goto werr
;
7436 if (fwriteBulk(fp
,key
) == 0) goto werr
;
7437 if (fwriteBulk(fp
,eleobj
) == 0) goto werr
;
7439 } else if (o
->type
== REDIS_SET
) {
7440 /* Emit the SADDs needed to rebuild the set */
7442 dictIterator
*di
= dictGetIterator(set
);
7445 while((de
= dictNext(di
)) != NULL
) {
7446 char cmd
[]="*3\r\n$4\r\nSADD\r\n";
7447 robj
*eleobj
= dictGetEntryKey(de
);
7449 if (fwrite(cmd
,sizeof(cmd
)-1,1,fp
) == 0) goto werr
;
7450 if (fwriteBulk(fp
,key
) == 0) goto werr
;
7451 if (fwriteBulk(fp
,eleobj
) == 0) goto werr
;
7453 dictReleaseIterator(di
);
7454 } else if (o
->type
== REDIS_ZSET
) {
7455 /* Emit the ZADDs needed to rebuild the sorted set */
7457 dictIterator
*di
= dictGetIterator(zs
->dict
);
7460 while((de
= dictNext(di
)) != NULL
) {
7461 char cmd
[]="*4\r\n$4\r\nZADD\r\n";
7462 robj
*eleobj
= dictGetEntryKey(de
);
7463 double *score
= dictGetEntryVal(de
);
7465 if (fwrite(cmd
,sizeof(cmd
)-1,1,fp
) == 0) goto werr
;
7466 if (fwriteBulk(fp
,key
) == 0) goto werr
;
7467 if (fwriteBulkDouble(fp
,*score
) == 0) goto werr
;
7468 if (fwriteBulk(fp
,eleobj
) == 0) goto werr
;
7470 dictReleaseIterator(di
);
7472 redisAssert(0 != 0);
7474 /* Save the expire time */
7475 if (expiretime
!= -1) {
7476 char cmd
[]="*3\r\n$8\r\nEXPIREAT\r\n";
7477 /* If this key is already expired skip it */
7478 if (expiretime
< now
) continue;
7479 if (fwrite(cmd
,sizeof(cmd
)-1,1,fp
) == 0) goto werr
;
7480 if (fwriteBulk(fp
,key
) == 0) goto werr
;
7481 if (fwriteBulkLong(fp
,expiretime
) == 0) goto werr
;
7483 if (swapped
) decrRefCount(o
);
7485 dictReleaseIterator(di
);
7488 /* Make sure data will not remain on the OS's output buffers */
7493 /* Use RENAME to make sure the DB file is changed atomically only
7494 * if the generate DB file is ok. */
7495 if (rename(tmpfile
,filename
) == -1) {
7496 redisLog(REDIS_WARNING
,"Error moving temp append only file on the final destination: %s", strerror(errno
));
7500 redisLog(REDIS_NOTICE
,"SYNC append only file rewrite performed");
7506 redisLog(REDIS_WARNING
,"Write error writing append only file on disk: %s", strerror(errno
));
7507 if (di
) dictReleaseIterator(di
);
7511 /* This is how rewriting of the append only file in background works:
7513 * 1) The user calls BGREWRITEAOF
7514 * 2) Redis calls this function, that forks():
7515 * 2a) the child rewrites the append only file in a temp file.
7516 * 2b) the parent accumulates differences in server.bgrewritebuf.
7517 * 3) When the child finishes '2a' it exits.
7518 * 4) The parent will trap the exit code, if it's OK, will append the
7519 * data accumulated into server.bgrewritebuf into the temp file, and
7520 * finally will rename(2) the temp file in the actual file name.
7521 * Then the new file is reopened as the new append only file. Profit!
7523 static int rewriteAppendOnlyFileBackground(void) {
7526 if (server
.bgrewritechildpid
!= -1) return REDIS_ERR
;
7527 if (server
.vm_enabled
) waitEmptyIOJobsQueue();
7528 if ((childpid
= fork()) == 0) {
7532 if (server
.vm_enabled
) vmReopenSwapFile();
7534 snprintf(tmpfile
,256,"temp-rewriteaof-bg-%d.aof", (int) getpid());
7535 if (rewriteAppendOnlyFile(tmpfile
) == REDIS_OK
) {
7542 if (childpid
== -1) {
7543 redisLog(REDIS_WARNING
,
7544 "Can't rewrite append only file in background: fork: %s",
7548 redisLog(REDIS_NOTICE
,
7549 "Background append only file rewriting started by pid %d",childpid
);
7550 server
.bgrewritechildpid
= childpid
;
7551 /* We set appendseldb to -1 in order to force the next call to the
7552 * feedAppendOnlyFile() to issue a SELECT command, so the differences
7553 * accumulated by the parent into server.bgrewritebuf will start
7554 * with a SELECT statement and it will be safe to merge. */
7555 server
.appendseldb
= -1;
7558 return REDIS_OK
; /* unreached */
7561 static void bgrewriteaofCommand(redisClient
*c
) {
7562 if (server
.bgrewritechildpid
!= -1) {
7563 addReplySds(c
,sdsnew("-ERR background append only file rewriting already in progress\r\n"));
7566 if (rewriteAppendOnlyFileBackground() == REDIS_OK
) {
7567 char *status
= "+Background append only file rewriting started\r\n";
7568 addReplySds(c
,sdsnew(status
));
7570 addReply(c
,shared
.err
);
7574 static void aofRemoveTempFile(pid_t childpid
) {
7577 snprintf(tmpfile
,256,"temp-rewriteaof-bg-%d.aof", (int) childpid
);
7581 /* Virtual Memory is composed mainly of two subsystems:
7582 * - Blocking Virtual Memory
7583 * - Threaded Virtual Memory I/O
7584 * The two parts are not fully decoupled, but functions are split among two
7585 * different sections of the source code (delimited by comments) in order to
7586 * make it clearer which functionality belongs to the blocking VM and which to
7587 * the threaded (not blocking) VM.
7591 * Redis VM is a blocking VM (one that blocks reading swapped values from
7592 * disk into memory when a value swapped out is needed in memory) that is made
7593 * unblocking by trying to examine the command argument vector in order to
7594 * load in background values that will likely be needed in order to exec
7595 * the command. The command is executed only once all the relevant keys
7596 * are loaded into memory.
7598 * This basically is almost as simple as a blocking VM, but almost as parallel
7599 * as a fully non-blocking VM.
7602 /* =================== Virtual Memory - Blocking Side ====================== */
7604 /* substitute the first occurrence of '%p' with the process pid in the
7605 * swap file name. */
7606 static void expandVmSwapFilename(void) {
7607 char *p
= strstr(server
.vm_swap_file
,"%p");
7613 new = sdscat(new,server
.vm_swap_file
);
7614 new = sdscatprintf(new,"%ld",(long) getpid());
7615 new = sdscat(new,p
+2);
7616 zfree(server
.vm_swap_file
);
7617 server
.vm_swap_file
= new;
7620 static void vmInit(void) {
7625 if (server
.vm_max_threads
!= 0)
7626 zmalloc_enable_thread_safeness(); /* we need thread safe zmalloc() */
7628 expandVmSwapFilename();
7629 redisLog(REDIS_NOTICE
,"Using '%s' as swap file",server
.vm_swap_file
);
7630 if ((server
.vm_fp
= fopen(server
.vm_swap_file
,"r+b")) == NULL
) {
7631 server
.vm_fp
= fopen(server
.vm_swap_file
,"w+b");
7633 if (server
.vm_fp
== NULL
) {
7634 redisLog(REDIS_WARNING
,
7635 "Impossible to open the swap file: %s. Exiting.",
7639 server
.vm_fd
= fileno(server
.vm_fp
);
7640 server
.vm_next_page
= 0;
7641 server
.vm_near_pages
= 0;
7642 server
.vm_stats_used_pages
= 0;
7643 server
.vm_stats_swapped_objects
= 0;
7644 server
.vm_stats_swapouts
= 0;
7645 server
.vm_stats_swapins
= 0;
7646 totsize
= server
.vm_pages
*server
.vm_page_size
;
7647 redisLog(REDIS_NOTICE
,"Allocating %lld bytes of swap file",totsize
);
7648 if (ftruncate(server
.vm_fd
,totsize
) == -1) {
7649 redisLog(REDIS_WARNING
,"Can't ftruncate swap file: %s. Exiting.",
7653 redisLog(REDIS_NOTICE
,"Swap file allocated with success");
7655 server
.vm_bitmap
= zmalloc((server
.vm_pages
+7)/8);
7656 redisLog(REDIS_VERBOSE
,"Allocated %lld bytes page table for %lld pages",
7657 (long long) (server
.vm_pages
+7)/8, server
.vm_pages
);
7658 memset(server
.vm_bitmap
,0,(server
.vm_pages
+7)/8);
7660 /* Initialize threaded I/O (used by Virtual Memory) */
7661 server
.io_newjobs
= listCreate();
7662 server
.io_processing
= listCreate();
7663 server
.io_processed
= listCreate();
7664 server
.io_ready_clients
= listCreate();
7665 pthread_mutex_init(&server
.io_mutex
,NULL
);
7666 pthread_mutex_init(&server
.obj_freelist_mutex
,NULL
);
7667 pthread_mutex_init(&server
.io_swapfile_mutex
,NULL
);
7668 server
.io_active_threads
= 0;
7669 if (pipe(pipefds
) == -1) {
7670 redisLog(REDIS_WARNING
,"Unable to intialized VM: pipe(2): %s. Exiting."
7674 server
.io_ready_pipe_read
= pipefds
[0];
7675 server
.io_ready_pipe_write
= pipefds
[1];
7676 redisAssert(anetNonBlock(NULL
,server
.io_ready_pipe_read
) != ANET_ERR
);
7677 /* LZF requires a lot of stack */
7678 pthread_attr_init(&server
.io_threads_attr
);
7679 pthread_attr_getstacksize(&server
.io_threads_attr
, &stacksize
);
7680 while (stacksize
< REDIS_THREAD_STACK_SIZE
) stacksize
*= 2;
7681 pthread_attr_setstacksize(&server
.io_threads_attr
, stacksize
);
7682 /* Listen for events in the threaded I/O pipe */
7683 if (aeCreateFileEvent(server
.el
, server
.io_ready_pipe_read
, AE_READABLE
,
7684 vmThreadedIOCompletedJob
, NULL
) == AE_ERR
)
7685 oom("creating file event");
7688 /* Mark the page as used */
7689 static void vmMarkPageUsed(off_t page
) {
7690 off_t byte
= page
/8;
7692 redisAssert(vmFreePage(page
) == 1);
7693 server
.vm_bitmap
[byte
] |= 1<<bit
;
7696 /* Mark N contiguous pages as used, with 'page' being the first. */
7697 static void vmMarkPagesUsed(off_t page
, off_t count
) {
7700 for (j
= 0; j
< count
; j
++)
7701 vmMarkPageUsed(page
+j
);
7702 server
.vm_stats_used_pages
+= count
;
7703 redisLog(REDIS_DEBUG
,"Mark USED pages: %lld pages at %lld\n",
7704 (long long)count
, (long long)page
);
7707 /* Mark the page as free */
7708 static void vmMarkPageFree(off_t page
) {
7709 off_t byte
= page
/8;
7711 redisAssert(vmFreePage(page
) == 0);
7712 server
.vm_bitmap
[byte
] &= ~(1<<bit
);
7715 /* Mark N contiguous pages as free, with 'page' being the first. */
7716 static void vmMarkPagesFree(off_t page
, off_t count
) {
7719 for (j
= 0; j
< count
; j
++)
7720 vmMarkPageFree(page
+j
);
7721 server
.vm_stats_used_pages
-= count
;
7722 redisLog(REDIS_DEBUG
,"Mark FREE pages: %lld pages at %lld\n",
7723 (long long)count
, (long long)page
);
7726 /* Test if the page is free */
7727 static int vmFreePage(off_t page
) {
7728 off_t byte
= page
/8;
7730 return (server
.vm_bitmap
[byte
] & (1<<bit
)) == 0;
7733 /* Find N contiguous free pages storing the first page of the cluster in *first.
7734 * Returns REDIS_OK if it was able to find N contiguous pages, otherwise
7735 * REDIS_ERR is returned.
7737 * This function uses a simple algorithm: we try to allocate
7738 * REDIS_VM_MAX_NEAR_PAGES sequentially, when we reach this limit we start
7739 * again from the start of the swap file searching for free spaces.
7741 * If it looks pretty clear that there are no free pages near our offset
7742 * we try to find less populated places doing a forward jump of
7743 * REDIS_VM_MAX_RANDOM_JUMP, then we start scanning again a few pages
7744 * without hurry, and then we jump again and so forth...
7746 * This function can be improved using a free list to avoid guessing
7747 * too much, since we could collect data about freed pages.
7749 * note: I implemented this function just after watching an episode of
7750 * Battlestar Galactica, where the hybrid was continuing to say "JUMP!"
7752 static int vmFindContiguousPages(off_t
*first
, off_t n
) {
7753 off_t base
, offset
= 0, since_jump
= 0, numfree
= 0;
7755 if (server
.vm_near_pages
== REDIS_VM_MAX_NEAR_PAGES
) {
7756 server
.vm_near_pages
= 0;
7757 server
.vm_next_page
= 0;
7759 server
.vm_near_pages
++; /* Yet another try for pages near to the old ones */
7760 base
= server
.vm_next_page
;
7762 while(offset
< server
.vm_pages
) {
7763 off_t
this = base
+offset
;
7765 /* If we overflow, restart from page zero */
7766 if (this >= server
.vm_pages
) {
7767 this -= server
.vm_pages
;
7769 /* Just overflowed, what we found on tail is no longer
7770 * interesting, as it's no longer contiguous. */
7774 if (vmFreePage(this)) {
7775 /* This is a free page */
7777 /* Already got N free pages? Return to the caller, with success */
7779 *first
= this-(n
-1);
7780 server
.vm_next_page
= this+1;
7781 redisLog(REDIS_DEBUG
, "FOUND CONTIGUOUS PAGES: %lld pages at %lld\n", (long long) n
, (long long) *first
);
7785 /* The current one is not a free page */
7789 /* Fast-forward if the current page is not free and we already
7790 * searched enough near this place. */
7792 if (!numfree
&& since_jump
>= REDIS_VM_MAX_RANDOM_JUMP
/4) {
7793 offset
+= random() % REDIS_VM_MAX_RANDOM_JUMP
;
7795 /* Note that even if we rewind after the jump, we don't need
7796 * to make sure numfree is set to zero as we only jump *if* it
7797 * is set to zero. */
7799 /* Otherwise just check the next page */
7806 /* Write the specified object at the specified page of the swap file */
7807 static int vmWriteObjectOnSwap(robj
*o
, off_t page
) {
7808 if (server
.vm_enabled
) pthread_mutex_lock(&server
.io_swapfile_mutex
);
7809 if (fseeko(server
.vm_fp
,page
*server
.vm_page_size
,SEEK_SET
) == -1) {
7810 if (server
.vm_enabled
) pthread_mutex_unlock(&server
.io_swapfile_mutex
);
7811 redisLog(REDIS_WARNING
,
7812 "Critical VM problem in vmWriteObjectOnSwap(): can't seek: %s",
7816 rdbSaveObject(server
.vm_fp
,o
);
7817 fflush(server
.vm_fp
);
7818 if (server
.vm_enabled
) pthread_mutex_unlock(&server
.io_swapfile_mutex
);
7822 /* Swap the 'val' object relative to 'key' into disk. Store all the information
7823 * needed to later retrieve the object into the key object.
7824 * If we can't find enough contiguous empty pages to swap the object on disk
7825 * REDIS_ERR is returned. */
7826 static int vmSwapObjectBlocking(robj
*key
, robj
*val
) {
7827 off_t pages
= rdbSavedObjectPages(val
,NULL
);
7830 assert(key
->storage
== REDIS_VM_MEMORY
);
7831 assert(key
->refcount
== 1);
7832 if (vmFindContiguousPages(&page
,pages
) == REDIS_ERR
) return REDIS_ERR
;
7833 if (vmWriteObjectOnSwap(val
,page
) == REDIS_ERR
) return REDIS_ERR
;
7834 key
->vm
.page
= page
;
7835 key
->vm
.usedpages
= pages
;
7836 key
->storage
= REDIS_VM_SWAPPED
;
7837 key
->vtype
= val
->type
;
7838 decrRefCount(val
); /* Deallocate the object from memory. */
7839 vmMarkPagesUsed(page
,pages
);
7840 redisLog(REDIS_DEBUG
,"VM: object %s swapped out at %lld (%lld pages)",
7841 (unsigned char*) key
->ptr
,
7842 (unsigned long long) page
, (unsigned long long) pages
);
7843 server
.vm_stats_swapped_objects
++;
7844 server
.vm_stats_swapouts
++;
7848 static robj
*vmReadObjectFromSwap(off_t page
, int type
) {
7851 if (server
.vm_enabled
) pthread_mutex_lock(&server
.io_swapfile_mutex
);
7852 if (fseeko(server
.vm_fp
,page
*server
.vm_page_size
,SEEK_SET
) == -1) {
7853 redisLog(REDIS_WARNING
,
7854 "Unrecoverable VM problem in vmReadObjectFromSwap(): can't seek: %s",
7858 o
= rdbLoadObject(type
,server
.vm_fp
);
7860 redisLog(REDIS_WARNING
, "Unrecoverable VM problem in vmReadObjectFromSwap(): can't load object from swap file: %s", strerror(errno
));
7863 if (server
.vm_enabled
) pthread_mutex_unlock(&server
.io_swapfile_mutex
);
7867 /* Load the value object relative to the 'key' object from swap to memory.
7868 * The newly allocated object is returned.
7870 * If preview is true the unserialized object is returned to the caller but
7871 * no changes are made to the key object, nor the pages are marked as freed */
7872 static robj
*vmGenericLoadObject(robj
*key
, int preview
) {
7875 redisAssert(key
->storage
== REDIS_VM_SWAPPED
|| key
->storage
== REDIS_VM_LOADING
);
7876 val
= vmReadObjectFromSwap(key
->vm
.page
,key
->vtype
);
7878 key
->storage
= REDIS_VM_MEMORY
;
7879 key
->vm
.atime
= server
.unixtime
;
7880 vmMarkPagesFree(key
->vm
.page
,key
->vm
.usedpages
);
7881 redisLog(REDIS_DEBUG
, "VM: object %s loaded from disk",
7882 (unsigned char*) key
->ptr
);
7883 server
.vm_stats_swapped_objects
--;
7885 redisLog(REDIS_DEBUG
, "VM: object %s previewed from disk",
7886 (unsigned char*) key
->ptr
);
7888 server
.vm_stats_swapins
++;
7892 /* Plain object loading, from swap to memory */
7893 static robj
*vmLoadObject(robj
*key
) {
7894 /* If we are loading the object in background, stop it, we
7895 * need to load this object synchronously ASAP. */
7896 if (key
->storage
== REDIS_VM_LOADING
)
7897 vmCancelThreadedIOJob(key
);
7898 return vmGenericLoadObject(key
,0);
7901 /* Just load the value on disk, without to modify the key.
7902 * This is useful when we want to perform some operation on the value
7903 * without to really bring it from swap to memory, like while saving the
7904 * dataset or rewriting the append only log. */
7905 static robj
*vmPreviewObject(robj
*key
) {
7906 return vmGenericLoadObject(key
,1);
7909 /* How a good candidate is this object for swapping?
7910 * The better candidate it is, the greater the returned value.
7912 * Currently we try to perform a fast estimation of the object size in
7913 * memory, and combine it with aging informations.
7915 * Basically swappability = idle-time * log(estimated size)
7917 * Bigger objects are preferred over smaller objects, but not
7918 * proportionally, this is why we use the logarithm. This algorithm is
7919 * just a first try and will probably be tuned later. */
7920 static double computeObjectSwappability(robj
*o
) {
7921 time_t age
= server
.unixtime
- o
->vm
.atime
;
7925 struct dictEntry
*de
;
7928 if (age
<= 0) return 0;
7931 if (o
->encoding
!= REDIS_ENCODING_RAW
) {
7934 asize
= sdslen(o
->ptr
)+sizeof(*o
)+sizeof(long)*2;
7939 listNode
*ln
= listFirst(l
);
7941 asize
= sizeof(list
);
7943 robj
*ele
= ln
->value
;
7946 elesize
= (ele
->encoding
== REDIS_ENCODING_RAW
) ?
7947 (sizeof(*o
)+sdslen(ele
->ptr
)) :
7949 asize
+= (sizeof(listNode
)+elesize
)*listLength(l
);
7954 z
= (o
->type
== REDIS_ZSET
);
7955 d
= z
? ((zset
*)o
->ptr
)->dict
: o
->ptr
;
7957 asize
= sizeof(dict
)+(sizeof(struct dictEntry
*)*dictSlots(d
));
7958 if (z
) asize
+= sizeof(zset
)-sizeof(dict
);
7963 de
= dictGetRandomKey(d
);
7964 ele
= dictGetEntryKey(de
);
7965 elesize
= (ele
->encoding
== REDIS_ENCODING_RAW
) ?
7966 (sizeof(*o
)+sdslen(ele
->ptr
)) :
7968 asize
+= (sizeof(struct dictEntry
)+elesize
)*dictSize(d
);
7969 if (z
) asize
+= sizeof(zskiplistNode
)*dictSize(d
);
7973 return (double)age
*log(1+asize
);
7976 /* Try to swap an object that's a good candidate for swapping.
7977 * Returns REDIS_OK if the object was swapped, REDIS_ERR if it's not possible
7978 * to swap any object at all.
7980 * If 'usethreaded' is true, Redis will try to swap the object in background
7981 * using I/O threads. */
7982 static int vmSwapOneObject(int usethreads
) {
7984 struct dictEntry
*best
= NULL
;
7985 double best_swappability
= 0;
7986 redisDb
*best_db
= NULL
;
7989 for (j
= 0; j
< server
.dbnum
; j
++) {
7990 redisDb
*db
= server
.db
+j
;
7991 /* Why maxtries is set to 100?
7992 * Because this way (usually) we'll find 1 object even if just 1% - 2%
7993 * are swappable objects */
7996 if (dictSize(db
->dict
) == 0) continue;
7997 for (i
= 0; i
< 5; i
++) {
7999 double swappability
;
8001 if (maxtries
) maxtries
--;
8002 de
= dictGetRandomKey(db
->dict
);
8003 key
= dictGetEntryKey(de
);
8004 val
= dictGetEntryVal(de
);
8005 /* Only swap objects that are currently in memory.
8007 * Also don't swap shared objects if threaded VM is on, as we
8008 * try to ensure that the main thread does not touch the
8009 * object while the I/O thread is using it, but we can't
8010 * control other keys without adding additional mutex. */
8011 if (key
->storage
!= REDIS_VM_MEMORY
||
8012 (server
.vm_max_threads
!= 0 && val
->refcount
!= 1)) {
8013 if (maxtries
) i
--; /* don't count this try */
8016 swappability
= computeObjectSwappability(val
);
8017 if (!best
|| swappability
> best_swappability
) {
8019 best_swappability
= swappability
;
8024 if (best
== NULL
) return REDIS_ERR
;
8025 key
= dictGetEntryKey(best
);
8026 val
= dictGetEntryVal(best
);
8028 redisLog(REDIS_DEBUG
,"Key with best swappability: %s, %f",
8029 key
->ptr
, best_swappability
);
8031 /* Unshare the key if needed */
8032 if (key
->refcount
> 1) {
8033 robj
*newkey
= dupStringObject(key
);
8035 key
= dictGetEntryKey(best
) = newkey
;
8039 vmSwapObjectThreaded(key
,val
,best_db
);
8042 if (vmSwapObjectBlocking(key
,val
) == REDIS_OK
) {
8043 dictGetEntryVal(best
) = NULL
;
/* Swap out one object synchronously. See vmSwapOneObject() for the return
 * value. Declared with an explicit (void) parameter list so that the
 * compiler rejects calls passing arguments. */
static int vmSwapOneObjectBlocking(void) {
    return vmSwapOneObject(0);
}
/* Swap out one object using the I/O threads. See vmSwapOneObject() for the
 * return value. Declared with an explicit (void) parameter list so that
 * the compiler rejects calls passing arguments. */
static int vmSwapOneObjectThreaded(void) {
    return vmSwapOneObject(1);
}
8059 /* Return true if it's safe to swap out objects in a given moment.
8060 * Basically we don't want to swap objects out while there is a BGSAVE
8061 * or a BGAEOREWRITE running in backgroud. */
8062 static int vmCanSwapOut(void) {
8063 return (server
.bgsavechildpid
== -1 && server
.bgrewritechildpid
== -1);
8066 /* Delete a key if swapped. Returns 1 if the key was found, was swapped
8067 * and was deleted. Otherwise 0 is returned. */
8068 static int deleteIfSwapped(redisDb
*db
, robj
*key
) {
8072 if ((de
= dictFind(db
->dict
,key
)) == NULL
) return 0;
8073 foundkey
= dictGetEntryKey(de
);
8074 if (foundkey
->storage
== REDIS_VM_MEMORY
) return 0;
8079 /* =================== Virtual Memory - Threaded I/O ======================= */
8081 static void freeIOJob(iojob
*j
) {
8082 if ((j
->type
== REDIS_IOJOB_PREPARE_SWAP
||
8083 j
->type
== REDIS_IOJOB_DO_SWAP
||
8084 j
->type
== REDIS_IOJOB_LOAD
) && j
->val
!= NULL
)
8085 decrRefCount(j
->val
);
8086 decrRefCount(j
->key
);
8090 /* Every time a thread finished a Job, it writes a byte into the write side
8091 * of an unix pipe in order to "awake" the main thread, and this function
8093 static void vmThreadedIOCompletedJob(aeEventLoop
*el
, int fd
, void *privdata
,
8097 int retval
, processed
= 0, toprocess
= -1, trytoswap
= 1;
8099 REDIS_NOTUSED(mask
);
8100 REDIS_NOTUSED(privdata
);
8102 /* For every byte we read in the read side of the pipe, there is one
8103 * I/O job completed to process. */
8104 while((retval
= read(fd
,buf
,1)) == 1) {
8108 struct dictEntry
*de
;
8110 redisLog(REDIS_DEBUG
,"Processing I/O completed job");
8112 /* Get the processed element (the oldest one) */
8114 assert(listLength(server
.io_processed
) != 0);
8115 if (toprocess
== -1) {
8116 toprocess
= (listLength(server
.io_processed
)*REDIS_MAX_COMPLETED_JOBS_PROCESSED
)/100;
8117 if (toprocess
<= 0) toprocess
= 1;
8119 ln
= listFirst(server
.io_processed
);
8121 listDelNode(server
.io_processed
,ln
);
8123 /* If this job is marked as canceled, just ignore it */
8128 /* Post process it in the main thread, as there are things we
8129 * can do just here to avoid race conditions and/or invasive locks */
8130 redisLog(REDIS_DEBUG
,"Job %p type: %d, key at %p (%s) refcount: %d\n", (void*) j
, j
->type
, (void*)j
->key
, (char*)j
->key
->ptr
, j
->key
->refcount
);
8131 de
= dictFind(j
->db
->dict
,j
->key
);
8133 key
= dictGetEntryKey(de
);
8134 if (j
->type
== REDIS_IOJOB_LOAD
) {
8137 /* Key loaded, bring it at home */
8138 key
->storage
= REDIS_VM_MEMORY
;
8139 key
->vm
.atime
= server
.unixtime
;
8140 vmMarkPagesFree(key
->vm
.page
,key
->vm
.usedpages
);
8141 redisLog(REDIS_DEBUG
, "VM: object %s loaded from disk (threaded)",
8142 (unsigned char*) key
->ptr
);
8143 server
.vm_stats_swapped_objects
--;
8144 server
.vm_stats_swapins
++;
8145 dictGetEntryVal(de
) = j
->val
;
8146 incrRefCount(j
->val
);
8149 /* Handle clients waiting for this key to be loaded. */
8150 handleClientsBlockedOnSwappedKey(db
,key
);
8151 } else if (j
->type
== REDIS_IOJOB_PREPARE_SWAP
) {
8152 /* Now we know the amount of pages required to swap this object.
8153 * Let's find some space for it, and queue this task again
8154 * rebranded as REDIS_IOJOB_DO_SWAP. */
8155 if (!vmCanSwapOut() ||
8156 vmFindContiguousPages(&j
->page
,j
->pages
) == REDIS_ERR
)
8158 /* Ooops... no space or we can't swap as there is
8159 * a fork()ed Redis trying to save stuff on disk. */
8161 key
->storage
= REDIS_VM_MEMORY
; /* undo operation */
8163 /* Note that we need to mark this pages as used now,
8164 * if the job will be canceled, we'll mark them as freed
8166 vmMarkPagesUsed(j
->page
,j
->pages
);
8167 j
->type
= REDIS_IOJOB_DO_SWAP
;
8172 } else if (j
->type
== REDIS_IOJOB_DO_SWAP
) {
8175 /* Key swapped. We can finally free some memory. */
8176 if (key
->storage
!= REDIS_VM_SWAPPING
) {
8177 printf("key->storage: %d\n",key
->storage
);
8178 printf("key->name: %s\n",(char*)key
->ptr
);
8179 printf("key->refcount: %d\n",key
->refcount
);
8180 printf("val: %p\n",(void*)j
->val
);
8181 printf("val->type: %d\n",j
->val
->type
);
8182 printf("val->ptr: %s\n",(char*)j
->val
->ptr
);
8184 redisAssert(key
->storage
== REDIS_VM_SWAPPING
);
8185 val
= dictGetEntryVal(de
);
8186 key
->vm
.page
= j
->page
;
8187 key
->vm
.usedpages
= j
->pages
;
8188 key
->storage
= REDIS_VM_SWAPPED
;
8189 key
->vtype
= j
->val
->type
;
8190 decrRefCount(val
); /* Deallocate the object from memory. */
8191 dictGetEntryVal(de
) = NULL
;
8192 redisLog(REDIS_DEBUG
,
8193 "VM: object %s swapped out at %lld (%lld pages) (threaded)",
8194 (unsigned char*) key
->ptr
,
8195 (unsigned long long) j
->page
, (unsigned long long) j
->pages
);
8196 server
.vm_stats_swapped_objects
++;
8197 server
.vm_stats_swapouts
++;
8199 /* Put a few more swap requests in queue if we are still
8201 if (trytoswap
&& vmCanSwapOut() &&
8202 zmalloc_used_memory() > server
.vm_max_memory
)
8207 more
= listLength(server
.io_newjobs
) <
8208 (unsigned) server
.vm_max_threads
;
8210 /* Don't waste CPU time if swappable objects are rare. */
8211 if (vmSwapOneObjectThreaded() == REDIS_ERR
) {
8219 if (processed
== toprocess
) return;
8221 if (retval
< 0 && errno
!= EAGAIN
) {
8222 redisLog(REDIS_WARNING
,
8223 "WARNING: read(2) error in vmThreadedIOCompletedJob() %s",
8228 static void lockThreadedIO(void) {
8229 pthread_mutex_lock(&server
.io_mutex
);
8232 static void unlockThreadedIO(void) {
8233 pthread_mutex_unlock(&server
.io_mutex
);
8236 /* Remove the specified object from the threaded I/O queue if still not
8237 * processed, otherwise make sure to flag it as canceled. */
8238 static void vmCancelThreadedIOJob(robj
*o
) {
8240 server
.io_newjobs
, /* 0 */
8241 server
.io_processing
, /* 1 */
8242 server
.io_processed
/* 2 */
8246 assert(o
->storage
== REDIS_VM_LOADING
|| o
->storage
== REDIS_VM_SWAPPING
);
8249 /* Search for a matching key in one of the queues */
8250 for (i
= 0; i
< 3; i
++) {
8254 listRewind(lists
[i
],&li
);
8255 while ((ln
= listNext(&li
)) != NULL
) {
8256 iojob
*job
= ln
->value
;
8258 if (job
->canceled
) continue; /* Skip this, already canceled. */
8259 if (compareStringObjects(job
->key
,o
) == 0) {
8260 redisLog(REDIS_DEBUG
,"*** CANCELED %p (%s) (type %d) (LIST ID %d)\n",
8261 (void*)job
, (char*)o
->ptr
, job
->type
, i
);
8262 /* Mark the pages as free since the swap didn't happened
8263 * or happened but is now discarded. */
8264 if (i
!= 1 && job
->type
== REDIS_IOJOB_DO_SWAP
)
8265 vmMarkPagesFree(job
->page
,job
->pages
);
8266 /* Cancel the job. It depends on the list the job is
8269 case 0: /* io_newjobs */
8270 /* If the job was yet not processed the best thing to do
8271 * is to remove it from the queue at all */
8273 listDelNode(lists
[i
],ln
);
8275 case 1: /* io_processing */
8276 /* Oh Shi- the thread is messing with the Job:
8278 * Probably it's accessing the object if this is a
8279 * PREPARE_SWAP or DO_SWAP job.
8280 * If it's a LOAD job it may be reading from disk and
8281 * if we don't wait for the job to terminate before to
8282 * cancel it, maybe in a few microseconds data can be
8283 * corrupted in this pages. So the short story is:
8285 * Better to wait for the job to move into the
8286 * next queue (processed)... */
8288 /* We try again and again until the job is completed. */
8290 /* But let's wait some time for the I/O thread
8291 * to finish with this job. After all this condition
8292 * should be very rare. */
8295 case 2: /* io_processed */
8296 /* The job was already processed, that's easy...
8297 * just mark it as canceled so that we'll ignore it
8298 * when processing completed jobs. */
8302 /* Finally we have to adjust the storage type of the object
8303 * in order to "UNDO" the operaiton. */
8304 if (o
->storage
== REDIS_VM_LOADING
)
8305 o
->storage
= REDIS_VM_SWAPPED
;
8306 else if (o
->storage
== REDIS_VM_SWAPPING
)
8307 o
->storage
= REDIS_VM_MEMORY
;
8314 assert(1 != 1); /* We should never reach this */
8317 static void *IOThreadEntryPoint(void *arg
) {
8322 pthread_detach(pthread_self());
8324 /* Get a new job to process */
8326 if (listLength(server
.io_newjobs
) == 0) {
8327 /* No new jobs in queue, exit. */
8328 redisLog(REDIS_DEBUG
,"Thread %ld exiting, nothing to do",
8329 (long) pthread_self());
8330 server
.io_active_threads
--;
8334 ln
= listFirst(server
.io_newjobs
);
8336 listDelNode(server
.io_newjobs
,ln
);
8337 /* Add the job in the processing queue */
8338 j
->thread
= pthread_self();
8339 listAddNodeTail(server
.io_processing
,j
);
8340 ln
= listLast(server
.io_processing
); /* We use ln later to remove it */
8342 redisLog(REDIS_DEBUG
,"Thread %ld got a new job (type %d): %p about key '%s'",
8343 (long) pthread_self(), j
->type
, (void*)j
, (char*)j
->key
->ptr
);
8345 /* Process the Job */
8346 if (j
->type
== REDIS_IOJOB_LOAD
) {
8347 j
->val
= vmReadObjectFromSwap(j
->page
,j
->key
->vtype
);
8348 } else if (j
->type
== REDIS_IOJOB_PREPARE_SWAP
) {
8349 FILE *fp
= fopen("/dev/null","w+");
8350 j
->pages
= rdbSavedObjectPages(j
->val
,fp
);
8352 } else if (j
->type
== REDIS_IOJOB_DO_SWAP
) {
8353 if (vmWriteObjectOnSwap(j
->val
,j
->page
) == REDIS_ERR
)
8357 /* Done: insert the job into the processed queue */
8358 redisLog(REDIS_DEBUG
,"Thread %ld completed the job: %p (key %s)",
8359 (long) pthread_self(), (void*)j
, (char*)j
->key
->ptr
);
8361 listDelNode(server
.io_processing
,ln
);
8362 listAddNodeTail(server
.io_processed
,j
);
8365 /* Signal the main thread there is new stuff to process */
8366 assert(write(server
.io_ready_pipe_write
,"x",1) == 1);
8368 return NULL
; /* never reached */
8371 static void spawnIOThread(void) {
8373 sigset_t mask
, omask
;
8376 sigaddset(&mask
,SIGCHLD
);
8377 sigaddset(&mask
,SIGHUP
);
8378 sigaddset(&mask
,SIGPIPE
);
8379 pthread_sigmask(SIG_SETMASK
, &mask
, &omask
);
8380 pthread_create(&thread
,&server
.io_threads_attr
,IOThreadEntryPoint
,NULL
);
8381 pthread_sigmask(SIG_SETMASK
, &omask
, NULL
);
8382 server
.io_active_threads
++;
8385 /* We need to wait for the last thread to exit before we are able to
8386 * fork() in order to BGSAVE or BGREWRITEAOF. */
8387 static void waitEmptyIOJobsQueue(void) {
8389 int io_processed_len
;
8392 if (listLength(server
.io_newjobs
) == 0 &&
8393 listLength(server
.io_processing
) == 0 &&
8394 server
.io_active_threads
== 0)
8399 /* While waiting for empty jobs queue condition we post-process some
8400 * finshed job, as I/O threads may be hanging trying to write against
8401 * the io_ready_pipe_write FD but there are so much pending jobs that
8403 io_processed_len
= listLength(server
.io_processed
);
8405 if (io_processed_len
) {
8406 vmThreadedIOCompletedJob(NULL
,server
.io_ready_pipe_read
,NULL
,0);
8407 usleep(1000); /* 1 millisecond */
8409 usleep(10000); /* 10 milliseconds */
8414 static void vmReopenSwapFile(void) {
8415 /* Note: we don't close the old one as we are in the child process
8416 * and don't want to mess at all with the original file object. */
8417 server
.vm_fp
= fopen(server
.vm_swap_file
,"r+b");
8418 if (server
.vm_fp
== NULL
) {
8419 redisLog(REDIS_WARNING
,"Can't re-open the VM swap file: %s. Exiting.",
8420 server
.vm_swap_file
);
8423 server
.vm_fd
= fileno(server
.vm_fp
);
8426 /* This function must be called while with threaded IO locked */
8427 static void queueIOJob(iojob
*j
) {
8428 redisLog(REDIS_DEBUG
,"Queued IO Job %p type %d about key '%s'\n",
8429 (void*)j
, j
->type
, (char*)j
->key
->ptr
);
8430 listAddNodeTail(server
.io_newjobs
,j
);
8431 if (server
.io_active_threads
< server
.vm_max_threads
)
8435 static int vmSwapObjectThreaded(robj
*key
, robj
*val
, redisDb
*db
) {
8438 assert(key
->storage
== REDIS_VM_MEMORY
);
8439 assert(key
->refcount
== 1);
8441 j
= zmalloc(sizeof(*j
));
8442 j
->type
= REDIS_IOJOB_PREPARE_SWAP
;
8444 j
->key
= dupStringObject(key
);
8448 j
->thread
= (pthread_t
) -1;
8449 key
->storage
= REDIS_VM_SWAPPING
;
8457 /* ============ Virtual Memory - Blocking clients on missing keys =========== */
8459 /* This function makes the clinet 'c' waiting for the key 'key' to be loaded.
8460 * If there is not already a job loading the key, it is craeted.
8461 * The key is added to the io_keys list in the client structure, and also
8462 * in the hash table mapping swapped keys to waiting clients, that is,
8463 * server.io_waited_keys. */
8464 static int waitForSwappedKey(redisClient
*c
, robj
*key
) {
8465 struct dictEntry
*de
;
8469 /* If the key does not exist or is already in RAM we don't need to
8470 * block the client at all. */
8471 de
= dictFind(c
->db
->dict
,key
);
8472 if (de
== NULL
) return 0;
8473 o
= dictGetEntryKey(de
);
8474 if (o
->storage
== REDIS_VM_MEMORY
) {
8476 } else if (o
->storage
== REDIS_VM_SWAPPING
) {
8477 /* We were swapping the key, undo it! */
8478 vmCancelThreadedIOJob(o
);
8482 /* OK: the key is either swapped, or being loaded just now. */
8484 /* Add the key to the list of keys this client is waiting for.
8485 * This maps clients to keys they are waiting for. */
8486 listAddNodeTail(c
->io_keys
,key
);
8489 /* Add the client to the swapped keys => clients waiting map. */
8490 de
= dictFind(c
->db
->io_keys
,key
);
8494 /* For every key we take a list of clients blocked for it */
8496 retval
= dictAdd(c
->db
->io_keys
,key
,l
);
8498 assert(retval
== DICT_OK
);
8500 l
= dictGetEntryVal(de
);
8502 listAddNodeTail(l
,c
);
8504 /* Are we already loading the key from disk? If not create a job */
8505 if (o
->storage
== REDIS_VM_SWAPPED
) {
8508 o
->storage
= REDIS_VM_LOADING
;
8509 j
= zmalloc(sizeof(*j
));
8510 j
->type
= REDIS_IOJOB_LOAD
;
8512 j
->key
= dupStringObject(key
);
8513 j
->key
->vtype
= o
->vtype
;
8514 j
->page
= o
->vm
.page
;
8517 j
->thread
= (pthread_t
) -1;
8525 /* Is this client attempting to run a command against swapped keys?
8526 * If so, block it ASAP, load the keys in background, then resume it.
8528 * The important idea about this function is that it can fail! If keys will
8529 * still be swapped when the client is resumed, this key lookups will
8530 * just block loading keys from disk. In practical terms this should only
8531 * happen with SORT BY command or if there is a bug in this function.
8533 * Return 1 if the client is marked as blocked, 0 if the client can
8534 * continue as the keys it is going to access appear to be in memory. */
8535 static int blockClientOnSwappedKeys(struct redisCommand
*cmd
, redisClient
*c
) {
8538 if (cmd
->vm_firstkey
== 0) return 0;
8539 last
= cmd
->vm_lastkey
;
8540 if (last
< 0) last
= c
->argc
+last
;
8541 for (j
= cmd
->vm_firstkey
; j
<= last
; j
+= cmd
->vm_keystep
)
8542 waitForSwappedKey(c
,c
->argv
[j
]);
8543 /* If the client was blocked for at least one key, mark it as blocked. */
8544 if (listLength(c
->io_keys
)) {
8545 c
->flags
|= REDIS_IO_WAIT
;
8546 aeDeleteFileEvent(server
.el
,c
->fd
,AE_READABLE
);
8547 server
.vm_blocked_clients
++;
8554 /* Remove the 'key' from the list of blocked keys for a given client.
8556 * The function returns 1 when there are no longer blocking keys after
8557 * the current one was removed (and the client can be unblocked). */
8558 static int dontWaitForSwappedKey(redisClient
*c
, robj
*key
) {
8562 struct dictEntry
*de
;
8564 /* Remove the key from the list of keys this client is waiting for. */
8565 listRewind(c
->io_keys
,&li
);
8566 while ((ln
= listNext(&li
)) != NULL
) {
8567 if (compareStringObjects(ln
->value
,key
) == 0) {
8568 listDelNode(c
->io_keys
,ln
);
8574 /* Remove the client form the key => waiting clients map. */
8575 de
= dictFind(c
->db
->io_keys
,key
);
8577 l
= dictGetEntryVal(de
);
8578 ln
= listSearchKey(l
,c
);
8581 if (listLength(l
) == 0)
8582 dictDelete(c
->db
->io_keys
,key
);
8584 return listLength(c
->io_keys
) == 0;
8587 static void handleClientsBlockedOnSwappedKey(redisDb
*db
, robj
*key
) {
8588 struct dictEntry
*de
;
8593 de
= dictFind(db
->io_keys
,key
);
8596 l
= dictGetEntryVal(de
);
8597 len
= listLength(l
);
8598 /* Note: we can't use something like while(listLength(l)) as the list
8599 * can be freed by the calling function when we remove the last element. */
8602 redisClient
*c
= ln
->value
;
8604 if (dontWaitForSwappedKey(c
,key
)) {
8605 /* Put the client in the list of clients ready to go as we
8606 * loaded all the keys about it. */
8607 listAddNodeTail(server
.io_ready_clients
,c
);
8612 /* ================================= Debugging ============================== */
8614 static void debugCommand(redisClient
*c
) {
8615 if (!strcasecmp(c
->argv
[1]->ptr
,"segfault")) {
8617 } else if (!strcasecmp(c
->argv
[1]->ptr
,"reload")) {
8618 if (rdbSave(server
.dbfilename
) != REDIS_OK
) {
8619 addReply(c
,shared
.err
);
8623 if (rdbLoad(server
.dbfilename
) != REDIS_OK
) {
8624 addReply(c
,shared
.err
);
8627 redisLog(REDIS_WARNING
,"DB reloaded by DEBUG RELOAD");
8628 addReply(c
,shared
.ok
);
8629 } else if (!strcasecmp(c
->argv
[1]->ptr
,"loadaof")) {
8631 if (loadAppendOnlyFile(server
.appendfilename
) != REDIS_OK
) {
8632 addReply(c
,shared
.err
);
8635 redisLog(REDIS_WARNING
,"Append Only File loaded by DEBUG LOADAOF");
8636 addReply(c
,shared
.ok
);
8637 } else if (!strcasecmp(c
->argv
[1]->ptr
,"object") && c
->argc
== 3) {
8638 dictEntry
*de
= dictFind(c
->db
->dict
,c
->argv
[2]);
8642 addReply(c
,shared
.nokeyerr
);
8645 key
= dictGetEntryKey(de
);
8646 val
= dictGetEntryVal(de
);
8647 if (!server
.vm_enabled
|| (key
->storage
== REDIS_VM_MEMORY
||
8648 key
->storage
== REDIS_VM_SWAPPING
)) {
8649 addReplySds(c
,sdscatprintf(sdsempty(),
8650 "+Key at:%p refcount:%d, value at:%p refcount:%d "
8651 "encoding:%d serializedlength:%lld\r\n",
8652 (void*)key
, key
->refcount
, (void*)val
, val
->refcount
,
8653 val
->encoding
, (long long) rdbSavedObjectLen(val
,NULL
)));
8655 addReplySds(c
,sdscatprintf(sdsempty(),
8656 "+Key at:%p refcount:%d, value swapped at: page %llu "
8657 "using %llu pages\r\n",
8658 (void*)key
, key
->refcount
, (unsigned long long) key
->vm
.page
,
8659 (unsigned long long) key
->vm
.usedpages
));
8661 } else if (!strcasecmp(c
->argv
[1]->ptr
,"swapout") && c
->argc
== 3) {
8662 dictEntry
*de
= dictFind(c
->db
->dict
,c
->argv
[2]);
8665 if (!server
.vm_enabled
) {
8666 addReplySds(c
,sdsnew("-ERR Virtual Memory is disabled\r\n"));
8670 addReply(c
,shared
.nokeyerr
);
8673 key
= dictGetEntryKey(de
);
8674 val
= dictGetEntryVal(de
);
8675 /* If the key is shared we want to create a copy */
8676 if (key
->refcount
> 1) {
8677 robj
*newkey
= dupStringObject(key
);
8679 key
= dictGetEntryKey(de
) = newkey
;
8682 if (key
->storage
!= REDIS_VM_MEMORY
) {
8683 addReplySds(c
,sdsnew("-ERR This key is not in memory\r\n"));
8684 } else if (vmSwapObjectBlocking(key
,val
) == REDIS_OK
) {
8685 dictGetEntryVal(de
) = NULL
;
8686 addReply(c
,shared
.ok
);
8688 addReply(c
,shared
.err
);
8691 addReplySds(c
,sdsnew(
8692 "-ERR Syntax error, try DEBUG [SEGFAULT|OBJECT <key>|SWAPOUT <key>|RELOAD]\r\n"));
8696 static void _redisAssert(char *estr
, char *file
, int line
) {
8697 redisLog(REDIS_WARNING
,"=== ASSERTION FAILED ===");
8698 redisLog(REDIS_WARNING
,"==> %s:%d '%s' is not true\n",file
,line
,estr
);
8699 #ifdef HAVE_BACKTRACE
8700 redisLog(REDIS_WARNING
,"(forcing SIGSEGV in order to print the stack trace)");
8705 /* =================================== Main! ================================ */
/* Return the integer found in /proc/sys/vm/overcommit_memory, or -1 if the
 * file can't be opened, can't be read, or does not start with a number.
 *
 * strtol() is used instead of atoi() so that unparsable content is
 * reported as an error (-1) instead of being confused with a real 0. */
int linuxOvercommitMemoryValue(void) {
    FILE *fp = fopen("/proc/sys/vm/overcommit_memory","r");
    char buf[64];
    char *line, *end;
    long value;

    if (!fp) return -1;
    line = fgets(buf,sizeof(buf),fp);
    fclose(fp);
    if (line == NULL) return -1;

    errno = 0;
    value = strtol(buf,&end,10);
    if (end == buf || errno == ERANGE) return -1;
    return (int)value;
}
8722 void linuxOvercommitMemoryWarning(void) {
8723 if (linuxOvercommitMemoryValue() == 0) {
8724 redisLog(REDIS_WARNING
,"WARNING overcommit_memory is set to 0! Background save may fail under low condition memory. To fix this issue add 'vm.overcommit_memory = 1' to /etc/sysctl.conf and then reboot or run the command 'sysctl vm.overcommit_memory=1' for this to take effect.");
8727 #endif /* __linux__ */
8729 static void daemonize(void) {
8733 if (fork() != 0) exit(0); /* parent exits */
8734 setsid(); /* create a new session */
8736 /* Every output goes to /dev/null. If Redis is daemonized but
8737 * the 'logfile' is set to 'stdout' in the configuration file
8738 * it will not log at all. */
8739 if ((fd
= open("/dev/null", O_RDWR
, 0)) != -1) {
8740 dup2(fd
, STDIN_FILENO
);
8741 dup2(fd
, STDOUT_FILENO
);
8742 dup2(fd
, STDERR_FILENO
);
8743 if (fd
> STDERR_FILENO
) close(fd
);
8745 /* Try to write the pid file */
8746 fp
= fopen(server
.pidfile
,"w");
8748 fprintf(fp
,"%d\n",getpid());
8753 int main(int argc
, char **argv
) {
8758 resetServerSaveParams();
8759 loadServerConfig(argv
[1]);
8760 } else if (argc
> 2) {
8761 fprintf(stderr
,"Usage: ./redis-server [/path/to/redis.conf]\n");
8764 redisLog(REDIS_WARNING
,"Warning: no config file specified, using the default config. In order to specify a config file use 'redis-server /path/to/redis.conf'");
8766 if (server
.daemonize
) daemonize();
8768 redisLog(REDIS_NOTICE
,"Server started, Redis version " REDIS_VERSION
);
8770 linuxOvercommitMemoryWarning();
8773 if (server
.appendonly
) {
8774 if (loadAppendOnlyFile(server
.appendfilename
) == REDIS_OK
)
8775 redisLog(REDIS_NOTICE
,"DB loaded from append only file: %ld seconds",time(NULL
)-start
);
8777 if (rdbLoad(server
.dbfilename
) == REDIS_OK
)
8778 redisLog(REDIS_NOTICE
,"DB loaded from disk: %ld seconds",time(NULL
)-start
);
8780 redisLog(REDIS_NOTICE
,"The server is now ready to accept connections on port %d", server
.port
);
8781 aeSetBeforeSleepProc(server
.el
,beforeSleep
);
8783 aeDeleteEventLoop(server
.el
);
8787 /* ============================= Backtrace support ========================= */
8789 #ifdef HAVE_BACKTRACE
8790 static char *findFuncName(void *pointer
, unsigned long *offset
);
8792 static void *getMcontextEip(ucontext_t
*uc
) {
8793 #if defined(__FreeBSD__)
8794 return (void*) uc
->uc_mcontext
.mc_eip
;
8795 #elif defined(__dietlibc__)
8796 return (void*) uc
->uc_mcontext
.eip
;
8797 #elif defined(__APPLE__) && !defined(MAC_OS_X_VERSION_10_6)
8799 return (void*) uc
->uc_mcontext
->__ss
.__rip
;
8801 return (void*) uc
->uc_mcontext
->__ss
.__eip
;
8803 #elif defined(__APPLE__) && defined(MAC_OS_X_VERSION_10_6)
8804 #if defined(_STRUCT_X86_THREAD_STATE64) && !defined(__i386__)
8805 return (void*) uc
->uc_mcontext
->__ss
.__rip
;
8807 return (void*) uc
->uc_mcontext
->__ss
.__eip
;
8809 #elif defined(__i386__) || defined(__X86_64__) || defined(__x86_64__)
8810 return (void*) uc
->uc_mcontext
.gregs
[REG_EIP
]; /* Linux 32/64 bit */
8811 #elif defined(__ia64__) /* Linux IA64 */
8812 return (void*) uc
->uc_mcontext
.sc_ip
;
8818 static void segvHandler(int sig
, siginfo_t
*info
, void *secret
) {
8820 char **messages
= NULL
;
8821 int i
, trace_size
= 0;
8822 unsigned long offset
=0;
8823 ucontext_t
*uc
= (ucontext_t
*) secret
;
8825 REDIS_NOTUSED(info
);
8827 redisLog(REDIS_WARNING
,
8828 "======= Ooops! Redis %s got signal: -%d- =======", REDIS_VERSION
, sig
);
8829 infostring
= genRedisInfoString();
8830 redisLog(REDIS_WARNING
, "%s",infostring
);
8831 /* It's not safe to sdsfree() the returned string under memory
8832 * corruption conditions. Let it leak as we are going to abort */
8834 trace_size
= backtrace(trace
, 100);
8835 /* overwrite sigaction with caller's address */
8836 if (getMcontextEip(uc
) != NULL
) {
8837 trace
[1] = getMcontextEip(uc
);
8839 messages
= backtrace_symbols(trace
, trace_size
);
8841 for (i
=1; i
<trace_size
; ++i
) {
8842 char *fn
= findFuncName(trace
[i
], &offset
), *p
;
8844 p
= strchr(messages
[i
],'+');
8845 if (!fn
|| (p
&& ((unsigned long)strtol(p
+1,NULL
,10)) < offset
)) {
8846 redisLog(REDIS_WARNING
,"%s", messages
[i
]);
8848 redisLog(REDIS_WARNING
,"%d redis-server %p %s + %d", i
, trace
[i
], fn
, (unsigned int)offset
);
8851 /* free(messages); Don't call free() with possibly corrupted memory. */
8855 static void setupSigSegvAction(void) {
8856 struct sigaction act
;
8858 sigemptyset (&act
.sa_mask
);
8859 /* When the SA_SIGINFO flag is set in sa_flags then sa_sigaction
8860 * is used. Otherwise, sa_handler is used */
8861 act
.sa_flags
= SA_NODEFER
| SA_ONSTACK
| SA_RESETHAND
| SA_SIGINFO
;
8862 act
.sa_sigaction
= segvHandler
;
8863 sigaction (SIGSEGV
, &act
, NULL
);
8864 sigaction (SIGBUS
, &act
, NULL
);
8865 sigaction (SIGFPE
, &act
, NULL
);
8866 sigaction (SIGILL
, &act
, NULL
);
8867 sigaction (SIGBUS
, &act
, NULL
);
8871 #include "staticsymbols.h"
8872 /* This function try to convert a pointer into a function name. It's used in
8873 * oreder to provide a backtrace under segmentation fault that's able to
8874 * display functions declared as static (otherwise the backtrace is useless). */
8875 static char *findFuncName(void *pointer
, unsigned long *offset
){
8877 unsigned long off
, minoff
= 0;
8879 /* Try to match against the Symbol with the smallest offset */
8880 for (i
=0; symsTable
[i
].pointer
; i
++) {
8881 unsigned long lp
= (unsigned long) pointer
;
8883 if (lp
!= (unsigned long)-1 && lp
>= symsTable
[i
].pointer
) {
8884 off
=lp
-symsTable
[i
].pointer
;
8885 if (ret
< 0 || off
< minoff
) {
8891 if (ret
== -1) return NULL
;
8893 return symsTable
[ret
].name
;
8895 #else /* HAVE_BACKTRACE */
/* Variant used when HAVE_BACKTRACE is not defined: no handler is
 * installed. */
static void setupSigSegvAction(void) {
}
8898 #endif /* HAVE_BACKTRACE */