2 * Copyright (c) 2009-2010, Salvatore Sanfilippo <antirez at gmail dot com>
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions are met:
8 * * Redistributions of source code must retain the above copyright notice,
9 * this list of conditions and the following disclaimer.
10 * * Redistributions in binary form must reproduce the above copyright
11 * notice, this list of conditions and the following disclaimer in the
12 * documentation and/or other materials provided with the distribution.
13 * * Neither the name of Redis nor the names of its contributors may be used
14 * to endorse or promote products derived from this software without
15 * specific prior written permission.
17 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
18 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
21 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
22 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
23 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
24 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
25 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
26 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
27 * POSSIBILITY OF SUCH DAMAGE.
30 #define REDIS_VERSION "1.3.4"
40 #define __USE_POSIX199309
47 #endif /* HAVE_BACKTRACE */
55 #include <arpa/inet.h>
59 #include <sys/resource.h>
66 #include "solarisfixes.h"
70 #include "ae.h" /* Event driven programming library */
71 #include "sds.h" /* Dynamic safe strings */
72 #include "anet.h" /* Networking the easy way */
73 #include "dict.h" /* Hash tables */
74 #include "adlist.h" /* Linked lists */
75 #include "zmalloc.h" /* total memory usage aware version of malloc/free */
76 #include "lzf.h" /* LZF compression library */
77 #include "pqsort.h" /* Partial qsort for SORT+LIMIT */
84 /* Static server configuration */
85 #define REDIS_SERVERPORT 6379 /* TCP port */
86 #define REDIS_MAXIDLETIME (60*5) /* default client timeout */
87 #define REDIS_IOBUF_LEN 1024
88 #define REDIS_LOADBUF_LEN 1024
89 #define REDIS_STATIC_ARGS 4
90 #define REDIS_DEFAULT_DBNUM 16
91 #define REDIS_CONFIGLINE_MAX 1024
92 #define REDIS_OBJFREELIST_MAX 1000000 /* Max number of objects to cache */
93 #define REDIS_MAX_SYNC_TIME 60 /* Slave can't take more to sync */
94 #define REDIS_EXPIRELOOKUPS_PER_CRON 100 /* try to expire 100 keys/second */
95 #define REDIS_MAX_WRITE_PER_EVENT (1024*64)
96 #define REDIS_REQUEST_MAX_SIZE (1024*1024*256) /* max bytes in inline command */
98 /* If more than REDIS_WRITEV_THRESHOLD write packets are pending use writev */
99 #define REDIS_WRITEV_THRESHOLD 3
100 /* Max number of iovecs used for each writev call */
101 #define REDIS_WRITEV_IOVEC_COUNT 256
103 /* Hash table parameters */
104 #define REDIS_HT_MINFILL 10 /* Minimal hash table fill 10% */
107 #define REDIS_CMD_BULK 1 /* Bulk write command */
108 #define REDIS_CMD_INLINE 2 /* Inline command */
109 /* REDIS_CMD_DENYOOM deserves a longer comment: all the commands marked with
110 this flag will return an error when the 'maxmemory' option is set in the
111 config file and the server is using more than maxmemory bytes of memory.
112 In short these commands are denied on low memory conditions. */
113 #define REDIS_CMD_DENYOOM 4
116 #define REDIS_STRING 0
122 /* Objects encoding. Some kinds of objects like Strings and Hashes can be
123 * internally represented in multiple ways. The 'encoding' field of the object
124 * is set to one of these values for this object. */
125 #define REDIS_ENCODING_RAW 0 /* Raw representation */
126 #define REDIS_ENCODING_INT 1 /* Encoded as integer */
127 #define REDIS_ENCODING_ZIPMAP 2 /* Encoded as zipmap */
128 #define REDIS_ENCODING_HT 3 /* Encoded as an hash table */
130 /* Object types only used for dumping to disk */
131 #define REDIS_EXPIRETIME 253
132 #define REDIS_SELECTDB 254
133 #define REDIS_EOF 255
135 /* Defines related to the dump file format. To store 32 bits lengths for short
136 * keys requires a lot of space, so we check the most significant 2 bits of
137 * the first byte to interpret the length:
139 * 00|000000 => if the two MSB are 00 the len is the 6 bits of this byte
140 * 01|000000 00000000 => 01, the len is 14 bits, 6 bits + 8 bits of next byte
141 * 10|000000 [32 bit integer] => if it's 10, a full 32 bit len will follow
142 * 11|000000 this means: specially encoded object will follow. The six bits
143 * number specify the kind of object that follows.
144 * See the REDIS_RDB_ENC_* defines.
146 * Lengths up to 63 are stored using a single byte, most DB keys, and many
147 * values, will fit inside. */
148 #define REDIS_RDB_6BITLEN 0
149 #define REDIS_RDB_14BITLEN 1
150 #define REDIS_RDB_32BITLEN 2
151 #define REDIS_RDB_ENCVAL 3
152 #define REDIS_RDB_LENERR UINT_MAX
154 /* When a length of a string object stored on disk has the first two bits
155 * set, the remaining six bits specify a special encoding for the object
156 * according to the following defines: */
157 #define REDIS_RDB_ENC_INT8 0 /* 8 bit signed integer */
158 #define REDIS_RDB_ENC_INT16 1 /* 16 bit signed integer */
159 #define REDIS_RDB_ENC_INT32 2 /* 32 bit signed integer */
160 #define REDIS_RDB_ENC_LZF 3 /* string compressed with LZF */
162 /* Virtual memory object->where field. */
163 #define REDIS_VM_MEMORY 0 /* The object is on memory */
164 #define REDIS_VM_SWAPPED 1 /* The object is on disk */
165 #define REDIS_VM_SWAPPING 2 /* Redis is swapping this object on disk */
166 #define REDIS_VM_LOADING 3 /* Redis is loading this object from disk */
168 /* Virtual memory static configuration stuff.
169 * Check vmFindContiguousPages() to know more about these magic numbers. */
170 #define REDIS_VM_MAX_NEAR_PAGES 65536
171 #define REDIS_VM_MAX_RANDOM_JUMP 4096
172 #define REDIS_VM_MAX_THREADS 32
173 #define REDIS_THREAD_STACK_SIZE (1024*1024*4)
174 /* The following is the *percentage* of completed I/O jobs to process when the
175 * handler is called. While Virtual Memory I/O operations are performed by
176 * threads, these operations must be processed by the main thread when completed
177 * in order to take effect. */
178 #define REDIS_MAX_COMPLETED_JOBS_PROCESSED 1
181 #define REDIS_SLAVE 1 /* This client is a slave server */
182 #define REDIS_MASTER 2 /* This client is a master server */
183 #define REDIS_MONITOR 4 /* This client is a slave monitor, see MONITOR */
184 #define REDIS_MULTI 8 /* This client is in a MULTI context */
185 #define REDIS_BLOCKED 16 /* The client is waiting in a blocking operation */
186 #define REDIS_IO_WAIT 32 /* The client is waiting for Virtual Memory I/O */
188 /* Slave replication state - slave side */
189 #define REDIS_REPL_NONE 0 /* No active replication */
190 #define REDIS_REPL_CONNECT 1 /* Must connect to master */
191 #define REDIS_REPL_CONNECTED 2 /* Connected to master */
193 /* Slave replication state - from the point of view of master
194 * Note that in SEND_BULK and ONLINE state the slave receives new updates
195 * in its output queue. In the WAIT_BGSAVE state instead the server is waiting
196 * to start the next background saving in order to send updates to it. */
197 #define REDIS_REPL_WAIT_BGSAVE_START 3 /* master waits bgsave to start feeding it */
198 #define REDIS_REPL_WAIT_BGSAVE_END 4 /* master waits bgsave to start bulk DB transmission */
199 #define REDIS_REPL_SEND_BULK 5 /* master is sending the bulk DB */
200 #define REDIS_REPL_ONLINE 6 /* bulk DB already transmitted, receive updates */
202 /* List related stuff */
206 /* Sort operations */
207 #define REDIS_SORT_GET 0
208 #define REDIS_SORT_ASC 1
209 #define REDIS_SORT_DESC 2
210 #define REDIS_SORTKEY_MAX 1024
213 #define REDIS_DEBUG 0
214 #define REDIS_VERBOSE 1
215 #define REDIS_NOTICE 2
216 #define REDIS_WARNING 3
/* Anti-warning macro: evaluates V and discards the result, so that an
 * otherwise unused parameter or variable does not trigger a compiler warning.
 * The argument is parenthesized so that an expression argument such as
 * REDIS_NOTUSED(a + b) is cast to void as a whole instead of expanding to
 * ((void) a + b), which does not compile. */
#define REDIS_NOTUSED(V) ((void) (V))
221 #define ZSKIPLIST_MAXLEVEL 32 /* Should be enough for 2^32 elements */
222 #define ZSKIPLIST_P 0.25 /* Skiplist P = 1/4 */
224 /* Append only defines */
225 #define APPENDFSYNC_NO 0
226 #define APPENDFSYNC_ALWAYS 1
227 #define APPENDFSYNC_EVERYSEC 2
229 /* Hashes related defaults */
230 #define REDIS_HASH_MAX_ZIPMAP_ENTRIES 64
231 #define REDIS_HASH_MAX_ZIPMAP_VALUE 512
233 /* We can print the stacktrace, so our assert is defined this way: */
234 #define redisAssert(_e) ((_e)?(void)0 : (_redisAssert(#_e,__FILE__,__LINE__),_exit(1)))
235 static void _redisAssert(char *estr
, char *file
, int line
);
237 /*================================= Data types ============================== */
239 /* A redis object, that is a type able to hold a string / list / set */
241 /* The VM object structure */
242 struct redisObjectVM
{
243 off_t page
; /* the page at which the object is stored on disk */
244 off_t usedpages
; /* number of pages used on disk */
245 time_t atime
; /* Last access time */
248 /* The actual Redis Object */
249 typedef struct redisObject
{
252 unsigned char encoding
;
253 unsigned char storage
; /* If this object is a key, where is the value?
254 * REDIS_VM_MEMORY, REDIS_VM_SWAPPED, ... */
255 unsigned char vtype
; /* If this object is a key, and value is swapped out,
256 * this is the type of the swapped out object. */
258 /* VM fields, these are only allocated if VM is active, otherwise the
259 * object allocation function will just allocate
260 * sizeof(redisObject) minus sizeof(redisObjectVM), so using
261 * Redis without VM active will not have any overhead. */
262 struct redisObjectVM vm
;
265 /* Macro used to initialize a Redis object allocated on the stack.
266 * Note that this macro is kept near the structure definition to make sure
267 * we'll update it when the structure is changed, to avoid bugs like
268 * bug #85 introduced exactly in this way. */
269 #define initStaticStringObject(_var,_ptr) do { \
271 _var.type = REDIS_STRING; \
272 _var.encoding = REDIS_ENCODING_RAW; \
274 if (server.vm_enabled) _var.storage = REDIS_VM_MEMORY; \
277 typedef struct redisDb
{
278 dict
*dict
; /* The keyspace for this DB */
279 dict
*expires
; /* Timeout of keys with a timeout set */
280 dict
*blockingkeys
; /* Keys with clients waiting for data (BLPOP) */
281 dict
*io_keys
; /* Keys with clients waiting for VM I/O */
285 /* Client MULTI/EXEC state */
286 typedef struct multiCmd
{
289 struct redisCommand
*cmd
;
292 typedef struct multiState
{
293 multiCmd
*commands
; /* Array of MULTI commands */
294 int count
; /* Total number of MULTI commands */
297 /* With multiplexing we need to take per-client state.
298 * Clients are taken in a linked list. */
299 typedef struct redisClient
{
304 robj
**argv
, **mbargv
;
306 int bulklen
; /* bulk read len. -1 if not in bulk read mode */
307 int multibulk
; /* multi bulk command format active */
310 time_t lastinteraction
; /* time of the last interaction, used for timeout */
311 int flags
; /* REDIS_SLAVE | REDIS_MONITOR | REDIS_MULTI ... */
312 int slaveseldb
; /* slave selected db, if this client is a slave */
313 int authenticated
; /* when requirepass is non-NULL */
314 int replstate
; /* replication state if this is a slave */
315 int repldbfd
; /* replication DB file descriptor */
316 long repldboff
; /* replication DB file offset */
317 off_t repldbsize
; /* replication DB file size */
318 multiState mstate
; /* MULTI/EXEC state */
319 robj
**blockingkeys
; /* The key we are waiting to terminate a blocking
320 * operation such as BLPOP. Otherwise NULL. */
321 int blockingkeysnum
; /* Number of blocking keys */
322 time_t blockingto
; /* Blocking operation timeout. If UNIX current time
323 * is >= blockingto then the operation timed out. */
324 list
*io_keys
; /* Keys this client is waiting to be loaded from the
325 * swap file in order to continue. */
333 /* Global server state structure */
338 dict
*sharingpool
; /* Pool used for object sharing */
339 unsigned int sharingpoolsize
;
340 long long dirty
; /* changes to DB from the last save */
342 list
*slaves
, *monitors
;
343 char neterr
[ANET_ERR_LEN
];
345 int cronloops
; /* number of times the cron function run */
346 list
*objfreelist
; /* A list of freed objects to avoid malloc() */
347 time_t lastsave
; /* Unix time of last save succeeded */
348 /* Fields used only for stats */
349 time_t stat_starttime
; /* server start time */
350 long long stat_numcommands
; /* number of processed commands */
351 long long stat_numconnections
; /* number of connections received */
364 pid_t bgsavechildpid
;
365 pid_t bgrewritechildpid
;
366 sds bgrewritebuf
; /* buffer taken by parent during append only rewrite */
367 struct saveparam
*saveparams
;
372 char *appendfilename
;
376 /* Replication related */
381 redisClient
*master
; /* client that is master for this slave */
383 unsigned int maxclients
;
384 unsigned long long maxmemory
;
385 unsigned int blpop_blocked_clients
;
386 unsigned int vm_blocked_clients
;
387 /* Sort parameters - qsort_r() is only available under BSD so we
388 * have to take this state global, in order to pass it to sortCompare() */
392 /* Virtual memory configuration */
397 unsigned long long vm_max_memory
;
399 size_t hash_max_zipmap_entries
;
400 size_t hash_max_zipmap_value
;
401 /* Virtual memory state */
404 off_t vm_next_page
; /* Next probably empty page */
405 off_t vm_near_pages
; /* Number of pages allocated sequentially */
406 unsigned char *vm_bitmap
; /* Bitmap of free/used pages */
407 time_t unixtime
; /* Unix time sampled every second. */
408 /* Virtual memory I/O threads stuff */
409 /* An I/O thread processes an element taken from the io_newjobs queue and
410 * puts the result of the operation in the io_processed list. While the
411 * job is being processed, it's put on the io_processing queue. */
412 list
*io_newjobs
; /* List of VM I/O jobs yet to be processed */
413 list
*io_processing
; /* List of VM I/O jobs being processed */
414 list
*io_processed
; /* List of VM I/O jobs already processed */
415 list
*io_ready_clients
; /* Clients ready to be unblocked. All keys loaded */
416 pthread_mutex_t io_mutex
; /* lock to access io_jobs/io_done/io_thread_job */
417 pthread_mutex_t obj_freelist_mutex
; /* safe redis objects creation/free */
418 pthread_mutex_t io_swapfile_mutex
; /* So we can lseek + write */
419 pthread_attr_t io_threads_attr
; /* attributes for threads creation */
420 int io_active_threads
; /* Number of running I/O threads */
421 int vm_max_threads
; /* Max number of I/O threads running at the same time */
422 /* Our main thread is blocked on the event loop, looking for sockets ready
423 * to be read or written, so when a threaded I/O operation is ready to be
424 * processed by the main thread, the I/O thread will use a unix pipe to
425 * awake the main thread. The following are the two pipe FDs. */
426 int io_ready_pipe_read
;
427 int io_ready_pipe_write
;
428 /* Virtual memory stats */
429 unsigned long long vm_stats_used_pages
;
430 unsigned long long vm_stats_swapped_objects
;
431 unsigned long long vm_stats_swapouts
;
432 unsigned long long vm_stats_swapins
;
436 typedef void redisCommandProc(redisClient
*c
);
437 struct redisCommand
{
439 redisCommandProc
*proc
;
442 /* What keys should be loaded in background when calling this command? */
443 int vm_firstkey
; /* The first argument that's a key (0 = no keys) */
444 int vm_lastkey
; /* The last argument that's a key */
445 int vm_keystep
; /* The step between first and last key */
448 struct redisFunctionSym
{
450 unsigned long pointer
;
453 typedef struct _redisSortObject
{
461 typedef struct _redisSortOperation
{
464 } redisSortOperation
;
466 /* ZSETs use a specialized version of Skiplists */
468 typedef struct zskiplistNode
{
469 struct zskiplistNode
**forward
;
470 struct zskiplistNode
*backward
;
476 typedef struct zskiplist
{
477 struct zskiplistNode
*header
, *tail
;
478 unsigned long length
;
482 typedef struct zset
{
487 /* Our shared "common" objects */
489 struct sharedObjectsStruct
{
490 robj
*crlf
, *ok
, *err
, *emptybulk
, *czero
, *cone
, *pong
, *space
,
491 *colon
, *nullbulk
, *nullmultibulk
, *queued
,
492 *emptymultibulk
, *wrongtypeerr
, *nokeyerr
, *syntaxerr
, *sameobjecterr
,
493 *outofrangeerr
, *plus
,
494 *select0
, *select1
, *select2
, *select3
, *select4
,
495 *select5
, *select6
, *select7
, *select8
, *select9
;
498 /* Global vars that are actually used as constants. The following double
499 * values are used for double on-disk serialization, and are initialized
500 * at runtime to avoid strange compiler optimizations. */
502 static double R_Zero
, R_PosInf
, R_NegInf
, R_Nan
;
504 /* VM threaded I/O request message */
505 #define REDIS_IOJOB_LOAD 0 /* Load from disk to memory */
506 #define REDIS_IOJOB_PREPARE_SWAP 1 /* Compute needed pages */
507 #define REDIS_IOJOB_DO_SWAP 2 /* Swap from memory to disk */
508 typedef struct iojob
{
509 int type
; /* Request type, REDIS_IOJOB_* */
510 redisDb
*db
;/* Redis database */
511 robj
*key
; /* This I/O request is about swapping this key */
512 robj
*val
; /* the value to swap for REDIS_IOJOB_*_SWAP, otherwise this
513 * field is populated by the I/O thread for REDIS_IOJOB_LOAD. */
514 off_t page
; /* Swap page where to read/write the object */
515 off_t pages
; /* Swap pages needed to save object. PREPARE_SWAP return val */
516 int canceled
; /* True if this command was canceled by blocking side of VM */
517 pthread_t thread
; /* ID of the thread processing this entry */
520 /*================================ Prototypes =============================== */
522 static void freeStringObject(robj
*o
);
523 static void freeListObject(robj
*o
);
524 static void freeSetObject(robj
*o
);
525 static void decrRefCount(void *o
);
526 static robj
*createObject(int type
, void *ptr
);
527 static void freeClient(redisClient
*c
);
528 static int rdbLoad(char *filename
);
529 static void addReply(redisClient
*c
, robj
*obj
);
530 static void addReplySds(redisClient
*c
, sds s
);
531 static void incrRefCount(robj
*o
);
532 static int rdbSaveBackground(char *filename
);
533 static robj
*createStringObject(char *ptr
, size_t len
);
534 static robj
*dupStringObject(robj
*o
);
535 static void replicationFeedSlaves(list
*slaves
, struct redisCommand
*cmd
, int dictid
, robj
**argv
, int argc
);
536 static void feedAppendOnlyFile(struct redisCommand
*cmd
, int dictid
, robj
**argv
, int argc
);
537 static int syncWithMaster(void);
538 static robj
*tryObjectSharing(robj
*o
);
539 static int tryObjectEncoding(robj
*o
);
540 static robj
*getDecodedObject(robj
*o
);
541 static int removeExpire(redisDb
*db
, robj
*key
);
542 static int expireIfNeeded(redisDb
*db
, robj
*key
);
543 static int deleteIfVolatile(redisDb
*db
, robj
*key
);
544 static int deleteIfSwapped(redisDb
*db
, robj
*key
);
545 static int deleteKey(redisDb
*db
, robj
*key
);
546 static time_t getExpire(redisDb
*db
, robj
*key
);
547 static int setExpire(redisDb
*db
, robj
*key
, time_t when
);
548 static void updateSlavesWaitingBgsave(int bgsaveerr
);
549 static void freeMemoryIfNeeded(void);
550 static int processCommand(redisClient
*c
);
551 static void setupSigSegvAction(void);
552 static void rdbRemoveTempFile(pid_t childpid
);
553 static void aofRemoveTempFile(pid_t childpid
);
554 static size_t stringObjectLen(robj
*o
);
555 static void processInputBuffer(redisClient
*c
);
556 static zskiplist
*zslCreate(void);
557 static void zslFree(zskiplist
*zsl
);
558 static void zslInsert(zskiplist
*zsl
, double score
, robj
*obj
);
559 static void sendReplyToClientWritev(aeEventLoop
*el
, int fd
, void *privdata
, int mask
);
560 static void initClientMultiState(redisClient
*c
);
561 static void freeClientMultiState(redisClient
*c
);
562 static void queueMultiCommand(redisClient
*c
, struct redisCommand
*cmd
);
563 static void unblockClientWaitingData(redisClient
*c
);
564 static int handleClientsWaitingListPush(redisClient
*c
, robj
*key
, robj
*ele
);
565 static void vmInit(void);
566 static void vmMarkPagesFree(off_t page
, off_t count
);
567 static robj
*vmLoadObject(robj
*key
);
568 static robj
*vmPreviewObject(robj
*key
);
569 static int vmSwapOneObjectBlocking(void);
570 static int vmSwapOneObjectThreaded(void);
571 static int vmCanSwapOut(void);
572 static int tryFreeOneObjectFromFreelist(void);
573 static void acceptHandler(aeEventLoop
*el
, int fd
, void *privdata
, int mask
);
574 static void vmThreadedIOCompletedJob(aeEventLoop
*el
, int fd
, void *privdata
, int mask
);
575 static void vmCancelThreadedIOJob(robj
*o
);
576 static void lockThreadedIO(void);
577 static void unlockThreadedIO(void);
578 static int vmSwapObjectThreaded(robj
*key
, robj
*val
, redisDb
*db
);
579 static void freeIOJob(iojob
*j
);
580 static void queueIOJob(iojob
*j
);
581 static int vmWriteObjectOnSwap(robj
*o
, off_t page
);
582 static robj
*vmReadObjectFromSwap(off_t page
, int type
);
583 static void waitEmptyIOJobsQueue(void);
584 static void vmReopenSwapFile(void);
585 static int vmFreePage(off_t page
);
586 static int blockClientOnSwappedKeys(struct redisCommand
*cmd
, redisClient
*c
);
587 static int dontWaitForSwappedKey(redisClient
*c
, robj
*key
);
588 static void handleClientsBlockedOnSwappedKey(redisDb
*db
, robj
*key
);
589 static void readQueryFromClient(aeEventLoop
*el
, int fd
, void *privdata
, int mask
);
590 static struct redisCommand
*lookupCommand(char *name
);
591 static void call(redisClient
*c
, struct redisCommand
*cmd
);
592 static void resetClient(redisClient
*c
);
594 static void authCommand(redisClient
*c
);
595 static void pingCommand(redisClient
*c
);
596 static void echoCommand(redisClient
*c
);
597 static void setCommand(redisClient
*c
);
598 static void setnxCommand(redisClient
*c
);
599 static void getCommand(redisClient
*c
);
600 static void delCommand(redisClient
*c
);
601 static void existsCommand(redisClient
*c
);
602 static void incrCommand(redisClient
*c
);
603 static void decrCommand(redisClient
*c
);
604 static void incrbyCommand(redisClient
*c
);
605 static void decrbyCommand(redisClient
*c
);
606 static void selectCommand(redisClient
*c
);
607 static void randomkeyCommand(redisClient
*c
);
608 static void keysCommand(redisClient
*c
);
609 static void dbsizeCommand(redisClient
*c
);
610 static void lastsaveCommand(redisClient
*c
);
611 static void saveCommand(redisClient
*c
);
612 static void bgsaveCommand(redisClient
*c
);
613 static void bgrewriteaofCommand(redisClient
*c
);
614 static void shutdownCommand(redisClient
*c
);
615 static void moveCommand(redisClient
*c
);
616 static void renameCommand(redisClient
*c
);
617 static void renamenxCommand(redisClient
*c
);
618 static void lpushCommand(redisClient
*c
);
619 static void rpushCommand(redisClient
*c
);
620 static void lpopCommand(redisClient
*c
);
621 static void rpopCommand(redisClient
*c
);
622 static void llenCommand(redisClient
*c
);
623 static void lindexCommand(redisClient
*c
);
624 static void lrangeCommand(redisClient
*c
);
625 static void ltrimCommand(redisClient
*c
);
626 static void typeCommand(redisClient
*c
);
627 static void lsetCommand(redisClient
*c
);
628 static void saddCommand(redisClient
*c
);
629 static void sremCommand(redisClient
*c
);
630 static void smoveCommand(redisClient
*c
);
631 static void sismemberCommand(redisClient
*c
);
632 static void scardCommand(redisClient
*c
);
633 static void spopCommand(redisClient
*c
);
634 static void srandmemberCommand(redisClient
*c
);
635 static void sinterCommand(redisClient
*c
);
636 static void sinterstoreCommand(redisClient
*c
);
637 static void sunionCommand(redisClient
*c
);
638 static void sunionstoreCommand(redisClient
*c
);
639 static void sdiffCommand(redisClient
*c
);
640 static void sdiffstoreCommand(redisClient
*c
);
641 static void syncCommand(redisClient
*c
);
642 static void flushdbCommand(redisClient
*c
);
643 static void flushallCommand(redisClient
*c
);
644 static void sortCommand(redisClient
*c
);
645 static void lremCommand(redisClient
*c
);
646 static void rpoplpushcommand(redisClient
*c
);
647 static void infoCommand(redisClient
*c
);
648 static void mgetCommand(redisClient
*c
);
649 static void monitorCommand(redisClient
*c
);
650 static void expireCommand(redisClient
*c
);
651 static void expireatCommand(redisClient
*c
);
652 static void getsetCommand(redisClient
*c
);
653 static void ttlCommand(redisClient
*c
);
654 static void slaveofCommand(redisClient
*c
);
655 static void debugCommand(redisClient
*c
);
656 static void msetCommand(redisClient
*c
);
657 static void msetnxCommand(redisClient
*c
);
658 static void zaddCommand(redisClient
*c
);
659 static void zincrbyCommand(redisClient
*c
);
660 static void zrangeCommand(redisClient
*c
);
661 static void zrangebyscoreCommand(redisClient
*c
);
662 static void zcountCommand(redisClient
*c
);
663 static void zrevrangeCommand(redisClient
*c
);
664 static void zcardCommand(redisClient
*c
);
665 static void zremCommand(redisClient
*c
);
666 static void zscoreCommand(redisClient
*c
);
667 static void zremrangebyscoreCommand(redisClient
*c
);
668 static void multiCommand(redisClient
*c
);
669 static void execCommand(redisClient
*c
);
670 static void discardCommand(redisClient
*c
);
671 static void blpopCommand(redisClient
*c
);
672 static void brpopCommand(redisClient
*c
);
673 static void appendCommand(redisClient
*c
);
674 static void substrCommand(redisClient
*c
);
675 static void zrankCommand(redisClient
*c
);
676 static void hsetCommand(redisClient
*c
);
677 static void hgetCommand(redisClient
*c
);
678 static void zunionCommand(redisClient
*c
);
679 static void zinterCommand(redisClient
*c
);
681 /*================================= Globals ================================= */
684 static struct redisServer server
; /* server global state */
685 static struct redisCommand cmdTable
[] = {
686 {"get",getCommand
,2,REDIS_CMD_INLINE
,1,1,1},
687 {"set",setCommand
,3,REDIS_CMD_BULK
|REDIS_CMD_DENYOOM
,0,0,0},
688 {"setnx",setnxCommand
,3,REDIS_CMD_BULK
|REDIS_CMD_DENYOOM
,0,0,0},
689 {"append",appendCommand
,3,REDIS_CMD_BULK
|REDIS_CMD_DENYOOM
,1,1,1},
690 {"substr",substrCommand
,4,REDIS_CMD_INLINE
,1,1,1},
691 {"del",delCommand
,-2,REDIS_CMD_INLINE
,0,0,0},
692 {"exists",existsCommand
,2,REDIS_CMD_INLINE
,1,1,1},
693 {"incr",incrCommand
,2,REDIS_CMD_INLINE
|REDIS_CMD_DENYOOM
,1,1,1},
694 {"decr",decrCommand
,2,REDIS_CMD_INLINE
|REDIS_CMD_DENYOOM
,1,1,1},
695 {"mget",mgetCommand
,-2,REDIS_CMD_INLINE
,1,-1,1},
696 {"rpush",rpushCommand
,3,REDIS_CMD_BULK
|REDIS_CMD_DENYOOM
,1,1,1},
697 {"lpush",lpushCommand
,3,REDIS_CMD_BULK
|REDIS_CMD_DENYOOM
,1,1,1},
698 {"rpop",rpopCommand
,2,REDIS_CMD_INLINE
,1,1,1},
699 {"lpop",lpopCommand
,2,REDIS_CMD_INLINE
,1,1,1},
700 {"brpop",brpopCommand
,-3,REDIS_CMD_INLINE
,1,1,1},
701 {"blpop",blpopCommand
,-3,REDIS_CMD_INLINE
,1,1,1},
702 {"llen",llenCommand
,2,REDIS_CMD_INLINE
,1,1,1},
703 {"lindex",lindexCommand
,3,REDIS_CMD_INLINE
,1,1,1},
704 {"lset",lsetCommand
,4,REDIS_CMD_BULK
|REDIS_CMD_DENYOOM
,1,1,1},
705 {"lrange",lrangeCommand
,4,REDIS_CMD_INLINE
,1,1,1},
706 {"ltrim",ltrimCommand
,4,REDIS_CMD_INLINE
,1,1,1},
707 {"lrem",lremCommand
,4,REDIS_CMD_BULK
,1,1,1},
708 {"rpoplpush",rpoplpushcommand
,3,REDIS_CMD_INLINE
|REDIS_CMD_DENYOOM
,1,2,1},
709 {"sadd",saddCommand
,3,REDIS_CMD_BULK
|REDIS_CMD_DENYOOM
,1,1,1},
710 {"srem",sremCommand
,3,REDIS_CMD_BULK
,1,1,1},
711 {"smove",smoveCommand
,4,REDIS_CMD_BULK
,1,2,1},
712 {"sismember",sismemberCommand
,3,REDIS_CMD_BULK
,1,1,1},
713 {"scard",scardCommand
,2,REDIS_CMD_INLINE
,1,1,1},
714 {"spop",spopCommand
,2,REDIS_CMD_INLINE
,1,1,1},
715 {"srandmember",srandmemberCommand
,2,REDIS_CMD_INLINE
,1,1,1},
716 {"sinter",sinterCommand
,-2,REDIS_CMD_INLINE
|REDIS_CMD_DENYOOM
,1,-1,1},
717 {"sinterstore",sinterstoreCommand
,-3,REDIS_CMD_INLINE
|REDIS_CMD_DENYOOM
,2,-1,1},
718 {"sunion",sunionCommand
,-2,REDIS_CMD_INLINE
|REDIS_CMD_DENYOOM
,1,-1,1},
719 {"sunionstore",sunionstoreCommand
,-3,REDIS_CMD_INLINE
|REDIS_CMD_DENYOOM
,2,-1,1},
720 {"sdiff",sdiffCommand
,-2,REDIS_CMD_INLINE
|REDIS_CMD_DENYOOM
,1,-1,1},
721 {"sdiffstore",sdiffstoreCommand
,-3,REDIS_CMD_INLINE
|REDIS_CMD_DENYOOM
,2,-1,1},
722 {"smembers",sinterCommand
,2,REDIS_CMD_INLINE
,1,1,1},
723 {"zadd",zaddCommand
,4,REDIS_CMD_BULK
|REDIS_CMD_DENYOOM
,1,1,1},
724 {"zincrby",zincrbyCommand
,4,REDIS_CMD_BULK
|REDIS_CMD_DENYOOM
,1,1,1},
725 {"zrem",zremCommand
,3,REDIS_CMD_BULK
,1,1,1},
726 {"zremrangebyscore",zremrangebyscoreCommand
,4,REDIS_CMD_INLINE
,1,1,1},
727 {"zunion",zunionCommand
,-4,REDIS_CMD_INLINE
|REDIS_CMD_DENYOOM
,0,0,0},
728 {"zinter",zinterCommand
,-4,REDIS_CMD_INLINE
|REDIS_CMD_DENYOOM
,0,0,0},
729 {"zrange",zrangeCommand
,-4,REDIS_CMD_INLINE
,1,1,1},
730 {"zrangebyscore",zrangebyscoreCommand
,-4,REDIS_CMD_INLINE
,1,1,1},
731 {"zcount",zcountCommand
,4,REDIS_CMD_INLINE
,1,1,1},
732 {"zrevrange",zrevrangeCommand
,-4,REDIS_CMD_INLINE
,1,1,1},
733 {"zcard",zcardCommand
,2,REDIS_CMD_INLINE
,1,1,1},
734 {"zscore",zscoreCommand
,3,REDIS_CMD_BULK
|REDIS_CMD_DENYOOM
,1,1,1},
735 {"zrank",zrankCommand
,3,REDIS_CMD_INLINE
,1,1,1},
736 {"hset",hsetCommand
,4,REDIS_CMD_BULK
|REDIS_CMD_DENYOOM
,1,1,1},
737 {"hget",hgetCommand
,3,REDIS_CMD_BULK
,1,1,1},
738 {"incrby",incrbyCommand
,3,REDIS_CMD_INLINE
|REDIS_CMD_DENYOOM
,1,1,1},
739 {"decrby",decrbyCommand
,3,REDIS_CMD_INLINE
|REDIS_CMD_DENYOOM
,1,1,1},
740 {"getset",getsetCommand
,3,REDIS_CMD_BULK
|REDIS_CMD_DENYOOM
,1,1,1},
741 {"mset",msetCommand
,-3,REDIS_CMD_BULK
|REDIS_CMD_DENYOOM
,1,-1,2},
742 {"msetnx",msetnxCommand
,-3,REDIS_CMD_BULK
|REDIS_CMD_DENYOOM
,1,-1,2},
743 {"randomkey",randomkeyCommand
,1,REDIS_CMD_INLINE
,0,0,0},
744 {"select",selectCommand
,2,REDIS_CMD_INLINE
,0,0,0},
745 {"move",moveCommand
,3,REDIS_CMD_INLINE
,1,1,1},
746 {"rename",renameCommand
,3,REDIS_CMD_INLINE
,1,1,1},
747 {"renamenx",renamenxCommand
,3,REDIS_CMD_INLINE
,1,1,1},
748 {"expire",expireCommand
,3,REDIS_CMD_INLINE
,0,0,0},
749 {"expireat",expireatCommand
,3,REDIS_CMD_INLINE
,0,0,0},
750 {"keys",keysCommand
,2,REDIS_CMD_INLINE
,0,0,0},
751 {"dbsize",dbsizeCommand
,1,REDIS_CMD_INLINE
,0,0,0},
752 {"auth",authCommand
,2,REDIS_CMD_INLINE
,0,0,0},
753 {"ping",pingCommand
,1,REDIS_CMD_INLINE
,0,0,0},
754 {"echo",echoCommand
,2,REDIS_CMD_BULK
,0,0,0},
755 {"save",saveCommand
,1,REDIS_CMD_INLINE
,0,0,0},
756 {"bgsave",bgsaveCommand
,1,REDIS_CMD_INLINE
,0,0,0},
757 {"bgrewriteaof",bgrewriteaofCommand
,1,REDIS_CMD_INLINE
,0,0,0},
758 {"shutdown",shutdownCommand
,1,REDIS_CMD_INLINE
,0,0,0},
759 {"lastsave",lastsaveCommand
,1,REDIS_CMD_INLINE
,0,0,0},
760 {"type",typeCommand
,2,REDIS_CMD_INLINE
,1,1,1},
761 {"multi",multiCommand
,1,REDIS_CMD_INLINE
,0,0,0},
762 {"exec",execCommand
,1,REDIS_CMD_INLINE
,0,0,0},
763 {"discard",discardCommand
,1,REDIS_CMD_INLINE
,0,0,0},
764 {"sync",syncCommand
,1,REDIS_CMD_INLINE
,0,0,0},
765 {"flushdb",flushdbCommand
,1,REDIS_CMD_INLINE
,0,0,0},
766 {"flushall",flushallCommand
,1,REDIS_CMD_INLINE
,0,0,0},
767 {"sort",sortCommand
,-2,REDIS_CMD_INLINE
|REDIS_CMD_DENYOOM
,1,1,1},
768 {"info",infoCommand
,1,REDIS_CMD_INLINE
,0,0,0},
769 {"monitor",monitorCommand
,1,REDIS_CMD_INLINE
,0,0,0},
770 {"ttl",ttlCommand
,2,REDIS_CMD_INLINE
,1,1,1},
771 {"slaveof",slaveofCommand
,3,REDIS_CMD_INLINE
,0,0,0},
772 {"debug",debugCommand
,-2,REDIS_CMD_INLINE
,0,0,0},
773 {NULL
,NULL
,0,0,0,0,0}
776 /*============================ Utility functions ============================ */
/* Glob-style pattern matching.
 *
 * Matches the first 'stringLen' bytes of 'string' against the first
 * 'patternLen' bytes of 'pattern'. Supported syntax:
 *
 *   *      any sequence of bytes, including the empty one
 *   ?      any single byte
 *   [...]  a byte class: a leading '^' negates it, 'a-z' is a range and
 *          '\' escapes the next byte
 *   \x     the byte x taken literally
 *
 * When 'nocase' is non-zero bytes are compared case insensitively.
 *
 * Both buffers are length delimited: nothing at or past the given lengths
 * is read, so neither argument has to be NUL terminated.
 *
 * Returns 1 on match, 0 otherwise. */
int stringmatchlen(const char *pattern, int patternLen,
        const char *string, int stringLen, int nocase)
{
    /* Every pattern element other than '*' consumes one byte of the string,
     * so the main loop only runs while both sides still have input. */
    while (patternLen && stringLen) {
        switch (pattern[0]) {
        case '*':
            /* Collapse runs of '*' without peeking past the pattern end. */
            while (patternLen > 1 && pattern[1] == '*') {
                pattern++;
                patternLen--;
            }
            if (patternLen == 1)
                return 1; /* match */
            /* Try to match the rest of the pattern at every suffix. */
            while (stringLen) {
                if (stringmatchlen(pattern+1, patternLen-1,
                            string, stringLen, nocase))
                    return 1; /* match */
                string++;
                stringLen--;
            }
            return 0; /* no match */
        case '?':
            string++;
            stringLen--;
            break;
        case '[':
        {
            int not, match;

            pattern++;
            patternLen--;
            not = patternLen && pattern[0] == '^';
            if (not) {
                pattern++;
                patternLen--;
            }
            match = 0;
            while (1) {
                if (patternLen == 0) {
                    /* Unterminated class: step back so that the common
                     * advance after the switch lands on the pattern end. */
                    pattern--;
                    patternLen++;
                    break;
                } else if (pattern[0] == '\\' && patternLen >= 2) {
                    pattern++;
                    patternLen--;
                    if (pattern[0] == string[0])
                        match = 1;
                } else if (pattern[0] == ']') {
                    break;
                } else if (patternLen >= 3 && pattern[1] == '-') {
                    int start = pattern[0];
                    int end = pattern[2];
                    int c = string[0];

                    if (start > end) {
                        int t = start;
                        start = end;
                        end = t;
                    }
                    if (nocase) {
                        start = tolower(start);
                        end = tolower(end);
                        c = tolower(c);
                    }
                    pattern += 2;
                    patternLen -= 2;
                    if (c >= start && c <= end)
                        match = 1;
                } else {
                    if (!nocase) {
                        if (pattern[0] == string[0])
                            match = 1;
                    } else {
                        if (tolower((unsigned char)pattern[0]) ==
                            tolower((unsigned char)string[0]))
                            match = 1;
                    }
                }
                pattern++;
                patternLen--;
            }
            if (not)
                match = !match;
            if (!match)
                return 0; /* no match */
            string++;
            stringLen--;
            break;
        }
        case '\\':
            if (patternLen >= 2) {
                pattern++;
                patternLen--;
            }
            /* fall thru */
        default:
            if (!nocase) {
                if (pattern[0] != string[0])
                    return 0; /* no match */
            } else {
                if (tolower((unsigned char)pattern[0]) !=
                    tolower((unsigned char)string[0]))
                    return 0; /* no match */
            }
            string++;
            stringLen--;
            break;
        }
        pattern++;
        patternLen--;
    }
    /* The string is consumed: what is left of the pattern still matches
     * only if it is made of '*' alone. */
    if (stringLen == 0) {
        while (patternLen && *pattern == '*') {
            pattern++;
            patternLen--;
        }
    }
    return patternLen == 0 && stringLen == 0;
}
901 static void redisLog(int level
, const char *fmt
, ...) {
905 fp
= (server
.logfile
== NULL
) ? stdout
: fopen(server
.logfile
,"a");
909 if (level
>= server
.verbosity
) {
915 strftime(buf
,64,"%d %b %H:%M:%S",localtime(&now
));
916 fprintf(fp
,"[%d] %s %c ",(int)getpid(),buf
,c
[level
]);
917 vfprintf(fp
, fmt
, ap
);
923 if (server
.logfile
) fclose(fp
);
926 /*====================== Hash table type implementation ==================== */
/* This is a hash table type that uses the SDS dynamic strings library as
 * keys and redis objects as values (objects can hold SDS strings,
 * lists, sets). */
/* Value destructor for dictionaries whose values are plain zmalloc()ed
 * buffers: the value is simply handed back to zfree(). */
static void dictVanillaFree(void *privdata, void *val)
{
    DICT_NOTUSED(privdata);
    zfree(val);
}
938 static void dictListDestructor(void *privdata
, void *val
)
940 DICT_NOTUSED(privdata
);
941 listRelease((list
*)val
);
944 static int sdsDictKeyCompare(void *privdata
, const void *key1
,
948 DICT_NOTUSED(privdata
);
950 l1
= sdslen((sds
)key1
);
951 l2
= sdslen((sds
)key2
);
952 if (l1
!= l2
) return 0;
953 return memcmp(key1
, key2
, l1
) == 0;
/* Key/value destructor for dictionaries holding redis objects: drops one
 * reference from the object. */
static void dictRedisObjectDestructor(void *privdata, void *val)
{
    DICT_NOTUSED(privdata);

    /* Values of swapped out keys are set to NULL: nothing to release. */
    if (val != NULL)
        decrRefCount(val);
}
964 static int dictObjKeyCompare(void *privdata
, const void *key1
,
967 const robj
*o1
= key1
, *o2
= key2
;
968 return sdsDictKeyCompare(privdata
,o1
->ptr
,o2
->ptr
);
971 static unsigned int dictObjHash(const void *key
) {
973 return dictGenHashFunction(o
->ptr
, sdslen((sds
)o
->ptr
));
976 static int dictEncObjKeyCompare(void *privdata
, const void *key1
,
979 robj
*o1
= (robj
*) key1
, *o2
= (robj
*) key2
;
982 o1
= getDecodedObject(o1
);
983 o2
= getDecodedObject(o2
);
984 cmp
= sdsDictKeyCompare(privdata
,o1
->ptr
,o2
->ptr
);
990 static unsigned int dictEncObjHash(const void *key
) {
991 robj
*o
= (robj
*) key
;
993 if (o
->encoding
== REDIS_ENCODING_RAW
) {
994 return dictGenHashFunction(o
->ptr
, sdslen((sds
)o
->ptr
));
996 if (o
->encoding
== REDIS_ENCODING_INT
) {
1000 len
= snprintf(buf
,32,"%ld",(long)o
->ptr
);
1001 return dictGenHashFunction((unsigned char*)buf
, len
);
1005 o
= getDecodedObject(o
);
1006 hash
= dictGenHashFunction(o
->ptr
, sdslen((sds
)o
->ptr
));
1013 /* Sets type and expires */
1014 static dictType setDictType
= {
1015 dictEncObjHash
, /* hash function */
1018 dictEncObjKeyCompare
, /* key compare */
1019 dictRedisObjectDestructor
, /* key destructor */
1020 NULL
/* val destructor */
1023 /* Sorted sets hash (note: a skiplist is used in addition to the hash table) */
1024 static dictType zsetDictType
= {
1025 dictEncObjHash
, /* hash function */
1028 dictEncObjKeyCompare
, /* key compare */
1029 dictRedisObjectDestructor
, /* key destructor */
1030 dictVanillaFree
/* val destructor of malloc(sizeof(double)) */
1034 static dictType dbDictType
= {
1035 dictObjHash
, /* hash function */
1038 dictObjKeyCompare
, /* key compare */
1039 dictRedisObjectDestructor
, /* key destructor */
1040 dictRedisObjectDestructor
/* val destructor */
1044 static dictType keyptrDictType
= {
1045 dictObjHash
, /* hash function */
1048 dictObjKeyCompare
, /* key compare */
1049 dictRedisObjectDestructor
, /* key destructor */
1050 NULL
/* val destructor */
1053 /* Hash type hash table (note that small hashes are represented with zimpaps) */
1054 static dictType hashDictType
= {
1055 dictEncObjHash
, /* hash function */
1058 dictEncObjKeyCompare
, /* key compare */
1059 dictRedisObjectDestructor
, /* key destructor */
1060 dictRedisObjectDestructor
/* val destructor */
1063 /* Keylist hash table type has unencoded redis objects as keys and
1064 * lists as values. It's used for blocking operations (BLPOP) and to
1065 * map swapped keys to a list of clients waiting for this keys to be loaded. */
1066 static dictType keylistDictType
= {
1067 dictObjHash
, /* hash function */
1070 dictObjKeyCompare
, /* key compare */
1071 dictRedisObjectDestructor
, /* key destructor */
1072 dictListDestructor
/* val destructor */
1075 /* ========================= Random utility functions ======================= */
1077 /* Redis generally does not try to recover from out of memory conditions
1078 * when allocating objects or strings, it is not clear if it will be possible
1079 * to report this condition to the client since the networking layer itself
1080 * is based on heap allocation for send buffers, so we simply abort.
1081 * At least the code will be simpler to read... */
1082 static void oom(const char *msg
) {
1083 redisLog(REDIS_WARNING
, "%s: Out of memory\n",msg
);
1088 /* ====================== Redis server networking stuff ===================== */
1089 static void closeTimedoutClients(void) {
1092 time_t now
= time(NULL
);
1095 listRewind(server
.clients
,&li
);
1096 while ((ln
= listNext(&li
)) != NULL
) {
1097 c
= listNodeValue(ln
);
1098 if (server
.maxidletime
&&
1099 !(c
->flags
& REDIS_SLAVE
) && /* no timeout for slaves */
1100 !(c
->flags
& REDIS_MASTER
) && /* no timeout for masters */
1101 (now
- c
->lastinteraction
> server
.maxidletime
))
1103 redisLog(REDIS_VERBOSE
,"Closing idle client");
1105 } else if (c
->flags
& REDIS_BLOCKED
) {
1106 if (c
->blockingto
!= 0 && c
->blockingto
< now
) {
1107 addReply(c
,shared
.nullmultibulk
);
1108 unblockClientWaitingData(c
);
1114 static int htNeedsResize(dict
*dict
) {
1115 long long size
, used
;
1117 size
= dictSlots(dict
);
1118 used
= dictSize(dict
);
1119 return (size
&& used
&& size
> DICT_HT_INITIAL_SIZE
&&
1120 (used
*100/size
< REDIS_HT_MINFILL
));
1123 /* If the percentage of used slots in the HT reaches REDIS_HT_MINFILL
1124 * we resize the hash table to save memory */
1125 static void tryResizeHashTables(void) {
1128 for (j
= 0; j
< server
.dbnum
; j
++) {
1129 if (htNeedsResize(server
.db
[j
].dict
)) {
1130 redisLog(REDIS_VERBOSE
,"The hash table %d is too sparse, resize it...",j
);
1131 dictResize(server
.db
[j
].dict
);
1132 redisLog(REDIS_VERBOSE
,"Hash table %d resized.",j
);
1134 if (htNeedsResize(server
.db
[j
].expires
))
1135 dictResize(server
.db
[j
].expires
);
1139 /* A background saving child (BGSAVE) terminated its work. Handle this. */
1140 void backgroundSaveDoneHandler(int statloc
) {
1141 int exitcode
= WEXITSTATUS(statloc
);
1142 int bysignal
= WIFSIGNALED(statloc
);
1144 if (!bysignal
&& exitcode
== 0) {
1145 redisLog(REDIS_NOTICE
,
1146 "Background saving terminated with success");
1148 server
.lastsave
= time(NULL
);
1149 } else if (!bysignal
&& exitcode
!= 0) {
1150 redisLog(REDIS_WARNING
, "Background saving error");
1152 redisLog(REDIS_WARNING
,
1153 "Background saving terminated by signal");
1154 rdbRemoveTempFile(server
.bgsavechildpid
);
1156 server
.bgsavechildpid
= -1;
1157 /* Possibly there are slaves waiting for a BGSAVE in order to be served
1158 * (the first stage of SYNC is a bulk transfer of dump.rdb) */
1159 updateSlavesWaitingBgsave(exitcode
== 0 ? REDIS_OK
: REDIS_ERR
);
1162 /* A background append only file rewriting (BGREWRITEAOF) terminated its work.
1164 void backgroundRewriteDoneHandler(int statloc
) {
1165 int exitcode
= WEXITSTATUS(statloc
);
1166 int bysignal
= WIFSIGNALED(statloc
);
1168 if (!bysignal
&& exitcode
== 0) {
1172 redisLog(REDIS_NOTICE
,
1173 "Background append only file rewriting terminated with success");
1174 /* Now it's time to flush the differences accumulated by the parent */
1175 snprintf(tmpfile
,256,"temp-rewriteaof-bg-%d.aof", (int) server
.bgrewritechildpid
);
1176 fd
= open(tmpfile
,O_WRONLY
|O_APPEND
);
1178 redisLog(REDIS_WARNING
, "Not able to open the temp append only file produced by the child: %s", strerror(errno
));
1181 /* Flush our data... */
1182 if (write(fd
,server
.bgrewritebuf
,sdslen(server
.bgrewritebuf
)) !=
1183 (signed) sdslen(server
.bgrewritebuf
)) {
1184 redisLog(REDIS_WARNING
, "Error or short write trying to flush the parent diff of the append log file in the child temp file: %s", strerror(errno
));
1188 redisLog(REDIS_NOTICE
,"Parent diff flushed into the new append log file with success (%lu bytes)",sdslen(server
.bgrewritebuf
));
1189 /* Now our work is to rename the temp file into the stable file. And
1190 * switch the file descriptor used by the server for append only. */
1191 if (rename(tmpfile
,server
.appendfilename
) == -1) {
1192 redisLog(REDIS_WARNING
,"Can't rename the temp append only file into the stable one: %s", strerror(errno
));
1196 /* Mission completed... almost */
1197 redisLog(REDIS_NOTICE
,"Append only file successfully rewritten.");
1198 if (server
.appendfd
!= -1) {
1199 /* If append only is actually enabled... */
1200 close(server
.appendfd
);
1201 server
.appendfd
= fd
;
1203 server
.appendseldb
= -1; /* Make sure it will issue SELECT */
1204 redisLog(REDIS_NOTICE
,"The new append only file was selected for future appends.");
1206 /* If append only is disabled we just generate a dump in this
1207 * format. Why not? */
1210 } else if (!bysignal
&& exitcode
!= 0) {
1211 redisLog(REDIS_WARNING
, "Background append only file rewriting error");
1213 redisLog(REDIS_WARNING
,
1214 "Background append only file rewriting terminated by signal");
1217 sdsfree(server
.bgrewritebuf
);
1218 server
.bgrewritebuf
= sdsempty();
1219 aofRemoveTempFile(server
.bgrewritechildpid
);
1220 server
.bgrewritechildpid
= -1;
1223 static int serverCron(struct aeEventLoop
*eventLoop
, long long id
, void *clientData
) {
1224 int j
, loops
= server
.cronloops
++;
1225 REDIS_NOTUSED(eventLoop
);
1227 REDIS_NOTUSED(clientData
);
1229 /* We take a cached value of the unix time in the global state because
1230 * with virtual memory and aging there is to store the current time
1231 * in objects at every object access, and accuracy is not needed.
1232 * To access a global var is faster than calling time(NULL) */
1233 server
.unixtime
= time(NULL
);
1235 /* Show some info about non-empty databases */
1236 for (j
= 0; j
< server
.dbnum
; j
++) {
1237 long long size
, used
, vkeys
;
1239 size
= dictSlots(server
.db
[j
].dict
);
1240 used
= dictSize(server
.db
[j
].dict
);
1241 vkeys
= dictSize(server
.db
[j
].expires
);
1242 if (!(loops
% 5) && (used
|| vkeys
)) {
1243 redisLog(REDIS_VERBOSE
,"DB %d: %lld keys (%lld volatile) in %lld slots HT.",j
,used
,vkeys
,size
);
1244 /* dictPrintStats(server.dict); */
1248 /* We don't want to resize the hash tables while a bacground saving
1249 * is in progress: the saving child is created using fork() that is
1250 * implemented with a copy-on-write semantic in most modern systems, so
1251 * if we resize the HT while there is the saving child at work actually
1252 * a lot of memory movements in the parent will cause a lot of pages
1254 if (server
.bgsavechildpid
== -1) tryResizeHashTables();
1256 /* Show information about connected clients */
1258 redisLog(REDIS_VERBOSE
,"%d clients connected (%d slaves), %zu bytes in use, %d shared objects",
1259 listLength(server
.clients
)-listLength(server
.slaves
),
1260 listLength(server
.slaves
),
1261 zmalloc_used_memory(),
1262 dictSize(server
.sharingpool
));
1265 /* Close connections of timedout clients */
1266 if ((server
.maxidletime
&& !(loops
% 10)) || server
.blpop_blocked_clients
)
1267 closeTimedoutClients();
1269 /* Check if a background saving or AOF rewrite in progress terminated */
1270 if (server
.bgsavechildpid
!= -1 || server
.bgrewritechildpid
!= -1) {
1274 if ((pid
= wait3(&statloc
,WNOHANG
,NULL
)) != 0) {
1275 if (pid
== server
.bgsavechildpid
) {
1276 backgroundSaveDoneHandler(statloc
);
1278 backgroundRewriteDoneHandler(statloc
);
1282 /* If there is not a background saving in progress check if
1283 * we have to save now */
1284 time_t now
= time(NULL
);
1285 for (j
= 0; j
< server
.saveparamslen
; j
++) {
1286 struct saveparam
*sp
= server
.saveparams
+j
;
1288 if (server
.dirty
>= sp
->changes
&&
1289 now
-server
.lastsave
> sp
->seconds
) {
1290 redisLog(REDIS_NOTICE
,"%d changes in %d seconds. Saving...",
1291 sp
->changes
, sp
->seconds
);
1292 rdbSaveBackground(server
.dbfilename
);
1298 /* Try to expire a few timed out keys. The algorithm used is adaptive and
1299 * will use few CPU cycles if there are few expiring keys, otherwise
1300 * it will get more aggressive to avoid that too much memory is used by
1301 * keys that can be removed from the keyspace. */
1302 for (j
= 0; j
< server
.dbnum
; j
++) {
1304 redisDb
*db
= server
.db
+j
;
1306 /* Continue to expire if at the end of the cycle more than 25%
1307 * of the keys were expired. */
1309 long num
= dictSize(db
->expires
);
1310 time_t now
= time(NULL
);
1313 if (num
> REDIS_EXPIRELOOKUPS_PER_CRON
)
1314 num
= REDIS_EXPIRELOOKUPS_PER_CRON
;
1319 if ((de
= dictGetRandomKey(db
->expires
)) == NULL
) break;
1320 t
= (time_t) dictGetEntryVal(de
);
1322 deleteKey(db
,dictGetEntryKey(de
));
1326 } while (expired
> REDIS_EXPIRELOOKUPS_PER_CRON
/4);
1329 /* Swap a few keys on disk if we are over the memory limit and VM
1330 * is enbled. Try to free objects from the free list first. */
1331 if (vmCanSwapOut()) {
1332 while (server
.vm_enabled
&& zmalloc_used_memory() >
1333 server
.vm_max_memory
)
1337 if (tryFreeOneObjectFromFreelist() == REDIS_OK
) continue;
1338 retval
= (server
.vm_max_threads
== 0) ?
1339 vmSwapOneObjectBlocking() :
1340 vmSwapOneObjectThreaded();
1341 if (retval
== REDIS_ERR
&& (loops
% 30) == 0 &&
1342 zmalloc_used_memory() >
1343 (server
.vm_max_memory
+server
.vm_max_memory
/10))
1345 redisLog(REDIS_WARNING
,"WARNING: vm-max-memory limit exceeded by more than 10%% but unable to swap more objects out!");
1347 /* Note that when using threade I/O we free just one object,
1348 * because anyway when the I/O thread in charge to swap this
1349 * object out will finish, the handler of completed jobs
1350 * will try to swap more objects if we are still out of memory. */
1351 if (retval
== REDIS_ERR
|| server
.vm_max_threads
> 0) break;
1355 /* Check if we should connect to a MASTER */
1356 if (server
.replstate
== REDIS_REPL_CONNECT
) {
1357 redisLog(REDIS_NOTICE
,"Connecting to MASTER...");
1358 if (syncWithMaster() == REDIS_OK
) {
1359 redisLog(REDIS_NOTICE
,"MASTER <-> SLAVE sync succeeded");
1365 /* This function gets called every time Redis is entering the
1366 * main loop of the event driven library, that is, before to sleep
1367 * for ready file descriptors. */
1368 static void beforeSleep(struct aeEventLoop
*eventLoop
) {
1369 REDIS_NOTUSED(eventLoop
);
1371 if (server
.vm_enabled
&& listLength(server
.io_ready_clients
)) {
1375 listRewind(server
.io_ready_clients
,&li
);
1376 while((ln
= listNext(&li
))) {
1377 redisClient
*c
= ln
->value
;
1378 struct redisCommand
*cmd
;
1380 /* Resume the client. */
1381 listDelNode(server
.io_ready_clients
,ln
);
1382 c
->flags
&= (~REDIS_IO_WAIT
);
1383 server
.vm_blocked_clients
--;
1384 aeCreateFileEvent(server
.el
, c
->fd
, AE_READABLE
,
1385 readQueryFromClient
, c
);
1386 cmd
= lookupCommand(c
->argv
[0]->ptr
);
1387 assert(cmd
!= NULL
);
1390 /* There may be more data to process in the input buffer. */
1391 if (c
->querybuf
&& sdslen(c
->querybuf
) > 0)
1392 processInputBuffer(c
);
1397 static void createSharedObjects(void) {
1398 shared
.crlf
= createObject(REDIS_STRING
,sdsnew("\r\n"));
1399 shared
.ok
= createObject(REDIS_STRING
,sdsnew("+OK\r\n"));
1400 shared
.err
= createObject(REDIS_STRING
,sdsnew("-ERR\r\n"));
1401 shared
.emptybulk
= createObject(REDIS_STRING
,sdsnew("$0\r\n\r\n"));
1402 shared
.czero
= createObject(REDIS_STRING
,sdsnew(":0\r\n"));
1403 shared
.cone
= createObject(REDIS_STRING
,sdsnew(":1\r\n"));
1404 shared
.nullbulk
= createObject(REDIS_STRING
,sdsnew("$-1\r\n"));
1405 shared
.nullmultibulk
= createObject(REDIS_STRING
,sdsnew("*-1\r\n"));
1406 shared
.emptymultibulk
= createObject(REDIS_STRING
,sdsnew("*0\r\n"));
1407 shared
.pong
= createObject(REDIS_STRING
,sdsnew("+PONG\r\n"));
1408 shared
.queued
= createObject(REDIS_STRING
,sdsnew("+QUEUED\r\n"));
1409 shared
.wrongtypeerr
= createObject(REDIS_STRING
,sdsnew(
1410 "-ERR Operation against a key holding the wrong kind of value\r\n"));
1411 shared
.nokeyerr
= createObject(REDIS_STRING
,sdsnew(
1412 "-ERR no such key\r\n"));
1413 shared
.syntaxerr
= createObject(REDIS_STRING
,sdsnew(
1414 "-ERR syntax error\r\n"));
1415 shared
.sameobjecterr
= createObject(REDIS_STRING
,sdsnew(
1416 "-ERR source and destination objects are the same\r\n"));
1417 shared
.outofrangeerr
= createObject(REDIS_STRING
,sdsnew(
1418 "-ERR index out of range\r\n"));
1419 shared
.space
= createObject(REDIS_STRING
,sdsnew(" "));
1420 shared
.colon
= createObject(REDIS_STRING
,sdsnew(":"));
1421 shared
.plus
= createObject(REDIS_STRING
,sdsnew("+"));
1422 shared
.select0
= createStringObject("select 0\r\n",10);
1423 shared
.select1
= createStringObject("select 1\r\n",10);
1424 shared
.select2
= createStringObject("select 2\r\n",10);
1425 shared
.select3
= createStringObject("select 3\r\n",10);
1426 shared
.select4
= createStringObject("select 4\r\n",10);
1427 shared
.select5
= createStringObject("select 5\r\n",10);
1428 shared
.select6
= createStringObject("select 6\r\n",10);
1429 shared
.select7
= createStringObject("select 7\r\n",10);
1430 shared
.select8
= createStringObject("select 8\r\n",10);
1431 shared
.select9
= createStringObject("select 9\r\n",10);
1434 static void appendServerSaveParams(time_t seconds
, int changes
) {
1435 server
.saveparams
= zrealloc(server
.saveparams
,sizeof(struct saveparam
)*(server
.saveparamslen
+1));
1436 server
.saveparams
[server
.saveparamslen
].seconds
= seconds
;
1437 server
.saveparams
[server
.saveparamslen
].changes
= changes
;
1438 server
.saveparamslen
++;
1441 static void resetServerSaveParams() {
1442 zfree(server
.saveparams
);
1443 server
.saveparams
= NULL
;
1444 server
.saveparamslen
= 0;
1447 static void initServerConfig() {
1448 server
.dbnum
= REDIS_DEFAULT_DBNUM
;
1449 server
.port
= REDIS_SERVERPORT
;
1450 server
.verbosity
= REDIS_VERBOSE
;
1451 server
.maxidletime
= REDIS_MAXIDLETIME
;
1452 server
.saveparams
= NULL
;
1453 server
.logfile
= NULL
; /* NULL = log on standard output */
1454 server
.bindaddr
= NULL
;
1455 server
.glueoutputbuf
= 1;
1456 server
.daemonize
= 0;
1457 server
.appendonly
= 0;
1458 server
.appendfsync
= APPENDFSYNC_ALWAYS
;
1459 server
.lastfsync
= time(NULL
);
1460 server
.appendfd
= -1;
1461 server
.appendseldb
= -1; /* Make sure the first time will not match */
1462 server
.pidfile
= "/var/run/redis.pid";
1463 server
.dbfilename
= "dump.rdb";
1464 server
.appendfilename
= "appendonly.aof";
1465 server
.requirepass
= NULL
;
1466 server
.shareobjects
= 0;
1467 server
.rdbcompression
= 1;
1468 server
.sharingpoolsize
= 1024;
1469 server
.maxclients
= 0;
1470 server
.blpop_blocked_clients
= 0;
1471 server
.maxmemory
= 0;
1472 server
.vm_enabled
= 0;
1473 server
.vm_swap_file
= zstrdup("/tmp/redis-%p.vm");
1474 server
.vm_page_size
= 256; /* 256 bytes per page */
1475 server
.vm_pages
= 1024*1024*100; /* 104 millions of pages */
1476 server
.vm_max_memory
= 1024LL*1024*1024*1; /* 1 GB of RAM */
1477 server
.vm_max_threads
= 4;
1478 server
.vm_blocked_clients
= 0;
1479 server
.hash_max_zipmap_entries
= REDIS_HASH_MAX_ZIPMAP_ENTRIES
;
1480 server
.hash_max_zipmap_value
= REDIS_HASH_MAX_ZIPMAP_VALUE
;
1482 resetServerSaveParams();
1484 appendServerSaveParams(60*60,1); /* save after 1 hour and 1 change */
1485 appendServerSaveParams(300,100); /* save after 5 minutes and 100 changes */
1486 appendServerSaveParams(60,10000); /* save after 1 minute and 10000 changes */
1487 /* Replication related */
1489 server
.masterauth
= NULL
;
1490 server
.masterhost
= NULL
;
1491 server
.masterport
= 6379;
1492 server
.master
= NULL
;
1493 server
.replstate
= REDIS_REPL_NONE
;
1495 /* Double constants initialization */
1497 R_PosInf
= 1.0/R_Zero
;
1498 R_NegInf
= -1.0/R_Zero
;
1499 R_Nan
= R_Zero
/R_Zero
;
1502 static void initServer() {
1505 signal(SIGHUP
, SIG_IGN
);
1506 signal(SIGPIPE
, SIG_IGN
);
1507 setupSigSegvAction();
1509 server
.devnull
= fopen("/dev/null","w");
1510 if (server
.devnull
== NULL
) {
1511 redisLog(REDIS_WARNING
, "Can't open /dev/null: %s", server
.neterr
);
1514 server
.clients
= listCreate();
1515 server
.slaves
= listCreate();
1516 server
.monitors
= listCreate();
1517 server
.objfreelist
= listCreate();
1518 createSharedObjects();
1519 server
.el
= aeCreateEventLoop();
1520 server
.db
= zmalloc(sizeof(redisDb
)*server
.dbnum
);
1521 server
.sharingpool
= dictCreate(&setDictType
,NULL
);
1522 server
.fd
= anetTcpServer(server
.neterr
, server
.port
, server
.bindaddr
);
1523 if (server
.fd
== -1) {
1524 redisLog(REDIS_WARNING
, "Opening TCP port: %s", server
.neterr
);
1527 for (j
= 0; j
< server
.dbnum
; j
++) {
1528 server
.db
[j
].dict
= dictCreate(&dbDictType
,NULL
);
1529 server
.db
[j
].expires
= dictCreate(&keyptrDictType
,NULL
);
1530 server
.db
[j
].blockingkeys
= dictCreate(&keylistDictType
,NULL
);
1531 if (server
.vm_enabled
)
1532 server
.db
[j
].io_keys
= dictCreate(&keylistDictType
,NULL
);
1533 server
.db
[j
].id
= j
;
1535 server
.cronloops
= 0;
1536 server
.bgsavechildpid
= -1;
1537 server
.bgrewritechildpid
= -1;
1538 server
.bgrewritebuf
= sdsempty();
1539 server
.lastsave
= time(NULL
);
1541 server
.stat_numcommands
= 0;
1542 server
.stat_numconnections
= 0;
1543 server
.stat_starttime
= time(NULL
);
1544 server
.unixtime
= time(NULL
);
1545 aeCreateTimeEvent(server
.el
, 1, serverCron
, NULL
, NULL
);
1546 if (aeCreateFileEvent(server
.el
, server
.fd
, AE_READABLE
,
1547 acceptHandler
, NULL
) == AE_ERR
) oom("creating file event");
1549 if (server
.appendonly
) {
1550 server
.appendfd
= open(server
.appendfilename
,O_WRONLY
|O_APPEND
|O_CREAT
,0644);
1551 if (server
.appendfd
== -1) {
1552 redisLog(REDIS_WARNING
, "Can't open the append-only file: %s",
1558 if (server
.vm_enabled
) vmInit();
1561 /* Empty the whole database */
1562 static long long emptyDb() {
1564 long long removed
= 0;
1566 for (j
= 0; j
< server
.dbnum
; j
++) {
1567 removed
+= dictSize(server
.db
[j
].dict
);
1568 dictEmpty(server
.db
[j
].dict
);
1569 dictEmpty(server
.db
[j
].expires
);
1574 static int yesnotoi(char *s
) {
1575 if (!strcasecmp(s
,"yes")) return 1;
1576 else if (!strcasecmp(s
,"no")) return 0;
1580 /* I agree, this is a very rudimental way to load a configuration...
1581 will improve later if the config gets more complex */
1582 static void loadServerConfig(char *filename
) {
1584 char buf
[REDIS_CONFIGLINE_MAX
+1], *err
= NULL
;
1588 if (filename
[0] == '-' && filename
[1] == '\0')
1591 if ((fp
= fopen(filename
,"r")) == NULL
) {
1592 redisLog(REDIS_WARNING
,"Fatal error, can't open config file");
1597 while(fgets(buf
,REDIS_CONFIGLINE_MAX
+1,fp
) != NULL
) {
1603 line
= sdstrim(line
," \t\r\n");
1605 /* Skip comments and blank lines*/
1606 if (line
[0] == '#' || line
[0] == '\0') {
1611 /* Split into arguments */
1612 argv
= sdssplitlen(line
,sdslen(line
)," ",1,&argc
);
1613 sdstolower(argv
[0]);
1615 /* Execute config directives */
1616 if (!strcasecmp(argv
[0],"timeout") && argc
== 2) {
1617 server
.maxidletime
= atoi(argv
[1]);
1618 if (server
.maxidletime
< 0) {
1619 err
= "Invalid timeout value"; goto loaderr
;
1621 } else if (!strcasecmp(argv
[0],"port") && argc
== 2) {
1622 server
.port
= atoi(argv
[1]);
1623 if (server
.port
< 1 || server
.port
> 65535) {
1624 err
= "Invalid port"; goto loaderr
;
1626 } else if (!strcasecmp(argv
[0],"bind") && argc
== 2) {
1627 server
.bindaddr
= zstrdup(argv
[1]);
1628 } else if (!strcasecmp(argv
[0],"save") && argc
== 3) {
1629 int seconds
= atoi(argv
[1]);
1630 int changes
= atoi(argv
[2]);
1631 if (seconds
< 1 || changes
< 0) {
1632 err
= "Invalid save parameters"; goto loaderr
;
1634 appendServerSaveParams(seconds
,changes
);
1635 } else if (!strcasecmp(argv
[0],"dir") && argc
== 2) {
1636 if (chdir(argv
[1]) == -1) {
1637 redisLog(REDIS_WARNING
,"Can't chdir to '%s': %s",
1638 argv
[1], strerror(errno
));
1641 } else if (!strcasecmp(argv
[0],"loglevel") && argc
== 2) {
1642 if (!strcasecmp(argv
[1],"debug")) server
.verbosity
= REDIS_DEBUG
;
1643 else if (!strcasecmp(argv
[1],"verbose")) server
.verbosity
= REDIS_VERBOSE
;
1644 else if (!strcasecmp(argv
[1],"notice")) server
.verbosity
= REDIS_NOTICE
;
1645 else if (!strcasecmp(argv
[1],"warning")) server
.verbosity
= REDIS_WARNING
;
1647 err
= "Invalid log level. Must be one of debug, notice, warning";
1650 } else if (!strcasecmp(argv
[0],"logfile") && argc
== 2) {
1653 server
.logfile
= zstrdup(argv
[1]);
1654 if (!strcasecmp(server
.logfile
,"stdout")) {
1655 zfree(server
.logfile
);
1656 server
.logfile
= NULL
;
1658 if (server
.logfile
) {
1659 /* Test if we are able to open the file. The server will not
1660 * be able to abort just for this problem later... */
1661 logfp
= fopen(server
.logfile
,"a");
1662 if (logfp
== NULL
) {
1663 err
= sdscatprintf(sdsempty(),
1664 "Can't open the log file: %s", strerror(errno
));
1669 } else if (!strcasecmp(argv
[0],"databases") && argc
== 2) {
1670 server
.dbnum
= atoi(argv
[1]);
1671 if (server
.dbnum
< 1) {
1672 err
= "Invalid number of databases"; goto loaderr
;
1674 } else if (!strcasecmp(argv
[0],"maxclients") && argc
== 2) {
1675 server
.maxclients
= atoi(argv
[1]);
1676 } else if (!strcasecmp(argv
[0],"maxmemory") && argc
== 2) {
1677 server
.maxmemory
= strtoll(argv
[1], NULL
, 10);
1678 } else if (!strcasecmp(argv
[0],"slaveof") && argc
== 3) {
1679 server
.masterhost
= sdsnew(argv
[1]);
1680 server
.masterport
= atoi(argv
[2]);
1681 server
.replstate
= REDIS_REPL_CONNECT
;
1682 } else if (!strcasecmp(argv
[0],"masterauth") && argc
== 2) {
1683 server
.masterauth
= zstrdup(argv
[1]);
1684 } else if (!strcasecmp(argv
[0],"glueoutputbuf") && argc
== 2) {
1685 if ((server
.glueoutputbuf
= yesnotoi(argv
[1])) == -1) {
1686 err
= "argument must be 'yes' or 'no'"; goto loaderr
;
1688 } else if (!strcasecmp(argv
[0],"shareobjects") && argc
== 2) {
1689 if ((server
.shareobjects
= yesnotoi(argv
[1])) == -1) {
1690 err
= "argument must be 'yes' or 'no'"; goto loaderr
;
1692 } else if (!strcasecmp(argv
[0],"rdbcompression") && argc
== 2) {
1693 if ((server
.rdbcompression
= yesnotoi(argv
[1])) == -1) {
1694 err
= "argument must be 'yes' or 'no'"; goto loaderr
;
1696 } else if (!strcasecmp(argv
[0],"shareobjectspoolsize") && argc
== 2) {
1697 server
.sharingpoolsize
= atoi(argv
[1]);
1698 if (server
.sharingpoolsize
< 1) {
1699 err
= "invalid object sharing pool size"; goto loaderr
;
1701 } else if (!strcasecmp(argv
[0],"daemonize") && argc
== 2) {
1702 if ((server
.daemonize
= yesnotoi(argv
[1])) == -1) {
1703 err
= "argument must be 'yes' or 'no'"; goto loaderr
;
1705 } else if (!strcasecmp(argv
[0],"appendonly") && argc
== 2) {
1706 if ((server
.appendonly
= yesnotoi(argv
[1])) == -1) {
1707 err
= "argument must be 'yes' or 'no'"; goto loaderr
;
1709 } else if (!strcasecmp(argv
[0],"appendfsync") && argc
== 2) {
1710 if (!strcasecmp(argv
[1],"no")) {
1711 server
.appendfsync
= APPENDFSYNC_NO
;
1712 } else if (!strcasecmp(argv
[1],"always")) {
1713 server
.appendfsync
= APPENDFSYNC_ALWAYS
;
1714 } else if (!strcasecmp(argv
[1],"everysec")) {
1715 server
.appendfsync
= APPENDFSYNC_EVERYSEC
;
1717 err
= "argument must be 'no', 'always' or 'everysec'";
1720 } else if (!strcasecmp(argv
[0],"requirepass") && argc
== 2) {
1721 server
.requirepass
= zstrdup(argv
[1]);
1722 } else if (!strcasecmp(argv
[0],"pidfile") && argc
== 2) {
1723 server
.pidfile
= zstrdup(argv
[1]);
1724 } else if (!strcasecmp(argv
[0],"dbfilename") && argc
== 2) {
1725 server
.dbfilename
= zstrdup(argv
[1]);
1726 } else if (!strcasecmp(argv
[0],"vm-enabled") && argc
== 2) {
1727 if ((server
.vm_enabled
= yesnotoi(argv
[1])) == -1) {
1728 err
= "argument must be 'yes' or 'no'"; goto loaderr
;
1730 } else if (!strcasecmp(argv
[0],"vm-swap-file") && argc
== 2) {
1731 zfree(server
.vm_swap_file
);
1732 server
.vm_swap_file
= zstrdup(argv
[1]);
1733 } else if (!strcasecmp(argv
[0],"vm-max-memory") && argc
== 2) {
1734 server
.vm_max_memory
= strtoll(argv
[1], NULL
, 10);
1735 } else if (!strcasecmp(argv
[0],"vm-page-size") && argc
== 2) {
1736 server
.vm_page_size
= strtoll(argv
[1], NULL
, 10);
1737 } else if (!strcasecmp(argv
[0],"vm-pages") && argc
== 2) {
1738 server
.vm_pages
= strtoll(argv
[1], NULL
, 10);
1739 } else if (!strcasecmp(argv
[0],"vm-max-threads") && argc
== 2) {
1740 server
.vm_max_threads
= strtoll(argv
[1], NULL
, 10);
1741 } else if (!strcasecmp(argv
[0],"hash-max-zipmap-entries") && argc
== 2){
1742 server
.hash_max_zipmap_entries
= strtol(argv
[1], NULL
, 10);
1743 } else if (!strcasecmp(argv
[0],"hash-max-zipmap-value") && argc
== 2){
1744 server
.hash_max_zipmap_value
= strtol(argv
[1], NULL
, 10);
1745 } else if (!strcasecmp(argv
[0],"vm-max-threads") && argc
== 2) {
1746 server
.vm_max_threads
= strtoll(argv
[1], NULL
, 10);
1748 err
= "Bad directive or wrong number of arguments"; goto loaderr
;
1750 for (j
= 0; j
< argc
; j
++)
1755 if (fp
!= stdin
) fclose(fp
);
1759 fprintf(stderr
, "\n*** FATAL CONFIG FILE ERROR ***\n");
1760 fprintf(stderr
, "Reading the configuration file, at line %d\n", linenum
);
1761 fprintf(stderr
, ">>> '%s'\n", line
);
1762 fprintf(stderr
, "%s\n", err
);
1766 static void freeClientArgv(redisClient
*c
) {
1769 for (j
= 0; j
< c
->argc
; j
++)
1770 decrRefCount(c
->argv
[j
]);
1771 for (j
= 0; j
< c
->mbargc
; j
++)
1772 decrRefCount(c
->mbargv
[j
]);
1777 static void freeClient(redisClient
*c
) {
1780 /* Note that if the client we are freeing is blocked in a blocking
1781 * call, we have to set querybuf to NULL *before* calling
1782 * unblockClientWaitingData(), so that processInputBuffer() does not get
1783 * called. It is also important to remove the file events after
1784 * this, because this call adds the READABLE event. */
1785 sdsfree(c
->querybuf
);
1787 if (c
->flags
& REDIS_BLOCKED
)
1788 unblockClientWaitingData(c
);
1790 aeDeleteFileEvent(server
.el
,c
->fd
,AE_READABLE
);
1791 aeDeleteFileEvent(server
.el
,c
->fd
,AE_WRITABLE
);
1792 listRelease(c
->reply
);
1795 /* Remove from the list of clients */
1796 ln
= listSearchKey(server
.clients
,c
);
1797 redisAssert(ln
!= NULL
);
1798 listDelNode(server
.clients
,ln
);
1799 /* Remove from the list of clients waiting for swapped keys */
1800 if (c
->flags
& REDIS_IO_WAIT
&& listLength(c
->io_keys
) == 0) {
1801 ln
= listSearchKey(server
.io_ready_clients
,c
);
1803 listDelNode(server
.io_ready_clients
,ln
);
1804 server
.vm_blocked_clients
--;
1807 while (server
.vm_enabled
&& listLength(c
->io_keys
)) {
1808 ln
= listFirst(c
->io_keys
);
1809 dontWaitForSwappedKey(c
,ln
->value
);
1811 listRelease(c
->io_keys
);
1813 if (c
->flags
& REDIS_SLAVE
) {
1814 if (c
->replstate
== REDIS_REPL_SEND_BULK
&& c
->repldbfd
!= -1)
1816 list
*l
= (c
->flags
& REDIS_MONITOR
) ? server
.monitors
: server
.slaves
;
1817 ln
= listSearchKey(l
,c
);
1818 redisAssert(ln
!= NULL
);
1821 if (c
->flags
& REDIS_MASTER
) {
1822 server
.master
= NULL
;
1823 server
.replstate
= REDIS_REPL_CONNECT
;
1827 freeClientMultiState(c
);
1831 #define GLUEREPLY_UP_TO (1024)
1832 static void glueReplyBuffersIfNeeded(redisClient
*c
) {
1834 char buf
[GLUEREPLY_UP_TO
];
1839 listRewind(c
->reply
,&li
);
1840 while((ln
= listNext(&li
))) {
1844 objlen
= sdslen(o
->ptr
);
1845 if (copylen
+ objlen
<= GLUEREPLY_UP_TO
) {
1846 memcpy(buf
+copylen
,o
->ptr
,objlen
);
1848 listDelNode(c
->reply
,ln
);
1850 if (copylen
== 0) return;
1854 /* Now the output buffer is empty, add the new single element */
1855 o
= createObject(REDIS_STRING
,sdsnewlen(buf
,copylen
));
1856 listAddNodeHead(c
->reply
,o
);
1859 static void sendReplyToClient(aeEventLoop
*el
, int fd
, void *privdata
, int mask
) {
1860 redisClient
*c
= privdata
;
1861 int nwritten
= 0, totwritten
= 0, objlen
;
1864 REDIS_NOTUSED(mask
);
1866 /* Use writev() if we have enough buffers to send */
1867 if (!server
.glueoutputbuf
&&
1868 listLength(c
->reply
) > REDIS_WRITEV_THRESHOLD
&&
1869 !(c
->flags
& REDIS_MASTER
))
1871 sendReplyToClientWritev(el
, fd
, privdata
, mask
);
1875 while(listLength(c
->reply
)) {
1876 if (server
.glueoutputbuf
&& listLength(c
->reply
) > 1)
1877 glueReplyBuffersIfNeeded(c
);
1879 o
= listNodeValue(listFirst(c
->reply
));
1880 objlen
= sdslen(o
->ptr
);
1883 listDelNode(c
->reply
,listFirst(c
->reply
));
1887 if (c
->flags
& REDIS_MASTER
) {
1888 /* Don't reply to a master */
1889 nwritten
= objlen
- c
->sentlen
;
1891 nwritten
= write(fd
, ((char*)o
->ptr
)+c
->sentlen
, objlen
- c
->sentlen
);
1892 if (nwritten
<= 0) break;
1894 c
->sentlen
+= nwritten
;
1895 totwritten
+= nwritten
;
1896 /* If we fully sent the object on head go to the next one */
1897 if (c
->sentlen
== objlen
) {
1898 listDelNode(c
->reply
,listFirst(c
->reply
));
1901 /* Note that we avoid sending more than REDIS_MAX_WRITE_PER_EVENT
1902 * bytes, in a single threaded server it's a good idea to serve
1903 * other clients as well, even if a very large request comes from a
1904 * super fast link that is always able to accept data (in real world
1905 * scenario think about 'KEYS *' against the loopback interface) */
1906 if (totwritten
> REDIS_MAX_WRITE_PER_EVENT
) break;
1908 if (nwritten
== -1) {
1909 if (errno
== EAGAIN
) {
1912 redisLog(REDIS_VERBOSE
,
1913 "Error writing to client: %s", strerror(errno
));
1918 if (totwritten
> 0) c
->lastinteraction
= time(NULL
);
1919 if (listLength(c
->reply
) == 0) {
1921 aeDeleteFileEvent(server
.el
,c
->fd
,AE_WRITABLE
);
1925 static void sendReplyToClientWritev(aeEventLoop
*el
, int fd
, void *privdata
, int mask
)
1927 redisClient
*c
= privdata
;
1928 int nwritten
= 0, totwritten
= 0, objlen
, willwrite
;
1930 struct iovec iov
[REDIS_WRITEV_IOVEC_COUNT
];
1931 int offset
, ion
= 0;
1933 REDIS_NOTUSED(mask
);
1936 while (listLength(c
->reply
)) {
1937 offset
= c
->sentlen
;
1941 /* fill-in the iov[] array */
1942 for(node
= listFirst(c
->reply
); node
; node
= listNextNode(node
)) {
1943 o
= listNodeValue(node
);
1944 objlen
= sdslen(o
->ptr
);
1946 if (totwritten
+ objlen
- offset
> REDIS_MAX_WRITE_PER_EVENT
)
1949 if(ion
== REDIS_WRITEV_IOVEC_COUNT
)
1950 break; /* no more iovecs */
1952 iov
[ion
].iov_base
= ((char*)o
->ptr
) + offset
;
1953 iov
[ion
].iov_len
= objlen
- offset
;
1954 willwrite
+= objlen
- offset
;
1955 offset
= 0; /* just for the first item */
1962 /* write all collected blocks at once */
1963 if((nwritten
= writev(fd
, iov
, ion
)) < 0) {
1964 if (errno
!= EAGAIN
) {
1965 redisLog(REDIS_VERBOSE
,
1966 "Error writing to client: %s", strerror(errno
));
1973 totwritten
+= nwritten
;
1974 offset
= c
->sentlen
;
1976 /* remove written robjs from c->reply */
1977 while (nwritten
&& listLength(c
->reply
)) {
1978 o
= listNodeValue(listFirst(c
->reply
));
1979 objlen
= sdslen(o
->ptr
);
1981 if(nwritten
>= objlen
- offset
) {
1982 listDelNode(c
->reply
, listFirst(c
->reply
));
1983 nwritten
-= objlen
- offset
;
1987 c
->sentlen
+= nwritten
;
1995 c
->lastinteraction
= time(NULL
);
1997 if (listLength(c
->reply
) == 0) {
1999 aeDeleteFileEvent(server
.el
,c
->fd
,AE_WRITABLE
);
2003 static struct redisCommand
*lookupCommand(char *name
) {
2005 while(cmdTable
[j
].name
!= NULL
) {
2006 if (!strcasecmp(name
,cmdTable
[j
].name
)) return &cmdTable
[j
];
2012 /* resetClient prepare the client to process the next command */
2013 static void resetClient(redisClient
*c
) {
2019 /* Call() is the core of Redis execution of a command */
2020 static void call(redisClient
*c
, struct redisCommand
*cmd
) {
2023 dirty
= server
.dirty
;
2025 if (server
.appendonly
&& server
.dirty
-dirty
)
2026 feedAppendOnlyFile(cmd
,c
->db
->id
,c
->argv
,c
->argc
);
2027 if (server
.dirty
-dirty
&& listLength(server
.slaves
))
2028 replicationFeedSlaves(server
.slaves
,cmd
,c
->db
->id
,c
->argv
,c
->argc
);
2029 if (listLength(server
.monitors
))
2030 replicationFeedSlaves(server
.monitors
,cmd
,c
->db
->id
,c
->argv
,c
->argc
);
2031 server
.stat_numcommands
++;
2034 /* If this function gets called we already read a whole
2035 * command, arguments are in the client argv/argc fields.
2036 * processCommand() executes the command or prepares the
2037 * server for a bulk read from the client.
2039 * If 1 is returned the client is still alive and valid and
2040 * other operations can be performed by the caller. Otherwise
2041 * if 0 is returned the client was destroyed (i.e. after QUIT). */
2042 static int processCommand(redisClient
*c
) {
2043 struct redisCommand
*cmd
;
2045 /* Free some memory if needed (maxmemory setting) */
2046 if (server
.maxmemory
) freeMemoryIfNeeded();
2048 /* Handle the multi bulk command type. This is an alternative protocol
2049 * supported by Redis in order to receive commands that are composed of
2050 * multiple binary-safe "bulk" arguments. The latency of processing is
2051 * a bit higher but this allows things like multi-sets, so if this
2052 * protocol is used only for MSET and similar commands this is a big win. */
2053 if (c
->multibulk
== 0 && c
->argc
== 1 && ((char*)(c
->argv
[0]->ptr
))[0] == '*') {
2054 c
->multibulk
= atoi(((char*)c
->argv
[0]->ptr
)+1);
2055 if (c
->multibulk
<= 0) {
2059 decrRefCount(c
->argv
[c
->argc
-1]);
2063 } else if (c
->multibulk
) {
2064 if (c
->bulklen
== -1) {
2065 if (((char*)c
->argv
[0]->ptr
)[0] != '$') {
2066 addReplySds(c
,sdsnew("-ERR multi bulk protocol error\r\n"));
2070 int bulklen
= atoi(((char*)c
->argv
[0]->ptr
)+1);
2071 decrRefCount(c
->argv
[0]);
2072 if (bulklen
< 0 || bulklen
> 1024*1024*1024) {
2074 addReplySds(c
,sdsnew("-ERR invalid bulk write count\r\n"));
2079 c
->bulklen
= bulklen
+2; /* add two bytes for CR+LF */
2083 c
->mbargv
= zrealloc(c
->mbargv
,(sizeof(robj
*))*(c
->mbargc
+1));
2084 c
->mbargv
[c
->mbargc
] = c
->argv
[0];
2088 if (c
->multibulk
== 0) {
2092 /* Here we need to swap the multi-bulk argc/argv with the
2093 * normal argc/argv of the client structure. */
2095 c
->argv
= c
->mbargv
;
2096 c
->mbargv
= auxargv
;
2099 c
->argc
= c
->mbargc
;
2100 c
->mbargc
= auxargc
;
2102 /* We need to set bulklen to something different than -1
2103 * in order for the code below to process the command without
2104 * trying to read the last argument of a bulk command as
2105 * a special argument. */
2107 /* continue below and process the command */
2114 /* -- end of multi bulk commands processing -- */
2116 /* The QUIT command is handled as a special case. Normal command
2117 * procs are unable to close the client connection safely */
2118 if (!strcasecmp(c
->argv
[0]->ptr
,"quit")) {
2123 /* Now lookup the command and check ASAP about trivial error conditions
2124 * such as wrong arity, bad command name and so forth. */
2125 cmd
= lookupCommand(c
->argv
[0]->ptr
);
2128 sdscatprintf(sdsempty(), "-ERR unknown command '%s'\r\n",
2129 (char*)c
->argv
[0]->ptr
));
2132 } else if ((cmd
->arity
> 0 && cmd
->arity
!= c
->argc
) ||
2133 (c
->argc
< -cmd
->arity
)) {
2135 sdscatprintf(sdsempty(),
2136 "-ERR wrong number of arguments for '%s' command\r\n",
2140 } else if (server
.maxmemory
&& cmd
->flags
& REDIS_CMD_DENYOOM
&& zmalloc_used_memory() > server
.maxmemory
) {
2141 addReplySds(c
,sdsnew("-ERR command not allowed when used memory > 'maxmemory'\r\n"));
2144 } else if (cmd
->flags
& REDIS_CMD_BULK
&& c
->bulklen
== -1) {
2145 /* This is a bulk command, we still have to read the last argument. */
2146 int bulklen
= atoi(c
->argv
[c
->argc
-1]->ptr
);
2148 decrRefCount(c
->argv
[c
->argc
-1]);
2149 if (bulklen
< 0 || bulklen
> 1024*1024*1024) {
2151 addReplySds(c
,sdsnew("-ERR invalid bulk write count\r\n"));
2156 c
->bulklen
= bulklen
+2; /* add two bytes for CR+LF */
2157 /* It is possible that the bulk read is already in the
2158 * buffer. Check this condition and handle it accordingly.
2159 * This is just a fast path, an alternative to calling processInputBuffer().
2160 * It's a good idea since the code is small and this condition
2161 * happens most of the time. */
2162 if ((signed)sdslen(c
->querybuf
) >= c
->bulklen
) {
2163 c
->argv
[c
->argc
] = createStringObject(c
->querybuf
,c
->bulklen
-2);
2165 c
->querybuf
= sdsrange(c
->querybuf
,c
->bulklen
,-1);
2167 /* Otherwise return... the last argument still has to be read
2168 * from the socket. */
2172 /* Let's try to share objects on the command arguments vector */
2173 if (server
.shareobjects
) {
2175 for(j
= 1; j
< c
->argc
; j
++)
2176 c
->argv
[j
] = tryObjectSharing(c
->argv
[j
]);
2178 /* Let's try to encode the bulk object to save space. */
2179 if (cmd
->flags
& REDIS_CMD_BULK
)
2180 tryObjectEncoding(c
->argv
[c
->argc
-1]);
2182 /* Check if the user is authenticated */
2183 if (server
.requirepass
&& !c
->authenticated
&& cmd
->proc
!= authCommand
) {
2184 addReplySds(c
,sdsnew("-ERR operation not permitted\r\n"));
2189 /* Exec the command */
2190 if (c
->flags
& REDIS_MULTI
&& cmd
->proc
!= execCommand
&& cmd
->proc
!= discardCommand
) {
2191 queueMultiCommand(c
,cmd
);
2192 addReply(c
,shared
.queued
);
2194 if (server
.vm_enabled
&& server
.vm_max_threads
> 0 &&
2195 blockClientOnSwappedKeys(cmd
,c
)) return 1;
2199 /* Prepare the client for the next command */
2204 static void replicationFeedSlaves(list
*slaves
, struct redisCommand
*cmd
, int dictid
, robj
**argv
, int argc
) {
2209 /* (args*2)+1 is enough room for args, spaces, newlines */
2210 robj
*static_outv
[REDIS_STATIC_ARGS
*2+1];
2212 if (argc
<= REDIS_STATIC_ARGS
) {
2215 outv
= zmalloc(sizeof(robj
*)*(argc
*2+1));
2218 for (j
= 0; j
< argc
; j
++) {
2219 if (j
!= 0) outv
[outc
++] = shared
.space
;
2220 if ((cmd
->flags
& REDIS_CMD_BULK
) && j
== argc
-1) {
2223 lenobj
= createObject(REDIS_STRING
,
2224 sdscatprintf(sdsempty(),"%lu\r\n",
2225 (unsigned long) stringObjectLen(argv
[j
])));
2226 lenobj
->refcount
= 0;
2227 outv
[outc
++] = lenobj
;
2229 outv
[outc
++] = argv
[j
];
2231 outv
[outc
++] = shared
.crlf
;
2233 /* Increment all the refcounts at start and decrement at end in order to
2234 * be sure to free objects if there is no slave in a replication state
2235 * able to be fed with commands */
2236 for (j
= 0; j
< outc
; j
++) incrRefCount(outv
[j
]);
2237 listRewind(slaves
,&li
);
2238 while((ln
= listNext(&li
))) {
2239 redisClient
*slave
= ln
->value
;
2241 /* Don't feed slaves that are still waiting for BGSAVE to start */
2242 if (slave
->replstate
== REDIS_REPL_WAIT_BGSAVE_START
) continue;
2244 /* Feed all the other slaves, MONITORs and so on */
2245 if (slave
->slaveseldb
!= dictid
) {
2249 case 0: selectcmd
= shared
.select0
; break;
2250 case 1: selectcmd
= shared
.select1
; break;
2251 case 2: selectcmd
= shared
.select2
; break;
2252 case 3: selectcmd
= shared
.select3
; break;
2253 case 4: selectcmd
= shared
.select4
; break;
2254 case 5: selectcmd
= shared
.select5
; break;
2255 case 6: selectcmd
= shared
.select6
; break;
2256 case 7: selectcmd
= shared
.select7
; break;
2257 case 8: selectcmd
= shared
.select8
; break;
2258 case 9: selectcmd
= shared
.select9
; break;
2260 selectcmd
= createObject(REDIS_STRING
,
2261 sdscatprintf(sdsempty(),"select %d\r\n",dictid
));
2262 selectcmd
->refcount
= 0;
2265 addReply(slave
,selectcmd
);
2266 slave
->slaveseldb
= dictid
;
2268 for (j
= 0; j
< outc
; j
++) addReply(slave
,outv
[j
]);
2270 for (j
= 0; j
< outc
; j
++) decrRefCount(outv
[j
]);
2271 if (outv
!= static_outv
) zfree(outv
);
2274 static void processInputBuffer(redisClient
*c
) {
2276 /* Before processing the input buffer, make sure the client is not
2277 * waiting for a blocking operation such as BLPOP. Note that on the first
2278 * iteration the client is never blocked, otherwise processInputBuffer
2279 * would not be called at all, but after the execution of the first commands
2280 * in the input buffer the client may be blocked, and the "goto again"
2281 * will try to reiterate. The following line will make it return asap. */
2282 if (c
->flags
& REDIS_BLOCKED
|| c
->flags
& REDIS_IO_WAIT
) return;
2283 if (c
->bulklen
== -1) {
2284 /* Read the first line of the query */
2285 char *p
= strchr(c
->querybuf
,'\n');
2292 query
= c
->querybuf
;
2293 c
->querybuf
= sdsempty();
2294 querylen
= 1+(p
-(query
));
2295 if (sdslen(query
) > querylen
) {
2296 /* leave data after the first line of the query in the buffer */
2297 c
->querybuf
= sdscatlen(c
->querybuf
,query
+querylen
,sdslen(query
)-querylen
);
2299 *p
= '\0'; /* remove "\n" */
2300 if (*(p
-1) == '\r') *(p
-1) = '\0'; /* and "\r" if any */
2301 sdsupdatelen(query
);
2303 /* Now we can split the query in arguments */
2304 argv
= sdssplitlen(query
,sdslen(query
)," ",1,&argc
);
2307 if (c
->argv
) zfree(c
->argv
);
2308 c
->argv
= zmalloc(sizeof(robj
*)*argc
);
2310 for (j
= 0; j
< argc
; j
++) {
2311 if (sdslen(argv
[j
])) {
2312 c
->argv
[c
->argc
] = createObject(REDIS_STRING
,argv
[j
]);
2320 /* Execute the command. If the client is still valid
2321 * after processCommand() return and there is something
2322 * on the query buffer try to process the next command. */
2323 if (processCommand(c
) && sdslen(c
->querybuf
)) goto again
;
2325 /* Nothing to process, argc == 0. Just process the query
2326 * buffer if it's not empty or return to the caller */
2327 if (sdslen(c
->querybuf
)) goto again
;
2330 } else if (sdslen(c
->querybuf
) >= REDIS_REQUEST_MAX_SIZE
) {
2331 redisLog(REDIS_VERBOSE
, "Client protocol error");
2336 /* Bulk read handling. Note that if we are at this point
2337 the client already sent a command terminated with a newline,
2338 we are reading the bulk data that is actually the last
2339 argument of the command. */
2340 int qbl
= sdslen(c
->querybuf
);
2342 if (c
->bulklen
<= qbl
) {
2343 /* Copy everything but the final CRLF as final argument */
2344 c
->argv
[c
->argc
] = createStringObject(c
->querybuf
,c
->bulklen
-2);
2346 c
->querybuf
= sdsrange(c
->querybuf
,c
->bulklen
,-1);
2347 /* Process the command. If the client is still valid after
2348 * the processing and there is more data in the buffer
2349 * try to parse it. */
2350 if (processCommand(c
) && sdslen(c
->querybuf
)) goto again
;
2356 static void readQueryFromClient(aeEventLoop
*el
, int fd
, void *privdata
, int mask
) {
2357 redisClient
*c
= (redisClient
*) privdata
;
2358 char buf
[REDIS_IOBUF_LEN
];
2361 REDIS_NOTUSED(mask
);
2363 nread
= read(fd
, buf
, REDIS_IOBUF_LEN
);
2365 if (errno
== EAGAIN
) {
2368 redisLog(REDIS_VERBOSE
, "Reading from client: %s",strerror(errno
));
2372 } else if (nread
== 0) {
2373 redisLog(REDIS_VERBOSE
, "Client closed connection");
2378 c
->querybuf
= sdscatlen(c
->querybuf
, buf
, nread
);
2379 c
->lastinteraction
= time(NULL
);
2383 if (!(c
->flags
& REDIS_BLOCKED
))
2384 processInputBuffer(c
);
2387 static int selectDb(redisClient
*c
, int id
) {
2388 if (id
< 0 || id
>= server
.dbnum
)
2390 c
->db
= &server
.db
[id
];
2394 static void *dupClientReplyValue(void *o
) {
2395 incrRefCount((robj
*)o
);
2399 static redisClient
*createClient(int fd
) {
2400 redisClient
*c
= zmalloc(sizeof(*c
));
2402 anetNonBlock(NULL
,fd
);
2403 anetTcpNoDelay(NULL
,fd
);
2404 if (!c
) return NULL
;
2407 c
->querybuf
= sdsempty();
2416 c
->lastinteraction
= time(NULL
);
2417 c
->authenticated
= 0;
2418 c
->replstate
= REDIS_REPL_NONE
;
2419 c
->reply
= listCreate();
2420 listSetFreeMethod(c
->reply
,decrRefCount
);
2421 listSetDupMethod(c
->reply
,dupClientReplyValue
);
2422 c
->blockingkeys
= NULL
;
2423 c
->blockingkeysnum
= 0;
2424 c
->io_keys
= listCreate();
2425 listSetFreeMethod(c
->io_keys
,decrRefCount
);
2426 if (aeCreateFileEvent(server
.el
, c
->fd
, AE_READABLE
,
2427 readQueryFromClient
, c
) == AE_ERR
) {
2431 listAddNodeTail(server
.clients
,c
);
2432 initClientMultiState(c
);
2436 static void addReply(redisClient
*c
, robj
*obj
) {
2437 if (listLength(c
->reply
) == 0 &&
2438 (c
->replstate
== REDIS_REPL_NONE
||
2439 c
->replstate
== REDIS_REPL_ONLINE
) &&
2440 aeCreateFileEvent(server
.el
, c
->fd
, AE_WRITABLE
,
2441 sendReplyToClient
, c
) == AE_ERR
) return;
2443 if (server
.vm_enabled
&& obj
->storage
!= REDIS_VM_MEMORY
) {
2444 obj
= dupStringObject(obj
);
2445 obj
->refcount
= 0; /* getDecodedObject() will increment the refcount */
2447 listAddNodeTail(c
->reply
,getDecodedObject(obj
));
2450 static void addReplySds(redisClient
*c
, sds s
) {
2451 robj
*o
= createObject(REDIS_STRING
,s
);
2456 static void addReplyDouble(redisClient
*c
, double d
) {
2459 snprintf(buf
,sizeof(buf
),"%.17g",d
);
2460 addReplySds(c
,sdscatprintf(sdsempty(),"$%lu\r\n%s\r\n",
2461 (unsigned long) strlen(buf
),buf
));
2464 static void addReplyLong(redisClient
*c
, long l
) {
2468 len
= snprintf(buf
,sizeof(buf
),":%ld\r\n",l
);
2469 addReplySds(c
,sdsnewlen(buf
,len
));
2472 static void addReplyBulkLen(redisClient
*c
, robj
*obj
) {
2475 if (obj
->encoding
== REDIS_ENCODING_RAW
) {
2476 len
= sdslen(obj
->ptr
);
2478 long n
= (long)obj
->ptr
;
2480 /* Compute how many bytes this integer will take as a radix 10 string */
2486 while((n
= n
/10) != 0) {
2490 addReplySds(c
,sdscatprintf(sdsempty(),"$%lu\r\n",(unsigned long)len
));
2493 static void acceptHandler(aeEventLoop
*el
, int fd
, void *privdata
, int mask
) {
2498 REDIS_NOTUSED(mask
);
2499 REDIS_NOTUSED(privdata
);
2501 cfd
= anetAccept(server
.neterr
, fd
, cip
, &cport
);
2502 if (cfd
== AE_ERR
) {
2503 redisLog(REDIS_VERBOSE
,"Accepting client connection: %s", server
.neterr
);
2506 redisLog(REDIS_VERBOSE
,"Accepted %s:%d", cip
, cport
);
2507 if ((c
= createClient(cfd
)) == NULL
) {
2508 redisLog(REDIS_WARNING
,"Error allocating resoures for the client");
2509 close(cfd
); /* May be already closed, just ingore errors */
2512 /* If maxclients directive is set and this is one client more... close the
2513 * connection. Note that we create the client instead of checking for
2514 * this condition beforehand, since now the socket is already set in
2515 * nonblocking mode and we can send an error for free using the Kernel I/O */
2516 if (server
.maxclients
&& listLength(server
.clients
) > server
.maxclients
) {
2517 char *err
= "-ERR max number of clients reached\r\n";
2519 /* That's a best effort error message, don't check write errors */
2520 if (write(c
->fd
,err
,strlen(err
)) == -1) {
2521 /* Nothing to do, Just to avoid the warning... */
2526 server
.stat_numconnections
++;
2529 /* ======================= Redis objects implementation ===================== */
2531 static robj
*createObject(int type
, void *ptr
) {
2534 if (server
.vm_enabled
) pthread_mutex_lock(&server
.obj_freelist_mutex
);
2535 if (listLength(server
.objfreelist
)) {
2536 listNode
*head
= listFirst(server
.objfreelist
);
2537 o
= listNodeValue(head
);
2538 listDelNode(server
.objfreelist
,head
);
2539 if (server
.vm_enabled
) pthread_mutex_unlock(&server
.obj_freelist_mutex
);
2541 if (server
.vm_enabled
) {
2542 pthread_mutex_unlock(&server
.obj_freelist_mutex
);
2543 o
= zmalloc(sizeof(*o
));
2545 o
= zmalloc(sizeof(*o
)-sizeof(struct redisObjectVM
));
2549 o
->encoding
= REDIS_ENCODING_RAW
;
2552 if (server
.vm_enabled
) {
2553 /* Note that this code may run in the context of an I/O thread
2554 * and accessing server.unixtime in theory is an error
2555 * (no locks). But in practice this is safe, and even if we read
2556 * garbage Redis will not fail, as it's just a statistical info */
2557 o
->vm
.atime
= server
.unixtime
;
2558 o
->storage
= REDIS_VM_MEMORY
;
2563 static robj
*createStringObject(char *ptr
, size_t len
) {
2564 return createObject(REDIS_STRING
,sdsnewlen(ptr
,len
));
2567 static robj
*dupStringObject(robj
*o
) {
2568 assert(o
->encoding
== REDIS_ENCODING_RAW
);
2569 return createStringObject(o
->ptr
,sdslen(o
->ptr
));
2572 static robj
*createListObject(void) {
2573 list
*l
= listCreate();
2575 listSetFreeMethod(l
,decrRefCount
);
2576 return createObject(REDIS_LIST
,l
);
2579 static robj
*createSetObject(void) {
2580 dict
*d
= dictCreate(&setDictType
,NULL
);
2581 return createObject(REDIS_SET
,d
);
2584 static robj
*createHashObject(void) {
2585 /* All the Hashes start as zipmaps. Will be automatically converted
2586 * into hash tables if there are enough elements or big elements
2588 unsigned char *zm
= zipmapNew();
2589 robj
*o
= createObject(REDIS_HASH
,zm
);
2590 o
->encoding
= REDIS_ENCODING_ZIPMAP
;
2594 static robj
*createZsetObject(void) {
2595 zset
*zs
= zmalloc(sizeof(*zs
));
2597 zs
->dict
= dictCreate(&zsetDictType
,NULL
);
2598 zs
->zsl
= zslCreate();
2599 return createObject(REDIS_ZSET
,zs
);
2602 static void freeStringObject(robj
*o
) {
2603 if (o
->encoding
== REDIS_ENCODING_RAW
) {
2608 static void freeListObject(robj
*o
) {
2609 listRelease((list
*) o
->ptr
);
2612 static void freeSetObject(robj
*o
) {
2613 dictRelease((dict
*) o
->ptr
);
2616 static void freeZsetObject(robj
*o
) {
2619 dictRelease(zs
->dict
);
2624 static void freeHashObject(robj
*o
) {
2625 switch (o
->encoding
) {
2626 case REDIS_ENCODING_HT
:
2627 dictRelease((dict
*) o
->ptr
);
2629 case REDIS_ENCODING_ZIPMAP
:
2638 static void incrRefCount(robj
*o
) {
2639 redisAssert(!server
.vm_enabled
|| o
->storage
== REDIS_VM_MEMORY
);
2643 static void decrRefCount(void *obj
) {
2646 /* Object is a key of a swapped out value, or in the process of being
2648 if (server
.vm_enabled
&&
2649 (o
->storage
== REDIS_VM_SWAPPED
|| o
->storage
== REDIS_VM_LOADING
))
2651 if (o
->storage
== REDIS_VM_SWAPPED
|| o
->storage
== REDIS_VM_LOADING
) {
2652 redisAssert(o
->refcount
== 1);
2654 if (o
->storage
== REDIS_VM_LOADING
) vmCancelThreadedIOJob(obj
);
2655 redisAssert(o
->type
== REDIS_STRING
);
2656 freeStringObject(o
);
2657 vmMarkPagesFree(o
->vm
.page
,o
->vm
.usedpages
);
2658 pthread_mutex_lock(&server
.obj_freelist_mutex
);
2659 if (listLength(server
.objfreelist
) > REDIS_OBJFREELIST_MAX
||
2660 !listAddNodeHead(server
.objfreelist
,o
))
2662 pthread_mutex_unlock(&server
.obj_freelist_mutex
);
2663 server
.vm_stats_swapped_objects
--;
2666 /* Object is in memory, or in the process of being swapped out. */
2667 if (--(o
->refcount
) == 0) {
2668 if (server
.vm_enabled
&& o
->storage
== REDIS_VM_SWAPPING
)
2669 vmCancelThreadedIOJob(obj
);
2671 case REDIS_STRING
: freeStringObject(o
); break;
2672 case REDIS_LIST
: freeListObject(o
); break;
2673 case REDIS_SET
: freeSetObject(o
); break;
2674 case REDIS_ZSET
: freeZsetObject(o
); break;
2675 case REDIS_HASH
: freeHashObject(o
); break;
2676 default: redisAssert(0 != 0); break;
2678 if (server
.vm_enabled
) pthread_mutex_lock(&server
.obj_freelist_mutex
);
2679 if (listLength(server
.objfreelist
) > REDIS_OBJFREELIST_MAX
||
2680 !listAddNodeHead(server
.objfreelist
,o
))
2682 if (server
.vm_enabled
) pthread_mutex_unlock(&server
.obj_freelist_mutex
);
2686 static robj
*lookupKey(redisDb
*db
, robj
*key
) {
2687 dictEntry
*de
= dictFind(db
->dict
,key
);
2689 robj
*key
= dictGetEntryKey(de
);
2690 robj
*val
= dictGetEntryVal(de
);
2692 if (server
.vm_enabled
) {
2693 if (key
->storage
== REDIS_VM_MEMORY
||
2694 key
->storage
== REDIS_VM_SWAPPING
)
2696 /* If we were swapping the object out, stop it, this key
2698 if (key
->storage
== REDIS_VM_SWAPPING
)
2699 vmCancelThreadedIOJob(key
);
2700 /* Update the access time of the key for the aging algorithm. */
2701 key
->vm
.atime
= server
.unixtime
;
2703 int notify
= (key
->storage
== REDIS_VM_LOADING
);
2705 /* Our value was swapped on disk. Bring it at home. */
2706 redisAssert(val
== NULL
);
2707 val
= vmLoadObject(key
);
2708 dictGetEntryVal(de
) = val
;
2710 /* Clients blocked by the VM subsystem may be waiting for
2712 if (notify
) handleClientsBlockedOnSwappedKey(db
,key
);
2721 static robj
*lookupKeyRead(redisDb
*db
, robj
*key
) {
2722 expireIfNeeded(db
,key
);
2723 return lookupKey(db
,key
);
2726 static robj
*lookupKeyWrite(redisDb
*db
, robj
*key
) {
2727 deleteIfVolatile(db
,key
);
2728 return lookupKey(db
,key
);
2731 static int deleteKey(redisDb
*db
, robj
*key
) {
2734 /* We need to protect key from destruction: after the first dictDelete()
2735 * it may happen that 'key' is no longer valid if we don't increment
2736 * its count. This may happen when we get the object reference directly
2737 * from the hash table with dictRandomKey() or dict iterators */
2739 if (dictSize(db
->expires
)) dictDelete(db
->expires
,key
);
2740 retval
= dictDelete(db
->dict
,key
);
2743 return retval
== DICT_OK
;
2746 /* Try to share an object against the shared objects pool */
2747 static robj
*tryObjectSharing(robj
*o
) {
2748 struct dictEntry
*de
;
2751 if (o
== NULL
|| server
.shareobjects
== 0) return o
;
2753 redisAssert(o
->type
== REDIS_STRING
);
2754 de
= dictFind(server
.sharingpool
,o
);
2756 robj
*shared
= dictGetEntryKey(de
);
2758 c
= ((unsigned long) dictGetEntryVal(de
))+1;
2759 dictGetEntryVal(de
) = (void*) c
;
2760 incrRefCount(shared
);
2764 /* Here we are using a stream algorithm: Every time an object is
2765 * shared we increment its count, every time there is a miss we
2766 * decrement the counter of a random object. If this object reaches
2767 * zero we remove the object and put the current object instead. */
2768 if (dictSize(server
.sharingpool
) >=
2769 server
.sharingpoolsize
) {
2770 de
= dictGetRandomKey(server
.sharingpool
);
2771 redisAssert(de
!= NULL
);
2772 c
= ((unsigned long) dictGetEntryVal(de
))-1;
2773 dictGetEntryVal(de
) = (void*) c
;
2775 dictDelete(server
.sharingpool
,de
->key
);
2778 c
= 0; /* If the pool is empty we want to add this object */
2783 retval
= dictAdd(server
.sharingpool
,o
,(void*)1);
2784 redisAssert(retval
== DICT_OK
);
2791 /* Check if the nul-terminated string 's' can be represented by a long
2792 * (that is, is a number that fits into long without any other space or
2793 * character before or after the digits).
2795 * If so, the function returns REDIS_OK and *longval is set to the value
2796 * of the number. Otherwise REDIS_ERR is returned */
2797 static int isStringRepresentableAsLong(sds s
, long *longval
) {
2798 char buf
[32], *endptr
;
2802 value
= strtol(s
, &endptr
, 10);
2803 if (endptr
[0] != '\0') return REDIS_ERR
;
2804 slen
= snprintf(buf
,32,"%ld",value
);
2806 /* If the number converted back into a string is not identical
2807 * then it's not possible to encode the string as integer */
2808 if (sdslen(s
) != (unsigned)slen
|| memcmp(buf
,s
,slen
)) return REDIS_ERR
;
2809 if (longval
) *longval
= value
;
2813 /* Try to encode a string object in order to save space */
2814 static int tryObjectEncoding(robj
*o
) {
2818 if (o
->encoding
!= REDIS_ENCODING_RAW
)
2819 return REDIS_ERR
; /* Already encoded */
2821 /* It's not safe to encode shared objects: shared objects can be shared
2822 * everywhere in the "object space" of Redis. Encoded objects can only
2823 * appear as "values" (and not, for instance, as keys) */
2824 if (o
->refcount
> 1) return REDIS_ERR
;
2826 /* Currently we try to encode only strings */
2827 redisAssert(o
->type
== REDIS_STRING
);
2829 /* Check if we can represent this string as a long integer */
2830 if (isStringRepresentableAsLong(s
,&value
) == REDIS_ERR
) return REDIS_ERR
;
2832 /* Ok, this object can be encoded */
2833 o
->encoding
= REDIS_ENCODING_INT
;
2835 o
->ptr
= (void*) value
;
2839 /* Get a decoded version of an encoded object (returned as a new object).
2840 * If the object is already raw-encoded just increment the ref count. */
2841 static robj
*getDecodedObject(robj
*o
) {
2844 if (o
->encoding
== REDIS_ENCODING_RAW
) {
2848 if (o
->type
== REDIS_STRING
&& o
->encoding
== REDIS_ENCODING_INT
) {
2851 snprintf(buf
,32,"%ld",(long)o
->ptr
);
2852 dec
= createStringObject(buf
,strlen(buf
));
2855 redisAssert(1 != 1);
2859 /* Compare two string objects via strcmp() or alike.
2860 * Note that the objects may be integer-encoded. In such a case we
2861 * use snprintf() to get a string representation of the numbers on the stack
2862 * and compare the strings, it's much faster than calling getDecodedObject().
2864 * Important note: if objects are not integer encoded, but binary-safe strings,
2865 * sdscmp() from sds.c will apply memcmp() so this function can be considered
2867 static int compareStringObjects(robj
*a
, robj
*b
) {
2868 redisAssert(a
->type
== REDIS_STRING
&& b
->type
== REDIS_STRING
);
2869 char bufa
[128], bufb
[128], *astr
, *bstr
;
2872 if (a
== b
) return 0;
2873 if (a
->encoding
!= REDIS_ENCODING_RAW
) {
2874 snprintf(bufa
,sizeof(bufa
),"%ld",(long) a
->ptr
);
2880 if (b
->encoding
!= REDIS_ENCODING_RAW
) {
2881 snprintf(bufb
,sizeof(bufb
),"%ld",(long) b
->ptr
);
2887 return bothsds
? sdscmp(astr
,bstr
) : strcmp(astr
,bstr
);
2890 static size_t stringObjectLen(robj
*o
) {
2891 redisAssert(o
->type
== REDIS_STRING
);
2892 if (o
->encoding
== REDIS_ENCODING_RAW
) {
2893 return sdslen(o
->ptr
);
2897 return snprintf(buf
,32,"%ld",(long)o
->ptr
);
2901 /*============================ RDB saving/loading =========================== */
2903 static int rdbSaveType(FILE *fp
, unsigned char type
) {
2904 if (fwrite(&type
,1,1,fp
) == 0) return -1;
2908 static int rdbSaveTime(FILE *fp
, time_t t
) {
2909 int32_t t32
= (int32_t) t
;
2910 if (fwrite(&t32
,4,1,fp
) == 0) return -1;
2914 /* check rdbLoadLen() comments for more info */
2915 static int rdbSaveLen(FILE *fp
, uint32_t len
) {
2916 unsigned char buf
[2];
2919 /* Save a 6 bit len */
2920 buf
[0] = (len
&0xFF)|(REDIS_RDB_6BITLEN
<<6);
2921 if (fwrite(buf
,1,1,fp
) == 0) return -1;
2922 } else if (len
< (1<<14)) {
2923 /* Save a 14 bit len */
2924 buf
[0] = ((len
>>8)&0xFF)|(REDIS_RDB_14BITLEN
<<6);
2926 if (fwrite(buf
,2,1,fp
) == 0) return -1;
2928 /* Save a 32 bit len */
2929 buf
[0] = (REDIS_RDB_32BITLEN
<<6);
2930 if (fwrite(buf
,1,1,fp
) == 0) return -1;
2932 if (fwrite(&len
,4,1,fp
) == 0) return -1;
2937 /* String objects in the form "2391" "-100" without any space and with a
2938 * range of values that can fit in an 8, 16 or 32 bit signed value can be
2939 * encoded as integers to save space */
2940 static int rdbTryIntegerEncoding(sds s
, unsigned char *enc
) {
2942 char *endptr
, buf
[32];
2944 /* Check if it's possible to encode this value as a number */
2945 value
= strtoll(s
, &endptr
, 10);
2946 if (endptr
[0] != '\0') return 0;
2947 snprintf(buf
,32,"%lld",value
);
2949 /* If the number converted back into a string is not identical
2950 * then it's not possible to encode the string as integer */
2951 if (strlen(buf
) != sdslen(s
) || memcmp(buf
,s
,sdslen(s
))) return 0;
2953 /* Finally check if it fits in our ranges */
2954 if (value
>= -(1<<7) && value
<= (1<<7)-1) {
2955 enc
[0] = (REDIS_RDB_ENCVAL
<<6)|REDIS_RDB_ENC_INT8
;
2956 enc
[1] = value
&0xFF;
2958 } else if (value
>= -(1<<15) && value
<= (1<<15)-1) {
2959 enc
[0] = (REDIS_RDB_ENCVAL
<<6)|REDIS_RDB_ENC_INT16
;
2960 enc
[1] = value
&0xFF;
2961 enc
[2] = (value
>>8)&0xFF;
2963 } else if (value
>= -((long long)1<<31) && value
<= ((long long)1<<31)-1) {
2964 enc
[0] = (REDIS_RDB_ENCVAL
<<6)|REDIS_RDB_ENC_INT32
;
2965 enc
[1] = value
&0xFF;
2966 enc
[2] = (value
>>8)&0xFF;
2967 enc
[3] = (value
>>16)&0xFF;
2968 enc
[4] = (value
>>24)&0xFF;
2975 static int rdbSaveLzfStringObject(FILE *fp
, robj
*obj
) {
2976 unsigned int comprlen
, outlen
;
2980 /* We require at least four bytes compression for this to be worth it */
2981 outlen
= sdslen(obj
->ptr
)-4;
2982 if (outlen
<= 0) return 0;
2983 if ((out
= zmalloc(outlen
+1)) == NULL
) return 0;
2984 comprlen
= lzf_compress(obj
->ptr
, sdslen(obj
->ptr
), out
, outlen
);
2985 if (comprlen
== 0) {
2989 /* Data compressed! Let's save it on disk */
2990 byte
= (REDIS_RDB_ENCVAL
<<6)|REDIS_RDB_ENC_LZF
;
2991 if (fwrite(&byte
,1,1,fp
) == 0) goto writeerr
;
2992 if (rdbSaveLen(fp
,comprlen
) == -1) goto writeerr
;
2993 if (rdbSaveLen(fp
,sdslen(obj
->ptr
)) == -1) goto writeerr
;
2994 if (fwrite(out
,comprlen
,1,fp
) == 0) goto writeerr
;
3003 /* Save a string object as [len][data] on disk. If the object is a string
3004 * representation of an integer value we try to save it in a special form */
3005 static int rdbSaveStringObjectRaw(FILE *fp
, robj
*obj
) {
3009 len
= sdslen(obj
->ptr
);
3011 /* Try integer encoding */
3013 unsigned char buf
[5];
3014 if ((enclen
= rdbTryIntegerEncoding(obj
->ptr
,buf
)) > 0) {
3015 if (fwrite(buf
,enclen
,1,fp
) == 0) return -1;
3020 /* Try LZF compression - under 20 bytes it's unable to compress even
3021 * aaaaaaaaaaaaaaaaaa so skip it */
3022 if (server
.rdbcompression
&& len
> 20) {
3025 retval
= rdbSaveLzfStringObject(fp
,obj
);
3026 if (retval
== -1) return -1;
3027 if (retval
> 0) return 0;
3028 /* retval == 0 means data can't be compressed, save the old way */
3031 /* Store verbatim */
3032 if (rdbSaveLen(fp
,len
) == -1) return -1;
3033 if (len
&& fwrite(obj
->ptr
,len
,1,fp
) == 0) return -1;
3037 /* Like rdbSaveStringObjectRaw() but handle encoded objects */
3038 static int rdbSaveStringObject(FILE *fp
, robj
*obj
) {
3041 /* Avoid incr/decr ref count business when possible.
3042 * This plays well with copy-on-write given that we are probably
3043 * in a child process (BGSAVE). Also this makes sure key objects
3044 * of swapped objects are not incRefCount-ed (an assert does not allow
3045 * this in order to avoid bugs) */
3046 if (obj
->encoding
!= REDIS_ENCODING_RAW
) {
3047 obj
= getDecodedObject(obj
);
3048 retval
= rdbSaveStringObjectRaw(fp
,obj
);
3051 retval
= rdbSaveStringObjectRaw(fp
,obj
);
3056 /* Save a double value. Doubles are saved as strings prefixed by an unsigned
3057 * 8 bit integer specifying the length of the representation.
3058 * This 8 bit integer has special values in order to specify the following
3064 static int rdbSaveDoubleValue(FILE *fp
, double val
) {
3065 unsigned char buf
[128];
3071 } else if (!isfinite(val
)) {
3073 buf
[0] = (val
< 0) ? 255 : 254;
3075 snprintf((char*)buf
+1,sizeof(buf
)-1,"%.17g",val
);
3076 buf
[0] = strlen((char*)buf
+1);
3079 if (fwrite(buf
,len
,1,fp
) == 0) return -1;
3083 /* Save a Redis object. */
3084 static int rdbSaveObject(FILE *fp
, robj
*o
) {
3085 if (o
->type
== REDIS_STRING
) {
3086 /* Save a string value */
3087 if (rdbSaveStringObject(fp
,o
) == -1) return -1;
3088 } else if (o
->type
== REDIS_LIST
) {
3089 /* Save a list value */
3090 list
*list
= o
->ptr
;
3094 if (rdbSaveLen(fp
,listLength(list
)) == -1) return -1;
3095 listRewind(list
,&li
);
3096 while((ln
= listNext(&li
))) {
3097 robj
*eleobj
= listNodeValue(ln
);
3099 if (rdbSaveStringObject(fp
,eleobj
) == -1) return -1;
3101 } else if (o
->type
== REDIS_SET
) {
3102 /* Save a set value */
3104 dictIterator
*di
= dictGetIterator(set
);
3107 if (rdbSaveLen(fp
,dictSize(set
)) == -1) return -1;
3108 while((de
= dictNext(di
)) != NULL
) {
3109 robj
*eleobj
= dictGetEntryKey(de
);
3111 if (rdbSaveStringObject(fp
,eleobj
) == -1) return -1;
3113 dictReleaseIterator(di
);
3114 } else if (o
->type
== REDIS_ZSET
) {
3115 /* Save a set value */
3117 dictIterator
*di
= dictGetIterator(zs
->dict
);
3120 if (rdbSaveLen(fp
,dictSize(zs
->dict
)) == -1) return -1;
3121 while((de
= dictNext(di
)) != NULL
) {
3122 robj
*eleobj
= dictGetEntryKey(de
);
3123 double *score
= dictGetEntryVal(de
);
3125 if (rdbSaveStringObject(fp
,eleobj
) == -1) return -1;
3126 if (rdbSaveDoubleValue(fp
,*score
) == -1) return -1;
3128 dictReleaseIterator(di
);
3130 redisAssert(0 != 0);
3135 /* Return the length the object will have on disk if saved with
3136 * the rdbSaveObject() function. Currently we use a trick to get
3137 * this length with very little changes to the code. In the future
3138 * we could switch to a faster solution. */
3139 static off_t
rdbSavedObjectLen(robj
*o
, FILE *fp
) {
3140 if (fp
== NULL
) fp
= server
.devnull
;
3142 assert(rdbSaveObject(fp
,o
) != 1);
3146 /* Return the number of pages required to save this object in the swap file */
3147 static off_t
rdbSavedObjectPages(robj
*o
, FILE *fp
) {
3148 off_t bytes
= rdbSavedObjectLen(o
,fp
);
3150 return (bytes
+(server
.vm_page_size
-1))/server
.vm_page_size
;
3153 /* Save the DB on disk. Return REDIS_ERR on error, REDIS_OK on success */
3154 static int rdbSave(char *filename
) {
3155 dictIterator
*di
= NULL
;
3160 time_t now
= time(NULL
);
3162 /* Wait for I/O threads to terminate, just in case this is a
3163 * foreground-saving, to avoid seeking the swap file descriptor at the
3165 if (server
.vm_enabled
)
3166 waitEmptyIOJobsQueue();
3168 snprintf(tmpfile
,256,"temp-%d.rdb", (int) getpid());
3169 fp
= fopen(tmpfile
,"w");
3171 redisLog(REDIS_WARNING
, "Failed saving the DB: %s", strerror(errno
));
3174 if (fwrite("REDIS0001",9,1,fp
) == 0) goto werr
;
3175 for (j
= 0; j
< server
.dbnum
; j
++) {
3176 redisDb
*db
= server
.db
+j
;
3178 if (dictSize(d
) == 0) continue;
3179 di
= dictGetIterator(d
);
3185 /* Write the SELECT DB opcode */
3186 if (rdbSaveType(fp
,REDIS_SELECTDB
) == -1) goto werr
;
3187 if (rdbSaveLen(fp
,j
) == -1) goto werr
;
3189 /* Iterate this DB writing every entry */
3190 while((de
= dictNext(di
)) != NULL
) {
3191 robj
*key
= dictGetEntryKey(de
);
3192 robj
*o
= dictGetEntryVal(de
);
3193 time_t expiretime
= getExpire(db
,key
);
3195 /* Save the expire time */
3196 if (expiretime
!= -1) {
3197 /* If this key is already expired skip it */
3198 if (expiretime
< now
) continue;
3199 if (rdbSaveType(fp
,REDIS_EXPIRETIME
) == -1) goto werr
;
3200 if (rdbSaveTime(fp
,expiretime
) == -1) goto werr
;
3202 /* Save the key and associated value. This requires special
3203 * handling if the value is swapped out. */
3204 if (!server
.vm_enabled
|| key
->storage
== REDIS_VM_MEMORY
||
3205 key
->storage
== REDIS_VM_SWAPPING
) {
3206 /* Save type, key, value */
3207 if (rdbSaveType(fp
,o
->type
) == -1) goto werr
;
3208 if (rdbSaveStringObject(fp
,key
) == -1) goto werr
;
3209 if (rdbSaveObject(fp
,o
) == -1) goto werr
;
3211 /* REDIS_VM_SWAPPED or REDIS_VM_LOADING */
3213 /* Get a preview of the object in memory */
3214 po
= vmPreviewObject(key
);
3215 /* Save type, key, value */
3216 if (rdbSaveType(fp
,key
->vtype
) == -1) goto werr
;
3217 if (rdbSaveStringObject(fp
,key
) == -1) goto werr
;
3218 if (rdbSaveObject(fp
,po
) == -1) goto werr
;
3219 /* Remove the loaded object from memory */
3223 dictReleaseIterator(di
);
3226 if (rdbSaveType(fp
,REDIS_EOF
) == -1) goto werr
;
3228 /* Make sure data will not remain on the OS's output buffers */
3233 /* Use RENAME to make sure the DB file is changed atomically only
3234 * if the generated DB file is ok. */
3235 if (rename(tmpfile
,filename
) == -1) {
3236 redisLog(REDIS_WARNING
,"Error moving temp DB file on the final destination: %s", strerror(errno
));
3240 redisLog(REDIS_NOTICE
,"DB saved on disk");
3242 server
.lastsave
= time(NULL
);
3248 redisLog(REDIS_WARNING
,"Write error saving DB on disk: %s", strerror(errno
));
3249 if (di
) dictReleaseIterator(di
);
3253 static int rdbSaveBackground(char *filename
) {
3256 if (server
.bgsavechildpid
!= -1) return REDIS_ERR
;
3257 if (server
.vm_enabled
) waitEmptyIOJobsQueue();
3258 if ((childpid
= fork()) == 0) {
3260 if (server
.vm_enabled
) vmReopenSwapFile();
3262 if (rdbSave(filename
) == REDIS_OK
) {
3269 if (childpid
== -1) {
3270 redisLog(REDIS_WARNING
,"Can't save in background: fork: %s",
3274 redisLog(REDIS_NOTICE
,"Background saving started by pid %d",childpid
);
3275 server
.bgsavechildpid
= childpid
;
3278 return REDIS_OK
; /* unreached */
3281 static void rdbRemoveTempFile(pid_t childpid
) {
3284 snprintf(tmpfile
,256,"temp-%d.rdb", (int) childpid
);
/* Read a one byte type/opcode from 'fp'.
 * Returns the byte as a non negative integer, or -1 if it can't be read. */
static int rdbLoadType(FILE *fp) {
    unsigned char opcode;

    return (fread(&opcode,1,1,fp) == 0) ? -1 : opcode;
}
/* Read a time stored as a 4 byte signed integer (in the byte order of the
 * host) from 'fp'. Returns the time, or -1 if the four bytes can't be
 * read. */
static time_t rdbLoadTime(FILE *fp) {
    int32_t stamp;
    size_t items = fread(&stamp,4,1,fp);

    if (items == 0) return -1;
    return (time_t) stamp;
}
3300 /* Load an encoded length from the DB, see the REDIS_RDB_* defines on the top
3301 * of this file for a description of how these are stored on disk.
3303 * isencoded is set to 1 if the value read is not actually a length but
3304 * an "encoding type", check the above comments for more info */
3305 static uint32_t rdbLoadLen(FILE *fp
, int *isencoded
) {
3306 unsigned char buf
[2];
3310 if (isencoded
) *isencoded
= 0;
3311 if (fread(buf
,1,1,fp
) == 0) return REDIS_RDB_LENERR
;
3312 type
= (buf
[0]&0xC0)>>6;
3313 if (type
== REDIS_RDB_6BITLEN
) {
3314 /* Read a 6 bit len */
3316 } else if (type
== REDIS_RDB_ENCVAL
) {
3317 /* Read a 6 bit len encoding type */
3318 if (isencoded
) *isencoded
= 1;
3320 } else if (type
== REDIS_RDB_14BITLEN
) {
3321 /* Read a 14 bit len */
3322 if (fread(buf
+1,1,1,fp
) == 0) return REDIS_RDB_LENERR
;
3323 return ((buf
[0]&0x3F)<<8)|buf
[1];
3325 /* Read a 32 bit len */
3326 if (fread(&len
,4,1,fp
) == 0) return REDIS_RDB_LENERR
;
3331 static robj
*rdbLoadIntegerObject(FILE *fp
, int enctype
) {
3332 unsigned char enc
[4];
3335 if (enctype
== REDIS_RDB_ENC_INT8
) {
3336 if (fread(enc
,1,1,fp
) == 0) return NULL
;
3337 val
= (signed char)enc
[0];
3338 } else if (enctype
== REDIS_RDB_ENC_INT16
) {
3340 if (fread(enc
,2,1,fp
) == 0) return NULL
;
3341 v
= enc
[0]|(enc
[1]<<8);
3343 } else if (enctype
== REDIS_RDB_ENC_INT32
) {
3345 if (fread(enc
,4,1,fp
) == 0) return NULL
;
3346 v
= enc
[0]|(enc
[1]<<8)|(enc
[2]<<16)|(enc
[3]<<24);
3349 val
= 0; /* anti-warning */
3352 return createObject(REDIS_STRING
,sdscatprintf(sdsempty(),"%lld",val
));
3355 static robj
*rdbLoadLzfStringObject(FILE*fp
) {
3356 unsigned int len
, clen
;
3357 unsigned char *c
= NULL
;
3360 if ((clen
= rdbLoadLen(fp
,NULL
)) == REDIS_RDB_LENERR
) return NULL
;
3361 if ((len
= rdbLoadLen(fp
,NULL
)) == REDIS_RDB_LENERR
) return NULL
;
3362 if ((c
= zmalloc(clen
)) == NULL
) goto err
;
3363 if ((val
= sdsnewlen(NULL
,len
)) == NULL
) goto err
;
3364 if (fread(c
,clen
,1,fp
) == 0) goto err
;
3365 if (lzf_decompress(c
,clen
,val
,len
) == 0) goto err
;
3367 return createObject(REDIS_STRING
,val
);
3374 static robj
*rdbLoadStringObject(FILE*fp
) {
3379 len
= rdbLoadLen(fp
,&isencoded
);
3382 case REDIS_RDB_ENC_INT8
:
3383 case REDIS_RDB_ENC_INT16
:
3384 case REDIS_RDB_ENC_INT32
:
3385 return tryObjectSharing(rdbLoadIntegerObject(fp
,len
));
3386 case REDIS_RDB_ENC_LZF
:
3387 return tryObjectSharing(rdbLoadLzfStringObject(fp
));
3393 if (len
== REDIS_RDB_LENERR
) return NULL
;
3394 val
= sdsnewlen(NULL
,len
);
3395 if (len
&& fread(val
,len
,1,fp
) == 0) {
3399 return tryObjectSharing(createObject(REDIS_STRING
,val
));
3402 /* For information about double serialization check rdbSaveDoubleValue() */
3403 static int rdbLoadDoubleValue(FILE *fp
, double *val
) {
3407 if (fread(&len
,1,1,fp
) == 0) return -1;
3409 case 255: *val
= R_NegInf
; return 0;
3410 case 254: *val
= R_PosInf
; return 0;
3411 case 253: *val
= R_Nan
; return 0;
3413 if (fread(buf
,len
,1,fp
) == 0) return -1;
3415 sscanf(buf
, "%lg", val
);
3420 /* Load a Redis object of the specified type from the specified file.
3421 * On success a newly allocated object is returned, otherwise NULL. */
3422 static robj
*rdbLoadObject(int type
, FILE *fp
) {
3425 if (type
== REDIS_STRING
) {
3426 /* Read string value */
3427 if ((o
= rdbLoadStringObject(fp
)) == NULL
) return NULL
;
3428 tryObjectEncoding(o
);
3429 } else if (type
== REDIS_LIST
|| type
== REDIS_SET
) {
3430 /* Read list/set value */
3433 if ((listlen
= rdbLoadLen(fp
,NULL
)) == REDIS_RDB_LENERR
) return NULL
;
3434 o
= (type
== REDIS_LIST
) ? createListObject() : createSetObject();
3435 /* It's faster to expand the dict to the right size asap in order
3436 * to avoid rehashing */
3437 if (type
== REDIS_SET
&& listlen
> DICT_HT_INITIAL_SIZE
)
3438 dictExpand(o
->ptr
,listlen
);
3439 /* Load every single element of the list/set */
3443 if ((ele
= rdbLoadStringObject(fp
)) == NULL
) return NULL
;
3444 tryObjectEncoding(ele
);
3445 if (type
== REDIS_LIST
) {
3446 listAddNodeTail((list
*)o
->ptr
,ele
);
3448 dictAdd((dict
*)o
->ptr
,ele
,NULL
);
3451 } else if (type
== REDIS_ZSET
) {
3452 /* Read sorted set value */
3456 if ((zsetlen
= rdbLoadLen(fp
,NULL
)) == REDIS_RDB_LENERR
) return NULL
;
3457 o
= createZsetObject();
3459 /* Load every single element of the list/set */
3462 double *score
= zmalloc(sizeof(double));
3464 if ((ele
= rdbLoadStringObject(fp
)) == NULL
) return NULL
;
3465 tryObjectEncoding(ele
);
3466 if (rdbLoadDoubleValue(fp
,score
) == -1) return NULL
;
3467 dictAdd(zs
->dict
,ele
,score
);
3468 zslInsert(zs
->zsl
,*score
,ele
);
3469 incrRefCount(ele
); /* added to skiplist */
3472 redisAssert(0 != 0);
3477 static int rdbLoad(char *filename
) {
3479 robj
*keyobj
= NULL
;
3481 int type
, retval
, rdbver
;
3482 dict
*d
= server
.db
[0].dict
;
3483 redisDb
*db
= server
.db
+0;
3485 time_t expiretime
= -1, now
= time(NULL
);
3486 long long loadedkeys
= 0;
3488 fp
= fopen(filename
,"r");
3489 if (!fp
) return REDIS_ERR
;
3490 if (fread(buf
,9,1,fp
) == 0) goto eoferr
;
3492 if (memcmp(buf
,"REDIS",5) != 0) {
3494 redisLog(REDIS_WARNING
,"Wrong signature trying to load DB from file");
3497 rdbver
= atoi(buf
+5);
3500 redisLog(REDIS_WARNING
,"Can't handle RDB format version %d",rdbver
);
3507 if ((type
= rdbLoadType(fp
)) == -1) goto eoferr
;
3508 if (type
== REDIS_EXPIRETIME
) {
3509 if ((expiretime
= rdbLoadTime(fp
)) == -1) goto eoferr
;
3510 /* We read the time so we need to read the object type again */
3511 if ((type
= rdbLoadType(fp
)) == -1) goto eoferr
;
3513 if (type
== REDIS_EOF
) break;
3514 /* Handle SELECT DB opcode as a special case */
3515 if (type
== REDIS_SELECTDB
) {
3516 if ((dbid
= rdbLoadLen(fp
,NULL
)) == REDIS_RDB_LENERR
)
3518 if (dbid
>= (unsigned)server
.dbnum
) {
3519 redisLog(REDIS_WARNING
,"FATAL: Data file was created with a Redis server configured to handle more than %d databases. Exiting\n", server
.dbnum
);
3522 db
= server
.db
+dbid
;
3527 if ((keyobj
= rdbLoadStringObject(fp
)) == NULL
) goto eoferr
;
3529 if ((o
= rdbLoadObject(type
,fp
)) == NULL
) goto eoferr
;
3530 /* Add the new object in the hash table */
3531 retval
= dictAdd(d
,keyobj
,o
);
3532 if (retval
== DICT_ERR
) {
3533 redisLog(REDIS_WARNING
,"Loading DB, duplicated key (%s) found! Unrecoverable error, exiting now.", keyobj
->ptr
);
3536 /* Set the expire time if needed */
3537 if (expiretime
!= -1) {
3538 setExpire(db
,keyobj
,expiretime
);
3539 /* Delete this key if already expired */
3540 if (expiretime
< now
) deleteKey(db
,keyobj
);
3544 /* Handle swapping while loading big datasets when VM is on */
3546 if (server
.vm_enabled
&& (loadedkeys
% 5000) == 0) {
3547 while (zmalloc_used_memory() > server
.vm_max_memory
) {
3548 if (vmSwapOneObjectBlocking() == REDIS_ERR
) break;
3555 eoferr
: /* unexpected end of file is handled here with a fatal exit */
3556 if (keyobj
) decrRefCount(keyobj
);
3557 redisLog(REDIS_WARNING
,"Short read or OOM loading DB. Unrecoverable error, aborting now.");
3559 return REDIS_ERR
; /* Just to avoid warning */
3562 /*================================== Commands =============================== */
3564 static void authCommand(redisClient
*c
) {
3565 if (!server
.requirepass
|| !strcmp(c
->argv
[1]->ptr
, server
.requirepass
)) {
3566 c
->authenticated
= 1;
3567 addReply(c
,shared
.ok
);
3569 c
->authenticated
= 0;
3570 addReplySds(c
,sdscatprintf(sdsempty(),"-ERR invalid password\r\n"));
3574 static void pingCommand(redisClient
*c
) {
3575 addReply(c
,shared
.pong
);
3578 static void echoCommand(redisClient
*c
) {
3579 addReplyBulkLen(c
,c
->argv
[1]);
3580 addReply(c
,c
->argv
[1]);
3581 addReply(c
,shared
.crlf
);
3584 /*=================================== Strings =============================== */
3586 static void setGenericCommand(redisClient
*c
, int nx
) {
3589 if (nx
) deleteIfVolatile(c
->db
,c
->argv
[1]);
3590 retval
= dictAdd(c
->db
->dict
,c
->argv
[1],c
->argv
[2]);
3591 if (retval
== DICT_ERR
) {
3593 /* If the key is about a swapped value, we want a new key object
3594 * to overwrite the old. So we delete the old key in the database.
3595 * This will also make sure that swap pages about the old object
3596 * will be marked as free. */
3597 if (server
.vm_enabled
&& deleteIfSwapped(c
->db
,c
->argv
[1]))
3598 incrRefCount(c
->argv
[1]);
3599 dictReplace(c
->db
->dict
,c
->argv
[1],c
->argv
[2]);
3600 incrRefCount(c
->argv
[2]);
3602 addReply(c
,shared
.czero
);
3606 incrRefCount(c
->argv
[1]);
3607 incrRefCount(c
->argv
[2]);
3610 removeExpire(c
->db
,c
->argv
[1]);
3611 addReply(c
, nx
? shared
.cone
: shared
.ok
);
3614 static void setCommand(redisClient
*c
) {
3615 setGenericCommand(c
,0);
3618 static void setnxCommand(redisClient
*c
) {
3619 setGenericCommand(c
,1);
3622 static int getGenericCommand(redisClient
*c
) {
3623 robj
*o
= lookupKeyRead(c
->db
,c
->argv
[1]);
3626 addReply(c
,shared
.nullbulk
);
3629 if (o
->type
!= REDIS_STRING
) {
3630 addReply(c
,shared
.wrongtypeerr
);
3633 addReplyBulkLen(c
,o
);
3635 addReply(c
,shared
.crlf
);
3641 static void getCommand(redisClient
*c
) {
3642 getGenericCommand(c
);
3645 static void getsetCommand(redisClient
*c
) {
3646 if (getGenericCommand(c
) == REDIS_ERR
) return;
3647 if (dictAdd(c
->db
->dict
,c
->argv
[1],c
->argv
[2]) == DICT_ERR
) {
3648 dictReplace(c
->db
->dict
,c
->argv
[1],c
->argv
[2]);
3650 incrRefCount(c
->argv
[1]);
3652 incrRefCount(c
->argv
[2]);
3654 removeExpire(c
->db
,c
->argv
[1]);
3657 static void mgetCommand(redisClient
*c
) {
3660 addReplySds(c
,sdscatprintf(sdsempty(),"*%d\r\n",c
->argc
-1));
3661 for (j
= 1; j
< c
->argc
; j
++) {
3662 robj
*o
= lookupKeyRead(c
->db
,c
->argv
[j
]);
3664 addReply(c
,shared
.nullbulk
);
3666 if (o
->type
!= REDIS_STRING
) {
3667 addReply(c
,shared
.nullbulk
);
3669 addReplyBulkLen(c
,o
);
3671 addReply(c
,shared
.crlf
);
3677 static void msetGenericCommand(redisClient
*c
, int nx
) {
3678 int j
, busykeys
= 0;
3680 if ((c
->argc
% 2) == 0) {
3681 addReplySds(c
,sdsnew("-ERR wrong number of arguments for MSET\r\n"));
3684 /* Handle the NX flag. The MSETNX semantic is to return zero and not
3685 * set anything at all if at least one key already exists. */
3687 for (j
= 1; j
< c
->argc
; j
+= 2) {
3688 if (lookupKeyWrite(c
->db
,c
->argv
[j
]) != NULL
) {
3694 addReply(c
, shared
.czero
);
3698 for (j
= 1; j
< c
->argc
; j
+= 2) {
3701 tryObjectEncoding(c
->argv
[j
+1]);
3702 retval
= dictAdd(c
->db
->dict
,c
->argv
[j
],c
->argv
[j
+1]);
3703 if (retval
== DICT_ERR
) {
3704 dictReplace(c
->db
->dict
,c
->argv
[j
],c
->argv
[j
+1]);
3705 incrRefCount(c
->argv
[j
+1]);
3707 incrRefCount(c
->argv
[j
]);
3708 incrRefCount(c
->argv
[j
+1]);
3710 removeExpire(c
->db
,c
->argv
[j
]);
3712 server
.dirty
+= (c
->argc
-1)/2;
3713 addReply(c
, nx
? shared
.cone
: shared
.ok
);
3716 static void msetCommand(redisClient
*c
) {
3717 msetGenericCommand(c
,0);
3720 static void msetnxCommand(redisClient
*c
) {
3721 msetGenericCommand(c
,1);
3724 static void incrDecrCommand(redisClient
*c
, long long incr
) {
3729 o
= lookupKeyWrite(c
->db
,c
->argv
[1]);
3733 if (o
->type
!= REDIS_STRING
) {
3738 if (o
->encoding
== REDIS_ENCODING_RAW
)
3739 value
= strtoll(o
->ptr
, &eptr
, 10);
3740 else if (o
->encoding
== REDIS_ENCODING_INT
)
3741 value
= (long)o
->ptr
;
3743 redisAssert(1 != 1);
3748 o
= createObject(REDIS_STRING
,sdscatprintf(sdsempty(),"%lld",value
));
3749 tryObjectEncoding(o
);
3750 retval
= dictAdd(c
->db
->dict
,c
->argv
[1],o
);
3751 if (retval
== DICT_ERR
) {
3752 dictReplace(c
->db
->dict
,c
->argv
[1],o
);
3753 removeExpire(c
->db
,c
->argv
[1]);
3755 incrRefCount(c
->argv
[1]);
3758 addReply(c
,shared
.colon
);
3760 addReply(c
,shared
.crlf
);
3763 static void incrCommand(redisClient
*c
) {
3764 incrDecrCommand(c
,1);
3767 static void decrCommand(redisClient
*c
) {
3768 incrDecrCommand(c
,-1);
3771 static void incrbyCommand(redisClient
*c
) {
3772 long long incr
= strtoll(c
->argv
[2]->ptr
, NULL
, 10);
3773 incrDecrCommand(c
,incr
);
3776 static void decrbyCommand(redisClient
*c
) {
3777 long long incr
= strtoll(c
->argv
[2]->ptr
, NULL
, 10);
3778 incrDecrCommand(c
,-incr
);
3781 static void appendCommand(redisClient
*c
) {
3786 o
= lookupKeyWrite(c
->db
,c
->argv
[1]);
3788 /* Create the key */
3789 retval
= dictAdd(c
->db
->dict
,c
->argv
[1],c
->argv
[2]);
3790 incrRefCount(c
->argv
[1]);
3791 incrRefCount(c
->argv
[2]);
3792 totlen
= stringObjectLen(c
->argv
[2]);
3796 de
= dictFind(c
->db
->dict
,c
->argv
[1]);
3799 o
= dictGetEntryVal(de
);
3800 if (o
->type
!= REDIS_STRING
) {
3801 addReply(c
,shared
.wrongtypeerr
);
3804 /* If the object is specially encoded or shared we have to make
3806 if (o
->refcount
!= 1 || o
->encoding
!= REDIS_ENCODING_RAW
) {
3807 robj
*decoded
= getDecodedObject(o
);
3809 o
= createStringObject(decoded
->ptr
, sdslen(decoded
->ptr
));
3810 decrRefCount(decoded
);
3811 dictReplace(c
->db
->dict
,c
->argv
[1],o
);
3814 if (c
->argv
[2]->encoding
== REDIS_ENCODING_RAW
) {
3815 o
->ptr
= sdscatlen(o
->ptr
,
3816 c
->argv
[2]->ptr
, sdslen(c
->argv
[2]->ptr
));
3818 o
->ptr
= sdscatprintf(o
->ptr
, "%ld",
3819 (unsigned long) c
->argv
[2]->ptr
);
3821 totlen
= sdslen(o
->ptr
);
3824 addReplySds(c
,sdscatprintf(sdsempty(),":%lu\r\n",(unsigned long)totlen
));
3827 static void substrCommand(redisClient
*c
) {
3829 long start
= atoi(c
->argv
[2]->ptr
);
3830 long end
= atoi(c
->argv
[3]->ptr
);
3832 o
= lookupKeyRead(c
->db
,c
->argv
[1]);
3834 addReply(c
,shared
.nullbulk
);
3836 if (o
->type
!= REDIS_STRING
) {
3837 addReply(c
,shared
.wrongtypeerr
);
3839 size_t rangelen
, strlen
;
3842 o
= getDecodedObject(o
);
3843 strlen
= sdslen(o
->ptr
);
3845 /* convert negative indexes */
3846 if (start
< 0) start
= strlen
+start
;
3847 if (end
< 0) end
= strlen
+end
;
3848 if (start
< 0) start
= 0;
3849 if (end
< 0) end
= 0;
3851 /* indexes sanity checks */
3852 if (start
> end
|| (size_t)start
>= strlen
) {
3853 /* Out of range start or start > end result in null reply */
3854 addReply(c
,shared
.nullbulk
);
3858 if ((size_t)end
>= strlen
) end
= strlen
-1;
3859 rangelen
= (end
-start
)+1;
3861 /* Return the result */
3862 addReplySds(c
,sdscatprintf(sdsempty(),"$%lu\r\n",rangelen
));
3863 range
= sdsnewlen((char*)o
->ptr
+start
,rangelen
);
3864 addReplySds(c
,range
);
3865 addReply(c
,shared
.crlf
);
3871 /* ========================= Type agnostic commands ========================= */
3873 static void delCommand(redisClient
*c
) {
3876 for (j
= 1; j
< c
->argc
; j
++) {
3877 if (deleteKey(c
->db
,c
->argv
[j
])) {
3884 addReply(c
,shared
.czero
);
3887 addReply(c
,shared
.cone
);
3890 addReplySds(c
,sdscatprintf(sdsempty(),":%d\r\n",deleted
));
3895 static void existsCommand(redisClient
*c
) {
3896 addReply(c
,lookupKeyRead(c
->db
,c
->argv
[1]) ? shared
.cone
: shared
.czero
);
3899 static void selectCommand(redisClient
*c
) {
3900 int id
= atoi(c
->argv
[1]->ptr
);
3902 if (selectDb(c
,id
) == REDIS_ERR
) {
3903 addReplySds(c
,sdsnew("-ERR invalid DB index\r\n"));
3905 addReply(c
,shared
.ok
);
3909 static void randomkeyCommand(redisClient
*c
) {
3913 de
= dictGetRandomKey(c
->db
->dict
);
3914 if (!de
|| expireIfNeeded(c
->db
,dictGetEntryKey(de
)) == 0) break;
3917 addReply(c
,shared
.plus
);
3918 addReply(c
,shared
.crlf
);
3920 addReply(c
,shared
.plus
);
3921 addReply(c
,dictGetEntryKey(de
));
3922 addReply(c
,shared
.crlf
);
3926 static void keysCommand(redisClient
*c
) {
3929 sds pattern
= c
->argv
[1]->ptr
;
3930 int plen
= sdslen(pattern
);
3931 unsigned long numkeys
= 0;
3932 robj
*lenobj
= createObject(REDIS_STRING
,NULL
);
3934 di
= dictGetIterator(c
->db
->dict
);
3936 decrRefCount(lenobj
);
3937 while((de
= dictNext(di
)) != NULL
) {
3938 robj
*keyobj
= dictGetEntryKey(de
);
3940 sds key
= keyobj
->ptr
;
3941 if ((pattern
[0] == '*' && pattern
[1] == '\0') ||
3942 stringmatchlen(pattern
,plen
,key
,sdslen(key
),0)) {
3943 if (expireIfNeeded(c
->db
,keyobj
) == 0) {
3944 addReplyBulkLen(c
,keyobj
);
3946 addReply(c
,shared
.crlf
);
3951 dictReleaseIterator(di
);
3952 lenobj
->ptr
= sdscatprintf(sdsempty(),"*%lu\r\n",numkeys
);
3955 static void dbsizeCommand(redisClient
*c
) {
3957 sdscatprintf(sdsempty(),":%lu\r\n",dictSize(c
->db
->dict
)));
3960 static void lastsaveCommand(redisClient
*c
) {
3962 sdscatprintf(sdsempty(),":%lu\r\n",server
.lastsave
));
3965 static void typeCommand(redisClient
*c
) {
3969 o
= lookupKeyRead(c
->db
,c
->argv
[1]);
3974 case REDIS_STRING
: type
= "+string"; break;
3975 case REDIS_LIST
: type
= "+list"; break;
3976 case REDIS_SET
: type
= "+set"; break;
3977 case REDIS_ZSET
: type
= "+zset"; break;
3978 default: type
= "unknown"; break;
3981 addReplySds(c
,sdsnew(type
));
3982 addReply(c
,shared
.crlf
);
3985 static void saveCommand(redisClient
*c
) {
3986 if (server
.bgsavechildpid
!= -1) {
3987 addReplySds(c
,sdsnew("-ERR background save in progress\r\n"));
3990 if (rdbSave(server
.dbfilename
) == REDIS_OK
) {
3991 addReply(c
,shared
.ok
);
3993 addReply(c
,shared
.err
);
3997 static void bgsaveCommand(redisClient
*c
) {
3998 if (server
.bgsavechildpid
!= -1) {
3999 addReplySds(c
,sdsnew("-ERR background save already in progress\r\n"));
4002 if (rdbSaveBackground(server
.dbfilename
) == REDIS_OK
) {
4003 char *status
= "+Background saving started\r\n";
4004 addReplySds(c
,sdsnew(status
));
4006 addReply(c
,shared
.err
);
4010 static void shutdownCommand(redisClient
*c
) {
4011 redisLog(REDIS_WARNING
,"User requested shutdown, saving DB...");
4012 /* Kill the saving child if there is a background saving in progress.
4013 We want to avoid race conditions, for instance our saving child may
4014 overwrite the synchronous saving done by SHUTDOWN. */
4015 if (server
.bgsavechildpid
!= -1) {
4016 redisLog(REDIS_WARNING
,"There is a live saving child. Killing it!");
4017 kill(server
.bgsavechildpid
,SIGKILL
);
4018 rdbRemoveTempFile(server
.bgsavechildpid
);
4020 if (server
.appendonly
) {
4021 /* Append only file: fsync() the AOF and exit */
4022 fsync(server
.appendfd
);
4023 if (server
.vm_enabled
) unlink(server
.vm_swap_file
);
4026 /* Snapshotting. Perform a SYNC SAVE and exit */
4027 if (rdbSave(server
.dbfilename
) == REDIS_OK
) {
4028 if (server
.daemonize
)
4029 unlink(server
.pidfile
);
4030 redisLog(REDIS_WARNING
,"%zu bytes used at exit",zmalloc_used_memory());
4031 redisLog(REDIS_WARNING
,"Server exit now, bye bye...");
4032 if (server
.vm_enabled
) unlink(server
.vm_swap_file
);
4035 /* Ooops.. error saving! The best we can do is to continue operating.
4036 * Note that if there was a background saving process, in the next
4037 * cron() Redis will be notified that the background saving aborted,
4038 * handling special stuff like slaves pending for synchronization... */
4039 redisLog(REDIS_WARNING
,"Error trying to save the DB, can't exit");
4040 addReplySds(c
,sdsnew("-ERR can't quit, problems saving the DB\r\n"));
4045 static void renameGenericCommand(redisClient
*c
, int nx
) {
4048 /* To use the same key as src and dst is probably an error */
4049 if (sdscmp(c
->argv
[1]->ptr
,c
->argv
[2]->ptr
) == 0) {
4050 addReply(c
,shared
.sameobjecterr
);
4054 o
= lookupKeyWrite(c
->db
,c
->argv
[1]);
4056 addReply(c
,shared
.nokeyerr
);
4060 deleteIfVolatile(c
->db
,c
->argv
[2]);
4061 if (dictAdd(c
->db
->dict
,c
->argv
[2],o
) == DICT_ERR
) {
4064 addReply(c
,shared
.czero
);
4067 dictReplace(c
->db
->dict
,c
->argv
[2],o
);
4069 incrRefCount(c
->argv
[2]);
4071 deleteKey(c
->db
,c
->argv
[1]);
4073 addReply(c
,nx
? shared
.cone
: shared
.ok
);
4076 static void renameCommand(redisClient
*c
) {
4077 renameGenericCommand(c
,0);
4080 static void renamenxCommand(redisClient
*c
) {
4081 renameGenericCommand(c
,1);
4084 static void moveCommand(redisClient
*c
) {
4089 /* Obtain source and target DB pointers */
4092 if (selectDb(c
,atoi(c
->argv
[2]->ptr
)) == REDIS_ERR
) {
4093 addReply(c
,shared
.outofrangeerr
);
4097 selectDb(c
,srcid
); /* Back to the source DB */
4099 /* If the user is moving using as target the same
4100 * DB as the source DB it is probably an error. */
4102 addReply(c
,shared
.sameobjecterr
);
4106 /* Check if the element exists and get a reference */
4107 o
= lookupKeyWrite(c
->db
,c
->argv
[1]);
4109 addReply(c
,shared
.czero
);
4113 /* Try to add the element to the target DB */
4114 deleteIfVolatile(dst
,c
->argv
[1]);
4115 if (dictAdd(dst
->dict
,c
->argv
[1],o
) == DICT_ERR
) {
4116 addReply(c
,shared
.czero
);
4119 incrRefCount(c
->argv
[1]);
4122 /* OK! key moved, free the entry in the source DB */
4123 deleteKey(src
,c
->argv
[1]);
4125 addReply(c
,shared
.cone
);
4128 /* =================================== Lists ================================ */
4129 static void pushGenericCommand(redisClient
*c
, int where
) {
4133 lobj
= lookupKeyWrite(c
->db
,c
->argv
[1]);
4135 if (handleClientsWaitingListPush(c
,c
->argv
[1],c
->argv
[2])) {
4136 addReply(c
,shared
.cone
);
4139 lobj
= createListObject();
4141 if (where
== REDIS_HEAD
) {
4142 listAddNodeHead(list
,c
->argv
[2]);
4144 listAddNodeTail(list
,c
->argv
[2]);
4146 dictAdd(c
->db
->dict
,c
->argv
[1],lobj
);
4147 incrRefCount(c
->argv
[1]);
4148 incrRefCount(c
->argv
[2]);
4150 if (lobj
->type
!= REDIS_LIST
) {
4151 addReply(c
,shared
.wrongtypeerr
);
4154 if (handleClientsWaitingListPush(c
,c
->argv
[1],c
->argv
[2])) {
4155 addReply(c
,shared
.cone
);
4159 if (where
== REDIS_HEAD
) {
4160 listAddNodeHead(list
,c
->argv
[2]);
4162 listAddNodeTail(list
,c
->argv
[2]);
4164 incrRefCount(c
->argv
[2]);
4167 addReplySds(c
,sdscatprintf(sdsempty(),":%d\r\n",listLength(list
)));
4170 static void lpushCommand(redisClient
*c
) {
4171 pushGenericCommand(c
,REDIS_HEAD
);
4174 static void rpushCommand(redisClient
*c
) {
4175 pushGenericCommand(c
,REDIS_TAIL
);
4178 static void llenCommand(redisClient
*c
) {
4182 o
= lookupKeyRead(c
->db
,c
->argv
[1]);
4184 addReply(c
,shared
.czero
);
4187 if (o
->type
!= REDIS_LIST
) {
4188 addReply(c
,shared
.wrongtypeerr
);
4191 addReplySds(c
,sdscatprintf(sdsempty(),":%d\r\n",listLength(l
)));
4196 static void lindexCommand(redisClient
*c
) {
4198 int index
= atoi(c
->argv
[2]->ptr
);
4200 o
= lookupKeyRead(c
->db
,c
->argv
[1]);
4202 addReply(c
,shared
.nullbulk
);
4204 if (o
->type
!= REDIS_LIST
) {
4205 addReply(c
,shared
.wrongtypeerr
);
4207 list
*list
= o
->ptr
;
4210 ln
= listIndex(list
, index
);
4212 addReply(c
,shared
.nullbulk
);
4214 robj
*ele
= listNodeValue(ln
);
4215 addReplyBulkLen(c
,ele
);
4217 addReply(c
,shared
.crlf
);
4223 static void lsetCommand(redisClient
*c
) {
4225 int index
= atoi(c
->argv
[2]->ptr
);
4227 o
= lookupKeyWrite(c
->db
,c
->argv
[1]);
4229 addReply(c
,shared
.nokeyerr
);
4231 if (o
->type
!= REDIS_LIST
) {
4232 addReply(c
,shared
.wrongtypeerr
);
4234 list
*list
= o
->ptr
;
4237 ln
= listIndex(list
, index
);
4239 addReply(c
,shared
.outofrangeerr
);
4241 robj
*ele
= listNodeValue(ln
);
4244 listNodeValue(ln
) = c
->argv
[3];
4245 incrRefCount(c
->argv
[3]);
4246 addReply(c
,shared
.ok
);
4253 static void popGenericCommand(redisClient
*c
, int where
) {
4256 o
= lookupKeyWrite(c
->db
,c
->argv
[1]);
4258 addReply(c
,shared
.nullbulk
);
4260 if (o
->type
!= REDIS_LIST
) {
4261 addReply(c
,shared
.wrongtypeerr
);
4263 list
*list
= o
->ptr
;
4266 if (where
== REDIS_HEAD
)
4267 ln
= listFirst(list
);
4269 ln
= listLast(list
);
4272 addReply(c
,shared
.nullbulk
);
4274 robj
*ele
= listNodeValue(ln
);
4275 addReplyBulkLen(c
,ele
);
4277 addReply(c
,shared
.crlf
);
4278 listDelNode(list
,ln
);
4285 static void lpopCommand(redisClient
*c
) {
4286 popGenericCommand(c
,REDIS_HEAD
);
4289 static void rpopCommand(redisClient
*c
) {
4290 popGenericCommand(c
,REDIS_TAIL
);
4293 static void lrangeCommand(redisClient
*c
) {
4295 int start
= atoi(c
->argv
[2]->ptr
);
4296 int end
= atoi(c
->argv
[3]->ptr
);
4298 o
= lookupKeyRead(c
->db
,c
->argv
[1]);
4300 addReply(c
,shared
.nullmultibulk
);
4302 if (o
->type
!= REDIS_LIST
) {
4303 addReply(c
,shared
.wrongtypeerr
);
4305 list
*list
= o
->ptr
;
4307 int llen
= listLength(list
);
4311 /* convert negative indexes */
4312 if (start
< 0) start
= llen
+start
;
4313 if (end
< 0) end
= llen
+end
;
4314 if (start
< 0) start
= 0;
4315 if (end
< 0) end
= 0;
4317 /* indexes sanity checks */
4318 if (start
> end
|| start
>= llen
) {
4319 /* Out of range start or start > end result in empty list */
4320 addReply(c
,shared
.emptymultibulk
);
4323 if (end
>= llen
) end
= llen
-1;
4324 rangelen
= (end
-start
)+1;
4326 /* Return the result in form of a multi-bulk reply */
4327 ln
= listIndex(list
, start
);
4328 addReplySds(c
,sdscatprintf(sdsempty(),"*%d\r\n",rangelen
));
4329 for (j
= 0; j
< rangelen
; j
++) {
4330 ele
= listNodeValue(ln
);
4331 addReplyBulkLen(c
,ele
);
4333 addReply(c
,shared
.crlf
);
4340 static void ltrimCommand(redisClient
*c
) {
4342 int start
= atoi(c
->argv
[2]->ptr
);
4343 int end
= atoi(c
->argv
[3]->ptr
);
4345 o
= lookupKeyWrite(c
->db
,c
->argv
[1]);
4347 addReply(c
,shared
.ok
);
4349 if (o
->type
!= REDIS_LIST
) {
4350 addReply(c
,shared
.wrongtypeerr
);
4352 list
*list
= o
->ptr
;
4354 int llen
= listLength(list
);
4355 int j
, ltrim
, rtrim
;
4357 /* convert negative indexes */
4358 if (start
< 0) start
= llen
+start
;
4359 if (end
< 0) end
= llen
+end
;
4360 if (start
< 0) start
= 0;
4361 if (end
< 0) end
= 0;
4363 /* indexes sanity checks */
4364 if (start
> end
|| start
>= llen
) {
4365 /* Out of range start or start > end result in empty list */
4369 if (end
>= llen
) end
= llen
-1;
4374 /* Remove list elements to perform the trim */
4375 for (j
= 0; j
< ltrim
; j
++) {
4376 ln
= listFirst(list
);
4377 listDelNode(list
,ln
);
4379 for (j
= 0; j
< rtrim
; j
++) {
4380 ln
= listLast(list
);
4381 listDelNode(list
,ln
);
4384 addReply(c
,shared
.ok
);
4389 static void lremCommand(redisClient
*c
) {
4392 o
= lookupKeyWrite(c
->db
,c
->argv
[1]);
4394 addReply(c
,shared
.czero
);
4396 if (o
->type
!= REDIS_LIST
) {
4397 addReply(c
,shared
.wrongtypeerr
);
4399 list
*list
= o
->ptr
;
4400 listNode
*ln
, *next
;
4401 int toremove
= atoi(c
->argv
[2]->ptr
);
4406 toremove
= -toremove
;
4409 ln
= fromtail
? list
->tail
: list
->head
;
4411 robj
*ele
= listNodeValue(ln
);
4413 next
= fromtail
? ln
->prev
: ln
->next
;
4414 if (compareStringObjects(ele
,c
->argv
[3]) == 0) {
4415 listDelNode(list
,ln
);
4418 if (toremove
&& removed
== toremove
) break;
4422 addReplySds(c
,sdscatprintf(sdsempty(),":%d\r\n",removed
));
4427 /* This is the semantics of this command:
4428 * RPOPLPUSH srclist dstlist:
4429 * IF LLEN(srclist) > 0
4430 * element = RPOP srclist
4431 * LPUSH dstlist element
4438 * The idea is to be able to get an element from a list in a reliable way
4439 * since the element is not just returned but pushed onto another list
4440 * as well. This command was originally proposed by Ezra Zygmuntowicz. */
4442 static void rpoplpushcommand(redisClient
*c
) {
4445 sobj
= lookupKeyWrite(c
->db
,c
->argv
[1]);
4447 addReply(c
,shared
.nullbulk
);
4449 if (sobj
->type
!= REDIS_LIST
) {
4450 addReply(c
,shared
.wrongtypeerr
);
4452 list
*srclist
= sobj
->ptr
;
4453 listNode
*ln
= listLast(srclist
);
4456 addReply(c
,shared
.nullbulk
);
4458 robj
*dobj
= lookupKeyWrite(c
->db
,c
->argv
[2]);
4459 robj
*ele
= listNodeValue(ln
);
4462 if (dobj
&& dobj
->type
!= REDIS_LIST
) {
4463 addReply(c
,shared
.wrongtypeerr
);
4467 /* Add the element to the target list (unless it's directly
4468 * passed to some BLPOP-ing client) */
4469 if (!handleClientsWaitingListPush(c
,c
->argv
[2],ele
)) {
4471 /* Create the list if the key does not exist */
4472 dobj
= createListObject();
4473 dictAdd(c
->db
->dict
,c
->argv
[2],dobj
);
4474 incrRefCount(c
->argv
[2]);
4476 dstlist
= dobj
->ptr
;
4477 listAddNodeHead(dstlist
,ele
);
4481 /* Send the element to the client as reply as well */
4482 addReplyBulkLen(c
,ele
);
4484 addReply(c
,shared
.crlf
);
4486 /* Finally remove the element from the source list */
4487 listDelNode(srclist
,ln
);
4495 /* ==================================== Sets ================================ */
4497 static void saddCommand(redisClient
*c
) {
4500 set
= lookupKeyWrite(c
->db
,c
->argv
[1]);
4502 set
= createSetObject();
4503 dictAdd(c
->db
->dict
,c
->argv
[1],set
);
4504 incrRefCount(c
->argv
[1]);
4506 if (set
->type
!= REDIS_SET
) {
4507 addReply(c
,shared
.wrongtypeerr
);
4511 if (dictAdd(set
->ptr
,c
->argv
[2],NULL
) == DICT_OK
) {
4512 incrRefCount(c
->argv
[2]);
4514 addReply(c
,shared
.cone
);
4516 addReply(c
,shared
.czero
);
4520 static void sremCommand(redisClient
*c
) {
4523 set
= lookupKeyWrite(c
->db
,c
->argv
[1]);
4525 addReply(c
,shared
.czero
);
4527 if (set
->type
!= REDIS_SET
) {
4528 addReply(c
,shared
.wrongtypeerr
);
4531 if (dictDelete(set
->ptr
,c
->argv
[2]) == DICT_OK
) {
4533 if (htNeedsResize(set
->ptr
)) dictResize(set
->ptr
);
4534 addReply(c
,shared
.cone
);
4536 addReply(c
,shared
.czero
);
4541 static void smoveCommand(redisClient
*c
) {
4542 robj
*srcset
, *dstset
;
4544 srcset
= lookupKeyWrite(c
->db
,c
->argv
[1]);
4545 dstset
= lookupKeyWrite(c
->db
,c
->argv
[2]);
4547 /* If the source key does not exist return 0, if it's of the wrong type raise an error */
4549 if (srcset
== NULL
|| srcset
->type
!= REDIS_SET
) {
4550 addReply(c
, srcset
? shared
.wrongtypeerr
: shared
.czero
);
4553 /* Error if the destination key is not a set as well */
4554 if (dstset
&& dstset
->type
!= REDIS_SET
) {
4555 addReply(c
,shared
.wrongtypeerr
);
4558 /* Remove the element from the source set */
4559 if (dictDelete(srcset
->ptr
,c
->argv
[3]) == DICT_ERR
) {
4560 /* Key not found in the src set! return zero */
4561 addReply(c
,shared
.czero
);
4565 /* Add the element to the destination set */
4567 dstset
= createSetObject();
4568 dictAdd(c
->db
->dict
,c
->argv
[2],dstset
);
4569 incrRefCount(c
->argv
[2]);
4571 if (dictAdd(dstset
->ptr
,c
->argv
[3],NULL
) == DICT_OK
)
4572 incrRefCount(c
->argv
[3]);
4573 addReply(c
,shared
.cone
);
4576 static void sismemberCommand(redisClient
*c
) {
4579 set
= lookupKeyRead(c
->db
,c
->argv
[1]);
4581 addReply(c
,shared
.czero
);
4583 if (set
->type
!= REDIS_SET
) {
4584 addReply(c
,shared
.wrongtypeerr
);
4587 if (dictFind(set
->ptr
,c
->argv
[2]))
4588 addReply(c
,shared
.cone
);
4590 addReply(c
,shared
.czero
);
4594 static void scardCommand(redisClient
*c
) {
4598 o
= lookupKeyRead(c
->db
,c
->argv
[1]);
4600 addReply(c
,shared
.czero
);
4603 if (o
->type
!= REDIS_SET
) {
4604 addReply(c
,shared
.wrongtypeerr
);
4607 addReplySds(c
,sdscatprintf(sdsempty(),":%lu\r\n",
4613 static void spopCommand(redisClient
*c
) {
4617 set
= lookupKeyWrite(c
->db
,c
->argv
[1]);
4619 addReply(c
,shared
.nullbulk
);
4621 if (set
->type
!= REDIS_SET
) {
4622 addReply(c
,shared
.wrongtypeerr
);
4625 de
= dictGetRandomKey(set
->ptr
);
4627 addReply(c
,shared
.nullbulk
);
4629 robj
*ele
= dictGetEntryKey(de
);
4631 addReplyBulkLen(c
,ele
);
4633 addReply(c
,shared
.crlf
);
4634 dictDelete(set
->ptr
,ele
);
4635 if (htNeedsResize(set
->ptr
)) dictResize(set
->ptr
);
4641 static void srandmemberCommand(redisClient
*c
) {
4645 set
= lookupKeyRead(c
->db
,c
->argv
[1]);
4647 addReply(c
,shared
.nullbulk
);
4649 if (set
->type
!= REDIS_SET
) {
4650 addReply(c
,shared
.wrongtypeerr
);
4653 de
= dictGetRandomKey(set
->ptr
);
4655 addReply(c
,shared
.nullbulk
);
4657 robj
*ele
= dictGetEntryKey(de
);
4659 addReplyBulkLen(c
,ele
);
4661 addReply(c
,shared
.crlf
);
4666 static int qsortCompareSetsByCardinality(const void *s1
, const void *s2
) {
4667 dict
**d1
= (void*) s1
, **d2
= (void*) s2
;
4669 return dictSize(*d1
)-dictSize(*d2
);
4672 static void sinterGenericCommand(redisClient
*c
, robj
**setskeys
, unsigned long setsnum
, robj
*dstkey
) {
4673 dict
**dv
= zmalloc(sizeof(dict
*)*setsnum
);
4676 robj
*lenobj
= NULL
, *dstset
= NULL
;
4677 unsigned long j
, cardinality
= 0;
4679 for (j
= 0; j
< setsnum
; j
++) {
4683 lookupKeyWrite(c
->db
,setskeys
[j
]) :
4684 lookupKeyRead(c
->db
,setskeys
[j
]);
4688 if (deleteKey(c
->db
,dstkey
))
4690 addReply(c
,shared
.czero
);
4692 addReply(c
,shared
.nullmultibulk
);
4696 if (setobj
->type
!= REDIS_SET
) {
4698 addReply(c
,shared
.wrongtypeerr
);
4701 dv
[j
] = setobj
->ptr
;
4703 /* Sort sets from the smallest to largest, this will improve our
4704 * algorithm's performance */
4705 qsort(dv
,setsnum
,sizeof(dict
*),qsortCompareSetsByCardinality
);
4707 /* The first thing we should output is the total number of elements...
4708 * since this is a multi-bulk write, but at this stage we don't know
4709 * the intersection set size, so we use a trick, append an empty object
4710 * to the output list and save the pointer to later modify it with the right length */
4713 lenobj
= createObject(REDIS_STRING
,NULL
);
4715 decrRefCount(lenobj
);
4717 /* If we have a target key where to store the resulting set
4718 * create this key with an empty set inside */
4719 dstset
= createSetObject();
4722 /* Iterate all the elements of the first (smallest) set, and test
4723 * the element against all the other sets, if at least one set does
4724 * not include the element it is discarded */
4725 di
= dictGetIterator(dv
[0]);
4727 while((de
= dictNext(di
)) != NULL
) {
4730 for (j
= 1; j
< setsnum
; j
++)
4731 if (dictFind(dv
[j
],dictGetEntryKey(de
)) == NULL
) break;
4733 continue; /* at least one set does not contain the member */
4734 ele
= dictGetEntryKey(de
);
4736 addReplyBulkLen(c
,ele
);
4738 addReply(c
,shared
.crlf
);
4741 dictAdd(dstset
->ptr
,ele
,NULL
);
4745 dictReleaseIterator(di
);
4748 /* Store the resulting set into the target */
4749 deleteKey(c
->db
,dstkey
);
4750 dictAdd(c
->db
->dict
,dstkey
,dstset
);
4751 incrRefCount(dstkey
);
4755 lenobj
->ptr
= sdscatprintf(sdsempty(),"*%lu\r\n",cardinality
);
4757 addReplySds(c
,sdscatprintf(sdsempty(),":%lu\r\n",
4758 dictSize((dict
*)dstset
->ptr
)));
4764 static void sinterCommand(redisClient
*c
) {
4765 sinterGenericCommand(c
,c
->argv
+1,c
->argc
-1,NULL
);
4768 static void sinterstoreCommand(redisClient
*c
) {
4769 sinterGenericCommand(c
,c
->argv
+2,c
->argc
-2,c
->argv
[1]);
4772 #define REDIS_OP_UNION 0
4773 #define REDIS_OP_DIFF 1
4774 #define REDIS_OP_INTER 2
4776 static void sunionDiffGenericCommand(redisClient
*c
, robj
**setskeys
, int setsnum
, robj
*dstkey
, int op
) {
4777 dict
**dv
= zmalloc(sizeof(dict
*)*setsnum
);
4780 robj
*dstset
= NULL
;
4781 int j
, cardinality
= 0;
4783 for (j
= 0; j
< setsnum
; j
++) {
4787 lookupKeyWrite(c
->db
,setskeys
[j
]) :
4788 lookupKeyRead(c
->db
,setskeys
[j
]);
4793 if (setobj
->type
!= REDIS_SET
) {
4795 addReply(c
,shared
.wrongtypeerr
);
4798 dv
[j
] = setobj
->ptr
;
4801 /* We need a temp set object to store our union. If the dstkey
4802 * is not NULL (that is, we are inside an SUNIONSTORE operation) then
4803 * this set object will be the resulting object to set into the target key*/
4804 dstset
= createSetObject();
4806 /* Iterate all the elements of all the sets, add every element a single
4807 * time to the result set */
4808 for (j
= 0; j
< setsnum
; j
++) {
4809 if (op
== REDIS_OP_DIFF
&& j
== 0 && !dv
[j
]) break; /* result set is empty */
4810 if (!dv
[j
]) continue; /* non existing keys are like empty sets */
4812 di
= dictGetIterator(dv
[j
]);
4814 while((de
= dictNext(di
)) != NULL
) {
4817 /* dictAdd will not add the same element multiple times */
4818 ele
= dictGetEntryKey(de
);
4819 if (op
== REDIS_OP_UNION
|| j
== 0) {
4820 if (dictAdd(dstset
->ptr
,ele
,NULL
) == DICT_OK
) {
4824 } else if (op
== REDIS_OP_DIFF
) {
4825 if (dictDelete(dstset
->ptr
,ele
) == DICT_OK
) {
4830 dictReleaseIterator(di
);
4832 if (op
== REDIS_OP_DIFF
&& cardinality
== 0) break; /* result set is empty */
4835 /* Output the content of the resulting set, if not in STORE mode */
4837 addReplySds(c
,sdscatprintf(sdsempty(),"*%d\r\n",cardinality
));
4838 di
= dictGetIterator(dstset
->ptr
);
4839 while((de
= dictNext(di
)) != NULL
) {
4842 ele
= dictGetEntryKey(de
);
4843 addReplyBulkLen(c
,ele
);
4845 addReply(c
,shared
.crlf
);
4847 dictReleaseIterator(di
);
4849 /* If we have a target key where to store the resulting set
4850 * create this key with the result set inside */
4851 deleteKey(c
->db
,dstkey
);
4852 dictAdd(c
->db
->dict
,dstkey
,dstset
);
4853 incrRefCount(dstkey
);
4858 decrRefCount(dstset
);
4860 addReplySds(c
,sdscatprintf(sdsempty(),":%lu\r\n",
4861 dictSize((dict
*)dstset
->ptr
)));
4867 static void sunionCommand(redisClient
*c
) {
4868 sunionDiffGenericCommand(c
,c
->argv
+1,c
->argc
-1,NULL
,REDIS_OP_UNION
);
4871 static void sunionstoreCommand(redisClient
*c
) {
4872 sunionDiffGenericCommand(c
,c
->argv
+2,c
->argc
-2,c
->argv
[1],REDIS_OP_UNION
);
4875 static void sdiffCommand(redisClient
*c
) {
4876 sunionDiffGenericCommand(c
,c
->argv
+1,c
->argc
-1,NULL
,REDIS_OP_DIFF
);
4879 static void sdiffstoreCommand(redisClient
*c
) {
4880 sunionDiffGenericCommand(c
,c
->argv
+2,c
->argc
-2,c
->argv
[1],REDIS_OP_DIFF
);
4883 /* ==================================== ZSets =============================== */
4885 /* ZSETs are ordered sets using two data structures to hold the same elements
4886 * in order to get O(log(N)) INSERT and REMOVE operations into a sorted data structure.
4889 * The elements are added to an hash table mapping Redis objects to scores.
4890 * At the same time the elements are added to a skip list mapping scores
4891 * to Redis objects (so objects are sorted by scores in this "view"). */
4893 /* This skiplist implementation is almost a C translation of the original
4894 * algorithm described by William Pugh in "Skip Lists: A Probabilistic
4895 * Alternative to Balanced Trees", modified in three ways:
4896 * a) this implementation allows for repeated values.
4897 * b) the comparison is not just by key (our 'score') but by satellite data.
4898 * c) there is a back pointer, so it's a doubly linked list with the back
4899 * pointers being only at "level 1". This allows to traverse the list
4900 * from tail to head, useful for ZREVRANGE. */
4902 static zskiplistNode
*zslCreateNode(int level
, double score
, robj
*obj
) {
4903 zskiplistNode
*zn
= zmalloc(sizeof(*zn
));
4905 zn
->forward
= zmalloc(sizeof(zskiplistNode
*) * level
);
4907 zn
->span
= zmalloc(sizeof(unsigned int) * (level
- 1));
4913 static zskiplist
*zslCreate(void) {
4917 zsl
= zmalloc(sizeof(*zsl
));
4920 zsl
->header
= zslCreateNode(ZSKIPLIST_MAXLEVEL
,0,NULL
);
4921 for (j
= 0; j
< ZSKIPLIST_MAXLEVEL
; j
++) {
4922 zsl
->header
->forward
[j
] = NULL
;
4924 /* span has space for ZSKIPLIST_MAXLEVEL-1 elements */
4925 if (j
< ZSKIPLIST_MAXLEVEL
-1)
4926 zsl
->header
->span
[j
] = 0;
4928 zsl
->header
->backward
= NULL
;
4933 static void zslFreeNode(zskiplistNode
*node
) {
4934 decrRefCount(node
->obj
);
4935 zfree(node
->forward
);
4940 static void zslFree(zskiplist
*zsl
) {
4941 zskiplistNode
*node
= zsl
->header
->forward
[0], *next
;
4943 zfree(zsl
->header
->forward
);
4944 zfree(zsl
->header
->span
);
4947 next
= node
->forward
[0];
4954 static int zslRandomLevel(void) {
4956 while ((random()&0xFFFF) < (ZSKIPLIST_P
* 0xFFFF))
4961 static void zslInsert(zskiplist
*zsl
, double score
, robj
*obj
) {
4962 zskiplistNode
*update
[ZSKIPLIST_MAXLEVEL
], *x
;
4963 unsigned int rank
[ZSKIPLIST_MAXLEVEL
];
4967 for (i
= zsl
->level
-1; i
>= 0; i
--) {
4968 /* store rank that is crossed to reach the insert position */
4969 rank
[i
] = i
== (zsl
->level
-1) ? 0 : rank
[i
+1];
4971 while (x
->forward
[i
] &&
4972 (x
->forward
[i
]->score
< score
||
4973 (x
->forward
[i
]->score
== score
&&
4974 compareStringObjects(x
->forward
[i
]->obj
,obj
) < 0))) {
4975 rank
[i
] += i
> 0 ? x
->span
[i
-1] : 1;
4980 /* we assume the key is not already inside, since we allow duplicated
4981 * scores, and the re-insertion of score and redis object should never
4982 * happen since the caller of zslInsert() should test in the hash table
4983 * if the element is already inside or not. */
4984 level
= zslRandomLevel();
4985 if (level
> zsl
->level
) {
4986 for (i
= zsl
->level
; i
< level
; i
++) {
4988 update
[i
] = zsl
->header
;
4989 update
[i
]->span
[i
-1] = zsl
->length
;
4993 x
= zslCreateNode(level
,score
,obj
);
4994 for (i
= 0; i
< level
; i
++) {
4995 x
->forward
[i
] = update
[i
]->forward
[i
];
4996 update
[i
]->forward
[i
] = x
;
4998 /* update span covered by update[i] as x is inserted here */
5000 x
->span
[i
-1] = update
[i
]->span
[i
-1] - (rank
[0] - rank
[i
]);
5001 update
[i
]->span
[i
-1] = (rank
[0] - rank
[i
]) + 1;
5005 /* increment span for untouched levels */
5006 for (i
= level
; i
< zsl
->level
; i
++) {
5007 update
[i
]->span
[i
-1]++;
5010 x
->backward
= (update
[0] == zsl
->header
) ? NULL
: update
[0];
5012 x
->forward
[0]->backward
= x
;
5018 /* Delete an element with matching score/object from the skiplist. */
5019 static int zslDelete(zskiplist
*zsl
, double score
, robj
*obj
) {
5020 zskiplistNode
*update
[ZSKIPLIST_MAXLEVEL
], *x
;
5024 for (i
= zsl
->level
-1; i
>= 0; i
--) {
5025 while (x
->forward
[i
] &&
5026 (x
->forward
[i
]->score
< score
||
5027 (x
->forward
[i
]->score
== score
&&
5028 compareStringObjects(x
->forward
[i
]->obj
,obj
) < 0)))
5032 /* We may have multiple elements with the same score, what we need
5033 * is to find the element with both the right score and object. */
5035 if (x
&& score
== x
->score
&& compareStringObjects(x
->obj
,obj
) == 0) {
5036 for (i
= 0; i
< zsl
->level
; i
++) {
5037 if (update
[i
]->forward
[i
] == x
) {
5039 update
[i
]->span
[i
-1] += x
->span
[i
-1] - 1;
5041 update
[i
]->forward
[i
] = x
->forward
[i
];
5043 /* invariant: i > 0, because update[0]->forward[0]
5044 * is always equal to x */
5045 update
[i
]->span
[i
-1] -= 1;
5048 if (x
->forward
[0]) {
5049 x
->forward
[0]->backward
= x
->backward
;
5051 zsl
->tail
= x
->backward
;
5054 while(zsl
->level
> 1 && zsl
->header
->forward
[zsl
->level
-1] == NULL
)
5059 return 0; /* not found */
5061 return 0; /* not found */
5064 /* Delete all the elements with score between min and max from the skiplist.
5065 * Min and max are inclusive, so a score >= min && score <= max is deleted.
5066 * Note that this function takes the reference to the hash table view of the
5067 * sorted set, in order to remove the elements from the hash table too. */
5068 static unsigned long zslDeleteRange(zskiplist
*zsl
, double min
, double max
, dict
*dict
) {
5069 zskiplistNode
*update
[ZSKIPLIST_MAXLEVEL
], *x
;
5070 unsigned long removed
= 0;
5074 for (i
= zsl
->level
-1; i
>= 0; i
--) {
5075 while (x
->forward
[i
] && x
->forward
[i
]->score
< min
)
5079 /* The search above stops before the first node with score >= min: from
5080 * there on, delete nodes one at a time for as long as their score is <= max. */
5082 while (x
&& x
->score
<= max
) {
5083 zskiplistNode
*next
;
5085 for (i
= 0; i
< zsl
->level
; i
++) {
5086 if (update
[i
]->forward
[i
] == x
) {
5088 update
[i
]->span
[i
-1] += x
->span
[i
-1] - 1;
5090 update
[i
]->forward
[i
] = x
->forward
[i
];
5092 /* invariant: i > 0, because update[0]->forward[0]
5093 * is always equal to x */
5094 update
[i
]->span
[i
-1] -= 1;
5097 if (x
->forward
[0]) {
5098 x
->forward
[0]->backward
= x
->backward
;
5100 zsl
->tail
= x
->backward
;
5102 next
= x
->forward
[0];
5103 dictDelete(dict
,x
->obj
);
5105 while(zsl
->level
> 1 && zsl
->header
->forward
[zsl
->level
-1] == NULL
)
5111 return removed
; /* not found */
5114 /* Find the first node having a score equal or greater than the specified one.
5115 * Returns NULL if there is no match. */
5116 static zskiplistNode
*zslFirstWithScore(zskiplist
*zsl
, double score
) {
5121 for (i
= zsl
->level
-1; i
>= 0; i
--) {
5122 while (x
->forward
[i
] && x
->forward
[i
]->score
< score
)
5125 /* The search above stops before the first node with score >= 'score', so
5126 * the next node at the lowest level is the match (NULL if there is none). */
5127 return x
->forward
[0];
5130 /* Find the rank for an element by both score and key.
5131 * Returns 0 when the element cannot be found, rank otherwise.
5132 * Note that the rank is 1-based due to the span of zsl->header to the first element. */
5134 static unsigned long zslGetRank(zskiplist
*zsl
, double score
, robj
*o
) {
5136 unsigned long rank
= 0;
5140 for (i
= zsl
->level
-1; i
>= 0; i
--) {
5141 while (x
->forward
[i
] &&
5142 (x
->forward
[i
]->score
< score
||
5143 (x
->forward
[i
]->score
== score
&&
5144 compareStringObjects(x
->forward
[i
]->obj
,o
) <= 0))) {
5145 rank
+= i
> 0 ? x
->span
[i
-1] : 1;
5149 /* x might be equal to zsl->header, so test if obj is non-NULL */
5150 if (x
->obj
&& compareStringObjects(x
->obj
,o
) == 0) {
5157 /* Finds an element by its rank. The rank argument needs to be 1-based. */
5158 zskiplistNode
* zslGetElementByRank(zskiplist
*zsl
, unsigned long rank
) {
5160 unsigned long traversed
= 0;
5164 for (i
= zsl
->level
-1; i
>= 0; i
--) {
5165 while (x
->forward
[i
] && (traversed
+ (i
> 0 ? x
->span
[i
-1] : 1)) <= rank
) {
5166 traversed
+= i
> 0 ? x
->span
[i
-1] : 1;
5170 if (traversed
== rank
) {
5177 /* The actual Z-commands implementations */
5179 /* This generic command implements both ZADD and ZINCRBY.
5180 * scoreval is the score if the operation is a ZADD (doincrement == 0) or
5181 * the increment if the operation is a ZINCRBY (doincrement == 1). */
5182 static void zaddGenericCommand(redisClient
*c
, robj
*key
, robj
*ele
, double scoreval
, int doincrement
) {
5187 zsetobj
= lookupKeyWrite(c
->db
,key
);
5188 if (zsetobj
== NULL
) {
5189 zsetobj
= createZsetObject();
5190 dictAdd(c
->db
->dict
,key
,zsetobj
);
5193 if (zsetobj
->type
!= REDIS_ZSET
) {
5194 addReply(c
,shared
.wrongtypeerr
);
5200 /* Ok now since we implement both ZADD and ZINCRBY here the code
5201 * needs to handle the two different conditions. It's all about setting
5202 * '*score', that is, the new score to set, to the right value. */
5203 score
= zmalloc(sizeof(double));
5207 /* Read the old score. If the element was not present starts from 0 */
5208 de
= dictFind(zs
->dict
,ele
);
5210 double *oldscore
= dictGetEntryVal(de
);
5211 *score
= *oldscore
+ scoreval
;
5219 /* What follows is a simple remove and re-insert operation that is common
5220 * to both ZADD and ZINCRBY... */
5221 if (dictAdd(zs
->dict
,ele
,score
) == DICT_OK
) {
5222 /* case 1: New element */
5223 incrRefCount(ele
); /* added to hash */
5224 zslInsert(zs
->zsl
,*score
,ele
);
5225 incrRefCount(ele
); /* added to skiplist */
5228 addReplyDouble(c
,*score
);
5230 addReply(c
,shared
.cone
);
5235 /* case 2: Score update operation */
5236 de
= dictFind(zs
->dict
,ele
);
5237 redisAssert(de
!= NULL
);
5238 oldscore
= dictGetEntryVal(de
);
5239 if (*score
!= *oldscore
) {
5242 /* Remove and insert the element in the skip list with new score */
5243 deleted
= zslDelete(zs
->zsl
,*oldscore
,ele
);
5244 redisAssert(deleted
!= 0);
5245 zslInsert(zs
->zsl
,*score
,ele
);
5247 /* Update the score in the hash table */
5248 dictReplace(zs
->dict
,ele
,score
);
5254 addReplyDouble(c
,*score
);
5256 addReply(c
,shared
.czero
);
5260 static void zaddCommand(redisClient
*c
) {
5263 scoreval
= strtod(c
->argv
[2]->ptr
,NULL
);
5264 zaddGenericCommand(c
,c
->argv
[1],c
->argv
[3],scoreval
,0);
5267 static void zincrbyCommand(redisClient
*c
) {
5270 scoreval
= strtod(c
->argv
[2]->ptr
,NULL
);
5271 zaddGenericCommand(c
,c
->argv
[1],c
->argv
[3],scoreval
,1);
5274 static void zremCommand(redisClient
*c
) {
5278 zsetobj
= lookupKeyWrite(c
->db
,c
->argv
[1]);
5279 if (zsetobj
== NULL
) {
5280 addReply(c
,shared
.czero
);
5286 if (zsetobj
->type
!= REDIS_ZSET
) {
5287 addReply(c
,shared
.wrongtypeerr
);
5291 de
= dictFind(zs
->dict
,c
->argv
[2]);
5293 addReply(c
,shared
.czero
);
5296 /* Delete from the skiplist */
5297 oldscore
= dictGetEntryVal(de
);
5298 deleted
= zslDelete(zs
->zsl
,*oldscore
,c
->argv
[2]);
5299 redisAssert(deleted
!= 0);
5301 /* Delete from the hash table */
5302 dictDelete(zs
->dict
,c
->argv
[2]);
5303 if (htNeedsResize(zs
->dict
)) dictResize(zs
->dict
);
5305 addReply(c
,shared
.cone
);
5309 static void zremrangebyscoreCommand(redisClient
*c
) {
5310 double min
= strtod(c
->argv
[2]->ptr
,NULL
);
5311 double max
= strtod(c
->argv
[3]->ptr
,NULL
);
5315 zsetobj
= lookupKeyWrite(c
->db
,c
->argv
[1]);
5316 if (zsetobj
== NULL
) {
5317 addReply(c
,shared
.czero
);
5321 if (zsetobj
->type
!= REDIS_ZSET
) {
5322 addReply(c
,shared
.wrongtypeerr
);
5326 deleted
= zslDeleteRange(zs
->zsl
,min
,max
,zs
->dict
);
5327 if (htNeedsResize(zs
->dict
)) dictResize(zs
->dict
);
5328 server
.dirty
+= deleted
;
5329 addReplySds(c
,sdscatprintf(sdsempty(),":%lu\r\n",deleted
));
5333 static void zunionInterGenericCommand(redisClient
*c
, robj
*dstkey
, int op
) {
5334 int i
, j
, k
, zsetnum
;
5342 /* expect zsetnum input keys to be given */
5343 zsetnum
= atoi(c
->argv
[2]->ptr
);
5345 addReplySds(c
,sdsnew("-ERR at least 1 input key is needed for ZUNION/ZINTER\r\n"));
5349 /* test if the expected number of keys would overflow */
5350 if (3+zsetnum
> c
->argc
) {
5351 addReply(c
,shared
.syntaxerr
);
5355 /* read keys to be used for input */
5356 srcdicts
= zmalloc(sizeof(dict
*) * zsetnum
);
5357 weights
= zmalloc(sizeof(double) * zsetnum
);
5358 for (i
= 0, j
= 3; i
< zsetnum
; i
++, j
++) {
5359 robj
*zsetobj
= lookupKeyWrite(c
->db
,c
->argv
[j
]);
5363 if (zsetobj
->type
!= REDIS_ZSET
) {
5366 addReply(c
,shared
.wrongtypeerr
);
5369 srcdicts
[i
] = ((zset
*)zsetobj
->ptr
)->dict
;
5372 /* default all weights to 1 */
5376 /* parse optional extra arguments */
5378 int remaining
= c
->argc
-j
;
5381 if (!strcasecmp(c
->argv
[j
]->ptr
,"weights")) {
5383 if (remaining
< zsetnum
) {
5386 addReplySds(c
,sdsnew("-ERR not enough weights for ZUNION/ZINTER\r\n"));
5389 for (i
= 0; i
< zsetnum
; i
++, j
++, remaining
--) {
5390 weights
[i
] = strtod(c
->argv
[j
]->ptr
, NULL
);
5395 addReply(c
,shared
.syntaxerr
);
5401 dstobj
= createZsetObject();
5402 dstzset
= dstobj
->ptr
;
5404 if (op
== REDIS_OP_INTER
) {
5405 /* store index of smallest zset in variable j */
5406 for (i
= 0, j
= 0; i
< zsetnum
; i
++) {
5407 if (!srcdicts
[i
] || dictSize(srcdicts
[i
]) == 0) {
5410 if (dictSize(srcdicts
[i
]) < dictSize(srcdicts
[j
])) {
5414 /* skip going over all entries if at least one dict was NULL or empty */
5416 /* precondition: all srcdicts are non-NULL and non-empty */
5417 di
= dictGetIterator(srcdicts
[j
]);
5418 while((de
= dictNext(di
)) != NULL
) {
5419 double *score
= zmalloc(sizeof(double));
5422 for (k
= 0; k
< zsetnum
; k
++) {
5423 dictEntry
*other
= (k
== j
) ? de
: dictFind(srcdicts
[k
],dictGetEntryKey(de
));
5425 *score
= *score
+ weights
[k
] * (*(double*)dictGetEntryVal(other
));
5431 /* skip entry when not present in every source dict */
5435 robj
*o
= dictGetEntryKey(de
);
5436 dictAdd(dstzset
->dict
,o
,score
);
5437 incrRefCount(o
); /* added to dictionary */
5438 zslInsert(dstzset
->zsl
,*score
,o
);
5439 incrRefCount(o
); /* added to skiplist */
5442 dictReleaseIterator(di
);
5444 } else if (op
== REDIS_OP_UNION
) {
5445 for (i
= 0; i
< zsetnum
; i
++) {
5446 if (!srcdicts
[i
]) continue;
5448 di
= dictGetIterator(srcdicts
[i
]);
5449 while((de
= dictNext(di
)) != NULL
) {
5450 /* skip key when already processed */
5451 if (dictFind(dstzset
->dict
,dictGetEntryKey(de
)) != NULL
) continue;
5453 double *score
= zmalloc(sizeof(double));
5455 for (j
= 0; j
< zsetnum
; j
++) {
5456 if (!srcdicts
[j
]) continue;
5458 dictEntry
*other
= (i
== j
) ? de
: dictFind(srcdicts
[j
],dictGetEntryKey(de
));
5460 *score
= *score
+ weights
[j
] * (*(double*)dictGetEntryVal(other
));
5464 robj
*o
= dictGetEntryKey(de
);
5465 dictAdd(dstzset
->dict
,o
,score
);
5466 incrRefCount(o
); /* added to dictionary */
5467 zslInsert(dstzset
->zsl
,*score
,o
);
5468 incrRefCount(o
); /* added to skiplist */
5470 dictReleaseIterator(di
);
5473 /* unknown operator */
5474 redisAssert(op
== REDIS_OP_INTER
|| op
== REDIS_OP_UNION
);
5477 deleteKey(c
->db
,dstkey
);
5478 dictAdd(c
->db
->dict
,dstkey
,dstobj
);
5479 incrRefCount(dstkey
);
5481 addReplyLong(c
, dstzset
->zsl
->length
);
5487 static void zunionCommand(redisClient
*c
) {
5488 zunionInterGenericCommand(c
,c
->argv
[1], REDIS_OP_UNION
);
5491 static void zinterCommand(redisClient
*c
) {
5492 zunionInterGenericCommand(c
,c
->argv
[1], REDIS_OP_INTER
);
5495 static void zrangeGenericCommand(redisClient
*c
, int reverse
) {
5497 int start
= atoi(c
->argv
[2]->ptr
);
5498 int end
= atoi(c
->argv
[3]->ptr
);
5501 if (c
->argc
== 5 && !strcasecmp(c
->argv
[4]->ptr
,"withscores")) {
5503 } else if (c
->argc
>= 5) {
5504 addReply(c
,shared
.syntaxerr
);
5508 o
= lookupKeyRead(c
->db
,c
->argv
[1]);
5510 addReply(c
,shared
.nullmultibulk
);
5512 if (o
->type
!= REDIS_ZSET
) {
5513 addReply(c
,shared
.wrongtypeerr
);
5515 zset
*zsetobj
= o
->ptr
;
5516 zskiplist
*zsl
= zsetobj
->zsl
;
5519 int llen
= zsl
->length
;
5523 /* convert negative indexes */
5524 if (start
< 0) start
= llen
+start
;
5525 if (end
< 0) end
= llen
+end
;
5526 if (start
< 0) start
= 0;
5527 if (end
< 0) end
= 0;
5529 /* indexes sanity checks */
5530 if (start
> end
|| start
>= llen
) {
5531 /* Out of range start or start > end result in empty list */
5532 addReply(c
,shared
.emptymultibulk
);
5535 if (end
>= llen
) end
= llen
-1;
5536 rangelen
= (end
-start
)+1;
5538 /* check if starting point is trivial, before searching
5539 * the element in log(N) time */
5541 ln
= start
== 0 ? zsl
->tail
: zslGetElementByRank(zsl
, llen
- start
);
5543 ln
= start
== 0 ? zsl
->header
->forward
[0] : zslGetElementByRank(zsl
, start
+ 1);
5546 /* Return the result in form of a multi-bulk reply */
5547 addReplySds(c
,sdscatprintf(sdsempty(),"*%d\r\n",
5548 withscores
? (rangelen
*2) : rangelen
));
5549 for (j
= 0; j
< rangelen
; j
++) {
5551 addReplyBulkLen(c
,ele
);
5553 addReply(c
,shared
.crlf
);
5555 addReplyDouble(c
,ln
->score
);
5556 ln
= reverse
? ln
->backward
: ln
->forward
[0];
5562 static void zrangeCommand(redisClient
*c
) {
5563 zrangeGenericCommand(c
,0);
5566 static void zrevrangeCommand(redisClient
*c
) {
5567 zrangeGenericCommand(c
,1);
5570 /* This command implements both ZRANGEBYSCORE and ZCOUNT.
5571 * If justcount is non-zero, just the count is returned. */
5572 static void genericZrangebyscoreCommand(redisClient
*c
, int justcount
) {
5575 int minex
= 0, maxex
= 0; /* are min or max exclusive? */
5576 int offset
= 0, limit
= -1;
5580 /* Parse the min-max interval. If one of the values is prefixed
5581 * by the "(" character, it's considered "open". For instance
5582 * ZRANGEBYSCORE zset (1.5 (2.5 will match min < x < max
5583 * ZRANGEBYSCORE zset 1.5 2.5 will instead match min <= x <= max */
5584 if (((char*)c
->argv
[2]->ptr
)[0] == '(') {
5585 min
= strtod((char*)c
->argv
[2]->ptr
+1,NULL
);
5588 min
= strtod(c
->argv
[2]->ptr
,NULL
);
5590 if (((char*)c
->argv
[3]->ptr
)[0] == '(') {
5591 max
= strtod((char*)c
->argv
[3]->ptr
+1,NULL
);
5594 max
= strtod(c
->argv
[3]->ptr
,NULL
);
5597 /* Parse "WITHSCORES": note that if the command was called with
5598 * the name ZCOUNT then we are sure that c->argc == 4, so we'll never
5599 * enter the following paths to parse WITHSCORES and LIMIT. */
5600 if (c
->argc
== 5 || c
->argc
== 8) {
5601 if (strcasecmp(c
->argv
[c
->argc
-1]->ptr
,"withscores") == 0)
5606 if (c
->argc
!= (4 + withscores
) && c
->argc
!= (7 + withscores
))
5610 sdsnew("-ERR wrong number of arguments for ZRANGEBYSCORE\r\n"));
5615 if (c
->argc
== (7 + withscores
) && strcasecmp(c
->argv
[4]->ptr
,"limit")) {
5616 addReply(c
,shared
.syntaxerr
);
5618 } else if (c
->argc
== (7 + withscores
)) {
5619 offset
= atoi(c
->argv
[5]->ptr
);
5620 limit
= atoi(c
->argv
[6]->ptr
);
5621 if (offset
< 0) offset
= 0;
5624 /* Ok, lookup the key and get the range */
5625 o
= lookupKeyRead(c
->db
,c
->argv
[1]);
5627 addReply(c
,justcount
? shared
.czero
: shared
.nullmultibulk
);
5629 if (o
->type
!= REDIS_ZSET
) {
5630 addReply(c
,shared
.wrongtypeerr
);
5632 zset
*zsetobj
= o
->ptr
;
5633 zskiplist
*zsl
= zsetobj
->zsl
;
5635 robj
*ele
, *lenobj
= NULL
;
5636 unsigned long rangelen
= 0;
5638 /* Get the first node with the score >= min, or with
5639 * score > min if 'minex' is true. */
5640 ln
= zslFirstWithScore(zsl
,min
);
5641 while (minex
&& ln
&& ln
->score
== min
) ln
= ln
->forward
[0];
5644 /* No element matching the speciifed interval */
5645 addReply(c
,justcount
? shared
.czero
: shared
.emptymultibulk
);
5649 /* We don't know in advance how many matching elements there
5650 * are in the list, so we push this object that will represent
5651 * the multi-bulk length in the output buffer, and will "fix"
5654 lenobj
= createObject(REDIS_STRING
,NULL
);
5656 decrRefCount(lenobj
);
5659 while(ln
&& (maxex
? (ln
->score
< max
) : (ln
->score
<= max
))) {
5662 ln
= ln
->forward
[0];
5665 if (limit
== 0) break;
5668 addReplyBulkLen(c
,ele
);
5670 addReply(c
,shared
.crlf
);
5672 addReplyDouble(c
,ln
->score
);
5674 ln
= ln
->forward
[0];
5676 if (limit
> 0) limit
--;
5679 addReplyLong(c
,(long)rangelen
);
5681 lenobj
->ptr
= sdscatprintf(sdsempty(),"*%lu\r\n",
5682 withscores
? (rangelen
*2) : rangelen
);
5688 static void zrangebyscoreCommand(redisClient
*c
) {
5689 genericZrangebyscoreCommand(c
,0);
5692 static void zcountCommand(redisClient
*c
) {
5693 genericZrangebyscoreCommand(c
,1);
5696 static void zcardCommand(redisClient
*c
) {
5700 o
= lookupKeyRead(c
->db
,c
->argv
[1]);
5702 addReply(c
,shared
.czero
);
5705 if (o
->type
!= REDIS_ZSET
) {
5706 addReply(c
,shared
.wrongtypeerr
);
5709 addReplySds(c
,sdscatprintf(sdsempty(),":%lu\r\n",zs
->zsl
->length
));
5714 static void zscoreCommand(redisClient
*c
) {
5718 o
= lookupKeyRead(c
->db
,c
->argv
[1]);
5720 addReply(c
,shared
.nullbulk
);
5723 if (o
->type
!= REDIS_ZSET
) {
5724 addReply(c
,shared
.wrongtypeerr
);
5729 de
= dictFind(zs
->dict
,c
->argv
[2]);
5731 addReply(c
,shared
.nullbulk
);
5733 double *score
= dictGetEntryVal(de
);
5735 addReplyDouble(c
,*score
);
5741 static void zrankCommand(redisClient
*c
) {
5743 o
= lookupKeyRead(c
->db
,c
->argv
[1]);
5745 addReply(c
,shared
.nullbulk
);
5748 if (o
->type
!= REDIS_ZSET
) {
5749 addReply(c
,shared
.wrongtypeerr
);
5752 zskiplist
*zsl
= zs
->zsl
;
5756 de
= dictFind(zs
->dict
,c
->argv
[2]);
5758 addReply(c
,shared
.nullbulk
);
5762 double *score
= dictGetEntryVal(de
);
5763 rank
= zslGetRank(zsl
, *score
, c
->argv
[2]);
5765 addReplyLong(c
, rank
-1);
5767 addReply(c
,shared
.nullbulk
);
5772 /* =================================== Hashes =============================== */
5773 static void hsetCommand(redisClient
*c
) {
5775 robj
*o
= lookupKeyWrite(c
->db
,c
->argv
[1]);
5778 o
= createHashObject();
5779 dictAdd(c
->db
->dict
,c
->argv
[1],o
);
5780 incrRefCount(c
->argv
[1]);
5782 if (o
->type
!= REDIS_HASH
) {
5783 addReply(c
,shared
.wrongtypeerr
);
5787 if (o
->encoding
== REDIS_ENCODING_ZIPMAP
) {
5788 unsigned char *zm
= o
->ptr
;
5790 zm
= zipmapSet(zm
,c
->argv
[2]->ptr
,sdslen(c
->argv
[2]->ptr
),
5791 c
->argv
[3]->ptr
,sdslen(c
->argv
[3]->ptr
),&update
);
5794 if (dictAdd(o
->ptr
,c
->argv
[2],c
->argv
[3]) == DICT_OK
) {
5795 incrRefCount(c
->argv
[2]);
5799 incrRefCount(c
->argv
[3]);
5802 addReplySds(c
,sdscatprintf(sdsempty(),":%d\r\n",update
== 0));
5805 static void hgetCommand(redisClient
*c
) {
5806 robj
*o
= lookupKeyRead(c
->db
,c
->argv
[1]);
5809 addReply(c
,shared
.nullbulk
);
5812 if (o
->encoding
== REDIS_ENCODING_ZIPMAP
) {
5813 unsigned char *zm
= o
->ptr
;
5817 if (zipmapGet(zm
,c
->argv
[2]->ptr
,sdslen(c
->argv
[2]->ptr
), &val
,&vlen
)) {
5818 addReplySds(c
,sdscatprintf(sdsempty(),"$%u\r\n", vlen
));
5819 addReplySds(c
,sdsnewlen(val
,vlen
));
5820 addReply(c
,shared
.crlf
);
5823 addReply(c
,shared
.nullbulk
);
5827 struct dictEntry
*de
;
5829 de
= dictFind(o
->ptr
,c
->argv
[2]);
5831 addReply(c
,shared
.nullbulk
);
5833 robj
*e
= dictGetEntryVal(de
);
5835 addReplyBulkLen(c
,e
);
5837 addReply(c
,shared
.crlf
);
5843 /* ========================= Non type-specific commands ==================== */
5845 static void flushdbCommand(redisClient
*c
) {
5846 server
.dirty
+= dictSize(c
->db
->dict
);
5847 dictEmpty(c
->db
->dict
);
5848 dictEmpty(c
->db
->expires
);
5849 addReply(c
,shared
.ok
);
5852 static void flushallCommand(redisClient
*c
) {
5853 server
.dirty
+= emptyDb();
5854 addReply(c
,shared
.ok
);
5855 rdbSave(server
.dbfilename
);
5859 static redisSortOperation
*createSortOperation(int type
, robj
*pattern
) {
5860 redisSortOperation
*so
= zmalloc(sizeof(*so
));
5862 so
->pattern
= pattern
;
5866 /* Return the value associated to the key with a name obtained
5867 * substituting the first occurence of '*' in 'pattern' with 'subst' */
5868 static robj
*lookupKeyByPattern(redisDb
*db
, robj
*pattern
, robj
*subst
) {
5872 int prefixlen
, sublen
, postfixlen
;
5873 /* Expoit the internal sds representation to create a sds string allocated on the stack in order to make this function faster */
5877 char buf
[REDIS_SORTKEY_MAX
+1];
5880 /* If the pattern is "#" return the substitution object itself in order
5881 * to implement the "SORT ... GET #" feature. */
5882 spat
= pattern
->ptr
;
5883 if (spat
[0] == '#' && spat
[1] == '\0') {
5887 /* The substitution object may be specially encoded. If so we create
5888 * a decoded object on the fly. Otherwise getDecodedObject will just
5889 * increment the ref count, that we'll decrement later. */
5890 subst
= getDecodedObject(subst
);
5893 if (sdslen(spat
)+sdslen(ssub
)-1 > REDIS_SORTKEY_MAX
) return NULL
;
5894 p
= strchr(spat
,'*');
5896 decrRefCount(subst
);
5901 sublen
= sdslen(ssub
);
5902 postfixlen
= sdslen(spat
)-(prefixlen
+1);
5903 memcpy(keyname
.buf
,spat
,prefixlen
);
5904 memcpy(keyname
.buf
+prefixlen
,ssub
,sublen
);
5905 memcpy(keyname
.buf
+prefixlen
+sublen
,p
+1,postfixlen
);
5906 keyname
.buf
[prefixlen
+sublen
+postfixlen
] = '\0';
5907 keyname
.len
= prefixlen
+sublen
+postfixlen
;
5909 initStaticStringObject(keyobj
,((char*)&keyname
)+(sizeof(long)*2))
5910 decrRefCount(subst
);
5912 /* printf("lookup '%s' => %p\n", keyname.buf,de); */
5913 return lookupKeyRead(db
,&keyobj
);
5916 /* sortCompare() is used by qsort in sortCommand(). Given that qsort_r with
5917 * the additional parameter is not standard but a BSD-specific we have to
5918 * pass sorting parameters via the global 'server' structure */
5919 static int sortCompare(const void *s1
, const void *s2
) {
5920 const redisSortObject
*so1
= s1
, *so2
= s2
;
5923 if (!server
.sort_alpha
) {
5924 /* Numeric sorting. Here it's trivial as we precomputed scores */
5925 if (so1
->u
.score
> so2
->u
.score
) {
5927 } else if (so1
->u
.score
< so2
->u
.score
) {
5933 /* Alphanumeric sorting */
5934 if (server
.sort_bypattern
) {
5935 if (!so1
->u
.cmpobj
|| !so2
->u
.cmpobj
) {
5936 /* At least one compare object is NULL */
5937 if (so1
->u
.cmpobj
== so2
->u
.cmpobj
)
5939 else if (so1
->u
.cmpobj
== NULL
)
5944 /* We have both the objects, use strcoll */
5945 cmp
= strcoll(so1
->u
.cmpobj
->ptr
,so2
->u
.cmpobj
->ptr
);
5948 /* Compare elements directly */
5951 dec1
= getDecodedObject(so1
->obj
);
5952 dec2
= getDecodedObject(so2
->obj
);
5953 cmp
= strcoll(dec1
->ptr
,dec2
->ptr
);
5958 return server
.sort_desc
? -cmp
: cmp
;
5961 /* The SORT command is the most complex command in Redis. Warning: this code
5962 * is optimized for speed and a bit less for readability */
5963 static void sortCommand(redisClient
*c
) {
5966 int desc
= 0, alpha
= 0;
5967 int limit_start
= 0, limit_count
= -1, start
, end
;
5968 int j
, dontsort
= 0, vectorlen
;
5969 int getop
= 0; /* GET operation counter */
5970 robj
*sortval
, *sortby
= NULL
, *storekey
= NULL
;
5971 redisSortObject
*vector
; /* Resulting vector to sort */
5973 /* Lookup the key to sort. It must be of the right types */
5974 sortval
= lookupKeyRead(c
->db
,c
->argv
[1]);
5975 if (sortval
== NULL
) {
5976 addReply(c
,shared
.nullmultibulk
);
5979 if (sortval
->type
!= REDIS_SET
&& sortval
->type
!= REDIS_LIST
&&
5980 sortval
->type
!= REDIS_ZSET
)
5982 addReply(c
,shared
.wrongtypeerr
);
5986 /* Create a list of operations to perform for every sorted element.
5987 * Operations can be GET/DEL/INCR/DECR */
5988 operations
= listCreate();
5989 listSetFreeMethod(operations
,zfree
);
5992 /* Now we need to protect sortval incrementing its count, in the future
5993 * SORT may have options able to overwrite/delete keys during the sorting
5994 * and the sorted key itself may get destroied */
5995 incrRefCount(sortval
);
5997 /* The SORT command has an SQL-alike syntax, parse it */
5998 while(j
< c
->argc
) {
5999 int leftargs
= c
->argc
-j
-1;
6000 if (!strcasecmp(c
->argv
[j
]->ptr
,"asc")) {
6002 } else if (!strcasecmp(c
->argv
[j
]->ptr
,"desc")) {
6004 } else if (!strcasecmp(c
->argv
[j
]->ptr
,"alpha")) {
6006 } else if (!strcasecmp(c
->argv
[j
]->ptr
,"limit") && leftargs
>= 2) {
6007 limit_start
= atoi(c
->argv
[j
+1]->ptr
);
6008 limit_count
= atoi(c
->argv
[j
+2]->ptr
);
6010 } else if (!strcasecmp(c
->argv
[j
]->ptr
,"store") && leftargs
>= 1) {
6011 storekey
= c
->argv
[j
+1];
6013 } else if (!strcasecmp(c
->argv
[j
]->ptr
,"by") && leftargs
>= 1) {
6014 sortby
= c
->argv
[j
+1];
6015 /* If the BY pattern does not contain '*', i.e. it is constant,
6016 * we don't need to sort nor to lookup the weight keys. */
6017 if (strchr(c
->argv
[j
+1]->ptr
,'*') == NULL
) dontsort
= 1;
6019 } else if (!strcasecmp(c
->argv
[j
]->ptr
,"get") && leftargs
>= 1) {
6020 listAddNodeTail(operations
,createSortOperation(
6021 REDIS_SORT_GET
,c
->argv
[j
+1]));
6025 decrRefCount(sortval
);
6026 listRelease(operations
);
6027 addReply(c
,shared
.syntaxerr
);
6033 /* Load the sorting vector with all the objects to sort */
6034 switch(sortval
->type
) {
6035 case REDIS_LIST
: vectorlen
= listLength((list
*)sortval
->ptr
); break;
6036 case REDIS_SET
: vectorlen
= dictSize((dict
*)sortval
->ptr
); break;
6037 case REDIS_ZSET
: vectorlen
= dictSize(((zset
*)sortval
->ptr
)->dict
); break;
6038 default: vectorlen
= 0; redisAssert(0); /* Avoid GCC warning */
6040 vector
= zmalloc(sizeof(redisSortObject
)*vectorlen
);
6043 if (sortval
->type
== REDIS_LIST
) {
6044 list
*list
= sortval
->ptr
;
6048 listRewind(list
,&li
);
6049 while((ln
= listNext(&li
))) {
6050 robj
*ele
= ln
->value
;
6051 vector
[j
].obj
= ele
;
6052 vector
[j
].u
.score
= 0;
6053 vector
[j
].u
.cmpobj
= NULL
;
6061 if (sortval
->type
== REDIS_SET
) {
6064 zset
*zs
= sortval
->ptr
;
6068 di
= dictGetIterator(set
);
6069 while((setele
= dictNext(di
)) != NULL
) {
6070 vector
[j
].obj
= dictGetEntryKey(setele
);
6071 vector
[j
].u
.score
= 0;
6072 vector
[j
].u
.cmpobj
= NULL
;
6075 dictReleaseIterator(di
);
6077 redisAssert(j
== vectorlen
);
6079 /* Now it's time to load the right scores in the sorting vector */
6080 if (dontsort
== 0) {
6081 for (j
= 0; j
< vectorlen
; j
++) {
6085 byval
= lookupKeyByPattern(c
->db
,sortby
,vector
[j
].obj
);
6086 if (!byval
|| byval
->type
!= REDIS_STRING
) continue;
6088 vector
[j
].u
.cmpobj
= getDecodedObject(byval
);
6090 if (byval
->encoding
== REDIS_ENCODING_RAW
) {
6091 vector
[j
].u
.score
= strtod(byval
->ptr
,NULL
);
6093 /* Don't need to decode the object if it's
6094 * integer-encoded (the only encoding supported) so
6095 * far. We can just cast it */
6096 if (byval
->encoding
== REDIS_ENCODING_INT
) {
6097 vector
[j
].u
.score
= (long)byval
->ptr
;
6099 redisAssert(1 != 1);
6104 if (vector
[j
].obj
->encoding
== REDIS_ENCODING_RAW
)
6105 vector
[j
].u
.score
= strtod(vector
[j
].obj
->ptr
,NULL
);
6107 if (vector
[j
].obj
->encoding
== REDIS_ENCODING_INT
)
6108 vector
[j
].u
.score
= (long) vector
[j
].obj
->ptr
;
6110 redisAssert(1 != 1);
6117 /* We are ready to sort the vector... perform a bit of sanity check
6118 * on the LIMIT option too. We'll use a partial version of quicksort. */
6119 start
= (limit_start
< 0) ? 0 : limit_start
;
6120 end
= (limit_count
< 0) ? vectorlen
-1 : start
+limit_count
-1;
6121 if (start
>= vectorlen
) {
6122 start
= vectorlen
-1;
6125 if (end
>= vectorlen
) end
= vectorlen
-1;
6127 if (dontsort
== 0) {
6128 server
.sort_desc
= desc
;
6129 server
.sort_alpha
= alpha
;
6130 server
.sort_bypattern
= sortby
? 1 : 0;
6131 if (sortby
&& (start
!= 0 || end
!= vectorlen
-1))
6132 pqsort(vector
,vectorlen
,sizeof(redisSortObject
),sortCompare
, start
,end
);
6134 qsort(vector
,vectorlen
,sizeof(redisSortObject
),sortCompare
);
6137 /* Send command output to the output buffer, performing the specified
6138 * GET/DEL/INCR/DECR operations if any. */
6139 outputlen
= getop
? getop
*(end
-start
+1) : end
-start
+1;
6140 if (storekey
== NULL
) {
6141 /* STORE option not specified, sent the sorting result to client */
6142 addReplySds(c
,sdscatprintf(sdsempty(),"*%d\r\n",outputlen
));
6143 for (j
= start
; j
<= end
; j
++) {
6148 addReplyBulkLen(c
,vector
[j
].obj
);
6149 addReply(c
,vector
[j
].obj
);
6150 addReply(c
,shared
.crlf
);
6152 listRewind(operations
,&li
);
6153 while((ln
= listNext(&li
))) {
6154 redisSortOperation
*sop
= ln
->value
;
6155 robj
*val
= lookupKeyByPattern(c
->db
,sop
->pattern
,
6158 if (sop
->type
== REDIS_SORT_GET
) {
6159 if (!val
|| val
->type
!= REDIS_STRING
) {
6160 addReply(c
,shared
.nullbulk
);
6162 addReplyBulkLen(c
,val
);
6164 addReply(c
,shared
.crlf
);
6167 redisAssert(sop
->type
== REDIS_SORT_GET
); /* always fails */
6172 robj
*listObject
= createListObject();
6173 list
*listPtr
= (list
*) listObject
->ptr
;
6175 /* STORE option specified, set the sorting result as a List object */
6176 for (j
= start
; j
<= end
; j
++) {
6181 listAddNodeTail(listPtr
,vector
[j
].obj
);
6182 incrRefCount(vector
[j
].obj
);
6184 listRewind(operations
,&li
);
6185 while((ln
= listNext(&li
))) {
6186 redisSortOperation
*sop
= ln
->value
;
6187 robj
*val
= lookupKeyByPattern(c
->db
,sop
->pattern
,
6190 if (sop
->type
== REDIS_SORT_GET
) {
6191 if (!val
|| val
->type
!= REDIS_STRING
) {
6192 listAddNodeTail(listPtr
,createStringObject("",0));
6194 listAddNodeTail(listPtr
,val
);
6198 redisAssert(sop
->type
== REDIS_SORT_GET
); /* always fails */
6202 if (dictReplace(c
->db
->dict
,storekey
,listObject
)) {
6203 incrRefCount(storekey
);
6205 /* Note: we add 1 because the DB is dirty anyway since even if the
6206 * SORT result is empty a new key is set and maybe the old content
6208 server
.dirty
+= 1+outputlen
;
6209 addReplySds(c
,sdscatprintf(sdsempty(),":%d\r\n",outputlen
));
6213 decrRefCount(sortval
);
6214 listRelease(operations
);
6215 for (j
= 0; j
< vectorlen
; j
++) {
6216 if (sortby
&& alpha
&& vector
[j
].u
.cmpobj
)
6217 decrRefCount(vector
[j
].u
.cmpobj
);
/* Convert an amount of bytes into a human readable string in the form
 * of 100B, 2G, 100M, 4K, and so forth.
 *
 * 's' must point to a buffer large enough for the result (32 bytes is
 * always sufficient). Values below 1024 are printed as an exact byte count,
 * larger ones with two decimals and a K/M/G/T/P suffix (powers of 1024).
 * Every input now produces output: amounts of 1 TiB and above used to leave
 * 's' untouched. */
static void bytesToHuman(char *s, unsigned long long n) {
    double d;

    if (n < 1024) {
        /* Bytes */
        sprintf(s,"%lluB",n);
    } else if (n < (1024*1024)) {
        d = (double)n/(1024);
        sprintf(s,"%.2fK",d);
    } else if (n < (1024LL*1024*1024)) {
        d = (double)n/(1024*1024);
        sprintf(s,"%.2fM",d);
    } else if (n < (1024LL*1024*1024*1024)) {
        d = (double)n/(1024LL*1024*1024);
        sprintf(s,"%.2fG",d);
    } else if (n < (1024LL*1024*1024*1024*1024)) {
        d = (double)n/(1024LL*1024*1024*1024);
        sprintf(s,"%.2fT",d);
    } else if (n < (1024LL*1024*1024*1024*1024*1024)) {
        d = (double)n/(1024LL*1024*1024*1024*1024);
        sprintf(s,"%.2fP",d);
    } else {
        /* Too large for the suffixes above: fall back to plain bytes. */
        sprintf(s,"%lluB",n);
    }
}
6243 /* Create the string returned by the INFO command. This is decoupled
6244 * by the INFO command itself as we need to report the same information
6245 * on memory corruption problems. */
6246 static sds
genRedisInfoString(void) {
6248 time_t uptime
= time(NULL
)-server
.stat_starttime
;
6252 bytesToHuman(hmem
,zmalloc_used_memory());
6253 info
= sdscatprintf(sdsempty(),
6254 "redis_version:%s\r\n"
6256 "multiplexing_api:%s\r\n"
6257 "process_id:%ld\r\n"
6258 "uptime_in_seconds:%ld\r\n"
6259 "uptime_in_days:%ld\r\n"
6260 "connected_clients:%d\r\n"
6261 "connected_slaves:%d\r\n"
6262 "blocked_clients:%d\r\n"
6263 "used_memory:%zu\r\n"
6264 "used_memory_human:%s\r\n"
6265 "changes_since_last_save:%lld\r\n"
6266 "bgsave_in_progress:%d\r\n"
6267 "last_save_time:%ld\r\n"
6268 "bgrewriteaof_in_progress:%d\r\n"
6269 "total_connections_received:%lld\r\n"
6270 "total_commands_processed:%lld\r\n"
6274 (sizeof(long) == 8) ? "64" : "32",
6279 listLength(server
.clients
)-listLength(server
.slaves
),
6280 listLength(server
.slaves
),
6281 server
.blpop_blocked_clients
,
6282 zmalloc_used_memory(),
6285 server
.bgsavechildpid
!= -1,
6287 server
.bgrewritechildpid
!= -1,
6288 server
.stat_numconnections
,
6289 server
.stat_numcommands
,
6290 server
.vm_enabled
!= 0,
6291 server
.masterhost
== NULL
? "master" : "slave"
6293 if (server
.masterhost
) {
6294 info
= sdscatprintf(info
,
6295 "master_host:%s\r\n"
6296 "master_port:%d\r\n"
6297 "master_link_status:%s\r\n"
6298 "master_last_io_seconds_ago:%d\r\n"
6301 (server
.replstate
== REDIS_REPL_CONNECTED
) ?
6303 server
.master
? ((int)(time(NULL
)-server
.master
->lastinteraction
)) : -1
6306 if (server
.vm_enabled
) {
6308 info
= sdscatprintf(info
,
6309 "vm_conf_max_memory:%llu\r\n"
6310 "vm_conf_page_size:%llu\r\n"
6311 "vm_conf_pages:%llu\r\n"
6312 "vm_stats_used_pages:%llu\r\n"
6313 "vm_stats_swapped_objects:%llu\r\n"
6314 "vm_stats_swappin_count:%llu\r\n"
6315 "vm_stats_swappout_count:%llu\r\n"
6316 "vm_stats_io_newjobs_len:%lu\r\n"
6317 "vm_stats_io_processing_len:%lu\r\n"
6318 "vm_stats_io_processed_len:%lu\r\n"
6319 "vm_stats_io_active_threads:%lu\r\n"
6320 "vm_stats_blocked_clients:%lu\r\n"
6321 ,(unsigned long long) server
.vm_max_memory
,
6322 (unsigned long long) server
.vm_page_size
,
6323 (unsigned long long) server
.vm_pages
,
6324 (unsigned long long) server
.vm_stats_used_pages
,
6325 (unsigned long long) server
.vm_stats_swapped_objects
,
6326 (unsigned long long) server
.vm_stats_swapins
,
6327 (unsigned long long) server
.vm_stats_swapouts
,
6328 (unsigned long) listLength(server
.io_newjobs
),
6329 (unsigned long) listLength(server
.io_processing
),
6330 (unsigned long) listLength(server
.io_processed
),
6331 (unsigned long) server
.io_active_threads
,
6332 (unsigned long) server
.vm_blocked_clients
6336 for (j
= 0; j
< server
.dbnum
; j
++) {
6337 long long keys
, vkeys
;
6339 keys
= dictSize(server
.db
[j
].dict
);
6340 vkeys
= dictSize(server
.db
[j
].expires
);
6341 if (keys
|| vkeys
) {
6342 info
= sdscatprintf(info
, "db%d:keys=%lld,expires=%lld\r\n",
6349 static void infoCommand(redisClient
*c
) {
6350 sds info
= genRedisInfoString();
6351 addReplySds(c
,sdscatprintf(sdsempty(),"$%lu\r\n",
6352 (unsigned long)sdslen(info
)));
6353 addReplySds(c
,info
);
6354 addReply(c
,shared
.crlf
);
6357 static void monitorCommand(redisClient
*c
) {
6358 /* ignore MONITOR if aleady slave or in monitor mode */
6359 if (c
->flags
& REDIS_SLAVE
) return;
6361 c
->flags
|= (REDIS_SLAVE
|REDIS_MONITOR
);
6363 listAddNodeTail(server
.monitors
,c
);
6364 addReply(c
,shared
.ok
);
6367 /* ================================= Expire ================================= */
6368 static int removeExpire(redisDb
*db
, robj
*key
) {
6369 if (dictDelete(db
->expires
,key
) == DICT_OK
) {
6376 static int setExpire(redisDb
*db
, robj
*key
, time_t when
) {
6377 if (dictAdd(db
->expires
,key
,(void*)when
) == DICT_ERR
) {
6385 /* Return the expire time of the specified key, or -1 if no expire
6386 * is associated with this key (i.e. the key is non volatile) */
6387 static time_t getExpire(redisDb
*db
, robj
*key
) {
6390 /* No expire? return ASAP */
6391 if (dictSize(db
->expires
) == 0 ||
6392 (de
= dictFind(db
->expires
,key
)) == NULL
) return -1;
6394 return (time_t) dictGetEntryVal(de
);
6397 static int expireIfNeeded(redisDb
*db
, robj
*key
) {
6401 /* No expire? return ASAP */
6402 if (dictSize(db
->expires
) == 0 ||
6403 (de
= dictFind(db
->expires
,key
)) == NULL
) return 0;
6405 /* Lookup the expire */
6406 when
= (time_t) dictGetEntryVal(de
);
6407 if (time(NULL
) <= when
) return 0;
6409 /* Delete the key */
6410 dictDelete(db
->expires
,key
);
6411 return dictDelete(db
->dict
,key
) == DICT_OK
;
6414 static int deleteIfVolatile(redisDb
*db
, robj
*key
) {
6417 /* No expire? return ASAP */
6418 if (dictSize(db
->expires
) == 0 ||
6419 (de
= dictFind(db
->expires
,key
)) == NULL
) return 0;
6421 /* Delete the key */
6423 dictDelete(db
->expires
,key
);
6424 return dictDelete(db
->dict
,key
) == DICT_OK
;
/* Shared body of the expire-setting commands: expireCommand() and
 * expireatCommand() both funnel into it with a number of seconds relative
 * to now.
 *
 * The key is looked up in c->db->dict and the reply is always one of the
 * shared integers shared.czero / shared.cone:
 * - one path deletes the key with deleteKey() (bumping server.dirty when
 *   the deletion succeeds) and replies shared.cone;
 * - the other computes when = time(NULL)+seconds and calls setExpire(),
 *   replying shared.cone if it succeeds and shared.czero otherwise.
 * NOTE(review): the conditions selecting between these paths are not part
 * of this listing -- presumably "key not found" replies shared.czero and a
 * non-positive 'seconds' takes the deleteKey() path; confirm against the
 * full source. */
6427 static void expireGenericCommand(redisClient
*c
, robj
*key
, time_t seconds
) {
6430 de
= dictFind(c
->db
->dict
,key
);
6432 addReply(c
,shared
.czero
);
6436 if (deleteKey(c
->db
,key
)) server
.dirty
++;
6437 addReply(c
, shared
.cone
);
6440 time_t when
= time(NULL
)+seconds
;
6441 if (setExpire(c
->db
,key
,when
)) {
6442 addReply(c
,shared
.cone
);
6445 addReply(c
,shared
.czero
);
6451 static void expireCommand(redisClient
*c
) {
6452 expireGenericCommand(c
,c
->argv
[1],strtol(c
->argv
[2]->ptr
,NULL
,10));
6455 static void expireatCommand(redisClient
*c
) {
6456 expireGenericCommand(c
,c
->argv
[1],strtol(c
->argv
[2]->ptr
,NULL
,10)-time(NULL
));
6459 static void ttlCommand(redisClient
*c
) {
6463 expire
= getExpire(c
->db
,c
->argv
[1]);
6465 ttl
= (int) (expire
-time(NULL
));
6466 if (ttl
< 0) ttl
= -1;
6468 addReplySds(c
,sdscatprintf(sdsempty(),":%d\r\n",ttl
));
6471 /* ================================ MULTI/EXEC ============================== */
6473 /* Client state initialization for MULTI/EXEC */
6474 static void initClientMultiState(redisClient
*c
) {
6475 c
->mstate
.commands
= NULL
;
6476 c
->mstate
.count
= 0;
6479 /* Release all the resources associated with MULTI/EXEC state */
6480 static void freeClientMultiState(redisClient
*c
) {
6483 for (j
= 0; j
< c
->mstate
.count
; j
++) {
6485 multiCmd
*mc
= c
->mstate
.commands
+j
;
6487 for (i
= 0; i
< mc
->argc
; i
++)
6488 decrRefCount(mc
->argv
[i
]);
6491 zfree(c
->mstate
.commands
);
6494 /* Add a new command into the MULTI commands queue */
6495 static void queueMultiCommand(redisClient
*c
, struct redisCommand
*cmd
) {
6499 c
->mstate
.commands
= zrealloc(c
->mstate
.commands
,
6500 sizeof(multiCmd
)*(c
->mstate
.count
+1));
6501 mc
= c
->mstate
.commands
+c
->mstate
.count
;
6504 mc
->argv
= zmalloc(sizeof(robj
*)*c
->argc
);
6505 memcpy(mc
->argv
,c
->argv
,sizeof(robj
*)*c
->argc
);
6506 for (j
= 0; j
< c
->argc
; j
++)
6507 incrRefCount(mc
->argv
[j
]);
6511 static void multiCommand(redisClient
*c
) {
6512 c
->flags
|= REDIS_MULTI
;
6513 addReply(c
,shared
.ok
);
6516 static void discardCommand(redisClient
*c
) {
6517 if (!(c
->flags
& REDIS_MULTI
)) {
6518 addReplySds(c
,sdsnew("-ERR DISCARD without MULTI\r\n"));
6522 freeClientMultiState(c
);
6523 initClientMultiState(c
);
6524 c
->flags
&= (~REDIS_MULTI
);
6525 addReply(c
,shared
.ok
);
6528 static void execCommand(redisClient
*c
) {
6533 if (!(c
->flags
& REDIS_MULTI
)) {
6534 addReplySds(c
,sdsnew("-ERR EXEC without MULTI\r\n"));
6538 orig_argv
= c
->argv
;
6539 orig_argc
= c
->argc
;
6540 addReplySds(c
,sdscatprintf(sdsempty(),"*%d\r\n",c
->mstate
.count
));
6541 for (j
= 0; j
< c
->mstate
.count
; j
++) {
6542 c
->argc
= c
->mstate
.commands
[j
].argc
;
6543 c
->argv
= c
->mstate
.commands
[j
].argv
;
6544 call(c
,c
->mstate
.commands
[j
].cmd
);
6546 c
->argv
= orig_argv
;
6547 c
->argc
= orig_argc
;
6548 freeClientMultiState(c
);
6549 initClientMultiState(c
);
6550 c
->flags
&= (~REDIS_MULTI
);
6553 /* =========================== Blocking Operations ========================= */
6555 /* Currently Redis blocking operations support is limited to list POP ops,
6556 * so the current implementation is not fully generic, but it is also not
6557 * completely specific so it will not require a rewrite to support new
6558 * kind of blocking operations in the future.
6560 * Still it's important to note that list blocking operations can be already
6561 * used as a notification mechanism in order to implement other blocking
6562 * operations at application level, so there must be a very strong evidence
6563 * of usefulness and generality before new blocking operations are implemented.
6565 * This is how the current blocking POP works, we use BLPOP as example:
6566 * - If the user calls BLPOP and the key exists and contains a non-empty list
6567 * then LPOP is called instead. So BLPOP is semantically the same as LPOP
6568 * if there is no need to block.
6569 * - If instead BLPOP is called and the key does not exist or the list is
6570 * empty we need to block. In order to do so we remove the notification for
6571 * new data to read in the client socket (so that we'll not serve new
6572 * requests if the blocking request is not served). Also we put the client
6573 * in a dictionary (db->blockingkeys) mapping keys to a list of clients
6574 * blocking for these keys.
6575 * - If a PUSH operation against a key with blocked clients waiting is
6576 * performed, we serve the first in the list: basically instead of pushing
6577 * the new element inside the list we return it to the (first / oldest)
6578 * blocking client, unblock the client, and remove it from the list.
6580 * The above comment and the source code should be enough in order to understand
6581 * the implementation and modify / fix it later.
6584 /* Set a client in blocking mode for the specified key, with the specified
6586 static void blockForKeys(redisClient
*c
, robj
**keys
, int numkeys
, time_t timeout
) {
6591 c
->blockingkeys
= zmalloc(sizeof(robj
*)*numkeys
);
6592 c
->blockingkeysnum
= numkeys
;
6593 c
->blockingto
= timeout
;
6594 for (j
= 0; j
< numkeys
; j
++) {
6595 /* Add the key in the client structure, to map clients -> keys */
6596 c
->blockingkeys
[j
] = keys
[j
];
6597 incrRefCount(keys
[j
]);
6599 /* And in the other "side", to map keys -> clients */
6600 de
= dictFind(c
->db
->blockingkeys
,keys
[j
]);
6604 /* For every key we take a list of clients blocked for it */
6606 retval
= dictAdd(c
->db
->blockingkeys
,keys
[j
],l
);
6607 incrRefCount(keys
[j
]);
6608 assert(retval
== DICT_OK
);
6610 l
= dictGetEntryVal(de
);
6612 listAddNodeTail(l
,c
);
6614 /* Mark the client as a blocked client */
6615 c
->flags
|= REDIS_BLOCKED
;
6616 server
.blpop_blocked_clients
++;
6619 /* Unblock a client that's waiting in a blocking operation such as BLPOP */
6620 static void unblockClientWaitingData(redisClient
*c
) {
6625 assert(c
->blockingkeys
!= NULL
);
6626 /* The client may wait for multiple keys, so unblock it for every key. */
6627 for (j
= 0; j
< c
->blockingkeysnum
; j
++) {
6628 /* Remove this client from the list of clients waiting for this key. */
6629 de
= dictFind(c
->db
->blockingkeys
,c
->blockingkeys
[j
]);
6631 l
= dictGetEntryVal(de
);
6632 listDelNode(l
,listSearchKey(l
,c
));
6633 /* If the list is empty we need to remove it to avoid wasting memory */
6634 if (listLength(l
) == 0)
6635 dictDelete(c
->db
->blockingkeys
,c
->blockingkeys
[j
]);
6636 decrRefCount(c
->blockingkeys
[j
]);
6638 /* Cleanup the client structure */
6639 zfree(c
->blockingkeys
);
6640 c
->blockingkeys
= NULL
;
6641 c
->flags
&= (~REDIS_BLOCKED
);
6642 server
.blpop_blocked_clients
--;
6643 /* We want to process data if there is some command waiting
6644 * in the input buffer. Note that this is safe even if
6645 * unblockClientWaitingData() gets called from freeClient() because
6646 * freeClient() will be smart enough to call this function
6647 * *after* c->querybuf was set to NULL. */
6648 if (c
->querybuf
&& sdslen(c
->querybuf
) > 0) processInputBuffer(c
);
6651 /* This should be called from any function PUSHing into lists.
6652 * 'c' is the "pushing client", 'key' is the key it is pushing data against,
6653 * 'ele' is the element pushed.
6655 * If the function returns 0 there was no client waiting for a list push
6658 * If the function returns 1 there was a client waiting for a list push
6659 * against this key, the element was passed to this client thus it's not
6660 * needed to actually add it to the list and the caller should return asap. */
6661 static int handleClientsWaitingListPush(redisClient
*c
, robj
*key
, robj
*ele
) {
6662 struct dictEntry
*de
;
6663 redisClient
*receiver
;
6667 de
= dictFind(c
->db
->blockingkeys
,key
);
6668 if (de
== NULL
) return 0;
6669 l
= dictGetEntryVal(de
);
6672 receiver
= ln
->value
;
6674 addReplySds(receiver
,sdsnew("*2\r\n"));
6675 addReplyBulkLen(receiver
,key
);
6676 addReply(receiver
,key
);
6677 addReply(receiver
,shared
.crlf
);
6678 addReplyBulkLen(receiver
,ele
);
6679 addReply(receiver
,ele
);
6680 addReply(receiver
,shared
.crlf
);
6681 unblockClientWaitingData(receiver
);
6685 /* Blocking RPOP/LPOP */
6686 static void blockingPopGenericCommand(redisClient
*c
, int where
) {
6691 for (j
= 1; j
< c
->argc
-1; j
++) {
6692 o
= lookupKeyWrite(c
->db
,c
->argv
[j
]);
6694 if (o
->type
!= REDIS_LIST
) {
6695 addReply(c
,shared
.wrongtypeerr
);
6698 list
*list
= o
->ptr
;
6699 if (listLength(list
) != 0) {
6700 /* If the list contains elements fall back to the usual
6701 * non-blocking POP operation */
6702 robj
*argv
[2], **orig_argv
;
6705 /* We need to alter the command arguments before to call
6706 * popGenericCommand() as the command takes a single key. */
6707 orig_argv
= c
->argv
;
6708 orig_argc
= c
->argc
;
6709 argv
[1] = c
->argv
[j
];
6713 /* Also the return value is different, we need to output
6714 * the multi bulk reply header and the key name. The
6715 * "real" command will add the last element (the value)
6716 * for us. If this souds like an hack to you it's just
6717 * because it is... */
6718 addReplySds(c
,sdsnew("*2\r\n"));
6719 addReplyBulkLen(c
,argv
[1]);
6720 addReply(c
,argv
[1]);
6721 addReply(c
,shared
.crlf
);
6722 popGenericCommand(c
,where
);
6724 /* Fix the client structure with the original stuff */
6725 c
->argv
= orig_argv
;
6726 c
->argc
= orig_argc
;
6732 /* If the list is empty or the key does not exists we must block */
6733 timeout
= strtol(c
->argv
[c
->argc
-1]->ptr
,NULL
,10);
6734 if (timeout
> 0) timeout
+= time(NULL
);
6735 blockForKeys(c
,c
->argv
+1,c
->argc
-2,timeout
);
6738 static void blpopCommand(redisClient
*c
) {
6739 blockingPopGenericCommand(c
,REDIS_HEAD
);
6742 static void brpopCommand(redisClient
*c
) {
6743 blockingPopGenericCommand(c
,REDIS_TAIL
);
6746 /* =============================== Replication ============================= */
6748 static int syncWrite(int fd
, char *ptr
, ssize_t size
, int timeout
) {
6749 ssize_t nwritten
, ret
= size
;
6750 time_t start
= time(NULL
);
6754 if (aeWait(fd
,AE_WRITABLE
,1000) & AE_WRITABLE
) {
6755 nwritten
= write(fd
,ptr
,size
);
6756 if (nwritten
== -1) return -1;
6760 if ((time(NULL
)-start
) > timeout
) {
6768 static int syncRead(int fd
, char *ptr
, ssize_t size
, int timeout
) {
6769 ssize_t nread
, totread
= 0;
6770 time_t start
= time(NULL
);
6774 if (aeWait(fd
,AE_READABLE
,1000) & AE_READABLE
) {
6775 nread
= read(fd
,ptr
,size
);
6776 if (nread
== -1) return -1;
6781 if ((time(NULL
)-start
) > timeout
) {
/* Read a line from 'fd' one byte at a time into the buffer 'ptr' of 'size'
 * bytes. The trailing "\n" (or "\r\n") is not stored and the buffer is
 * always null terminated when 'size' is at least 1.
 *
 * Returns the number of characters read before the newline (a stripped
 * "\r" is still counted), or -1 if syncRead() fails. At most size-1
 * characters are stored: the remaining room is now tracked on every
 * iteration, so a line longer than the buffer stops the read instead of
 * writing past the end of 'ptr'. */
static int syncReadLine(int fd, char *ptr, ssize_t size, int timeout) {
    ssize_t nread = 0;

    if (size <= 0) return 0;
    *ptr = '\0';

    size--; /* Reserve room for the null terminator. */
    while(size > 0) {
        char c;

        if (syncRead(fd,&c,1,timeout) == -1) return -1;
        if (c == '\n') {
            *ptr = '\0';
            if (nread && *(ptr-1) == '\r') *(ptr-1) = '\0';
            return nread;
        } else {
            *ptr++ = c;
            *ptr = '\0';
            nread++;
        }
        size--;
    }
    return nread;
}
6810 static void syncCommand(redisClient
*c
) {
6811 /* ignore SYNC if aleady slave or in monitor mode */
6812 if (c
->flags
& REDIS_SLAVE
) return;
6814 /* SYNC can't be issued when the server has pending data to send to
6815 * the client about already issued commands. We need a fresh reply
6816 * buffer registering the differences between the BGSAVE and the current
6817 * dataset, so that we can copy to other slaves if needed. */
6818 if (listLength(c
->reply
) != 0) {
6819 addReplySds(c
,sdsnew("-ERR SYNC is invalid with pending input\r\n"));
6823 redisLog(REDIS_NOTICE
,"Slave ask for synchronization");
6824 /* Here we need to check if there is a background saving operation
6825 * in progress, or if it is required to start one */
6826 if (server
.bgsavechildpid
!= -1) {
6827 /* Ok a background save is in progress. Let's check if it is a good
6828 * one for replication, i.e. if there is another slave that is
6829 * registering differences since the server forked to save */
6834 listRewind(server
.slaves
,&li
);
6835 while((ln
= listNext(&li
))) {
6837 if (slave
->replstate
== REDIS_REPL_WAIT_BGSAVE_END
) break;
6840 /* Perfect, the server is already registering differences for
6841 * another slave. Set the right state, and copy the buffer. */
6842 listRelease(c
->reply
);
6843 c
->reply
= listDup(slave
->reply
);
6844 c
->replstate
= REDIS_REPL_WAIT_BGSAVE_END
;
6845 redisLog(REDIS_NOTICE
,"Waiting for end of BGSAVE for SYNC");
6847 /* No way, we need to wait for the next BGSAVE in order to
6848 * register differences */
6849 c
->replstate
= REDIS_REPL_WAIT_BGSAVE_START
;
6850 redisLog(REDIS_NOTICE
,"Waiting for next BGSAVE for SYNC");
6853 /* Ok we don't have a BGSAVE in progress, let's start one */
6854 redisLog(REDIS_NOTICE
,"Starting BGSAVE for SYNC");
6855 if (rdbSaveBackground(server
.dbfilename
) != REDIS_OK
) {
6856 redisLog(REDIS_NOTICE
,"Replication failed, can't BGSAVE");
6857 addReplySds(c
,sdsnew("-ERR Unalbe to perform background save\r\n"));
6860 c
->replstate
= REDIS_REPL_WAIT_BGSAVE_END
;
6863 c
->flags
|= REDIS_SLAVE
;
6865 listAddNodeTail(server
.slaves
,c
);
6869 static void sendBulkToSlave(aeEventLoop
*el
, int fd
, void *privdata
, int mask
) {
6870 redisClient
*slave
= privdata
;
6872 REDIS_NOTUSED(mask
);
6873 char buf
[REDIS_IOBUF_LEN
];
6874 ssize_t nwritten
, buflen
;
6876 if (slave
->repldboff
== 0) {
6877 /* Write the bulk write count before transferring the DB. In theory here
6878 * we don't know how much room there is in the output buffer of the
6879 * socket, but in practice SO_SNDLOWAT (the minimum count for output
6880 * operations) will never be smaller than the few bytes we need. */
6883 bulkcount
= sdscatprintf(sdsempty(),"$%lld\r\n",(unsigned long long)
6885 if (write(fd
,bulkcount
,sdslen(bulkcount
)) != (signed)sdslen(bulkcount
))
6893 lseek(slave
->repldbfd
,slave
->repldboff
,SEEK_SET
);
6894 buflen
= read(slave
->repldbfd
,buf
,REDIS_IOBUF_LEN
);
6896 redisLog(REDIS_WARNING
,"Read error sending DB to slave: %s",
6897 (buflen
== 0) ? "premature EOF" : strerror(errno
));
6901 if ((nwritten
= write(fd
,buf
,buflen
)) == -1) {
6902 redisLog(REDIS_VERBOSE
,"Write error sending DB to slave: %s",
6907 slave
->repldboff
+= nwritten
;
6908 if (slave
->repldboff
== slave
->repldbsize
) {
6909 close(slave
->repldbfd
);
6910 slave
->repldbfd
= -1;
6911 aeDeleteFileEvent(server
.el
,slave
->fd
,AE_WRITABLE
);
6912 slave
->replstate
= REDIS_REPL_ONLINE
;
6913 if (aeCreateFileEvent(server
.el
, slave
->fd
, AE_WRITABLE
,
6914 sendReplyToClient
, slave
) == AE_ERR
) {
6918 addReplySds(slave
,sdsempty());
6919 redisLog(REDIS_NOTICE
,"Synchronization with slave succeeded");
6923 /* This function is called at the end of every background saving.
6924 * The argument bgsaveerr is REDIS_OK if the background saving succeeded
6925 * otherwise REDIS_ERR is passed to the function.
6927 * The goal of this function is to handle slaves waiting for a successful
6928 * background saving in order to perform non-blocking synchronization. */
6929 static void updateSlavesWaitingBgsave(int bgsaveerr
) {
6931 int startbgsave
= 0;
6934 listRewind(server
.slaves
,&li
);
6935 while((ln
= listNext(&li
))) {
6936 redisClient
*slave
= ln
->value
;
6938 if (slave
->replstate
== REDIS_REPL_WAIT_BGSAVE_START
) {
6940 slave
->replstate
= REDIS_REPL_WAIT_BGSAVE_END
;
6941 } else if (slave
->replstate
== REDIS_REPL_WAIT_BGSAVE_END
) {
6942 struct redis_stat buf
;
6944 if (bgsaveerr
!= REDIS_OK
) {
6946 redisLog(REDIS_WARNING
,"SYNC failed. BGSAVE child returned an error");
6949 if ((slave
->repldbfd
= open(server
.dbfilename
,O_RDONLY
)) == -1 ||
6950 redis_fstat(slave
->repldbfd
,&buf
) == -1) {
6952 redisLog(REDIS_WARNING
,"SYNC failed. Can't open/stat DB after BGSAVE: %s", strerror(errno
));
6955 slave
->repldboff
= 0;
6956 slave
->repldbsize
= buf
.st_size
;
6957 slave
->replstate
= REDIS_REPL_SEND_BULK
;
6958 aeDeleteFileEvent(server
.el
,slave
->fd
,AE_WRITABLE
);
6959 if (aeCreateFileEvent(server
.el
, slave
->fd
, AE_WRITABLE
, sendBulkToSlave
, slave
) == AE_ERR
) {
6966 if (rdbSaveBackground(server
.dbfilename
) != REDIS_OK
) {
6969 listRewind(server
.slaves
,&li
);
6970 redisLog(REDIS_WARNING
,"SYNC failed. BGSAVE failed");
6971 while((ln
= listNext(&li
))) {
6972 redisClient
*slave
= ln
->value
;
6974 if (slave
->replstate
== REDIS_REPL_WAIT_BGSAVE_START
)
6981 static int syncWithMaster(void) {
6982 char buf
[1024], tmpfile
[256], authcmd
[1024];
6984 int fd
= anetTcpConnect(NULL
,server
.masterhost
,server
.masterport
);
6988 redisLog(REDIS_WARNING
,"Unable to connect to MASTER: %s",
6993 /* AUTH with the master if required. */
6994 if(server
.masterauth
) {
6995 snprintf(authcmd
, 1024, "AUTH %s\r\n", server
.masterauth
);
6996 if (syncWrite(fd
, authcmd
, strlen(server
.masterauth
)+7, 5) == -1) {
6998 redisLog(REDIS_WARNING
,"Unable to AUTH to MASTER: %s",
7002 /* Read the AUTH result. */
7003 if (syncReadLine(fd
,buf
,1024,3600) == -1) {
7005 redisLog(REDIS_WARNING
,"I/O error reading auth result from MASTER: %s",
7009 if (buf
[0] != '+') {
7011 redisLog(REDIS_WARNING
,"Cannot AUTH to MASTER, is the masterauth password correct?");
7016 /* Issue the SYNC command */
7017 if (syncWrite(fd
,"SYNC \r\n",7,5) == -1) {
7019 redisLog(REDIS_WARNING
,"I/O error writing to MASTER: %s",
7023 /* Read the bulk write count */
7024 if (syncReadLine(fd
,buf
,1024,3600) == -1) {
7026 redisLog(REDIS_WARNING
,"I/O error reading bulk count from MASTER: %s",
7030 if (buf
[0] != '$') {
7032 redisLog(REDIS_WARNING
,"Bad protocol from MASTER, the first byte is not '$', are you sure the host and port are right?");
7035 dumpsize
= strtol(buf
+1,NULL
,10);
7036 redisLog(REDIS_NOTICE
,"Receiving %ld bytes data dump from MASTER",dumpsize
);
7037 /* Read the bulk write data on a temp file */
7038 snprintf(tmpfile
,256,"temp-%d.%ld.rdb",(int)time(NULL
),(long int)random());
7039 dfd
= open(tmpfile
,O_CREAT
|O_WRONLY
,0644);
7042 redisLog(REDIS_WARNING
,"Opening the temp file needed for MASTER <-> SLAVE synchronization: %s",strerror(errno
));
7046 int nread
, nwritten
;
7048 nread
= read(fd
,buf
,(dumpsize
< 1024)?dumpsize
:1024);
7050 redisLog(REDIS_WARNING
,"I/O error trying to sync with MASTER: %s",
7056 nwritten
= write(dfd
,buf
,nread
);
7057 if (nwritten
== -1) {
7058 redisLog(REDIS_WARNING
,"Write error writing to the DB dump file needed for MASTER <-> SLAVE synchrnonization: %s", strerror(errno
));
7066 if (rename(tmpfile
,server
.dbfilename
) == -1) {
7067 redisLog(REDIS_WARNING
,"Failed trying to rename the temp DB into dump.rdb in MASTER <-> SLAVE synchronization: %s", strerror(errno
));
7073 if (rdbLoad(server
.dbfilename
) != REDIS_OK
) {
7074 redisLog(REDIS_WARNING
,"Failed trying to load the MASTER synchronization DB from disk");
7078 server
.master
= createClient(fd
);
7079 server
.master
->flags
|= REDIS_MASTER
;
7080 server
.master
->authenticated
= 1;
7081 server
.replstate
= REDIS_REPL_CONNECTED
;
7085 static void slaveofCommand(redisClient
*c
) {
7086 if (!strcasecmp(c
->argv
[1]->ptr
,"no") &&
7087 !strcasecmp(c
->argv
[2]->ptr
,"one")) {
7088 if (server
.masterhost
) {
7089 sdsfree(server
.masterhost
);
7090 server
.masterhost
= NULL
;
7091 if (server
.master
) freeClient(server
.master
);
7092 server
.replstate
= REDIS_REPL_NONE
;
7093 redisLog(REDIS_NOTICE
,"MASTER MODE enabled (user request)");
7096 sdsfree(server
.masterhost
);
7097 server
.masterhost
= sdsdup(c
->argv
[1]->ptr
);
7098 server
.masterport
= atoi(c
->argv
[2]->ptr
);
7099 if (server
.master
) freeClient(server
.master
);
7100 server
.replstate
= REDIS_REPL_CONNECT
;
7101 redisLog(REDIS_NOTICE
,"SLAVE OF %s:%d enabled (user request)",
7102 server
.masterhost
, server
.masterport
);
7104 addReply(c
,shared
.ok
);
7107 /* ============================ Maxmemory directive ======================== */
7109 /* Try to free one object from the pre-allocated objects free list.
7110 * This is useful under low mem conditions as by default we take 1 million
7111 * free objects allocated. On success REDIS_OK is returned, otherwise
7113 static int tryFreeOneObjectFromFreelist(void) {
7116 if (server
.vm_enabled
) pthread_mutex_lock(&server
.obj_freelist_mutex
);
7117 if (listLength(server
.objfreelist
)) {
7118 listNode
*head
= listFirst(server
.objfreelist
);
7119 o
= listNodeValue(head
);
7120 listDelNode(server
.objfreelist
,head
);
7121 if (server
.vm_enabled
) pthread_mutex_unlock(&server
.obj_freelist_mutex
);
7125 if (server
.vm_enabled
) pthread_mutex_unlock(&server
.obj_freelist_mutex
);
7130 /* This function gets called when 'maxmemory' is set on the config file to limit
7131 * the max memory used by the server, and we are out of memory.
7132 * This function will try to, in order:
7134 * - Free objects from the free list
7135 * - Try to remove keys with an EXPIRE set
7137 * If it is not possible to free enough memory to reach used-memory < maxmemory,
7138 * the server will start refusing commands that will enlarge even more the
7141 static void freeMemoryIfNeeded(void) {
7142 while (server
.maxmemory
&& zmalloc_used_memory() > server
.maxmemory
) {
7143 int j
, k
, freed
= 0;
7145 if (tryFreeOneObjectFromFreelist() == REDIS_OK
) continue;
7146 for (j
= 0; j
< server
.dbnum
; j
++) {
7148 robj
*minkey
= NULL
;
7149 struct dictEntry
*de
;
7151 if (dictSize(server
.db
[j
].expires
)) {
7153 /* From a sample of three keys drop the one nearest to
7154 * the natural expire */
7155 for (k
= 0; k
< 3; k
++) {
7158 de
= dictGetRandomKey(server
.db
[j
].expires
);
7159 t
= (time_t) dictGetEntryVal(de
);
7160 if (minttl
== -1 || t
< minttl
) {
7161 minkey
= dictGetEntryKey(de
);
7165 deleteKey(server
.db
+j
,minkey
);
7168 if (!freed
) return; /* nothing to free... */
7172 /* ============================== Append Only file ========================== */
7174 static void feedAppendOnlyFile(struct redisCommand
*cmd
, int dictid
, robj
**argv
, int argc
) {
7175 sds buf
= sdsempty();
7181 /* The DB this command was targeting is not the same as the last command
7182 * we appended. A SELECT command needs to be issued first. */
7183 if (dictid
!= server
.appendseldb
) {
7186 snprintf(seldb
,sizeof(seldb
),"%d",dictid
);
7187 buf
= sdscatprintf(buf
,"*2\r\n$6\r\nSELECT\r\n$%lu\r\n%s\r\n",
7188 (unsigned long)strlen(seldb
),seldb
);
7189 server
.appendseldb
= dictid
;
7192 /* "Fix" the argv vector if the command is EXPIRE. We want to translate
7193 * EXPIREs into EXPIREATs calls */
7194 if (cmd
->proc
== expireCommand
) {
7197 tmpargv
[0] = createStringObject("EXPIREAT",8);
7198 tmpargv
[1] = argv
[1];
7199 incrRefCount(argv
[1]);
7200 when
= time(NULL
)+strtol(argv
[2]->ptr
,NULL
,10);
7201 tmpargv
[2] = createObject(REDIS_STRING
,
7202 sdscatprintf(sdsempty(),"%ld",when
));
7206 /* Append the actual command */
7207 buf
= sdscatprintf(buf
,"*%d\r\n",argc
);
7208 for (j
= 0; j
< argc
; j
++) {
7211 o
= getDecodedObject(o
);
7212 buf
= sdscatprintf(buf
,"$%lu\r\n",(unsigned long)sdslen(o
->ptr
));
7213 buf
= sdscatlen(buf
,o
->ptr
,sdslen(o
->ptr
));
7214 buf
= sdscatlen(buf
,"\r\n",2);
7218 /* Free the objects from the modified argv for EXPIREAT */
7219 if (cmd
->proc
== expireCommand
) {
7220 for (j
= 0; j
< 3; j
++)
7221 decrRefCount(argv
[j
]);
7224 /* We want to perform a single write. This should be guaranteed atomic
7225 * at least if the filesystem we are writing is a real physical one.
7226 * While this will save us against the server being killed I don't think
7227 * there is much to do about the whole server stopping for power problems
7229 nwritten
= write(server
.appendfd
,buf
,sdslen(buf
));
7230 if (nwritten
!= (signed)sdslen(buf
)) {
7231 /* Ooops, we are in trouble. The best thing to do for now is
7232 * to simply exit instead of giving the illusion that everything is
7233 * working as expected. */
7234 if (nwritten
== -1) {
7235 redisLog(REDIS_WARNING
,"Exiting on error writing to the append-only file: %s",strerror(errno
));
7237 redisLog(REDIS_WARNING
,"Exiting on short write while writing to the append-only file: %s",strerror(errno
));
7241 /* If a background append only file rewriting is in progress we want to
7242 * accumulate the differences between the child DB and the current one
7243 * in a buffer, so that when the child process will do its work we
7244 * can append the differences to the new append only file. */
7245 if (server
.bgrewritechildpid
!= -1)
7246 server
.bgrewritebuf
= sdscatlen(server
.bgrewritebuf
,buf
,sdslen(buf
));
7250 if (server
.appendfsync
== APPENDFSYNC_ALWAYS
||
7251 (server
.appendfsync
== APPENDFSYNC_EVERYSEC
&&
7252 now
-server
.lastfsync
> 1))
7254 fsync(server
.appendfd
); /* Let's try to get this data on the disk */
7255 server
.lastfsync
= now
;
7259 /* In Redis commands are always executed in the context of a client, so in
7260 * order to load the append only file we need to create a fake client. */
7261 static struct redisClient
*createFakeClient(void) {
7262 struct redisClient
*c
= zmalloc(sizeof(*c
));
7266 c
->querybuf
= sdsempty();
7270 /* We set the fake client as a slave waiting for the synchronization
7271 * so that Redis will not try to send replies to this client. */
7272 c
->replstate
= REDIS_REPL_WAIT_BGSAVE_START
;
7273 c
->reply
= listCreate();
7274 listSetFreeMethod(c
->reply
,decrRefCount
);
7275 listSetDupMethod(c
->reply
,dupClientReplyValue
);
7279 static void freeFakeClient(struct redisClient
*c
) {
7280 sdsfree(c
->querybuf
);
7281 listRelease(c
->reply
);
7285 /* Replay the append log file. On success REDIS_OK is returned. On non fatal
7286 * error (the append only file is zero-length) REDIS_ERR is returned. On
7287 * fatal error an error message is logged and the program exits. */
7288 int loadAppendOnlyFile(char *filename
) {
7289 struct redisClient
*fakeClient
;
7290 FILE *fp
= fopen(filename
,"r");
7291 struct redis_stat sb
;
7292 unsigned long long loadedkeys
= 0;
7294 if (redis_fstat(fileno(fp
),&sb
) != -1 && sb
.st_size
== 0)
7298 redisLog(REDIS_WARNING
,"Fatal error: can't open the append log file for reading: %s",strerror(errno
));
7302 fakeClient
= createFakeClient();
7309 struct redisCommand
*cmd
;
7311 if (fgets(buf
,sizeof(buf
),fp
) == NULL
) {
7317 if (buf
[0] != '*') goto fmterr
;
7319 argv
= zmalloc(sizeof(robj
*)*argc
);
7320 for (j
= 0; j
< argc
; j
++) {
7321 if (fgets(buf
,sizeof(buf
),fp
) == NULL
) goto readerr
;
7322 if (buf
[0] != '$') goto fmterr
;
7323 len
= strtol(buf
+1,NULL
,10);
7324 argsds
= sdsnewlen(NULL
,len
);
7325 if (len
&& fread(argsds
,len
,1,fp
) == 0) goto fmterr
;
7326 argv
[j
] = createObject(REDIS_STRING
,argsds
);
7327 if (fread(buf
,2,1,fp
) == 0) goto fmterr
; /* discard CRLF */
7330 /* Command lookup */
7331 cmd
= lookupCommand(argv
[0]->ptr
);
7333 redisLog(REDIS_WARNING
,"Unknown command '%s' reading the append only file", argv
[0]->ptr
);
7336 /* Try object sharing and encoding */
7337 if (server
.shareobjects
) {
7339 for(j
= 1; j
< argc
; j
++)
7340 argv
[j
] = tryObjectSharing(argv
[j
]);
7342 if (cmd
->flags
& REDIS_CMD_BULK
)
7343 tryObjectEncoding(argv
[argc
-1]);
7344 /* Run the command in the context of a fake client */
7345 fakeClient
->argc
= argc
;
7346 fakeClient
->argv
= argv
;
7347 cmd
->proc(fakeClient
);
7348 /* Discard the reply objects list from the fake client */
7349 while(listLength(fakeClient
->reply
))
7350 listDelNode(fakeClient
->reply
,listFirst(fakeClient
->reply
));
7351 /* Clean up, ready for the next command */
7352 for (j
= 0; j
< argc
; j
++) decrRefCount(argv
[j
]);
7354 /* Handle swapping while loading big datasets when VM is on */
7356 if (server
.vm_enabled
&& (loadedkeys
% 5000) == 0) {
7357 while (zmalloc_used_memory() > server
.vm_max_memory
) {
7358 if (vmSwapOneObjectBlocking() == REDIS_ERR
) break;
7363 freeFakeClient(fakeClient
);
7368 redisLog(REDIS_WARNING
,"Unexpected end of file reading the append only file");
7370 redisLog(REDIS_WARNING
,"Unrecoverable error reading the append only file: %s", strerror(errno
));
7374 redisLog(REDIS_WARNING
,"Bad file format reading the append only file");
7378 /* Write an object into a file in the bulk format $<count>\r\n<payload>\r\n */
7379 static int fwriteBulk(FILE *fp
, robj
*obj
) {
7383 /* Avoid the incr/decr ref count business if possible to help
7384 * copy-on-write (we are often in a child process when this function
7386 * Also makes sure that key objects don't get incrRefCount-ed when VM
7388 if (obj
->encoding
!= REDIS_ENCODING_RAW
) {
7389 obj
= getDecodedObject(obj
);
7392 snprintf(buf
,sizeof(buf
),"$%ld\r\n",(long)sdslen(obj
->ptr
));
7393 if (fwrite(buf
,strlen(buf
),1,fp
) == 0) goto err
;
7394 if (sdslen(obj
->ptr
) && fwrite(obj
->ptr
,sdslen(obj
->ptr
),1,fp
) == 0)
7396 if (fwrite("\r\n",2,1,fp
) == 0) goto err
;
7397 if (decrrc
) decrRefCount(obj
);
7400 if (decrrc
) decrRefCount(obj
);
7404 /* Write a double value in bulk format $<count>\r\n<payload>\r\n */
7405 static int fwriteBulkDouble(FILE *fp
, double d
) {
7406 char buf
[128], dbuf
[128];
7408 snprintf(dbuf
,sizeof(dbuf
),"%.17g\r\n",d
);
7409 snprintf(buf
,sizeof(buf
),"$%lu\r\n",(unsigned long)strlen(dbuf
)-2);
7410 if (fwrite(buf
,strlen(buf
),1,fp
) == 0) return 0;
7411 if (fwrite(dbuf
,strlen(dbuf
),1,fp
) == 0) return 0;
7415 /* Write a long value in bulk format $<count>\r\n<payload>\r\n */
7416 static int fwriteBulkLong(FILE *fp
, long l
) {
7417 char buf
[128], lbuf
[128];
7419 snprintf(lbuf
,sizeof(lbuf
),"%ld\r\n",l
);
7420 snprintf(buf
,sizeof(buf
),"$%lu\r\n",(unsigned long)strlen(lbuf
)-2);
7421 if (fwrite(buf
,strlen(buf
),1,fp
) == 0) return 0;
7422 if (fwrite(lbuf
,strlen(lbuf
),1,fp
) == 0) return 0;
7426 /* Write a sequence of commands able to fully rebuild the dataset into
7427 * "filename". Used both by REWRITEAOF and BGREWRITEAOF. */
7428 static int rewriteAppendOnlyFile(char *filename
) {
7429 dictIterator
*di
= NULL
;
7434 time_t now
= time(NULL
);
7436 /* Note that we have to use a different temp name here compared to the
7437 * one used by rewriteAppendOnlyFileBackground() function. */
7438 snprintf(tmpfile
,256,"temp-rewriteaof-%d.aof", (int) getpid());
7439 fp
= fopen(tmpfile
,"w");
7441 redisLog(REDIS_WARNING
, "Failed rewriting the append only file: %s", strerror(errno
));
7444 for (j
= 0; j
< server
.dbnum
; j
++) {
7445 char selectcmd
[] = "*2\r\n$6\r\nSELECT\r\n";
7446 redisDb
*db
= server
.db
+j
;
7448 if (dictSize(d
) == 0) continue;
7449 di
= dictGetIterator(d
);
7455 /* SELECT the new DB */
7456 if (fwrite(selectcmd
,sizeof(selectcmd
)-1,1,fp
) == 0) goto werr
;
7457 if (fwriteBulkLong(fp
,j
) == 0) goto werr
;
7459 /* Iterate this DB writing every entry */
7460 while((de
= dictNext(di
)) != NULL
) {
7465 key
= dictGetEntryKey(de
);
7466 /* If the value for this key is swapped, load a preview in memory.
7467 * We use a "swapped" flag to remember if we need to free the
7468 * value object, instead of just incrementing the ref count anyway,
7469 * in order to avoid copy-on-write of pages if we are forked() */
7470 if (!server
.vm_enabled
|| key
->storage
== REDIS_VM_MEMORY
||
7471 key
->storage
== REDIS_VM_SWAPPING
) {
7472 o
= dictGetEntryVal(de
);
7475 o
= vmPreviewObject(key
);
7478 expiretime
= getExpire(db
,key
);
7480 /* Save the key and associated value */
7481 if (o
->type
== REDIS_STRING
) {
7482 /* Emit a SET command */
7483 char cmd
[]="*3\r\n$3\r\nSET\r\n";
7484 if (fwrite(cmd
,sizeof(cmd
)-1,1,fp
) == 0) goto werr
;
7486 if (fwriteBulk(fp
,key
) == 0) goto werr
;
7487 if (fwriteBulk(fp
,o
) == 0) goto werr
;
7488 } else if (o
->type
== REDIS_LIST
) {
7489 /* Emit the RPUSHes needed to rebuild the list */
7490 list
*list
= o
->ptr
;
7494 listRewind(list
,&li
);
7495 while((ln
= listNext(&li
))) {
7496 char cmd
[]="*3\r\n$5\r\nRPUSH\r\n";
7497 robj
*eleobj
= listNodeValue(ln
);
7499 if (fwrite(cmd
,sizeof(cmd
)-1,1,fp
) == 0) goto werr
;
7500 if (fwriteBulk(fp
,key
) == 0) goto werr
;
7501 if (fwriteBulk(fp
,eleobj
) == 0) goto werr
;
7503 } else if (o
->type
== REDIS_SET
) {
7504 /* Emit the SADDs needed to rebuild the set */
7506 dictIterator
*di
= dictGetIterator(set
);
7509 while((de
= dictNext(di
)) != NULL
) {
7510 char cmd
[]="*3\r\n$4\r\nSADD\r\n";
7511 robj
*eleobj
= dictGetEntryKey(de
);
7513 if (fwrite(cmd
,sizeof(cmd
)-1,1,fp
) == 0) goto werr
;
7514 if (fwriteBulk(fp
,key
) == 0) goto werr
;
7515 if (fwriteBulk(fp
,eleobj
) == 0) goto werr
;
7517 dictReleaseIterator(di
);
7518 } else if (o
->type
== REDIS_ZSET
) {
7519 /* Emit the ZADDs needed to rebuild the sorted set */
7521 dictIterator
*di
= dictGetIterator(zs
->dict
);
7524 while((de
= dictNext(di
)) != NULL
) {
7525 char cmd
[]="*4\r\n$4\r\nZADD\r\n";
7526 robj
*eleobj
= dictGetEntryKey(de
);
7527 double *score
= dictGetEntryVal(de
);
7529 if (fwrite(cmd
,sizeof(cmd
)-1,1,fp
) == 0) goto werr
;
7530 if (fwriteBulk(fp
,key
) == 0) goto werr
;
7531 if (fwriteBulkDouble(fp
,*score
) == 0) goto werr
;
7532 if (fwriteBulk(fp
,eleobj
) == 0) goto werr
;
7534 dictReleaseIterator(di
);
7536 redisAssert(0 != 0);
7538 /* Save the expire time */
7539 if (expiretime
!= -1) {
7540 char cmd
[]="*3\r\n$8\r\nEXPIREAT\r\n";
7541 /* If this key is already expired skip it */
7542 if (expiretime
< now
) continue;
7543 if (fwrite(cmd
,sizeof(cmd
)-1,1,fp
) == 0) goto werr
;
7544 if (fwriteBulk(fp
,key
) == 0) goto werr
;
7545 if (fwriteBulkLong(fp
,expiretime
) == 0) goto werr
;
7547 if (swapped
) decrRefCount(o
);
7549 dictReleaseIterator(di
);
7552 /* Make sure data will not remain on the OS's output buffers */
7557 /* Use RENAME to make sure the DB file is changed atomically only
7558 * if the generated DB file is ok. */
7559 if (rename(tmpfile
,filename
) == -1) {
7560 redisLog(REDIS_WARNING
,"Error moving temp append only file on the final destination: %s", strerror(errno
));
7564 redisLog(REDIS_NOTICE
,"SYNC append only file rewrite performed");
7570 redisLog(REDIS_WARNING
,"Write error writing append only file on disk: %s", strerror(errno
));
7571 if (di
) dictReleaseIterator(di
);
7575 /* This is how rewriting of the append only file in background works:
7577 * 1) The user calls BGREWRITEAOF
7578 * 2) Redis calls this function, that forks():
7579 * 2a) the child rewrites the append only file in a temp file.
7580 * 2b) the parent accumulates differences in server.bgrewritebuf.
7581 * 3) When the child finishes '2a' it exits.
7582 * 4) The parent will trap the exit code, if it's OK, will append the
7583 * data accumulated into server.bgrewritebuf into the temp file, and
7584 * finally will rename(2) the temp file in the actual file name.
7585 * Then the new file is reopened as the new append only file. Profit!
7587 static int rewriteAppendOnlyFileBackground(void) {
7590 if (server
.bgrewritechildpid
!= -1) return REDIS_ERR
;
7591 if (server
.vm_enabled
) waitEmptyIOJobsQueue();
7592 if ((childpid
= fork()) == 0) {
7596 if (server
.vm_enabled
) vmReopenSwapFile();
7598 snprintf(tmpfile
,256,"temp-rewriteaof-bg-%d.aof", (int) getpid());
7599 if (rewriteAppendOnlyFile(tmpfile
) == REDIS_OK
) {
7606 if (childpid
== -1) {
7607 redisLog(REDIS_WARNING
,
7608 "Can't rewrite append only file in background: fork: %s",
7612 redisLog(REDIS_NOTICE
,
7613 "Background append only file rewriting started by pid %d",childpid
);
7614 server
.bgrewritechildpid
= childpid
;
7615 /* We set appendseldb to -1 in order to force the next call to the
7616 * feedAppendOnlyFile() to issue a SELECT command, so the differences
7617 * accumulated by the parent into server.bgrewritebuf will start
7618 * with a SELECT statement and it will be safe to merge. */
7619 server
.appendseldb
= -1;
7622 return REDIS_OK
; /* unreached */
7625 static void bgrewriteaofCommand(redisClient
*c
) {
7626 if (server
.bgrewritechildpid
!= -1) {
7627 addReplySds(c
,sdsnew("-ERR background append only file rewriting already in progress\r\n"));
7630 if (rewriteAppendOnlyFileBackground() == REDIS_OK
) {
7631 char *status
= "+Background append only file rewriting started\r\n";
7632 addReplySds(c
,sdsnew(status
));
7634 addReply(c
,shared
.err
);
7638 static void aofRemoveTempFile(pid_t childpid
) {
7641 snprintf(tmpfile
,256,"temp-rewriteaof-bg-%d.aof", (int) childpid
);
7645 /* Virtual Memory is composed mainly of two subsystems:
7646 * - Blocking Virtual Memory
7647 * - Threaded Virtual Memory I/O
7648 * The two parts are not fully decoupled, but functions are split among two
7649 * different sections of the source code (delimited by comments) in order to
7650 * make more clear what functionality is about the blocking VM and what about
7651 * the threaded (not blocking) VM.
7655 * Redis VM is a blocking VM (one that blocks reading swapped values from
7656 * disk into memory when a value swapped out is needed in memory) that is made
7657 * unblocking by trying to examine the command argument vector in order to
7658 * load in background values that will likely be needed in order to exec
7659 * the command. The command is executed only once all the relevant keys
7660 * are loaded into memory.
7662 * This basically is almost as simple as a blocking VM, but almost as parallel
7663 * as a fully non-blocking VM.
7666 /* =================== Virtual Memory - Blocking Side ====================== */
7668 /* substitute the first occurrence of '%p' with the process pid in the
7669 * swap file name. */
7670 static void expandVmSwapFilename(void) {
7671 char *p
= strstr(server
.vm_swap_file
,"%p");
7677 new = sdscat(new,server
.vm_swap_file
);
7678 new = sdscatprintf(new,"%ld",(long) getpid());
7679 new = sdscat(new,p
+2);
7680 zfree(server
.vm_swap_file
);
7681 server
.vm_swap_file
= new;
7684 static void vmInit(void) {
7689 if (server
.vm_max_threads
!= 0)
7690 zmalloc_enable_thread_safeness(); /* we need thread safe zmalloc() */
7692 expandVmSwapFilename();
7693 redisLog(REDIS_NOTICE
,"Using '%s' as swap file",server
.vm_swap_file
);
7694 if ((server
.vm_fp
= fopen(server
.vm_swap_file
,"r+b")) == NULL
) {
7695 server
.vm_fp
= fopen(server
.vm_swap_file
,"w+b");
7697 if (server
.vm_fp
== NULL
) {
7698 redisLog(REDIS_WARNING
,
7699 "Impossible to open the swap file: %s. Exiting.",
7703 server
.vm_fd
= fileno(server
.vm_fp
);
7704 server
.vm_next_page
= 0;
7705 server
.vm_near_pages
= 0;
7706 server
.vm_stats_used_pages
= 0;
7707 server
.vm_stats_swapped_objects
= 0;
7708 server
.vm_stats_swapouts
= 0;
7709 server
.vm_stats_swapins
= 0;
7710 totsize
= server
.vm_pages
*server
.vm_page_size
;
7711 redisLog(REDIS_NOTICE
,"Allocating %lld bytes of swap file",totsize
);
7712 if (ftruncate(server
.vm_fd
,totsize
) == -1) {
7713 redisLog(REDIS_WARNING
,"Can't ftruncate swap file: %s. Exiting.",
7717 redisLog(REDIS_NOTICE
,"Swap file allocated with success");
7719 server
.vm_bitmap
= zmalloc((server
.vm_pages
+7)/8);
7720 redisLog(REDIS_VERBOSE
,"Allocated %lld bytes page table for %lld pages",
7721 (long long) (server
.vm_pages
+7)/8, server
.vm_pages
);
7722 memset(server
.vm_bitmap
,0,(server
.vm_pages
+7)/8);
7724 /* Initialize threaded I/O (used by Virtual Memory) */
7725 server
.io_newjobs
= listCreate();
7726 server
.io_processing
= listCreate();
7727 server
.io_processed
= listCreate();
7728 server
.io_ready_clients
= listCreate();
7729 pthread_mutex_init(&server
.io_mutex
,NULL
);
7730 pthread_mutex_init(&server
.obj_freelist_mutex
,NULL
);
7731 pthread_mutex_init(&server
.io_swapfile_mutex
,NULL
);
7732 server
.io_active_threads
= 0;
7733 if (pipe(pipefds
) == -1) {
7734 redisLog(REDIS_WARNING
,"Unable to intialized VM: pipe(2): %s. Exiting."
7738 server
.io_ready_pipe_read
= pipefds
[0];
7739 server
.io_ready_pipe_write
= pipefds
[1];
7740 redisAssert(anetNonBlock(NULL
,server
.io_ready_pipe_read
) != ANET_ERR
);
7741 /* LZF requires a lot of stack */
7742 pthread_attr_init(&server
.io_threads_attr
);
7743 pthread_attr_getstacksize(&server
.io_threads_attr
, &stacksize
);
7744 while (stacksize
< REDIS_THREAD_STACK_SIZE
) stacksize
*= 2;
7745 pthread_attr_setstacksize(&server
.io_threads_attr
, stacksize
);
7746 /* Listen for events in the threaded I/O pipe */
7747 if (aeCreateFileEvent(server
.el
, server
.io_ready_pipe_read
, AE_READABLE
,
7748 vmThreadedIOCompletedJob
, NULL
) == AE_ERR
)
7749 oom("creating file event");
7752 /* Mark the page as used */
7753 static void vmMarkPageUsed(off_t page
) {
7754 off_t byte
= page
/8;
7756 redisAssert(vmFreePage(page
) == 1);
7757 server
.vm_bitmap
[byte
] |= 1<<bit
;
7760 /* Mark N contiguous pages as used, with 'page' being the first. */
7761 static void vmMarkPagesUsed(off_t page
, off_t count
) {
7764 for (j
= 0; j
< count
; j
++)
7765 vmMarkPageUsed(page
+j
);
7766 server
.vm_stats_used_pages
+= count
;
7767 redisLog(REDIS_DEBUG
,"Mark USED pages: %lld pages at %lld\n",
7768 (long long)count
, (long long)page
);
7771 /* Mark the page as free */
7772 static void vmMarkPageFree(off_t page
) {
7773 off_t byte
= page
/8;
7775 redisAssert(vmFreePage(page
) == 0);
7776 server
.vm_bitmap
[byte
] &= ~(1<<bit
);
7779 /* Mark N contiguous pages as free, with 'page' being the first. */
7780 static void vmMarkPagesFree(off_t page
, off_t count
) {
7783 for (j
= 0; j
< count
; j
++)
7784 vmMarkPageFree(page
+j
);
7785 server
.vm_stats_used_pages
-= count
;
7786 redisLog(REDIS_DEBUG
,"Mark FREE pages: %lld pages at %lld\n",
7787 (long long)count
, (long long)page
);
7790 /* Test if the page is free */
7791 static int vmFreePage(off_t page
) {
7792 off_t byte
= page
/8;
7794 return (server
.vm_bitmap
[byte
] & (1<<bit
)) == 0;
7797 /* Find N contiguous free pages storing the first page of the cluster in *first.
7798 * Returns REDIS_OK if it was able to find N contiguous pages, otherwise
7799 * REDIS_ERR is returned.
7801 * This function uses a simple algorithm: we try to allocate
7802 * REDIS_VM_MAX_NEAR_PAGES sequentially, when we reach this limit we start
7803 * again from the start of the swap file searching for free spaces.
7805 * If it looks pretty clear that there are no free pages near our offset
7806 * we try to find less populated places doing a forward jump of
7807 * REDIS_VM_MAX_RANDOM_JUMP, then we start scanning again a few pages
7808 * without hurry, and then we jump again and so forth...
7810 * This function can be improved using a free list to avoid guessing
7811 * too much, since we could collect data about freed pages.
7813 * note: I implemented this function just after watching an episode of
7814 * Battlestar Galactica, where the hybrid was continuing to say "JUMP!"
7816 static int vmFindContiguousPages(off_t
*first
, off_t n
) {
7817 off_t base
, offset
= 0, since_jump
= 0, numfree
= 0;
7819 if (server
.vm_near_pages
== REDIS_VM_MAX_NEAR_PAGES
) {
7820 server
.vm_near_pages
= 0;
7821 server
.vm_next_page
= 0;
7823 server
.vm_near_pages
++; /* Yet another try for pages near to the old ones */
7824 base
= server
.vm_next_page
;
7826 while(offset
< server
.vm_pages
) {
7827 off_t
this = base
+offset
;
7829 /* If we overflow, restart from page zero */
7830 if (this >= server
.vm_pages
) {
7831 this -= server
.vm_pages
;
7833 /* Just overflowed, what we found on tail is no longer
7834 * interesting, as it's no longer contiguous. */
7838 if (vmFreePage(this)) {
7839 /* This is a free page */
7841 /* Already got N free pages? Return to the caller, with success */
7843 *first
= this-(n
-1);
7844 server
.vm_next_page
= this+1;
7845 redisLog(REDIS_DEBUG
, "FOUND CONTIGUOUS PAGES: %lld pages at %lld\n", (long long) n
, (long long) *first
);
7849 /* The current one is not a free page */
7853 /* Fast-forward if the current page is not free and we already
7854 * searched enough near this place. */
7856 if (!numfree
&& since_jump
>= REDIS_VM_MAX_RANDOM_JUMP
/4) {
7857 offset
+= random() % REDIS_VM_MAX_RANDOM_JUMP
;
7859 /* Note that even if we rewind after the jump, we don't need
7860 * to make sure numfree is set to zero as we only jump *if* it
7861 * is set to zero. */
7863 /* Otherwise just check the next page */
7870 /* Write the specified object at the specified page of the swap file */
7871 static int vmWriteObjectOnSwap(robj
*o
, off_t page
) {
7872 if (server
.vm_enabled
) pthread_mutex_lock(&server
.io_swapfile_mutex
);
7873 if (fseeko(server
.vm_fp
,page
*server
.vm_page_size
,SEEK_SET
) == -1) {
7874 if (server
.vm_enabled
) pthread_mutex_unlock(&server
.io_swapfile_mutex
);
7875 redisLog(REDIS_WARNING
,
7876 "Critical VM problem in vmWriteObjectOnSwap(): can't seek: %s",
7880 rdbSaveObject(server
.vm_fp
,o
);
7881 fflush(server
.vm_fp
);
7882 if (server
.vm_enabled
) pthread_mutex_unlock(&server
.io_swapfile_mutex
);
7886 /* Swap the 'val' object relative to 'key' into disk. Store all the information
7887 * needed to later retrieve the object into the key object.
7888 * If we can't find enough contiguous empty pages to swap the object on disk
7889 * REDIS_ERR is returned. */
7890 static int vmSwapObjectBlocking(robj
*key
, robj
*val
) {
7891 off_t pages
= rdbSavedObjectPages(val
,NULL
);
7894 assert(key
->storage
== REDIS_VM_MEMORY
);
7895 assert(key
->refcount
== 1);
7896 if (vmFindContiguousPages(&page
,pages
) == REDIS_ERR
) return REDIS_ERR
;
7897 if (vmWriteObjectOnSwap(val
,page
) == REDIS_ERR
) return REDIS_ERR
;
7898 key
->vm
.page
= page
;
7899 key
->vm
.usedpages
= pages
;
7900 key
->storage
= REDIS_VM_SWAPPED
;
7901 key
->vtype
= val
->type
;
7902 decrRefCount(val
); /* Deallocate the object from memory. */
7903 vmMarkPagesUsed(page
,pages
);
7904 redisLog(REDIS_DEBUG
,"VM: object %s swapped out at %lld (%lld pages)",
7905 (unsigned char*) key
->ptr
,
7906 (unsigned long long) page
, (unsigned long long) pages
);
7907 server
.vm_stats_swapped_objects
++;
7908 server
.vm_stats_swapouts
++;
7912 static robj
*vmReadObjectFromSwap(off_t page
, int type
) {
7915 if (server
.vm_enabled
) pthread_mutex_lock(&server
.io_swapfile_mutex
);
7916 if (fseeko(server
.vm_fp
,page
*server
.vm_page_size
,SEEK_SET
) == -1) {
7917 redisLog(REDIS_WARNING
,
7918 "Unrecoverable VM problem in vmReadObjectFromSwap(): can't seek: %s",
7922 o
= rdbLoadObject(type
,server
.vm_fp
);
7924 redisLog(REDIS_WARNING
, "Unrecoverable VM problem in vmReadObjectFromSwap(): can't load object from swap file: %s", strerror(errno
));
7927 if (server
.vm_enabled
) pthread_mutex_unlock(&server
.io_swapfile_mutex
);
7931 /* Load the value object relative to the 'key' object from swap to memory.
7932 * The newly allocated object is returned.
7934 * If preview is true the unserialized object is returned to the caller but
7935 * no changes are made to the key object, nor the pages are marked as freed */
7936 static robj
*vmGenericLoadObject(robj
*key
, int preview
) {
7939 redisAssert(key
->storage
== REDIS_VM_SWAPPED
|| key
->storage
== REDIS_VM_LOADING
);
7940 val
= vmReadObjectFromSwap(key
->vm
.page
,key
->vtype
);
7942 key
->storage
= REDIS_VM_MEMORY
;
7943 key
->vm
.atime
= server
.unixtime
;
7944 vmMarkPagesFree(key
->vm
.page
,key
->vm
.usedpages
);
7945 redisLog(REDIS_DEBUG
, "VM: object %s loaded from disk",
7946 (unsigned char*) key
->ptr
);
7947 server
.vm_stats_swapped_objects
--;
7949 redisLog(REDIS_DEBUG
, "VM: object %s previewed from disk",
7950 (unsigned char*) key
->ptr
);
7952 server
.vm_stats_swapins
++;
7956 /* Plain object loading, from swap to memory.
 *
 * If the key is currently marked REDIS_VM_LOADING, the pending threaded
 * I/O job for it is canceled with vmCancelThreadedIOJob() first. The
 * value is then loaded through vmGenericLoadObject() with 'preview' set
 * to 0, and whatever that call returns is returned to the caller. */
7957 static robj
*vmLoadObject(robj
*key
) {
7958 /* If we are loading the object in background, stop it, we
7959 * need to load this object synchronously ASAP. */
7960 if (key
->storage
== REDIS_VM_LOADING
)
7961 vmCancelThreadedIOJob(key
);
7962 return vmGenericLoadObject(key
,0);
7965 /* Just load the value stored on disk, without modifying the key.
7966 * This is useful when we want to perform some operation on the value
7967 * without really bringing it from swap to memory, like while saving the
7968 * dataset or rewriting the append only log.
 *
 * Implemented as vmGenericLoadObject() with 'preview' set to 1; the
 * result of that call is returned unchanged. */
7969 static robj
*vmPreviewObject(robj
*key
) {
7970 return vmGenericLoadObject(key
,1);
7973 /* How good a candidate is this object for swapping?
7974 * The better a candidate it is, the greater the returned value.
7976 * Currently we try to perform a fast estimation of the object size in
7977 * memory, and combine it with aging information.
7979 * Basically swappability = idle-time * log(estimated size)
7981 * Bigger objects are preferred over smaller objects, but not
7982 * proportionally, this is why we use the logarithm. This algorithm is
7983 * just a first try and will probably be tuned later. */
7984 static double computeObjectSwappability(robj
*o
) {
7985 time_t age
= server
.unixtime
- o
->vm
.atime
;
7989 struct dictEntry
*de
;
7992 if (age
<= 0) return 0;
7995 if (o
->encoding
!= REDIS_ENCODING_RAW
) {
7998 asize
= sdslen(o
->ptr
)+sizeof(*o
)+sizeof(long)*2;
8003 listNode
*ln
= listFirst(l
);
8005 asize
= sizeof(list
);
8007 robj
*ele
= ln
->value
;
8010 elesize
= (ele
->encoding
== REDIS_ENCODING_RAW
) ?
8011 (sizeof(*o
)+sdslen(ele
->ptr
)) :
8013 asize
+= (sizeof(listNode
)+elesize
)*listLength(l
);
8018 z
= (o
->type
== REDIS_ZSET
);
8019 d
= z
? ((zset
*)o
->ptr
)->dict
: o
->ptr
;
8021 asize
= sizeof(dict
)+(sizeof(struct dictEntry
*)*dictSlots(d
));
8022 if (z
) asize
+= sizeof(zset
)-sizeof(dict
);
8027 de
= dictGetRandomKey(d
);
8028 ele
= dictGetEntryKey(de
);
8029 elesize
= (ele
->encoding
== REDIS_ENCODING_RAW
) ?
8030 (sizeof(*o
)+sdslen(ele
->ptr
)) :
8032 asize
+= (sizeof(struct dictEntry
)+elesize
)*dictSize(d
);
8033 if (z
) asize
+= sizeof(zskiplistNode
)*dictSize(d
);
8037 return (double)age
*log(1+asize
);
8040 /* Try to swap an object that's a good candidate for swapping.
8041 * Returns REDIS_OK if the object was swapped, REDIS_ERR if it's not possible
8042 * to swap any object at all.
8044 * If 'usethreads' is true, Redis will try to swap the object in background
8045 * using I/O threads. */
8046 static int vmSwapOneObject(int usethreads
) {
8048 struct dictEntry
*best
= NULL
;
8049 double best_swappability
= 0;
8050 redisDb
*best_db
= NULL
;
8053 for (j
= 0; j
< server
.dbnum
; j
++) {
8054 redisDb
*db
= server
.db
+j
;
8055 /* Why is maxtries set to 100?
8056 * Because this way (usually) we'll find 1 object even if just 1% - 2%
8057 * are swappable objects */
8060 if (dictSize(db
->dict
) == 0) continue;
8061 for (i
= 0; i
< 5; i
++) {
8063 double swappability
;
8065 if (maxtries
) maxtries
--;
8066 de
= dictGetRandomKey(db
->dict
);
8067 key
= dictGetEntryKey(de
);
8068 val
= dictGetEntryVal(de
);
8069 /* Only swap objects that are currently in memory.
8071 * Also don't swap shared objects if threaded VM is on, as we
8072 * try to ensure that the main thread does not touch the
8073 * object while the I/O thread is using it, but we can't
8074 * control other keys without adding additional mutex. */
8075 if (key
->storage
!= REDIS_VM_MEMORY
||
8076 (server
.vm_max_threads
!= 0 && val
->refcount
!= 1)) {
8077 if (maxtries
) i
--; /* don't count this try */
8080 swappability
= computeObjectSwappability(val
);
8081 if (!best
|| swappability
> best_swappability
) {
8083 best_swappability
= swappability
;
8088 if (best
== NULL
) return REDIS_ERR
;
8089 key
= dictGetEntryKey(best
);
8090 val
= dictGetEntryVal(best
);
8092 redisLog(REDIS_DEBUG
,"Key with best swappability: %s, %f",
8093 key
->ptr
, best_swappability
);
8095 /* Unshare the key if needed */
8096 if (key
->refcount
> 1) {
8097 robj
*newkey
= dupStringObject(key
);
8099 key
= dictGetEntryKey(best
) = newkey
;
8103 vmSwapObjectThreaded(key
,val
,best_db
);
8106 if (vmSwapObjectBlocking(key
,val
) == REDIS_OK
) {
8107 dictGetEntryVal(best
) = NULL
;
/* Blocking variant of vmSwapOneObject(): calls it with 'usethreads' set
 * to 0 and returns its result unchanged. */
8115 static int vmSwapOneObjectBlocking() {
8116 return vmSwapOneObject(0);
/* Threaded variant of vmSwapOneObject(): calls it with 'usethreads' set
 * to 1 and returns its result unchanged. */
8119 static int vmSwapOneObjectThreaded() {
8120 return vmSwapOneObject(1);
8123 /* Return true if it's safe to swap out objects in a given moment.
8124 * Basically we don't want to swap objects out while there is a BGSAVE
8125 * or a BGREWRITEAOF running in background.
 *
 * The check is that both server.bgsavechildpid and
 * server.bgrewritechildpid are equal to -1. */
8126 static int vmCanSwapOut(void) {
8127 return (server
.bgsavechildpid
== -1 && server
.bgrewritechildpid
== -1);
8130 /* Delete a key if swapped. Returns 1 if the key was found, was swapped
8131 * and was deleted. Otherwise 0 is returned. */
8132 static int deleteIfSwapped(redisDb
*db
, robj
*key
) {
8136 if ((de
= dictFind(db
->dict
,key
)) == NULL
) return 0;
8137 foundkey
= dictGetEntryKey(de
);
8138 if (foundkey
->storage
== REDIS_VM_MEMORY
) return 0;
8143 /* =================== Virtual Memory - Threaded I/O ======================= */
8145 static void freeIOJob(iojob
*j
) {
8146 if ((j
->type
== REDIS_IOJOB_PREPARE_SWAP
||
8147 j
->type
== REDIS_IOJOB_DO_SWAP
||
8148 j
->type
== REDIS_IOJOB_LOAD
) && j
->val
!= NULL
)
8149 decrRefCount(j
->val
);
8150 decrRefCount(j
->key
);
8154 /* Every time a thread finished a Job, it writes a byte into the write side
8155 * of an unix pipe in order to "awake" the main thread, and this function
8157 static void vmThreadedIOCompletedJob(aeEventLoop
*el
, int fd
, void *privdata
,
8161 int retval
, processed
= 0, toprocess
= -1, trytoswap
= 1;
8163 REDIS_NOTUSED(mask
);
8164 REDIS_NOTUSED(privdata
);
8166 /* For every byte we read in the read side of the pipe, there is one
8167 * I/O job completed to process. */
8168 while((retval
= read(fd
,buf
,1)) == 1) {
8172 struct dictEntry
*de
;
8174 redisLog(REDIS_DEBUG
,"Processing I/O completed job");
8176 /* Get the processed element (the oldest one) */
8178 assert(listLength(server
.io_processed
) != 0);
8179 if (toprocess
== -1) {
8180 toprocess
= (listLength(server
.io_processed
)*REDIS_MAX_COMPLETED_JOBS_PROCESSED
)/100;
8181 if (toprocess
<= 0) toprocess
= 1;
8183 ln
= listFirst(server
.io_processed
);
8185 listDelNode(server
.io_processed
,ln
);
8187 /* If this job is marked as canceled, just ignore it */
8192 /* Post process it in the main thread, as there are things we
8193 * can do just here to avoid race conditions and/or invasive locks */
8194 redisLog(REDIS_DEBUG
,"Job %p type: %d, key at %p (%s) refcount: %d\n", (void*) j
, j
->type
, (void*)j
->key
, (char*)j
->key
->ptr
, j
->key
->refcount
);
8195 de
= dictFind(j
->db
->dict
,j
->key
);
8197 key
= dictGetEntryKey(de
);
8198 if (j
->type
== REDIS_IOJOB_LOAD
) {
8201 /* Key loaded, bring it at home */
8202 key
->storage
= REDIS_VM_MEMORY
;
8203 key
->vm
.atime
= server
.unixtime
;
8204 vmMarkPagesFree(key
->vm
.page
,key
->vm
.usedpages
);
8205 redisLog(REDIS_DEBUG
, "VM: object %s loaded from disk (threaded)",
8206 (unsigned char*) key
->ptr
);
8207 server
.vm_stats_swapped_objects
--;
8208 server
.vm_stats_swapins
++;
8209 dictGetEntryVal(de
) = j
->val
;
8210 incrRefCount(j
->val
);
8213 /* Handle clients waiting for this key to be loaded. */
8214 handleClientsBlockedOnSwappedKey(db
,key
);
8215 } else if (j
->type
== REDIS_IOJOB_PREPARE_SWAP
) {
8216 /* Now we know the amount of pages required to swap this object.
8217 * Let's find some space for it, and queue this task again
8218 * rebranded as REDIS_IOJOB_DO_SWAP. */
8219 if (!vmCanSwapOut() ||
8220 vmFindContiguousPages(&j
->page
,j
->pages
) == REDIS_ERR
)
8222 /* Ooops... no space or we can't swap as there is
8223 * a fork()ed Redis trying to save stuff on disk. */
8225 key
->storage
= REDIS_VM_MEMORY
; /* undo operation */
8227 /* Note that we need to mark this pages as used now,
8228 * if the job will be canceled, we'll mark them as freed
8230 vmMarkPagesUsed(j
->page
,j
->pages
);
8231 j
->type
= REDIS_IOJOB_DO_SWAP
;
8236 } else if (j
->type
== REDIS_IOJOB_DO_SWAP
) {
8239 /* Key swapped. We can finally free some memory. */
8240 if (key
->storage
!= REDIS_VM_SWAPPING
) {
8241 printf("key->storage: %d\n",key
->storage
);
8242 printf("key->name: %s\n",(char*)key
->ptr
);
8243 printf("key->refcount: %d\n",key
->refcount
);
8244 printf("val: %p\n",(void*)j
->val
);
8245 printf("val->type: %d\n",j
->val
->type
);
8246 printf("val->ptr: %s\n",(char*)j
->val
->ptr
);
8248 redisAssert(key
->storage
== REDIS_VM_SWAPPING
);
8249 val
= dictGetEntryVal(de
);
8250 key
->vm
.page
= j
->page
;
8251 key
->vm
.usedpages
= j
->pages
;
8252 key
->storage
= REDIS_VM_SWAPPED
;
8253 key
->vtype
= j
->val
->type
;
8254 decrRefCount(val
); /* Deallocate the object from memory. */
8255 dictGetEntryVal(de
) = NULL
;
8256 redisLog(REDIS_DEBUG
,
8257 "VM: object %s swapped out at %lld (%lld pages) (threaded)",
8258 (unsigned char*) key
->ptr
,
8259 (unsigned long long) j
->page
, (unsigned long long) j
->pages
);
8260 server
.vm_stats_swapped_objects
++;
8261 server
.vm_stats_swapouts
++;
8263 /* Put a few more swap requests in queue if we are still
8265 if (trytoswap
&& vmCanSwapOut() &&
8266 zmalloc_used_memory() > server
.vm_max_memory
)
8271 more
= listLength(server
.io_newjobs
) <
8272 (unsigned) server
.vm_max_threads
;
8274 /* Don't waste CPU time if swappable objects are rare. */
8275 if (vmSwapOneObjectThreaded() == REDIS_ERR
) {
8283 if (processed
== toprocess
) return;
8285 if (retval
< 0 && errno
!= EAGAIN
) {
8286 redisLog(REDIS_WARNING
,
8287 "WARNING: read(2) error in vmThreadedIOCompletedJob() %s",
/* Acquire server.io_mutex with pthread_mutex_lock(). The return value of
 * the lock call is not checked. unlockThreadedIO() releases the same mutex.
 * NOTE(review): presumably this mutex guards the io_newjobs / io_processing /
 * io_processed job lists -- confirm against the callers. */
8292 static void lockThreadedIO(void) {
8293 pthread_mutex_lock(&server
.io_mutex
);
/* Release server.io_mutex with pthread_mutex_unlock(). The return value of
 * the unlock call is not checked. Counterpart of lockThreadedIO(). */
8296 static void unlockThreadedIO(void) {
8297 pthread_mutex_unlock(&server
.io_mutex
);
8300 /* Remove the specified object from the threaded I/O queue if still not
8301 * processed, otherwise make sure to flag it as canceled. */
8302 static void vmCancelThreadedIOJob(robj
*o
) {
8304 server
.io_newjobs
, /* 0 */
8305 server
.io_processing
, /* 1 */
8306 server
.io_processed
/* 2 */
8310 assert(o
->storage
== REDIS_VM_LOADING
|| o
->storage
== REDIS_VM_SWAPPING
);
8313 /* Search for a matching key in one of the queues */
8314 for (i
= 0; i
< 3; i
++) {
8318 listRewind(lists
[i
],&li
);
8319 while ((ln
= listNext(&li
)) != NULL
) {
8320 iojob
*job
= ln
->value
;
8322 if (job
->canceled
) continue; /* Skip this, already canceled. */
8323 if (compareStringObjects(job
->key
,o
) == 0) {
8324 redisLog(REDIS_DEBUG
,"*** CANCELED %p (%s) (type %d) (LIST ID %d)\n",
8325 (void*)job
, (char*)o
->ptr
, job
->type
, i
);
8326 /* Mark the pages as free since the swap didn't happen
8327 * or happened but is now discarded. */
8328 if (i
!= 1 && job
->type
== REDIS_IOJOB_DO_SWAP
)
8329 vmMarkPagesFree(job
->page
,job
->pages
);
8330 /* Cancel the job. It depends on the list the job is
8333 case 0: /* io_newjobs */
8334 /* If the job was not yet processed the best thing to do
8335 * is to remove it from the queue altogether */
8337 listDelNode(lists
[i
],ln
);
8339 case 1: /* io_processing */
8340 /* Oh Shi- the thread is messing with the Job:
8342 * Probably it's accessing the object if this is a
8343 * PREPARE_SWAP or DO_SWAP job.
8344 * If it's a LOAD job it may be reading from disk and
8345 * if we don't wait for the job to terminate before we
8346 * cancel it, maybe in a few microseconds data can be
8347 * corrupted in these pages. So the short story is:
8349 * Better to wait for the job to move into the
8350 * next queue (processed)... */
8352 /* We try again and again until the job is completed. */
8354 /* But let's wait some time for the I/O thread
8355 * to finish with this job. After all this condition
8356 * should be very rare. */
8359 case 2: /* io_processed */
8360 /* The job was already processed, that's easy...
8361 * just mark it as canceled so that we'll ignore it
8362 * when processing completed jobs. */
8366 /* Finally we have to adjust the storage type of the object
8367 * in order to "UNDO" the operation. */
8368 if (o
->storage
== REDIS_VM_LOADING
)
8369 o
->storage
= REDIS_VM_SWAPPED
;
8370 else if (o
->storage
== REDIS_VM_SWAPPING
)
8371 o
->storage
= REDIS_VM_MEMORY
;
8378 assert(1 != 1); /* We should never reach this */
8381 static void *IOThreadEntryPoint(void *arg
) {
8386 pthread_detach(pthread_self());
8388 /* Get a new job to process */
8390 if (listLength(server
.io_newjobs
) == 0) {
8391 /* No new jobs in queue, exit. */
8392 redisLog(REDIS_DEBUG
,"Thread %ld exiting, nothing to do",
8393 (long) pthread_self());
8394 server
.io_active_threads
--;
8398 ln
= listFirst(server
.io_newjobs
);
8400 listDelNode(server
.io_newjobs
,ln
);
8401 /* Add the job in the processing queue */
8402 j
->thread
= pthread_self();
8403 listAddNodeTail(server
.io_processing
,j
);
8404 ln
= listLast(server
.io_processing
); /* We use ln later to remove it */
8406 redisLog(REDIS_DEBUG
,"Thread %ld got a new job (type %d): %p about key '%s'",
8407 (long) pthread_self(), j
->type
, (void*)j
, (char*)j
->key
->ptr
);
8409 /* Process the Job */
8410 if (j
->type
== REDIS_IOJOB_LOAD
) {
8411 j
->val
= vmReadObjectFromSwap(j
->page
,j
->key
->vtype
);
8412 } else if (j
->type
== REDIS_IOJOB_PREPARE_SWAP
) {
8413 FILE *fp
= fopen("/dev/null","w+");
8414 j
->pages
= rdbSavedObjectPages(j
->val
,fp
);
8416 } else if (j
->type
== REDIS_IOJOB_DO_SWAP
) {
8417 if (vmWriteObjectOnSwap(j
->val
,j
->page
) == REDIS_ERR
)
8421 /* Done: insert the job into the processed queue */
8422 redisLog(REDIS_DEBUG
,"Thread %ld completed the job: %p (key %s)",
8423 (long) pthread_self(), (void*)j
, (char*)j
->key
->ptr
);
8425 listDelNode(server
.io_processing
,ln
);
8426 listAddNodeTail(server
.io_processed
,j
);
8429 /* Signal the main thread there is new stuff to process */
8430 assert(write(server
.io_ready_pipe_write
,"x",1) == 1);
8432 return NULL
; /* never reached */
8435 static void spawnIOThread(void) {
8437 sigset_t mask
, omask
;
8440 sigaddset(&mask
,SIGCHLD
);
8441 sigaddset(&mask
,SIGHUP
);
8442 sigaddset(&mask
,SIGPIPE
);
8443 pthread_sigmask(SIG_SETMASK
, &mask
, &omask
);
8444 pthread_create(&thread
,&server
.io_threads_attr
,IOThreadEntryPoint
,NULL
);
8445 pthread_sigmask(SIG_SETMASK
, &omask
, NULL
);
8446 server
.io_active_threads
++;
8449 /* We need to wait for the last thread to exit before we are able to
8450 * fork() in order to BGSAVE or BGREWRITEAOF. */
8451 static void waitEmptyIOJobsQueue(void) {
8453 int io_processed_len
;
8456 if (listLength(server
.io_newjobs
) == 0 &&
8457 listLength(server
.io_processing
) == 0 &&
8458 server
.io_active_threads
== 0)
8463 /* While waiting for empty jobs queue condition we post-process some
8464 * finished jobs, as I/O threads may be hanging trying to write against
8465 * the io_ready_pipe_write FD but there are so much pending jobs that
8467 io_processed_len
= listLength(server
.io_processed
);
8469 if (io_processed_len
) {
8470 vmThreadedIOCompletedJob(NULL
,server
.io_ready_pipe_read
,NULL
,0);
8471 usleep(1000); /* 1 millisecond */
8473 usleep(10000); /* 10 milliseconds */
8478 static void vmReopenSwapFile(void) {
8479 /* Note: we don't close the old one as we are in the child process
8480 * and don't want to mess at all with the original file object. */
8481 server
.vm_fp
= fopen(server
.vm_swap_file
,"r+b");
8482 if (server
.vm_fp
== NULL
) {
8483 redisLog(REDIS_WARNING
,"Can't re-open the VM swap file: %s. Exiting.",
8484 server
.vm_swap_file
);
8487 server
.vm_fd
= fileno(server
.vm_fp
);
8490 /* This function must be called while with threaded IO locked */
8491 static void queueIOJob(iojob
*j
) {
8492 redisLog(REDIS_DEBUG
,"Queued IO Job %p type %d about key '%s'\n",
8493 (void*)j
, j
->type
, (char*)j
->key
->ptr
);
8494 listAddNodeTail(server
.io_newjobs
,j
);
8495 if (server
.io_active_threads
< server
.vm_max_threads
)
8499 static int vmSwapObjectThreaded(robj
*key
, robj
*val
, redisDb
*db
) {
8502 assert(key
->storage
== REDIS_VM_MEMORY
);
8503 assert(key
->refcount
== 1);
8505 j
= zmalloc(sizeof(*j
));
8506 j
->type
= REDIS_IOJOB_PREPARE_SWAP
;
8508 j
->key
= dupStringObject(key
);
8512 j
->thread
= (pthread_t
) -1;
8513 key
->storage
= REDIS_VM_SWAPPING
;
8521 /* ============ Virtual Memory - Blocking clients on missing keys =========== */
8523 /* This function makes the client 'c' wait for the key 'key' to be loaded.
8524 * If there is not already a job loading the key, it is created.
8525 * The key is added to the io_keys list in the client structure, and also
8526 * in the hash table mapping swapped keys to waiting clients, that is,
8527 * server.io_waited_keys. */
8528 static int waitForSwappedKey(redisClient
*c
, robj
*key
) {
8529 struct dictEntry
*de
;
8533 /* If the key does not exist or is already in RAM we don't need to
8534 * block the client at all. */
8535 de
= dictFind(c
->db
->dict
,key
);
8536 if (de
== NULL
) return 0;
8537 o
= dictGetEntryKey(de
);
8538 if (o
->storage
== REDIS_VM_MEMORY
) {
8540 } else if (o
->storage
== REDIS_VM_SWAPPING
) {
8541 /* We were swapping the key, undo it! */
8542 vmCancelThreadedIOJob(o
);
8546 /* OK: the key is either swapped, or being loaded just now. */
8548 /* Add the key to the list of keys this client is waiting for.
8549 * This maps clients to keys they are waiting for. */
8550 listAddNodeTail(c
->io_keys
,key
);
8553 /* Add the client to the swapped keys => clients waiting map. */
8554 de
= dictFind(c
->db
->io_keys
,key
);
8558 /* For every key we take a list of clients blocked for it */
8560 retval
= dictAdd(c
->db
->io_keys
,key
,l
);
8562 assert(retval
== DICT_OK
);
8564 l
= dictGetEntryVal(de
);
8566 listAddNodeTail(l
,c
);
8568 /* Are we already loading the key from disk? If not create a job */
8569 if (o
->storage
== REDIS_VM_SWAPPED
) {
8572 o
->storage
= REDIS_VM_LOADING
;
8573 j
= zmalloc(sizeof(*j
));
8574 j
->type
= REDIS_IOJOB_LOAD
;
8576 j
->key
= dupStringObject(key
);
8577 j
->key
->vtype
= o
->vtype
;
8578 j
->page
= o
->vm
.page
;
8581 j
->thread
= (pthread_t
) -1;
8589 /* Is this client attempting to run a command against swapped keys?
8590 * If so, block it ASAP, load the keys in background, then resume it.
8592 * The important idea about this function is that it can fail! If keys will
8593 * still be swapped when the client is resumed, these key lookups will
8594 * just block loading keys from disk. In practical terms this should only
8595 * happen with SORT BY command or if there is a bug in this function.
8597 * Return 1 if the client is marked as blocked, 0 if the client can
8598 * continue as the keys it is going to access appear to be in memory. */
8599 static int blockClientOnSwappedKeys(struct redisCommand
*cmd
, redisClient
*c
) {
8602 if (cmd
->vm_firstkey
== 0) return 0;
8603 last
= cmd
->vm_lastkey
;
8604 if (last
< 0) last
= c
->argc
+last
;
8605 for (j
= cmd
->vm_firstkey
; j
<= last
; j
+= cmd
->vm_keystep
)
8606 waitForSwappedKey(c
,c
->argv
[j
]);
8607 /* If the client was blocked for at least one key, mark it as blocked. */
8608 if (listLength(c
->io_keys
)) {
8609 c
->flags
|= REDIS_IO_WAIT
;
8610 aeDeleteFileEvent(server
.el
,c
->fd
,AE_READABLE
);
8611 server
.vm_blocked_clients
++;
8618 /* Remove the 'key' from the list of blocked keys for a given client.
8620 * The function returns 1 when there are no longer blocking keys after
8621 * the current one was removed (and the client can be unblocked). */
8622 static int dontWaitForSwappedKey(redisClient
*c
, robj
*key
) {
8626 struct dictEntry
*de
;
8628 /* Remove the key from the list of keys this client is waiting for. */
8629 listRewind(c
->io_keys
,&li
);
8630 while ((ln
= listNext(&li
)) != NULL
) {
8631 if (compareStringObjects(ln
->value
,key
) == 0) {
8632 listDelNode(c
->io_keys
,ln
);
8638 /* Remove the client from the key => waiting clients map. */
8639 de
= dictFind(c
->db
->io_keys
,key
);
8641 l
= dictGetEntryVal(de
);
8642 ln
= listSearchKey(l
,c
);
8645 if (listLength(l
) == 0)
8646 dictDelete(c
->db
->io_keys
,key
);
8648 return listLength(c
->io_keys
) == 0;
8651 static void handleClientsBlockedOnSwappedKey(redisDb
*db
, robj
*key
) {
8652 struct dictEntry
*de
;
8657 de
= dictFind(db
->io_keys
,key
);
8660 l
= dictGetEntryVal(de
);
8661 len
= listLength(l
);
8662 /* Note: we can't use something like while(listLength(l)) as the list
8663 * can be freed by the calling function when we remove the last element. */
8666 redisClient
*c
= ln
->value
;
8668 if (dontWaitForSwappedKey(c
,key
)) {
8669 /* Put the client in the list of clients ready to go as we
8670 * loaded all the keys about it. */
8671 listAddNodeTail(server
.io_ready_clients
,c
);
8676 /* ================================= Debugging ============================== */
8678 static void debugCommand(redisClient
*c
) {
8679 if (!strcasecmp(c
->argv
[1]->ptr
,"segfault")) {
8681 } else if (!strcasecmp(c
->argv
[1]->ptr
,"reload")) {
8682 if (rdbSave(server
.dbfilename
) != REDIS_OK
) {
8683 addReply(c
,shared
.err
);
8687 if (rdbLoad(server
.dbfilename
) != REDIS_OK
) {
8688 addReply(c
,shared
.err
);
8691 redisLog(REDIS_WARNING
,"DB reloaded by DEBUG RELOAD");
8692 addReply(c
,shared
.ok
);
8693 } else if (!strcasecmp(c
->argv
[1]->ptr
,"loadaof")) {
8695 if (loadAppendOnlyFile(server
.appendfilename
) != REDIS_OK
) {
8696 addReply(c
,shared
.err
);
8699 redisLog(REDIS_WARNING
,"Append Only File loaded by DEBUG LOADAOF");
8700 addReply(c
,shared
.ok
);
8701 } else if (!strcasecmp(c
->argv
[1]->ptr
,"object") && c
->argc
== 3) {
8702 dictEntry
*de
= dictFind(c
->db
->dict
,c
->argv
[2]);
8706 addReply(c
,shared
.nokeyerr
);
8709 key
= dictGetEntryKey(de
);
8710 val
= dictGetEntryVal(de
);
8711 if (!server
.vm_enabled
|| (key
->storage
== REDIS_VM_MEMORY
||
8712 key
->storage
== REDIS_VM_SWAPPING
)) {
8713 addReplySds(c
,sdscatprintf(sdsempty(),
8714 "+Key at:%p refcount:%d, value at:%p refcount:%d "
8715 "encoding:%d serializedlength:%lld\r\n",
8716 (void*)key
, key
->refcount
, (void*)val
, val
->refcount
,
8717 val
->encoding
, (long long) rdbSavedObjectLen(val
,NULL
)));
8719 addReplySds(c
,sdscatprintf(sdsempty(),
8720 "+Key at:%p refcount:%d, value swapped at: page %llu "
8721 "using %llu pages\r\n",
8722 (void*)key
, key
->refcount
, (unsigned long long) key
->vm
.page
,
8723 (unsigned long long) key
->vm
.usedpages
));
8725 } else if (!strcasecmp(c
->argv
[1]->ptr
,"swapout") && c
->argc
== 3) {
8726 dictEntry
*de
= dictFind(c
->db
->dict
,c
->argv
[2]);
8729 if (!server
.vm_enabled
) {
8730 addReplySds(c
,sdsnew("-ERR Virtual Memory is disabled\r\n"));
8734 addReply(c
,shared
.nokeyerr
);
8737 key
= dictGetEntryKey(de
);
8738 val
= dictGetEntryVal(de
);
8739 /* If the key is shared we want to create a copy */
8740 if (key
->refcount
> 1) {
8741 robj
*newkey
= dupStringObject(key
);
8743 key
= dictGetEntryKey(de
) = newkey
;
8746 if (key
->storage
!= REDIS_VM_MEMORY
) {
8747 addReplySds(c
,sdsnew("-ERR This key is not in memory\r\n"));
8748 } else if (vmSwapObjectBlocking(key
,val
) == REDIS_OK
) {
8749 dictGetEntryVal(de
) = NULL
;
8750 addReply(c
,shared
.ok
);
8752 addReply(c
,shared
.err
);
8755 addReplySds(c
,sdsnew(
8756 "-ERR Syntax error, try DEBUG [SEGFAULT|OBJECT <key>|SWAPOUT <key>|RELOAD]\r\n"));
8760 static void _redisAssert(char *estr
, char *file
, int line
) {
8761 redisLog(REDIS_WARNING
,"=== ASSERTION FAILED ===");
8762 redisLog(REDIS_WARNING
,"==> %s:%d '%s' is not true\n",file
,line
,estr
);
8763 #ifdef HAVE_BACKTRACE
8764 redisLog(REDIS_WARNING
,"(forcing SIGSEGV in order to print the stack trace)");
8769 /* =================================== Main! ================================ */
8772 int linuxOvercommitMemoryValue(void) {
8773 FILE *fp
= fopen("/proc/sys/vm/overcommit_memory","r");
8777 if (fgets(buf
,64,fp
) == NULL
) {
8786 void linuxOvercommitMemoryWarning(void) {
8787 if (linuxOvercommitMemoryValue() == 0) {
8788 redisLog(REDIS_WARNING
,"WARNING overcommit_memory is set to 0! Background save may fail under low condition memory. To fix this issue add 'vm.overcommit_memory = 1' to /etc/sysctl.conf and then reboot or run the command 'sysctl vm.overcommit_memory=1' for this to take effect.");
8791 #endif /* __linux__ */
8793 static void daemonize(void) {
8797 if (fork() != 0) exit(0); /* parent exits */
8798 setsid(); /* create a new session */
8800 /* Every output goes to /dev/null. If Redis is daemonized but
8801 * the 'logfile' is set to 'stdout' in the configuration file
8802 * it will not log at all. */
8803 if ((fd
= open("/dev/null", O_RDWR
, 0)) != -1) {
8804 dup2(fd
, STDIN_FILENO
);
8805 dup2(fd
, STDOUT_FILENO
);
8806 dup2(fd
, STDERR_FILENO
);
8807 if (fd
> STDERR_FILENO
) close(fd
);
8809 /* Try to write the pid file */
8810 fp
= fopen(server
.pidfile
,"w");
8812 fprintf(fp
,"%d\n",getpid());
8817 int main(int argc
, char **argv
) {
8822 resetServerSaveParams();
8823 loadServerConfig(argv
[1]);
8824 } else if (argc
> 2) {
8825 fprintf(stderr
,"Usage: ./redis-server [/path/to/redis.conf]\n");
8828 redisLog(REDIS_WARNING
,"Warning: no config file specified, using the default config. In order to specify a config file use 'redis-server /path/to/redis.conf'");
8830 if (server
.daemonize
) daemonize();
8832 redisLog(REDIS_NOTICE
,"Server started, Redis version " REDIS_VERSION
);
8834 linuxOvercommitMemoryWarning();
8837 if (server
.appendonly
) {
8838 if (loadAppendOnlyFile(server
.appendfilename
) == REDIS_OK
)
8839 redisLog(REDIS_NOTICE
,"DB loaded from append only file: %ld seconds",time(NULL
)-start
);
8841 if (rdbLoad(server
.dbfilename
) == REDIS_OK
)
8842 redisLog(REDIS_NOTICE
,"DB loaded from disk: %ld seconds",time(NULL
)-start
);
8844 redisLog(REDIS_NOTICE
,"The server is now ready to accept connections on port %d", server
.port
);
8845 aeSetBeforeSleepProc(server
.el
,beforeSleep
);
8847 aeDeleteEventLoop(server
.el
);
8851 /* ============================= Backtrace support ========================= */
8853 #ifdef HAVE_BACKTRACE
8854 static char *findFuncName(void *pointer
, unsigned long *offset
);
8856 static void *getMcontextEip(ucontext_t
*uc
) {
8857 #if defined(__FreeBSD__)
8858 return (void*) uc
->uc_mcontext
.mc_eip
;
8859 #elif defined(__dietlibc__)
8860 return (void*) uc
->uc_mcontext
.eip
;
8861 #elif defined(__APPLE__) && !defined(MAC_OS_X_VERSION_10_6)
8863 return (void*) uc
->uc_mcontext
->__ss
.__rip
;
8865 return (void*) uc
->uc_mcontext
->__ss
.__eip
;
8867 #elif defined(__APPLE__) && defined(MAC_OS_X_VERSION_10_6)
8868 #if defined(_STRUCT_X86_THREAD_STATE64) && !defined(__i386__)
8869 return (void*) uc
->uc_mcontext
->__ss
.__rip
;
8871 return (void*) uc
->uc_mcontext
->__ss
.__eip
;
8873 #elif defined(__i386__) || defined(__X86_64__) || defined(__x86_64__)
8874 return (void*) uc
->uc_mcontext
.gregs
[REG_EIP
]; /* Linux 32/64 bit */
8875 #elif defined(__ia64__) /* Linux IA64 */
8876 return (void*) uc
->uc_mcontext
.sc_ip
;
8882 static void segvHandler(int sig
, siginfo_t
*info
, void *secret
) {
8884 char **messages
= NULL
;
8885 int i
, trace_size
= 0;
8886 unsigned long offset
=0;
8887 ucontext_t
*uc
= (ucontext_t
*) secret
;
8889 REDIS_NOTUSED(info
);
8891 redisLog(REDIS_WARNING
,
8892 "======= Ooops! Redis %s got signal: -%d- =======", REDIS_VERSION
, sig
);
8893 infostring
= genRedisInfoString();
8894 redisLog(REDIS_WARNING
, "%s",infostring
);
8895 /* It's not safe to sdsfree() the returned string under memory
8896 * corruption conditions. Let it leak as we are going to abort */
8898 trace_size
= backtrace(trace
, 100);
8899 /* overwrite sigaction with caller's address */
8900 if (getMcontextEip(uc
) != NULL
) {
8901 trace
[1] = getMcontextEip(uc
);
8903 messages
= backtrace_symbols(trace
, trace_size
);
8905 for (i
=1; i
<trace_size
; ++i
) {
8906 char *fn
= findFuncName(trace
[i
], &offset
), *p
;
8908 p
= strchr(messages
[i
],'+');
8909 if (!fn
|| (p
&& ((unsigned long)strtol(p
+1,NULL
,10)) < offset
)) {
8910 redisLog(REDIS_WARNING
,"%s", messages
[i
]);
8912 redisLog(REDIS_WARNING
,"%d redis-server %p %s + %d", i
, trace
[i
], fn
, (unsigned int)offset
);
8915 /* free(messages); Don't call free() with possibly corrupted memory. */
/* Install segvHandler as the signal handler for SIGSEGV, SIGBUS, SIGFPE and
 * SIGILL (HAVE_BACKTRACE build).
 *
 * The handler is registered through sa_sigaction with SA_SIGINFO set, so it
 * is called with the three-argument (sig, info, context) form. The signal
 * mask used while the handler runs is emptied with sigemptyset(). The
 * return values of sigaction() are not checked. */
8919 static void setupSigSegvAction(void) {
8920 struct sigaction act
;
8922 sigemptyset (&act
.sa_mask
);
8923 /* When the SA_SIGINFO flag is set in sa_flags then sa_sigaction
8924 * is used. Otherwise, sa_handler is used */
8925 act
.sa_flags
= SA_NODEFER
| SA_ONSTACK
| SA_RESETHAND
| SA_SIGINFO
;
8926 act
.sa_sigaction
= segvHandler
;
8927 sigaction (SIGSEGV
, &act
, NULL
);
8928 sigaction (SIGBUS
, &act
, NULL
);
8929 sigaction (SIGFPE
, &act
, NULL
);
8930 sigaction (SIGILL
, &act
, NULL
);
/* NOTE(review): SIGBUS was already registered above with the same 'act';
 * this second registration looks redundant. */
8931 sigaction (SIGBUS
, &act
, NULL
);
8935 #include "staticsymbols.h"
8936 /* This function tries to convert a pointer into a function name. It's used in
8937 * order to provide a backtrace under segmentation fault that's able to
8938 * display functions declared as static (otherwise the backtrace is useless). */
8939 static char *findFuncName(void *pointer
, unsigned long *offset
){
8941 unsigned long off
, minoff
= 0;
8943 /* Try to match against the Symbol with the smallest offset */
8944 for (i
=0; symsTable
[i
].pointer
; i
++) {
8945 unsigned long lp
= (unsigned long) pointer
;
8947 if (lp
!= (unsigned long)-1 && lp
>= symsTable
[i
].pointer
) {
8948 off
=lp
-symsTable
[i
].pointer
;
8949 if (ret
< 0 || off
< minoff
) {
8955 if (ret
== -1) return NULL
;
8957 return symsTable
[ret
].name
;
8959 #else /* HAVE_BACKTRACE */
8960 static void setupSigSegvAction(void) {
8962 #endif /* HAVE_BACKTRACE */